Overview
Context
Changes
Modified common.scm
from [3380145d50]
to [1694e7ccde].
︙ | | |
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
|
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
|
+
+
+
+
+
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
;; WARNING: This proc operates assuming that it is in the directory above the
;; logs directory you wish to log-rotate.
;;
(define (common:rotate-logs)
(if (not (directory-exists? "logs"))(create-directory "logs"))
(directory-fold
(lambda (file rem)
(handle-exceptions
exn
(debug:print-info 0 *default-log-port* "failed to rotate log " file ", probably handled by another process.")
(let* ((fullname (conc "logs/" file))
(file-age (- (current-seconds)(file-modification-time fullname))))
(if (and (string-match "^.*.log" file)
(> (file-size (conc "logs/" file)) 200000))
(let ((gzfile (conc "logs/" file ".gz")))
(if (file-exists? gzfile)
(begin
(debug:print-info 0 *default-log-port* "removing " gzfile)
(delete-file gzfile)))
(debug:print-info 0 *default-log-port* "compressing " file)
(system (conc "gzip logs/" file)))))
(if (or (and (string-match "^.*.log" file)
(> (file-size fullname) 200000))
(and (string-match "^server-.*.log" file)
(> (- (current-seconds) (file-modification-time fullname))
(* 8 60 60))))
(let ((gzfile (conc fullname ".gz")))
(if (file-exists? gzfile)
(begin
(debug:print-info 0 *default-log-port* "removing " gzfile)
(delete-file gzfile)))
(debug:print-info 0 *default-log-port* "compressing " file)
(system (conc "gzip " fullname)))
(if (> file-age (* (string->number (or (configf:lookup *configdat* "setup" "log-expire-days") "30")) 24 3600))
(handle-exceptions
exn
#f
(delete-file fullname)))))))
'()
"logs"))
;; Force a megatest cleanup-db if version is changed and skip-version-check not specified
;;
(define (common:exit-on-version-changed)
(if (common:version-changed?)
|
︙ | | |
Modified dashboard.scm
from [bb7acd661f]
to [9bbb1ee284].
︙ | | |
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
|
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
|
-
+
|
(if (args:get-arg "-h")
(begin
(print help)
(exit)))
(if (not (common:on-homehost?))
(begin
(debug:print 0 "ERROR: Current policy requires running dashboard on homehost: " (common:get-homehost))))
(debug:print 0 *default-log-port* "ERROR: Current policy requires running dashboard on homehost: " (common:get-homehost))))
;; TODO: Move this inside (main)
;;
(if (not (launch:setup))
(begin
(print "Failed to find megatest.config, exiting")
(exit 1)))
|
︙ | | |
Modified server.scm
from [b68dac663e]
to [5c1183db18].
︙ | | |
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
|
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
|
-
+
-
+
|
(let ((now (current-seconds)))
(sort
(filter (lambda (rec)
(let ((start-time (list-ref rec 3))
(mod-time (list-ref rec 0)))
;; (print "start-time: " start-time " mod-time: " mod-time)
(and start-time mod-time
(> (- now start-time) 1) ;; been running at least 1 seconds
(> (- now start-time) 0) ;; been running at least 0 seconds
(< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds
(< (- now start-time) 3600) ;; under one hour running time
(< (- now start-time) (string->number (or (configf:lookup *configdat* "server" "runtime") "3600"))) ;; under one hour running time
)))
srvlst)
(lambda (a b)
(< (list-ref a 3)
(list-ref b 3))))))
(define (server:get-first-best areapath)
|
︙ | | |
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
|
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
|
-
-
-
+
+
+
+
+
+
+
+
+
+
-
-
-
+
+
-
+
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
|
(let ((sig (server:mk-signature)))
(set! *my-client-signature* sig)
*my-client-signature*)))
;; kind start up of servers, wait 40 seconds before allowing another server for a given
;; run-id to be launched
(define (server:kind-run areapath)
(let ((last-run-time (hash-table-ref/default *server-kind-run* areapath #f)))
(if (or (not last-run-time)
(> (- (current-seconds) last-run-time) 30))
(let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun
(call-num (car last-run-dat))
(when-run (cadr last-run-dat))
(run-delay (+ (case call-num
((0) 0)
((1) 20)
((2) 300)
(else 600))
(random 5)))) ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously
(if (> (- (current-seconds) when-run) run-delay)
(begin
(server:run areapath)
(hash-table-set! *server-kind-run* areapath (current-seconds))))))
(server:run areapath))
(hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds)))))
(define (server:start-and-wait areapath #!key (timeout 60))
(let ((give-up-time (+ (current-seconds) timeout)))
(let loop ((server-url (server:check-if-running areapath)))
(if (or server-url
(> (current-seconds) give-up-time))
server-url
(let ((num-ok (server:get-best (server:get-list areapath))))
(let ((num-ok (length (server:get-best (server:get-list areapath)))))
(if (< num-ok 2) ;; if there are no decent candidates for servers then try starting a new one
(server:kind-run areapath))
(thread-sleep! 5)
(loop (server:check-if-running areapath)))))))
(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.
(define (server:dotserver-age-seconds areapath)
(let ((server-file (conc areapath "/.server")))
(begin
(handle-exceptions
exn
#f
(- (current-seconds)
(file-modification-time server-file))))))
;; no longer care if multiple servers are started by accident. older servers will drop off in time.
;;
(define (server:check-if-running areapath)
(let* ((servers (server:get-best (server:get-list areapath)))
(best-server (if (null? servers) #f (car servers)))
(dotserver-url (if best-server
(server:record->url best-server)
#f))) ;; (server:read-dotserver->url areapath))) ;; tdbdat (tasks:open-db)))
(if dotserver-url
(let* ((res (case *transport-type*
((http)(server:ping dotserver-url))
;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
)))
(if res
dotserver-url
(let* ((servers (server:get-best (server:get-list areapath))))
(if (null? servers)
#f
(let loop ((hed (car servers))
(tal (cdr servers)))
(let ((res (server:check-server hed)))
(if res
res
(if (null? tal)
#f
(loop (car tal)(cdr tal)))))))))
;; ping the given server
;;
(define (server:check-server server-record)
(let* ((server-url (server:record->url server-record))
(res (case *transport-type*
((http)(server:ping server-url))
;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
)))
(if res
server-url
(begin
;; (server:kill best-server)
#f)))
#f)))
(define (server:kill servr)
(match-let (((mod-time hostname port start-time pid)
servr))
(tasks:kill-server hostname pid)))
|
︙ | | |
Modified utils/lock-stats.sh
from [3f061e6171]
to [84d255afaf].
1
2
3
4
5
6
7
8
9
10
11
12
13
|
1
2
3
4
5
6
7
8
9
10
11
12
13
|
-
+
|
#!/bin/bash
while IFS=': ' read x x x x p x x i x; do
if ! [[ ${i}x == "x" ]];then
if ! $(echo $i|grep EOF >/dev/null);then
fname=$(sudo find -L "/proc/$p/fd" -maxdepth 1 -inum "$i" -exec readlink {} \; -quit)
fname=$(find -L "/proc/$p/fd" -maxdepth 1 -inum "$i" -exec readlink {} \; -quit)
if $(echo $fname | grep megatest.db > /dev/null) || \
$(echo $fname | egrep '.db/\d+.db' > /dev/null);then
echo $fname
fi
fi
fi
done < /proc/locks
|