203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
|
(debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep)))
(file-close status-file)
)
))))))
(define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)
(let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "30")))
(start-seconds (current-seconds))
(calc-minutes (lambda ()
(inexact->exact
(round
(-
(current-seconds)
start-seconds)))))
|
|
|
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
|
(debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep)))
(file-close status-file)
)
))))))
(define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)
(let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "60")))
(start-seconds (current-seconds))
(calc-minutes (lambda ()
(inexact->exact
(round
(-
(current-seconds)
start-seconds)))))
|
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
|
((equal? status "DEAD")
(tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
(rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.")
;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING
(set! kill-job? #f)))
(debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)
(launch:handle-zombie-tests run-id)
(when do-sync
;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append)
;; (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes)))))
;; (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync started at "(current-seconds)))
(tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
;; (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds)))
)
|
>
|
|
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
|
((equal? status "DEAD")
(tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
(rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.")
;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING
(set! kill-job? #f)))
(debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)
(if (common:low-noise-print 600 "run zombie") ;; every five minutes is plenty
(launch:handle-zombie-tests run-id))
(when do-sync
;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append)
;; (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes)))))
;; (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync started at "(current-seconds)))
(tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
;; (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds)))
)
|