Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -885,11 +885,12 @@ (if (and mcj (string->number mcj)) (string->number mcj) 1))) ;; length of the register queue ahead (reglen (if (number? reglen-in) reglen-in 1)) (last-time-incomplete (- (current-seconds) 900)) ;; force at least one clean up cycle - (last-time-some-running (current-seconds))) + (last-time-some-running (current-seconds)) + (tdbdat (tasks:open-db))) ;; Initialize the test-registery hash with tests that already have a record ;; convert state to symbol and use that as the hash value (for-each (lambda (trec) (let ((id (db:test-get-id trec)) @@ -932,10 +933,14 @@ (item-path (item-list->path itemdat)) (tfullname (runs:make-full-test-name test-name item-path)) (newtal (append tal (list hed))) (regfull (>= (length reg) reglen)) (num-running (rmt:get-count-tests-running-for-run-id run-id))) + + ;; every couple minutes verify the server is there for this run + (if (common:low-noise-print 60 "try start server" run-id) + (tasks:start-and-wait-for-server tdbdat run-id 10)) (if (> num-running 0) (set! last-time-some-running (current-seconds))) (if (> (current-seconds)(+ last-time-some-running 240)) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -348,18 +348,17 @@ res)) ;; try to start a server and wait for it to be available ;; (define (tasks:start-and-wait-for-server tdbdat run-id delay-max-tries) - (tdbdat (tasks:open-db)) ;; ensure a server is running for this run (let loop ((server-dat (tasks:get-server (db:delay-if-busy tdbdat) run-id)) (delay-time 0)) (if (and (not server-dat) (< delay-time delay-max-tries)) (begin - (debug:print 0 "Try starting server") + (if (common:low-noise-print 60 run-id)(debug:print 0 "Try starting server for run-id " run-id)) (server:kind-run run-id) (thread-sleep! (min delay-time 5)) (loop (tasks:get-server (db:delay-if-busy tdbdat) run-id)(+ delay-time 1)))))) (define (tasks:get-all-servers mdb)