Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -355,11 +355,11 @@ (let* ((server-info (let loop ((start-time (current-seconds)) (changed #t) (last-sdat "not this")) (let ((sdat #f)) (thread-sleep! 0.01) - (debug:print-info 0 "Waiting for server alive signal") + (debug:print-info 0 "Waiting for server alive signature") (mutex-lock! *heartbeat-mutex*) (set! sdat *server-info*) (mutex-unlock! *heartbeat-mutex*) (if (and sdat (not changed) @@ -366,13 +366,19 @@ (> (- (current-seconds) start-time) 2)) sdat (begin (debug:print-info 0 "Still waiting, last-sdat=" last-sdat) (sleep 4) - (loop start-time - (equal? sdat last-sdat) - sdat)))))) + (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes + (let ((tdb (tasks:open-db))) + (debug:print 0 "ERROR: transport appears to have died, exiting server " server-id " for run " run-id) + (tasks:server-delete-record tdb server-id "failed to start, never received server alive signature") + (sqlite3:finalize! tdb) + (exit)) + (loop start-time + (equal? sdat last-sdat) + sdat))))))) (iface (car server-info)) (port (cadr server-info)) (last-access 0) (tdb (tasks:open-db)) (server-timeout (let ((tmo (configf:lookup *configdat* "server" "timeout"))) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -687,13 +687,15 @@ (if (process:alive? pid) (process-signal pid signal/kill))))) ;; (call-with-environment-variables (let ((old-targethost (getenv "TARGETHOST"))) (setenv "TARGETHOST" hostname) + (setenv "TARGETHOST_LOGF" "server-kills.log") (system (conc "nbfake kill " pid)) (if old-targethost (setenv "TARGETHOST" old-targethost)) - (unsetenv "TARGETHOST")))) + (unsetenv "TARGETHOST") + (unsetenv "TARGETHOST_LOGF")))) (debug:print 0 "ERROR: no record or improper record for " target "/" run-name " in tasks_queue in monitor.db")))) records))) ;;======================================================================