Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -285,16 +285,22 @@ (rundb (dbr:dbstruct-get-rundb dbstruct))) (if local (for-each (lambda (db) (if (sqlite3:database? db) - (sqlite3:finalize! db))) + (begin + (sqlite3:interrupt! db) + (sqlite3:finalize! db #t)))) (hash-table-values (dbr:dbstruct-get-locdbs dbstruct)))) - (if rundb - (if (sqlite3:database? rundb) - (sqlite3:finalize! rundb) - (debug:print 2 "WARNING: attempting to close databases but got " rundb " instead of a database"))))) + (thread-sleep! 3) + (if (and rundb + (sqlite3:database? rundb)) + (handle-exceptions + exn + (debug:print 0 "WARNING: database files may not have been closed correctly. Consider running -cleanup-db") + (sqlite3:interrupt! rundb) + (sqlite3:finalize! rundb #t))))) (define (db:open-inmem-db) (let* ((db (sqlite3:open-database ":memory:")) (handler (make-busy-timeout 3600))) (db:initialize-run-id-db db) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -248,11 +248,14 @@ exn (if (> numretries 0) (begin (mutex-unlock! *http-mutex*) (thread-sleep! 1) - (close-all-connections!) + (handle-exceptions + exn + (debug:print 0 "WARNING: closing connections failed. Server at " fullurl " almost certainly dead") + (close-all-connections!)) (debug:print 0 "WARNING: Failed to communicate with server, trying again, numretries left: " numretries) (http-transport:client-api-send-receive run-id serverdat cmd params numretries: (- numretries 1))) (begin (mutex-unlock! *http-mutex*) #f)) @@ -268,16 +271,21 @@ ;; process and return it. (let* ((send-recieve (lambda () (mutex-lock! *http-mutex*) ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines) ;; ((exn http client-error) e (print e))) - (set! res (with-input-from-request ;; was dat - fullurl - (list (cons 'key "thekey") - (cons 'cmd cmd) - (cons 'params params)) - read-string)) + (set! res (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: failure in with-input-from-request. Giving up.") + #f) + (with-input-from-request ;; was dat + fullurl + (list (cons 'key "thekey") + (cons 'cmd cmd) + (cons 'params params)) + read-string))) ;; Shouldn't this be a call to the managed call-all-connections stuff above? (close-all-connections!) (mutex-unlock! *http-mutex*) )) (time-out (lambda () Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -91,10 +91,12 @@ fulln runscript))))) ;; assume it is on the path (rollup-status 0)) (change-directory top-path) + ;; (set-signal-handler! signal/int (lambda () + ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART, ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY* ;; (let ((test-info (rmt:get-testinfo-state-status run-id test-id))) (if (not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -279,10 +279,32 @@ "-q" ;; quiet 0, errors/warnings only ) args:arg-hash 0)) +(define (std-exit-procedure) + (rmt:print-db-stats) + (let ((run-ids (hash-table-keys *db-local-sync*))) + (if (not (null? run-ids)) + (db:multi-db-sync run-ids 'new2old))) + (if *dbstruct-db* (db:close-all *dbstruct-db*)) + (if *megatest-db* (begin + (sqlite3:interrupt! *megatest-db*) + (sqlite3:finalize! *megatest-db* #t))) + (if *task-db* (let ((db (vector-ref *task-db* 0))) + (sqlite3:interrupt! db) + (sqlite3:finalize! db #t)))) + +(define (std-signal-handler signum) + (signal-mask! signum) + (debug:print 0 "ERROR: Received signal " signum " exiting promptly") + (std-exit-procedure) + (exit)) + +(set-signal-handler! signal/int std-signal-handler) +(set-signal-handler! signal/term std-signal-handler) + (if (args:get-arg "-log") (let ((oup (open-output-file (args:get-arg "-log")))) (debug:print-info 0 "Sending log output to " (args:get-arg "-log")) (current-error-port oup) (current-output-port oup))) @@ -347,18 +369,11 @@ (let ((newval (conc (args:get-arg "-testpatt") "/" (args:get-arg "-itempatt")))) (debug:print 0 "WARNING: -itempatt has been deprecated, please use -testpatt testpatt/itempatt method, new testpatt is "newval) (hash-table-set! args:arg-hash "-testpatt" newval) (hash-table-delete! args:arg-hash "-itempatt"))) -(on-exit (lambda () - (rmt:print-db-stats) - (let ((run-ids (hash-table-keys *db-local-sync*))) - (if (not (null? run-ids)) - (db:multi-db-sync run-ids 'new2old))) - (if *dbstruct-db* (db:close-all *dbstruct-db*)) - (if *megatest-db* (sqlite3:finalize! *megatest-db*)) - (if *task-db* (sqlite3:finalize! (vector-ref *task-db* 0))))) +(on-exit std-exit-procedure) ;;====================================================================== ;; Misc general calls ;;====================================================================== Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -217,14 +217,16 @@ (task-key (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id))) (tasks-db (tasks:open-db))) (set-signal-handler! signal/int (lambda (signum) + (signal-mask! signum) (let ((tdb (tasks:open-db))) (tasks:set-state-given-param-key tdb task-key "killed") + ;; (sqlite3:interrupt! tdb) ;; seems silly? (sqlite3:finalize! tdb)) - (print "Killed by sigint. Exiting") + (print "Killed by signal " signum ". Exiting") (exit))) ;; register this run in monitor.db (tasks:add tasks-db "run-tests" user target runname test-patts task-key) ;; params) (tasks:set-state-given-param-key tasks-db task-key "running")