Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -214,10 +214,11 @@ (tasks:hostinfo-get-port server-info))) ;; client:signal-handler (define (client:signal-handler signum) (signal-mask! signum) + (set! *time-to-exit* #t) (handle-exceptions exn (debug:print " ... exiting ...") (let ((th1 (make-thread (lambda () "") ;; do nothing for now (was flush out last call if applicable) Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -243,39 +243,51 @@ ;;====================================================================== ;; E X I T H A N D L I N G ;;====================================================================== (define (std-exit-procedure) - (debug:print-info 2 "starting exit process, finalizing databases.") - (rmt:print-db-stats) - (let ((run-ids (hash-table-keys *db-local-sync*))) - (if (and (not (null? run-ids)) - (configf:lookup *configdat* "setup" "megatest-db")) - (db:multi-db-sync run-ids 'new2old))) - (if *dbstruct-db* (db:close-all *dbstruct-db*)) - (if *inmemdb* (db:close-all *inmemdb*)) - (if (and *megatest-db* - (sqlite3:database? *megatest-db*)) - (begin - (sqlite3:interrupt! *megatest-db*) - (sqlite3:finalize! *megatest-db* #t) - (set! *megatest-db* #f))) - (if *task-db* (let ((db (cdr *task-db*))) - (if (sqlite3:database? db) - (begin - (sqlite3:interrupt! db) - (sqlite3:finalize! db #t) - (vector-set! *task-db* 0 #f)))))) + (set! *time-to-exit* #t) + (debug:print-info 0 "starting exit process, finalizing databases.") + (if (debug:debug-mode 18) + (rmt:print-db-stats)) + (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds + (let ((run-ids (hash-table-keys *db-local-sync*))) + (if (and (not (null? run-ids)) + (configf:lookup *configdat* "setup" "megatest-db")) + (db:multi-db-sync run-ids 'new2old))) + (if *dbstruct-db* (db:close-all *dbstruct-db*)) + (if *inmemdb* (db:close-all *inmemdb*)) + (if (and *megatest-db* + (sqlite3:database? *megatest-db*)) + (begin + (sqlite3:interrupt! *megatest-db*) + (sqlite3:finalize! *megatest-db* #t) + (set! *megatest-db* #f))) + (if *task-db* (let ((db (cdr *task-db*))) + (if (sqlite3:database? db) + (begin + (sqlite3:interrupt! db) + (sqlite3:finalize! db #t) + (vector-set! *task-db* 0 #f)))))) "Cleanup db exit thread")) + (th2 (make-thread (lambda () + (debug:print 0 "ERROR: Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.") + (thread-sleep! 5) ;; give the clean up few seconds to do it's stuff + (debug:print 0 " Done.") + (exit 4)) + "exit on ^C timer"))) + (thread-start! th2) + (thread-start! th1) + (thread-join! th2))) (define (std-signal-handler signum) - (signal-mask! signum) + ;; (signal-mask! signum) (debug:print 0 "ERROR: Received signal " signum " exiting promptly") ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway (exit)) -(set-signal-handler! signal/int std-signal-handler) -(set-signal-handler! signal/term std-signal-handler) +(set-signal-handler! signal/int std-signal-handler) ;; ^C +;; (set-signal-handler! signal/term std-signal-handler) ;;====================================================================== ;; Misc utils ;;====================================================================== Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -506,11 +506,12 @@ (for-each (lambda (dbdat) (debug:print 0 " dbpath: " (db:dbdat-get-path dbdat))) (cons todb slave-dbs)) (if *server-run* ;; we are inside a server, throw a sync-failed error (signal (make-composite-condition - (make-property-condition 'sync-failed 'message "db:sync-tables failed in a server context."))))) + (make-property-condition 'sync-failed 'message "db:sync-tables failed in a server context."))) + 0)) ;; return zero for num synced ;; (set! *time-to-exit* #t) ;; let watch dog know that it is time to die. ;; (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down") ;; (portlogger:open-run-close portlogger:set-port port "released") ;; (exit 1))) @@ -566,11 +567,11 @@ ;; tack on remaining records in fromdat (if (not (null? fromdat)) (set! fromdats (cons fromdat fromdats))) - (debug:print-info 2 "found " totrecords " records to sync") + (debug:print-info 4 "found " totrecords " records to sync") ;; read the target table (sqlite3:for-each-row (lambda (a . b) (hash-table-set! todat a (apply vector a b))) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -422,11 +422,11 @@ (debug:print 0 "ERROR: error from sync code other than 'sync-failed. Attempting to gracefully shutdown the server") (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running crashed") (exit))) (set! sync-time (- (current-milliseconds) start-time)) (set! rem-time (quotient (- 4000 sync-time) 1000)) - (debug:print 2 "SYNC: time= " sync-time ", rem-time=" rem-time) + (debug:print 4 "SYNC: time= " sync-time ", rem-time=" rem-time) (if (and (<= rem-time 4) (> rem-time 0)) (thread-sleep! rem-time) (thread-sleep! 4))) ;; fallback for if the math is changed ... Index: megatest-version.scm ================================================================== --- megatest-version.scm +++ megatest-version.scm @@ -1,7 +1,7 @@ ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1,1101 ... 1.99,2.00.. (declare (unit megatest-version)) -(define megatest-version 1.6013) +(define megatest-version 1.6014) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -307,50 +307,57 @@ (lambda () (thread-sleep! 0.05) ;; delay for startup (let ((legacy-sync (configf:lookup *configdat* "setup" "megatest-db")) (debug-mode (debug:debug-mode 1)) (last-time (current-seconds))) - (let loop () - ;; sync for filesystem local db writes - ;; - (let ((start-time (current-seconds)) - (servers-started (make-hash-table))) - (for-each - (lambda (run-id) - (mutex-lock! *db-multi-sync-mutex*) - (if (and legacy-sync - (hash-table-ref/default *db-local-sync* run-id #f)) - ;; (if (> (- start-time last-write) 5) ;; every five seconds - (begin ;; let ((sync-time (- (current-seconds) start-time))) - (db:multi-db-sync (list run-id) 'new2old) - (if (common:low-noise-print 30 "sync new to old") - (let ((sync-time (- (current-seconds) start-time))) - (debug:print-info 0 "Sync of newdb to olddb for run-id " run-id " completed in " sync-time " seconds"))) - ;; (if (> sync-time 10) ;; took more than ten seconds, start a server for this run - ;; (begin - ;; (debug:print-info 0 "Sync is taking a long time, start up a server to assist for run " run-id) - ;; (server:kind-run run-id))))) - (hash-table-delete! *db-local-sync* run-id))) - (mutex-unlock! *db-multi-sync-mutex*)) - (hash-table-keys *db-local-sync*)) - (if (and debug-mode - (> (- start-time last-time) 60)) - (begin - (set! last-time start-time) - (debug:print-info 4 "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) - - ;; keep going unless time to exit - ;; - (if (not *time-to-exit*) - (let delay-loop ((count 0)) - (if (and (not *time-to-exit*) - (< count 11)) ;; aprox 5-6 seconds - (begin - (thread-sleep! 1) - (delay-loop (+ count 1)))) - (loop)))))) - "Watchdog thread")) + (if (or (args:get-arg "-runtests") + (args:get-arg "-server") + (args:get-arg "-set-run-status") + (args:get-arg "-remove-runs") + (args:get-arg "-get-run-status") + ) + (let loop () + ;; sync for filesystem local db writes + ;; + (let ((start-time (current-seconds)) + (servers-started (make-hash-table))) + (for-each + (lambda (run-id) + (mutex-lock! *db-multi-sync-mutex*) + (if (and legacy-sync + (hash-table-ref/default *db-local-sync* run-id #f)) + ;; (if (> (- start-time last-write) 5) ;; every five seconds + (begin ;; let ((sync-time (- (current-seconds) start-time))) + (db:multi-db-sync (list run-id) 'new2old) + (if (common:low-noise-print 30 "sync new to old") + (let ((sync-time (- (current-seconds) start-time))) + (debug:print-info 0 "Sync of newdb to olddb for run-id " run-id " completed in " sync-time " seconds"))) + ;; (if (> sync-time 10) ;; took more than ten seconds, start a server for this run + ;; (begin + ;; (debug:print-info 0 "Sync is taking a long time, start up a server to assist for run " run-id) + ;; (server:kind-run run-id))))) + (hash-table-delete! *db-local-sync* run-id))) + (mutex-unlock! *db-multi-sync-mutex*)) + (hash-table-keys *db-local-sync*)) + (if (and debug-mode + (> (- start-time last-time) 60)) + (begin + (set! last-time start-time) + (debug:print-info 4 "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) + + ;; keep going unless time to exit + ;; + (if (not *time-to-exit*) + (let delay-loop ((count 0)) + (if (and (not *time-to-exit*) + (< count 11)) ;; aprox 5-6 seconds + (begin + (thread-sleep! 1) + (delay-loop (+ count 1)))) + (loop)))) + (debug:print-info 0 "Exiting watchdog timer"))) + "Watchdog thread"))) (thread-start! *watchdog*) (if (args:get-arg "-log") (let ((oup (open-output-file (args:get-arg "-log")))) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -225,15 +225,24 @@ (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) (set-signal-handler! signal/int (lambda (signum) ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting + (set! *time-to-exit* #t) (print "Received signal " signum ", cleaning up before exit. Please wait...") - (let ((tdbdat (tasks:open-db))) - (rmt:tasks-set-state-given-param-key task-key "killed")) - (print "Killed by signal " signum ". Exiting") - (exit))) + (let ((th1 (make-thread (lambda () + (let ((tdbdat (tasks:open-db))) + (rmt:tasks-set-state-given-param-key task-key "killed")) + (print "Killed by signal " signum ". Exiting") + (exit)))) + (th2 (make-thread (lambda () + (thread-sleep! 3) + (debug:print 0 "Done") + (exit 4))))) + (thread-start! th2) + (thread-start! th1) + (thread-join! th2)))) ;; register this run in monitor.db (rmt:tasks-add "run-tests" user target runname test-patts task-key) ;; params) (rmt:tasks-set-state-given-param-key task-key "running") (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process