Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -212,36 +212,38 @@ ;; keep this as a function to ease future (define (client:start run-id server-info) (http-transport:client-connect (tasks:hostinfo-get-interface server-info) (tasks:hostinfo-get-port server-info))) -;; client:signal-handler -(define (client:signal-handler signum) - (signal-mask! signum) - (handle-exceptions - exn - (debug:print " ... exiting ...") - (let ((th1 (make-thread (lambda () - "") ;; do nothing for now (was flush out last call if applicable) - "eat response")) - (th2 (make-thread (lambda () - (debug:print 0 "ERROR: Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.") - (thread-sleep! 1) ;; give the flush one second to do it's stuff - (debug:print 0 " Done.") - (exit 4)) - "exit on ^C timer"))) - (thread-start! th2) - (thread-start! th1) - (thread-join! th2)))) - -;; client:launch -;; Need to set the signal handler somewhere other than here as this -;; routine will go away. -;; -(define (client:launch run-id *area-dat*) - (set-signal-handler! signal/int client:signal-handler) - (if (client:setup run-id *area-dat*) - (debug:print-info 2 "connected as client") - (begin - (debug:print 0 "ERROR: Failed to connect as client") - (exit)))) - +;; ;; client:signal-handler +;; (define (client:signal-handler signum) +;; (signal-mask! signum) +;; (set! *time-to-exit* #t) +;; (handle-exceptions +;; exn +;; (debug:print " ... exiting ...") +;; (let ((th1 (make-thread (lambda () +;; "") ;; do nothing for now (was flush out last call if applicable) +;; "eat response")) +;; (th2 (make-thread (lambda () +;; (debug:print 0 "ERROR: Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.") +;; (thread-sleep! 1) ;; give the flush one second to do it's stuff +;; (debug:print 0 " Done.") +;; (exit 4)) +;; "exit on ^C timer"))) +;; (thread-start! th2) +;; (thread-start! th1) +;; (thread-join! th2)))) +;; +;; ;; client:launch +;; ;; Need to set the signal handler somewhere other than here as this +;; ;; routine will go away. +;; ;; +;; (define (client:launch run-id) +;; (set-signal-handler! signal/int client:signal-handler) +;; (set-signal-handler! signal/term client:signal-handler) +;; (if (client:setup run-id) +;; (debug:print-info 2 "connected as client") +;; (begin +;; (debug:print 0 "ERROR: Failed to connect as client") +;; (exit)))) +;; Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -22,10 +22,17 @@ (include "common_records.scm") ;; (require-library margs) ;; (include "margs.scm") +;; (define old-exit exit) +;; +;; (define (exit . code) +;; (if (null? code) +;; (old-exit) +;; (old-exit code))) + (define getenv get-environment-variable) (define (safe-setenv key val) (if (and (string? val)(string? key)) (handle-exceptions exn @@ -275,40 +282,61 @@ ;;====================================================================== ;; E X I T H A N D L I N G ;;====================================================================== (define (std-exit-procedure area-dat) - (debug:print-info 2 "starting exit process, finalizing databases.") - (rmt:print-db-stats area-dat) - (let* ((configdat (megatest:area-configdat area-dat)) - (run-ids (hash-table-keys *db-local-sync*))) - (if (and (not (null? run-ids)) - (configf:lookup configdat "setup" "megatest-db")) - (db:multi-db-sync run-ids 'new2old))) - (if *dbstruct-db* (db:close-all *dbstruct-db* area-dat)) - (if *inmemdb* (db:close-all *inmemdb* area-dat)) - (if (and *megatest-db* - (sqlite3:database? *megatest-db*)) - (begin - (sqlite3:interrupt! *megatest-db*) - (sqlite3:finalize! *megatest-db* #t) - (set! *megatest-db* #f))) - (if *task-db* (let ((db (cdr *task-db*))) - (if (sqlite3:database? db) - (begin - (sqlite3:interrupt! db) - (sqlite3:finalize! db #t) - (vector-set! *task-db* 0 #f)))))) + (let* ((no-hurry (if *time-to-exit* ;; hurry up + #f + (begin + (set! *time-to-exit* #t) + #t))) + (configdat (megatest:area-configdat area-dat)) + (run-ids (hash-table-keys *db-local-sync*))) + (debug:print-info 4 "starting exit process, finalizing databases.") + (if (and no-hurry (debug:debug-mode 18)) + (rmt:print-db-stats area-dat)) + (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds + (if (and (not (null? run-ids)) + (configf:lookup configdat "setup" "megatest-db")) + (if no-hurry (db:multi-db-sync run-ids 'new2old))) + (if *dbstruct-db* (db:close-all *dbstruct-db* area-dat)) + (if *inmemdb* (db:close-all *inmemdb* area-dat)) + (if (and *megatest-db* + (sqlite3:database? *megatest-db*)) + (begin + (sqlite3:interrupt! *megatest-db*) + (sqlite3:finalize! *megatest-db* #t) + (set! *megatest-db* #f))) + (if *task-db* + (let ((db (cdr *task-db*))) + (if (sqlite3:database? db) + (begin + (sqlite3:interrupt! db) + (sqlite3:finalize! db #t) + (vector-set! *task-db* 0 #f)))))) "Cleanup db exit thread")) + (th2 (make-thread (lambda () + (debug:print 4 "Attempting clean exit. Please be patient and wait a few seconds...") + (if no-hurry + (thread-sleep! 5) ;; give the clean up few seconds to do it's stuff + (thread-sleep! 1)) + (debug:print 0 " Done.") + ) + "clean exit"))) + (thread-start! th2) + (thread-start! th1) + (thread-join! th2)))) (define (std-signal-handler signum) - (signal-mask! signum) + ;; (signal-mask! signum) + (set! *time-to-exit* #t) (debug:print 0 "ERROR: Received signal " signum " exiting promptly") ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway (exit)) -(set-signal-handler! signal/int std-signal-handler) +(set-signal-handler! signal/int std-signal-handler) ;; ^C (set-signal-handler! signal/term std-signal-handler) +(set-signal-handler! signal/stop std-signal-handler) ;; ^Z ;;====================================================================== ;; Misc utils ;;====================================================================== Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -516,11 +516,12 @@ (for-each (lambda (dbdat) (debug:print 0 " dbpath: " (db:dbdat-get-path dbdat))) (cons todb slave-dbs)) (if *server-run* ;; we are inside a server, throw a sync-failed error (signal (make-composite-condition - (make-property-condition 'sync-failed 'message "db:sync-tables failed in a server context."))))) + (make-property-condition 'sync-failed 'message "db:sync-tables failed in a server context."))) + 0)) ;; return zero for num synced ;; (set! *time-to-exit* #t) ;; let watch dog know that it is time to die. ;; (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down") ;; (portlogger:open-run-close portlogger:set-port port "released") ;; (exit 1))) @@ -576,11 +577,11 @@ ;; tack on remaining records in fromdat (if (not (null? fromdat)) (set! fromdats (cons fromdat fromdats))) - (debug:print-info 2 "found " totrecords " records to sync") + (debug:print-info 4 "found " totrecords " records to sync") ;; read the target table (sqlite3:for-each-row (lambda (a . b) (hash-table-set! todat a (apply vector a b))) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -431,11 +431,11 @@ (debug:print 0 "ERROR: error from sync code other than 'sync-failed. Attempting to gracefully shutdown the server") (tasks:server-delete-record (db:delay-if-busy tdbdat area-dat) server-id " http-transport:keep-running crashed") (exit))) (set! sync-time (- (current-milliseconds) start-time)) (set! rem-time (quotient (- 4000 sync-time) 1000)) - (debug:print 2 "SYNC: time= " sync-time ", rem-time=" rem-time) + (debug:print 4 "SYNC: time= " sync-time ", rem-time=" rem-time) (if (and (<= rem-time 4) (> rem-time 0)) (thread-sleep! rem-time) (thread-sleep! 4))) ;; fallback for if the math is changed ... Index: megatest-version.scm ================================================================== --- megatest-version.scm +++ megatest-version.scm @@ -1,7 +1,7 @@ ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1,1101 ... 1.99,2.00.. (declare (unit megatest-version)) -(define megatest-version 1.6013) +(define megatest-version 1.6014) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -324,50 +324,58 @@ (thread-sleep! 0.05) ;; delay for startup ;; the query to get megatest-db setting might not work, forcing it to be default on. Use "no" to turn off (let ((legacy-sync (configf:lookup (megatest:area-configdat *area-dat*) "setup" "megatest-db")) (debug-mode (debug:debug-mode 1)) (last-time (current-seconds))) - (let loop () - ;; sync for filesystem local db writes - ;; - (let ((start-time (current-seconds)) - (servers-started (make-hash-table))) - (for-each - (lambda (run-id) - (mutex-lock! *db-multi-sync-mutex*) + (if (or (args:get-arg "-runtests") + (args:get-arg "-server") + (args:get-arg "-set-run-status") + (args:get-arg "-remove-runs") + (args:get-arg "-get-run-status") + ) + (let loop () + ;; sync for filesystem local db writes + ;; + (let ((start-time (current-seconds)) + (servers-started (make-hash-table))) + (for-each + (lambda (run-id) + (mutex-lock! *db-multi-sync-mutex*) (if (and (not (equal? legacy-sync "no")) - (hash-table-ref/default *db-local-sync* run-id #f)) - ;; (if (> (- start-time last-write) 5) ;; every five seconds - (begin ;; let ((sync-time (- (current-seconds) start-time))) + (hash-table-ref/default *db-local-sync* run-id #f)) + ;; (if (> (- start-time last-write) 5) ;; every five seconds + (begin ;; let ((sync-time (- (current-seconds) start-time))) (db:multi-db-sync (list run-id) *area-dat* 'new2old) - (if (common:low-noise-print 30 "sync new to old") - (let ((sync-time (- (current-seconds) start-time))) - (debug:print-info 0 "Sync of newdb to olddb for run-id " run-id " completed in " sync-time " seconds"))) - ;; (if (> sync-time 10) ;; took more than ten seconds, start a server for this run - ;; (begin - ;; (debug:print-info 0 "Sync is taking a long time, start up a server to assist for run " run-id) - ;; (server:kind-run run-id))))) - (hash-table-delete! *db-local-sync* run-id))) - (mutex-unlock! *db-multi-sync-mutex*)) - (hash-table-keys *db-local-sync*)) - (if (and debug-mode - (> (- start-time last-time) 60)) - (begin - (set! last-time start-time) - (debug:print-info 4 "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) - - ;; keep going unless time to exit - ;; - (if (not *time-to-exit*) - (let delay-loop ((count 0)) - (if (and (not *time-to-exit*) - (< count 11)) ;; aprox 5-6 seconds - (begin - (thread-sleep! 1) - (delay-loop (+ count 1)))) - (loop)))))) - "Watchdog thread")) + (if (common:low-noise-print 30 "sync new to old") + (let ((sync-time (- (current-seconds) start-time))) + (debug:print-info 0 "Sync of newdb to olddb for run-id " run-id " completed in " sync-time " seconds"))) + ;; (if (> sync-time 10) ;; took more than ten seconds, start a server for this run + ;; (begin + ;; (debug:print-info 0 "Sync is taking a long time, start up a server to assist for run " run-id) + ;; (server:kind-run run-id))))) + (hash-table-delete! *db-local-sync* run-id))) + (mutex-unlock! *db-multi-sync-mutex*)) + (hash-table-keys *db-local-sync*)) + (if (and debug-mode + (> (- start-time last-time) 60)) + (begin + (set! last-time start-time) + (debug:print-info 4 "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) + + ;; keep going unless time to exit + ;; + (if (not *time-to-exit*) + (let delay-loop ((count 0)) + (if (and (not *time-to-exit*) + (< count 11)) ;; aprox 5-6 seconds + (begin + (thread-sleep! 1) + (delay-loop (+ count 1)))) + (loop))) + (if (common:low-noise-print 30) + (debug:print-info 0 "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*))))) + "Watchdog thread"))) (thread-start! *watchdog*) (if (args:get-arg "-log") (let ((oup (open-output-file (args:get-arg "-log")))) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -226,18 +226,32 @@ (task-key (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id))) (tdbdat (tasks:open-db area-dat))) (if (tasks:need-server run-id area-dat)(tasks:start-and-wait-for-server tdbdat run-id 10)) - (set-signal-handler! signal/int - (lambda (signum) - ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting - (print "Received signal " signum ", cleaning up before exit. Please wait...") + (let ((sighand (lambda (signum) + ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting + (if (eq? signum signal/stop) + (debug:print 0 "ERROR: attempt to STOP process. Exiting.")) + (set! *time-to-exit* #t) + (print "Received signal " signum ", cleaning up before exit. Please wait...") (let ((tdbdat (tasks:open-db area-dat))) - (rmt:tasks-set-state-given-param-key task-key "killed")) - (print "Killed by signal " signum ". Exiting") - (exit))) + (let ((th1 (make-thread (lambda () + (rmt:tasks-set-state-given-param-key task-key "killed")) + (print "Killed by signal " signum ". Exiting") + (thread-sleep! 3) + (exit)))) + (th2 (make-thread (lambda () + (thread-sleep! 5) + (debug:print 0 "Done") + (exit 4))))) + (thread-start! th2) + (thread-start! th1) + (thread-join! th2))))) + (set-signal-handler! signal/int sighand) + (set-signal-handler! signal/term sighand) + (set-signal-handler! signal/stop sighand)) ;; register this run in monitor.db (rmt:tasks-add "run-tests" user target runname test-patts task-key area-dat) ;; params) (rmt:tasks-set-state-given-param-key task-key "running" area-dat) (runs:set-megatest-env-vars run-id area-dat inkeys: keys inrunname: runname) ;; these may be needed by the launching process