Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -199,14 +199,14 @@ (define (std-exit-procedure) ;;(common:telemetry-log-close) (on-exit (lambda () 0)) ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*) - (let ((no-hurry (if *time-to-exit* ;; hurry up + (let ((no-hurry (if (bdat-time-to-exit *bdat*) ;; hurry up #f (begin - (set! *time-to-exit* #t) + (bdat-time-to-exit-set! *bdat* #t) #t)))) (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.") (if (and no-hurry (debug:debug-mode 18)) (rmt:print-db-stats)) (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds @@ -340,23 +340,23 @@ (debug:print-info 0 *default-log-port* "Read-only periodic sync thread started.") (let loop ((last-sync-time 0)) (debug:print-info 13 *default-log-port* "loop top tmp-mtpath="tmp-mtpath" golden-mtpath="golden-mtpath) (let* ((duration-since-last-sync (- (current-seconds) last-sync-time))) (debug:print-info 13 *default-log-port* "duration-since-last-sync="duration-since-last-sync) - (if (and (not *time-to-exit*) + (if (and (not (bdat-time-to-exit *bdat*)) (< duration-since-last-sync sync-cool-off-duration)) (thread-sleep! (- sync-cool-off-duration duration-since-last-sync))) - (if (not *time-to-exit*) + (if (not (bdat-time-to-exit *bdat*)) (let ((golden-mtdb-mtime (file-modification-time golden-mtpath)) (tmp-mtdb-mtime (file-modification-time tmp-mtpath))) (if (> golden-mtdb-mtime tmp-mtdb-mtime) (if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back (let ((res (db:multi-db-sync dbstruct 'old2new))) (debug:print-info 13 *default-log-port* "rosync called, " res " records transferred.")))) (loop (current-seconds))) #t))) - (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath))) + (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, (bdat-time-to-exit *bdat*) = " (bdat-time-to-exit *bdat*)" pid="(current-process-id)" mtpath="golden-mtpath))) ;;====================================================================== ;; from metadat lookup MEGATEST_VERSION ;; (define (common:get-last-run-version) ;; RADT => How does this work in send-receive function??; assume it is the value saved in some DB Index: commonmod.scm ================================================================== --- commonmod.scm +++ commonmod.scm @@ -106,11 +106,11 @@ (defstruct bdat (home (getenv "HOME")) (user (getenv "USER")) (watchdog #f) - + (time-to-exit #f) (server-loop-heart-beat (current-seconds)) ) (define (make-and-init-bigdata) (set! *bdat* @@ -177,11 +177,11 @@ (define *runremote* #f) ;; if set up for server communication this will hold ;; (define *max-cache-size* 0) (define *logged-in-clients* (make-hash-table)) (define *server-id* #f) (define *server-info* #f) ;; good candidate for easily convert to non-global -(define *time-to-exit* #f) +;; (define *time-to-exit* #f) (define *server-run* #t) (define *run-id* #f) (define *server-kind-run* (make-hash-table)) (define *home-host* #f) ;; (define *total-non-write-delay* 0) @@ -1027,19 +1027,19 @@ ;; common:watchdog ;; std-exit-procedure was here (define (std-signal-handler signum) ;; (signal-mask! signum) - (set! *time-to-exit* #t) + (bdat-time-to-exit-set! *bdat* #t) ;;(debug:print-info 13 *default-log-port* "got signal "signum) (debug:print-error 0 *default-log-port* "Received signal " signum " aaa exiting promptly") ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway (exit)) (define (special-signal-handler signum) ;; (signal-mask! signum) - (set! *time-to-exit* #t) + (bdat-time-to-exit-set! *bdat* #t) ;;(debug:print-info 13 *default-log-port* "got signal "signum) (debug:print-error 0 *default-log-port* "Received signal " signum " sending email befor exiting!!") ;;TODO send email to notify admin contact listed in the config that the lisner got killed ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway (exit)) Index: http-transportmod.scm ================================================================== --- http-transportmod.scm +++ http-transportmod.scm @@ -608,11 +608,11 @@ ;;(BB> "http-transport:server-shutdown called") (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) ;; ;; start_shutdown ;; - (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up + (set! (bdat-time-to-exit *bdat*) #t) ;; tell on-exit to be fast as we've already cleaned up (portlogger:open-run-close portlogger:set-port port "released") (thread-sleep! 1) ;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*) ;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*) Index: launchmod.scm ================================================================== --- launchmod.scm +++ launchmod.scm @@ -460,11 +460,11 @@ (set! tconfigreg (tests:get-all)) ;; mapping of testname => test source path (let ((sighand (lambda (signum) ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting (if (eq? signum signal/stop) (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting.")) - (set! *time-to-exit* #t) + (set! (bdat-time-to-exit *bdat*) #t) (print "Received signal " signum ", cleaning up before exit (set this test to COMPLETED/ABORT) . Please wait...") (let ((th1 (make-thread (lambda () (print "set test to COMPLETED/ABORT begin.") (rmt:test-set-state-status run-id test-id "COMPLETED" "ABORT" "received kill signal") (print "set test to COMPLETED/ABORT complete.") Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -1129,11 +1129,11 @@ (configf:config->ini data)) (else (debug:print-error 0 *default-log-port* "-dumpmode of " (args:get-arg "-dumpmode") " not recognised"))) (set! *didsomething* #t) (pop-directory) - (set! *time-to-exit* #t))) + (bdat-time-to-exit-set! *bdat* #t))) (if (args:get-arg "-show-cmdinfo") (if (or (args:get-arg ":value")(getenv "MT_CMDINFO")) (let ((data (common:read-encoded-string (or (args:get-arg ":value")(getenv "MT_CMDINFO"))))) (if (equal? (args:get-arg "-dumpmode") "json") @@ -1406,11 +1406,11 @@ (print (string-join (map ->string table-row) ","))) table-rows)))) (set! *didsomething* #t) - (set! *time-to-exit* #t)) + (bdat-time-to-exit-set! *bdat* #t)) ;; NOTE: list-runs and list-db-targets operate on local db!!! ;; @@ -1749,15 +1749,15 @@ (conc (current-directory) "/" outputfile))))) (create-directory tempdir #t) (ods:list->ods tempdir ouf sheets)))) ;; (system (conc "rm -rf " tempdir)) (set! *didsomething* #t) - (set! *time-to-exit* #t) - ) ;; end if true branch (end of a let) - ) ;; end if + (bdat-time-to-exit-set! *bdat* #t) + ) ;; end if true branch (end of a let) + ) ;; end if ) ;; end if -list-runs - + ;; list-waivers (if (and (args:get-arg "-list-waivers") (launch:setup)) (let* ((runpatt (or (args:get-arg "-runname") "%")) (testpatt (common:args-get-testpatt #f)) @@ -2582,11 +2582,11 @@ ;; Exit and clean up ;;====================================================================== (if (not *didsomething*) (debug:print 0 *default-log-port* help) - (set! *time-to-exit* #t) + (bdat-time-to-exit-set! *bdat* #t) ) ;;(debug:print-info 13 *default-log-port* "thread-join! watchdog") ;; join the watchdog thread if it has been thread-start!ed (it may not have been started in the case of a server that never enters running state) ;; (symbols returned by thread-state: created ready running blocked suspended sleeping terminated dead) @@ -2595,11 +2595,11 @@ (if (thread? watchdog) (case (thread-state watchdog) ((ready running blocked sleeping terminated dead) (thread-join! watchdog))))) - (set! *time-to-exit* #t) + (bdat-time-to-exit-set! *bdat* #t) (if (not (eq? *globalexitstatus* 0)) (if (or (args:get-arg "-run")(args:get-arg "-runtests")(args:get-arg "-runall")) (begin (debug:print 0 *default-log-port* "NOTE: Subprocesses with non-zero exit code detected: " *globalexitstatus*) Index: runsmod.scm ================================================================== --- runsmod.scm +++ runsmod.scm @@ -383,11 +383,11 @@ ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) (let ((sighand (lambda (signum) ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting - (set! *time-to-exit* #t) + (bdat-time-to-exit-set! *bdat* #t) (print "Received signal " signum ", cleaning up before exit. Please wait...") (let ((th1 (make-thread (lambda () ;; (let ((tdbdat (tasks:open-db))) (rmt:tasks-set-state-given-param-key task-key "killed") ;; ) (print "Killed by signal " signum ". Exiting") Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -264,17 +264,17 @@ (when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync (args:get-arg "-server")) (let loop () (do-a-sync) - (if (not *time-to-exit*) (loop))) ;; keep going unless time to exit + (if (not (bdat-time-to-exit *bdat*)) (loop))) ;; keep going unless time to exit ;; time to exit, close the no-sync db here (final-sync) (if (common:low-noise-print 30) - (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) + (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = "(bdat-time-to-exit *bdat*)" pid="(current-process-id) ))))) (define (server:writable-watchdog-deltasync dbstruct) (thread-sleep! 0.054) ;; delay for startup (let ((legacy-sync (common:run-sync?)) @@ -286,11 +286,11 @@ (sync-duration 0) ;; run time of the sync in milliseconds ) (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls (debug:print-info 2 *default-log-port* "Periodic sync thread started.") (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id) );; " this-wd-num="this-wd-num) - (if (and legacy-sync (not *time-to-exit*)) + (if (and legacy-sync (not (bdat-time-to-exit *bdat*))) (let* (;;(dbstruct (db:setup)) (mtdb (dbr:dbstruct-mtdb dbstruct)) (mtpath (db:dbdat-get-path mtdb)) (tmp-area (common:get-db-tmp-area)) (start-file (conc tmp-area "/.start-sync")) @@ -301,11 +301,11 @@ ;; (mutex-lock! *db-multi-sync-mutex*) (let* ((need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write (sync-in-progress *db-sync-in-progress*) (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5)) - (should-sync (and (not *time-to-exit*) + (should-sync (and (not (bdat-time-to-exit *bdat*)) (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed (start-time (current-seconds)) (cpu-load-adj (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f))) (mt-mod-time (file-modification-time mtpath)) (last-sync-start (if (common:file-exists? start-file) @@ -317,11 +317,11 @@ (sync-period (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period (recently-synced (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db! (< mt-mod-time last-sync-start))) (sync-done (<= last-sync-start last-sync-end)) (sync-stale (> start-time (+ last-sync-start sync-stale-seconds))) - (will-sync (and (not *time-to-exit*) ;; do not start a sync if we are in the process of exiting + (will-sync (and (not (bdat-time-to-exit *bdat*)) ;; do not start a sync if we are in the process of exiting (or need-sync should-sync) (or sync-done sync-stale) (not sync-in-progress) (not recently-synced)))) (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. need-sync="need-sync" sync-in-progress=" sync-in-progress @@ -390,20 +390,20 @@ (set! last-time start-time) (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) ;; keep going unless time to exit ;; - (if (not *time-to-exit*) + (if (not (bdat-time-to-exit *bdat*)) (let delay-loop ((count 0)) - ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) + ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="(bdat-time-to-exit *bdat*)) - (if (and (not *time-to-exit*) + (if (and (not (bdat-time-to-exit *bdat*)) (< count 6)) ;; was 11, changing to 4. (begin (thread-sleep! 1) (delay-loop (+ count 1)))) - (if (not *time-to-exit*) (loop)))) + (if (not (bdat-time-to-exit *bdat*)) (loop)))) ;; time to exit, close the no-sync db here (db:no-sync-close-db no-sync-db stmt-cache) (if (common:low-noise-print 30) - (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num))))))) + (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " (bdat-time-to-exit *bdat*)" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num)))))))