Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -227,12 +227,10 @@ (delete-file gzfile))) (debug:print-info 0 *default-log-port* "compressing " file) (system (conc "gzip logs/" file))))) '() "logs")) - - ;; Force a megatest cleanup-db if version is changed and skip-version-check not specified ;; (define (common:exit-on-version-changed) (if (common:version-changed?) @@ -514,34 +512,40 @@ ;;====================================================================== ;; E X I T H A N D L I N G ;;====================================================================== (define (common:run-sync?) - (and (common:on-homehost?) - (args:get-arg "-server"))) + (let ((ohh (common:on-homehost?)) + (srv (args:get-arg "-server"))) + (debug:print-info 0 *default-log-port* "common:run-sync? ohh=" ohh ", srv=" srv) + (and (common:on-homehost?) + (args:get-arg "-server")))) ;;;; run-ids ;; if #f use *db-local-sync* : or 'local-sync-flags ;; if #t use timestamps : or 'timestamps (define (common:sync-to-megatest.db dbstruct) - (let ((start-time (current-seconds))) - (db:multi-db-sync dbstruct 'new2old) + (let ((start-time (current-seconds)) + (res (db:multi-db-sync dbstruct 'new2old))) (let ((sync-time (- (current-seconds) start-time))) (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds") (if (common:low-noise-print 30 "sync new to old") - (debug:print-info 0 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds"))))) + (debug:print-info 0 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds"))) + res)) ;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp ;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server) ;; (define (common:watchdog) (thread-sleep! 0.05) ;; delay for startup (let ((legacy-sync (common:run-sync?)) (debug-mode (debug:debug-mode 1)) (last-time (current-seconds))) + (debug:print-info 0 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync) (if legacy-sync (let ((dbstruct (db:setup))) + (debug:print-info 0 *default-log-port* "Server running, periodic sync started.") (let loop () ;; sync for filesystem local db writes ;; (mutex-lock! *db-multi-sync-mutex*) (let* ((need-sync (>= *db-last-write* *db-last-sync*)) ;; no sync since last write @@ -548,17 +552,26 @@ (sync-in-progress *db-sync-in-progress*) (should-sync (> (- (current-seconds) *db-last-sync*) 5)) ;; sync every five seconds minimum (will-sync (and (or need-sync should-sync) (not sync-in-progress))) (start-time (current-seconds))) + ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) (if will-sync (set! *db-sync-in-progress* #t)) (mutex-unlock! *db-multi-sync-mutex*) - (if will-sync (common:sync-to-megatest.db dbstruct)) + (if will-sync + (let ((res (common:sync-to-megatest.db dbstruct))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive + (if (> res 0) ;; some records were transferred, keep the db alive + (begin + (mutex-lock! *heartbeat-mutex*) + (set! *db-last-access* (current-seconds)) + (mutex-unlock! *heartbeat-mutex*) + (debug:print-info 0 *default-log-port* "sync called, " res " records transferred.")) + (debug:print-info 2 *default-log-port* "sync called but zero records transferred")))) (if will-sync (begin (mutex-lock! *db-multi-sync-mutex*) - (set! db-sync-in-progress* #f) + (set! *db-sync-in-progress* #f) (set! *db-last-sync* start-time) (mutex-unlock! *db-multi-sync-mutex*))) (if (and debug-mode (> (- start-time last-time) 60)) (begin @@ -792,15 +805,22 @@ (debug:print-error 0 *default-log-port* "Invalid target, spaces or blanks not allowed \"" target "\", target should be: " (string-intersperse keys "/") ", have " tlist " for elements") #f) #f)))) ;; logic for getting homehost. Returns (host . at-home) +;; IF *toppath* is not set, wait up to five seconds trying every two seconds +;; (this is to accomodate the watchdog) ;; -(define (common:get-homehost) +(define (common:get-homehost #!key (trynum 5)) (cond (*home-host* *home-host*) - ((not *toppath*) #f) ;; don't know toppath yet? return #f + ((not *toppath*) + (if (> trynum 0) + (begin + (thread-sleep! 2) + (common:get-homehost trynum: (- trynum 1))) + #f)) (else (let* ((currhost (get-host-name)) (bestadrs (server:get-best-guess-address currhost)) ;; first look in config, then look in file .homehost, create it if not found (homehost (or (configf:lookup *configdat* "server" "homehost" ) @@ -847,10 +867,11 @@ (if (null? tala) ;; we are done talb (loop (car tala) (cdr tala) (car talb) + (cdr talb))) #f))))) ;; Needed for long lists to be sorted where (apply max ... ) dies ;; Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -798,11 +798,12 @@ (let* ((mtdb (dbr:dbstruct-mtdb dbstruct)) (tmpdb (dbr:dbstruct-tmpdb dbstruct)) (refndb (dbr:dbstruct-refndb dbstruct)) (allow-cleanup #t) ;; (if run-ids #f #t)) (tdbdat (tasks:open-db)) - (servers (tasks:get-all-servers (db:delay-if-busy tdbdat)))) + (servers (tasks:get-all-servers (db:delay-if-busy tdbdat))) + (data-synced 0)) ;; count of changed records (I hope) ;; kill servers (if (member 'killservers options) (for-each (lambda (server) @@ -844,11 +845,14 @@ ;; now ensure all newdb data are synced to megatest.db ;; do not use the run-ids list passed in to the function ;; (if (member 'new2old options) - (db:sync-tables (db:sync-all-tables-list dbstruct) #f tmpdb refndb mtdb)) + (set! data-synced + (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f tmpdb refndb mtdb) + data-synced))) + (if (member 'fixschema options) (begin (db:patch-schema-maindb (db:dbdat-get-db mtdb)) (db:patch-schema-maindb (db:dbdat-get-db tmpdb)) @@ -919,11 +923,11 @@ ;; (delete-file fullname))))) ;; dead-runs)))) ;; ;; (db:close-all dbstruct) ;; (sqlite3:finalize! mdb) - ))) + data-synced))) ;; keeping it around for debugging purposes only (define (open-run-close-no-exception-handling proc idb . params) (debug:print-info 11 *default-log-port* "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params) (print "I don't work anymore. open-run-close-no-exception-handling needs fixing or removing...") Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -186,11 +186,11 @@ #f ;; failed for some reason, for the moment simply return #f (with-output-to-file server-file (lambda () (print hostport))) #t))) - (debug:print-info 0 *default-log-port* "server file " serverfile " for " hostport " created") + (debug:print-info 0 *default-log-port* "server file " server-file " for " hostport " created") (common:simple-file-release-lock lock-file) res) #f))) (define (server:remove-dotserver-file areapath hostport)