Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -572,33 +572,36 @@ (define *wdnum*mutex (make-mutex)) ;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp ;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server) ;; (define (common:watchdog) - (thread-sleep! 0.05) ;; delay for startup (let ((legacy-sync (common:run-sync?)) (debug-mode (debug:debug-mode 1)) (last-time (current-seconds)) - (this-wd-num (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x))) - ) + (this-wd-num (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x)))) (debug:print-info 0 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)" this-wd-num="this-wd-num) (if (and legacy-sync (not *time-to-exit*)) - (let ((dbstruct (db:setup))) + (let* ((dbstruct (db:setup)) + (mtdb (dbr:dbstruct-mtdb dbstruct)) + (mtpath (db:dbdat-get-path mtdb))) (debug:print-info 0 *default-log-port* "Server running, periodic sync started.") (let loop () - ;;(BB> "watchdog loop. pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) ;; sync for filesystem local db writes ;; (mutex-lock! *db-multi-sync-mutex*) (let* ((need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write (sync-in-progress *db-sync-in-progress*) (should-sync (and (not *time-to-exit*) (> (- (current-seconds) *db-last-sync*) 5))) ;; sync every five seconds minimum + (start-time (current-seconds)) + (mt-mod-time (file-modification-time mtpath)) + (recently-synced (> (- start-time mt-mod-time) 4)) (will-sync (and (or need-sync should-sync) - (not sync-in-progress))) - (start-time (current-seconds))) + (not sync-in-progress) + (not recently-synced)))) + ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced)) ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) (if will-sync (set! *db-sync-in-progress* #t)) (mutex-unlock! *db-multi-sync-mutex*) (if will-sync (let ((res (common:sync-to-megatest.db dbstruct))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -13,14 +13,14 @@ ;; Database access ;;====================================================================== ;; dbstruct vector containing all the relevant dbs like main.db, megatest.db, run.db etc -(use (srfi 18) extras tcp stack) ;; RADT => use of require-extension? +(use (srfi 18) extras tcp stack) (use sqlite3 srfi-1 posix regex regex-case srfi-69 csv-xml s11n md5 message-digest base64 format dot-locking z3 typed-records matchable) (import (prefix sqlite3 sqlite3:)) -(import (prefix base64 base64:)) ;; RADT => prefix?? +(import (prefix base64 base64:)) (declare (unit db)) (declare (uses common)) (declare (uses keys)) (declare (uses ods)) @@ -2040,13 +2040,16 @@ db "SELECT id FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;") (reverse run-ids))))) ;; get some basic run stats +;; +;; data structure: ;; ;; ( (runname (( state count ) ... )) -;; ( ... +;; ( ... +;; (define (db:get-run-stats dbstruct) (let* ((totals (make-hash-table)) (curr (make-hash-table)) (res '()) (runs-info '())) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -379,11 +379,11 @@ (if (not server-going) ;; *dbstruct-db* (begin (debug:print 0 *default-log-port* "SERVER: dbprep") (set! *dbstruct-db* (db:setup)) ;; run-id)) (set! server-going #t) - (debug:print 0 *default-log-port* "SERVER: running") ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine. + (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine. (thread-start! *watchdog*))) ;; when things go wrong we don't want to be doing the various queries too often ;; so we strive to run this stuff only every four seconds or so. (let* ((sync-time (- (current-milliseconds) start-time)) @@ -425,19 +425,20 @@ server-timeout))) (if (common:low-noise-print 120 "server timeout") (debug:print-info 0 *default-log-port* "Adjusted server timeout: " adjusted-timeout)) (cond ((and *server-run* - (> (+ last-access server-timeout) - (current-seconds))) + (> (+ last-access server-timeout) + (current-seconds)) + (< (- (current-seconds) server-start-time) 3600)) ;; do not update log or touch log if we've been running for more than one hour. (if (common:low-noise-print 120 "server continuing") (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) (let ((curr-time (current-seconds))) (change-file-times server-log-file curr-time curr-time))) (loop 0 server-state bad-sync-count (current-milliseconds))) (else - (debug:print-info 0 *default-log-port* "Server timeed out. seconds since last db access: " (- (current-seconds) last-access)) + (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) (http-transport:server-shutdown port))))))) (define (http-transport:server-shutdown port) (let ((tdbdat (tasks:open-db))) ;;(BB> "http-transport:server-shutdown called")