Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -937,11 +937,11 @@ 0) (define (std-signal-handler signum) ;; (signal-mask! signum) - (set! *time-to-exit* #t) + (set! *time-to-exit* #t) ;;(debug:print-info 13 *default-log-port* "got signal "signum) (debug:print-error 0 *default-log-port* "Received signal " signum " aaa exiting promptly") ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway (exit)) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -313,11 +313,12 @@ ;; (define (db:open-db dbstruct #!key (areapath #f)(do-sync #t)) ;; TODO: actually use areapath (let ((tmpdb-stack (dbr:dbstruct-dbstack dbstruct))) ;; RA => Returns the first reference in dbstruct (if (stack? tmpdb-stack) (db:get-db tmpdb-stack) ;; get previously opened db (will create new db handle if all in the stack are already used - (let* ((dbpath (db:dbfile-path )) ;; path to tmp db area + (let* ((max-stale-tmp (configf:lookup-number *configdat* "server" "filling-db-max-stale-seconds" default: 10)) + (dbpath (db:dbfile-path )) ;; path to tmp db area (dbexists (common:file-exists? dbpath)) (tmpdbfname (conc dbpath "/megatest.db")) (dbfexists (common:file-exists? tmpdbfname)) ;; (conc dbpath "/megatest.db"))) (mtdbexists (common:file-exists? (conc *toppath* "/megatest.db"))) @@ -351,11 +352,11 @@ (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb) ;; olddb is already a (cons db path) (dbr:dbstruct-refndb-set! dbstruct refndb) ;; (mutex-unlock! *rundb-mutex*) (if (and (or (not dbfexists) (and modtimedelta - (> modtimedelta 10))) ;; if db in tmp is over ten seconds older than the file in MTRA then do a sync back + (> modtimedelta max-stale-tmp))) ;; if db in tmp is over ten seconds older than the file in MTRA then do a sync back do-sync) (begin (debug:print 1 *default-log-port* "filling db " (db:dbdat-get-path tmpdb) " with data \n from " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb refndb tmpdb) ;touch tmp db to avoid wal mode wierdness Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -1244,11 +1244,11 @@ (statuses (string-split (or (args:get-arg "-status") "") ",")) (run-id (db:get-value-by-header run header "id")) (runname (db:get-value-by-header run header "runname")) (states (string-split (or (args:get-arg "-state") "") ",")) (tests (if tests-spec - (db:dispatch-query access-mode rmt:get-tests-for-run db:get-tests-for-run run-id testpatt states statuses #f #f #f 'testname 'asc ;; (db:get-tests-for-run dbstruct run-id testpatt '() '() #f #f #f 'testname 'asc + (rmt:get-tests-for-run run-id testpatt states statuses #f #f #f 'testname 'asc ;; (db:get-tests-for-run dbstruct run-id testpatt '() '() #f #f #f 'testname 'asc ;; use qryvals if test-spec provided (if tests-spec (string-intersperse adj-tests-spec ",") ;; db:test-record-fields #f) @@ -1380,11 +1380,11 @@ (let* ((run-id (db:get-value-by-header run header "id")) (runname (db:get-value-by-header run header "runname")) (states (string-split (or (args:get-arg "-state") "") ",")) (statuses (string-split (or (args:get-arg "-status") "") ",")) (tests (if tests-spec - (db:dispatch-query access-mode rmt:get-tests-for-run db:get-tests-for-run run-id testpatt states statuses #f #f #f 'testname 'asc ;; (db:get-tests-for-run dbstruct run-id testpatt '() '() #f #f #f 'testname 'asc + (rmt:get-tests-for-run run-id testpatt states statuses #f #f #f 'testname 'asc ;; (db:get-tests-for-run dbstruct run-id testpatt '() '() #f #f #f 'testname 'asc ;; use qryvals if test-spec provided (if tests-spec (string-intersperse adj-tests-spec ",") ;; db:test-record-fields #f) @@ -1505,11 +1505,11 @@ ;; "\n rundir: " (get-value-by-fieldname test test-field-index "") ;; (sdb:qry 'getstr ;; (filedb:get-path *fdb* ;; (db:test-get-rundir test) ;; ) ) ;; Each test ;; DO NOT remote run - (let ((steps (db:dispatch-query access-mode rmt:get-steps-for-test db:get-steps-for-test run-id (db:test-get-id test)))) ;; (db:get-steps-for-test dbstruct run-id (db:test-get-id test)))) + (let ((steps (rmt:get-steps-for-test run-id (db:test-get-id test)))) ;; (db:get-steps-for-test dbstruct run-id (db:test-get-id test)))) (for-each (lambda (step) (format #t " Step: ~20a State: ~10a Status: ~10a Time ~22a\n" (tdb:step-get-stepname step) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -500,59 +500,179 @@ ;; (server:have-sync-lock?)) ;; (else #f)))) ;; moving this here as it needs access to db and cannot be in common. ;; + +(define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f)) + (let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh + (sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log"))) + (tmp-area (common:get-db-tmp-area)) + (tmp-db (conc tmp-area "/megatest.db")) + (staging-file (conc *toppath* "/.megatest.db")) + (mtdbfile (conc *toppath* "/megatest.db")) + (lockfile (conc tmp-db ".lock")) + (sync-cmd-core (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log)) + (sync-cmd (if fork-to-background + (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake "sync-cmd-core) + sync-cmd-core)) + (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 30)) + (default-duty-cycle 0.1) + (duty-cycle (configf:lookup-number *configdat* "server" "sync-duty-cycle" default: default-duty-cycle)) + (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle) + (calculate-off-time (lambda (work-duration duty-cycle) + (* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds))) + (off-time min-intersync-delay) + (do-a-sync + (lambda () + (if (common:simple-file-lock lockfile) + (begin + (if (not fork-to-background) + (thread-sleep! (max off-time min-intersync-delay))) + (if (not (configf:lookup *configdat* "server" "disable-db-snapshot")) + (common:snapshot-file mtdbfile subdir: ".db-snapshot")) + (delete-file* staging-file) + (let* ((start-time (current-milliseconds)) + (res (system sync-cmd)) + (res2 + (cond + ((eq? 0 res) + (delete-file* (conc mtdbfile ".backup")) + (if (eq? 0 (file-size sync-log)) + (delete-file sync-log)) + (system (conc "/bin/mv " staging-file " " mtdbfile)) + + (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000)) + (set! off-time (calculate-off-time + last-sync-seconds + (cond + ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1)) + duty-cycle) + (else + (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. Using default value: "default-duty-cycle) + default-duty-cycle)))) + + (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec") + 'sync-completed) + (else + (system (conc "/bin/cp "sync-log" "sync-log".fail")) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail") + (if (file-exists? (conc mtdbfile ".backup")) + (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile))) + #f))) + (common:simple-file-release-lock lockfile) + res2))) + ;; else + (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.") + 'parallel-sync + ) ;; end if got lockfile + ) ;; end lambda + )) + do-a-sync)) + (define (server:writable-watchdog dbstruct) (thread-sleep! 10) ;; delay for startup - (let* ((legacy-sync (common:run-sync?)) - (sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh + (let* ((do-a-sync (server:get-bruteforce-syncer dbstruct))) + (if (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync + (args:get-arg "-server")) + (let loop () + (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for server.minimum-intersync-delay seconds ["min-intersync-delay"]") + + (do-a-sync) + + ;; keep going unless time to exit + ;; + (if (not *time-to-exit*) + (let delay-loop ((count 0)) + ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) + (if (eq? count 0) + ((server:get-bruteforce-syncer dbstruct fork-to-background: #t))) + (if (and (not *time-to-exit*) + (< count 6)) ;; was 11, changing to 4. + (begin + (thread-sleep! 1) + (delay-loop (+ count 1)))) + (if (not *time-to-exit*) (loop)))) + ;; time to exit, close the no-sync db here + ;; (db:no-sync-close-db no-sync-db) + (if (common:low-noise-print 30) + (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) + )))))) + + +(define (server:old-writable-watchdog dbstruct) + (thread-sleep! 10) ;; delay for startup + (let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh (sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log"))) (tmp-area (common:get-db-tmp-area)) (tmp-db (conc tmp-area "/megatest.db")) (staging-file (conc *toppath* "/.megatest.db")) (mtdbfile (conc *toppath* "/megatest.db")) (lockfile (conc tmp-db ".lock")) (sync-cmd (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log)) - (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 30))) + (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 30)) + (default-duty-cycle 0.9) + (duty-cycle (configf:lookup-number *configdat* "server" "sync-duty-cycle" default: default-duty-cycle)) + (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle) + (calculate-off-time (lambda (work-duration duty-cycle) + (* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds))) + (off-time min-intersync-delay) + (do-a-sync + (lambda () + (if (common:simple-file-lock lockfile) + (begin + + (if (not (configf:lookup *configdat* "server" "disable-db-snapshot")) + (common:snapshot-file mtdbfile subdir: ".db-snapshot")) + (delete-file* staging-file) + (let* ((start-time (current-milliseconds)) + (res (system sync-cmd))) + (cond + ((eq? 0 res) + (delete-file* (conc mtdbfile ".backup")) + (if (eq? 0 (file-size sync-log)) + (delete-file sync-log)) + (system (conc "/bin/mv " staging-file " " mtdbfile)) + + (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000)) + (set! off-time (calculate-off-time + last-sync-seconds + (cond + ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1)) + duty-cycle) + (else + (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. Using default value: "default-duty-cycle) + default-duty-cycle)))) + + (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec") + #t) + (else + (system (conc "/bin/cp "sync-log" "sync-log".fail")) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail") + (if (file-exists? (conc mtdbfile ".backup")) + (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile))))) + (common:simple-file-release-lock lockfile))) + ;; else + (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.") + ) ;; end if got lockfile + ) ;; end lambda + )) + (if (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync (args:get-arg "-server")) (let loop () (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for server.minimum-intersync-delay seconds ["min-intersync-delay"]") - (thread-sleep! min-intersync-delay) - (if (common:simple-file-lock lockfile) - (begin - - (if (not (configf:lookup *configdat* "server" "disable-db-snapshot")) - (common:snapshot-file mtdbfile subdir: ".db-snapshot")) - (delete-file* staging-file) - (let* ((start-time (current-milliseconds)) - (res (system sync-cmd))) - (cond - ((eq? 0 res) - (delete-file* (conc mtdbfile ".backup")) - (if (eq? 0 (file-size sync-log)) - (delete-file sync-log)) - (system (conc "/bin/mv " staging-file " " mtdbfile)) - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "(/ (- (current-milliseconds) start-time) 1000)" sec") - #t) - (else - (system (conc "/bin/cp "sync-log" "sync-log".fail")) - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail") - (if (file-exists? (conc mtdbfile ".backup")) - (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile))))) - (common:simple-file-release-lock lockfile))) - ;; else - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.") - ) ;; end if got lockfile + (thread-sleep! (max off-time min-intersync-delay)) + (do-a-sync) ;; keep going unless time to exit ;; (if (not *time-to-exit*) (let delay-loop ((count 0)) ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) - + (if (eq? count 0) + (do-a-sync)) (if (and (not *time-to-exit*) (< count 6)) ;; was 11, changing to 4. (begin (thread-sleep! 1) (delay-loop (+ count 1))))