Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -318,11 +318,17 @@ (common:version-signature)))) (define (common:api-changed?) (not (equal? (substring (->string megatest-version) 0 4) (substring (conc (common:get-last-run-version)) 0 4)))) - + + +(define (common:get-sync-lock-filepath) + (let* ((tmp-area (common:get-db-tmp-area)) + (lockfile (conc tmp-area "/megatest.db.sync-lock"))) + lockfile)) + ;; Move me elsewhere ... ;; RADT => Why do we meed the version check here, this is called only if version misma ;; (define (common:cleanup-db dbstruct #!key (full #f)) (apply db:multi-db-sync @@ -599,13 +605,13 @@ ;; dot-locking egg seems not to work, using this for now ;; if lock is older than expire-time then remove it and try again ;; to get the lock ;; (define (common:simple-file-lock fname #!key (expire-time 300)) - (handle-exceptions - exn - #f ;; don't really care what went wrong right now. NOTE: I have not seen this one actually fail. +; (handle-exceptions +; exn +; #f ;; don't really care what went wrong right now. NOTE: I have not seen this one actually fail. (if (common:file-exists? fname) (if (> (- (current-seconds)(file-modification-time fname)) expire-time) (begin (delete-file* fname) (common:simple-file-lock fname expire-time: expire-time)) @@ -617,11 +623,13 @@ (thread-sleep! 0.25) (if (common:file-exists? fname) (with-input-from-file fname (lambda () (equal? key-string (read-line)))) - #f))))) + #f))) +; ) + ) (define (common:simple-file-lock-and-wait fname #!key (expire-time 300)) (let ((end-time (+ expire-time (current-seconds)))) (let loop ((got-lock (common:simple-file-lock fname expire-time: expire-time))) (if got-lock @@ -937,11 +945,11 @@ 0) (define (std-signal-handler signum) ;; (signal-mask! signum) - (set! *time-to-exit* #t) + (set! *time-to-exit* #t) ;;(debug:print-info 13 *default-log-port* "got signal "signum) (debug:print-error 0 *default-log-port* "Received signal " signum " aaa exiting promptly") ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway (exit)) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -313,11 +313,12 @@ ;; (define (db:open-db dbstruct #!key (areapath #f)(do-sync #t)) ;; TODO: actually use areapath (let ((tmpdb-stack (dbr:dbstruct-dbstack dbstruct))) ;; RA => Returns the first reference in dbstruct (if (stack? tmpdb-stack) (db:get-db tmpdb-stack) ;; get previously opened db (will create new db handle if all in the stack are already used - (let* ((dbpath (db:dbfile-path )) ;; path to tmp db area + (let* ((max-stale-tmp (configf:lookup-number *configdat* "server" "filling-db-max-stale-seconds" default: 10)) + (dbpath (db:dbfile-path )) ;; path to tmp db area (dbexists (common:file-exists? dbpath)) (tmpdbfname (conc dbpath "/megatest.db")) (dbfexists (common:file-exists? tmpdbfname)) ;; (conc dbpath "/megatest.db"))) (mtdbexists (common:file-exists? (conc *toppath* "/megatest.db"))) @@ -351,11 +352,11 @@ (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb) ;; olddb is already a (cons db path) (dbr:dbstruct-refndb-set! dbstruct refndb) ;; (mutex-unlock! *rundb-mutex*) (if (and (or (not dbfexists) (and modtimedelta - (> modtimedelta 10))) ;; if db in tmp is over ten seconds older than the file in MTRA then do a sync back + (> modtimedelta max-stale-tmp))) ;; if db in tmp is over ten seconds older than the file in MTRA then do a sync back do-sync) (begin (debug:print 1 *default-log-port* "filling db " (db:dbdat-get-path tmpdb) " with data \n from " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb refndb tmpdb) ;touch tmp db to avoid wal mode wierdness @@ -1048,11 +1049,15 @@ (for-each (lambda (server) (match-let (((mod-time host port start-time pid) server)) (if (and host pid) (tasks:kill-server host pid)))) - servers)) + servers) + + ;; /tmp/bjbarcla/megatest_localdb/fullrun/.nfs.pdx.disks.icf_env_disk001.bjbarcla.gwa.issues.mtdev.matt-bisect.megatest.ext-tests.runs.all.v1.65.1.6524.dev.bb-24-justrollup-f8.rollup.fullrun/megatest.db.lock + (delete-file* (common:get-sync-lock-filepath)) + ) ;; clear out junk records ;; ((dejunk) (db:delay-if-busy mtdb) ;; ok to delay on mtdb Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -426,10 +426,11 @@ "-convert-to-norm" "-convert-to-old" "-import-megatest.db" "-sync-to-megatest.db" + "-sync-brute-force" "-logging" "-v" ;; verbose 2, more than normal (normal is 1) "-q" ;; quiet 0, errors/warnings only "-diff-rep" @@ -1244,11 +1245,11 @@ (statuses (string-split (or (args:get-arg "-status") "") ",")) (run-id (db:get-value-by-header run header "id")) (runname (db:get-value-by-header run header "runname")) (states (string-split (or (args:get-arg "-state") "") ",")) (tests (if tests-spec - (db:dispatch-query access-mode rmt:get-tests-for-run db:get-tests-for-run run-id testpatt states statuses #f #f #f 'testname 'asc ;; (db:get-tests-for-run dbstruct run-id testpatt '() '() #f #f #f 'testname 'asc + (rmt:get-tests-for-run run-id testpatt states statuses #f #f #f 'testname 'asc ;; (db:get-tests-for-run dbstruct run-id testpatt '() '() #f #f #f 'testname 'asc ;; use qryvals if test-spec provided (if tests-spec (string-intersperse adj-tests-spec ",") ;; db:test-record-fields #f) @@ -1380,11 +1381,11 @@ (let* ((run-id (db:get-value-by-header run header "id")) (runname (db:get-value-by-header run header "runname")) (states (string-split (or (args:get-arg "-state") "") ",")) (statuses (string-split (or (args:get-arg "-status") "") ",")) (tests (if tests-spec - (db:dispatch-query access-mode rmt:get-tests-for-run db:get-tests-for-run run-id testpatt states statuses #f #f #f 'testname 'asc ;; (db:get-tests-for-run dbstruct run-id testpatt '() '() #f #f #f 'testname 'asc + (rmt:get-tests-for-run run-id testpatt states statuses #f #f #f 'testname 'asc ;; (db:get-tests-for-run dbstruct run-id testpatt '() '() #f #f #f 'testname 'asc ;; use qryvals if test-spec provided (if tests-spec (string-intersperse adj-tests-spec ",") ;; db:test-record-fields #f) @@ -1505,11 +1506,11 @@ ;; "\n rundir: " (get-value-by-fieldname test test-field-index "") ;; (sdb:qry 'getstr ;; (filedb:get-path *fdb* ;; (db:test-get-rundir test) ;; ) ) ;; Each test ;; DO NOT remote run - (let ((steps (db:dispatch-query access-mode rmt:get-steps-for-test db:get-steps-for-test run-id (db:test-get-id test)))) ;; (db:get-steps-for-test dbstruct run-id (db:test-get-id test)))) + (let ((steps (rmt:get-steps-for-test run-id (db:test-get-id test)))) ;; (db:get-steps-for-test dbstruct run-id (db:test-get-id test)))) (for-each (lambda (step) (format #t " Step: ~20a State: ~10a Status: ~10a Time ~22a\n" (tdb:step-get-stepname step) @@ -2296,10 +2297,14 @@ 'adj-testids 'old2new ;; 'new2old ) (set! *didsomething* #t))) + +(when (args:get-arg "-sync-brute-force") + ((server:get-bruteforce-syncer (db:setup #t) persist-until-sync: #t)) + (set! *didsomething* #t)) (if (args:get-arg "-sync-to-megatest.db") (let* ((dbstruct (db:setup #f)) (tmpdbpth (cdr (dbr:dbstruct-tmpdb dbstruct))) (lockfile (conc tmpdbpth ".lock")) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -500,207 +500,117 @@ ;; (server:have-sync-lock?)) ;; (else #f)))) ;; moving this here as it needs access to db and cannot be in common. ;; -(define (server:writable-watchdog dbstruct) - (thread-sleep! 10) ;; delay for startup - (let* ((legacy-sync (common:run-sync?)) - (sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh + +(define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f)) + (let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh (sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log"))) (tmp-area (common:get-db-tmp-area)) (tmp-db (conc tmp-area "/megatest.db")) (staging-file (conc *toppath* "/.megatest.db")) (mtdbfile (conc *toppath* "/megatest.db")) - (lockfile (conc tmp-db ".lock")) - (sync-cmd (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log)) - (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 30))) - (if (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync - (args:get-arg "-server")) - (let loop () - (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for server.minimum-intersync-delay seconds ["min-intersync-delay"]") - (thread-sleep! min-intersync-delay) - (if (common:simple-file-lock lockfile) - (begin - - (if (not (configf:lookup *configdat* "server" "disable-db-snapshot")) - (common:snapshot-file mtdbfile subdir: ".db-snapshot")) - (delete-file* staging-file) - (let* ((start-time (current-milliseconds)) - (res (system sync-cmd))) - (cond - ((eq? 0 res) - (delete-file* (conc mtdbfile ".backup")) - (if (eq? 0 (file-size sync-log)) - (delete-file sync-log)) - (system (conc "/bin/mv " staging-file " " mtdbfile)) - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "(/ (- (current-milliseconds) start-time) 1000)" sec") - #t) - (else - (system (conc "/bin/cp "sync-log" "sync-log".fail")) - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail") - (if (file-exists? (conc mtdbfile ".backup")) - (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile))))) - (common:simple-file-release-lock lockfile))) - ;; else - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.") - ) ;; end if got lockfile - - ;; keep going unless time to exit - ;; - (if (not *time-to-exit*) - (let delay-loop ((count 0)) - ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) - - (if (and (not *time-to-exit*) - (< count 6)) ;; was 11, changing to 4. - (begin - (thread-sleep! 1) - (delay-loop (+ count 1)))) - (if (not *time-to-exit*) (loop)))) - ;; time to exit, close the no-sync db here - ;; (db:no-sync-close-db no-sync-db) - (if (common:low-noise-print 30) - (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) - )))))) - -;; (let ((legacy-sync (common:run-sync?))) -;; (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300)) -;; (debug-mode (debug:debug-mode 1)) -;; (last-time (current-seconds)) -;; (no-sync-db (db:open-no-sync-db)) -;; (sync-duration 0) ;; run time of the sync in milliseconds -;; ;;(this-wd-num (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x))) -;; ) -;; (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls -;; (debug:print-info 2 *default-log-port* "Periodic sync thread started.") -;; (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id) );; " this-wd-num="this-wd-num) -;; (if (and legacy-sync (not *time-to-exit*)) -;; (let* (;;(dbstruct (db:setup)) -;; (mtdb (dbr:dbstruct-mtdb dbstruct)) -;; (mtpath (db:dbdat-get-path mtdb)) -;; (tmp-area (common:get-db-tmp-area)) -;; (lockfile (conc tmp-area "/megatest.db.lock")) -;; (start-file (conc tmp-area "/.start-sync")) -;; (end-file (conc tmp-area "/.end-sync"))) -;; (debug:print-info 0 *default-log-port* "Server running, periodic sync started.") -;; (let loop () -;; ;; sync for filesystem local db writes -;; ;; -;; -;; (mutex-lock! *db-multi-sync-mutex*) -;; (let* ((need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write -;; (sync-in-progress *db-sync-in-progress*) -;; -;; (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5)) -;; (should-sync (and (not *time-to-exit*) -;; (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed -;; (start-time (current-seconds)) -;; (cpu-load-adj (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f))) -;; (mt-mod-time (file-modification-time mtpath)) -;; (last-sync-start (if (common:file-exists? start-file) -;; (file-modification-time start-file) -;; 0)) -;; (last-sync-end (if (common:file-exists? end-file) -;; (file-modification-time end-file) -;; 10)) -;; (sync-period (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period -;; (recently-synced (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db! -;; (< mt-mod-time last-sync-start))) -;; (sync-done (<= last-sync-start last-sync-end)) -;; (sync-stale (> start-time (+ last-sync-start sync-stale-seconds))) -;; (will-sync-pre (and (not *time-to-exit*) ;; do not start a sync if we are in the process of exiting -;; have-lock? -;; (or need-sync should-sync) -;; (or sync-done sync-stale) -;; (not sync-in-progress) -;; (not recently-synced))) -;; (will-sync (if will-sync-pre -;; ;; delay get lock until we decide to sync -;; #t ;; (server:have-sync-lock?) -;; #f))) -;; ;; if another server is syncing, postpone sync -;; (if (and will-sync-pre (not will-sync)) -;; (set! *db-last-sync* start-time)) -;; (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. need-sync="need-sync" sync-in-progress=" sync-in-progress -;; " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync -;; " sync-done=" sync-done " sync-period=" sync-period) -;; (if (and (> sync-period 5) -;; (common:low-noise-print 30 "sync-period")) -;; (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds.")) -;; ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced)) -;; ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) -;; (if will-sync (set! *db-sync-in-progress* #t)) -;; (mutex-unlock! *db-multi-sync-mutex*) -;; (if will-sync -;; (let (;; (max-sync-duration (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK! -;; (sync-start (current-milliseconds))) -;; (with-output-to-file start-file (lambda ()(print (current-process-id)))) -;; -;; ;; put lock here -;; -;; ;; (if (or (not max-sync-duration) -;; ;; (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally -;; (let ((res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive -;; (set! sync-duration (- (current-milliseconds) sync-start)) -;; (if (> res 0) ;; some records were transferred, keep the db alive -;; (begin -;; (mutex-lock! *heartbeat-mutex*) -;; (set! *db-last-access* (current-seconds)) -;; (mutex-unlock! *heartbeat-mutex*) -;; (debug:print-info 0 *default-log-port* "sync called, " res " records transferred.")) -;; (debug:print-info 2 *default-log-port* "sync called but zero records transferred"))))) -;; ;; ;; TODO: factor this next routine out into a function -;; ;; (with-input-from-pipe ;; this should not block other threads but need to verify this -;; ;; (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*) -;; ;; (lambda () -;; ;; (let loop ((inl (read-line)) -;; ;; (res #f)) -;; ;; (if (eof-object? inl) -;; ;; (begin -;; ;; (set! sync-duration (- (current-milliseconds) sync-start)) -;; ;; (cond -;; ;; ((not res) -;; ;; (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\"")) -;; ;; ((> res 0) -;; ;; (mutex-lock! *heartbeat-mutex*) -;; ;; (set! *db-last-access* (current-seconds)) -;; ;; (mutex-unlock! *heartbeat-mutex*)))) -;; ;; (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl))) -;; ;; (if matches -;; ;; (string->number (cadr matches)) -;; ;; #f)))) -;; ;; (loop (read-line) -;; ;; (or num-synced res)))))))))) -;; (if will-sync -;; (begin -;; (mutex-lock! *db-multi-sync-mutex*) -;; (set! *db-sync-in-progress* #f) -;; (set! *db-last-sync* start-time) -;; (with-output-to-file end-file (lambda ()(print (current-process-id)))) -;; -;; ;; release lock here -;; ;; (server:release-sync-lock) -;; (mutex-unlock! *db-multi-sync-mutex*))) -;; (if (and debug-mode -;; (> (- start-time last-time) 60)) -;; (begin -;; (set! last-time start-time) -;; (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) -;; -;; ;; keep going unless time to exit -;; ;; -;; (if (not *time-to-exit*) -;; (let delay-loop ((count 0)) -;; ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) -;; -;; (if (and (not *time-to-exit*) -;; (< count 6)) ;; was 11, changing to 4. -;; (begin -;; (thread-sleep! 1) -;; (delay-loop (+ count 1)))) -;; (if (not *time-to-exit*) (loop)))) -;; ;; time to exit, close the no-sync db here -;; (db:no-sync-close-db no-sync-db) -;; (if (common:low-noise-print 30) -;; (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num))))))) + (lockfile (common:get-sync-lock-filepath)) + (sync-cmd-core (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log)) + (sync-cmd (if fork-to-background + (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake \""sync-cmd-core" && /bin/mv -f " staging-file " " mtdbfile" \"") + sync-cmd-core)) + (default-min-intersync-delay 2) + (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: default-min-intersync-delay)) + (default-duty-cycle 0.1) + (duty-cycle (configf:lookup-number *configdat* "server" "sync-duty-cycle" default: default-duty-cycle)) + (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle) + (calculate-off-time (lambda (work-duration duty-cycle) + (* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds))) + (off-time min-intersync-delay) ;; adjusted in closure below. + (do-a-sync + (lambda () + (BB> "Start do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync) + (let* ((finalres + (let retry-loop ((num-tries 0)) + (if (common:simple-file-lock lockfile) + (begin + (cond + ((not (or fork-to-background persist-until-sync)) + (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for max of (server.minimum-intersync-delay="min-intersync-delay + " , off-time="off-time" seconds ]") + (thread-sleep! (max off-time min-intersync-delay))) + (else + (debug:print 0 *default-log-port* "INFO: syncer thread NOT sleeping ; maybe time-to-exit..."))) + + (if (not (configf:lookup *configdat* "server" "disable-db-snapshot")) + (common:snapshot-file mtdbfile subdir: ".db-snapshot")) + (delete-file* staging-file) + (let* ((start-time (current-milliseconds)) + (res (system sync-cmd)) + (res2 + (cond + ((eq? 0 res) + (delete-file* (conc mtdbfile ".backup")) + (if (eq? 0 (file-size sync-log)) + (delete-file sync-log)) + (system (conc "/bin/mv " staging-file " " mtdbfile)) + + (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000)) + (set! off-time (calculate-off-time + last-sync-seconds + (cond + ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1)) + duty-cycle) + (else + (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. Using default value: "default-duty-cycle) + default-duty-cycle)))) + + (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec") + (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time) + 'sync-completed) + (else + (system (conc "/bin/cp "sync-log" "sync-log".fail")) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail") + (if (file-exists? (conc mtdbfile ".backup")) + (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile))) + #f)))) + (common:simple-file-release-lock lockfile) + (BB> "released lockfile: " lockfile) + (when (common:file-exists? lockfile) + (BB> "DID NOT ACTUALLY RELEASE LOCKFILE")) + res2) ;; end let + );; end begin + ;; else + (cond + (persist-until-sync + (thread-sleep! 1) + (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; we're in a fork-to-background so we need to succeed. Let's wait a jiffy and and try again. num-tries="num-tries" (waiting for lockfile="lockfile" to disappear)") + (retry-loop (add1 num-tries))) + (else + (thread-sleep! (max off-time (+ last-sync-seconds min-intersync-delay))) + (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.") + 'parallel-sync-in-progress)) + ) ;; end if got lockfile + ) + )) + (BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres) + finalres) + ) ;; end lambda + )) + do-a-sync)) + +(define (server:writable-watchdog dbstruct) + (thread-sleep! 10) ;; delay for startup + (let* ((do-a-sync (server:get-bruteforce-syncer dbstruct)) + (final-sync (server:get-bruteforce-syncer dbstruct fork-to-background: #t persist-until-sync: #t))) + (when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync + (args:get-arg "-server")) + + (let loop () + (do-a-sync) + (if (not *time-to-exit*) (loop))) ;; keep going unless time to exit + + ;; time to exit, close the no-sync db here + (final-sync) + + (if (common:low-noise-print 30) + (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) + ))))) +