Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -337,10 +337,92 @@ ;; '()) ) (if (common:api-changed?) (common:set-last-run-version))) +(define (common:snapshot-file filepath #!key (subdir ".") ) + (if (file-exists? filepath) + (let* ((age-sec (lambda (file) + (if (file-exists? file) + (- (current-seconds) (file-modification-time file)) + 1000000000))) ;; return really old value if file doesn't exist. we want to clobber it if old or not exist. + (ok-flag #t) + (age-mins (lambda (file) (/ (age-sec file) 60))) + (age-hrs (lambda (file) (/ (age-mins file) 60))) + (age-days (lambda (file) (/ (age-hrs file) 24))) + (age-wks (lambda (file) (/ (age-days file) 7))) + (docmd (lambda (cmd) + (cond + (ok-flag + (let ((res (system cmd))) + (cond + ((eq? 0 res) + #t) + (else + (set! ok-flag #f) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Command failed with exit code " + (if (< res 0) + res + (/ res 8)) " ["cmd"]" ) + #f)))) + (else + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Not runnining command due to prior error. ["cmd"]") + #f)))) + (copy (lambda (src dest) (docmd (conc "/bin/cp '"src"' '"dest"'")))) + (copy+zip (lambda (src dest) (docmd (conc "gzip -c - < '"src"' > '"dest"'")))) + (fullpath (realpath filepath)) + (basedir (pathname-directory fullpath)) + (basefile (pathname-strip-directory fullpath)) + ;;(prevfile (conc filepath ".prev.gz")) + (minsfile (conc basedir "/" subdir "/" basefile ".mins.gz")) + (hrsfile (conc basedir "/" subdir "/" basefile ".hrs.gz")) + (daysfile (conc basedir "/" subdir "/" basefile ".days.gz")) + (wksfile (conc basedir "/" subdir "/" basefile ".weeks.gz"))) + + ;; create subdir it not exists + (if (not (directory-exists? (conc basedir "/" subdir))) + (docmd (conc "/bin/mkdir -p '"(conc basedir "/" subdir)"'"))) + + ;; copy&zip to .mins if not exists + (if (not (file-exists? minsfile)) + (copy+zip filepath minsfile)) + ;; copy .mins to .hrs if not exists + (if (not (file-exists? hrsfile)) + (copy minsfile hrsfile)) + ;; copy .hrs to .days if not exists + (if (not (file-exists? daysfile)) + (copy hrsfile daysfile)) + ;; copy .days to .weeks if not exists + (if (not (file-exists? wksfile)) + (copy daysfile wksfile)) + + + ;; if age(.mins.gz) >= 1h: + ;; copy .mins.gz .hrs.gz + ;; copy .mins.gz + (when (>= (age-mins minsfile) 1) + (copy minsfile hrsfile) + (copy+zip filepath minsfile)) + + ;; if age(.hrs.gz) >= 1d: + ;; copy .hrs.gz .days.gz + ;; copy .mins.gz .hrs.gz + (when (>= (age-days hrsfile) 1) + (copy hrsfile daysfile) + (copy minsfile hrsfile)) + + ;; if age(.days.gz) >= 1w: + ;; copy .days.gz .weeks.gz + ;; copy .hrs.gz .days.gz + (when (>= (age-wks daysfile) 1) + (copy daysfile wksfile) + (copy hrsfile daysfile)) + #t) + #f)) + + + ;; Rotate logs, logic: ;; if > 500k and older than 1 week: ;; remove previous compressed log and compress this log ;; WARNING: This proc operates assuming that it is in the directory above the ;; logs directory you wish to log-rotate. @@ -749,15 +831,19 @@ (define *wdnum* 0) (define *wdnum*mutex (make-mutex)) + + +(define (common:human-time) + (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S")) + + ;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp ;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server) ;; - - (define (common:readonly-watchdog dbstruct) (thread-sleep! 0.05) ;; delay for startup (debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.") ;; sync megatest.db to /tmp/.../megatst.db (let* ((sync-cool-off-duration 3) @@ -1160,16 +1246,16 @@ (handle-exceptions exn (if (> trynum 0) (let ((delay-time (* (- 5 trynum) 5))) (mutex-unlock! *homehost-mutex*) - (debug:print 0 *default-log-port* "ERROR: Failed to read .homehost file, delaying " delay-time " seconds and trying again, message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying " delay-time " seconds and trying again, message: " ((condition-property-accessor 'exn 'message) exn)) (thread-sleep! delay-time) (common:get-homehost trynum: (- trynum 1))) (begin (mutex-unlock! *homehost-mutex*) - (debug:print 0 *default-log-port* "ERROR: Failed to read .homehost file after trying five times. Giving up and exiting, message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file after trying five times. Giving up and exiting, message: " ((condition-property-accessor 'exn 'message) exn)) (exit 1))) (let ((hhf (conc *toppath* "/.homehost"))) (if (common:file-exists? hhf) (with-input-from-file hhf read-line) (if (file-write-access? *toppath*) @@ -2686,11 +2772,11 @@ command: command host-port: host-port params: params))) (queue-push cmddat) ;; put request into the queue (nn-send soc "queued")) ;; reply with "queued" - (print "ERROR: BAD request " dat)) + (print "ERROR: ["(common:human-time)"] BAD request " dat)) (loop (nn-recv soc))))) (nn-close soc))) Index: megatest-version.scm ================================================================== --- megatest-version.scm +++ megatest-version.scm @@ -18,6 +18,6 @@ ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00.. (declare (unit megatest-version)) -(define megatest-version 1.6523) +(define megatest-version 1.6524) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -343,10 +343,11 @@ "-run-id" "-ping" "-refdb2dat" "-o" "-log" + "-sync-log" "-since" "-fields" "-recover-test" ;; run-id,test-id - used internally to recover a test stuck in RUNNING state "-sort" "-target-db" @@ -2293,14 +2294,24 @@ ;; 'new2old ) (set! *didsomething* #t))) (if (args:get-arg "-sync-to-megatest.db") - (let ((res (db:multi-db-sync - (db:setup #f) - 'new2old))) - (print "Synced " res " records to megatest.db") + (let* ((dbstruct (db:setup #f)) + (tmpdbpth (cdr (dbr:dbstruct-tmpdb dbstruct))) + (lockfile (conc tmpdbpth ".lock")) + (locked (common:simple-file-lock lockfile)) + (res (if locked + (db:multi-db-sync + dbstruct + 'new2old) + #f))) + (if res + (begin + (common:simple-file-release-lock lockfile) + (print "Synced " res " records to megatest.db")) + (print "Skipping sync, there is a sync in progress.")) (set! *didsomething* #t))) (if (args:get-arg "-sync-to") (let ((toppath (launch:setup))) (tasks:sync-to-postgres *configdat* (args:get-arg "-sync-to")) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -482,164 +482,223 @@ (string-intersperse (map number->string (u8vector->list (if res res (hostname->ip hostname)))) "."))) -(define server:sync-lock-token "SERVER_SYNC_LOCK") -(define (server:release-sync-lock) - (db:no-sync-del! *no-sync-db* server:sync-lock-token)) -(define (server:have-sync-lock?) - (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token)) - (have-lock? (car have-lock-pair)) - (lock-time (cdr have-lock-pair)) - (lock-age (- (current-seconds) lock-time))) - (cond - (have-lock? #t) - ((>lock-age - (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180))) - (server:release-sync-lock) - (server:have-sync-lock?)) - (else #f)))) +;; (define server:sync-lock-token "SERVER_SYNC_LOCK") +;; (define (server:release-sync-lock) +;; (db:no-sync-del! *no-sync-db* server:sync-lock-token)) +;; (define (server:have-sync-lock?) +;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token)) +;; (have-lock? (car have-lock-pair)) +;; (lock-time (cdr have-lock-pair)) +;; (lock-age (- (current-seconds) lock-time))) +;; (cond +;; (have-lock? #t) +;; ((>lock-age +;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180))) +;; (server:release-sync-lock) +;; (server:have-sync-lock?)) +;; (else #f)))) ;; moving this here as it needs access to db and cannot be in common. ;; (define (server:writable-watchdog dbstruct) - (thread-sleep! 0.05) ;; delay for startup - (let ((legacy-sync (common:run-sync?)) - (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300)) - (debug-mode (debug:debug-mode 1)) - (last-time (current-seconds)) - (no-sync-db (db:open-no-sync-db)) - (sync-duration 0) ;; run time of the sync in milliseconds - ;;(this-wd-num (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x))) - ) - (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls - (debug:print-info 2 *default-log-port* "Periodic sync thread started.") - (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id) );; " this-wd-num="this-wd-num) - (if (and legacy-sync (not *time-to-exit*)) - (let* (;;(dbstruct (db:setup)) - (mtdb (dbr:dbstruct-mtdb dbstruct)) - (mtpath (db:dbdat-get-path mtdb)) - (tmp-area (common:get-db-tmp-area)) - (start-file (conc tmp-area "/.start-sync")) - (end-file (conc tmp-area "/.end-sync"))) - (debug:print-info 0 *default-log-port* "Server running, periodic sync started.") - (let loop () - ;; sync for filesystem local db writes - ;; - (mutex-lock! *db-multi-sync-mutex*) - (let* ((need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write - (sync-in-progress *db-sync-in-progress*) - - (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5)) - (should-sync (and (not *time-to-exit*) - (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed - (start-time (current-seconds)) - (cpu-load-adj (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f))) - (mt-mod-time (file-modification-time mtpath)) - (last-sync-start (if (common:file-exists? start-file) - (file-modification-time start-file) - 0)) - (last-sync-end (if (common:file-exists? end-file) - (file-modification-time end-file) - 10)) - (sync-period (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period - (recently-synced (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db! - (< mt-mod-time last-sync-start))) - (sync-done (<= last-sync-start last-sync-end)) - (sync-stale (> start-time (+ last-sync-start sync-stale-seconds))) - (will-sync-pre (and (not *time-to-exit*) ;; do not start a sync if we are in the process of exiting - have-lock? - (or need-sync should-sync) - (or sync-done sync-stale) - (not sync-in-progress) - (not recently-synced))) - (will-sync (if will-sync-pre - ;; delay get lock until we decide to sync - (server:have-sync-lock?) - #f))) - ;; if another server is syncing, postpone sync - (if (and will-sync-pre (not will-sync)) - (set! *db-last-sync* start-time)) - (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. need-sync="need-sync" sync-in-progress=" sync-in-progress - " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync - " sync-done=" sync-done " sync-period=" sync-period) - (if (and (> sync-period 5) - (common:low-noise-print 30 "sync-period")) - (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds.")) - ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced)) - ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) - (if will-sync (set! *db-sync-in-progress* #t)) - (mutex-unlock! *db-multi-sync-mutex*) - (if will-sync - (let (;; (max-sync-duration (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK! - (sync-start (current-milliseconds))) - (with-output-to-file start-file (lambda ()(print (current-process-id)))) - - ;; put lock here - - ;; (if (or (not max-sync-duration) - ;; (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally - (let ((res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive - (set! sync-duration (- (current-milliseconds) sync-start)) - (if (> res 0) ;; some records were transferred, keep the db alive - (begin - (mutex-lock! *heartbeat-mutex*) - (set! *db-last-access* (current-seconds)) - (mutex-unlock! *heartbeat-mutex*) - (debug:print-info 0 *default-log-port* "sync called, " res " records transferred.")) - (debug:print-info 2 *default-log-port* "sync called but zero records transferred"))))) -;; ;; TODO: factor this next routine out into a function -;; (with-input-from-pipe ;; this should not block other threads but need to verify this -;; (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*) -;; (lambda () -;; (let loop ((inl (read-line)) -;; (res #f)) -;; (if (eof-object? inl) -;; (begin -;; (set! sync-duration (- (current-milliseconds) sync-start)) -;; (cond -;; ((not res) -;; (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\"")) -;; ((> res 0) -;; (mutex-lock! *heartbeat-mutex*) -;; (set! *db-last-access* (current-seconds)) -;; (mutex-unlock! *heartbeat-mutex*)))) -;; (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl))) -;; (if matches -;; (string->number (cadr matches)) -;; #f)))) -;; (loop (read-line) -;; (or num-synced res)))))))))) - (if will-sync - (begin - (mutex-lock! *db-multi-sync-mutex*) - (set! *db-sync-in-progress* #f) - (set! *db-last-sync* start-time) - (with-output-to-file end-file (lambda ()(print (current-process-id)))) - - ;; release lock here - (server:release-sync-lock) - (mutex-unlock! *db-multi-sync-mutex*))) - (if (and debug-mode - (> (- start-time last-time) 60)) - (begin - (set! last-time start-time) - (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) - - ;; keep going unless time to exit - ;; - (if (not *time-to-exit*) - (let delay-loop ((count 0)) - ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) - - (if (and (not *time-to-exit*) - (< count 6)) ;; was 11, changing to 4. - (begin - (thread-sleep! 1) - (delay-loop (+ count 1)))) - (if (not *time-to-exit*) (loop)))) - ;; time to exit, close the no-sync db here - (db:no-sync-close-db no-sync-db) - (if (common:low-noise-print 30) - (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num))))))) + (thread-sleep! 10) ;; delay for startup + (let* ((legacy-sync (common:run-sync?)) + (sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh + (sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log"))) + (tmp-area (common:get-db-tmp-area)) + (tmp-db (conc tmp-area "/megatest.db")) + (staging-file (conc *toppath* "/.megatest.db")) + (mtdbfile (conc *toppath* "/megatest.db")) + (lockfile (conc tmp-db ".lock")) + (sync-cmd (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log)) + (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 30))) + (if (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync + (args:get-arg "-server")) + (let loop () + (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for server.minimum-intersync-delay seconds ["min-intersync-delay"]") + (thread-sleep! min-intersync-delay) + (if (common:simple-file-lock lockfile) + (begin + + (if (not (configf:lookup *configdat* "server" "disable-db-snapshot")) + (common:snapshot-file mtdbfile subdir: ".db-snapshot")) + (delete-file* staging-file) + (let* ((start-time (current-milliseconds)) + (res (system sync-cmd))) + (cond + ((eq? 0 res) + (delete-file* (conc mtdbfile ".backup")) + (system (conc "/bin/mv " staging-file " " mtdbfile)) + (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "(/ (- (current-milliseconds) start-time) 1000)" sec") + #t) + (else + (system (conc "/bin/cp "sync-log" "sync-log".fail")) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail") + (if (file-exists? (conc mtdbfile ".backup")) + (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile))))) + (common:simple-file-release-lock lockfile))) + ;; else + (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.") + ) ;; end if got lockfile + + ;; keep going unless time to exit + ;; + (if (not *time-to-exit*) + (let delay-loop ((count 0)) + ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) + + (if (and (not *time-to-exit*) + (< count 6)) ;; was 11, changing to 4. + (begin + (thread-sleep! 1) + (delay-loop (+ count 1)))) + (if (not *time-to-exit*) (loop)))) + ;; time to exit, close the no-sync db here + ;; (db:no-sync-close-db no-sync-db) + (if (common:low-noise-print 30) + (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) + )))))) + +;; (let ((legacy-sync (common:run-sync?))) +;; (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300)) +;; (debug-mode (debug:debug-mode 1)) +;; (last-time (current-seconds)) +;; (no-sync-db (db:open-no-sync-db)) +;; (sync-duration 0) ;; run time of the sync in milliseconds +;; ;;(this-wd-num (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x))) +;; ) +;; (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls +;; (debug:print-info 2 *default-log-port* "Periodic sync thread started.") +;; (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id) );; " this-wd-num="this-wd-num) +;; (if (and legacy-sync (not *time-to-exit*)) +;; (let* (;;(dbstruct (db:setup)) +;; (mtdb (dbr:dbstruct-mtdb dbstruct)) +;; (mtpath (db:dbdat-get-path mtdb)) +;; (tmp-area (common:get-db-tmp-area)) +;; (lockfile (conc tmp-area "/megatest.db.lock")) +;; (start-file (conc tmp-area "/.start-sync")) +;; (end-file (conc tmp-area "/.end-sync"))) +;; (debug:print-info 0 *default-log-port* "Server running, periodic sync started.") +;; (let loop () +;; ;; sync for filesystem local db writes +;; ;; +;; +;; (mutex-lock! *db-multi-sync-mutex*) +;; (let* ((need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write +;; (sync-in-progress *db-sync-in-progress*) +;; +;; (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5)) +;; (should-sync (and (not *time-to-exit*) +;; (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed +;; (start-time (current-seconds)) +;; (cpu-load-adj (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f))) +;; (mt-mod-time (file-modification-time mtpath)) +;; (last-sync-start (if (common:file-exists? start-file) +;; (file-modification-time start-file) +;; 0)) +;; (last-sync-end (if (common:file-exists? end-file) +;; (file-modification-time end-file) +;; 10)) +;; (sync-period (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period +;; (recently-synced (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db! +;; (< mt-mod-time last-sync-start))) +;; (sync-done (<= last-sync-start last-sync-end)) +;; (sync-stale (> start-time (+ last-sync-start sync-stale-seconds))) +;; (will-sync-pre (and (not *time-to-exit*) ;; do not start a sync if we are in the process of exiting +;; have-lock? +;; (or need-sync should-sync) +;; (or sync-done sync-stale) +;; (not sync-in-progress) +;; (not recently-synced))) +;; (will-sync (if will-sync-pre +;; ;; delay get lock until we decide to sync +;; #t ;; (server:have-sync-lock?) +;; #f))) +;; ;; if another server is syncing, postpone sync +;; (if (and will-sync-pre (not will-sync)) +;; (set! *db-last-sync* start-time)) +;; (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. need-sync="need-sync" sync-in-progress=" sync-in-progress +;; " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync +;; " sync-done=" sync-done " sync-period=" sync-period) +;; (if (and (> sync-period 5) +;; (common:low-noise-print 30 "sync-period")) +;; (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds.")) +;; ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced)) +;; ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) +;; (if will-sync (set! *db-sync-in-progress* #t)) +;; (mutex-unlock! *db-multi-sync-mutex*) +;; (if will-sync +;; (let (;; (max-sync-duration (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK! +;; (sync-start (current-milliseconds))) +;; (with-output-to-file start-file (lambda ()(print (current-process-id)))) +;; +;; ;; put lock here +;; +;; ;; (if (or (not max-sync-duration) +;; ;; (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally +;; (let ((res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive +;; (set! sync-duration (- (current-milliseconds) sync-start)) +;; (if (> res 0) ;; some records were transferred, keep the db alive +;; (begin +;; (mutex-lock! *heartbeat-mutex*) +;; (set! *db-last-access* (current-seconds)) +;; (mutex-unlock! *heartbeat-mutex*) +;; (debug:print-info 0 *default-log-port* "sync called, " res " records transferred.")) +;; (debug:print-info 2 *default-log-port* "sync called but zero records transferred"))))) +;; ;; ;; TODO: factor this next routine out into a function +;; ;; (with-input-from-pipe ;; this should not block other threads but need to verify this +;; ;; (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*) +;; ;; (lambda () +;; ;; (let loop ((inl (read-line)) +;; ;; (res #f)) +;; ;; (if (eof-object? inl) +;; ;; (begin +;; ;; (set! sync-duration (- (current-milliseconds) sync-start)) +;; ;; (cond +;; ;; ((not res) +;; ;; (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\"")) +;; ;; ((> res 0) +;; ;; (mutex-lock! *heartbeat-mutex*) +;; ;; (set! *db-last-access* (current-seconds)) +;; ;; (mutex-unlock! *heartbeat-mutex*)))) +;; ;; (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl))) +;; ;; (if matches +;; ;; (string->number (cadr matches)) +;; ;; #f)))) +;; ;; (loop (read-line) +;; ;; (or num-synced res)))))))))) +;; (if will-sync +;; (begin +;; (mutex-lock! *db-multi-sync-mutex*) +;; (set! *db-sync-in-progress* #f) +;; (set! *db-last-sync* start-time) +;; (with-output-to-file end-file (lambda ()(print (current-process-id)))) +;; +;; ;; release lock here +;; ;; (server:release-sync-lock) +;; (mutex-unlock! *db-multi-sync-mutex*))) +;; (if (and debug-mode +;; (> (- start-time last-time) 60)) +;; (begin +;; (set! last-time start-time) +;; (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) +;; +;; ;; keep going unless time to exit +;; ;; +;; (if (not *time-to-exit*) +;; (let delay-loop ((count 0)) +;; ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) +;; +;; (if (and (not *time-to-exit*) +;; (< count 6)) ;; was 11, changing to 4. +;; (begin +;; (thread-sleep! 1) +;; (delay-loop (+ count 1)))) +;; (if (not *time-to-exit*) (loop)))) +;; ;; time to exit, close the no-sync db here +;; (db:no-sync-close-db no-sync-db) +;; (if (common:low-noise-print 30) +;; (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num))))))) Index: utils/mk_wrapper ================================================================== --- utils/mk_wrapper +++ utils/mk_wrapper @@ -19,10 +19,18 @@ prefix=$1 cmd=$2 target=$3 cfgfile="$prefix/bin/.$(lsb_release -sr)/cfg.sh" + +# we wish to create a var in cfg.sh for finding sqlite3 executable +chicken_bin_dir=$(dirname $(which csi)) +if [[ -e $chicken_bin_dir/sqlite3 ]];then + sqlite3_exe=$chicken_bin_dir/sqlite3 +else + sqlite3_exe=$(which sqlite3) +fi if [ "$LD_LIBRARY_PATH" != "" ];then echo "INFO: Using LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >&2 ( cat << __EOF if [ -z \$MT_ORIG_ENV ]; then @@ -32,10 +40,12 @@ if [ "\$LD_LIBRARY_PATH" != "" ];then export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:\$LD_LIBRARY_PATH else export LD_LIBRARY_PATH=$LD_LIBRARY_PATH fi + +export MT_SQLITE3_EXE=$sqlite3_exe __EOF ) > $cfgfile echo else echo "INFO: LD_LIBRARY_PATH not set" >&2