Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -357,17 +357,17 @@ (cond ((eq? 0 res) #t) (else (set! ok-flag #f) - (debug:print 0 *default-log-port* "ERROR: Command failed with exit code " + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Command failed with exit code " (if (< res 0) res (/ res 8)) " ["cmd"]" ) #f)))) (else - (debug:print 0 *default-log-port* "ERROR: Nor runnining command due to prior error. ["cmd"]") + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Not runnining command due to prior error. ["cmd"]") #f)))) (copy (lambda (src dest) (docmd (conc "/bin/cp '"src"' '"dest"'")))) (copy+zip (lambda (src dest) (docmd (conc "gzip -c - < '"src"' > '"dest"'")))) (fullpath (realpath filepath)) (basedir (pathname-directory fullpath)) @@ -831,15 +831,19 @@ (define *wdnum* 0) (define *wdnum*mutex (make-mutex)) + + +(define (common:human-time) + (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S")) + + ;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp ;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server) ;; - - (define (common:readonly-watchdog dbstruct) (thread-sleep! 0.05) ;; delay for startup (debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.") ;; sync megatest.db to /tmp/.../megatst.db (let* ((sync-cool-off-duration 3) @@ -1242,16 +1246,16 @@ (handle-exceptions exn (if (> trynum 0) (let ((delay-time (* (- 5 trynum) 5))) (mutex-unlock! *homehost-mutex*) - (debug:print 0 *default-log-port* "ERROR: Failed to read .homehost file, delaying " delay-time " seconds and trying again, message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying " delay-time " seconds and trying again, message: " ((condition-property-accessor 'exn 'message) exn)) (thread-sleep! delay-time) (common:get-homehost trynum: (- trynum 1))) (begin (mutex-unlock! *homehost-mutex*) - (debug:print 0 *default-log-port* "ERROR: Failed to read .homehost file after trying five times. Giving up and exiting, message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file after trying five times. Giving up and exiting, message: " ((condition-property-accessor 'exn 'message) exn)) (exit 1))) (let ((hhf (conc *toppath* "/.homehost"))) (if (common:file-exists? hhf) (with-input-from-file hhf read-line) (if (file-write-access? *toppath*) @@ -2768,11 +2772,11 @@ command: command host-port: host-port params: params))) (queue-push cmddat) ;; put request into the queue (nn-send soc "queued")) ;; reply with "queued" - (print "ERROR: BAD request " dat)) + (print "ERROR: ["(common:human-time)"] BAD request " dat)) (loop (nn-recv soc))))) (nn-close soc))) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -516,26 +516,33 @@ (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 30))) (if (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync (args:get-arg "-server")) (let loop () (thread-sleep! min-intersync-delay) - (if (not (common:file-exists? lockfile)) + (if (common:simple-file-lock lockfile) (begin + (if (not (configf:lookup *configdat* "server" "disable-db-snapshot")) (common:snapshot-file mtdbfile subdir: ".db-snapshot")) (delete-file* staging-file) (let ((start-time (current-milliseconds)) (res (system sync-cmd))) (cond ((eq? 0 res) (delete-file* (conc mtdbfile ".backup")) (system (conc "/bin/mv " staging-file " " mtdbfile)) - (debug:print 1 *default-log-port* "INFO: SYNC took "(/ (- (current-milliseconds) start-time))" sec") + (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] SYNC took "(/ (- (current-milliseconds) start-time))" sec") #t) (else - (debug:print 0 *default-log-port* "ERROR: Sync failed. See log at "sync-log) - (system (conc "mv "mtdbfile ".backup" mtdbfile))))))) + (system (conc "/bin/cp "sync-log" "sync-log".fail")) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail") + (if (file-exists? (conc mtdbfile ".backup")) + (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile))))) + (common:simple-file-release-lock lockfile))) + ;; else + (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] other sync in progres; not syncing.") + ) ;; end if got lockfile ;; keep going unless time to exit ;; (if (not *time-to-exit*) (let delay-loop ((count 0))