@@ -357,14 +357,15 @@ (define (http-transport:sync-inmemdb-to-db tdbdat server-state run-id server-id bad-sync-count) (if *inmemdb* (let ((start-time (current-milliseconds)) (sync-time #f) (rem-time #f) - (sync-retry #f)) + (sync-retry #f) + (sync-touched (db:sync-touched *inmemdb* *run-id* force-sync: #t))) ;; inmemdb is a dbstruct - (condition-case - (db:sync-touched *inmemdb* *run-id* force-sync: #t) + (condition-case sync-touched + ((sync-failed)(cond ((> bad-sync-count 10) ;; time to give up (http-transport:server-shutdown server-id port)) (else ;; (> bad-sync-count 0) ;; we've had a fail or two, delay and loop (thread-sleep! 5) @@ -372,41 +373,42 @@ ((exn) (debug:print 0 "ERROR: error from sync code other than 'sync-failed. Attempting to gracefully shutdown the server") (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running crashed") (exit))) (if sync-retry - #t ; return true - retry + (begin + #t) ; return true - retry (begin (set! sync-time (- (current-milliseconds) start-time)) (set! rem-time (quotient (- 4000 sync-time) 1000)) (debug:print 4 "SYNC: time= " sync-time ", rem-time=" rem-time) (if (and (<= rem-time 4) (> rem-time 0)) (thread-sleep! rem-time) - (thread-sleep! 4)) ;; fallback for if the math is changed ... - - ;; - ;; no *inmemdb* yet, set running after our first pass through and start the db - ;; - (if (eq? server-state 'available) - (let ((new-server-id (tasks:server-am-i-the-server? (db:delay-if-busy tdbdat) run-id))) ;; try to ensure no double registering of servers - (if (equal? new-server-id server-id) - (begin - (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep") - (thread-sleep! 0.5) ;; give some margin for queries to complete before switching from file based access to server based access - (set! *inmemdb* (db:setup run-id)) - ;; force initialization - ;; (db:get-db *inmemdb* #t) - (db:get-db *inmemdb* run-id) - (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running")) - (begin ;; gotta exit nicely - (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "collision") - (http-transport:server-shutdown server-id port))))) - #f))) ; return #f - don't retry - #f)) ; return #f - don't retry since there is no inmemdb - + (thread-sleep! 4)))) + #f) ;; fallback for if the math is changed ... + + ;; + ;; no *inmemdb* yet, set running after our first pass through and start the db + ;; + (begin + (if (eq? server-state 'available) + (let ((new-server-id (tasks:server-am-i-the-server? (db:delay-if-busy tdbdat) run-id))) ;; try to ensure no double registering of servers + (if (equal? new-server-id server-id) + (begin + (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep") + (thread-sleep! 0.5) ;; give some margin for queries to complete before switching from file based access to server based access + (set! *inmemdb* (db:setup run-id)) + ;; force initialization + ;; (db:get-db *inmemdb* #t) + (db:get-db *inmemdb* run-id) + (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running")) + (begin ;; gotta exit nicely + (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "collision") + (http-transport:server-shutdown server-id port))))))) + #f) ;;; factored out of http-transport:keep-running (define (http-transport:get-server-info tdbdat server-start-time server-id run-id) (let loop ((start-time (current-seconds)) (changed #t) @@ -417,15 +419,15 @@ (mutex-lock! *heartbeat-mutex*) (set! sdat *server-info*) (mutex-unlock! *heartbeat-mutex*) (if (and sdat (not changed) - (> (- (current-seconds) start-time) 2)) + (> (- (current-seconds) start-time) (- (tasks:update-pause-seconds) 1) )) sdat (begin - (debug:print-info 0 "Still waiting, last-sdat=" last-sdat) - (sleep 4) + (debug:print-info 0 "Still waiting, sdat="sdat" last-sdat=" last-sdat) + (sleep (tasks:update-pause-seconds)) (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes (begin (debug:print 0 "ERROR: transport appears to have died, exiting server " server-id " for run " run-id) (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id "failed to start, never received server alive signature") (exit))