Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -202,11 +202,11 @@ (cadr (cddr dat)))))) (begin (if dbprep-found (begin (debug:print-info 0 *default-log-port* "Server is in dbprep at " (current-seconds)) - (thread-sleep! 25) + (thread-sleep! 0.5) ;; was 25 sec but that blocked things from starting? ) (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (current-seconds)) ) (list #f #f #f #f))))))))) @@ -405,28 +405,29 @@ (server:wait-for-server-start-last-flag areapath) (if (not (server:check-if-running areapath)) ;; why try if there is already a server running? (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun (call-num (car last-run-dat)) (when-run (cadr last-run-dat)) - (run-delay (+ (case call-num + (run-delay (+ (case call-num ;; NOT USED. Waiting is handled by wait-for-server ((0) 0) ((1) 20) ((2) 300) (else 600)) - ;(random 5) + (random 5) 0)) ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously (lock-file (conc areapath "/logs/server-start.lock"))) - (if (> (- (current-seconds) when-run) run-delay) - (let* ((start-flag (conc areapath "/logs/server-start-last"))) - (common:simple-file-lock-and-wait lock-file expire-time: 15) - (debug:print-info 0 *default-log-port* "server:kind-run: touching " start-flag) - (system (conc "touch " start-flag)) ;; lazy but safe - (server:run areapath) - (thread-sleep! 2) ;; don't release the lock for at least a few seconds - (common:simple-file-release-lock lock-file))) - (hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds)))))) + ;; (if (> (- (current-seconds) when-run) run-delay) + (let* ((start-flag (conc areapath "/logs/server-start-last"))) + (common:simple-file-lock-and-wait lock-file expire-time: 15) + (debug:print-info 0 *default-log-port* "server:kind-run: touching " start-flag) + (system (conc "touch " start-flag)) ;; lazy but safe + (server:run areapath) + (thread-sleep! 2) ;; don't release the lock for at least a few seconds + (common:simple-file-release-lock lock-file))) + (debug:print-info 0 *default-log-port* "Found server already running. NOT trying to start another."))) + ;; (hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds)))))) ;; this one seems to be the general entry point ;; (define (server:start-and-wait areapath #!key (timeout 60)) (let ((give-up-time (+ (current-seconds) timeout)))