Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -62,11 +62,11 @@ (debug:print 0 *default-log-port* "ERROR: 15 tries to start/connect to server. Giving up.") (exit 1)) ;; reset the connection if it has been unused too long ((and *runremote* (remote-conndat *runremote*) - (let ((expire-time (- start-time (remote-server-timeout *runremote*)))) + (let ((expire-time (+ (- start-time (remote-server-timeout *runremote*))(random 30)))) ;; add 30 seconds of noise so that not all running tests expire at the same time causing a storm of server starts (< (http-transport:server-dat-get-last-access (remote-conndat *runremote*)) expire-time))) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8") (remote-conndat-set! *runremote* #f) (mutex-unlock! *rmt-mutex*) (rmt:send-receive cmd rid params attemptnum: attemptnum)) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -221,11 +221,14 @@ (mod-time (list-ref rec 0))) ;; (print "start-time: " start-time " mod-time: " mod-time) (and start-time mod-time (> (- now start-time) 0) ;; been running at least 0 seconds (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds - (< (- now start-time) (string->number (or (configf:lookup *configdat* "server" "runtime") "3600"))) ;; under one hour running time + (< (- now start-time) + (+ (- (string->number (or (configf:lookup *configdat* "server" "runtime") "3600")) + 180) + (random 360))) ;; under one hour running time +/- 180 ))) srvlst) (lambda (a b) (< (list-ref a 3) (list-ref b 3))))))