Index: runsmod.scm ================================================================== --- runsmod.scm +++ runsmod.scm @@ -1484,11 +1484,11 @@ ;; prereqs-not-met: prereqs-not-met ))) (runs:dat-regfull-set! runsdat regfull) (if (> (- (current-seconds) *last-test-launch*) 5) ;; be pretty aggressive for five seconds after - (runs:too-soon-delay (conc "loop delay " hed) 1 1) ;; starting a test then apply more delay + (runs:too-soon-delay (conc "loop delay " hed) 1 0.6) ;; starting a test then apply more delay (runs:too-soon-delay (conc "loop delay " hed) 1 0.1)) (if (> num-running 0) (set! last-time-some-running (current-seconds))) @@ -1734,11 +1734,11 @@ (rmt:set-var run-id (conc "launch-complete-" run-id) "yes") ;; now *if* -run-wait we wait for all tests to be done ;; Now wait for any RUNNING tests to complete (if in run-wait mode) ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat))) - (thread-sleep! 10) ;; I think there is a race condition here. Let states/statuses settle + (thread-sleep! 0.1) ;; I think there is a race condition here. Let states/statuses settle (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id)) (prev-num-running 0)) ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running) (if (and (or (args:get-arg "-run-wait") Index: ulex-dual/dbmgr.scm ================================================================== --- ulex-dual/dbmgr.scm +++ ulex-dual/dbmgr.scm @@ -333,18 +333,24 @@ ;; sometime in the future ;; (define (rmt:send-receive-real sinfo apath dbname cmd params) (let* ((cdat (rmt:get-conn sinfo apath dbname))) (assert cdat "FATAL: rmt:send-receive-real called without the needed channels opened") - (let* ((uconn (servdat-uconn sinfo)) ;; get the interface to ulex - ;; then send-receive using the ulex layer to host-port stored in cdat - (res (send-receive uconn (conndat-hostport cdat) cmd params))) - ;; since we accessed the server we can bump the expires time up - (conndat-expires-set! cdat (+ (current-seconds) - (server:expiration-timeout) - -2)) ;; two second margin for network time misalignments etc. - res))) + (condition-case + (let* ((uconn (servdat-uconn sinfo)) ;; get the interface to ulex + (hostport (conndat-hostport cdat)) + ;; then send-receive using the ulex layer to host-port stored in cdat + (res (send-receive uconn hostport cmd params))) + ;; since we accessed the server we can bump the expires time up + (conndat-expires-set! cdat (+ (current-seconds) + (server:expiration-timeout) + -2)) ;; two second margin for network time misalignments etc. + res) + ((exn i/o net) + (debug:print-info 0 *default-log-port* "IO failure in connection to "hostport + ", resetting connection.") + ; ;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed ;; sometime in the future. ;; Index: ulex-dual/ulex.scm ================================================================== --- ulex-dual/ulex.scm +++ ulex-dual/ulex.scm @@ -260,11 +260,24 @@ `(cmd . ,cmd) `(params . ,params)))) (cond (isme (do-work udata dat)) ;; no transmission needed (else - (handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC? + (let-values (((inp oup)(tcp-connect host port))) + (let ((res (if (and inp oup) + (begin + (write (obj->string dat) oup) + (close-output-port oup) + (string->obj (read inp))) + (begin + (print "ERROR: send called but no receiver has been setup. Please call setup first!") + #f)))) + (close-input-port inp))) + + + + #;(handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC? exn (begin (print "ULEX send-receive: "cmd", "params", exn="exn) (message exn)) (begin