Index: tcp-transportmod.scm ================================================================== --- tcp-transportmod.scm +++ tcp-transportmod.scm @@ -242,11 +242,11 @@ (if (not res) ;; tt:handler is telling us that communication failed (let* ((host (tt-conn-host conn)) (port (tt-conn-port conn)) ;; (dbfname (tt-conn-port conn)) ;; 192.168.0.127:4242-726924:4.db (pid (tt-conn-pid conn)) - (servinf (conc areapath"/.servinfo/"host":"port"-"pid":"dbfname))) + (servinf (tt-servinf-file ttdat))) ;; (conc areapath"/.servinfo/"host":"port"-"pid":"dbfname))) ;; TODO, use (server:get-servinfo-dir areapath) (hash-table-set! (tt-conns ttdat) dbfname #f) (if (file-exists? servinf) (begin (if (< attemptnum 3) (begin @@ -265,15 +265,16 @@ (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) (begin ;; start server - addressed in client-connect-to-server ;; delay - addressed in client-connect-to-server ;; try again + (thread-sleep! 0.25) ;; dunno, I think this needs to be here (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) )))) (begin ;; no server file, delay and try again (debug:print 0 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", but do not see servinf file "servinf) - (thread-sleep! 1) + (thread-sleep! 0.5) (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)))) (begin ;; this case is where res is malformed. Probably should abort (assert #f "FATAL: tt:handler received bad data "res) ;; (debug:print 0 *default-log-port* "INFO: got corrupt data from server "host":"port", "res", for "dbfname", will try again.") ;; (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe) @@ -374,17 +375,20 @@ (assert (number? port) "FATAL: tt:send-receive-direct called with port not a number "port) (tt:backoff-decr-and-wait host port) (let* ((retry (lambda () (tt:send-receive-direct host port dat tries-remaining: (- tries-remaining 1)))) (full-err-print (lambda (exn msg) - (pp (condition->list exn) *default-log-port*) - (pp dat *default-log-port*) - (debug:print 0 *default-log-port* msg - ", error: " ((condition-property-accessor 'exn 'message) exn) - ", arguments: " ((condition-property-accessor 'exn 'arguments) exn) - ", location: " ((condition-property-accessor 'exn 'location) exn) - )))) + (if (condition? exn) + (begin + (pp (condition->list exn) *default-log-port*) + (pp dat *default-log-port*) + (debug:print 0 *default-log-port* msg + ", error: " ((condition-property-accessor 'exn 'message) exn) + ", arguments: " ((condition-property-accessor 'exn 'arguments) exn) + ", location: " ((condition-property-accessor 'exn 'location) exn) + )) + (debug:print 0 *default-log-port* msg "(note: exn="exn", is not a condition object."))))) (condition-case (let-values (((inp oup)(tcp-connect host port))) (let ((res (if (and inp oup) (begin (serialize dat oup) @@ -412,11 +416,11 @@ (cond ((> tries-remaining 4) ;; server likely defunct (tt:backoff-incr host port) #f) ((>= tries-remaining 0) - (let* ((backoff-delay (* (- 26 tries-remaining) 0.1))) + (let* ((backoff-delay (max (* (- 26 tries-remaining) 0.1) 1.0))) (debug:print 0 *default-log-port* "WARNING: TCP overload, trying again in "backoff-delay"s.") (thread-sleep! backoff-delay) (tt:backoff-incr host port) (retry)) (assert #f "FATAL: Too many retries in tt:send-receive-direct"))