Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -288,13 +288,13 @@ (debug:print 0 "WARNING: failure in with-input-from-request to " fullurl ".") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (hash-table-delete! *runremote* run-id) ;; Killing associated server to allow clean retry.") (tasks:kill-server-run-id run-id) ;; better to kill the server in the logic that called this routine? - (signal (make-composite-condition - (make-property-condition 'commfail 'message "failed to connect to server"))) - #f) + ;; (signal (make-composite-condition + ;; (make-property-condition 'commfail 'message "failed to connect to server"))) + "communications failed") (with-input-from-request ;; was dat fullurl (list (cons 'key "thekey") (cons 'cmd cmd) (cons 'params sparams)) @@ -405,18 +405,11 @@ (equal? sdat last-sdat) sdat))))))) (iface (car server-info)) (port (cadr server-info)) (last-access 0) - (server-timeout (let ((tmo (configf:lookup *configdat* "server" "timeout"))) - (if (and (string? tmo) - (string->number tmo)) - (* 60 60 (string->number tmo)) - ;; (* 3 24 60 60) ;; default to three days - (* 60 1) ;; default to one minute - ;; (* 60 60 25) ;; default to 25 hours - )))) + (server-timeout (server:get-timeout))) (let loop ((count 0) (server-state 'available)) ;; Use this opportunity to sync the inmemdb to db (let ((start-time (current-milliseconds)) (sync-time #f) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -67,23 +67,26 @@ #f)))) (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id (define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected ;; clean out old connections - ;; (mutex-lock! *db-multi-sync-mutex*) - ;; (let ((expire-time (- (current-seconds) 60))) - ;; (for-each - ;; (lambda (run-id) - ;; (let ((connection (hash-table-ref/default *runremote* run-id #f))) - ;; (if (and connection - ;; (< (http-transport:server-dat-get-last-access connection) expire-time)) - ;; (begin - ;; (debug:print-info 0 "Discarding connection to server for run-id " run-id ", too long between accesses") - ;; ;; SHOULD CLOSE THE CONNECTION HERE - ;; (hash-table-delete! *runremote* run-id))))) - ;; (hash-table-keys *runremote*))) - ;; (mutex-unlock! *db-multi-sync-mutex*) + (mutex-lock! *db-multi-sync-mutex*) + (let ((expire-time (- (current-seconds) (server:get-timeout) 10))) ;; don't forget the 10 second margin + (for-each + (lambda (run-id) + (let ((connection (hash-table-ref/default *runremote* run-id #f))) + (if (and connection + (< (http-transport:server-dat-get-last-access connection) expire-time)) + (begin + (debug:print-info 0 "Discarding connection to server for run-id " run-id ", too long between accesses") + ;; SHOULD CLOSE THE CONNECTION HERE + (case *transport-type* + ((nmsg)(nn-close (http-transport:server-dat-get-socket + (hash-table-ref *runremote* run-id))))) + (hash-table-delete! *runremote* run-id))))) + (hash-table-keys *runremote*))) + (mutex-unlock! *db-multi-sync-mutex*) ;; (mutex-lock! *send-receive-mutex*) (let* ((run-id (if rid rid 0)) (connection-info (rmt:get-connection-info run-id))) ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also) (if connection-info Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -204,5 +204,17 @@ (case (string->symbol res) ((NOREPLY) #f) ((LOGIN_OK) #t) (else #f)) (loop (read-line) inl)))))) + + +(define (server:get-timeout) + (let ((tmo (configf:lookup *configdat* "server" "timeout"))) + (if (and (string? tmo) + (string->number tmo)) + (* 60 60 (string->number tmo)) + ;; (* 3 24 60 60) ;; default to three days + (* 60 1) ;; default to one minute + ;; (* 60 60 25) ;; default to 25 hours + ))) +