Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -124,11 +124,12 @@ (run-id (cadr params)) (realparams (cddr params))) (db:with-db dbstruct run-id #t ;; these are all for modifying the db (lambda (db) (db:general-call db stmtname realparams))))) - ((sync-inmem->db) (db:sync-touched dbstruct run-id force-sync: #t)) + ((sync-inmem->db) (let ((run-id (car params))) + (db:sync-touched dbstruct run-id force-sync: #t))) ((sdb-qry) (apply sdb:qry params)) ((ping) (current-process-id)) ;; TESTMETA ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params)) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -325,12 +325,12 @@ (> mtime stime) force-sync) (begin (db:delay-if-busy rundb) (db:delay-if-busy olddb) + (dbr:dbstruct-set-stime! dbstruct (current-milliseconds)) (let ((num-synced (db:sync-tables db:sync-tests-only inmem refdb rundb olddb))) - (dbr:dbstruct-set-stime! dbstruct (current-milliseconds)) (mutex-unlock! *http-mutex*) num-synced) (begin (mutex-unlock! *http-mutex*) 0)))))) Index: nmsg-transport.scm ================================================================== --- nmsg-transport.scm +++ nmsg-transport.scm @@ -78,11 +78,11 @@ (set! *server-info* (list hostn start-port)) ;; probably not needed anymore? currently used by keep-running (thread-sleep! 3) ;; give some margin for queries to complete before switching from file based access to server based access (set! *inmemdb* dbstruct) (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running") (thread-start! (make-thread - (lambda ()(nmsg-transport:keep-running server-id)) + (lambda ()(nmsg-transport:keep-running server-id run-id)) "keep running")) (thread-join! server-thread)) (if (> retrynum 0) (begin (debug:print 0 "WARNING: Failed to connect to server (self) on host " hostn ":" start-port ", trying again.") @@ -241,11 +241,11 @@ (vector success result))) ;; run nmsg-transport:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; -(define (nmsg-transport:keep-running server-id) +(define (nmsg-transport:keep-running server-id run-id) ;; if none running or if > 20 seconds since ;; server last used then start shutdown ;; This thread waits for the server to come alive (let* ((server-info (let loop () (let ((sdat #f)) @@ -278,14 +278,14 @@ (let () ;; (queue-len (cdb:client-call server-info 'sync #t 1))) ;; (print "Server running, count is " count) (if (< count 1) ;; 3x3 = 9 secs aprox (loop (+ count 1))) - ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access (mutex-lock! *heartbeat-mutex*) (set! last-access *last-db-access*) (mutex-unlock! *heartbeat-mutex*) + (db:sync-touched *inmemdb* run-id force-sync: #t) (if (and *server-run* (> (+ last-access server-timeout) (current-seconds))) (begin (debug:print-info 0 "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -64,11 +64,11 @@ ;; ;; (and (not (rmt:write-frequency-over-limit? cmd run-id)) (if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id) (client:setup run-id) #f)))) -(define (rmt:send-receive cmd rid params #!key (attemptnum 0)) +(define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected ;; clean out old connections (mutex-lock! *db-multi-sync-mutex*) ;; (let ((expire-time (- (current-seconds) 60))) ;; (for-each ;; (lambda (run-id) @@ -101,11 +101,12 @@ (begin ;; let ((new-connection-info (client:setup run-id))) (debug:print 0 "WARNING: Communication failed, trying call to http-transport:client-api-send-receive again.") ;; (case *transport-type* ;; ((nmsg)(nn-close (http-transport:server-dat-get-socket connection-info)))) (hash-table-delete! *runremote* run-id) ;; don't keep using the same connection - (tasks:kill-server-run-id run-id tag: "api-send-receive-failed") + (if (eq? (modulo attemptnum 5) 0) + (tasks:kill-server-run-id run-id tag: "api-send-receive-failed")) (tasks:start-and-wait-for-server (tasks:open-db) run-id 15) ;; (nmsg-transport:client-api-send-receive run-id connection-info cmd param remtries: (- remtries 1)))))) ;; no longer killing the server in http-transport:client-api-send-receive ;; may kill it here but what are the criteria? @@ -112,15 +113,15 @@ ;; start with three calls then kill server ;; (if (eq? attemptnum 3)(tasks:kill-server-run-id run-id)) ;; (thread-sleep! 2) (rmt:send-receive cmd run-id params attemptnum: (+ attemptnum 1))))) ;; no connection info? try to start a server - (if (and (< attemptnum 10) + (if (and (< attemptnum 15) (tasks:need-server run-id)) (begin - (tasks:start-and-wait-for-server (db:delay-if-busy (tasks:open-db)) run-id 10) (hash-table-delete! *runremote* run-id) + (tasks:start-and-wait-for-server (db:delay-if-busy (tasks:open-db)) run-id 10) (client:setup run-id) (thread-sleep! (random 5)) ;; give some time to settle and minimize collison? (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) (begin (debug:print 0 "ERROR: Communication failed!")