Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -533,11 +533,12 @@ (daemon:ize) (if *alt-log-file* ;; we should re-connect to this port, I think daemon:ize disrupts it (begin (current-error-port *alt-log-file*) (current-output-port *alt-log-file*))))) - (if (server:check-if-running run-id) + (if (and (server:read-dotserver *toppath*) + (server:check-if-running run-id)) (begin (debug:print 0 *default-log-port* "INFO: Server for run-id " run-id " already running") (exit 0)) (begin ;; ok, no server detected, clean out any lingering records (tasks:server-force-clean-running-records-for-run-id (db:delay-if-busy tdbdat) run-id "notresponding"))) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -26,11 +26,11 @@ ;; ;; grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u (defstruct remote (hh-dat (common:get-homehost)) ;; homehost record ( addr . hhflag ) - (server-url (if *toppath* (server:check-if-running *toppath*) #f)) + (server-url (if *toppath* (server:read-dotserver *toppath*))) ;; (server:check-if-running *toppath*) #f)) (last-server-check 0) ;; last time we checked to see if the server was alive (conndat #f) (transport *transport-type*) (server-timeout (or (server:get-timeout) 100))) ;; default to 100 seconds @@ -111,21 +111,21 @@ (rmt:open-qry-close-locally cmd 0 params)) ;; no server contact made and this is a write, passively start a server ((and (not (remote-server-url *runremote*)) (not (member cmd api:read-only-queries))) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") - (let ((serverconn (server:check-if-running *toppath*))) + (let ((serverconn (server:read-dotserver *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call (if serverconn (remote-server-url-set! *runremote* serverconn) ;; the string can be consumed by the client setup if needed (if (not (server:start-attempted? *toppath*)) (server:kind-run *toppath*)))) (if (cdr (remote-hh-dat *runremote*)) ;; we are on the homehost, just do the call (begin (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5.1") (rmt:open-qry-close-locally cmd 0 params)) - (begin + (begin ;; not on homehost, start server and wait (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5.2") (tasks:start-and-wait-for-server (tasks:open-db) 0 15) (rmt:send-receive cmd rid params attemptnum: attemptnum)))) ;; if not on homehost ensure we have a connection to a live server Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -404,11 +404,15 @@ (if (common:low-noise-print 60 "tasks:start-and-wait-for-server" run-id) (debug:print 0 *default-log-port* "Try starting server for run-id " run-id)) (thread-sleep! (/ (random 2000) 1000)) (server:kind-run run-id) (thread-sleep! (min delay-time 1)) - (loop (tasks:get-server (db:delay-if-busy tdbdat) run-id)(+ delay-time 1)))))) + (if (not (or (server:start-attempted? *toppath*) + (server:read-dotserver *toppath*))) ;; no point in trying + (loop (tasks:get-server (db:delay-if-busy tdbdat) run-id)(+ delay-time 1)) + #f)) + #f))) (define (tasks:get-all-servers mdb) (let ((res '())) (sqlite3:for-each-row (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id)