Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -62,10 +62,12 @@ synchash-get )) (define api:write-queries '( + get-keys-write ;; dummy "write" query to force server start + ;; SERVERS start-server kill-server ;; TESTS @@ -189,10 +191,11 @@ ;;====================================================================== ;; KEYS ((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params)) ((get-keys) (db:get-keys dbstruct)) + ((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server ((get-key-vals) (apply db:get-key-vals dbstruct params)) ((get-target) (apply db:get-target dbstruct params)) ((get-targets) (db:get-targets dbstruct)) ;; ARCHIVES Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -553,13 +553,13 @@ ;; if #t use timestamps : or 'timestamps (define (common:sync-to-megatest.db dbstruct) (let ((start-time (current-seconds)) (res (db:multi-db-sync dbstruct 'new2old))) (let ((sync-time (- (current-seconds) start-time))) - (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds") + (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id)) (if (common:low-noise-print 30 "sync new to old") - (debug:print-info 0 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds"))) + (debug:print-info 0 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id)))) res)) ;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp ;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server) ;; @@ -566,20 +566,20 @@ (define (common:watchdog) (thread-sleep! 0.05) ;; delay for startup (let ((legacy-sync (common:run-sync?)) (debug-mode (debug:debug-mode 1)) (last-time (current-seconds))) - (debug:print-info 0 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync) + (debug:print-info 0 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)) (if legacy-sync (let ((dbstruct (db:setup))) (debug:print-info 0 *default-log-port* "Server running, periodic sync started.") (let loop () + ;;(BB> "watchdog loop. pid="(current-process-id)) ;; sync for filesystem local db writes ;; (mutex-lock! *db-multi-sync-mutex*) - (let* ( - (need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write + (let* ((need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write (sync-in-progress *db-sync-in-progress*) (should-sync (> (- (current-seconds) *db-last-sync*) 5)) ;; sync every five seconds minimum (will-sync (and (or need-sync should-sync) (not sync-in-progress))) (start-time (current-seconds))) @@ -616,11 +616,11 @@ (begin (thread-sleep! 1) (delay-loop (+ count 1)))) (loop))) (if (common:low-noise-print 30) - (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*))))))) + (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)))))))) (define (std-exit-procedure) (on-exit (lambda () 0)) (let ((no-hurry (if *time-to-exit* ;; hurry up #f Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -486,11 +486,11 @@ ;; (thread-sleep! rem-time) ;; (thread-sleep! 4))) ;; fallback for if the math is changed ... (define (http-transport:server-shutdown server-id port) (let ((tdbdat (tasks:open-db))) - (debug:print-info 0 *default-log-port* "Starting to shutdown the server.") + (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) ;; ;; start_shutdown ;; (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down") (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -86,23 +86,38 @@ ((and (cdr (remote-hh-dat *runremote*)) ;; on homehost (member cmd api:read-only-queries)) ;; this is a read (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 3") (rmt:open-qry-close-locally cmd 0 params)) + + ;; on homehost and this is a write, we already have a server, but server has died + ((and (cdr (remote-hh-dat *runremote*)) ;; on homehost + (not (member cmd api:read-only-queries)) ;; this is a write + (remote-server-url *runremote*) ;; have a server + (not (server:read-dotserver *toppath*))) ;; server has died. + (set! *runremote* #f) + (mutex-unlock! *rmt-mutex*) + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") + (rmt:send-receive cmd rid params attemptnum: attemptnum)) + ;; on homehost and this is a write, we already have a server ((and (cdr (remote-hh-dat *runremote*)) ;; on homehost (not (member cmd api:read-only-queries)) ;; this is a write (remote-server-url *runremote*)) ;; have a server (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4") (rmt:open-qry-close-locally cmd 0 params)) - ;; on homehost and this is a write, we have a server (we know because case 4 checked) - ((and (cdr (remote-hh-dat *runremote*)) ;; on homehost - (not (member cmd api:read-only-queries))) - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") - (rmt:open-qry-close-locally cmd 0 params)) + + ;; commented by bb; this was blocking server passive start on write on homehost (case 5) + ;; ;; on homehost and this is a write, we have a server (we know because case 4 checked) + ;; ((and (cdr (remote-hh-dat *runremote*)) ;; on homehost + ;; (not (member cmd api:read-only-queries))) + ;; (mutex-unlock! *rmt-mutex*) + ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") + ;; (rmt:open-qry-close-locally cmd 0 params)) + + ;; no server contact made and this is a write, passively start a server ((and (not (remote-server-url *runremote*)) (not (member cmd api:read-only-queries))) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") (let ((serverconn (server:read-dotserver *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call @@ -349,10 +364,15 @@ (define (rmt:get-keys) (if *db-keys* *db-keys* (let ((res (rmt:send-receive 'get-keys #f '()))) (set! *db-keys* res) res))) + +(define (rmt:get-keys-write) ;; dummy query to force server start + (let ((res (rmt:send-receive 'get-keys-write #f '()))) + (set! *db-keys* res) + res)) ;; we don't reuse run-id's (except possibly *after* a db cleanup) so it is safe ;; to cache the resuls in a hash ;; (define (rmt:get-key-vals run-id)