Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -33,11 +33,11 @@ (define *heartbeat-mutex* (make-mutex)) (define (server:self-ping iface port) (let ((zsocket (server:client-connect iface port))) (let loop () - (thread-sleep! 5) + (thread-sleep! 2) (cdb:client-call zsocket 'ping #t) (debug:print 4 "server:self-ping - I'm alive on " iface ":" port "!") (mutex-lock! *heartbeat-mutex*) (set! *server-loop-heart-beat* (current-seconds)) (mutex-unlock! *heartbeat-mutex*) @@ -124,16 +124,16 @@ ;; (define (server:keep-running) ;; if none running or if > 20 seconds since ;; server last used then start shutdown (let loop ((count 0)) - (thread-sleep! 3) ;; no need to do this very often + (thread-sleep! 4) ;; no need to do this very often (db:write-cached-data) ;; (print "Server running, count is " count) - (if (< count 2) ;; 3x3 = 9 secs aprox + (if (< count 1) ;; 3x3 = 9 secs aprox (loop (+ count 1)) - (let ((numrunning (open-run-close db:get-count-tests-running #f)) + (let (;; (numrunning (open-run-close db:get-count-tests-running #f)) (server-loop-heartbeat #f) (server-info #f) (pulse 0)) ;; BUG add a wait on server alive here!! ;; ;; Ugly yuk. @@ -143,24 +143,26 @@ (mutex-unlock! *heartbeat-mutex*) ;; The logic here is that if the server loop gets stuck blocked in working ;; we don't want to update our heartbeat (set! pulse (- (current-seconds) server-loop-heartbeat)) (debug:print-info 1 "Heartbeat period is " pulse " seconds on " (cadr server-info) ":" (caddr server-info) ", last db access is " (- (current-seconds) *last-db-access*) " seconds ago") - (if (> pulse 11) ;; must stay less than 10 seconds + (if (> pulse 15) ;; must stay less than 10 seconds (begin + (open-run-close tasks:server-deregister tasks:open-db (cadr server-info) port: (caddr server-info)) (debug:print 0 "ERROR: Heartbeat failed, committing servercide") (exit)) (open-run-close tasks:server-update-heartbeat tasks:open-db (car server-info))) - (if (or (> numrunning 0) ;; stay alive for two days after last access - (> (+ *last-db-access* - ;; (* 48 60 60) ;; 48 hrs - ;; 60 ;; one minute - (* 60 60) ;; one hour - ) - (current-seconds))) + ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access + (if (> (+ *last-db-access* + ;; (* 48 60 60) ;; 48 hrs + ;; 60 ;; one minute + (* 60 60) ;; one hour + ) + (current-seconds)) (begin - (debug:print-info 2 "Server continuing, tests running: " numrunning ", seconds since last db access: " (- (current-seconds) *last-db-access*)) + ;; (debug:print-info 2 "Server continuing, tests running: " numrunning ", seconds since last db access: " (- (current-seconds) *last-db-access*)) + (debug:print-info 2 "Server continuing, seconds since last db access: " (- (current-seconds) *last-db-access*)) (loop 0)) (begin (debug:print-info 0 "Starting to shutdown the server.") ;; need to delete only *my* server entry (future use) (set! *time-to-exit* #t) @@ -294,11 +296,11 @@ (let* ((th1 (make-thread (lambda () (let ((server-info #f)) ;; wait for the server to be online and available (let loop () (debug:print-info 1 "Waiting for the server to come online before starting heartbeat") - (thread-sleep! 5) + (thread-sleep! 2) (mutex-lock! *heartbeat-mutex*) (set! server-info *server-info* ) (mutex-unlock! *heartbeat-mutex*) (if (not server-info)(loop))) (debug:print 1 "Server alive, starting self-ping") @@ -315,11 +317,11 @@ (thread-join! th3)) (debug:print 0 "ERROR: Failed to setup for megatest"))) (exit))) (define (server:client-launch #!key (do-ping #f)) - (if (server:client-setup do-ping: do-ping) + (if (server:client-setup) (debug:print-info 2 "connected as client") (begin (debug:print 0 "ERROR: Failed to connect as client") (exit)))) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -26,10 +26,11 @@ (let* ((dbpath (conc *toppath* "/monitor.db")) (exists (file-exists? dbpath)) (mdb (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath)) (handler (make-busy-timeout 36000))) (sqlite3:set-busy-handler! mdb handler) + (sqlite3:execute mdb (conc "PRAGMA synchronous = 0;")) (if (not exists) (begin (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, action TEXT DEFAULT '', owner TEXT, @@ -188,11 +189,10 @@ (define (tasks:kill-server status hostname port pid) (debug:print-info 1 "Removing defunct server record for " hostname ":" port) (if port (open-run-close tasks:server-deregister tasks:open-db hostname port: port) (open-run-close tasks:server-deregister tasks:open-db hostname pid: pid)) - (if status ;; #t means alive (begin (if (equal? hostname (get-host-name)) (handle-exceptions exn