Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -289,11 +289,11 @@ (port (vector-ref server 3)) (start-time (vector-ref server 4)) (priority (vector-ref server 5)) (state (vector-ref server 6)) (stat-numc (server:ping hostname port)) - (status (car stat-numc)) + (status (car stat-numc)) (numclients (cadr stat-numc)) (killed #f) (zmq-socket (if status (server:client-connect hostname port) #f))) ;; no need to login as status of #t indicates we are connecting to correct ;; server @@ -308,32 +308,34 @@ (cdb:kill-server zmq-socket) (debug:print-info 1 "Killed server by host:port at " hostname ":" port)) (debug:print-info 1 "Removing defunct server record for " hostname ":" port)) (set! killed #t))) (if (and kpid - (equal? hostname (car khost-port)) - (equal? kpid pid)) + ;; (equal? hostname (car khost-port)) + (equal? kpid pid)) ;;; YEP, ALL WITH PID WILL BE KILLED!!! (begin (open-run-close tasks:server-deregister tasks:open-db hostname pid: pid) (set! killed #t) (if status (cdb:kill-server zmq-socket)) (debug:print-info 1 "Killed server by pid at " hostname ":" port))) ;; (if zmq-socket (close-socket zmq-socket)) (format #t fmtstr id pid hostname port start-time priority status numclients))) servers) + (debug:print-info 1 "Done with listservers") + (exit) ;; must do, would have to add checks to many/all calls below (set! *didsomething* #t)))) ;; if not list or kill then start a client (if appropriate) (if (or (args-defined? "-h" "-version" "-gen-megatest-area" "-gen-megatest-test") (eq? (length (hash-table-keys args:arg-hash)) 0)) (debug:print-info 1 "Server connection not needed") ;; ping servers only if -runall -runtests (let ((ping (args-defined? "-runall" "-runtests" "-remove-runs" "-set-state-status" "-rerun" "-rollup" "-lock" "-unlock" - "-set-values" "-list-runs"))) + "-set-values" "-list-runs" "-repl"))) (server:client-launch do-ping: ping)))) - + ;;====================================================================== ;; Remove old run(s) ;;====================================================================== ;; since several actions can be specified on the command line the removal Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -45,11 +45,11 @@ (set! *cache-on* #t) ;; what to do when we quit ;; (on-exit (lambda () - (open-run-close tasks:server-deregister-self tasks:open-db) + (open-run-close tasks:server-deregister-self tasks:open-db #f) (let loop () (let ((queue-len 0)) (thread-sleep! (random 5)) (mutex-lock! *incoming-mutex*) (set! queue-len (length *incoming-data*)) @@ -70,12 +70,12 @@ (debug:print-info 12 "server=> processed res=" res) (send-message zmq-socket (db:obj->string res)) (if (not *time-to-exit*) (loop) (begin + (open-run-close tasks:server-deregister-self tasks:open-db #f) (db:write-cached-data) - (open-run-close tasks:server-deregister-self tasks:open-db) (exit) )))))) ;; run server:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. @@ -82,30 +82,30 @@ ;; (define (server:keep-running) ;; if none running or if > 20 seconds since ;; server last used then start shutdown (let loop ((count 0)) - (thread-sleep! 1) ;; no need to do this very often + (thread-sleep! 3) ;; no need to do this very often (db:write-cached-data) ;; (print "Server running, count is " count) (if (< count 10) (loop (+ count 1)) (let ((numrunning (open-run-close db:get-count-tests-running #f))) - (if (or (> numrunning 0) - (> (+ *last-db-access* 60)(current-seconds))) + (if (or (> numrunning 0) ;; stay alive for two days after last access + (> (+ *last-db-access* (* 48 60 60))(current-seconds))) (begin - (debug:print-info 0 "Server continuing, tests running: " numrunning ", seconds since last db access: " (- (current-seconds) *last-db-access*)) - (loop 0))) + (debug:print-info 2 "Server continuing, tests running: " numrunning ", seconds since last db access: " (- (current-seconds) *last-db-access*)) + (loop 0)) (begin (debug:print-info 0 "Starting to shutdown the server.") ;; need to delete only *my* server entry (future use) (set! *time-to-exit* #t) - (open-run-close tasks:server-deregister-self tasks:open-db) - (thread-sleep! 5) + (open-run-close tasks:server-deregister-self tasks:open-db #f) + (thread-sleep! 1) (debug:print-info 0 "Max cached queries was " *max-cache-size*) (debug:print-info 0 "Server shutdown complete. Exiting") - (exit)))))) + (exit))))))) (define (server:find-free-port-and-open host s port #!key (trynum 50)) (let ((s (if s s (make-socket 'rep))) (p (if (number? port) port 5555))) (handle-exceptions @@ -197,12 +197,13 @@ #f)))))) (if (> numtries 0) (let ((exe (car (argv)))) (debug:print-info 1 "No server available, attempting to start one...") (process-run exe (list "-server" "-" "-debug" (conc *verbosity*))) - (sleep 5) - (server:client-setup numtries: (- numtries 1) do-ping: do-ping)) + (sleep 2) + ;; not doing ping, assume the server started and registered itself + (server:client-setup numtries: (- numtries 1) do-ping: #f)) (debug:print-info 1 "Too many retries, giving up"))))) (define (server:launch) (let* ((toppath (setup-for-run))) (debug:print-info 0 "Starting the standalone server") Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -78,20 +78,21 @@ (sqlite3:execute mdb "INSERT OR REPLACE INTO servers (pid,hostname,port,start_time,priority,state) VALUES(?,?,?,strftime('%s','now'),?,?);" pid hostname port priority (conc state))) +;; NB// two servers with same pid on different hosts will be removed from the list if pid: is used! (define (tasks:server-deregister mdb hostname #!key (port #f)(pid #f)) (debug:print-info 11 "server-deregister " hostname ", port " port ", pid " pid) (if pid - (sqlite3:execute mdb "DELETE FROM servers WHERE hostname=? AND pid=?;" hostname pid) + (sqlite3:execute mdb "DELETE FROM servers WHERE pid=?;" pid) (if port (sqlite3:execute mdb "DELETE FROM servers WHERE hostname=? AND port=?;" hostname port) (debug:print 0 "ERROR: tasks:server-deregister called with neither pid nor port specified")))) -(define (tasks:server-deregister-self mdb) - (tasks:server-deregister mdb (get-host-name) pid: (current-process-id))) +(define (tasks:server-deregister-self mdb hostname) + (tasks:server-deregister mdb hostname pid: (current-process-id))) (define (tasks:server-get-server-id mdb) ;; dunno yet 0)