Index: apimod.scm ================================================================== --- apimod.scm +++ apimod.scm @@ -203,16 +203,16 @@ ((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl ;; SERVERS ;; ((start-server) (apply server:kind-run params)) ((kill-server) (set! *server-run* #f)) - ((get-server) (api:start-server dbstruct params)) + ((start-server get-server) (api:start-server dbstruct params)) ((get-server-info) (apply db:get-server-info dbstruct params)) ((register-server) (apply db:register-server dbstruct params) );; dbstruct host port servkey pid ipaddr dbpath) ((deregister-server) (apply db:deregister-server dbstruct params) );; dbstruct host port servkey pid ipaddr dbpath) ((get-count-servers) (apply db:get-count-servers dbstruct params)) - + ((get-servers-info) (apply db:get-servers-info dbstruct params)) ;; TESTS ;;((test-set-state-status-by-id) (apply mt:test-set-state-status-by-id dbstruct params)) ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items. ((test-set-state-status-by-id) Index: commonmod.scm ================================================================== --- commonmod.scm +++ commonmod.scm @@ -275,16 +275,16 @@ launch:is-test-alive common:get-num-cpus common:wait-for-normalized-load common:wait-for-cpuload tasks:kill-server -server:get-logs-list -server:get-list -server:get-num-alive -server:get-best -server:get-first-best -server:get-rand-best +;; server:get-logs-list +;; server:get-list +;; server:get-num-alive +;; server:get-best +;; server:get-first-best +;; server:get-rand-best server:record->id server:get-num-servers server:logf-get-start-info get-uname realpath @@ -2825,21 +2825,21 @@ (system (conc "gzip " logfile)) (unset-environment-variable! "TARGETHOST_LOGF") (unset-environment-variable! "TARGETHOST")))) -(define (server:get-logs-list area-path) +#;(define (server:get-logs-list area-path) (let* (;; (server-logs-cmd (conc "grep -iL exiting " area-path "/logs/server-*-*.log")) ;; (server-logs (string-split (string-chomp (with-input-from-pipe server-logs-cmd read-string)))) (server-logs (glob (conc area-path"/logs/server-*-*.log"))) ) server-logs)) ;; get a list of servers with all relevant data ;; ( mod-time host port start-time pid ) ;; -(define (server:get-list areapath #!key (limit #f)) +#;(define (server:get-list areapath #!key (limit #f)) (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$")) (day-seconds (* 24 60 60))) ;; if the directory exists continue to get the list ;; otherwise attempt to create the logs dir and then ;; continue @@ -2887,11 +2887,11 @@ (> (length new-res) limit)) new-res ;; (take new-res limit) <= need intelligent sorting before this will work new-res) (loop (string-chomp (car tal)) (cdr tal) new-res))))))))) -(define (server:get-num-alive srvlst) +#;(define (server:get-num-alive srvlst) (let ((num-alive 0)) (for-each (lambda (server) (handle-exceptions exn @@ -2914,11 +2914,11 @@ ;; mod-time host port start-time pid ;; ;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off ;; and servers should stick around for about two hours or so. ;; -(define (server:get-best srvlst) +#;(define (server:get-best srvlst) (let* ((nums (server:get-num-servers)) (now (current-seconds)) (slst (sort (filter (lambda (rec) (if (and (list? rec) @@ -2942,18 +2942,18 @@ (list-ref b 3)))))) (if (> (length slst) nums) (take slst nums) slst))) -(define (server:get-first-best areapath) +#;(define (server:get-first-best areapath) (let ((srvrs (server:get-best (server:get-list areapath)))) (if (and srvrs (not (null? srvrs))) (car srvrs) #f))) -(define (server:get-rand-best areapath) +#;(define (server:get-rand-best areapath) (let ((srvrs (server:get-best (server:get-list areapath)))) (if (and (list? srvrs) (not (null? srvrs))) (let* ((len (length srvrs)) (idx (pseudo-random-integer len))) Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -240,10 +240,11 @@ ;; ;;;(if (file-write-access? (conc *toppath* "/megatest.db")) ;;(debug:print-info 13 *default-log-port* "Before common:watchdog spawn") (thread-start! (make-thread common:watchdog "Watchdog thread")) + ;;(debug:print-info 13 *default-log-port* "After common:watchdog spawn") ;; (if (not (args:get-arg "-use-db-cache")) ;; (begin ;; (debug:print-info 0 *default-log-port* "Forcing db-cache mode due to read-only access to megatest.db") ;; (hash-table-set! args:arg-hash "-use-db-cache" #t)));;;) @@ -3669,11 +3670,11 @@ ;;====================================================================== ;; The heavy lifting starts here ;;====================================================================== (define (dashboard-main) - (let ((mtdb-path (conc *toppath* "/megatest.db"))) ;; + (let ((mtdb-path (conc *toppath* "/megatest.db"))) ;; not used for now, update for .db area and use for write access detection #;(if (and (common:file-exists? mtdb-path) (file-writable? mtdb-path)) (if (not (args:get-arg "-skip-version-check")) (common:exit-on-version-changed))) (let* ((commondat (dboard:commondat-make))) Index: dbmod.scm ================================================================== --- dbmod.scm +++ dbmod.scm @@ -227,10 +227,11 @@ db:get-cache-stmth db:register-server db:deregister-server db:get-server-info db:get-count-servers +db:get-servers-info db:get-steps-info-by-id make-dbr:dbdat dbr:dbdat-db dbr:dbdat-inmem @@ -5889,7 +5890,20 @@ (max res count)) 0 db "SELECT count(*) FROM servers WHERE apath=?;" apath)))) + +(define (db:get-servers-info dbstruct apath) + (db:with-db + dbstruct + #f #f + (lambda (db) + (sqlite3:fold-row + (lambda (res count) + (max res count)) + 0 + db + "SELECT * FROM servers WHERE apath=?;" + apath)))) ) Index: dcommon.scm ================================================================== --- dcommon.scm +++ dcommon.scm @@ -905,11 +905,11 @@ #:numlin-visible 5 )) (colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId")) (updater (lambda () (if (dashboard:monitor-changed? commondat tabdat) - (let ((servers (server:get-list *toppath* limit: 10))) + (let ((servers (rmt:get-servers-info *remotedat* *toppath*)#;(server:get-list *toppath* limit: 10))) (iup:attribute-set! servers-matrix "NUMLIN" (length servers)) ;; (set! colnum 0) ;; (for-each (lambda (colname) ;; ;; (print "colnum: " colnum " colname: " colname) ;; (iup:attribute-set! servers-matrix (conc "0:" colnum) colname) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -1135,12 +1135,21 @@ (if (or (args:get-arg "-list-servers") (args:get-arg "-kill-servers")) (let ((tl (launch:setup))) (if tl ;; all roads from here exit - (let* ((servers (server:get-list *toppath*)) + (let* ((servers (rmt:get-servers-info *remotedat* *toppath*)) (fmtstr "~8a~22a~20a~20a~8a\n")) + ;; id INTEGER PRIMARY KEY, + ;; host TEXT, + ;; port INTEGER, + ;; servkey TEXT, + ;; pid TEXT, + ;; ipaddr TEXT, + ;; apath TEXT, + ;; dbname TEXT, + ;; event_time (format #t fmtstr "pid" "Interface:port" "age (hms)" "Last mod" "State") (format #t fmtstr "===" "==============" "=========" "========" "=====") (for-each ;; ( mod-time host port start-time pid ) (lambda (server) (let* ((mtm (any->number (car server))) Index: rmtmod.scm ================================================================== --- rmtmod.scm +++ rmtmod.scm @@ -432,14 +432,14 @@ ;;====================================================================== ;; S E R V E R ;;====================================================================== (define (rmt:kill-server run-id) - (rmt:send-receive 'kill-server run-id (list run-id))) + (rmt:send-receive 'kill-server #f (list run-id))) (define (rmt:start-server run-id) - (rmt:send-receive 'start-server 0 (list run-id))) + (rmt:send-receive 'start-server #f (list run-id))) (define (rmt:get-server-info apath dbname) (rmt:send-receive 'get-server-info #f (list apath dbname))) ;;====================================================================== @@ -2134,12 +2134,14 @@ (define (rmt:get-count-servers remdat apath) (remotedat-conns remdat) ;; just checking types (rmt:open-main-connection remdat apath) ;; we need a channel to main.db (rmt:send-receive-real remdat apath ;; params: host port servkey pid ipaddr dbpath (db:run-id->dbname #f) - 'get-count-servers `(,apath - ))) + 'get-count-servers `(,apath))) + +(define (rmt:get-servers-info apath) + (rmt:send-receive 'get-servers-info #f `(,apath))) (define (rmt:deregister-server remdat apath iface port server-key dbname) (remotedat-conns remdat) ;; just checking types (rmt:open-main-connection remdat apath) ;; we need a channel to main.db (rmt:send-receive-real remdat apath ;; params: host port servkey pid ipaddr dbpath @@ -2204,11 +2206,20 @@ (server-start-time (current-seconds)) (pkts-dir (get-pkts-dir)) (server-key (rmt:get-signature)) ;; This servers key (is-main (equal? (args:get-arg "-db") ".db/main.db")) (last-access 0) - (server-timeout (server:expiration-timeout))) + (server-timeout (server:expiration-timeout)) + (shutdown-server-sequence (lambda (port) + (set! *unclean-shutdown* #f) + (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) + (rmt:server-shutdown) + (portlogger:open-run-close portlogger:set-port port "released") + (exit))) + (timed-out? (lambda () + (<= (+ last-access server-timeout) + (current-seconds))))) ;; main and run db servers have both got wait logic (could/should merge it) (if is-main (rmt:wait-for-server pkts-dir dbname server-key) (rmt:wait-for-stable-interface)) ;; this is our forever loop @@ -2290,26 +2301,28 @@ (begin (debug:print 0 *default-log-port* "Server stats:") (db:print-current-query-stats))) (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) (cond + ((not *server-run*) + (debug:print-info 0 *default-log-port* "*server-run* set to #f. Shutting down.") + (shutdown-server-sequence port)) + ((timed-out?) + (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) + (shutdown-server-sequence port)) ((and *server-run* - (> (+ last-access server-timeout) - (current-seconds)) - (if is-main + (not (timed-out?)) + #;(if is-main ;; intention here was to exit main server quickly. (> (rmt:get-count-servers remdat *toppath*) 1) #t)) (if (common:low-noise-print 120 "server continuing") (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))) (loop 0 bad-sync-count (current-milliseconds))) (else (set! *unclean-shutdown* #f) (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) - (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) - (rmt:server-shutdown) - (portlogger:open-run-close portlogger:set-port port "released") - (exit) + (shutdown-server-sequence port) #;(debug:print-info 0 *default-log-port* "Sending 'quit to server, received: " (open-send-receive-nn (conc iface":"port) ;; do this here and not in server-shutdown (sexpr->string 'quit))) ))))))) Index: tests/unittests/server.scm ================================================================== --- tests/unittests/server.scm +++ tests/unittests/server.scm @@ -67,12 +67,11 @@ (test #f #t (rmt:general-open-connection *remotedat* *toppath* ".db/2.db")) (test #f '("SYSTEM" "RELEASE") (rmt:get-keys)) (test #f 1 (rmt:send-receive 'register-run #f (list keyvals "run1" "new" "n/a" "justme" #f))) -(print "Got here.") -(exit) +;; (print "Got here.") (test #f 1 (rmt:send-receive 'register-run 1 (list keyvals "run1" "new" "n/a" "justme" #f))) (test #f 2 (rmt:register-run keyvals "run2" "new" "n/a" "justme" #f)) ;; (test #f 2 (rmt:deregister-server *remotedat* *toppath* iface port server-key dbname @@ -79,7 +78,9 @@ (test #f 2 (rmt:get-count-servers *remotedat* *toppath*)) (test #f "run2" (rmt:get-run-name-from-id 2)) -;; (exit) +(test #f #t (list? (rmt:get-servers-info *toppath*))) + +(exit)