Index: TODO ================================================================== --- TODO +++ TODO @@ -16,12 +16,17 @@ # along with Megatest. If not, see <http://www.gnu.org/licenses/>. TODO ==== -NextSteps -. Remove servermod.scm +Loose ends +---------- + +. -list-servers not correct +. move *remotedat* into bigdata +. add back server stats on exit (look in rmt:run in rmtmod.scm) + WW15 . fill newview matrix with data, filter pipeline gui elements . improve [script], especially indent handling Index: dbmod.scm ================================================================== --- dbmod.scm +++ dbmod.scm @@ -5863,11 +5863,12 @@ #f) ;; server already deregistered (begin (sqlite3:execute db "DELETE FROM servers WHERE apath=? AND dbname=?;" ;; (host,port,servkey,pid,ipaddr,apath,dbname) VALUES (?,?,?,?,?,?,?);" ;; host port servkey pid ipaddr apath dbname) - #;(db:get-server-info dbstruct apath dbname))))))))) + #;(db:get-server-info dbstruct apath dbname) + 'done)))))))) (define (db:get-server-info dbstruct apath dbname) (db:with-db dbstruct #f #f Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -1146,36 +1146,29 @@ ;; pid TEXT, ;; ipaddr TEXT, ;; apath TEXT, ;; dbname TEXT, ;; event_time - (format #t fmtstr "pid" "Interface:port" "age (hms)" "Last mod" "State") - (format #t fmtstr "===" "==============" "=========" "========" "=====") + (format #t fmtstr "pid" "Interface:port" "State" "dbname" "apath") + (format #t fmtstr "===" "==============" "=====" "======" "=====") (for-each ;; ( mod-time host port start-time pid ) (lambda (server) - (let* ((mtm (any->number (car server))) - (mod (if mtm (- (current-seconds) mtm) "unk")) - (age (- (current-seconds)(or (any->number (list-ref server 3)) (current-seconds)))) - (url (conc (cadr server) ":" (caddr server))) - (pid (list-ref server 4)) - (alv (if (number? 
mod)(< mod 10) #f))) - (format #t - fmtstr - pid - url - (seconds->hr-min-sec age) - (seconds->hr-min-sec mod) - (if alv "alive" "dead")) - (if (and alv - (args:get-arg "-kill-servers")) + (match-let + (((id host port servkey pid ipaddr apath dbname event_time) server)) + (format #t + fmtstr + pid + (conc host":"port) + (if (server-ready? host port servkey) "Running" "Dead") + dbname ;; (seconds->hr-min-sec mod) + apath + ) + (if (args:get-arg "-kill-servers") (begin (debug:print-info 0 *default-log-port* "Attempting to kill server with pid " pid " !!needs completion!!") #;(server:kill server))))) - (sort servers (lambda (a b) - (let ((ma (or (any->number (car a)) 9e9)) - (mb (or (any->number (car b)) 9e9))) - (> ma mb))))) + servers) ;; (debug:print-info 1 *default-log-port* "Done with listservers") (set! *didsomething* #t) (exit)) (exit)))) ;; must do, would have to add checks to many/all calls below Index: rmtmod.scm ================================================================== --- rmtmod.scm +++ rmtmod.scm @@ -1527,11 +1527,12 @@ (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, db is not a database, not finalizing...")) (if (sqlite3:database? inmem) (sqlite3:finalize! 
inmem) (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, inmem is not a database, not finalizing...")) (debug:print-info 0 *default-log-port* "Finalizing db and inmem complete") - (if am-server + (if (not am-server) + (debug:print-info 0 *default-log-port* "I am not a server, should NOT get here!") (if (string-match ".*/main.db$" dbfile) (let ((pkt-file (conc (get-pkts-dir *toppath*) "/" (servdat-uuid *server-info*) ".pkt"))) (debug:print-info 0 *default-log-port* "removing pkt "pkt-file) @@ -1541,21 +1542,13 @@ (lambda (dbh dbfile) (db:release-lock dbh dbfile)))) (let* ((sdat *server-info*) ;; we have a run-id server (host (servdat-host sdat)) (port (servdat-port sdat)) - (uuid (servdat-uuid sdat))) - (if (not (string-match ".db/main.db" (args:get-arg "-db"))) - (let* ((res (rmt:deregister-server remdat - *toppath* - (servdat-host *server-info*) ;; iface - (servdat-port *server-info*) - (servdat-uuid *server-info*) - dbfile ;; (current-process-id) - ))) - (debug:print-info 0 *default-log-port* "deregistered-server, res="res))) - + (uuid (servdat-uuid sdat)) + (res (rmt:deregister-server remdat *toppath* host port uuid dbfile))) + (debug:print-info 0 *default-log-port* "deregistered-server, res="res) (debug:print-info 0 *default-log-port* "deregistering server "host":"port" with uuid "uuid) ))))))) (define (std-exit-procedure) ;;(common:telemetry-log-close) @@ -1655,11 +1648,11 @@ ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") (server:get-best-guess-address hostname) #f))) (if ipstr ipstr hostn))) ;; hostname))) (port (portlogger:open-run-close portlogger:find-port)) - (link-tree-path (common:get-linktree)) + ;; (link-tree-path (common:get-linktree)) ;; (tmp-area (common:get-db-tmp-area)) #;(start-file (conc tmp-area "/.server-start"))) (debug:print-info 0 *default-log-port* "portlogger recommended port: " port) (if *server-info* (begin @@ -1718,49 +1711,37 @@ (set! 
*server-info* (make-servdat host: ipaddrstr port: portnum))) (debug:print-info 0 *default-log-port* "rmt:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum) -;;(if (is-port-in-use portnum) -;; (begin -;; (portlogger:open-run-close portlogger:set-failed portnum) -;; (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") -;; ;; (thread-sleep! 0.1) -;; (rmt:try-start-server ipaddrstr -;; (portlogger:open-run-close -;; portlogger:find-port))) - (begin - (if (not *server-info*) - (set! *server-info* (make-servdat - host: ipaddrstr - port: portnum))) - (servdat-status-set! *server-info* 'starting) - (servdat-port-set! *server-info* portnum) - (if (not (servdat-rep *server-info*)) - (let ((rep (make-rep-socket))) - (servdat-rep-set! *server-info* rep) - (socket-set! rep 'nng/recvtimeo 2000))) - (let* ((rep (servdat-rep *server-info*))) - (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum) - (handle-exceptions - exn - (begin - (print-error-message exn) - (if (< portnum 64000) - (begin - (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) - (portlogger:open-run-close portlogger:set-failed portnum) - (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") - ;; (thread-sleep! 0.1) - (rmt:try-start-server ipaddrstr - (portlogger:open-run-close portlogger:find-port))) - (begin - (print "ERROR: Tried and tried but could not start the server, stopping at port "portnum)))) - (nng-listen rep (conc "tcp://*:" portnum)) - rep)))) ;;) + (assert (servdat? *server-info*) "FATAL: Must always have *server-info* properly set up by here.") + (servdat-status-set! 
*server-info* 'starting) + (servdat-port-set! *server-info* portnum) + (if (not (servdat-rep *server-info*)) + (let ((rep (make-rep-socket))) + (servdat-rep-set! *server-info* rep) + (socket-set! rep 'nng/recvtimeo 2000))) + (let* ((rep (servdat-rep *server-info*))) + (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum) + (handle-exceptions + exn + (begin + (print-error-message exn) + (if (< portnum 64000) + (begin + (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) + (portlogger:open-run-close portlogger:set-failed portnum) + (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") + ;; (thread-sleep! 0.1) + (rmt:try-start-server ipaddrstr + (portlogger:open-run-close portlogger:find-port))) + (begin + (print "ERROR: Tried and tried but could not start the server, stopping at port "portnum)))) + (nng-listen rep (conc "tcp://*:" portnum)) + rep))) ;;====================================================================== ;; S E R V E R U T I L I T I E S ;;====================================================================== @@ -2215,11 +2196,12 @@ (rmt:server-shutdown) (portlogger:open-run-close portlogger:set-port port "released") (exit))) (timed-out? (lambda () (<= (+ last-access server-timeout) - (current-seconds))))) + (current-seconds))))) + (servdat-dbfile-set! *server-info* (args:get-arg "-db")) ;; main and run db servers have both got wait logic (could/should merge it) (if is-main (rmt:wait-for-server pkts-dir dbname server-key) (rmt:wait-for-stable-interface)) ;; this is our forever loop