Index: Makefile
==================================================================
--- Makefile
+++ Makefile
@@ -32,10 +32,13 @@
archive.scm env.scm diff-report.scm cgisetup/models/pgdb.scm
# module source files
MSRCFILES = ftail.scm portlogger.scm nmsg-transport.scm
+# files needed for mtserve
+MTSERVEFILES = common.scm megatest-version.scm margs.scm
+
# Eggs to install (straightforward ones)
EGGS=matchable readline apropos base64 regex-literals format regex-case test coops trace csv \
dot-locking posix-utils posix-extras directory-utils hostinfo tcp-server rpc csv-xml fmt \
json md5 awful http-client spiffy uri-common intarweb spiffy-request-vars \
spiffy-directory-listing ssax sxml-serializer sxml-modifications iup canvas-draw sqlite3
@@ -42,10 +45,11 @@
GUISRCF = dashboard-context-menu.scm dashboard-tests.scm dashboard-guimonitor.scm gutils.scm dcommon.scm tree.scm vg.scm
OFILES = $(SRCFILES:%.scm=%.o)
GOFILES = $(GUISRCF:%.scm=%.o)
+MTSERVEOFILES = $(MTSERVEFILES:%.scm=%.o)
MOFILES = $(addprefix mofiles/,$(MSRCFILES:%.scm=%.o))
mofiles/%.o : %.scm
mkdir -p mofiles
@@ -69,15 +73,18 @@
# ARCHSTR=$(shell bash -c "echo \$$MACHTYPE")
PNGFILES = $(shell cd docs/manual;ls *png)
#all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut ndboard
-all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut
+all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut mtserve
mtest: $(OFILES) readline-fix.scm megatest.o $(MOFILES)
csc $(CSCOPTS) $(OFILES) $(MOFILES) megatest.o -o mtest
+mtserve: $(MTSERVEOFILES) readline-fix.scm mtserve.o $(MOFILES)
+ csc $(CSCOPTS) $(MTSERVEOFILES) $(MOFILES) mtserve.o -o mtserve
+
showmtesthash:
@echo $(MTESTHASH)
dboard : $(OFILES) $(GOFILES) dashboard.scm $(MOFILES)
csc $(CSCOPTS) $(OFILES) dashboard.scm $(GOFILES) $(MOFILES) -o dboard
@@ -181,10 +188,16 @@
$(PREFIX)/bin/.$(ARCHSTR)/mtest : mtest utils/mk_wrapper
@echo Installing to PREFIX=$(PREFIX)
$(INSTALL) mtest $(PREFIX)/bin/.$(ARCHSTR)/mtest
utils/mk_wrapper $(PREFIX) mtest $(PREFIX)/bin/megatest
chmod a+x $(PREFIX)/bin/megatest
+
+$(PREFIX)/bin/.$(ARCHSTR)/mtserve : mtserve utils/mk_wrapper
+ @echo Installing to PREFIX=$(PREFIX)
+ $(INSTALL) mtserve $(PREFIX)/bin/.$(ARCHSTR)/mtserve
+ utils/mk_wrapper $(PREFIX) mtserve $(PREFIX)/bin/mtserver
+ chmod a+x $(PREFIX)/bin/mtserver
$(PREFIX)/bin/.$(ARCHSTR)/ndboard : ndboard
$(INSTALL) ndboard $(PREFIX)/bin/.$(ARCHSTR)/ndboard
$(PREFIX)/bin/newdashboard : $(PREFIX)/bin/.$(ARCHSTR)/ndboard utils/mk_wrapper
Index: api.scm
==================================================================
--- api.scm
+++ api.scm
@@ -16,355 +16,20 @@
;; You should have received a copy of the GNU General Public License
;; along with Megatest. If not, see .
;;
;;======================================================================
-(use srfi-69 posix)
-
-(declare (unit api))
-(declare (uses rmt))
-(declare (uses db))
-(declare (uses tasks))
-
-;; allow these queries through without starting a server
-;;
-(define api:read-only-queries
- '(get-key-val-pairs
- get-var
- get-keys
- get-key-vals
- test-toplevel-num-items
- get-test-info-by-id
- get-steps-info-by-id
- get-data-info-by-id
- test-get-rundir-from-test-id
- get-count-tests-running-for-testname
- get-count-tests-running
- get-count-tests-running-in-jobgroup
- get-previous-test-run-record
- get-matching-previous-test-run-records
- test-get-logfile-info
- test-get-records-for-index-file
- get-testinfo-state-status
- test-get-top-process-pid
- test-get-paths-matching-keynames-target-new
- get-prereqs-not-met
- get-count-tests-running-for-run-id
- get-run-info
- get-run-status
- get-run-stats
- get-run-times
- get-targets
- get-target
- ;; register-run
- get-tests-tags
- get-test-times
- get-tests-for-run
- get-test-id
- get-tests-for-runs-mindata
- get-tests-for-run-mindata
- get-run-name-from-id
- get-runs
- simple-get-runs
- get-num-runs
- get-runs-cnt-by-patt
- get-all-run-ids
- get-prev-run-ids
- get-run-ids-matching-target
- get-runs-by-patt
- get-steps-data
- get-steps-for-test
- read-test-data
- read-test-data*
- login
- tasks-get-last
- testmeta-get-record
- have-incompletes?
- synchash-get
- get-changed-record-ids
- get-run-record-ids
- ))
-
-(define api:write-queries
- '(
- get-keys-write ;; dummy "write" query to force server start
-
- ;; SERVERS
- start-server
- kill-server
-
- ;; TESTS
- test-set-state-status-by-id
- delete-test-records
- delete-old-deleted-test-records
- test-set-state-status
- test-set-top-process-pid
- set-state-status-and-roll-up-items
- update-pass-fail-counts
- top-test-set-per-pf-counts ;; (db:top-test-set-per-pf-counts (db:get-db *db* 5) 5 "runfirst")
-
- ;; RUNS
- register-run
- set-tests-state-status
- delete-run
- lock/unlock-run
- update-run-event_time
- mark-incomplete
-
- ;; STEPS
- teststep-set-status!
-
- ;; TEST DATA
- test-data-rollup
- csv->test-data
-
- ;; MISC
- sync-inmem->db
-
- ;; TESTMETA
- testmeta-add-record
- testmeta-update-field
-
- ;; TASKS
- tasks-add
- tasks-set-state-given-param-key
- ))
-
-;; These are called by the server on recipt of /api calls
-;; - keep it simple, only return the actual result of the call, i.e. no meta info here
-;;
-;; - returns #( flag result )
-;;
-(define (api:execute-requests dbstruct dat)
- (handle-exceptions
- exn
- (let ((call-chain (get-call-chain)))
- (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat)
- (print-call-chain (current-error-port))
- (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
- (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens
- (cond
- ((not (vector? dat)) ;; it is an error to not receive a vector
- (vector #f (vector #f "remote must be called with a vector")))
- ((> *api-process-request-count* 20) ;; 20)
- (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.")
- (set! *server-overloaded* #t)
- (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor!
- (else
- (let* ((cmd-in (vector-ref dat 0))
- (cmd (if (symbol? cmd-in)
- cmd-in
- (string->symbol cmd-in)))
- (params (vector-ref dat 1))
- (start-t (current-milliseconds))
- (readonly-mode (dbr:dbstruct-read-only dbstruct))
- (readonly-command (member cmd api:read-only-queries))
- (writecmd-in-readonly-mode (and readonly-mode (not readonly-command)))
- (res
- (if writecmd-in-readonly-mode
- (conc "attempt to run write command "cmd" on a read-only database")
- (case cmd
- ;;===============================================
- ;; READ/WRITE QUERIES
- ;;===============================================
-
- ((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl
-
- ;; SERVERS
- ((start-server) (apply server:kind-run params))
- ((kill-server) (set! *server-run* #f))
-
- ;; TESTS
-
- ;;((test-set-state-status-by-id) (apply mt:test-set-state-status-by-id dbstruct params))
- ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items.
- ((test-set-state-status-by-id)
-
- ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
- (db:set-state-status-and-roll-up-items
- dbstruct
- (list-ref params 0) ; run-id
- (list-ref params 1) ; test-name
- #f ; item-path
- (list-ref params 2) ; state
- (list-ref params 3) ; status
- (list-ref params 4) ; comment
- ))
-
- ((delete-test-records) (apply db:delete-test-records dbstruct params))
- ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params))
- ((test-set-state-status) (apply db:test-set-state-status dbstruct params))
- ((test-set-top-process-pid) (apply db:test-set-top-process-pid dbstruct params))
- ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params))
- ((top-test-set-per-pf-counts) (apply db:top-test-set-per-pf-counts dbstruct params))
- ((test-set-archive-block-id) (apply db:test-set-archive-block-id dbstruct params))
-
- ;; RUNS
- ((register-run) (apply db:register-run dbstruct params))
- ((set-tests-state-status) (apply db:set-tests-state-status dbstruct params))
- ((delete-run) (apply db:delete-run dbstruct params))
- ((lock/unlock-run) (apply db:lock/unlock-run dbstruct params))
- ((update-run-event_time) (apply db:update-run-event_time dbstruct params))
- ((update-run-stats) (apply db:update-run-stats dbstruct params))
- ((set-var) (apply db:set-var dbstruct params))
- ((del-var) (apply db:del-var dbstruct params))
-
- ;; STEPS
- ((teststep-set-status!) (apply db:teststep-set-status! dbstruct params))
-
- ;; TEST DATA
- ((test-data-rollup) (apply db:test-data-rollup dbstruct params))
- ((csv->test-data) (apply db:csv->test-data dbstruct params))
-
- ;; MISC
- ((sync-inmem->db) (let ((run-id (car params)))
- (db:sync-touched dbstruct run-id force-sync: #t)))
- ((mark-incomplete) (apply db:find-and-mark-incomplete dbstruct params))
-
- ;; TESTMETA
- ((testmeta-add-record) (apply db:testmeta-add-record dbstruct params))
- ((testmeta-update-field) (apply db:testmeta-update-field dbstruct params))
- ((get-tests-tags) (db:get-tests-tags dbstruct))
-
- ;; TASKS
- ((tasks-add) (apply tasks:add dbstruct params))
- ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params))
- ((tasks-get-last) (apply tasks:get-last dbstruct params))
-
- ;; NO SYNC DB
- ((no-sync-set) (apply db:no-sync-set *no-sync-db* params))
- ((no-sync-get/default) (apply db:no-sync-get/default *no-sync-db* params))
- ((no-sync-del!) (apply db:no-sync-del! *no-sync-db* params))
- ((no-sync-get-lock) (apply db:no-sync-get-lock *no-sync-db* params))
-
- ;; ARCHIVES
- ;; ((archive-get-allocations)
- ((archive-register-disk) (apply db:archive-register-disk dbstruct params))
- ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params))
- ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey))
-
- ;;======================================================================
- ;; READ ONLY QUERIES
- ;;======================================================================
-
- ;; KEYS
- ((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params))
- ((get-keys) (db:get-keys dbstruct))
- ((get-key-vals) (apply db:get-key-vals dbstruct params))
- ((get-target) (apply db:get-target dbstruct params))
- ((get-targets) (db:get-targets dbstruct))
-
- ;; ARCHIVES
- ((test-get-archive-block-info) (apply db:test-get-archive-block-info dbstruct params))
-
- ;; TESTS
- ((test-toplevel-num-items) (apply db:test-toplevel-num-items dbstruct params))
- ((get-test-info-by-id) (apply db:get-test-info-by-id dbstruct params))
- ((test-get-rundir-from-test-id) (apply db:test-get-rundir-from-test-id dbstruct params))
- ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params))
- ((get-count-tests-running) (apply db:get-count-tests-running dbstruct params))
- ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params))
- ;; ((delete-test-step-records) (apply db:delete-test-step-records dbstruct params))
- ((get-previous-test-run-record) (apply db:get-previous-test-run-record dbstruct params))
- ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params))
- ((test-get-logfile-info) (apply db:test-get-logfile-info dbstruct params))
- ((test-get-records-for-index-file) (apply db:test-get-records-for-index-file dbstruct params))
- ((get-testinfo-state-status) (apply db:get-testinfo-state-status dbstruct params))
- ((test-get-top-process-pid) (apply db:test-get-top-process-pid dbstruct params))
- ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params))
- ((get-prereqs-not-met) (apply db:get-prereqs-not-met dbstruct params))
- ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params))
- ((synchash-get) (apply synchash:server-get dbstruct params))
- ((get-raw-run-stats) (apply db:get-raw-run-stats dbstruct params))
- ((get-test-times) (apply db:get-test-times dbstruct params))
-
- ;; RUNS
- ((get-run-info) (apply db:get-run-info dbstruct params))
- ((get-run-status) (apply db:get-run-status dbstruct params))
- ((set-run-status) (apply db:set-run-status dbstruct params))
- ((get-tests-for-run) (apply db:get-tests-for-run dbstruct params))
- ((get-test-id) (apply db:get-test-id dbstruct params))
- ((get-tests-for-run-mindata) (apply db:get-tests-for-run-mindata dbstruct params))
- ((get-tests-for-runs-mindata) (apply db:get-tests-for-runs-mindata dbstruct params))
- ((get-runs) (apply db:get-runs dbstruct params))
- ((simple-get-runs) (apply db:simple-get-runs dbstruct params))
- ((get-num-runs) (apply db:get-num-runs dbstruct params))
- ((get-runs-cnt-by-patt) (apply db:get-runs-cnt-by-patt dbstruct params))
- ((get-all-run-ids) (db:get-all-run-ids dbstruct))
- ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params))
- ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params))
- ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params))
- ((get-run-name-from-id) (apply db:get-run-name-from-id dbstruct params))
- ((get-main-run-stats) (apply db:get-main-run-stats dbstruct params))
- ((get-var) (apply db:get-var dbstruct params))
- ((get-run-stats) (apply db:get-run-stats dbstruct params))
- ((get-run-times) (apply db:get-run-times dbstruct params))
-
- ;; STEPS
- ((get-steps-data) (apply db:get-steps-data dbstruct params))
- ((get-steps-for-test) (apply db:get-steps-for-test dbstruct params))
- ((get-steps-info-by-id) (apply db:get-steps-info-by-id dbstruct params))
-
- ;; TEST DATA
- ((read-test-data) (apply db:read-test-data dbstruct params))
- ((read-test-data*) (apply db:read-test-data* dbstruct params))
- ((get-data-info-by-id) (apply db:get-data-info-by-id dbstruct params))
-
- ;; MISC
- ((get-latest-host-load) (apply db:get-latest-host-load dbstruct params))
- ((have-incompletes?) (apply db:have-incompletes? dbstruct params))
- ((login) (apply db:login dbstruct params))
- ((general-call) (let ((stmtname (car params))
- (run-id (cadr params))
- (realparams (cddr params)))
- (db:general-call dbstruct stmtname realparams)))
- ((sdb-qry) (apply sdb:qry params))
- ((ping) (current-process-id))
- ((get-changed-record-ids) (apply db:get-changed-record-ids dbstruct params))
- ((get-run-record-ids) (apply db:get-run-record-ids dbstruct params))
- ;; TESTMETA
- ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params))
-
- ;; TASKS
- ((find-task-queue-records) (apply tasks:find-task-queue-records dbstruct params))
- (else
- (debug:print 0 *default-log-port* "ERROR: bad api call " cmd)
- (conc "ERROR: BAD api call " cmd))))))
-
- ;; save all stats
- (let ((delta-t (- (current-milliseconds)
- start-t)))
- (hash-table-set! *db-api-call-time* cmd
- (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '()))))
- (if writecmd-in-readonly-mode
- (vector #f res)
- (vector #t res)))))))
-
-;; http-server send-response
-;; api:process-request
-;; db:*
-;;
-;; NB// Runs on the server as part of the server loop
-;;
-(define (api:process-request dbstruct $) ;; the $ is the request vars proc
- (set! *api-process-request-count* (+ *api-process-request-count* 1))
- (let* ((cmd ($ 'cmd))
- (paramsj ($ 'params))
- (params (db:string->obj paramsj transport: 'http)) ;; incoming data from the POST (or is it a GET?)
- (resdat (api:execute-requests dbstruct (vector cmd params))) ;; process the request, resdat = #( flag result )
- (success (vector-ref resdat 0))
- (res (vector-ref resdat 1))) ;; (vector flag payload), get the payload, ignore the flag (why?)
- (if (not success)
- (debug:print 0 *default-log-port* "ERROR: success flag is #f for " cmd " with params " params))
- (if (> *api-process-request-count* *max-api-process-requests*)
- (set! *max-api-process-requests* *api-process-request-count*))
- (set! *api-process-request-count* (- *api-process-request-count* 1))
- ;; This can be here but needs controls to ensure it doesn't run more than every 4 seconds
- ;; (rmt:dat->json-str
- ;; (if (or (string? res)
- ;; (list? res)
- ;; (number? res)
- ;; (boolean? res))
- ;; res
- ;; (list "ERROR, not string, list, number or boolean" 1 cmd params res)))))
- (db:obj->string res transport: 'http)))
-
+(declare (unit api))
+
+(module
+ api
+ (
+ *
+ )
+
+(import scheme posix chicken data-structures ports)
+
+
+(define (api:execute-requests . args) #t)
+(define (api:process-request . args) #t)
+
+)
Index: client.scm
==================================================================
--- client.scm
+++ client.scm
@@ -31,91 +31,86 @@
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
(include "common_records.scm")
(include "db_records.scm")
-;; client:get-signature
-(define (client:get-signature)
- (if *my-client-signature* *my-client-signature*
- (let ((sig (conc (get-host-name) " " (current-process-id))))
- (set! *my-client-signature* sig)
- *my-client-signature*)))
-
-;; Not currently used! But, I think it *should* be used!!!
-(define (client:logout serverdat)
- (let ((ok (and (socket? serverdat)
- (cdb:logout serverdat *toppath* (client:get-signature)))))
- ok))
-
-(define (client:connect iface port)
- (case (server:get-transport)
- ((rpc) (rpc:client-connect iface port))
- ((http) (http:client-connect iface port))
- ((zmq) (zmq:client-connect iface port))
- (else (rpc:client-connect iface port))))
-
-(define (client:setup areapath #!key (remaining-tries 100) (failed-connects 0))
- (case (server:get-transport)
- ((rpc) (rpc-transport:client-setup remaining-tries: remaining-tries failed-connects: failed-connects)) ;;(client:setup-rpc run-id))
- ((http)(client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects))
- (else (rpc-transport:client-setup remaining-tries: remaining-tries failed-connects: failed-connects)))) ;; (client:setup-rpc run-id))))
-
-;; Do all the connection work, look up the transport type and set up the
-;; connection if required.
-;;
-;; There are two scenarios.
-;; 1. We are a test manager and we received *transport-type* and *runremote* via cmdline
-;; 2. We are a run tests, list runs or other interactive process and we must figure out
-;; *transport-type* and *runremote* from the monitor.db
-;;
-;; client:setup
-;;
-;; lookup_server, need to remove *runremote* stuff
-;;
-
-(define (client:setup-http areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f))
- (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries)
- (server:start-and-wait areapath)
- (if (<= remaining-tries 0)
- (begin
- (debug:print-error 0 *default-log-port* "failed to start or connect to server")
- (exit 1))
- ;;
- ;; Alternatively here, we can get the list of candidate servers and work our way
- ;; through them searching for a good one.
- ;;
- (let* ((server-dat (server:get-rand-best areapath)) ;; (server:get-first-best areapath))
- (runremote (or area-dat *runremote*)))
- (if (not server-dat) ;; no server found
- (client:setup-http areapath remaining-tries: (- remaining-tries 1))
- (let ((host (cadr server-dat))
- (port (caddr server-dat)))
- (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
- (if (and (not area-dat)
- (not *runremote*))
- (set! *runremote* (make-remote)))
- (if (and host port)
- (let* ((start-res (case *transport-type*
- ((http)(http-transport:client-connect host port))))
- (ping-res (case *transport-type*
- ((http)(rmt:login-no-auto-client-setup start-res)))))
- (if (and start-res
- ping-res)
- (let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago
- (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res)
- (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
- start-res)
- (begin ;; login failed but have a server record, clean out the record and try again
- (debug:print-info 0 *default-log-port* "client:setup, login failed, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid. Fixes part of Randy;s ticket 1405717332
- (case *transport-type*
- ((http)(http-transport:close-connections)))
- (remote-conndat-set! runremote #f) ;; (hash-table-delete! runremote run-id)
- (thread-sleep! 1)
- (client:setup-http areapath remaining-tries: (- remaining-tries 1))
- )))
- (begin ;; no server registered
- ;; (server:kind-run areapath)
- (server:start-and-wait areapath)
- (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries)
- (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms.
- (client:setup-http areapath remaining-tries: (- remaining-tries 1)))))))))
-
+;; ;; Not currently used! But, I think it *should* be used!!!
+;; (define (client:logout serverdat)
+;; (let ((ok (and (socket? serverdat)
+;; (cdb:logout serverdat *toppath* (client:get-signature)))))
+;; ok))
+;;
+;; (define (client:connect iface port)
+;; (case (server:get-transport)
+;; ((http) (http:client-connect iface port))
+;; ((zmq) (zmq:client-connect iface port))
+;; (else (begin
+;; (debug:print 0 *default-log-port* "ERROR: no such transport " (server:get-transport) ", exiting now.")
+;; (exit 1)))))
+;;
+;; (define (client:setup areapath #!key (remaining-tries 100) (failed-connects 0))
+;; (case (server:get-transport)
+;; ((http)(client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects))
+;; (else (begin
+;; (debug:print 0 *default-log-port* "ERROR: no such transport " (server:get-transport) ", exiting now.")
+;; (exit 1)))))
+;;
+;; ;; Do all the connection work, look up the transport type and set up the
+;; ;; connection if required.
+;; ;;
+;; ;; There are two scenarios.
+;; ;; 1. We are a test manager and we received *transport-type* and *runremote* via cmdline
+;; ;; 2. We are a run tests, list runs or other interactive process and we must figure out
+;; ;; *transport-type* and *runremote* from the monitor.db
+;; ;;
+;; ;; client:setup
+;; ;;
+;; ;; lookup_server, need to remove *runremote* stuff
+;; ;;
+;;
+;; (define (client:setup-http areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f))
+;; (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries)
+;; (server:start-and-wait areapath)
+;; (if (<= remaining-tries 0)
+;; (begin
+;; (debug:print-error 0 *default-log-port* "failed to start or connect to server")
+;; (exit 1))
+;; ;;
+;; ;; Alternatively here, we can get the list of candidate servers and work our way
+;; ;; through them searching for a good one.
+;; ;;
+;; (let* ((server-dat (server:get-rand-best areapath)) ;; (server:get-first-best areapath))
+;; (runremote (or area-dat *runremote*)))
+;; (if (not server-dat) ;; no server found
+;; (client:setup-http areapath remaining-tries: (- remaining-tries 1))
+;; (let ((host (cadr server-dat))
+;; (port (caddr server-dat)))
+;; (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
+;; (if (and (not area-dat)
+;; (not *runremote*))
+;; (set! *runremote* (make-remote)))
+;; (if (and host port)
+;; (let* ((start-res (case *transport-type*
+;; ((http)(http-transport:client-connect host port))))
+;; (ping-res (case *transport-type*
+;; ((http)(rmt:login-no-auto-client-setup start-res)))))
+;; (if (and start-res
+;; ping-res)
+;; (let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago
+;; (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res)
+;; (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
+;; start-res)
+;; (begin ;; login failed but have a server record, clean out the record and try again
+;; (debug:print-info 0 *default-log-port* "client:setup, login failed, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid. Fixes part of Randy;s ticket 1405717332
+;; (case *transport-type*
+;; ((http)(http-transport:close-connections)))
+;; (remote-conndat-set! runremote #f) ;; (hash-table-delete! runremote run-id)
+;; (thread-sleep! 1)
+;; (client:setup-http areapath remaining-tries: (- remaining-tries 1))
+;; )))
+;; (begin ;; no server registered
+;; ;; (server:kind-run areapath)
+;; (server:start-and-wait areapath)
+;; (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries)
+;; (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms.
+;; (client:setup-http areapath remaining-tries: (- remaining-tries 1)))))))))
+;;
Index: common.scm
==================================================================
--- common.scm
+++ common.scm
@@ -1135,11 +1135,11 @@
;; logic for getting homehost. Returns (host . at-home)
;; IF *toppath* is not set, wait up to five seconds trying every two seconds
;; (this is to accomodate the watchdog)
;;
-(define (common:get-homehost #!key (trynum 5))
+#;(define (common:get-homehost #!key (trynum 5))
;; called often especially at start up. use mutex to eliminate collisions
(mutex-lock! *homehost-mutex*)
(cond
(*home-host*
(mutex-unlock! *homehost-mutex*)
@@ -1187,11 +1187,11 @@
(mutex-unlock! *homehost-mutex*)
*home-host*))))
;; am I on the homehost?
;;
-(define (common:on-homehost?)
+#;(define (common:on-homehost?)
(let ((hh (common:get-homehost)))
(if hh
(cdr hh)
#f)))
Index: megatest.scm
==================================================================
--- megatest.scm
+++ megatest.scm
@@ -585,11 +585,11 @@
(if (args:any? "-run" "-runall" "-remove-runs" "-set-state-status" "-kill-runs" "-kill-rerun")
(debug:print 0 *default-log-port* (string-intersperse (argv) " ")))
;; some switches imply homehost. Exit here if not on homehost
;;
-(let ((homehost-required (list "-cleanup-db" "-server")))
+#;(let ((homehost-required (list "-cleanup-db" "-server")))
(if (apply args:any? homehost-required)
(if (not (common:on-homehost?))
(for-each
(lambda (switch)
(if (args:get-arg switch)
@@ -2196,11 +2196,11 @@
(if (or (getenv "MT_RUNSCRIPT")
(args:get-arg "-repl")
(args:get-arg "-load"))
(let* ((toppath (launch:setup))
(dbstruct (if (and toppath
- (common:on-homehost?))
+ #;(common:on-homehost?))
(db:setup #t)
#f))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f)))
(if *toppath*
(cond
((getenv "MT_RUNSCRIPT")
Index: nmsg-transport.scm
==================================================================
--- nmsg-transport.scm
+++ nmsg-transport.scm
@@ -14,19 +14,24 @@
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with Megatest. If not, see .
+;;======================================================================
+;; Support routines for nmsg usage.
+;; This should be reusable, non-megatest specific stuff
+;;======================================================================
+
(declare (unit nmsg-transport))
(module
- nmsg-transport
- (
- nmsg:start-server
- nmsg:open-send-close
- nmsg:open-send-receive
- )
+ nmsg-transport
+ (
+ nmsg:start-server
+ nmsg:open-send-close
+ nmsg:open-send-receive
+ )
(import scheme posix chicken data-structures ports)
(use pkts)
(use nanomsg srfi-18)
@@ -114,6 +119,10 @@
(thread-start! th1)
(thread-start! th2)
(thread-join! th1)
res))))
+;; get a signature for identifing this process
+(define (nmsg:get-process-signature)
+ (conc (get-host-name) " " (current-process-id)))
+
)
Index: rmt.scm
==================================================================
--- rmt.scm
+++ rmt.scm
@@ -20,970 +20,130 @@
(use format typed-records) ;; RADT => purpose of json format??
(declare (unit rmt))
(declare (uses api))
-(declare (uses http-transport))
(include "common_records.scm")
-(declare (uses portlogger))
-(import portlogger)
-(declare (uses nmsg-transport))
-(import nmsg-transport)
-
(use (prefix pkts pkts:) srfi-18)
-;;
-;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
-;;
-
-;; generate entries for ~/.megatestrc with the following
-;;
-;; grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u
-
-;;======================================================================
-;; N A N O M S G S E R V E R
-;;======================================================================
-
-(defstruct nmsg
- (conn #f)
- (hosts (make-hash-table))
- pkt
- pktspec
- (mutex (make-mutex))
- )
-
-;; make it a global
-(define *nmsg-conndat* (make-nmsg))
-(nmsg-pktspec-set! *nmsg-conndat*
- `((server (hostname . h)
- (port . p)
- (pid . i)
- )))
-;; get a port
-;; start the nmsg server
-;; look for other servers
-;; contact other servers and compile list of servers
-;; there are two types of server
-;; main servers - dashboards, runners and dedicated servers - need pkt
-;; passive servers - test executers, step calls, list-runs - no pkt
-;;
-(define (rmt:start-nmsg #!key (force-server-type #f))
- (mutex-lock! (nmsg-mutex *nmsg-conndat*))
- (let* ((server-type (or force-server-type
- (if (args:any? "-run" "-server")
- 'main
- 'passive)))
- (port-num (portlogger:open-run-close portlogger:find-port))
- (nmsg-conn (nmsg:start-server port-num))
- (pktspec (nmsg-pktspec *nmsg-conndat*))
- (pktdir (conc (get-environment-variable "MT_RUN_AREA_HOME")
- "/.server-pkts")))
- (if (not (directory? pktdir))(create-directory pktdir))
- ;; server is started, now create pkt if needed
- (if (eq? server-type 'main)
- (nmsg-pkt-set! *nmsg-conndat*
- (pkts:write-alist->pkt
- pktdir
- `((hostname . ,(get-host-name))
- (port . ,port-num)
- (pid . ,(current-process-id)))
- pktspec: pktspec
- ptype: 'server)))
- (nmsg-conn-set! *nmsg-conndat* nmsg-conn)
- (mutex-unlock! (nmsg-mutex *nmsg-conndat*))
- ))
-
-;;======================================================================
-;; S U P P O R T F U N C T I O N S
-;;======================================================================
-
-;; if a server is either running or in the process of starting call client:setup
-;; else return #f to let the calling proc know that there is no server available
-;;
-(define (rmt:get-connection-info areapath #!key (area-dat #f)) ;; TODO: push areapath down.
- (let* ((runremote (or area-dat *runremote*))
- (cinfo (if (remote? runremote)
- (remote-conndat runremote)
- #f)))
- (if cinfo
- cinfo
- (if (server:check-if-running areapath)
- (client:setup areapath)
- #f))))
-
-(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id
-
-;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
-;;
-(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected
-
- ;;DOT digraph megatest_state_status {
- ;;DOT ranksep=0;
- ;;DOT // rankdir=LR;
- ;;DOT node [shape="box"];
- ;;DOT "rmt:send-receive" -> MUTEXLOCK;
- ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; }
- ;; do all the prep locked under the rmt-mutex
- (mutex-lock! *rmt-mutex*)
-
- ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote
- ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
- ;; 3. do the query, if on homehost use local access
- ;;
- (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value
- (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas
- (runremote (or area-dat
- *runremote*))
- (readonly-mode (if (and runremote
- (remote-ro-mode-checked runremote))
- (remote-ro-mode runremote)
- (let* ((dbfile (conc *toppath* "/megatest.db"))
- (ro-mode (not (file-write-access? dbfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
- (if runremote
- (begin
- (remote-ro-mode-set! runremote ro-mode)
- (remote-ro-mode-checked-set! runremote #t)
- ro-mode)
- ro-mode)))))
-
- ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity
- ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"];
- ;; DOT INIT_RUNREMOTE -> MUTEXLOCK;
- ;; ensure we have a record for our connection for given area
- (if (not runremote) ;; can remove this one. should never get here.
- (begin
- (set! *runremote* (make-remote))
- (set! runremote *runremote*))) ;; new runremote will come from this on next iteration
-
- ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity
- ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"];
- ;; DOT SET_HOMEHOST -> MUTEXLOCK;
- ;; ensure we have a homehost record
- (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost
- (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
- (remote-hh-dat-set! runremote (common:get-homehost)))
-
- ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
- (cond
- ;;DOT EXIT;
- ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" }
- ;; give up if more than 15 attempts
- ((> attemptnum 15)
- (debug:print 0 *default-log-port* "ERROR: 15 tries to start/connect to server. Giving up.")
- (exit 1))
-
- ;;DOT CASE2 [label="local\nreadonly\nquery"];
- ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2}
- ;;DOT CASE2 -> "rmt:open-qry-close-locally";
- ;; readonly mode, read request- handle it - case 2
- ((and readonly-mode
- (member cmd api:read-only-queries))
- (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2")
- (rmt:open-qry-close-locally cmd 0 params)
- )
-
- ;;DOT CASE3 [label="write in\nread-only mode"];
- ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3}
- ;;DOT CASE3 -> "#f";
- ;; readonly mode, write request. Do nothing, return #f
- (readonly-mode
- (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 3")
- (debug:print 0 *default-log-port* "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params)
- #f)
-
- ;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
- ;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
- ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
- ;;
- ;;DOT CASE4 [label="reset\nconnection"];
- ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4}
- ;;DOT CASE4 -> "rmt:send-receive";
- ;; reset the connection if it has been unused too long
- ((and runremote
- (remote-conndat runremote)
- (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
- (+ (http-transport:server-dat-get-last-access (remote-conndat runremote))
- (remote-server-timeout runremote))))
- (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.")
- (http-transport:close-connections area-dat: runremote)
- (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
- (mutex-unlock! *rmt-mutex*)
- (rmt:send-receive cmd rid params attemptnum: attemptnum))
-
- ;;DOT CASE5 [label="local\nread"];
- ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5};
- ;;DOT CASE5 -> "rmt:open-qry-close-locally";
-
- ;; on homehost and this is a read
- ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
- (cdr (remote-hh-dat runremote)) ;; on homehost
- (member cmd api:read-only-queries)) ;; this is a read
- (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5")
- (rmt:open-qry-close-locally cmd 0 params))
-
- ;;DOT CASE6 [label="init\nremote"];
- ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6};
- ;;DOT CASE6 -> "rmt:send-receive";
- ;; on homehost and this is a write, we already have a server, but server has died
- ((and (cdr (remote-hh-dat runremote)) ;; on homehost
- (not (member cmd api:read-only-queries)) ;; this is a write
- (remote-server-url runremote) ;; have a server
- (not (server:ping (remote-server-url runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach.
- (set! *runremote* (make-remote))
- (remote-force-server-set! runremote (common:force-server?))
- (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6")
- (rmt:send-receive cmd rid params attemptnum: attemptnum))
-
- ;;DOT CASE7 [label="homehost\nwrite"];
- ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7};
- ;;DOT CASE7 -> "rmt:open-qry-close-locally";
- ;; on homehost and this is a write, we already have a server
- ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
- (cdr (remote-hh-dat runremote)) ;; on homehost
- (not (member cmd api:read-only-queries)) ;; this is a write
- (remote-server-url runremote)) ;; have a server
- (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1")
- (rmt:open-qry-close-locally cmd 0 params))
-
- ;;DOT CASE8 [label="force\nserver"];
- ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8};
- ;;DOT CASE8 -> "rmt:open-qry-close-locally";
- ;; on homehost, no server contact made and this is a write, passively start a server
- ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
- (cdr (remote-hh-dat runremote)) ;; have homehost
- (not (remote-server-url runremote)) ;; no connection yet
- (not (member cmd api:read-only-queries))) ;; not a read-only query
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8")
- (let ((server-url (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
- (if server-url
- (remote-server-url-set! runremote server-url) ;; the string can be consumed by the client setup if needed
- (if (common:force-server?)
- (server:start-and-wait *toppath*)
- (server:kind-run *toppath*))))
- (remote-force-server-set! runremote (common:force-server?))
- (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8.1")
- (rmt:open-qry-close-locally cmd 0 params))
-
- ;;DOT CASE9 [label="force server\nnot on homehost"];
- ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9};
- ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive";
- ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one
- (not (remote-conndat runremote)))
- (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost
- (not (remote-conndat runremote)))) ;; and no connection
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote))
- (mutex-unlock! *rmt-mutex*)
- (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up?
- (server:start-and-wait *toppath*))
- (remote-conndat-set! runremote (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http
- (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as
-
- ;;DOT CASE10 [label="on homehost"];
- ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10};
- ;;DOT CASE10 -> "rmt:open-qry-close-locally";
- ;; all set up if get this far, dispatch the query
- ((and (not (remote-force-server runremote))
- (cdr (remote-hh-dat runremote))) ;; we are on homehost
- (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10")
- (rmt:open-qry-close-locally cmd (if rid rid 0) params))
-
- ;;DOT CASE11 [label="send_receive"];
- ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11};
- ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"];
- ;;DOT CASE11 -> "RESULT" [label="call succeeded"];
- ;; not on homehost, do server query
- (else
- ;; (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9")
- ;; (mutex-lock! *rmt-mutex*)
- (let* ((conninfo (remote-conndat runremote))
- (dat (case (remote-transport runremote)
- ((http) (condition-case ;; handling here has caused a lot of problems. However it is needed to deal with attemtped communication to servers that have gone away
- (http-transport:client-api-send-receive 0 conninfo cmd params)
- ((commfail)(vector #f "communications fail"))
- ((exn)(vector #f "other fail" (print-call-chain)))))
- (else
- (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported")
- (exit))))
- (success (if (vector? dat) (vector-ref dat 0) #f))
- (res (if (vector? dat) (vector-ref dat 1) #f)))
- (if (and (vector? conninfo) (< 5 (vector-length conninfo)))
- (http-transport:server-dat-update-last-access conninfo) ;; refresh access time
- (begin
- (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo)
- (set! conninfo #f)
- (remote-conndat-set! *runremote* #f)
- (http-transport:close-connections area-dat: runremote)))
- ;; (mutex-unlock! *rmt-mutex*)
- (debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
- (mutex-unlock! *rmt-mutex*)
- (if success ;; success only tells us that the transport was successful, have to examine the data to see if there was a detected issue at the other end
- (if (and (vector? res)
- (eq? (vector-length res) 2)
- (eq? (vector-ref res 1) 'overloaded)) ;; since we are looking at the data to carry the error we'll use a fairly obtuse combo to minimise the chances of some sort of collision.
- ;; this is the case where the returned data is bad or the server is overloaded and we want
- ;; to ease off the queries
- (let ((wait-delay (+ attemptnum (* attemptnum 10))))
- (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
- (mutex-lock! *rmt-mutex*)
- (http-transport:close-connections area-dat: runremote)
- (set! *runremote* #f) ;; force starting over
- (mutex-unlock! *rmt-mutex*)
- (thread-sleep! wait-delay)
- (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
- res) ;; All good, return res
- (begin
- (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)
- (mutex-lock! *rmt-mutex*)
- (remote-conndat-set! runremote #f)
- (http-transport:close-connections area-dat: runremote)
- (remote-server-url-set! runremote #f)
- (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1")
- ;; (if (not (server:check-if-running *toppath*))
- ;; (server:start-and-wait *toppath*))
- (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))))))))
-
- ;;DOT }
-
-;; (define (rmt:update-db-stats run-id rawcmd params duration)
-;; (mutex-lock! *db-stats-mutex*)
-;; (handle-exceptions
-;; exn
-;; (begin
-;; (debug:print 0 *default-log-port* "WARNING: stats collection failed in update-db-stats")
-;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
-;; (print "exn=" (condition->list exn))
-;; #f) ;; if this fails we don't care, it is just stats
-;; (let* ((cmd (conc "run-id=" run-id " " (if (eq? rawcmd 'general-call) (car params) rawcmd)))
-;; (stat-vec (hash-table-ref/default *db-stats* cmd #f)))
-;; (if (not (vector? stat-vec))
-;; (let ((newvec (vector 0 0)))
-;; (hash-table-set! *db-stats* cmd newvec)
-;; (set! stat-vec newvec)))
-;; (vector-set! stat-vec 0 (+ (vector-ref stat-vec 0) 1))
-;; (vector-set! stat-vec 1 (+ (vector-ref stat-vec 1) duration))))
-;; (mutex-unlock! *db-stats-mutex*))
-
-(define (rmt:print-db-stats)
- (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
- (debug:print 18 *default-log-port* "DB Stats\n========")
- (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))
- (for-each (lambda (cmd)
- (let ((cmd-dat (hash-table-ref *db-stats* cmd)))
- (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0))))))
- (sort (hash-table-keys *db-stats*)
- (lambda (a b)
- (> (vector-ref (hash-table-ref *db-stats* a) 0)
- (vector-ref (hash-table-ref *db-stats* b) 0)))))))
-
-(define (rmt:get-max-query-average run-id)
- (mutex-lock! *db-stats-mutex*)
- (let* ((runkey (conc "run-id=" run-id " "))
- (cmds (filter (lambda (x)
- (substring-index runkey x))
- (hash-table-keys *db-stats*)))
- (res (if (null? cmds)
- (cons 'none 0)
- (let loop ((cmd (car cmds))
- (tal (cdr cmds))
- (max-cmd (car cmds))
- (res 0))
- (let* ((cmd-dat (hash-table-ref *db-stats* cmd))
- (tot (vector-ref cmd-dat 0))
- (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction
- (currmax (max res curravg))
- (newmax-cmd (if (> curravg res) cmd max-cmd)))
- (if (null? tal)
- (if (> tot 10)
- (cons newmax-cmd currmax)
- (cons 'none 0))
- (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
- (mutex-unlock! *db-stats-mutex*)
- res))
-
-(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
- (let* ((qry-is-write (not (member cmd api:read-only-queries)))
- (db-file-path (db:dbfile-path)) ;; 0))
- (dbstruct-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t)))
- (read-only (not (file-write-access? db-file-path)))
- (start (current-milliseconds))
- (resdat (if (not (and read-only qry-is-write))
- (let ((v (api:execute-requests dbstruct-local (vector (symbol->string cmd) params))))
- (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
- exn ;; This is an attempt to detect that situation and recover gracefully
- (begin
- (debug:print0 *default-log-port* "ERROR: bad data from server " v " message: " ((condition-property-accessor 'exn 'message) exn))
- (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy
- (if (and (vector? v)
- (> (vector-length v) 1))
- (let ((newvec (vector (vector-ref v 0)(vector-ref v 1))))
- newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record
- (vector #t '())))) ;; we could also check that the returned types are valid
- (vector #t '())))
- (success (vector-ref resdat 0))
- (res (vector-ref resdat 1))
- (duration (- (current-milliseconds) start)))
- (if (and read-only qry-is-write)
- (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd))
- (if (not success)
- (if (> remretries 0)
- (begin
- (debug:print-error 0 *default-log-port* "local query failed. Trying again.")
- (thread-sleep! (/ (random 5000) 1000)) ;; some random delay
- (rmt:open-qry-close-locally cmd run-id params remretries: (- remretries 1)))
- (begin
- (debug:print-error 0 *default-log-port* "too many retries in rmt:open-qry-close-locally, giving up")
- #f))
- (begin
- ;; (rmt:update-db-stats run-id cmd params duration)
- ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it
- (if qry-is-write
- (let ((start-time (current-seconds)))
- (mutex-lock! *db-multi-sync-mutex*)
-/ (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client)
- (mutex-unlock! *db-multi-sync-mutex*)))))
- res))
-
-(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
- (let* ((run-id (if run-id run-id 0))
- (res (handle-exceptions
- exn
- #f
- (http-transport:client-api-send-receive run-id connection-info cmd params))))
- (if (and res (vector-ref res 0))
- (vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!!
- #f)))
-
-;; ;; Wrap json library for strings (why the ports crap in the first place?)
-;; (define (rmt:dat->json-str dat)
-;; (with-output-to-string
-;; (lambda ()
-;; (json-write dat))))
-;;
-;; (define (rmt:json-str->dat json-str)
-;; (with-input-from-string json-str
-;; (lambda ()
-;; (json-read))))
-
-;;======================================================================
-;;
-;; A C T U A L A P I C A L L S
-;;
-;;======================================================================
-
-;;======================================================================
-;; S E R V E R
-;;======================================================================
-
-(define (rmt:kill-server run-id)
- (rmt:send-receive 'kill-server run-id (list run-id)))
-
-(define (rmt:start-server run-id)
- (rmt:send-receive 'start-server 0 (list run-id)))
-
-;;======================================================================
-;; M I S C
-;;======================================================================
-
-(define (rmt:login run-id)
- (rmt:send-receive 'login run-id (list *toppath* megatest-version *my-client-signature*)))
-
-;; This login does no retries under the hood - it acts a bit like a ping.
-;; Deprecated for nmsg-transport.
-;;
-(define (rmt:login-no-auto-client-setup connection-info)
- (case *transport-type* ;; run-id of 0 is just a placeholder
- ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login 0 (list *toppath* megatest-version *my-client-signature*)))
- ;;((nmsg)(nmsg-transport:client-api-send-receive run-id connection-info 'login (list *toppath* megatest-version run-id *my-client-signature*)))
- ))
-
-;; hand off a call to one of the db:queries statements
-;; added run-id to make looking up the correct db possible
-;;
-(define (rmt:general-call stmtname run-id . params)
- (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params)))
-
-
-;; given a hostname, return a pair of cpu load and update time representing latest intelligence from tests running on that host
-(define (rmt:get-latest-host-load hostname)
- (rmt:send-receive 'get-latest-host-load 0 (list hostname)))
-
-;; (define (rmt:sync-inmem->db run-id)
-;; (rmt:send-receive 'sync-inmem->db run-id '()))
-
-(define (rmt:sdb-qry qry val run-id)
- ;; add caching if qry is 'getid or 'getstr
- (rmt:send-receive 'sdb-qry run-id (list qry val)))
-
-;; NOT COMPLETED
-(define (rmt:runtests user run-id testpatt params)
- (rmt:send-receive 'runtests run-id testpatt))
-
-(define (rmt:get-run-record-ids target run keynames test-patt)
- (rmt:send-receive 'get-run-record-ids #f (list target run keynames test-patt)))
-
-(define (rmt:get-changed-record-ids since-time)
- (rmt:send-receive 'get-changed-record-ids #f (list since-time)) )
-
-;;======================================================================
-;; T E S T M E T A
-;;======================================================================
-
-(define (rmt:get-tests-tags)
- (rmt:send-receive 'get-tests-tags #f '()))
-
-;;======================================================================
-;; K E Y S
-;;======================================================================
-
-;; These require run-id because the values come from the run!
-;;
-(define (rmt:get-key-val-pairs run-id)
- (rmt:send-receive 'get-key-val-pairs run-id (list run-id)))
-
-(define (rmt:get-keys)
- (if *db-keys* *db-keys*
- (let ((res (rmt:send-receive 'get-keys #f '())))
- (set! *db-keys* res)
- res)))
-
-(define (rmt:get-keys-write) ;; dummy query to force server start
- (let ((res (rmt:send-receive 'get-keys-write #f '())))
- (set! *db-keys* res)
- res))
-
-;; we don't reuse run-id's (except possibly *after* a db cleanup) so it is safe
-;; to cache the resuls in a hash
-;;
-(define (rmt:get-key-vals run-id)
- (or (hash-table-ref/default *keyvals* run-id #f)
- (let ((res (rmt:send-receive 'get-key-vals #f (list run-id))))
- (hash-table-set! *keyvals* run-id res)
- res)))
-
-(define (rmt:get-targets)
- (rmt:send-receive 'get-targets #f '()))
-
-(define (rmt:get-target run-id)
- (rmt:send-receive 'get-target run-id (list run-id)))
-
-(define (rmt:get-run-times runpatt targetpatt)
- (rmt:send-receive 'get-run-times #f (list runpatt targetpatt )))
-
-
-;;======================================================================
-;; T E S T S
-;;======================================================================
-
-;; Just some syntatic sugar
-(define (rmt:register-test run-id test-name item-path)
- (rmt:general-call 'register-test run-id run-id test-name item-path))
-
-(define (rmt:get-test-id run-id testname item-path)
- (rmt:send-receive 'get-test-id run-id (list run-id testname item-path)))
-
-;; run-id is NOT used
-;;
-(define (rmt:get-test-info-by-id run-id test-id)
- (if (number? test-id)
- (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id))
- (begin
- (debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
- (print-call-chain (current-error-port))
- #f)))
-
-(define (rmt:test-get-rundir-from-test-id run-id test-id)
- (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id)))
-
-(define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))
- (let* ((test-path (if (string? work-area)
- work-area
- (rmt:test-get-rundir-from-test-id run-id test-id))))
- (debug:print 3 *default-log-port* "TEST PATH: " test-path)
- (open-test-db test-path)))
-
-;; WARNING: This currently bypasses the transaction wrapped writes system
-(define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
- (rmt:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment)))
-
-(define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus)
- (rmt:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus)))
-
-(define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)
- ;; (if (number? run-id)
- (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)))
- ;; (begin
- ;; (debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id)
- ;; (print-call-chain (current-error-port))
- ;; '())))
-
-;; get stuff via synchash
-(define (rmt:synchash-get run-id proc synckey keynum params)
- (rmt:send-receive 'synchash-get run-id (list run-id proc synckey keynum params)))
-
-(define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in)
- (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in)))
-
-;; IDEA: Threadify these - they spend a lot of time waiting ...
-;;
-(define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in)
- (let ((multi-run-mutex (make-mutex))
- (run-id-list (if run-ids
- run-ids
- (rmt:get-all-run-ids)))
- (result '()))
- (if (null? run-id-list)
- '()
- (let loop ((hed (car run-id-list))
- (tal (cdr run-id-list))
- (threads '()))
- (if (> (length threads) 5)
- (loop hed tal (filter (lambda (th)(not (member (thread-state th) '(terminated dead)))) threads))
- (let* ((newthread (make-thread
- (lambda ()
- (let ((res (rmt:send-receive 'get-tests-for-run-mindata hed (list hed testpatt states status not-in))))
- (if (list? res)
- (begin
- (mutex-lock! multi-run-mutex)
- (set! result (append result res))
- (mutex-unlock! multi-run-mutex))
- (debug:print-error 0 *default-log-port* "get-tests-for-run-mindata failed for run-id " hed ", testpatt " testpatt ", states " states ", status " status ", not-in " not-in))))
- (conc "multi-run-thread for run-id " hed)))
- (newthreads (cons newthread threads)))
- (thread-start! newthread)
- (thread-sleep! 0.05) ;; give that thread some time to start
- (if (null? tal)
- newthreads
- (loop (car tal)(cdr tal) newthreads))))))
- result))
-
-;; ;; IDEA: Threadify these - they spend a lot of time waiting ...
-;; ;;
-;; (define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in)
-;; (let ((run-id-list (if run-ids
-;; run-ids
-;; (rmt:get-all-run-ids))))
-;; (apply append (map (lambda (run-id)
-;; (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-ids testpatt states status not-in)))
-;; run-id-list))))
-
-(define (rmt:delete-test-records run-id test-id)
- (rmt:send-receive 'delete-test-records run-id (list run-id test-id)))
-
-;; This is not needed as test steps are deleted on test delete call
-;;
-;; (define (rmt:delete-test-step-records run-id test-id)
-;; (rmt:send-receive 'delete-test-step-records run-id (list run-id test-id)))
-
-(define (rmt:test-set-state-status run-id test-id state status msg)
- (rmt:send-receive 'test-set-state-status run-id (list run-id test-id state status msg)))
-
-(define (rmt:test-toplevel-num-items run-id test-name)
- (rmt:send-receive 'test-toplevel-num-items run-id (list run-id test-name)))
-
-;; (define (rmt:get-previous-test-run-record run-id test-name item-path)
-;; (rmt:send-receive 'get-previous-test-run-record run-id (list run-id test-name item-path)))
-
-(define (rmt:get-matching-previous-test-run-records run-id test-name item-path)
- (rmt:send-receive 'get-matching-previous-test-run-records run-id (list run-id test-name item-path)))
-
-(define (rmt:test-get-logfile-info run-id test-name)
- (rmt:send-receive 'test-get-logfile-info run-id (list run-id test-name)))
-
-(define (rmt:test-get-records-for-index-file run-id test-name)
- (rmt:send-receive 'test-get-records-for-index-file run-id (list run-id test-name)))
-
-(define (rmt:get-testinfo-state-status run-id test-id)
- (rmt:send-receive 'get-testinfo-state-status run-id (list run-id test-id)))
-
-(define (rmt:test-set-log! run-id test-id logf)
- (if (string? logf)(rmt:general-call 'test-set-log run-id logf test-id)))
-
-(define (rmt:test-set-top-process-pid run-id test-id pid)
- (rmt:send-receive 'test-set-top-process-pid run-id (list run-id test-id pid)))
-
-(define (rmt:test-get-top-process-pid run-id test-id)
- (rmt:send-receive 'test-get-top-process-pid run-id (list run-id test-id)))
-
-(define (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)
- (rmt:send-receive 'get-run-ids-matching-target #f (list keynames target res runname testpatt statepatt statuspatt)))
-
-;; NOTE: This will open and access ALL run databases.
-;;
-(define (rmt:test-get-paths-matching-keynames-target-new keynames target res testpatt statepatt statuspatt runname)
- (let ((run-ids (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)))
- (apply append
- (map (lambda (run-id)
- (rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list run-id keynames target res testpatt statepatt statuspatt runname)))
- run-ids))))
-
-;; (define (rmt:get-run-ids-matching keynames target res)
-;; (rmt:send-receive #f 'get-run-ids-matching (list keynames target res)))
-
-(define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f))
- (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps)))
-
-(define (rmt:get-count-tests-running-for-run-id run-id)
- (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id)))
-
-;; Statistical queries
-
-(define (rmt:get-count-tests-running run-id)
- (rmt:send-receive 'get-count-tests-running run-id (list run-id)))
-
-(define (rmt:get-count-tests-running-for-testname run-id testname)
- (rmt:send-receive 'get-count-tests-running-for-testname run-id (list run-id testname)))
-
-(define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)
- (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id (list run-id jobgroup)))
-
-;; state and status are extra hints not usually used in the calculation
-;;
-(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment)
- (rmt:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment)))
-
-(define (rmt:update-pass-fail-counts run-id test-name)
- (rmt:general-call 'update-pass-fail-counts run-id test-name test-name test-name))
-
-(define (rmt:top-test-set-per-pf-counts run-id test-name)
- (rmt:send-receive 'top-test-set-per-pf-counts run-id (list run-id test-name)))
-
-(define (rmt:get-raw-run-stats run-id)
- (rmt:send-receive 'get-raw-run-stats run-id (list run-id)))
-
-(define (rmt:get-test-times runname target)
- (rmt:send-receive 'get-test-times #f (list runname target )))
-
-;;======================================================================
-;; R U N S
-;;======================================================================
-
-(define (rmt:get-run-info run-id)
- (rmt:send-receive 'get-run-info run-id (list run-id)))
-
-(define (rmt:get-num-runs runpatt)
- (rmt:send-receive 'get-num-runs #f (list runpatt)))
-
-(define (rmt:get-runs-cnt-by-patt runpatt targetpatt keys)
- (rmt:send-receive 'get-runs-cnt-by-patt #f (list runpatt targetpatt keys)))
-
-;; Use the special run-id == #f scenario here since there is no run yet
-(define (rmt:register-run keyvals runname state status user contour)
- (rmt:send-receive 'register-run #f (list keyvals runname state status user contour)))
-
-(define (rmt:get-run-name-from-id run-id)
- (rmt:send-receive 'get-run-name-from-id run-id (list run-id)))
-
-(define (rmt:delete-run run-id)
- (rmt:send-receive 'delete-run run-id (list run-id)))
-
-(define (rmt:update-run-stats run-id stats)
- (rmt:send-receive 'update-run-stats #f (list run-id stats)))
-
-(define (rmt:delete-old-deleted-test-records)
- (rmt:send-receive 'delete-old-deleted-test-records #f '()))
-
-(define (rmt:get-runs runpatt count offset keypatts)
- (rmt:send-receive 'get-runs #f (list runpatt count offset keypatts)))
-
-(define (rmt:simple-get-runs runpatt count offset target)
- (rmt:send-receive 'simple-get-runs #f (list runpatt count offset target)))
-
-(define (rmt:get-all-run-ids)
- (rmt:send-receive 'get-all-run-ids #f '()))
-
-(define (rmt:get-prev-run-ids run-id)
- (rmt:send-receive 'get-prev-run-ids #f (list run-id)))
-
-(define (rmt:lock/unlock-run run-id lock unlock user)
- (rmt:send-receive 'lock/unlock-run #f (list run-id lock unlock user)))
-
-;; set/get status
-(define (rmt:get-run-status run-id)
- (rmt:send-receive 'get-run-status #f (list run-id)))
-
-(define (rmt:set-run-status run-id run-status #!key (msg #f))
- (rmt:send-receive 'set-run-status #f (list run-id run-status msg)))
-
-(define (rmt:update-run-event_time run-id)
- (rmt:send-receive 'update-run-event_time #f (list run-id)))
-
-(define (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-runs-update #!key (sort-order "asc")) ;; fields of #f uses default
- (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit fields last-runs-update sort-order)))
-
-(define (rmt:find-and-mark-incomplete run-id ovr-deadtime)
- ;; (if (rmt:send-receive 'have-incompletes? run-id (list run-id ovr-deadtime))
- (rmt:send-receive 'mark-incomplete run-id (list run-id ovr-deadtime))) ;; )
-
-(define (rmt:get-main-run-stats run-id)
- (rmt:send-receive 'get-main-run-stats #f (list run-id)))
-
-(define (rmt:get-var varname)
- (rmt:send-receive 'get-var #f (list varname)))
-
-(define (rmt:del-var varname)
- (rmt:send-receive 'del-var #f (list varname)))
-
-(define (rmt:set-var varname value)
- (rmt:send-receive 'set-var #f (list varname value)))
-
-;;======================================================================
-;; M U L T I R U N Q U E R I E S
-;;======================================================================
-
-;; Need to move this to multi-run section and make associated changes
-(define (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f))
- (let ((run-ids (rmt:get-all-run-ids)))
- (for-each (lambda (run-id)
- (rmt:find-and-mark-incomplete run-id ovr-deadtime))
- run-ids)))
-
-;; get the previous record for when this test was run where all keys match but runname
-;; returns #f if no such test found, returns a single test record if found
-;;
-;; Run this at the client end since we have to connect to multiple run-id dbs
-;;
-(define (rmt:get-previous-test-run-record run-id test-name item-path)
- (let* ((keyvals (rmt:get-key-val-pairs run-id))
- (keys (rmt:get-keys))
- (selstr (string-intersperse keys ","))
- (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND ")))
- (if (not keyvals)
- #f
- (let ((prev-run-ids (rmt:get-prev-run-ids run-id)))
- ;; for each run starting with the most recent look to see if there is a matching test
- ;; if found then return that matching test record
- (debug:print 4 *default-log-port* "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals ", previous run ids found: " prev-run-ids)
- (if (null? prev-run-ids) #f
- (let loop ((hed (car prev-run-ids))
- (tal (cdr prev-run-ids)))
- (let ((results (rmt:get-tests-for-run hed (conc test-name "/" item-path) '() '() ;; run-id testpatt states statuses
- #f #f #f ;; offset limit not-in hide/not-hide
- #f #f #f #f 'normal))) ;; sort-by sort-order qryvals last-update mode
- (debug:print 4 *default-log-port* "Got tests for run-id " run-id ", test-name " test-name ", item-path " item-path ": " results)
- (if (and (null? results)
- (not (null? tal)))
- (loop (car tal)(cdr tal))
- (if (null? results) #f
- (car results))))))))))
-
-(define (rmt:get-run-stats)
- (rmt:send-receive 'get-run-stats #f '()))
-
-;;======================================================================
-;; S T E P S
-;;======================================================================
-
-;; Getting steps is more complicated.
-;;
-;; If given work area
-;; 1. Find the testdat.db file
-;; 2. Open the testdat.db file and do the query
-;; If not given the work area
-;; 1. Do a remote call to get the test path
-;; 2. Continue as above
-;;
-;;(define (rmt:get-steps-for-test run-id test-id)
-;; (rmt:send-receive 'get-steps-data run-id (list test-id)))
-
-(define (rmt:teststep-set-status! run-id test-id teststep-name state-in status-in comment logfile)
- (let* ((state (items:check-valid-items "state" state-in))
- (status (items:check-valid-items "status" status-in)))
- (if (or (not state)(not status))
- (debug:print 3 *default-log-port* "WARNING: Invalid " (if status "status" "state")
- " value \"" (if status state-in status-in) "\", update your validvalues section in megatest.config"))
- (rmt:send-receive 'teststep-set-status! run-id (list run-id test-id teststep-name state-in status-in comment logfile))))
-
-(define (rmt:get-steps-for-test run-id test-id)
- (rmt:send-receive 'get-steps-for-test run-id (list run-id test-id)))
-
-(define (rmt:get-steps-info-by-id test-step-id)
- (rmt:send-receive 'get-steps-info-by-id #f (list test-step-id)))
-
-;;======================================================================
-;; T E S T D A T A
-;;======================================================================
-
-(define (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f))
- (rmt:send-receive 'read-test-data run-id (list run-id test-id categorypatt)))
-(define (rmt:read-test-data* run-id test-id categorypatt varpatt #!key (work-area #f))
- (rmt:send-receive 'read-test-data* run-id (list run-id test-id categorypatt varpatt)))
-
-(define (rmt:get-data-info-by-id test-data-id)
- (rmt:send-receive 'get-data-info-by-id #f (list test-data-id)))
-
-(define (rmt:testmeta-add-record testname)
- (rmt:send-receive 'testmeta-add-record #f (list testname)))
-
-(define (rmt:testmeta-get-record testname)
- (rmt:send-receive 'testmeta-get-record #f (list testname)))
-
-(define (rmt:testmeta-update-field test-name fld val)
- (rmt:send-receive 'testmeta-update-field #f (list test-name fld val)))
-
-(define (rmt:test-data-rollup run-id test-id status)
- (rmt:send-receive 'test-data-rollup run-id (list run-id test-id status)))
-
-(define (rmt:csv->test-data run-id test-id csvdata)
- (rmt:send-receive 'csv->test-data run-id (list run-id test-id csvdata)))
-
-;;======================================================================
-;; T A S K S
-;;======================================================================
-
-(define (rmt:tasks-find-task-queue-records target run-name test-patt state-patt action-patt)
- (rmt:send-receive 'find-task-queue-records #f (list target run-name test-patt state-patt action-patt)))
-
-(define (rmt:tasks-add action owner target runname testpatt params)
- (rmt:send-receive 'tasks-add #f (list action owner target runname testpatt params)))
-
-(define (rmt:tasks-set-state-given-param-key param-key new-state)
- (rmt:send-receive 'tasks-set-state-given-param-key #f (list param-key new-state)))
-
-(define (rmt:tasks-get-last target runname)
- (rmt:send-receive 'tasks-get-last #f (list target runname)))
-
-;;======================================================================
-;; N O S Y N C D B
-;;======================================================================
-
-(define (rmt:no-sync-set var val)
- (rmt:send-receive 'no-sync-set #f `(,var ,val)))
-
-(define (rmt:no-sync-get/default var default)
- (rmt:send-receive 'no-sync-get/default #f `(,var ,default)))
-
-(define (rmt:no-sync-del! var)
- (rmt:send-receive 'no-sync-del! #f `(,var)))
-
-(define (rmt:no-sync-get-lock keyname)
- (rmt:send-receive 'no-sync-get-lock #f `(,keyname)))
-
-;;======================================================================
-;; A R C H I V E S
-;;======================================================================
-
-(define (rmt:archive-get-allocations testname itempath dneeded)
- (rmt:send-receive 'archive-get-allocations #f (list testname itempath dneeded)))
-
-(define (rmt:archive-register-block-name bdisk-id archive-path)
- (rmt:send-receive 'archive-register-block-name #f (list bdisk-id archive-path)))
-
-(define (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey)
- (rmt:send-receive 'archive-allocate-test-to-block #f (list block-id testsuite-name areakey)))
-
-(define (rmt:archive-register-disk bdisk-name bdisk-path df)
- (rmt:send-receive 'archive-register-disk #f (list bdisk-name bdisk-path df)))
-
-(define (rmt:test-set-archive-block-id run-id test-id archive-block-id)
- (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id)))
-
-(define (rmt:test-get-archive-block-info archive-block-id)
- (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id)))
+
+(defstruct cmdrec
+ cmd
+ (host #f)
+ (run-ids #f)
+ params)
+
+;; call cmd on remote host (#f for any host)
+;;
+;; example: (rmt:run 'get-runs target run-name test-patt state status)
+;;
+(define (rmt:run cmd . params)
+ (let ((server (rmt:get-server cmdrec))) ;; look up server
+ #f))
+
+(define (rmt:get-connection-info . args) #t)
+(define (rmt:send-receive . args) #t)
+(define (rmt:print-db-stats . args) #t)
+(define (rmt:get-max-query-average . args) #t)
+(define (rmt:open-qry-close-locally . args) #t)
+(define (rmt:send-receive-no-auto-client-setup . args) #t)
+(define (rmt:kill-server . args) #t)
+(define (rmt:start-server . args) #t)
+(define (rmt:login . args) #t)
+(define (rmt:login-no-auto-client-setup . args) #t)
+(define (rmt:general-call . args) #t)
+(define (rmt:get-latest-host-load . args) #t)
+(define (rmt:sdb-qry . args) #t)
+(define (rmt:runtests . args) #t)
+(define (rmt:get-run-record-ids . args) #t)
+(define (rmt:get-changed-record-ids . args) #t)
+(define (rmt:get-tests-tags . args) #t)
+(define (rmt:get-key-val-pairs . args) #t)
+(define (rmt:get-keys . args) #t)
+(define (rmt:get-keys-write . args) #t)
+(define (rmt:get-key-vals . args) #t)
+(define (rmt:get-targets . args) #t)
+(define (rmt:get-target . args) #t)
+(define (rmt:get-run-times . args) #t)
+(define (rmt:register-test . args) #t)
+(define (rmt:get-test-id . args) #t)
+(define (rmt:get-test-info-by-id . args) #t)
+(define (rmt:test-get-rundir-from-test-id . args) #t)
+(define (rmt:open-test-db-by-test-id . args) #t)
+(define (rmt:test-set-state-status-by-id . args) #t)
+(define (rmt:set-tests-state-status . args) #t)
+(define (rmt:get-tests-for-run . args) #t)
+(define (rmt:synchash-get . args) #t)
+(define (rmt:get-tests-for-run-mindata . args) #t)
+(define (rmt:get-tests-for-runs-mindata . args) #t)
+(define (rmt:delete-test-records . args) #t)
+(define (rmt:test-set-state-status . args) #t)
+(define (rmt:test-toplevel-num-items . args) #t)
+(define (rmt:get-matching-previous-test-run-records . args) #t)
+(define (rmt:test-get-logfile-info . args) #t)
+(define (rmt:test-get-records-for-index-file . args) #t)
+(define (rmt:get-testinfo-state-status . args) #t)
+(define (rmt:test-set-log! . args) #t)
+(define (rmt:test-set-top-process-pid . args) #t)
+(define (rmt:test-get-top-process-pid . args) #t)
+(define (rmt:get-run-ids-matching-target . args) #t)
+(define (rmt:test-get-paths-matching-keynames-target-new . args) #t)
+(define (rmt:get-prereqs-not-met . args) #t)
+(define (rmt:get-count-tests-running-for-run-id . args) #t)
+(define (rmt:get-count-tests-running . args) #t)
+(define (rmt:get-count-tests-running-for-testname . args) #t)
+(define (rmt:get-count-tests-running-in-jobgroup . args) #t)
+(define (rmt:set-state-status-and-roll-up-items . args) #t)
+(define (rmt:update-pass-fail-counts . args) #t)
+(define (rmt:top-test-set-per-pf-counts . args) #t)
+(define (rmt:get-raw-run-stats . args) #t)
+(define (rmt:get-test-times . args) #t)
+(define (rmt:get-run-info . args) #t)
+(define (rmt:get-num-runs . args) #t)
+(define (rmt:get-runs-cnt-by-patt . args) #t)
+(define (rmt:register-run . args) #t)
+(define (rmt:get-run-name-from-id . args) #t)
+(define (rmt:delete-run . args) #t)
+(define (rmt:update-run-stats . args) #t)
+(define (rmt:delete-old-deleted-test-records . args) #t)
+(define (rmt:get-runs . args) #t)
+(define (rmt:simple-get-runs . args) #t)
+(define (rmt:get-all-run-ids . args) #t)
+(define (rmt:get-prev-run-ids . args) #t)
+(define (rmt:lock/unlock-run . args) #t)
+(define (rmt:get-run-status . args) #t)
+(define (rmt:set-run-status . args) #t)
+(define (rmt:update-run-event_time . args) #t)
+(define (rmt:get-runs-by-patt . args) #t)
+(define (rmt:find-and-mark-incomplete . args) #t)
+(define (rmt:get-main-run-stats . args) #t)
+(define (rmt:get-var . args) #t)
+(define (rmt:del-var . args) #t)
+(define (rmt:set-var . args) #t)
+(define (rmt:find-and-mark-incomplete-all-runs . args) #t)
+(define (rmt:get-previous-test-run-record . args) #t)
+(define (rmt:get-run-stats . args) #t)
+(define (rmt:teststep-set-status! . args) #t)
+(define (rmt:get-steps-for-test . args) #t)
+(define (rmt:get-steps-info-by-id . args) #t)
+(define (rmt:read-test-data . args) #t)
+(define (rmt:read-test-data* . args) #t)
+(define (rmt:get-data-info-by-id . args) #t)
+(define (rmt:testmeta-add-record . args) #t)
+(define (rmt:testmeta-get-record . args) #t)
+(define (rmt:testmeta-update-field . args) #t)
+(define (rmt:test-data-rollup . args) #t)
+(define (rmt:csv->test-data . args) #t)
+(define (rmt:tasks-find-task-queue-records . args) #t)
+(define (rmt:tasks-add . args) #t)
+(define (rmt:tasks-set-state-given-param-key . args) #t)
+(define (rmt:tasks-get-last . args) #t)
+(define (rmt:no-sync-set . args) #t)
+(define (rmt:no-sync-get/default . args) #t)
+(define (rmt:no-sync-del! . args) #t)
+(define (rmt:no-sync-get-lock . args) #t)
+(define (rmt:archive-get-allocations . args) #t)
+(define (rmt:archive-register-block-name . args) #t)
+(define (rmt:archive-allocate-testsuite/area-to-block . args) #t)
+(define (rmt:archive-register-disk . args) #t)
+(define (rmt:test-set-archive-block-id . args) #t)
+(define (rmt:test-get-archive-block-info . args) #t)
DELETED rpc-transport.scm
Index: rpc-transport.scm
==================================================================
--- rpc-transport.scm
+++ /dev/null
@@ -1,237 +0,0 @@
-
-;; Copyright 2006-2012, Matthew Welland.
-;;
-;; This file is part of Megatest.
-;;
-;; Megatest is free software: you can redistribute it and/or modify
-;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation, either version 3 of the License, or
-;; (at your option) any later version.
-;;
-;; Megatest is distributed in the hope that it will be useful,
-;; but WITHOUT ANY WARRANTY; without even the implied warranty of
-;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-;; GNU General Public License for more details.
-;;
-;; You should have received a copy of the GNU General Public License
-;; along with Megatest. If not, see .
-;;
-
-(require-extension (srfi 18) extras tcp s11n rpc)
-(import (prefix rpc rpc:))
-
-(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest)
-(import (prefix sqlite3 sqlite3:))
-
-(declare (unit rpc-transport))
-
-(declare (uses common))
-(declare (uses db))
-(declare (uses tests))
-(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
-
-(include "common_records.scm")
-(include "db_records.scm")
-
-;; procstr is the name of the procedure to be called as a string
-(define (rpc-transport:autoremote procstr params)
- (handle-exceptions
- exn
- (begin
- (debug:print 1 *default-log-port* "Remote failed for " proc " " params)
- (apply (eval (string->symbol procstr)) params))
- ;; (if *runremote*
- ;; (apply (eval (string->symbol (conc "remote:" procstr))) params)
- (apply (eval (string->symbol procstr)) params)))
-
-;; all routes though here end in exit ...
-;;
-;; start_server?
-;;
-(define (rpc-transport:launch run-id)
- (let* ((tdbdat (tasks:open-db)))
- (BB> "rpc-transport:launch fired for run-id="run-id)
- (set! *run-id* run-id)
- (if (args:get-arg "-daemonize")
- (daemon:ize))
- (if (server:check-if-running run-id)
- (begin
- (debug:print 0 *default-log-port* "INFO: Server for run-id " run-id " already running")
- (exit 0)))
- (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id))
- (remtries 4))
- (if (not server-id)
- (if (> remtries 0)
- (begin
- (thread-sleep! 2)
- (loop (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id)
- (- remtries 1)))
- (begin
- ;; since we didn't get the server lock we are going to clean up and bail out
- (debug:print-info 2 *default-log-port* "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue")
- (tasks:server-delete-records-for-this-pid (db:delay-if-busy tdbdat) " rpc-transport:launch")))
- (begin
- (rpc-transport:run (if (args:get-arg "-server")(args:get-arg "-server") "-") run-id server-id)
- (exit))))))
-
-(define (rpc-transport:run hostn run-id server-id)
- (debug:print 2 *default-log-port* "Attempting to start the rpc server ...")
- ;; (trace rpc:publish-procedure!)
-
- (rpc:publish-procedure! 'server:login server:login)
- (rpc:publish-procedure! 'testing (lambda () "Just testing"))
-
- (let* ((db #f)
- (hostname (get-host-name))
- (ipaddrstr (let ((ipstr (if (string=? "-" hostn)
- ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
- (server:get-best-guess-address hostname)
- #f)))
- (if ipstr ipstr hostn))) ;; hostname)))
- (start-port (open-run-close tasks:server-get-next-port tasks:open-db))
- (link-tree-path (configf:lookup *configdat* "setup" "linktree"))
- (rpc:listener (rpc-transport:find-free-port-and-open (rpc:default-server-port)))
- (th1 (make-thread
- (lambda ()
- ((rpc:make-server rpc:listener) #t))
- "rpc:server"))
- ;; (cute (rpc:make-server rpc:listener) "rpc:server")
- ;; 'rpc:server))
- (hostname (if (string=? "-" hostn)
- (get-host-name)
- hostn))
- (ipaddrstr (if (string=? "-" hostn)
- (server:get-best-guess-address hostname) ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
- #f))
- (portnum (rpc:default-server-port))
- (host:port (conc (if ipaddrstr ipaddrstr hostname) ":" portnum))
- (tdb (tasks:open-db)))
- (thread-start! th1)
- (set! db *dbstruct-db*)
- (open-run-close tasks:server-set-interface-port
- tasks:open-db
- server-id
- ipaddrstr portnum)
- (debug:print 0 *default-log-port* "Server started on " host:port)
-
- ;; (trace rpc:publish-procedure!)
- ;; (rpc:publish-procedure! 'server:login server:login)
- ;; (rpc:publish-procedure! 'testing (lambda () "Just testing"))
-
- ;;======================================================================
- ;; ;; end of publish-procedure section
- ;;======================================================================
- ;;
- (on-exit (lambda ()
- (open-run-close tasks:server-set-state! tasks:open-db server-id "stopped")))
-
- (set! *rpc:listener* rpc:listener)
- (tasks:server-set-state! tdb server-id "running")
- (set! *dbstruct-db* (db:setup run-id))
- ;; if none running or if > 20 seconds since
- ;; server last used then start shutdown
- (let loop ((count 0))
- (thread-sleep! 5) ;; no need to do this very often
- (let ((numrunning -1)) ;; (db:get-count-tests-running db)))
- (if (or (> numrunning 0)
- (> (+ *db-last-access* 60)(current-seconds)))
- (begin
- (debug:print-info 0 *default-log-port* "Server continuing, tests running: " numrunning ", seconds since last db access: " (- (current-seconds) *db-last-access*))
- (loop (+ 1 count)))
- (begin
- (debug:print-info 0 *default-log-port* "Starting to shutdown the server side")
- (open-run-close tasks:server-delete-record tasks:open-db server-id " rpc-transport:try-start-server stop")
- (thread-sleep! 10)
- (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*)
- (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting")
- ))))))
-
-(define (rpc-transport:find-free-port-and-open port)
- (handle-exceptions
- exn
- (begin
- (print "Failed to bind to port " (rpc:default-server-port) ", trying next port")
- (rpc-transport:find-free-port-and-open (+ port 1)))
- (rpc:default-server-port port)
- (tcp-read-timeout 240000)
- (tcp-listen (rpc:default-server-port) 10000)))
-
-(define (rpc-transport:ping run-id host port)
- (handle-exceptions
- exn
- (begin
- (print "SERVER_NOT_FOUND")
- (exit 1))
- (let ((login-res ((rpc:procedure 'server:login host port) *toppath*)))
- (if (and (list? login-res)
- (car login-res))
- (begin
- (print "LOGIN_OK")
- (exit 0))
- (begin
- (print "LOGIN_FAILED")
- (exit 1))))))
-
-(define (rpc-transport:client-setup run-id #!key (remtries 10))
- (if *runremote*
- (begin
- (debug:print-error 0 *default-log-port* "Attempt to connect to server but already connected")
- #f)
- (let* ((host-info (hash-table-ref/default *runremote* run-id #f))) ;; (open-run-close db:get-var #f "SERVER"))
- (if host-info
- (let ((iface (car host-info))
- (port (cadr host-info))
- (ping-res ((rpc:procedure 'server:login host port) *toppath*)))
- (if ping-res
- (let ((server-dat (list iface port #f #f #f)))
- (hash-table-set! *runremote* run-id server-dat)
- server-dat)
- (begin
- (server:try-running *toppath*)
- (thread-sleep! 2)
- (rpc-transport:client-setup run-id (- remtries 1)))))
- (let* ((server-db-info (open-run-close tasks:get-server tasks:open-db run-id)))
- (debug:print-info 0 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
- (if server-db-info
- (let* ((iface (tasks:hostinfo-get-interface server-db-info))
- (port (tasks:hostinfo-get-port server-db-info))
- (server-dat (list iface port #f #f #f))
- (ping-res ((rpc:procedure 'server:login host port) *toppath*)))
- (if start-res
- (begin
- (hash-table-set! *runremote* run-id server-dat)
- server-dat)
- (begin
- (server:try-running *toppath*)
- (thread-sleep! 2)
- (rpc-transport:client-setup run-id (- remtries 1)))))
- (begin
- (server:try-running *toppath*)
- (thread-sleep! 2)
- (rpc-transport:client-setup run-id (- remtries 1)))))))))
-;;
-;; (port (if (and hostinfo (> (length hostdat) 1))(cadr hostdat) #f)))
-;; (if (and port
-;; (string->number port))
-;; (let ((portn (string->number port)))
-;; (debug:print-info 2 *default-log-port* "Setting up to connect to host " host ":" port)
-;; (handle-exceptions
-;; exn
-;; (begin
-;; (debug:print-error 0 *default-log-port* "Failed to open a connection to the server at host: " host " port: " port)
-;; (debug:print 0 *default-log-port* " EXCEPTION: " ((condition-property-accessor 'exn 'message) exn))
-;; ;; (open-run-close
-;; ;; (lambda (db . param)
-;; ;; (sqlite3:execute db "DELETE FROM metadat WHERE var='SERVER'"))
-;; ;; #f)
-;; (set! *runremote* #f))
-;; (if (and (not (args:get-arg "-server")) ;; no point in the server using the server using the server
-;; ((rpc:procedure 'server:login host portn) *toppath*))
-;; (begin
-;; (debug:print-info 2 *default-log-port* "Logged in and connected to " host ":" port)
-;; (set! *runremote* (vector host portn)))
-;; (begin
-;; (debug:print-info 2 *default-log-port* "Failed to login or connect to " host ":" port)
-;; (set! *runremote* #f)))))
-;; (debug:print-info 2 *default-log-port* "no server available")))))
-
Index: server.scm
==================================================================
--- server.scm
+++ server.scm
@@ -15,606 +15,658 @@
;;
;; You should have received a copy of the GNU General Public License
;; along with Megatest. If not, see .
;;
+;;======================================================================
+;;
+;; This is the Megatest specific stuff for starting and maintaining a
+;; server. Stuff that talks to the server should go in client.scm.
+;; General nanomsg stuff (not Megatest specific) should go in the
+;; nmsg-transport.scm file.
+;;
+;;======================================================================
+
(require-extension (srfi 18) extras tcp s11n)
(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest
- directory-utils posix-extras matchable)
+ directory-utils posix-extras matchable typed-records)
(use spiffy uri-common intarweb http-client spiffy-request-vars)
(declare (unit server))
(declare (uses common))
(declare (uses db))
-(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
-;; (declare (uses synchash))
-(declare (uses http-transport))
-;;(declare (uses rpc-transport))
-(declare (uses launch))
-;; (declare (uses daemon))
-
-(include "common_records.scm")
-(include "db_records.scm")
-
-(define (server:make-server-url hostport)
- (if (not hostport)
- #f
- (conc "http://" (car hostport) ":" (cadr hostport))))
-
-(define *server-loop-heart-beat* (current-seconds))
-
-;;======================================================================
-;; P K T S S T U F F
-;;======================================================================
-
-;; ???
-
-;;======================================================================
-;; P K T S S T U F F
-;;======================================================================
-
-;; ???
-
-;;======================================================================
-;; S E R V E R
-;;======================================================================
-
-;; Call this to start the actual server
-;;
-
-;; all routes though here end in exit ...
-;;
-;; start_server
-;;
-(define (server:launch run-id transport-type)
- (case transport-type
- ((http)(http-transport:launch))
- ;;((nmsg)(nmsg-transport:launch run-id))
- ;;((rpc) (rpc-transport:launch run-id))
- (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type))))
-
-;;======================================================================
-;; S E R V E R U T I L I T I E S
-;;======================================================================
-
-;; Get the transport
-(define (server:get-transport)
- (if *transport-type*
- *transport-type*
- (let ((ttype (string->symbol
- (or (args:get-arg "-transport")
- (configf:lookup *configdat* "server" "transport")
- "rpc"))))
- (set! *transport-type* ttype)
- ttype)))
-
-;; Generate a unique signature for this server
-(define (server:mk-signature)
- (message-digest-string (md5-primitive)
- (with-output-to-string
- (lambda ()
- (write (list (current-directory)
- (argv)))))))
-
-;; When using zmq this would send the message back (two step process)
-;; with spiffy or rpc this simply returns the return data to be returned
-;;
-(define (server:reply return-addr query-sig success/fail result)
- (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
- ;; (send-message pubsock target send-more: #t)
- ;; (send-message pubsock
- (case (server:get-transport)
- ((rpc) (db:obj->string (vector success/fail query-sig result)))
- ((http) (db:obj->string (vector success/fail query-sig result)))
- ((fs) result)
- (else
- (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*)
- result)))
-
-;; Given a run id start a server process ### NOTE ### > file 2>&1
-;; if the run-id is zero and the target-host is set
-;; try running on that host
-;; incidental: rotate logs in logs/ dir.
-;;
-(define (server:run areapath) ;; areapath is *toppath* for a given testsuite area
- (let* ((curr-host (get-host-name))
- ;; (attempt-in-progress (server:start-attempted? areapath))
- ;; (dot-server-url (server:check-if-running areapath))
- (curr-ip (server:get-best-guess-address curr-host))
- (curr-pid (current-process-id))
- (homehost (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
- (target-host (car homehost))
- (testsuite (common:get-testsuite-name))
- (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
- (cmdln (conc (common:get-megatest-exe)
- " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
- " -daemonize "
- "")
- ;; " -log " logfile
- " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &")))))
- (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread"))
- (load-limit (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0)))
- ;; we want the remote server to start in *toppath* so push there
- (push-directory areapath)
- (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
- (thread-start! log-rotate)
-
- ;; host.domain.tld match host?
- (if (and target-host
- ;; look at target host, is it host.domain.tld or ip address and does it
- ;; match current ip or hostname
- (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
- (not (equal? curr-ip target-host)))
- (begin
- (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
- (setenv "TARGETHOST" target-host)))
-
- (setenv "TARGETHOST_LOGF" logfile)
- (thread-sleep! (/ (random 5000) 1000)) ;; add about a random (up to 5 seconds) initial delay. It seems pretty common that many running tests request a server at the same time
- (common:wait-for-normalized-load load-limit " delaying server start due to load" target-host) ;; do not try starting servers on an already overloaded machine, just wait forever
- (system (conc "nbfake " cmdln))
- (unsetenv "TARGETHOST_LOGF")
- (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
- (thread-join! log-rotate)
- (pop-directory)))
-
-;; given a path to a server log return: host port startseconds
-;;
-(define (server:logf-get-start-info logf)
- (let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+)"))) ;; SERVER STARTED: host:port AT timesecs
- (handle-exceptions
- exn
- (list #f #f #f) ;; no idea what went wrong, call it a bad server
- (with-input-from-file
- logf
- (lambda ()
- (let loop ((inl (read-line))
- (lnum 0))
- (if (not (eof-object? inl))
- (let ((mlst (string-match rx inl)))
- (if (not mlst)
- (if (< lnum 500) ;; give up if more than 500 lines of server log read
- (loop (read-line)(+ lnum 1))
- (list #f #f #f))
- (let ((dat (cdr mlst)))
- (list (car dat) ;; host
- (string->number (cadr dat)) ;; port
- (string->number (caddr dat))))))
- (list #f #f #f))))))))
-
-;; get a list of servers with all relevant data
-;; ( mod-time host port start-time pid )
-;;
-(define (server:get-list areapath #!key (limit #f))
- (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
- (day-seconds (* 24 60 60)))
- ;; if the directory exists continue to get the list
- ;; otherwise attempt to create the logs dir and then
- ;; continue
- (if (if (directory-exists? (conc areapath "/logs"))
- '()
- (if (file-write-access? areapath)
- (begin
- (condition-case
- (create-directory (conc areapath "/logs") #t)
- (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs")))
- (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list.")))
- (directory-exists? (conc areapath "/logs")))
- '()))
- (let* ((server-logs (glob (conc areapath "/logs/server-*.log")))
- (num-serv-logs (length server-logs)))
- (if (null? server-logs)
- '()
- (let loop ((hed (car server-logs))
- (tal (cdr server-logs))
- (res '()))
- (let* ((mod-time (handle-exceptions
- exn
- (current-seconds) ;; 0
- (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted
- (down-time (- (current-seconds) mod-time))
- (serv-dat (if (or (< num-serv-logs 10)
- (< down-time 900)) ;; day-seconds))
- (server:logf-get-start-info hed)
- '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at
- (serv-rec (cons mod-time serv-dat))
- (fmatch (string-match fname-rx hed))
- (pid (if fmatch (string->number (list-ref fmatch 2)) #f))
- (new-res (if (null? serv-dat)
- res
- (cons (append serv-rec (list pid)) res))))
- (if (null? tal)
- (if (and limit
- (> (length new-res) limit))
- new-res ;; (take new-res limit) <= need intelligent sorting before this will work
- new-res)
- (loop (car tal)(cdr tal) new-res)))))))))
-
-(define (server:get-num-alive srvlst)
- (let ((num-alive 0))
- (for-each
- (lambda (server)
- (match-let (((mod-time host port start-time pid)
- server))
- (let* ((uptime (- (current-seconds) mod-time))
- (runtime (if start-time
- (- mod-time start-time)
- 0)))
- (if (< uptime 5)(set! num-alive (+ num-alive 1))))))
- srvlst)
- num-alive))
-
-;; given a list of servers get a list of valid servers, i.e. at least
-;; 10 seconds old, has started and is less than 1 hour old and is
-;; active (i.e. mod-time < 10 seconds
-;;
-;; mod-time host port start-time pid
-;;
-;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off
-;; and servers should stick around for about two hours or so.
-;;
-(define (server:get-best srvlst)
- (let* ((nums (server:get-num-servers))
- (now (current-seconds))
- (slst (sort
- (filter (lambda (rec)
- (if (and (list? rec)
- (> (length rec) 2))
- (let ((start-time (list-ref rec 3))
- (mod-time (list-ref rec 0)))
- ;; (print "start-time: " start-time " mod-time: " mod-time)
- (and start-time mod-time
- (> (- now start-time) 0) ;; been running at least 0 seconds
- (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds
- (< (- now start-time)
- (+ (- (string->number (or (configf:lookup *configdat* "server" "runtime") "3600"))
- 180)
- (random 360))) ;; under one hour running time +/- 180
- ))
- #f))
- srvlst)
- (lambda (a b)
- (< (list-ref a 3)
- (list-ref b 3))))))
- (if (> (length slst) nums)
- (take slst nums)
- slst)))
-
-(define (server:get-first-best areapath)
- (let ((srvrs (server:get-best (server:get-list areapath))))
- (if (and srvrs
- (not (null? srvrs)))
- (car srvrs)
- #f)))
-
-(define (server:get-rand-best areapath)
- (let ((srvrs (server:get-best (server:get-list areapath))))
- (if (and (list? srvrs)
- (not (null? srvrs)))
- (let* ((len (length srvrs))
- (idx (random len)))
- (list-ref srvrs idx))
- #f)))
-
-
-(define (server:record->url servr)
- (match-let (((mod-time host port start-time pid)
- servr))
- (if (and host port)
- (conc host ":" port)
- #f)))
-
-(define (server:get-client-signature) ;; BB> why is this proc named "get-"? it returns nothing -- set! has not return value.
- (if *my-client-signature* *my-client-signature*
- (let ((sig (server:mk-signature)))
- (set! *my-client-signature* sig)
- *my-client-signature*)))
-
-;; kind start up of servers, wait 40 seconds before allowing another server for a given
-;; run-id to be launched
-(define (server:kind-run areapath)
- (if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
- (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun
- (call-num (car last-run-dat))
- (when-run (cadr last-run-dat))
- (run-delay (+ (case call-num
- ((0) 0)
- ((1) 20)
- ((2) 300)
- (else 600))
- (random 5))) ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously
- (lock-file (conc areapath "/logs/server-start.lock")))
- (if (> (- (current-seconds) when-run) run-delay)
- (begin
- (common:simple-file-lock-and-wait lock-file expire-time: 15)
- (server:run areapath)
- (thread-sleep! 5) ;; don't release the lock for at least a few seconds
- (common:simple-file-release-lock lock-file)))
- (hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds))))))
-
-(define (server:start-and-wait areapath #!key (timeout 60))
- (let ((give-up-time (+ (current-seconds) timeout)))
- (let loop ((server-url (server:check-if-running areapath))
- (try-num 0))
- (if (or server-url
- (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
- server-url
- (let ((num-ok (length (server:get-best (server:get-list areapath)))))
- (if (and (> try-num 0) ;; first time through simply wait a little while then try again
- (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one
- (server:kind-run areapath))
- (thread-sleep! 5)
- (loop (server:check-if-running areapath)
- (+ try-num 1)))))))
-
-(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.
-
-(define (server:get-num-servers #!key (numservers 2))
- (let ((ns (string->number
- (or (configf:lookup *configdat* "server" "numservers") "notanumber"))))
- (or ns numservers)))
-
-;; no longer care if multiple servers are started by accident. older servers will drop off in time.
-;;
-(define (server:check-if-running areapath) ;; #!key (numservers "2"))
- (let* ((ns (server:get-num-servers))
- (servers (server:get-best (server:get-list areapath))))
- ;; (print "servers: " servers " ns: " ns)
- (if (or (and servers
- (null? servers))
- (not servers)
- (and (list? servers)
- (< (length servers) (random ns)))) ;; somewhere between 0 and numservers
- #f
- (let loop ((hed (car servers))
- (tal (cdr servers)))
- (let ((res (server:check-server hed)))
- (if res
- res
- (if (null? tal)
- #f
- (loop (car tal)(cdr tal)))))))))
-
-;; ping the given server
-;;
-(define (server:check-server server-record)
- (let* ((server-url (server:record->url server-record))
- (res (case *transport-type*
- ((http)(server:ping server-url))
- ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
- )))
- (if res
- server-url
- #f)))
-
-(define (server:kill servr)
- (match-let (((mod-time hostname port start-time pid)
- servr))
- (tasks:kill-server hostname pid)))
-
-;; called in megatest.scm, host-port is string hostname:port
-;;
-;; NOTE: This is NOT called directly from clients as not all transports support a client running
-;; in the same process as the server.
-;;
-(define (server:ping host-port-in #!key (do-exit #f))
- (let ((host:port (if (not host-port-in) ;; use read-dotserver to find
- #f ;; (server:check-if-running *toppath*)
- ;; (if (number? host-port-in) ;; we were handed a server-id
- ;; (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in)))
- ;; ;; (print "srec: " srec " host-port-in: " host-port-in)
- ;; (if srec
- ;; (conc (vector-ref srec 3) ":" (vector-ref srec 4))
- ;; (conc "no such server-id " host-port-in)))
- host-port-in))) ;; )
- (let* ((host-port (if host:port
- (let ((slst (string-split host:port ":")))
- (if (eq? (length slst) 2)
- (list (car slst)(string->number (cadr slst)))
- #f))
- #f)))
-;; (toppath (launch:setup)))
- ;; (print "host-port=" host-port)
- (if (not host-port)
- (begin
- (if host-port-in
- (debug:print 0 *default-log-port* "ERROR: bad host:port"))
- (if do-exit (exit 1))
- #f)
- (let* ((iface (car host-port))
- (port (cadr host-port))
- (server-dat (http-transport:client-connect iface port))
- (login-res (rmt:login-no-auto-client-setup server-dat)))
- (if (and (list? login-res)
- (car login-res))
- (begin
- ;; (print "LOGIN_OK")
- (if do-exit (exit 0))
- #t)
- (begin
- ;; (print "LOGIN_FAILED")
- (if do-exit (exit 1))
- #f)))))))
-
-;; run ping in separate process, safest way in some cases
-;;
-(define (server:ping-server ifaceport)
- (with-input-from-pipe
- (conc (common:get-megatest-exe) " -ping " ifaceport)
- (lambda ()
- (let loop ((inl (read-line))
- (res "NOREPLY"))
- (if (eof-object? inl)
- (case (string->symbol res)
- ((NOREPLY) #f)
- ((LOGIN_OK) #t)
- (else #f))
- (loop (read-line) inl))))))
-
-;; NOT USED (well, ok, reference in rpc-transport but otherwise not used).
-;;
-(define (server:login toppath)
- (lambda (toppath)
- (set! *db-last-access* (current-seconds)) ;; might not be needed.
- (if (equal? *toppath* toppath)
- #t
- #f)))
-
-;; timeout is hms string: 1h 5m 3s, default is 1 minute
-;;
-(define (server:expiration-timeout)
- (let ((tmo (configf:lookup *configdat* "server" "timeout")))
- (if (and (string? tmo)
- (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below
- (* 3600 (string->number tmo))
- 60)))
-
-(define (server:get-best-guess-address hostname)
- (let ((res #f))
- (for-each
- (lambda (adr)
- (if (not (eq? (u8vector-ref adr 0) 127))
- (set! res adr)))
- ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME
- (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
- (string-intersperse
- (map number->string
- (u8vector->list
- (if res res (hostname->ip hostname)))) ".")))
-
-;; moving this here as it needs access to db and cannot be in common.
-;;
-(define (server:writable-watchdog dbstruct)
- (thread-sleep! 0.05) ;; delay for startup
- (let ((legacy-sync (common:run-sync?))
- (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300))
- (debug-mode (debug:debug-mode 1))
- (last-time (current-seconds))
- (no-sync-db (db:open-no-sync-db))
- (sync-duration 0) ;; run time of the sync in milliseconds
- ;;(this-wd-num (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x)))
- )
- (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
- (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
- (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id) );; " this-wd-num="this-wd-num)
- (if (and legacy-sync (not *time-to-exit*))
- (let* (;;(dbstruct (db:setup))
- (mtdb (dbr:dbstruct-mtdb dbstruct))
- (mtpath (db:dbdat-get-path mtdb))
- (tmp-area (common:get-db-tmp-area))
- (start-file (conc tmp-area "/.start-sync"))
- (end-file (conc tmp-area "/.end-sync")))
- (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
- (let loop ()
- ;; sync for filesystem local db writes
- ;;
- (mutex-lock! *db-multi-sync-mutex*)
- (let* ((need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
- (sync-in-progress *db-sync-in-progress*)
- (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5))
- (should-sync (and (not *time-to-exit*)
- (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed
- (start-time (current-seconds))
- (cpu-load-adj (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)))
- (mt-mod-time (file-modification-time mtpath))
- (last-sync-start (if (common:file-exists? start-file)
- (file-modification-time start-file)
- 0))
- (last-sync-end (if (common:file-exists? end-file)
- (file-modification-time end-file)
- 10))
- (sync-period (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period
- (recently-synced (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db!
- (< mt-mod-time last-sync-start)))
- (sync-done (<= last-sync-start last-sync-end))
- (sync-stale (> start-time (+ last-sync-start sync-stale-seconds)))
- (will-sync (and (not *time-to-exit*) ;; do not start a sync if we are in the process of exiting
- (or need-sync should-sync)
- (or sync-done sync-stale)
- (not sync-in-progress)
- (not recently-synced))))
- (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. need-sync="need-sync" sync-in-progress=" sync-in-progress
- " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync
- " sync-done=" sync-done " sync-period=" sync-period)
- (if (and (> sync-period 5)
- (common:low-noise-print 30 "sync-period"))
- (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds."))
- ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced))
- ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
- (if will-sync (set! *db-sync-in-progress* #t))
- (mutex-unlock! *db-multi-sync-mutex*)
- (if will-sync
- (let (;; (max-sync-duration (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK!
- (sync-start (current-milliseconds)))
- (with-output-to-file start-file (lambda ()(print (current-process-id))))
-
- ;; put lock here
-
- ;; (if (or (not max-sync-duration)
- ;; (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally
- (let ((res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
- (set! sync-duration (- (current-milliseconds) sync-start))
- (if (> res 0) ;; some records were transferred, keep the db alive
- (begin
- (mutex-lock! *heartbeat-mutex*)
- (set! *db-last-access* (current-seconds))
- (mutex-unlock! *heartbeat-mutex*)
- (debug:print-info 0 *default-log-port* "sync called, " res " records transferred."))
- (debug:print-info 2 *default-log-port* "sync called but zero records transferred")))))
-;; ;; TODO: factor this next routine out into a function
-;; (with-input-from-pipe ;; this should not block other threads but need to verify this
-;; (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*)
-;; (lambda ()
-;; (let loop ((inl (read-line))
-;; (res #f))
-;; (if (eof-object? inl)
-;; (begin
-;; (set! sync-duration (- (current-milliseconds) sync-start))
-;; (cond
-;; ((not res)
-;; (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\""))
-;; ((> res 0)
-;; (mutex-lock! *heartbeat-mutex*)
-;; (set! *db-last-access* (current-seconds))
-;; (mutex-unlock! *heartbeat-mutex*))))
-;; (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl)))
-;; (if matches
-;; (string->number (cadr matches))
-;; #f))))
-;; (loop (read-line)
-;; (or num-synced res))))))))))
- (if will-sync
- (begin
- (mutex-lock! *db-multi-sync-mutex*)
- (set! *db-sync-in-progress* #f)
- (set! *db-last-sync* start-time)
- (with-output-to-file end-file (lambda ()(print (current-process-id))))
-
- ;; release lock here
-
- (mutex-unlock! *db-multi-sync-mutex*)))
- (if (and debug-mode
- (> (- start-time last-time) 60))
- (begin
- (set! last-time start-time)
- (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
-
- ;; keep going unless time to exit
- ;;
- (if (not *time-to-exit*)
- (let delay-loop ((count 0))
- ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
-
- (if (and (not *time-to-exit*)
- (< count 6)) ;; was 11, changing to 4.
- (begin
- (thread-sleep! 1)
- (delay-loop (+ count 1))))
- (if (not *time-to-exit*) (loop))))
- ;; time to exit, close the no-sync db here
- (db:no-sync-close-db no-sync-db)
- (if (common:low-noise-print 30)
- (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num)))))))
-
+
+;; Basic stuff for safely kicking off a server
+(declare (uses portlogger))
+(import portlogger)
+(declare (uses nmsg-transport))
+(import nmsg-transport)
+
+;; Might want to bring the daemonizing back
+;; (declare (uses daemon))
+
+(include "common_records.scm")
+(include "db_records.scm")
+
+;;======================================================================
+;; P K T S S T U F F
+;;======================================================================
+
+;;======================================================================
+;; N A N O M S G B A S E D S E R V E R
+;;======================================================================
+
+(defstruct nmsg
+ (conn #f)
+ (hosts (make-hash-table))
+ pkt
+ pktspec
+ (mutex (make-mutex))
+ )
+
+;; make it a global? Well, it is local to nmsg module
+
+(define *nmsg-conndat* (make-nmsg))
+(nmsg-pktspec-set! *nmsg-conndat*
+ `((server (hostname . h)
+ (port . p)
+ (pid . i)
+ )))
+;; get a port
+;; start the nmsg server
+;; look for other servers
+;; contact other servers and compile list of servers
+;; there are two types of server
+;; main servers - dashboards, runners and dedicated servers - need pkt
+;; passive servers - test executers, step calls, list-runs - no pkt
+;;
+(define (server:start-nmsg #!key (force-server-type #f))
+ (mutex-lock! (nmsg-mutex *nmsg-conndat*))
+ (let* ((server-type (or force-server-type
+ (if (args:any? "-run" "-server")
+ 'main
+ 'passive)))
+ (port-num (portlogger:open-run-close portlogger:find-port))
+ (nmsg-conn (nmsg:start-server port-num))
+ (pktspec (nmsg-pktspec *nmsg-conndat*))
+ (pktdir (conc (get-environment-variable "MT_RUN_AREA_HOME")
+ "/.server-pkts")))
+ (if (not (directory? pktdir))(create-directory pktdir))
+ ;; server is started, now create pkt if needed
+ (if (eq? server-type 'main)
+ (nmsg-pkt-set! *nmsg-conndat*
+ (pkts:write-alist->pkt
+ pktdir
+ `((hostname . ,(get-host-name))
+ (port . ,port-num)
+ (pid . ,(current-process-id)))
+ pktspec: pktspec
+ ptype: 'server)))
+ (nmsg-conn-set! *nmsg-conndat* nmsg-conn)
+ (mutex-unlock! (nmsg-mutex *nmsg-conndat*))
+ ))
+
+;;
+;;
+;; ;; Call this to start the actual server
+;; ;;
+;;
+;; ;; all routes though here end in exit ...
+;; ;;
+;; ;; start_server
+;; ;;
+;; (define (server:launch run-id transport-type)
+;; (case transport-type
+;; ((http)(http-transport:launch))
+;; ;;((nmsg)(nmsg-transport:launch run-id))
+;; ;;((rpc) (rpc-transport:launch run-id))
+;; (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type))))
+;;
+;; ;;======================================================================
+;; ;; S E R V E R U T I L I T I E S
+;; ;;======================================================================
+;;
+;; ;; Get the transport
+;; (define (server:get-transport)
+;; (if *transport-type*
+;; *transport-type*
+;; (let ((ttype (string->symbol
+;; (or (args:get-arg "-transport")
+;; (configf:lookup *configdat* "server" "transport")
+;; "rpc"))))
+;; (set! *transport-type* ttype)
+;; ttype)))
+;;
+;; ;; Generate a unique signature for this server
+;; (define (server:mk-signature)
+;; (message-digest-string (md5-primitive)
+;; (with-output-to-string
+;; (lambda ()
+;; (write (list (current-directory)
+;; (argv)))))))
+;;
+;; ;; When using zmq this would send the message back (two step process)
+;; ;; with spiffy or rpc this simply returns the return data to be returned
+;; ;;
+;; (define (server:reply return-addr query-sig success/fail result)
+;; (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
+;; ;; (send-message pubsock target send-more: #t)
+;; ;; (send-message pubsock
+;; (case (server:get-transport)
+;; ((rpc) (db:obj->string (vector success/fail query-sig result)))
+;; ((http) (db:obj->string (vector success/fail query-sig result)))
+;; ((fs) result)
+;; (else
+;; (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*)
+;; result)))
+;;
+;; ;; Given a run id start a server process ### NOTE ### > file 2>&1
+;; ;; if the run-id is zero and the target-host is set
+;; ;; try running on that host
+;; ;; incidental: rotate logs in logs/ dir.
+;; ;;
+;; (define (server:run areapath) ;; areapath is *toppath* for a given testsuite area
+;; (let* ((curr-host (get-host-name))
+;; ;; (attempt-in-progress (server:start-attempted? areapath))
+;; ;; (dot-server-url (server:check-if-running areapath))
+;; (curr-ip (server:get-best-guess-address curr-host))
+;; (curr-pid (current-process-id))
+;; (homehost (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
+;; (target-host (car homehost))
+;; (testsuite (common:get-testsuite-name))
+;; (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
+;; (cmdln (conc (common:get-megatest-exe)
+;; " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
+;; " -daemonize "
+;; "")
+;; ;; " -log " logfile
+;; " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &")))))
+;; (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread"))
+;; (load-limit (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0)))
+;; ;; we want the remote server to start in *toppath* so push there
+;; (push-directory areapath)
+;; (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
+;; (thread-start! log-rotate)
+;;
+;; ;; host.domain.tld match host?
+;; (if (and target-host
+;; ;; look at target host, is it host.domain.tld or ip address and does it
+;; ;; match current ip or hostname
+;; (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
+;; (not (equal? curr-ip target-host)))
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
+;; (setenv "TARGETHOST" target-host)))
+;;
+;; (setenv "TARGETHOST_LOGF" logfile)
+;; (thread-sleep! (/ (random 5000) 1000)) ;; add about a random (up to 5 seconds) initial delay. It seems pretty common that many running tests request a server at the same time
+;; (common:wait-for-normalized-load load-limit " delaying server start due to load" target-host) ;; do not try starting servers on an already overloaded machine, just wait forever
+;; (system (conc "nbfake " cmdln))
+;; (unsetenv "TARGETHOST_LOGF")
+;; (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
+;; (thread-join! log-rotate)
+;; (pop-directory)))
+;;
+;; ;; given a path to a server log return: host port startseconds
+;; ;;
+;; (define (server:logf-get-start-info logf)
+;; (let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+)"))) ;; SERVER STARTED: host:port AT timesecs
+;; (handle-exceptions
+;; exn
+;; (list #f #f #f) ;; no idea what went wrong, call it a bad server
+;; (with-input-from-file
+;; logf
+;; (lambda ()
+;; (let loop ((inl (read-line))
+;; (lnum 0))
+;; (if (not (eof-object? inl))
+;; (let ((mlst (string-match rx inl)))
+;; (if (not mlst)
+;; (if (< lnum 500) ;; give up if more than 500 lines of server log read
+;; (loop (read-line)(+ lnum 1))
+;; (list #f #f #f))
+;; (let ((dat (cdr mlst)))
+;; (list (car dat) ;; host
+;; (string->number (cadr dat)) ;; port
+;; (string->number (caddr dat))))))
+;; (list #f #f #f))))))))
+;;
+;; ;; get a list of servers with all relevant data
+;; ;; ( mod-time host port start-time pid )
+;; ;;
+;; (define (server:get-list areapath #!key (limit #f))
+;; (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
+;; (day-seconds (* 24 60 60)))
+;; ;; if the directory exists continue to get the list
+;; ;; otherwise attempt to create the logs dir and then
+;; ;; continue
+;; (if (if (directory-exists? (conc areapath "/logs"))
+;; '()
+;; (if (file-write-access? areapath)
+;; (begin
+;; (condition-case
+;; (create-directory (conc areapath "/logs") #t)
+;; (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs")))
+;; (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list.")))
+;; (directory-exists? (conc areapath "/logs")))
+;; '()))
+;; (let* ((server-logs (glob (conc areapath "/logs/server-*.log")))
+;; (num-serv-logs (length server-logs)))
+;; (if (null? server-logs)
+;; '()
+;; (let loop ((hed (car server-logs))
+;; (tal (cdr server-logs))
+;; (res '()))
+;; (let* ((mod-time (handle-exceptions
+;; exn
+;; (current-seconds) ;; 0
+;; (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted
+;; (down-time (- (current-seconds) mod-time))
+;; (serv-dat (if (or (< num-serv-logs 10)
+;; (< down-time 900)) ;; day-seconds))
+;; (server:logf-get-start-info hed)
+;; '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at
+;; (serv-rec (cons mod-time serv-dat))
+;; (fmatch (string-match fname-rx hed))
+;; (pid (if fmatch (string->number (list-ref fmatch 2)) #f))
+;; (new-res (if (null? serv-dat)
+;; res
+;; (cons (append serv-rec (list pid)) res))))
+;; (if (null? tal)
+;; (if (and limit
+;; (> (length new-res) limit))
+;; new-res ;; (take new-res limit) <= need intelligent sorting before this will work
+;; new-res)
+;; (loop (car tal)(cdr tal) new-res)))))))))
+;;
+;; (define (server:get-num-alive srvlst)
+;; (let ((num-alive 0))
+;; (for-each
+;; (lambda (server)
+;; (match-let (((mod-time host port start-time pid)
+;; server))
+;; (let* ((uptime (- (current-seconds) mod-time))
+;; (runtime (if start-time
+;; (- mod-time start-time)
+;; 0)))
+;; (if (< uptime 5)(set! num-alive (+ num-alive 1))))))
+;; srvlst)
+;; num-alive))
+;;
+;; ;; given a list of servers get a list of valid servers, i.e. at least
+;; ;; 10 seconds old, has started and is less than 1 hour old and is
+;; ;; active (i.e. mod-time < 10 seconds
+;; ;;
+;; ;; mod-time host port start-time pid
+;; ;;
+;; ;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off
+;; ;; and servers should stick around for about two hours or so.
+;; ;;
+;; (define (server:get-best srvlst)
+;; (let* ((nums (server:get-num-servers))
+;; (now (current-seconds))
+;; (slst (sort
+;; (filter (lambda (rec)
+;; (if (and (list? rec)
+;; (> (length rec) 2))
+;; (let ((start-time (list-ref rec 3))
+;; (mod-time (list-ref rec 0)))
+;; ;; (print "start-time: " start-time " mod-time: " mod-time)
+;; (and start-time mod-time
+;; (> (- now start-time) 0) ;; been running at least 0 seconds
+;; (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds
+;; (< (- now start-time)
+;; (+ (- (string->number (or (configf:lookup *configdat* "server" "runtime") "3600"))
+;; 180)
+;; (random 360))) ;; under one hour running time +/- 180
+;; ))
+;; #f))
+;; srvlst)
+;; (lambda (a b)
+;; (< (list-ref a 3)
+;; (list-ref b 3))))))
+;; (if (> (length slst) nums)
+;; (take slst nums)
+;; slst)))
+;;
+;; (define (server:get-first-best areapath)
+;; (let ((srvrs (server:get-best (server:get-list areapath))))
+;; (if (and srvrs
+;; (not (null? srvrs)))
+;; (car srvrs)
+;; #f)))
+;;
+;; (define (server:get-rand-best areapath)
+;; (let ((srvrs (server:get-best (server:get-list areapath))))
+;; (if (and (list? srvrs)
+;; (not (null? srvrs)))
+;; (let* ((len (length srvrs))
+;; (idx (random len)))
+;; (list-ref srvrs idx))
+;; #f)))
+;;
+;;
+;; (define (server:record->url servr)
+;; (match-let (((mod-time host port start-time pid)
+;; servr))
+;; (if (and host port)
+;; (conc host ":" port)
+;; #f)))
+;;
+;; (define (server:get-client-signature) ;; BB> why is this proc named "get-"? it returns nothing -- set! has not return value.
+;; (if *my-client-signature* *my-client-signature*
+;; (let ((sig (server:mk-signature)))
+;; (set! *my-client-signature* sig)
+;; *my-client-signature*)))
+;;
+;; ;; kind start up of servers, wait 40 seconds before allowing another server for a given
+;; ;; run-id to be launched
+;; (define (server:kind-run areapath)
+;; (if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
+;; (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun
+;; (call-num (car last-run-dat))
+;; (when-run (cadr last-run-dat))
+;; (run-delay (+ (case call-num
+;; ((0) 0)
+;; ((1) 20)
+;; ((2) 300)
+;; (else 600))
+;; (random 5))) ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously
+;; (lock-file (conc areapath "/logs/server-start.lock")))
+;; (if (> (- (current-seconds) when-run) run-delay)
+;; (begin
+;; (common:simple-file-lock-and-wait lock-file expire-time: 15)
+;; (server:run areapath)
+;; (thread-sleep! 5) ;; don't release the lock for at least a few seconds
+;; (common:simple-file-release-lock lock-file)))
+;; (hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds))))))
+;;
+;; (define (server:start-and-wait areapath #!key (timeout 60))
+;; (let ((give-up-time (+ (current-seconds) timeout)))
+;; (let loop ((server-url (server:check-if-running areapath))
+;; (try-num 0))
+;; (if (or server-url
+;; (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
+;; server-url
+;; (let ((num-ok (length (server:get-best (server:get-list areapath)))))
+;; (if (and (> try-num 0) ;; first time through simply wait a little while then try again
+;; (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one
+;; (server:kind-run areapath))
+;; (thread-sleep! 5)
+;; (loop (server:check-if-running areapath)
+;; (+ try-num 1)))))))
+;;
+;; (define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.
+;;
+;; (define (server:get-num-servers #!key (numservers 2))
+;; (let ((ns (string->number
+;; (or (configf:lookup *configdat* "server" "numservers") "notanumber"))))
+;; (or ns numservers)))
+;;
+;; ;; no longer care if multiple servers are started by accident. older servers will drop off in time.
+;; ;;
+;; (define (server:check-if-running areapath) ;; #!key (numservers "2"))
+;; (let* ((ns (server:get-num-servers))
+;; (servers (server:get-best (server:get-list areapath))))
+;; ;; (print "servers: " servers " ns: " ns)
+;; (if (or (and servers
+;; (null? servers))
+;; (not servers)
+;; (and (list? servers)
+;; (< (length servers) (random ns)))) ;; somewhere between 0 and numservers
+;; #f
+;; (let loop ((hed (car servers))
+;; (tal (cdr servers)))
+;; (let ((res (server:check-server hed)))
+;; (if res
+;; res
+;; (if (null? tal)
+;; #f
+;; (loop (car tal)(cdr tal)))))))))
+;;
+;; ;; ping the given server
+;; ;;
+;; (define (server:check-server server-record)
+;; (let* ((server-url (server:record->url server-record))
+;; (res (case *transport-type*
+;; ((http)(server:ping server-url))
+;; ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
+;; )))
+;; (if res
+;; server-url
+;; #f)))
+;;
+;; ;; DO STUFF HERE - BUG
+;; (define (server:kill servr)
+;; (match-let (((mod-time hostname port start-time pid)
+;; servr))
+;; #;(tasks:kill-server hostname pid)
+;; #f
+;; ))
+;;
+;; ;; called in megatest.scm, host-port is string hostname:port
+;; ;;
+;; ;; NOTE: This is NOT called directly from clients as not all transports support a client running
+;; ;; in the same process as the server.
+;; ;;
+;; (define (server:ping host-port-in #!key (do-exit #f))
+;; (let ((host:port (if (not host-port-in) ;; use read-dotserver to find
+;; #f ;; (server:check-if-running *toppath*)
+;; ;; (if (number? host-port-in) ;; we were handed a server-id
+;; ;; (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in)))
+;; ;; ;; (print "srec: " srec " host-port-in: " host-port-in)
+;; ;; (if srec
+;; ;; (conc (vector-ref srec 3) ":" (vector-ref srec 4))
+;; ;; (conc "no such server-id " host-port-in)))
+;; host-port-in))) ;; )
+;; (let* ((host-port (if host:port
+;; (let ((slst (string-split host:port ":")))
+;; (if (eq? (length slst) 2)
+;; (list (car slst)(string->number (cadr slst)))
+;; #f))
+;; #f)))
+;; ;; (toppath (launch:setup)))
+;; ;; (print "host-port=" host-port)
+;; (if (not host-port)
+;; (begin
+;; (if host-port-in
+;; (debug:print 0 *default-log-port* "ERROR: bad host:port"))
+;; (if do-exit (exit 1))
+;; #f)
+;; (let* ((iface (car host-port))
+;; (port (cadr host-port))
+;; (server-dat (http-transport:client-connect iface port))
+;; (login-res (rmt:login-no-auto-client-setup server-dat)))
+;; (if (and (list? login-res)
+;; (car login-res))
+;; (begin
+;; ;; (print "LOGIN_OK")
+;; (if do-exit (exit 0))
+;; #t)
+;; (begin
+;; ;; (print "LOGIN_FAILED")
+;; (if do-exit (exit 1))
+;; #f)))))))
+;;
+;; ;; run ping in separate process, safest way in some cases
+;; ;;
+;; (define (server:ping-server ifaceport)
+;; (with-input-from-pipe
+;; (conc (common:get-megatest-exe) " -ping " ifaceport)
+;; (lambda ()
+;; (let loop ((inl (read-line))
+;; (res "NOREPLY"))
+;; (if (eof-object? inl)
+;; (case (string->symbol res)
+;; ((NOREPLY) #f)
+;; ((LOGIN_OK) #t)
+;; (else #f))
+;; (loop (read-line) inl))))))
+;;
+;; ;; NOT USED (well, ok, reference in rpc-transport but otherwise not used).
+;; ;;
+;; (define (server:login toppath)
+;; (lambda (toppath)
+;; (set! *db-last-access* (current-seconds)) ;; might not be needed.
+;; (if (equal? *toppath* toppath)
+;; #t
+;; #f)))
+;;
+;; ;; timeout is hms string: 1h 5m 3s, default is 1 minute
+;; ;;
+;; (define (server:expiration-timeout)
+;; (let ((tmo (configf:lookup *configdat* "server" "timeout")))
+;; (if (and (string? tmo)
+;; (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below
+;; (* 3600 (string->number tmo))
+;; 60)))
+;;
+;; (define (server:get-best-guess-address hostname)
+;; (let ((res #f))
+;; (for-each
+;; (lambda (adr)
+;; (if (not (eq? (u8vector-ref adr 0) 127))
+;; (set! res adr)))
+;; ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME
+;; (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
+;; (string-intersperse
+;; (map number->string
+;; (u8vector->list
+;; (if res res (hostname->ip hostname)))) ".")))
+;;
+;; ;; moving this here as it needs access to db and cannot be in common.
+;; ;;
+;; (define (server:writable-watchdog dbstruct)
+;; (thread-sleep! 0.05) ;; delay for startup
+;; (let ((legacy-sync (common:run-sync?))
+;; (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300))
+;; (debug-mode (debug:debug-mode 1))
+;; (last-time (current-seconds))
+;; (no-sync-db (db:open-no-sync-db))
+;; (sync-duration 0) ;; run time of the sync in milliseconds
+;; ;;(this-wd-num (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x)))
+;; )
+;; (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
+;; (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
+;; (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id) );; " this-wd-num="this-wd-num)
+;; (if (and legacy-sync (not *time-to-exit*))
+;; (let* (;;(dbstruct (db:setup))
+;; (mtdb (dbr:dbstruct-mtdb dbstruct))
+;; (mtpath (db:dbdat-get-path mtdb))
+;; (tmp-area (common:get-db-tmp-area))
+;; (start-file (conc tmp-area "/.start-sync"))
+;; (end-file (conc tmp-area "/.end-sync")))
+;; (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
+;; (let loop ()
+;; ;; sync for filesystem local db writes
+;; ;;
+;; (mutex-lock! *db-multi-sync-mutex*)
+;; (let* ((need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
+;; (sync-in-progress *db-sync-in-progress*)
+;; (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5))
+;; (should-sync (and (not *time-to-exit*)
+;; (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed
+;; (start-time (current-seconds))
+;; (cpu-load-adj (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)))
+;; (mt-mod-time (file-modification-time mtpath))
+;; (last-sync-start (if (common:file-exists? start-file)
+;; (file-modification-time start-file)
+;; 0))
+;; (last-sync-end (if (common:file-exists? end-file)
+;; (file-modification-time end-file)
+;; 10))
+;; (sync-period (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period
+;; (recently-synced (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db!
+;; (< mt-mod-time last-sync-start)))
+;; (sync-done (<= last-sync-start last-sync-end))
+;; (sync-stale (> start-time (+ last-sync-start sync-stale-seconds)))
+;; (will-sync (and (not *time-to-exit*) ;; do not start a sync if we are in the process of exiting
+;; (or need-sync should-sync)
+;; (or sync-done sync-stale)
+;; (not sync-in-progress)
+;; (not recently-synced))))
+;; (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. need-sync="need-sync" sync-in-progress=" sync-in-progress
+;; " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync
+;; " sync-done=" sync-done " sync-period=" sync-period)
+;; (if (and (> sync-period 5)
+;; (common:low-noise-print 30 "sync-period"))
+;; (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds."))
+;; ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced))
+;; ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
+;; (if will-sync (set! *db-sync-in-progress* #t))
+;; (mutex-unlock! *db-multi-sync-mutex*)
+;; (if will-sync
+;; (let (;; (max-sync-duration (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK!
+;; (sync-start (current-milliseconds)))
+;; (with-output-to-file start-file (lambda ()(print (current-process-id))))
+;;
+;; ;; put lock here
+;;
+;; ;; (if (or (not max-sync-duration)
+;; ;; (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally
+;; (let ((res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
+;; (set! sync-duration (- (current-milliseconds) sync-start))
+;; (if (> res 0) ;; some records were transferred, keep the db alive
+;; (begin
+;; (mutex-lock! *heartbeat-mutex*)
+;; (set! *db-last-access* (current-seconds))
+;; (mutex-unlock! *heartbeat-mutex*)
+;; (debug:print-info 0 *default-log-port* "sync called, " res " records transferred."))
+;; (debug:print-info 2 *default-log-port* "sync called but zero records transferred")))))
+;; ;; ;; TODO: factor this next routine out into a function
+;; ;; (with-input-from-pipe ;; this should not block other threads but need to verify this
+;; ;; (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*)
+;; ;; (lambda ()
+;; ;; (let loop ((inl (read-line))
+;; ;; (res #f))
+;; ;; (if (eof-object? inl)
+;; ;; (begin
+;; ;; (set! sync-duration (- (current-milliseconds) sync-start))
+;; ;; (cond
+;; ;; ((not res)
+;; ;; (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\""))
+;; ;; ((> res 0)
+;; ;; (mutex-lock! *heartbeat-mutex*)
+;; ;; (set! *db-last-access* (current-seconds))
+;; ;; (mutex-unlock! *heartbeat-mutex*))))
+;; ;; (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl)))
+;; ;; (if matches
+;; ;; (string->number (cadr matches))
+;; ;; #f))))
+;; ;; (loop (read-line)
+;; ;; (or num-synced res))))))))))
+;; (if will-sync
+;; (begin
+;; (mutex-lock! *db-multi-sync-mutex*)
+;; (set! *db-sync-in-progress* #f)
+;; (set! *db-last-sync* start-time)
+;; (with-output-to-file end-file (lambda ()(print (current-process-id))))
+;;
+;; ;; release lock here
+;;
+;; (mutex-unlock! *db-multi-sync-mutex*)))
+;; (if (and debug-mode
+;; (> (- start-time last-time) 60))
+;; (begin
+;; (set! last-time start-time)
+;; (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
+;;
+;; ;; keep going unless time to exit
+;; ;;
+;; (if (not *time-to-exit*)
+;; (let delay-loop ((count 0))
+;; ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
+;;
+;; (if (and (not *time-to-exit*)
+;; (< count 6)) ;; was 11, changing to 4.
+;; (begin
+;; (thread-sleep! 1)
+;; (delay-loop (+ count 1))))
+;; (if (not *time-to-exit*) (loop))))
+;; ;; time to exit, close the no-sync db here
+;; (db:no-sync-close-db no-sync-db)
+;; (if (common:low-noise-print 30)
+;; (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num)))))))
+;;