Index: Makefile
==================================================================
--- Makefile
+++ Makefile
@@ -28,16 +28,18 @@
ezsteps.scm lock-queue.scm rmt.scm api.scm \
subrun.scm portlogger.scm archive.scm env.scm \
diff-report.scm cgisetup/models/pgdb.scm
# module source files
-MSRCFILES = dbmod.scm dbfile.scm debugprint.scm mtargs.scm commonmod.scm
+MSRCFILES = dbmod.scm dbfile.scm debugprint.scm mtargs.scm commonmod.scm \
+ treemod.scm
-all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut tcmt
+all : $(PREFIX)/bin/.$(ARCHSTR) mtest ndboard dboard mtut tcmt
# dbmod.import.o is just a hack here
-mofiles/dbfile.o : mofiles/debugprint.o dbmod.import.o
+mofiles/dbfile.o : mofiles/debugprint.o mofiles/commonmod.o dbmod.import.o
+db.o : dbmod.import.o
mofiles/debugprint.o : mofiles/mtargs.o
# ftail.scm rmtmod.scm commonmod.scm removed
# MSRCFILES = ducttape-lib.scm pkts.scm stml2.scm cookie.scm mutils.scm \
# mtargs.scm commonmod.scm dbmod.scm adjutant.scm ulex.scm \
@@ -86,18 +88,21 @@
# ARCHSTR=$(shell bash -c "echo \$$MACHTYPE")
PNGFILES = $(shell cd docs/manual;ls *png)
-mtest: $(OFILES) readline-fix.scm megatest.o $(MOFILES) $(MOIMPFILES) megatest-version.scm
+mtest: $(OFILES) readline-fix.scm $(MOFILES) $(MOIMPFILES) megatest.o megatest-version.scm
csc $(CSCOPTS) $(OFILES) $(MOFILES) $(MOIMPFILES) megatest.o -o mtest
showmtesthash:
@echo $(MTESTHASH)
dboard : $(OFILES) $(GOFILES) dashboard.scm $(MOFILES) $(MOIMPFILES) megatest-version.scm megatest-fossil-hash.scm
csc $(CSCOPTS) $(OFILES) dashboard.scm $(GOFILES) $(MOFILES) $(MOIMPFILES) -o dboard
+
+ndboard : $(OFILES) $(GOFILES) newdashboard.scm $(MOFILES) $(MOIMPFILES) megatest-version.scm megatest-fossil-hash.scm
+ csc $(CSCOPTS) $(OFILES) newdashboard.scm $(GOFILES) $(MOFILES) $(MOIMPFILES) -o ndboard
mtut: $(OFILES) $(MOFILES) megatest-fossil-hash.scm mtut.scm megatest-version.scm
csc $(CSCOPTS) $(OFILES) $(MOFILES) mtut.scm -o mtut
# include makefile.inc
@@ -132,11 +137,11 @@
ezsteps.o
# mofiles/rmtmod.o \
# mofiles/commonmod.o \
-tcmt : $(TCMTOBJS) tcmt.scm megatest-version.scm
+tcmt : $(TCMTOBJS) tcmt.scm megatest-version.scm $(MOFILES) $(MOIMPFILES)
csc $(CSCOPTS) $(TCMTOBJS) $(MOFILES) $(MOIMPFILES) tcmt.scm -o tcmt
# install documentation to $(PREFIX)/docs
# DOES NOT REBUILD DOCS
#
@@ -169,17 +174,20 @@
monitor.o dashboard.o archive.o megatest.o : db_records.scm megatest-fossil-hash.scm
tests.o runs.o dashboard.o dashboard-tests.o dashboard-context-menu.o dashboard-main.o : run_records.scm
db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm
-db.o api.o : mofiles/dbmod.o mofiles/dbfile.o mofiles/debugprint.o mofiles/commonmod.o
+db.o api.o : mofiles/dbmod.o mofiles/dbfile.o mofiles/debugprint.o mofiles/commonmod.o dbmod.import.o
tests.o tasks.o dashboard-tasks.o : task_records.scm
runs.o : test_records.scm
-megatest.o : megatest-fossil-hash.scm megatest-version.scm
+mofiles-made : $(MOFILES)
+ make $(MOIMPFILES)
+
+megatest.o : mofiles-made megatest-fossil-hash.scm megatest-version.scm $(MOFILES) $(MOIMPFILES)
rmt.scm client.scm common.scm configf.scm dashboard-guimonitor.scm dashboard-tests.scm dashboard.scm db.scm dcommon.scm ezsteps.scm http-transport.scm index-tree.scm items.scm keys.scm launch.scm megatest.scm monitor.scm mt.scm newdashboard.scm runconfig.scm runs.scm server.scm tdb.scm tests.scm tree.scm : common_records.scm megatest-version.scm
common_records.scm : altdb.scm
@@ -223,121 +231,20 @@
@echo Installing to PREFIX=$(PREFIX)
$(INSTALL) mtest $(PREFIX)/bin/.$(ARCHSTR)/mtest
utils/mk_wrapper $(PREFIX) mtest $(PREFIX)/bin/megatest
chmod a+x $(PREFIX)/bin/megatest
-$(PREFIX)/bin/.$(ARCHSTR)/ndboard : ndboard
- $(INSTALL) ndboard $(PREFIX)/bin/.$(ARCHSTR)/ndboard
-
-$(PREFIX)/bin/newdashboard : $(PREFIX)/bin/.$(ARCHSTR)/ndboard utils/mk_wrapper
- utils/mk_wrapper $(PREFIX) ndboard $(PREFIX)/bin/newdashboard
- chmod a+x $(PREFIX)/bin/newdashboard
-
-# mtutil
-
-$(PREFIX)/bin/.$(ARCHSTR)/mtut : mtut
- $(INSTALL) mtut $(PREFIX)/bin/.$(ARCHSTR)/mtut
-
-install-mtut : mtut
- $(INSTALL) mtut $(PREFIX)/bin/mtut
-
-$(PREFIX)/bin/mtutil : $(PREFIX)/bin/.$(ARCHSTR)/mtut utils/mk_wrapper
- utils/mk_wrapper $(PREFIX) mtut $(PREFIX)/bin/mtutil
- chmod a+x $(PREFIX)/bin/mtutil
-
-# mtexec
-
-mtexec: $(OFILES) $(MOFILES) megatest-fossil-hash.scm mtexec.scm
- csc $(CSCOPTS) $(OFILES) $(MOFILES) mtexec.scm -o mtexec
-
-$(PREFIX)/bin/.$(ARCHSTR)/mtexec : mtexec
- $(INSTALL) mtexec $(PREFIX)/bin/.$(ARCHSTR)/mtexec
-
-$(PREFIX)/bin/mtexec : $(PREFIX)/bin/.$(ARCHSTR)/mtexec utils/mk_wrapper
- utils/mk_wrapper $(PREFIX) mtexec $(PREFIX)/bin/mtexec
- chmod a+x $(PREFIX)/bin/mtexec
-
-# tcmt
-
-$(PREFIX)/bin/.$(ARCHSTR)/tcmt : tcmt
- $(INSTALL) tcmt $(PREFIX)/bin/.$(ARCHSTR)/tcmt
-
-$(PREFIX)/bin/tcmt : $(PREFIX)/bin/.$(ARCHSTR)/tcmt utils/mk_wrapper
- utils/mk_wrapper $(PREFIX) tcmt $(PREFIX)/bin/tcmt
- chmod a+x $(PREFIX)/bin/tcmt
-
-$(PREFIX)/bin/mt_laststep : utils/mt_laststep
- $(INSTALL) $< $@
- chmod a+x $@
-
-$(PREFIX)/bin/mt_runstep : utils/mt_runstep
- $(INSTALL) $< $@
- chmod a+x $@
-
-$(PREFIX)/bin/serialize-env: serialize-env.scm
- csc serialize-env.scm
- $(INSTALL) serialize-env $@
-
-$(PREFIX)/bin/mt_ezstep : utils/mt_ezstep
- $(INSTALL) $< $@
- chmod a+x $@
-
-$(PREFIX)/bin/mt_xterm : utils/mt_xterm
- $(INSTALL) $< $@
- chmod a+x $@
-
-$(PREFIX)/bin/nbfake : utils/nbfake
- $(INSTALL) $< $@
- chmod a+x $@
-
-$(PREFIX)/bin/remrun : utils/remrun
- $(INSTALL) $< $@
- chmod a+x $@
-
-$(PREFIX)/bin/viewscreen : utils/viewscreen
- $(INSTALL) $< $@
- chmod a+x $@
-
-$(PREFIX)/bin/nbfind : utils/nbfind
- $(INSTALL) $< $@
- chmod a+x $@
-
-$(PREFIX)/bin/mtrunner : utils/mtrunner
- $(INSTALL) $< $@
- chmod a+x $@
-
-$(PREFIX)/bin/mt-old-to-new.sh : utils/mt-old-to-new.sh
- $(INSTALL) $< $@
- chmod a+x $@
-
-$(PREFIX)/bin/mt-new-to-old.sh : utils/mt-new-to-old.sh
- $(INSTALL) $< $@
- chmod a+x $@
-
-
-deploytarg/nbfake : utils/nbfake
- $(INSTALL) $< $@
- chmod a+x $@
-
-deploytarg/viewscreen : utils/viewscreen
- $(INSTALL) $< $@
- chmod a+x $@
-
-deploytarg/nbfind : utils/nbfind
- $(INSTALL) $< $@
- chmod a+x $@
-
-$(PREFIX)/bin/mtest-reaper: helpers/mtest-reaper.scm helpers/ducttape-lib.scm helpers/inteldate.scm helpers/mimetypes.scm
- make -C helpers $@ PREFIX=$(PREFIX) INSTALL=$(INSTALL) ARCHSTR=$(ARCHSTR)
-
-mtest-reaper: $(PREFIX)/bin/mtest-reaper
-
-# install dashboard as dboard so wrapper script can be called dashboard
$(PREFIX)/bin/.$(ARCHSTR)/dboard : dboard $(FILES) utils/mk_wrapper
utils/mk_wrapper $(PREFIX) dboard $(PREFIX)/bin/dashboard
chmod a+x $(PREFIX)/bin/dashboard
$(INSTALL) dboard $(PREFIX)/bin/.$(ARCHSTR)/dboard
+
+# install dashboard as dboard so wrapper script can be called dashboard
+$(PREFIX)/bin/.$(ARCHSTR)/ndboard : ndboard $(FILES) utils/mk_wrapper
+ utils/mk_wrapper $(PREFIX) ndboard $(PREFIX)/bin/newdashboard
+ chmod a+x $(PREFIX)/bin/newdashboard
+ $(INSTALL) ndboard $(PREFIX)/bin/.$(ARCHSTR)/ndboard
$(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so : lib/libpangox-1.0.so
if [[ $(ARCHSTR) == 12.5 ]]; then \
mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib; \
$(INSTALL) lib/libpangox-1.0.so $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so; \
@@ -354,11 +261,12 @@
mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib; \
$(INSTALL) lib/libxcb-xlib.so.0 $(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0; \
fi
install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \
- $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \
+ $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/.$(ARCHSTR)/ndboard $(PREFIX)/bin/dashboard \
+ $(HELPERS) $(PREFIX)/bin/nbfake \
$(PREFIX)/bin/.$(ARCHSTR)/mtexec $(PREFIX)/bin/mtexec $(PREFIX)/bin/serialize-env \
$(PREFIX)/bin/nbfind $(PREFIX)/bin/mtrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \
$(PREFIX)/bin/mt-old-to-new.sh $(PREFIX)/bin/mt-new-to-old.sh \
$(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun \
$(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \
@@ -365,11 +273,10 @@
$(PREFIX)/bin/tcmt $(PREFIX)/share/db/mt-pg.sql \
$(PREFIX)/share/js/jquery-3.1.0.slim.min.js \
$(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so \
$(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0 \
$(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0
-# $(PREFIX)/bin/.$(ARCHSTR)/ndboard
# $(PREFIX)/bin/newdashboard
$(PREFIX)/bin/.$(ARCHSTR) :
mkdir -p $(PREFIX)/bin/.$(ARCHSTR)
@@ -390,11 +297,11 @@
$(PREFIX)/megatest $(PREFIX)/dashboard mtest mtutil mtut \
tcmt readline-fix.scm serialize-env dboard *.o \
megatest-fossil-hash.* altdb.scm mofiles/*.o \
mofiles/*.o vg.o cookie.o dashboard-main.o \
ducttape-lib.o ftail.o mutils.o pkts.o rmtmod.o stml2.o \
- tcmt.o *.import.scm *.import.o
+ tcmt.o *.import.scm *.import.o ndboard
rm -f $(OFILES) $(GOFILES) $(MOFILES) $(TCMTOBJS) \
$(PREFIX)/megatest $(PREFIX)/dashboard mtest mtutil mtut \
tcmt ftail.import.scm readline-fix.scm serialize-env \
dboard dboard.o megatest.o dashboard.o \
megatest-fossil-hash.* altdb.scm mofiles/*.o vg.o
Index: api.scm
==================================================================
--- api.scm
+++ api.scm
@@ -429,8 +429,8 @@
;; (boolean? res))
;; res
;; (list "ERROR, not string, list, number or boolean" 1 cmd params res)))))
(db:obj->string res transport: 'http)))
(begin
- (debug:print 0 *default-log-port* "Server refused to process request. Sever id mismatch. recived " key " expected: " *server-id* ".\nOther arguments recived: cmd=" cmd " params = " params)
+ (debug:print 0 *default-log-port* "Server refused to process request. Server id mismatch. recived " key " expected: " *server-id* ".\nOther arguments recived: cmd=" cmd " params = " params)
(db:obj->string (conc "Server refused to process request server-id mismatch: " key ", " *server-id*) transport: 'http)))))
Index: archive.scm
==================================================================
--- archive.scm
+++ archive.scm
@@ -346,11 +346,11 @@
(rsync-exe (or (configf:lookup *configdat* "archive" "rsync") "rsync"))
(print-prefix "Running: ")
(archive-info (archive:allocate-new-archive-block blockid-cache *toppath* tsname min-space target-patt run-patt "megatest-db"))
(archive-dir (if archive-info (cdr archive-info) #f))
(archive-id (if archive-info (car archive-info) -1))
- (home-host (common:get-homehost))
+ (home-host (server:choose-server *toppath* 'homehost))
(archive-time (seconds->std-time-str (current-seconds)))
(archive-staging-db (conc *toppath* "/.db-snapshot/archive_" archive-time))
(tmp-db-path (conc (common:get-db-tmp-area) "/megatest.db"))
(dbfile (conc archive-staging-db "/megatest.db")))
(create-directory archive-staging-db #t)
Index: client.scm
==================================================================
--- client.scm
+++ client.scm
@@ -44,24 +44,12 @@
#;(define (client:logout serverdat)
(let ((ok (and (socket? serverdat)
(cdb:logout serverdat *toppath* (client:get-signature)))))
ok))
-#;(define (client:connect iface port)
- (http-transport:client-connect iface port)
- #;(case (server:get-transport)
- ((rpc) (rpc:client-connect iface port))
- ((http) (http:client-connect iface port))
- ((zmq) (zmq:client-connect iface port))
- (else (rpc:client-connect iface port))))
-
(define (client:setup areapath #!key (remaining-tries 100) (failed-connects 0))
- (client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects)
- #;(case (server:get-transport)
- ((rpc) (rpc-transport:client-setup remaining-tries: remaining-tries failed-connects: failed-connects)) ;;(client:setup-rpc run-id))
- ((http)(client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects))
- (else (rpc-transport:client-setup remaining-tries: remaining-tries failed-connects: failed-connects)))) ;; (client:setup-rpc run-id))))
+ (client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects))
;; Do all the connection work, look up the transport type and set up the
;; connection if required.
;;
;; There are two scenarios.
@@ -89,53 +77,52 @@
(exit 1))
;;
;; Alternatively here, we can get the list of candidate servers and work our way
;; through them searching for a good one.
;;
- (let* ((server-dat (server:get-rand-best areapath)) ;; (server:get-first-best areapath))
+ (let* ((server-dat (server:choose-server areapath 'best))
(runremote (or area-dat *runremote*)))
(if (not server-dat) ;; no server found
(client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))
- (let ((host (cadr server-dat))
- (port (caddr server-dat))
- (server-id (caddr (cddr server-dat))))
- (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
- (if (and (not area-dat)
- (not *runremote*))
- (begin
- (set! *runremote* (make-remote))
- (let* ((server-info (remote-server-info *runremote*)))
- (if server-info
- (begin
- (remote-server-url-set! *runremote* (server:record->url server-info))
- (remote-server-id-set! *runremote* (server:record->id server-info)))))))
- (if (and host port server-id)
- (let* ((start-res (case *transport-type*
- ((http)(http-transport:client-connect host port server-id))))
- (ping-res (case *transport-type*
- ((http)(rmt:login-no-auto-client-setup start-res)))))
- (if (and start-res
- ping-res)
- (let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago
- (if runremote
- (begin
- (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res)
- (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
- start-res)
- (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))))
- (begin ;; login failed but have a server record, clean out the record and try again
- (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid. Fixes part of Randy;s ticket 1405717332
- (case *transport-type*
- ((http)(http-transport:close-connections)))
- (if *runremote*
- (remote-conndat-set! runremote #f) ;; (hash-table-delete! runremote run-id)
- )
- (thread-sleep! 1)
- (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))
- )))
- (begin ;; no server registered
- ;; (server:kind-run areapath)
- (server:start-and-wait areapath)
- (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries)
- (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms.
- (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)))))))))
+ (match server-dat
+ ((host port start-time server-id pid)
+ (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
+ (if (and (not area-dat)
+ (not *runremote*))
+ (begin
+ (set! *runremote* (make-remote))
+ (let* ((server-info (remote-server-info *runremote*)))
+ (if server-info
+ (begin
+ (remote-server-url-set! *runremote* (server:record->url server-info))
+ (remote-server-id-set! *runremote* (server:record->id server-info)))))))
+ (if (and host port server-id)
+ (let* ((start-res (http-transport:client-connect host port server-id))
+ (ping-res (rmt:login-no-auto-client-setup start-res)))
+ (if (and start-res
+ ping-res)
+ (let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago
+ (if runremote
+ (begin
+ (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res)
+ (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
+ start-res)
+ (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))))
+ (begin ;; login failed but have a server record, clean out the record and try again
+ (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid. Fixes part of Randy;s ticket 1405717332
+ (case *transport-type*
+ ((http)(http-transport:close-connections)))
+ (if *runremote*
+ (remote-conndat-set! runremote #f) ;; (hash-table-delete! runremote run-id)
+ )
+ (thread-sleep! 1)
+ (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))
+ )))
+ (begin ;; no server registered
+ ;; (server:kind-run areapath)
+ (server:start-and-wait areapath)
+ (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries)
+ (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms.
+ (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)))))
+ (else
+ (debug:print 0 *default-log-port* "ERROR: malformed server-dat="server-dat)))))))
Index: common.scm
==================================================================
--- common.scm
+++ common.scm
@@ -142,10 +142,11 @@
(define *passnum* 0) ;; when running track calls to run-tests or similar
;; (define *alt-log-file* #f) ;; used by -log
;; (define *common:denoise* (make-hash-table)) ;; for low noise printing
(define *default-log-port* (current-error-port))
(define *time-zero* (current-seconds)) ;; for the watchdog
+(define *on-exit-procs* '()) ;; add procs to this list to be executed on exit
(define *default-area-tag* "local")
;; DATABASE
;; (define *dbstruct-dbs* #f) ;; used to cache the dbstruct in db:setup. Goal is to remove this.
;; db stats
@@ -315,11 +316,14 @@
(define (common:logpro-exit-code->test-status exit-code)
(status-sym->string (common:logpro-exit-code->status-sym exit-code)))
(defstruct remote
- (hh-dat (common:get-homehost)) ;; homehost record ( addr . hhflag )
+ (hh-dat (let ((res (or (server:choose-server *toppath* 'homehost)
+ (cons #f #f))))
+ (assert (pair? res)(conc "FATAL: hh-dat should be a pair, got "res))
+ res))
(server-url #f) ;; (server:check-if-running *toppath*) #f))
(server-id #f)
(server-info (if *toppath* (server:check-if-running *toppath*) #f))
(last-server-check 0) ;; last time we checked to see if the server was alive
(connect-time (current-seconds))
@@ -1305,72 +1309,10 @@
;;======================================================================
;; logic for getting homehost. Returns (host . at-home)
;; IF *toppath* is not set, wait up to five seconds trying every two seconds
;; (this is to accomodate the watchdog)
;;
-(define (common:get-homehost #!key (trynum 5))
- ;; called often especially at start up. use mutex to eliminate collisions
- (mutex-lock! *homehost-mutex*)
- (cond
- (*home-host*
- (mutex-unlock! *homehost-mutex*)
- *home-host*)
- ((not *toppath*)
- (mutex-unlock! *homehost-mutex*)
- (launch:setup) ;; safely mutexed now
- (if (> trynum 0)
- (begin
- (thread-sleep! 2)
- (common:get-homehost trynum: (- trynum 1)))
- #f))
- (else
- (let* ((currhost (get-host-name))
- (bestadrs (server:get-best-guess-address currhost))
- ;; first look in config, then look in file .homehost, create it if not found
- (homehost (or (configf:lookup *configdat* "server" "homehost" )
- (handle-exceptions
- exn
- (if (> trynum 0)
- (let ((delay-time (* (- 5 trynum) 5)))
- (mutex-unlock! *homehost-mutex*)
- (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying "
- delay-time " seconds and trying again, message: " ((condition-property-accessor 'exn 'message) exn)
- ", exn=" exn)
- (thread-sleep! delay-time)
- (common:get-homehost trynum: (- trynum 1)))
- (begin
- (mutex-unlock! *homehost-mutex*)
- (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)
- "] Failed to read .homehost file after trying five times. Giving up and exiting, message: "
- ((condition-property-accessor 'exn 'message) exn))
- (exit 1)))
- (let ((hhf (conc *toppath* "/.homehost")))
- (if (common:file-exists? hhf)
- (with-input-from-file hhf read-line)
- (if (file-write-access? *toppath*)
- (begin
- (with-output-to-file hhf
- (lambda ()
- (print bestadrs)))
- (begin
- (mutex-unlock! *homehost-mutex*)
- (car (common:get-homehost))))
- #f))))))
- (at-home (or (equal? homehost currhost)
- (equal? homehost bestadrs))))
- (set! *home-host* (cons homehost at-home))
- (mutex-unlock! *homehost-mutex*)
- *home-host*))))
-
-;;======================================================================
-;; am I on the homehost?
-;;
-(define (common:on-homehost?)
- (let ((hh (common:get-homehost)))
- (if hh
- (cdr hh)
- #f)))
;;======================================================================
;; do we honor the caches of the config files?
;;
(define (common:use-cache?)
@@ -2049,11 +1991,11 @@
(if (null? tal) #f (loop (car tal)(cdr tal) best-host)))))))))
(define (common:wait-for-homehost-load maxnormload msg)
(let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
#f
- (common:get-homehost)))
+ (server:choose-server *toppath* 'homehost)))
(hh (if hh-dat (car hh-dat) #f)))
(common:wait-for-normalized-load maxnormload msg hh)))
(define (common:get-num-cpus remote-host)
(let* ((actual-host (or remote-host (get-host-name))))
@@ -3344,11 +3286,11 @@
pktsdirs))
;;======================================================================
;; use-lt is use linktree "lt" link to find pkts dir
(define (common:save-pkt pktalist-in mtconf use-lt #!key (add-only #f)) ;; add-only saves the pkt only if there is a parent already
- (if (or add-only
+ (if (or (not add-only)
(hash-table-exists? *pkts-info* 'last-parent))
(let* ((parent (hash-table-ref/default *pkts-info* 'last-parent #f))
(pktalist (if parent
(cons `(parent . ,parent)
pktalist-in)
@@ -3359,10 +3301,11 @@
(let ((pktsdir (or (hash-table-ref/default *pkts-info* 'pkts-dir #f)
(let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt))
(pktsdir (car pktsdirs))) ;; assume it is there
(hash-table-set! *pkts-info* 'pkts-dir pktsdir)
pktsdir))))
+ (debug:print 0 *default-log-port* "pktsdir: "pktsdir)
(handle-exceptions
exn
(debug:print-info 0 "failed to write out packet to " pktsdir ", exn=" exn) ;; don't care if this failed for now but MUST FIX - BUG!!
(if (not (file-exists? pktsdir))
(create-directory pktsdir #t))
Index: dashboard.scm
==================================================================
--- dashboard.scm
+++ dashboard.scm
@@ -16,10 +16,28 @@
;; You should have received a copy of the GNU General Public License
;; along with Megatest. If not, see .
;;
;;======================================================================
+(declare (uses common))
+(declare (uses margs))
+(declare (uses keys))
+(declare (uses items))
+(declare (uses db))
+(declare (uses configf))
+(declare (uses process))
+(declare (uses launch))
+(declare (uses runs))
+(declare (uses dashboard-tests))
+(declare (uses treemod))
+(declare (uses dcommon))
+(declare (uses dashboard-context-menu))
+(declare (uses vg))
+(declare (uses subrun))
+(declare (uses mt))
+(declare (uses dbfile))
+
(use format)
(require-library iup)
(import (prefix iup iup:))
@@ -27,28 +45,11 @@
(import canvas-draw-iup)
(use ducttape-lib)
(use sqlite3 srfi-1 posix regex regex-case srfi-69 typed-records sparse-vectors) ;; defstruct
(import (prefix sqlite3 sqlite3:))
(import dbfile)
-
-(declare (uses common))
-(declare (uses margs))
-(declare (uses keys))
-(declare (uses items))
-(declare (uses db))
-(declare (uses configf))
-(declare (uses process))
-(declare (uses launch))
-(declare (uses runs))
-(declare (uses dashboard-tests))
-(declare (uses tree))
-(declare (uses dcommon))
-(declare (uses dashboard-context-menu))
-(declare (uses vg))
-(declare (uses subrun))
-(declare (uses mt))
-(declare (uses dbfile))
+(import treemod)
(include "common_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "task_records.scm")
@@ -637,11 +638,11 @@
;;
(define (dboard:get-tests-for-run-duplicate tabdat run-id run testnamepatt key-vals)
(let* ((start-time (current-seconds))
(access-mode (dboard:tabdat-access-mode tabdat))
(num-to-get (string->number (or (configf:lookup *configdat* "setup" "num-tests-to-get")
- "200")))
+ "400")))
(states (hash-table-keys (dboard:tabdat-state-ignore-hash tabdat)))
(statuses (hash-table-keys (dboard:tabdat-status-ignore-hash tabdat)))
(do-not-use-db-file-timestamps #t) ;; (configf:lookup *configdat* "setup" "do-not-use-db-file-timestamps")) ;; this still hosts runs-summary-tab
(do-not-use-query-timestamps #t) ;; (configf:lookup *configdat* "setup" "do-not-use-query-timestamps")) ;; this no longer troubles runs-summary-tab
(sort-info (get-curr-sort))
@@ -3809,13 +3810,13 @@
(debug:print 0 *default-log-port* "Failed to find megatest.config, exiting")
(exit 1)
)
)
- (if (not (common:on-homehost?))
+ #;(if (not (common:on-homehost?))
(begin
- (debug:print 0 *default-log-port* "WARNING: You are starting the dashboard on a machine that is not the homehost:" (common:get-homehost))
+ (debug:print 0 *default-log-port* "WARNING: You are starting the dashboard on a machine that is not the homehost:" (server:get-homehost))
(debug:print 0 *default-log-port* "It will be slower.")
))
(if (and (common:file-exists? mtdb-path)
Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -477,11 +477,11 @@
(data-synced 0) ;; count of changed records
(tmp-area (common:get-db-tmp-area))
(old2new (member 'old2new options))
(dejunk (member 'dejunk options))
(killservers (member 'killservers options))
- (servers (server:get-list *toppath*))
+ (servers (server:choose-server *toppath* 'all-valid)) ;; (server:get-list *toppath*))
(src-area (if old2new *toppath* tmp-area))
(dest-area (if old2new tmp-area *toppath*))
(dbfiles (if old2new (glob (conc *toppath* "/.megatest/*.db")) (glob (conc tmp-area "/.megatest/*.db"))))
(keys (db:get-keys dbstruct))
(sync-durations (make-hash-table)))
@@ -4586,31 +4586,41 @@
))
(define (std-exit-procedure)
;;(common:telemetry-log-close)
- (on-exit (lambda () 0))
+ (on-exit (lambda () 0)) ;; why is this here?
;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*)
(let ((no-hurry (if *time-to-exit* ;; hurry up
#f
(begin
(set! *time-to-exit* #t)
#t))))
(debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.")
- (if (and no-hurry (debug:debug-mode 18))
+ (if (and no-hurry
+ (debug:debug-mode 18))
(rmt:print-db-stats))
(let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds
(if *dbstruct-dbs* (db:close-all *dbstruct-dbs*)) ;; one second allocated
+ (if (list? *on-exit-procs*)
+ (for-each
+ (lambda (proc)
+ (proc))
+ *on-exit-procs*))
(if *task-db*
(let ((db (cdr *task-db*)))
(if (sqlite3:database? db)
(begin
(sqlite3:interrupt! db)
(sqlite3:finalize! db #t)
;; (vector-set! *task-db* 0 #f)
(set! *task-db* #f)))))
- (http-client#close-all-connections!)
+ (if (and (not (args:get-arg "-server"))
+ *runremote*)
+ (begin
+ (debug:print-info 0 *default-log-port* "Closing all client connections...")
+ (http-client#close-all-connections!)))
;; (if (and *runremote*
;; (remote-conndat *runremote*))
;; (begin
;; (http-client#close-all-connections!))) ;; for http-client
(if (not (eq? *default-log-port* (current-error-port)))
Index: dbfile.scm
==================================================================
--- dbfile.scm
+++ dbfile.scm
@@ -1015,11 +1015,11 @@
(dbfile:print-err "INFO: "jfile" exists, delaying to reduce database load")
(thread-sleep! 0.2)))
(if (and use-mutex
(common:low-noise-print 120 "over-50-parallel-api-requests"))
(dbfile:print-err *api-process-request-count* " parallel api requests being processed in process "
- (current-process-id) ", throttling access"))
+ (current-process-id))) ;; ", throttling access"))
(condition-case
(begin
(if use-mutex (mutex-lock! *db-with-db-mutex*))
(let ((res (apply proc dbdat db params))) ;; the actual call is here.
(if use-mutex (mutex-unlock! *db-with-db-mutex*))
Index: dcommon.scm
==================================================================
--- dcommon.scm
+++ dcommon.scm
@@ -704,11 +704,11 @@
#:numlin-visible 5
))
(colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId"))
(updater (lambda ()
(if (dashboard:monitor-changed? commondat tabdat)
- (let ((servers (server:get-list *toppath* limit: 10)))
+ (let ((servers (server:choose-server *toppath* 'all-valid))) ;; (server:get-list *toppath* limit: 10)))
(iup:attribute-set! servers-matrix "NUMLIN" (length servers))
;; (set! colnum 0)
;; (for-each (lambda (colname)
;; ;; (print "colnum: " colnum " colname: " colname)
;; (iup:attribute-set! servers-matrix (conc "0:" colnum) colname)
@@ -716,26 +716,27 @@
;; colnames)
(set! rownum 1)
(for-each
(lambda (server)
(set! colnum 0)
- (match-let (((mod-time host port start-time server-id pid)
+ ;; (("192.168.0.127" 60215 1669088591.0 "c85484f764df7a8550b0224409bd4bcd")
+ (match-let (((host port start-time server-id pid)
server))
- (let* ((uptime (- (current-seconds) mod-time))
+ (let* (;; (uptime (- (current-seconds) mod-time))
(runtime (if start-time
- (- mod-time start-time)
+ (- (current-seconds) start-time)
0))
(vals (list "-" ;; (vector-ref server 0) ;; Id
"-" ;; (vector-ref server 9) ;; MT-Ver
pid ;; (vector-ref server 1) ;; Pid
host ;; (vector-ref server 2) ;; Hostname
(conc host ":" port) ;; (conc (vector-ref server 3) ":" (vector-ref server 4)) ;; IP:Port
(seconds->hr-min-sec runtime) ;; (- (current-seconds) start-time)) ;; (vector-ref server 6)))
- (cond
- ((< uptime 5) "alive")
- ((< uptime 16) "probably alive");; less than 15 seconds since mod, call it alive (vector-ref server 8) ;; State
- (else "dead"))
+ "-" #;(cond
+ ((< uptime 5) "alive")
+ ((< uptime 16) "probably alive");; less than 15 seconds since mod, call it alive (vector-ref server 8) ;; State
+ (else "dead"))
"-" ;; (vector-ref server 12) ;; RunId
)))
(for-each (lambda (val)
(let* ((row-col (conc rownum ":" colnum))
(curr-val (iup:attribute servers-matrix row-col)))
@@ -745,11 +746,11 @@
(iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum))))
(set! colnum (+ 1 colnum))))
vals)
(set! rownum (+ rownum 1)))
(iup:attribute-set! servers-matrix "REDRAW" "ALL")))
- (sort servers (lambda (a b)(> (car a)(car b))))))))))
+ (sort servers (lambda (a b)(> (caddr a)(caddr b))))))))))
(set! colnum 0)
(for-each (lambda (colname)
(iup:attribute-set! servers-matrix (conc "0:" colnum) colname)
(iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum))
(set! colnum (+ colnum 1)))
Index: http-transport.scm
==================================================================
--- http-transport.scm
+++ http-transport.scm
@@ -285,10 +285,11 @@
(begin
(debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
(debug:print 0 *default-log-port* " message: " msg ", exn=" exn)
(debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey"))
(debug:print 0 *default-log-port* " call-chain: " call-chain)))
+ ;; what if another thread is communicating ok? Can't happen due to mutex
(set! *runremote* #f)
(set! runremote #f)
;; (if runremote
;; (remote-conndat-set! runremote #f))
;; Killing associated server to allow clean retry.")
@@ -391,11 +392,12 @@
(debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!"))))
;;
;; connect
;;
-(define (http-transport:client-connect iface port server-id)
+(define (http-transport:client-connect iface port server-id)
+ (debug:print-info 0 *default-log-port* "Connecting to client at "iface":"port", with server-id "server-id)
(let* ((api-url (conc "http://" iface ":" port "/api"))
(api-uri (uri-reference (conc "http://" iface ":" port "/api")))
(api-req (make-request method: 'POST uri: api-uri))
(server-dat (vector iface port api-uri api-url api-req (current-seconds) server-id)))
server-dat))
@@ -409,11 +411,12 @@
(define (http-transport:keep-running)
;; if none running or if > 20 seconds since
;; server last used then start shutdown
;; This thread waits for the server to come alive
(debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")
- (let* ((sdat #f)
+ (let* ((servinfofile #f)
+ (sdat #f)
(no-sync-db (db:open-no-sync-db))
(tmp-area (common:get-db-tmp-area))
(started-file (conc tmp-area "/.server-started"))
(server-start-time (current-seconds))
(server-info (let loop ((start-time (current-seconds))
@@ -426,13 +429,30 @@
(set! sdat *server-info*)
(mutex-unlock! *heartbeat-mutex*)
(if (and sdat
(not changed)
(> (- (current-seconds) start-time) 2))
- (begin
+ (let* ((servinfodir (conc *toppath*"/.servinfo"))
+ (ipaddr (car sdat))
+ (port (cadr sdat))
+ (servinf (conc servinfodir"/"ipaddr":"port)))
+ (set! servinfofile servinf)
+ (if (not (file-exists? servinfodir))
+ (create-directory servinfodir #t))
+ (with-output-to-file servinf
+ (lambda ()
+ (let* ((serv-id (server:mk-signature)))
+ (set! *server-id* serv-id)
+ (print "SERVER STARTED: "ipaddr":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id))
+ (print "started: "(seconds->year-week/day-time (current-seconds))))))
+ (set! *on-exit-procs* (cons
+ (lambda ()
+ (delete-file* servinf))
+ *on-exit-procs*))
+ ;; put data about this server into a simple flat file host.port
(debug:print-info 0 *default-log-port* "Received server alive signature")
- (common:save-pkt `((action . alive)
+ #;(common:save-pkt `((action . alive)
(T . server)
(pid . ,(current-process-id))
(ipaddr . ,(car sdat))
(port . ,(cadr sdat)))
*configdat* #t)
@@ -439,13 +459,16 @@
sdat)
(begin
(debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
(sleep 4)
(if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
- (begin
+ (let* ((ipaddr (car sdat))
+ (port (cadr sdat))
+ (servinf (conc *toppath*"/.servinfo/"ipaddr":"port)))
(debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
- (common:save-pkt `((action . died)
+ ;; (delete-file* servinf) ;; handled by on-exit, can be removed
+ #;(common:save-pkt `((action . died)
(T . server)
(pid . ,(current-process-id))
(ipaddr . ,(car sdat))
(port . ,(cadr sdat))
(msg . "Transport died?"))
@@ -475,29 +498,17 @@
(if (not server-going) ;; *dbstruct-dbs*
(begin
(debug:print 0 *default-log-port* "SERVER: dbprep")
(set! *dbstruct-dbs* (db:setup #t)) ;; run-id)) FIXME!!!
(set! server-going #t)
- (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine.
-
- ;; (thread-start! *watchdog*)
- )
+ (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version))) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine.
(if (and no-sync-db
- (common:low-noise-print 5 "sync-all")) ;; cheesy way to reduce frequency of running sync :)
+ (common:low-noise-print 10 "sync-all")) ;; cheesy way to reduce frequency of running sync :)
(begin
- (debug:print 0 *default-log-port* "keep-running calling db:all-db-sync at " (time->string (seconds->local-time) "%H:%M:%S"))
-
- ;; This is tougher than it seems - have to deal with multiple dbs
- ;; (db:process-transaction-queue *dbstruct-dbs*)
-
- (db:all-db-sync *dbstruct-dbs*)
-
- ;; (db:do-sync no-sync-db)
- ;; (db:run-lock-and-sync *no-sync-db*)
- )
- )
- )
+ (if (common:low-noise-print 120 "sync-all-print")
+ (debug:print 0 *default-log-port* "keep-running calling db:all-db-sync at " (time->string (seconds->local-time) "%H:%M:%S")))
+ (db:all-db-sync *dbstruct-dbs*))))
;; when things go wrong we don't want to be doing the various queries too often
;; so we strive to run this stuff only every four seconds or so.
(let* ((sync-time (- (current-milliseconds) start-time))
(rem-time (quotient (- 4000 sync-time) 1000)))
@@ -518,11 +529,11 @@
(new-port (cadr sdat)))
(debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info")
(set! iface new-iface)
(set! port new-port)
(if (not *server-id*)
- (set! *server-id* (server:mk-signature)))
+ (set! *server-id* (server:mk-signature)))
(debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*)
(flush-output *default-log-port*)))
;; Transfer *db-last-access* to last-access to use in checking that we are still alive
(mutex-lock! *heartbeat-mutex*)
@@ -540,37 +551,32 @@
(begin
(debug:print 0 *default-log-port* "Server stats:")
(db:print-current-query-stats)))
(let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600)))
(cond
- #;((and *server-run*
- (> (- (current-seconds) server-start-time) 420)) ;; let's try server replacement
- ;; ((adj-proc-load . 0.056875) (adj-core-load . 0.11375) (1m-load . 0.91) (5m-load . 0.77) (15m-load . 1.0) (proc . 16) (core . 8) (phys . 1))
- (let* ((loaddat (common:get-normalized-cpu-load #f))
- (adj-proc-load (alist-ref 'adj-proc-load loaddat))
- (adj-core-load (alist-ref 'adj-core-load loaddat))
- (adj-load (max adj-proc-load adj-core-load)))
- (if (< adj-load 2) ;; reduce chance of runaway
- (server:run *toppath*))
- (db:all-db-sync *dbstruct-dbs*)
- (thread-sleep! 30)
- (http-transport:server-shutdown port)))
((and *server-run*
(> (+ last-access server-timeout)
(current-seconds)))
(if (common:low-noise-print 120 "server continuing")
(debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
(let ((curr-time (current-seconds)))
(handle-exceptions
exn
- (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk? exn=" exn)
+ (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on info file " servinfofile ". Are you out of space on that disk? exn=" exn)
(if (and (< (- (current-seconds) server-start-time) 600) ;; run for ten minutes for experiment, 3600 thereafter
- (not *server-overloaded*))
- (change-file-times server-log-file curr-time curr-time)
- (if (common:low-noise-print 120 "start new server")
- (server:kind-run *toppath*) ;; server:kind-run uses [servers] numservers
- )))))
+ (not *server-overloaded*)
+ (file-exists? servinfofile))
+ (change-file-times servinfofile curr-time curr-time)))
+ (if (or (common:low-noise-print 120 "start new server")
+ (> *api-process-request-count* 50)) ;; if this server is kind of busy start up another
+ (begin
+ (debug:print-info 0 *default-log-port* "Server is busy, parallel-api-count "*api-process-request-count*", start another if possible...")
+ (server:kind-run *toppath*)
+ (if (> *api-process-request-count* 100)
+ (begin
+ (debug:print-info 0 *default-log-port* "Server is overloaded at parallel-api-count="*api-process-request-count*", removing "servinfofile)
+ (delete-file* servinfofile)))))))
(loop 0 server-state bad-sync-count (current-milliseconds)))
(else
(debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
(http-transport:server-shutdown port)))))))
@@ -600,14 +606,17 @@
;; (/ *total-non-write-delay*
;; *number-non-write-queries*))
;; " ms")
(db:print-current-query-stats)
- (common:save-pkt `((action . exit)
+ #;(common:save-pkt `((action . exit)
(T . server)
(pid . ,(current-process-id)))
- *configdat* #t)
+ *configdat* #t)
+
+ ;; remove .servinfo file(s) here
+
(debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting")
(exit)))
;; all routes though here end in exit ...
;;
@@ -640,14 +649,14 @@
#;(let* ((num-alive (server:get-num-alive (server:get-list *toppath*))))
(if (> num-alive 3)
(begin
(cleanup-proc (conc "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up"))
(exit))))
- (common:save-pkt `((action . start)
- (T . server)
- (pid . ,(current-process-id)))
- *configdat* #t)
+ #;(common:save-pkt `((action . start)
+ (T . server)
+ (pid . ,(current-process-id)))
+ *configdat* #t)
(let* ((th2 (make-thread (lambda ()
(debug:print-info 0 *default-log-port* "Server run thread started")
(http-transport:run
(if (args:get-arg "-server")
(args:get-arg "-server")
Index: launch.scm
==================================================================
--- launch.scm
+++ launch.scm
@@ -1563,11 +1563,11 @@
(with-output-to-string
(lambda () ;; (list 'hosts hosts)
(write (list (list 'testpath test-path)
;; (list 'transport (conc *transport-type*))
;; (list 'serverinf *server-info*)
- (list 'homehost (let* ((hhdat (common:get-homehost)))
+ #;(list 'homehost (let* ((hhdat (server:get-homehost)))
(if hhdat
(car hhdat)
#f)))
(list 'serverurl (if *runremote*
(remote-server-url *runremote*)
Index: megatest.scm
==================================================================
--- megatest.scm
+++ megatest.scm
@@ -656,13 +656,13 @@
(if (args:any? "-run" "-runall" "-remove-runs" "-set-state-status" "-kill-runs" "-kill-rerun")
(debug:print 0 *default-log-port* (string-intersperse (argv) " ")))
;; some switches imply homehost. Exit here if not on homehost
;;
-(let ((homehost-required (list "-cleanup-db" "-server")))
+(let ((homehost-required (list "-cleanup-db")))
(if (apply args:any? homehost-required)
- (if (not (common:on-homehost?))
+ (if (not (server:choose-server *toppath* 'home?))
(for-each
(lambda (switch)
(if (args:get-arg switch)
(begin
(debug:print 0 *default-log-port* "ERROR: you must be on the homehost to run with " switch
@@ -951,11 +951,11 @@
(if (or (args:get-arg "-list-servers")
(args:get-arg "-kill-servers"))
(let ((tl (launch:setup)))
(if tl ;; all roads from here exit
- (let* ((servers (server:get-list *toppath*))
+ (let* ((servers (server:choose-server *toppath* 'all-valid)) ;; (server:get-list *toppath*))
(fmtstr "~33a~22a~20a~20a~8a\n"))
(format #t fmtstr "ID" "host:port" "age (hms)" "Last mod" "State")
(format #t fmtstr "==" "=========" "=========" "========" "=====")
(for-each ;; ( mod-time host port start-time pid )
(lambda (server)
@@ -2279,12 +2279,11 @@
(if (not (launch:setup))
(begin
(debug:print 0 *default-log-port* "Failed to setup, exiting")
(exit 1)))
(set! keys (rmt:get-keys)) ;; db))
- (debug:print 1 *default-log-port* "Keys: " (string-intersperse keys ", "))
- (if (sqlite3:database? db)(sqlite3:finalize! db))
+ (print (string-intersperse keys " "))
(set! *didsomething* #t)))
(if (args:get-arg "-gui")
(begin
(debug:print 0 *default-log-port* "Look at the dashboard for now")
@@ -2379,11 +2378,11 @@
(if (or (getenv "MT_RUNSCRIPT")
(args:get-arg "-repl")
(args:get-arg "-load"))
(let* ((toppath (launch:setup))
(dbstructs (if (and toppath
- (common:on-homehost?))
+ (server:choose-server toppath 'home?))
(db:setup #t)
#f))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f)))
(if *toppath*
(cond
((getenv "MT_RUNSCRIPT")
Index: newdashboard.scm
==================================================================
--- newdashboard.scm
+++ newdashboard.scm
@@ -16,31 +16,54 @@
;; You should have received a copy of the GNU General Public License
;; along with Megatest. If not, see .
;;======================================================================
-(use format)
-
-(use (prefix iup iup:))
+;; (declare (uses common))
+;; (declare (uses megatest-version))
+(declare (uses mtargs))
+(declare (uses treemod))
-(use canvas-draw)
+(use srfi-1
+ posix regex regex-case srfi-69 typed-records sparse-vectors
+ format
+ extras
+ (prefix iup iup:)
+ canvas-draw
+ sqlite3)
+
(import canvas-draw-iup)
-(use sql-de-lite srfi-1 posix regex regex-case srfi-69 typed-records sparse-vectors ;; defstruct
- (prefix dbi dbi:))
+(module ndboard
+ *
+
+(import scheme
+ chicken
+ data-structures
+ extras
+ format
+ (prefix iup iup:)
+ canvas-draw
+ canvas-draw-iup
+ matchable
+ srfi-1 posix regex regex-case
+ srfi-69 typed-records sparse-vectors ;; defstruct
+ sqlite3
+
+ treemod
+ (prefix mtargs args:)
+ )
+
-(declare (uses common))
-(declare (uses megatest-version))
-(declare (uses margs))
+(include "megatest-version.scm")
;; (declare (uses launch))
;; (declare (uses gutils))
;; (declare (uses db))
;; (declare (uses server))
;; (declare (uses synchash))
-(declare (uses dcommon))
-;; (declare (uses tree))
+;; (declare (uses dcommon))
;;
;; (include "common_records.scm")
;; (include "db_records.scm")
;; (include "key_records.scm")
@@ -81,21 +104,159 @@
(begin
(print help)
(exit)))
;; ease debugging by loading ~/.dashboardrc
-(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.dashboardrc")))
- (if (common:file-exists? debugcontrolf)
+(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.newdashboardrc")))
+ (if (file-exists? debugcontrolf)
(load debugcontrolf)))
-(debug:setup)
-
(define *tim* (iup:timer))
(define *ord* #f)
(iup:attribute-set! *tim* "TIME" 300)
(iup:attribute-set! *tim* "RUN" "YES")
+
+;; areas
+;;
+
+(define *areas* (make-hash-table))
+(defstruct area
+ path
+ keys
+ targets
+ targets-update-time
+ (dbhs (make-hash-table))
+ )
+
+(define (area-get-path area-name)
+ (let* ((adat (get-area-info area-name)))
+ (if adat
+ (area-path adat)
+ #f)))
+
+(define (get-areas-file)
+ (conc (get-environment-variable "HOME")"/.ndboard/areas.scm"))
+
+(define (get-areas)
+ (let* ((areas-file (get-areas-file)))
+ (if (file-exists? areas-file)
+ (with-input-from-file areas-file read))))
+
+(define (register-area areadat)
+ (hash-table-set! *areas* (car areadat)
+ (make-area path: (cdr areadat))))
+
+(define (get-area-info area-name)
+ (hash-table-ref/default *areas* area-name #f))
+
+(define (area-save-dbh area-name dbname mtdbh)
+ (hash-table-set! (area-dbhs (get-area-info area-name)) dbname mtdbh))
+
+(define (area-get-dbh area-name dbname)
+ (hash-table-ref/default (area-dbhs (get-area-info area-name)) dbname #f))
+
+;; megatest calls, run in "area"
+;;
+
+;; TODO store the last time the query was run
+;; and clear cache based on timestamp on main.db
+;;
+(define (megatest-get-targets area-name)
+ (let* ((ainfo (get-area-info area-name))
+ (targets (area-targets ainfo)))
+ (if targets
+ targets
+ (let* ((path (area-get-path area-name))
+ (raw-targs (with-input-from-pipe
+ (conc "megatest -list-targets -start-dir "path)
+ read-lines))
+ (clean-targs (filter (lambda (x)
+ (not (equal? x "default")))
+ raw-targs)))
+ (area-targets-set! ainfo clean-targs)
+ (area-targets-update-time-set! ainfo (current-seconds))
+ clean-targs))))
+
+(define (megatest-get-keys area-name)
+ (let* ((ainfo (get-area-info area-name))
+ (keys (area-keys ainfo)))
+ (if keys
+ keys
+ (let* ((path (area-path ainfo))
+ (keysstr (with-input-from-pipe
+ (conc "megatest -show-keys -start-dir "path)
+ read-line)))
+ (if (not (string? keysstr))
+ (print "Unknown error getting keys for area "area-name", path: "path)
+ (let* ((keys (string-split keysstr)))
+ (area-keys-set! ainfo keys)
+ keys))))))
+
+;; megatest area database access functions
+;;
+
+(defstruct mtdb
+ name
+ db
+ path)
+
+;; fall back to old megatest db if .megatest/dbname not found
+;;
+(define (megatest-find-db path dbname)
+ (let ((newpath (conc path"/.megatest/"dbname))
+ (oldpath (conc path"/megatest.db")))
+ (if (file-exists? newpath)
+ newpath
+ (if (file-exists? oldpath)
+ oldpath
+ #f))))
+
+;; dbname is main.db, 1.db ...
+(define (megatest-open-db area-name dbname)
+ (let* ((mtdbh (area-get-dbh area-name dbname)))
+ (if mtdbh
+ mtdbh
+ (let* ((ainfo (get-area-info area-name))
+ (path (area-path ainfo))
+ (dbpath (megatest-find-db path dbname))
+ (dbexists (and dbpath
+ (file-exists? dbpath)
+ (file-read-access? dbpath))))
+ (if dbexists
+ (let* ((db (open-database dbpath)))
+ (set-busy-handler! db (make-busy-timeout 136000))
+ (execute db "PRAGMA synchronous = 0;")
+ (let* ((mtdbh (make-mtdb db: db path: dbpath)))
+ (area-save-dbh area-name dbname mtdbh)
+ mtdbh))
+ #f)))))
+
+;; ADD on-exit to close the opened dbs
+
+;; keys is list, targpatts is list, both same length
+;; and *fully* specified
+;; returns targvals and runname
+(define (megatest-get-run-names area-name keys targpatts)
+ (let* ((mtdbh (megatest-open-db area-name "main.db"))
+ (selector (string-intersperse
+ (map (lambda (k v)(conc k" like '"v"'")) keys targpatts)
+ " AND "))
+ (field-sel (string-intersperse keys ","))
+ (fullqry (conc "SELECT "field-sel",runname FROM runs WHERE "selector";")))
+ (print "fullqry="fullqry)
+ (fold-row ;; proc init db-or-stmt . params)
+ (lambda (res . row)
+ (cons row res))
+ '()
+ (mtdb-db mtdbh) ;; get the db handle
+ fullqry)))
+
+
+
+;; gui utils
+;;
(define (message-window msg)
(iup:show
(iup:dialog
(iup:vbox
@@ -112,605 +273,95 @@
(iup:attribute-set! lb "VALUE" item))) ;; (number->string i))))
(set! i (+ i 1)))
items)
i))
+;; simple widget registration and finding
+(define *widgets* (make-hash-table))
+(define (add-widget name wgt)
+ (hash-table-set! *widgets* name wgt)
+ wgt)
+(define (get-widget name)
+ (hash-table-ref/default *widgets* name #f))
+
(define (pad-list l n)(append l (make-list (- n (length l)))))
-
-(define (mkstr . x)
- (string-intersperse (map conc x) ","))
-
-(define (update-search x val)
- (hash-table-set! *searchpatts* x val))
-
-
-;; data for each specific tab goes here
-;;
-(defstruct dboard:tabdat
- ;; runs
- ((allruns '()) : list) ;; list of dboard:rundat records
- ((allruns-by-id (make-hash-table)) : hash-table) ;; hash of run-id -> dboard:rundat records
- ((done-runs '()) : list) ;; list of runs already drawn
- ((not-done-runs '()) : list) ;; list of runs not yet drawn
- (header #f) ;; header for decoding the run records
- (keys #f) ;; keys for this run (i.e. target components)
- ((numruns (string->number (or (args:get-arg "-cols") "10"))) : number) ;;
- ((tot-runs 0) : number)
- ((last-data-update 0) : number) ;; last time the data in allruns was updated
- ((last-runs-update 0) : number) ;; last time we pulled the runs info to update the tree
- (runs-mutex (make-mutex)) ;; use to prevent parallel access to draw objects
- ((run-update-times (make-hash-table)) : hash-table) ;; update times indexed by run-id
- ((last-test-dat (make-hash-table)) : hash-table) ;; cache last tests dat by run-id
- ((run-db-paths (make-hash-table)) : hash-table) ;; cache the paths to the run db files
-
- ;; Runs view
- ((buttondat (make-hash-table)) : hash-table) ;;
- ((item-test-names '()) : list) ;; list of itemized tests
- ((run-keys (make-hash-table)) : hash-table)
- (runs-matrix #f) ;; used in newdashboard
- ((start-run-offset 0) : number) ;; left-right slider value
- ((start-test-offset 0) : number) ;; up-down slider value
- ((runs-btn-height (or (configf:lookup *configdat* "dashboard" "btn-height") "x16")) : string) ;; was 12
- ((runs-btn-fontsz (or (configf:lookup *configdat* "dashboard" "btn-fontsz") "10")) : string) ;; was 8
- ((runs-cell-width (or (configf:lookup *configdat* "dashboard" "cell-width") "60")) : string) ;; was 50
- ((all-test-names '()) : list)
-
- ;; Canvas and drawing data
- (cnv #f)
- (cnv-obj #f)
- (drawing #f)
- ((run-start-row 0) : number)
- ((max-row 0) : number)
- ((running-layout #f) : boolean)
- (originx #f)
- (originy #f)
- ((layout-update-ok #t) : boolean)
- ((compact-layout #t) : boolean)
-
- ;; Run times layout
- ;; (graph-button-box #f) ;; RA => Think it is not referenced anywhere
- (graph-matrix #f)
- ((graph-matrix-table (make-hash-table)) : hash-table) ;; graph-dats referenced thru graph name info
- ((graph-cell-table (make-hash-table)) : hash-table) ;; graph-dats referenced thru matrix cell info
- ((graph-matrix-row 1) : number)
- ((graph-matrix-col 1) : number)
-
- ;; Controls used to launch runs etc.
- ((command "") : string) ;; for run control this is the command being built up
- (command-tb #f) ;; widget for the type of command; run, remove-runs etc.
- (test-patterns-textbox #f) ;; text box widget for editing a list of test patterns
- (key-listboxes #f)
- (key-lbs #f)
- run-name ;; from run name setting widget
- states ;; states for -state s1,s2 ...
- statuses ;; statuses for -status s1,s2 ...
-
- ;; Selector variables
- curr-run-id ;; current row to display in Run summary view
- prev-run-id ;; previous runid selected before current runid was selected (used in xor-two-runs runs summary mode
- curr-test-ids ;; used only in dcommon:run-update which is used in newdashboard
- ((filters-changed #t) : boolean) ;; to indicate that the user changed filters for this tab
- ((last-filter-str "") : string) ;; conc the target runname and testpatt for a signature of changed filters
- ((hide-empty-runs #f) : boolean)
- ((hide-not-hide #t) : boolean) ;; toggle for hide/not hide empty runs
- (hide-not-hide-button #f)
- ((searchpatts (make-hash-table)) : hash-table) ;;
- ((state-ignore-hash (make-hash-table)) : hash-table) ;; hash of STATE => #t/#f for display control
- ((status-ignore-hash (make-hash-table)) : hash-table) ;; hash of STATUS => #t/#f
- (target #f)
- (test-patts #f)
-
- ;; db info to file the .db files for the area
- (access-mode (db:get-access-mode)) ;; use cached db or not
- (dbdir #f)
- (dbfpath #f)
- (dbkeys #f)
- ((last-db-update (make-hash-table)) : hash-table) ;; last db file timestamp
- (monitor-db-path #f) ;; where to find monitor.db
- ro ;; is the database read-only?
-
- ;; tests data
- ((num-tests 10) : number) ;; total number of tests to show (used in the old runs display)
-
- ;; runs tree
- ((path-run-ids (make-hash-table)) : hash-table) ;; path (target / runname) => id
- (runs-tree #f)
- ((runs-tree-ht (make-hash-table)) : hash-table) ;; track which targets added to tree (merge functionality with path-run-ids?)
-
- ;; tab data
- ((view-changed #t) : boolean)
- ((xadj 0) : number) ;; x slider number (if using canvas)
- ((yadj 0) : number) ;; y slider number (if using canvas)
- ;; runs-summary tab state
- ((runs-summary-modes '((one-run . "Show One Run") (xor-two-runs . "XOR Two Runs") (xor-two-runs-hide-clean . "XOR; Hide Clean")) ) : list)
- ((runs-summary-mode-buttons '()) : list)
- ((runs-summary-mode 'one-run) : symbol)
- ((runs-summary-mode-change-callbacks '()) : list)
- (runs-summary-source-runname-label #f)
- (runs-summary-dest-runname-label #f)
- ;; runs summary view
-
- tests-tree ;; used in newdashboard
- )
-
-
-
-;; mtest is actually the megatest.config file
-;;
-(define (mtest toppath window-id)
- (let* ((curr-row-num 0)
- ;; (rawconfig (read-config (conc toppath "/megatest.config") #f 'return-string))
- (keys-matrix (iup:matrix)) ;; (dcommon:keys-matrix rawconfig))
- (setup-matrix (iup:matrix)) ;; (dcommon:section-matrix rawconfig "setup" "Varname" "Value"))
- (jobtools-matrix (iup:matrix
- #:expand "YES"
- #:numcol 1
- #:numlin 5
- #:numcol-visible 1
- #:numlin-visible 3))
- (validvals-matrix (iup:matrix
- #:expand "YES"
- #:numcol 1
- #:numlin 2
- #:numcol-visible 1
- #:numlin-visible 2))
- (envovrd-matrix (iup:matrix
- #:expand "YES"
- #:numcol 1
- #:numlin 20
- #:numcol-visible 1
- #:numlin-visible 8))
- (disks-matrix (iup:matrix
- #:expand "YES"
- #:numcol 1
- #:numlin 20
- #:numcol-visible 1
- #:numlin-visible 8))
- )
- (iup:attribute-set! disks-matrix "0:0" "Disk Name")
- (iup:attribute-set! disks-matrix "0:1" "Disk Path")
- (iup:attribute-set! disks-matrix "WIDTH1" "120")
- (iup:attribute-set! disks-matrix "WIDTH0" "100")
- (iup:attribute-set! disks-matrix "ALIGNMENT1" "ALEFT")
- (iup:attribute-set! disks-matrix "FIXTOTEXT" "C1")
- (iup:attribute-set! disks-matrix "RESIZEMATRIX" "YES")
-
- ;; fill in existing info
- (for-each
- (lambda (mat fname)
- (set! curr-row-num 1)
- (for-each
- (lambda (var)
- (iup:attribute-set! mat (conc curr-row-num ":0") var)
- ;; (iup:attribute-set! mat (conc curr-row-num ":1") (config-lookup rawconfig fname var))
- (set! curr-row-num (+ curr-row-num 1)))
- '()));; (configf:section-vars rawconfig fname)))
- (list setup-matrix jobtools-matrix validvals-matrix envovrd-matrix disks-matrix)
- (list "setup" "jobtools" "validvalues" "env-override" "disks"))
-
- (for-each
- (lambda (mat)
- (iup:attribute-set! mat "0:1" "Value")
- (iup:attribute-set! mat "0:0" "Var")
- (iup:attribute-set! mat "ALIGNMENT1" "ALEFT")
- (iup:attribute-set! mat "FIXTOTEXT" "C1")
- (iup:attribute-set! mat "RESIZEMATRIX" "YES")
- (iup:attribute-set! mat "WIDTH1" "120")
- (iup:attribute-set! mat "WIDTH0" "100")
- )
- (list setup-matrix jobtools-matrix validvals-matrix envovrd-matrix))
-
- (iup:attribute-set! validvals-matrix "WIDTH1" "290")
- (iup:attribute-set! envovrd-matrix "WIDTH1" "290")
-
- (iup:vbox
- (iup:hbox
-
- (iup:vbox
- (let ((tabs (iup:tabs
- ;; The required tab
- (iup:hbox
- ;; The keys
- (iup:frame
- #:title "Keys (required)"
- (iup:vbox
- (iup:label (conc "Set the fields for organising your runs\n"
- "here. Note: can only be changed before\n"
- "running the first run when megatest.db\n"
- "is created."))
- keys-matrix))
- (iup:vbox
- ;; The setup section
- (iup:frame
- #:title "Setup"
- (iup:vbox
- (iup:label (conc "max_concurrent_jobs : limits total concurrent jobs (optional)\n"
- "linktree : directory where linktree will be created."))
- setup-matrix))
- ;; The jobtools
- (iup:frame
- #:title "Jobtools"
- (iup:vbox
- (iup:label (conc "launcher : tool or script to run jobs (try nbfake)\n"
- "useshell : use system to run your launcher\n"
- "workhosts : spread jobs out on these hosts"))
- jobtools-matrix))
- ;; The disks
- (iup:frame
- #:title "Disks"
- (iup:vbox
- (iup:label (conc "Enter names and existing paths of locations to run tests"))
- disks-matrix))))
- ;; The optional tab
- (iup:vbox
- ;; The Environment Overrides
- (iup:frame
- #:title "Env override"
- envovrd-matrix)
- ;; The valid values
- (iup:frame
- #:title "Validvalues"
- validvals-matrix)
- ))))
- (iup:attribute-set! tabs "TABTITLE0" "Required settings")
- (iup:attribute-set! tabs "TABTITLE1" "Optional settings")
- tabs))
- ))))
-
-;; The runconfigs.config file
-;;
-(define (rconfig window-id)
- (iup:vbox
- (iup:frame #:title "Default")))
-
-;;======================================================================
-;; T E S T S
-;;======================================================================
-
-(define (tree-path->test-id path)
- (if (not (null? path))
- (hash-table-ref/default (dboard:data-path-test-ids *data*) path #f)
- #f))
-
-(define (test-panel window-id)
- (let* ((curr-row-num 0)
- (viewlog (lambda (x)
- (if (common:file-exists? logfile)
- ;(system (conc "firefox " logfile "&"))
- (iup:send-url logfile)
- (message-window (conc "File " logfile " not found")))))
- (xterm (lambda (x)
- (if (directory-exists? rundir)
- (let ((shell (if (get-environment-variable "SHELL")
- (conc "-e " (get-environment-variable "SHELL"))
- "")))
- (system (conc "cd " rundir
- ";xterm -T \"" (string-translate testfullname "()" " ") "\" " shell "&")))
- (message-window (conc "Directory " rundir " not found")))))
- (command-text-box (iup:textbox #:expand "HORIZONTAL" #:font "Courier New, -12"))
- (command-launch-button (iup:button "Execute!"
- ;; #:expand "HORIZONTAL"
- #:size "50x"
- #:action (lambda (x)
- (let ((cmd (iup:attribute command-text-box "VALUE")))
- (system (conc cmd " &"))))))
- (run-test (lambda (x)
- (iup:attribute-set!
- command-text-box "VALUE"
- (conc "xterm -geometry 180x20 -e \"megatest -target " keystring " :runname " runname
- " -runtests " (conc testname "/" (if (equal? item-path "")
- "%"
- item-path))
- ";echo Press any key to continue;bash -c 'read -n 1 -s'\""))))
- (remove-test (lambda (x)
- (iup:attribute-set!
- command-text-box "VALUE"
- (conc "xterm -geometry 180x20 -e \"megatest -remove-runs -target " keystring " :runname " runname
- " -testpatt " (conc testname "/" (if (equal? item-path "")
- "%"
- item-path))
- " -v;echo Press any key to continue;bash -c 'read -n 1 -s'\""))))
- (run-info-matrix (iup:matrix
- #:expand "YES"
- ;; #:scrollbar "YES"
- #:numcol 1
- #:numlin 4
- #:numcol-visible 1
- #:numlin-visible 4
- #:click-cb (lambda (obj lin col status)
- #f
- ;;(print "obj: " obj " lin: " lin " col: " col " status: " status)
- )))
- (test-info-matrix (iup:matrix
- #:expand "YES"
- #:numcol 1
- #:numlin 7
- #:numcol-visible 1
- #:numlin-visible 7))
- (test-run-matrix (iup:matrix
- #:expand "YES"
- #:numcol 1
- #:numlin 5
- #:numcol-visible 1
- #:numlin-visible 5))
- (meta-dat-matrix (iup:matrix
- #:expand "YES"
- #:numcol 1
- #:numlin 5
- #:numcol-visible 1
- #:numlin-visible 5))
- (steps-matrix (iup:matrix
- #:expand "YES"
- #:numcol 6
- #:numlin 50
- #:numcol-visible 6
- #:numlin-visible 8))
- (data-matrix (iup:matrix
- #:expand "YES"
- #:numcol 8
- #:numlin 50
- #:numcol-visible 8
- #:numlin-visible 8))
- (updater (lambda (testdat)
- (test-update window-id testdat run-info-matrix test-info-matrix test-run-matrix meta-dat-matrix steps-matrix data-matrix))))
-
- ;; Set the updater in updaters
- ;; (hash-table-set! (dboard:data-updaters *data*) window-id updater)
- ;;
- (for-each
- (lambda (mat)
- ;; (iup:attribute-set! mat "0:1" "Value")
- ;; (iup:attribute-set! mat "0:0" "Var")
- (iup:attribute-set! mat "HEIGHT0" 0)
- (iup:attribute-set! mat "ALIGNMENT1" "ALEFT")
- ;; (iup:attribute-set! mat "FIXTOTEXT" "C1")
- (iup:attribute-set! mat "RESIZEMATRIX" "YES"))
- ;; (iup:attribute-set! mat "WIDTH1" "120")
- ;; (iup:attribute-set! mat "WIDTH0" "100"))
- (list run-info-matrix test-info-matrix test-run-matrix meta-dat-matrix))
-
- ;; Steps matrix
- (iup:attribute-set! steps-matrix "0:1" "Step Name")
- (iup:attribute-set! steps-matrix "0:2" "Start")
- (iup:attribute-set! steps-matrix "WIDTH2" "40")
- (iup:attribute-set! steps-matrix "0:3" "End")
- (iup:attribute-set! steps-matrix "WIDTH3" "40")
- (iup:attribute-set! steps-matrix "0:4" "Status")
- (iup:attribute-set! steps-matrix "WIDTH4" "40")
- (iup:attribute-set! steps-matrix "0:5" "Duration")
- (iup:attribute-set! steps-matrix "WIDTH5" "40")
- (iup:attribute-set! steps-matrix "0:6" "Log File")
- (iup:attribute-set! steps-matrix "ALIGNMENT1" "ALEFT")
- ;; (iup:attribute-set! steps-matrix "FIXTOTEXT" "C1")
- (iup:attribute-set! steps-matrix "RESIZEMATRIX" "YES")
- ;; (iup:attribute-set! steps-matrix "WIDTH1" "120")
- ;; (iup:attribute-set! steps-matrix "WIDTH0" "100")
-
- ;; Data matrix
- ;;
- (let ((rownum 1))
- (for-each
- (lambda (x)
- (iup:attribute-set! data-matrix (conc "0:" rownum) x)
- (iup:attribute-set! data-matrix (conc "WIDTH" rownum) "50")
- (set! rownum (+ rownum 1)))
- (list "Category" "Variable" "Value" "Expected" "Tolerance" "Status" "Units" "Type" "Comment")))
- (iup:attribute-set! data-matrix "REDRAW" "ALL")
-
- (for-each
- (lambda (data)
- (let ((mat (car data))
- (keys (cadr data))
- (rownum 1))
- (for-each
- (lambda (key)
- (iup:attribute-set! mat (conc rownum ":0") key)
- (set! rownum (+ rownum 1)))
- keys)
- (iup:attribute-set! mat "REDRAW" "ALL")))
- (list
- (list run-info-matrix '("Run Id" "Target" "Runname" "Run Start Time" ))
- (list test-info-matrix '("Test Id" "Testname" "Itempath" "State" "Status" "Test Start Time" "Comment"))
- (list test-run-matrix '("Hostname" "Host info" "Disk Free" "CPU Load" "Run Duration"))
- (list meta-dat-matrix '("Author" "Owner" "Last Reviewed" "Tags" "Description"))))
-
- (iup:split
- #:orientation "HORIZONTAL"
- (iup:vbox
- (iup:hbox
- (iup:vbox
- run-info-matrix
- test-info-matrix)
- ;; test-info-matrix)
- (iup:vbox
- test-run-matrix
- meta-dat-matrix))
- (iup:vbox
- (iup:vbox
- (iup:hbox
- (iup:button "View Log" #:action viewlog #:size "60x" ) ;; #:size "30x"
- (iup:button "Start Xterm" #:action xterm #:size "60x" )) ;; #:size "30x"
- (iup:hbox
- (iup:button "Run Test" #:action run-test #:size "60x" ) ;; #:size "30x"
- (iup:button "Clean Test" #:action remove-test #:size "60x" ))) ;; #:size "30x"
- (iup:hbox
- ;; hiup:split ;; hbox
- ;; #:orientation "HORIZONTAL"
- ;; #:value 300
- command-text-box
- command-launch-button)))
- (iup:vbox
- (let ((tabs (iup:tabs
- steps-matrix
- data-matrix)))
- (iup:attribute-set! tabs "TABTITLE0" "Test Steps")
- (iup:attribute-set! tabs "TABTITLE1" "Test Data")
- tabs)))))
-
-;; Test browser
-(define (tests window-id)
- (iup:split
- (let* ((tb (iup:treebox
- #:selection-cb
- (lambda (obj id state)
- ;; (print "obj: " obj ", id: " id ", state: " state)
- (let* ((run-path (tree:node->path obj id))
- (test-id (tree-path->test-id (cdr run-path))))
- ;; (if test-id
- ;; (hash-table-set! (dboard:data-curr-test-ids *data*)
- ;; window-id test-id))
- ;; (print "path: " (tree:node->path obj id) " test-id: " test-id)
- )))))
- (iup:attribute-set! tb "VALUE" "0")
- (iup:attribute-set! tb "NAME" "Runs")
- ;;(iup:attribute-set! tb "ADDEXPANDED" "NO")
- ;; (dboard:data-tests-tree-set! *data* tb)
- tb)
- (test-panel window-id)))
-
-;; The function to update the fields in the test view panel
-(define (test-update window-id testdat run-info-matrix test-info-matrix test-run-matrix meta-dat-matrix steps-matrix data-matrix)
- ;; get test-id
- ;; then get test record
- (if testdat
- (let* ((test-id 0) ;; (hash-table-ref/default (dboard:data-curr-test-ids *data*) window-id #f))
- (test-data (hash-table-ref/default testdat test-id #f))
- (run-id (db:test-get-run_id test-data))
- (targ/runname (hash-table-ref/default (dboard:data-run-keys *data*)
- run-id
- '()))
- (target (if (null? targ/runname) "" (string-intersperse (reverse (cdr (reverse targ/runname))) "/")))
- (runname (if (null? targ/runname) "" (car (cdr targ/runname))))
- (steps-dat (tests:get-compressed-steps *dbstruct-local* run-id test-id)))
-
- (if test-data
- (begin
- ;;
- (for-each
- (lambda (data)
- (let ((mat (car data))
- (vals (cadr data))
- (rownum 1))
- (for-each
- (lambda (key)
- (let ((cell (conc rownum ":1")))
- (if (not (equal? (iup:attribute mat cell)(conc key)))
- (begin
- ;; (print "setting cell " cell " in matrix " mat " to value " key)
- (iup:attribute-set! mat cell (conc key))
- (iup:attribute-set! mat "REDRAW" cell)))
- (set! rownum (+ rownum 1))))
- vals)))
- (list
- (list run-info-matrix
- (if test-id
- (list (db:test-get-run_id test-data)
- target
- runname
- "n/a")
- (make-list 4 "")))
- (list test-info-matrix
- (if test-id
- (list test-id
- (db:test-get-testname test-data)
- (db:test-get-item-path test-data)
- (db:test-get-state test-data)
- (db:test-get-status test-data)
- (seconds->string (db:test-get-event_time test-data))
- (db:test-get-comment test-data))
- (make-list 7 "")))
- (list test-run-matrix
- (if test-id
- (list (db:test-get-host test-data)
- (db:test-get-uname test-data)
- (db:test-get-diskfree test-data)
- (db:test-get-cpuload test-data)
- (seconds->hr-min-sec (db:test-get-run_duration test-data)))
- (make-list 5 "")))
- ))
- (dcommon:populate-steps steps-dat steps-matrix))))))
- ;;(list meta-dat-matrix
- ;; (if test-id
- ;; (list (
-
-
-;; db:test-get-id
-;; db:test-get-run_id
-;; db:test-get-testname
-;; db:test-get-state
-;; db:test-get-status
-;; db:test-get-event_time
-;; db:test-get-host
-;; db:test-get-cpuload
-;; db:test-get-diskfree
-;; db:test-get-uname
-;; db:test-get-rundir
-;; db:test-get-item-path
-;; db:test-get-run_duration
-;; db:test-get-final_logf
-;; db:test-get-comment
-;; db:test-get-fullname
-
-
-;;======================================================================
-;; R U N C O N T R O L
-;;======================================================================
-
-;; Overall runs browser
-;;
-(define (runs window-id)
- (let* ((runs-matrix (iup:matrix
- #:expand "YES"
- ;; #:fittosize "YES"
- #:scrollbar "YES"
- #:numcol 100
- #:numlin 100
- #:numcol-visible 7
- #:numlin-visible 7
- #:click-cb (lambda (obj lin col status)
- #f
- ;; (print "obj: " obj " lin: " lin " col: " col " status: " status)
- ))))
-
- (iup:attribute-set! runs-matrix "RESIZEMATRIX" "YES")
- (iup:attribute-set! runs-matrix "WIDTH0" "100")
-
- ;; (dboard:data-runs-matrix-set! *data* runs-matrix)
- (iup:hbox
- (iup:frame
- #:title "Runs browser"
- (iup:vbox
- runs-matrix)))))
+;; the main tree, everything starts from here
+;;
+(define (main-tree)
+ (iup:treebox
+ #:value 0
+ #:title "Areas"
+ #:expand "YES"
+ #:addexpanded "YES"
+ #:size "10x"
+ #:selection-cb
+ (lambda (obj id state)
+ (let* ((path (tree:node->path obj id)))
+ (match path
+ ((treename) #f) ;;(print "nothing to do here"))
+ ((treename area)
+ (let ((tb (get-widget "main-tree"))) ;; wait, isn't this just "obj"?
+ (refresh-targets tb area)))
+ ((treename area . target)
+ (let* ((keys (megatest-get-keys area)))
+ (if (eq? (length keys)(length target))
+ (let* ((runnames (megatest-get-run-names area keys target)))
+ (for-each
+ (lambda (runnamedat)
+ (tree:add-node obj "Areas" (cons area runnamedat)))
+ runnames)))))
+ (else
+ (print "path: "path))
+ )
+ #;(print "obj: "obj", id: "id", state: "state", path: "path)))))
+
+(define (refresh-targets tb area)
+ (let* ((targets (megatest-get-targets area)))
+ (for-each
+ (lambda (target)
+ (let* ((t-path (string-split target "/")))
+ (tree:add-node tb "Areas" (cons area t-path))))
+ targets)))
+
+(define (runs window-id)
+ (iup:hbox
+ (add-widget "main-tree" (main-tree))
+ ))
+
+(define (runs-init)
+ (let* ((areas (get-areas))
+ (tb (get-widget "main-tree")))
+ (for-each
+ (lambda (areadat)
+ (tree:add-node tb "Areas" `(,(car areadat)))
+ (register-area areadat))
+ areas)))
;; Browse and control a single run
;;
(define (runcontrol window-id)
(iup:hbox))
-;;======================================================================
-;; D A S H B O A R D
-;;======================================================================
-
;; Main Panel
(define (main-panel window-id)
(iup:dialog
#:title "Megatest Control Panel"
- #:menu (dcommon:main-menu)
+ ;; #:menu (dcommon:main-menu)
#:shrink "YES"
(let ((tabtop (iup:tabs
- (runs window-id)
- (tests window-id)
+ (add-widget "runs" (runs window-id))
+ ;; (tests window-id)
(runcontrol window-id)
- (mtest *toppath* window-id)
- (rconfig window-id)
+ ;; (mtest *toppath* window-id)
+ ;; (rconfig window-id)
)))
(iup:attribute-set! tabtop "TABTITLE0" "Runs")
- (iup:attribute-set! tabtop "TABTITLE1" "Tests")
- (iup:attribute-set! tabtop "TABTITLE2" "Run Control")
- (iup:attribute-set! tabtop "TABTITLE3" "megatest.config")
- (iup:attribute-set! tabtop "TABTITLE4" "runconfigs.config")
+ ;; (iup:attribute-set! tabtop "TABTITLE1" "Tests")
+ (iup:attribute-set! tabtop "TABTITLE1" "Run Control")
+ ;; (iup:attribute-set! tabtop "TABTITLE3" "megatest.config")
+ ;; (iup:attribute-set! tabtop "TABTITLE4" "runconfigs.config")
tabtop)))
(define *current-window-id* 0)
(define (newdashboard dbstruct)
@@ -724,24 +375,34 @@
(nextmintime (current-milliseconds))
(my-window-id *current-window-id*))
(set! *current-window-id* (+ 1 *current-window-id*))
;; (dboard:data-runs-set! *data* data) ;; make this data available to the rest of the application
(iup:show (main-panel my-window-id))
+ (runs-init)
;; Yes, running iup:show will pop up a new panel
;; (iup:show (main-panel my-window-id))
(iup:callback-set! *tim*
"ACTION_CB"
(lambda (x)
;; Want to dedicate no more than 50% of the time to this so skip if
;; 2x delta time has not passed since last query
- (if (< nextmintime (current-milliseconds))
+ #t
+ #;(if (< nextmintime (current-milliseconds))
(let* ((starttime (current-milliseconds))
;; (changes (dcommon:run-update keys data runname keypatts testpatt states statuses 'full my-window-id))
(endtime (current-milliseconds)))
(set! nextmintime (+ endtime (* 2 (- endtime starttime))))
;; (debug:print 11 *default-log-port* "CHANGE(S): " (car changes) "..."))
)
- (debug:print-info 11 *default-log-port* "Server overloaded"))))))
+ (print "Server overloaded"))))))
+
+)
+
+
+;;======================================================================
+;; D A S H B O A R D
+;;======================================================================
+
-;; (dboard:data-updaters-set! *data* (make-hash-table))
-(newdashboard #f) ;; *dbstruct-local*)
+(import ndboard)
+(newdashboard #f)
(iup:main-loop)
Index: rmt.scm
==================================================================
--- rmt.scm
+++ rmt.scm
@@ -52,10 +52,19 @@
(if cinfo
cinfo
(if (server:check-if-running areapath)
(client:setup areapath)
#f))))
+
+(define (rmt:on-homehost? runremote)
+ (let* ((hh-dat (remote-hh-dat runremote)))
+ (if (pair? hh-dat)
+ (cdr hh-dat)
+ (begin
+ (debug:print-info 0 *default-log-port* "hh-dat="hh-dat)
+ #f))))
+
;;======================================================================
(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id
@@ -116,11 +125,12 @@
;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"];
;; DOT SET_HOMEHOST -> MUTEXLOCK;
;; ensure we have a homehost record
(if (not (pair? (remote-hh-dat runremote))) ;; not on homehost
(thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
- (remote-hh-dat-set! runremote (common:get-homehost)))
+ (let ((hh-data (server:choose-server areapath 'homehost)))
+ (remote-hh-dat-set! runremote (or hh-data (cons #f #f)))))
;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
(cond
#;((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds
(debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.")
@@ -176,21 +186,24 @@
;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5};
;;DOT CASE5 -> "rmt:open-qry-close-locally";
;; on homehost and this is a read
((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
- (cdr (remote-hh-dat runremote)) ;; on homehost
+ (rmt:on-homehost? runremote)
(member cmd api:read-only-queries)) ;; this is a read
(mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 5")
(rmt:open-qry-close-locally cmd 0 params))
;;DOT CASE6 [label="init\nremote"];
;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6};
;;DOT CASE6 -> "rmt:send-receive";
;; on homehost and this is a write, we already have a server, but server has died
- ((and (cdr (remote-hh-dat runremote)) ;; on homehost
+
+ ;; reinstate this keep-alive section but inject a time condition into the (add ...
+
+ #;((and (cdr (remote-hh-dat runremote)) ;; on homehost
(not (member cmd api:read-only-queries)) ;; this is a write
(remote-server-url runremote) ;; have a server
(not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach.
(debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6")
(http-transport:close-connections area-dat: runremote) ;; make sure to clean up
Index: server.scm
==================================================================
--- server.scm
+++ server.scm
@@ -99,10 +99,22 @@
(with-output-to-string
(lambda ()
(write (list (current-directory)
(current-process-id)
(argv)))))))
+
+(define (server:get-client-signature)
+ (if *my-client-signature* *my-client-signature*
+ (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic
+ (set! *my-client-signature* sig)
+ *my-client-signature*)))
+
+(define (server:get-server-id)
+ (if *server-id* *server-id*
+ (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic
+ (set! *server-id* sig)
+ *server-id*)))
;; When using zmq this would send the message back (two step process)
;; with spiffy or rpc this simply returns the return data to be returned
;;
(define (server:reply return-addr query-sig success/fail result)
@@ -121,25 +133,26 @@
;; if the target-host is set
;; try running on that host
;; incidental: rotate logs in logs/ dir.
;;
(define (server:run areapath) ;; areapath is *toppath* for a given testsuite area
- (let* ((curr-host (get-host-name))
+ (let* (;; (curr-host (get-host-name))
;; (attempt-in-progress (server:start-attempted? areapath))
;; (dot-server-url (server:check-if-running areapath))
- (curr-ip (server:get-best-guess-address curr-host))
- (curr-pid (current-process-id))
- (homehost (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
- (target-host (car homehost))
+ ;; (curr-ip (server:get-best-guess-address curr-host))
+ ;; (curr-pid (current-process-id))
+ ;; (homehost (server:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
+ ;; (target-host (car homehost))
(testsuite (common:get-testsuite-name))
(logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
(profile-mode (or (configf:lookup *configdat* "misc" "profilesw")
""))
(cmdln (conc (common:get-megatest-exe)
- " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
- " -daemonize "
- "")
+ " -server - ";; (or target-host "-")
+ (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
+ " -daemonize "
+ "")
;; " -log " logfile
" -m testsuite:" testsuite
" " profile-mode
)) ;; (conc " >> " logfile " 2>&1 &")))))
(log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) ;; why are we rotating logs here? This is a sensitive location with a lot going on!?
@@ -148,134 +161,135 @@
(push-directory areapath)
(debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
(thread-start! log-rotate)
;; host.domain.tld match host?
- (if (and target-host
- ;; look at target host, is it host.domain.tld or ip address and does it
- ;; match current ip or hostname
- (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
- (not (equal? curr-ip target-host)))
- (begin
- (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
- (setenv "TARGETHOST" target-host)))
-
+ ;; (if (and target-host
+ ;; ;; look at target host, is it host.domain.tld or ip address and does it
+ ;; ;; match current ip or hostname
+ ;; (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
+ ;; (not (equal? curr-ip target-host)))
+ ;; (begin
+ ;; (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
+ ;; (setenv "TARGETHOST" target-host)))
+ ;;
(setenv "TARGETHOST_LOGF" logfile)
(thread-sleep! (/ (random 3000) 1000)) ;; add a random initial delay. It seems pretty common that many running tests request a server at the same time
(debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time))
(system (conc "nbfake " cmdln))
(unsetenv "TARGETHOST_LOGF")
- (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
+ ;; (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
(thread-join! log-rotate)
(pop-directory)))
;; given a path to a server log return: host port startseconds server-id
;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which use match let
;; example of what it's looking for in the log file:
;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4
(define (server:logf-get-start-info logf)
- (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id
- (dbprep-rx (regexp "^SERVER: dbprep"))
- (dbprep-found 0))
+ (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+)")) ;; SERVER STARTED: host:port AT timesecs server id
+ (dbprep-rx (regexp "^SERVER: dbprep"))
+ (dbprep-found 0)
+ (bad-dat (list #f #f #f #f #f)))
(handle-exceptions
exn
(begin
(debug:print-info 0 *default-log-port* "Unable to get server info from " logf ", exn=" exn)
- (list #f #f #f #f)) ;; no idea what went wrong, call it a bad server
+ bad-dat) ;; no idea what went wrong, call it a bad server
(with-input-from-file
logf
(lambda ()
(let loop ((inl (read-line))
(lnum 0))
(if (not (eof-object? inl))
(let ((mlst (string-match server-rx inl))
- (dbprep (string-match dbprep-rx inl))
- )
- (if dbprep
- (set! dbprep-found 1)
- )
+ (dbprep (string-match dbprep-rx inl)))
+ (if dbprep (set! dbprep-found 1))
(if (not mlst)
(if (< lnum 500) ;; give up if more than 500 lines of server log read
(loop (read-line)(+ lnum 1))
(begin
(debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf )
- (list #f #f #f #f)))
- (let ((dat (cdr mlst)))
- (list (car dat) ;; host
- (string->number (cadr dat)) ;; port
- (string->number (caddr dat))
- (cadr (cddr dat))))))
+ bad-dat))
+ (match mlst
+ ((_ host port start server-id pid)
+ (list host
+ (string->number port)
+ (string->number start)
+ server-id
+ (string->number pid)))
+ (else
+ (debug:print 0 *current-log-port* "ERROR: did not recognise SERVER line info "mlst)
+ bad-dat))))
(begin
- (if dbprep-found
+ (if dbprep-found
(begin
(debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time))
- (thread-sleep! 0.5) ;; was 25 sec but that blocked things from starting?
- )
- (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds)))
- )
- (list #f #f #f #f)))))))))
-
-;; get a list of servers from the log files, with all relevant data
-;; ( mod-time host port start-time pid )
-;;
-(define (server:get-list areapath #!key (limit #f))
- (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
- (day-seconds (* 24 60 60)))
- ;; if the directory exists continue to get the list
- ;; otherwise attempt to create the logs dir and then
- ;; continue
- (if (if (directory-exists? (conc areapath "/logs"))
- '()
- (if (file-write-access? areapath)
- (begin
- (condition-case
- (create-directory (conc areapath "/logs") #t)
- (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs")))
- (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list. exn=" exn)))
- (directory-exists? (conc areapath "/logs")))
- '()))
-
- ;; Get the list of server logs.
- (let* (
- ;; For some reason, when I uncomment the below line, ext-tests sometimes starts 1000's of servers.
- ;; (exiting-servers (system (conc "bash -c 'rm -f `grep -il exiting " areapath "/logs/server-*-*.log 2> /dev/null`'")))
- (server-logs (glob (conc areapath "/logs/server-*-*.log")))
- (num-serv-logs (length server-logs)))
- (if (or (null? server-logs) (= num-serv-logs 0))
- (let ()
- (debug:print 2 *default-log-port* "There are no servers running at " (common:human-time))
- '()
- )
- (let loop ((hed (string-chomp (car server-logs)))
- (tal (cdr server-logs))
- (res '()))
- (let* ((mod-time (handle-exceptions
- exn
- (begin
- (debug:print 0 *default-log-port* "server:get-list: failed to get modification time on " hed ", exn=" exn)
- (current-seconds)) ;; 0
- (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted
- (down-time (- (current-seconds) mod-time))
- (serv-dat (if (or (< num-serv-logs 10)
- (< down-time 900)) ;; day-seconds))
- (server:logf-get-start-info hed)
- '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at
- (serv-rec (cons mod-time serv-dat))
- (fmatch (string-match fname-rx hed))
- (pid (if fmatch (string->number (list-ref fmatch 2)) #f))
- (new-res (if (null? serv-dat)
- res
- (cons (append serv-rec (list pid)) res)))) ;; any changes to number of elements in new-res will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let
- (if (null? tal)
- (if (and limit
- (> (length new-res) limit))
- new-res ;; (take new-res limit) <= need intelligent sorting before this will work
- new-res)
- (loop (string-chomp (car tal)) (cdr tal) new-res)))))))))
-
-(define (server:get-num-alive srvlst)
+ (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting?
+ (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds))))
+ bad-dat))))))))
+
+;; ;; get a list of servers from the log files, with all relevant data
+;; ;; ( mod-time host port start-time pid )
+;; ;;
+;; (define (server:get-list areapath #!key (limit #f))
+;; (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
+;; (day-seconds (* 24 60 60)))
+;; ;; if the directory exists continue to get the list
+;; ;; otherwise attempt to create the logs dir and then
+;; ;; continue
+;; (if (if (directory-exists? (conc areapath "/logs"))
+;; '()
+;; (if (file-write-access? areapath)
+;; (begin
+;; (condition-case
+;; (create-directory (conc areapath "/logs") #t)
+;; (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs")))
+;; (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list. exn=" exn)))
+;; (directory-exists? (conc areapath "/logs")))
+;; '()))
+;;
+;; ;; Get the list of server logs.
+;; (let* (
+;; ;; For some reason, when I uncomment the below line, ext-tests sometimes starts 1000's of servers.
+;; ;; (exiting-servers (system (conc "bash -c 'rm -f `grep -il exiting " areapath "/logs/server-*-*.log 2> /dev/null`'")))
+;; (server-logs (glob (conc areapath "/logs/server-*-*.log")))
+;; (num-serv-logs (length server-logs)))
+;; (if (or (null? server-logs) (= num-serv-logs 0))
+;; (let ()
+;; (debug:print 2 *default-log-port* "There are no servers running at " (common:human-time))
+;; '()
+;; )
+;; (let loop ((hed (string-chomp (car server-logs)))
+;; (tal (cdr server-logs))
+;; (res '()))
+;; (let* ((mod-time (handle-exceptions
+;; exn
+;; (begin
+;; (debug:print 0 *default-log-port* "server:get-list: failed to get modification time on " hed ", exn=" exn)
+;; (current-seconds)) ;; 0
+;; (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted
+;; (down-time (- (current-seconds) mod-time))
+;; (serv-dat (if (or (< num-serv-logs 10)
+;; (< down-time 900)) ;; day-seconds))
+;; (server:logf-get-start-info hed)
+;; '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at
+;; (serv-rec (cons mod-time serv-dat))
+;; (fmatch (string-match fname-rx hed))
+;; (pid (if fmatch (string->number (list-ref fmatch 2)) #f))
+;; (new-res (if (null? serv-dat)
+;; res
+;; (cons (append serv-rec (list pid)) res)))) ;; any changes to number of elements in new-res will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let
+;; (if (null? tal)
+;; (if (and limit
+;; (> (length new-res) limit))
+;; new-res ;; (take new-res limit) <= need intelligent sorting before this will work
+;; new-res)
+;; (loop (string-chomp (car tal)) (cdr tal) new-res)))))))))
+
+#;(define (server:get-num-alive srvlst)
(let ((num-alive 0))
(for-each
(lambda (server)
(handle-exceptions
exn
@@ -289,70 +303,72 @@
0)))
(if (< uptime 5)(set! num-alive (+ num-alive 1)))))))
srvlst)
num-alive))
-;; given a list of servers get a list of valid servers, i.e. at least
-;; 10 seconds old, has started and is less than 1 hour old and is
-;; active (i.e. mod-time < 10 seconds
-;;
-;; mod-time host port start-time pid
-;;
-;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off
-;; and servers should stick around for about two hours or so.
-;;
-(define (server:get-best srvlst)
- (let* ((nums (server:get-num-servers))
- (now (current-seconds))
- (slst (sort
- (filter (lambda (rec)
- (if (and (list? rec)
- (> (length rec) 2))
- (let ((start-time (list-ref rec 3))
- (mod-time (list-ref rec 0)))
- ;; (print "start-time: " start-time " mod-time: " mod-time)
- (and start-time mod-time
- (> (- now start-time) 0) ;; been running at least 0 seconds
- (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds
- (or (not (configf:lookup *configdat* "server" "runtime")) ;; skip if not set
- (< (- now start-time)
- (+ (- (string->number (configf:lookup *configdat* "server" "runtime"))
- 180)
- (random 360)))) ;; under one hour running time +/- 180
- ))
- #f))
- srvlst)
- (lambda (a b)
- (< (list-ref a 3)
- (list-ref b 3))))))
- (if (> (length slst) nums)
- (take slst nums)
- slst)))
-
-(define (server:get-first-best areapath)
- (let ((srvrs (server:get-best (server:get-list areapath))))
- (if (and srvrs
- (not (null? srvrs)))
- (car srvrs)
- #f)))
-
-(define (server:get-rand-best areapath)
- (let ((srvrs (server:get-best (server:get-list areapath))))
- (if (and (list? srvrs)
- (not (null? srvrs)))
- (let* ((len (length srvrs))
- (idx (random len)))
- (list-ref srvrs idx))
- #f)))
+;; ;; given a list of servers get a list of valid servers, i.e. at least
+;; ;; 10 seconds old, has started and is less than 1 hour old and is
+;; ;; active (i.e. mod-time < 10 seconds
+;; ;;
+;; ;; mod-time host port start-time pid
+;; ;;
+;; ;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off
+;; ;; and servers should stick around for about two hours or so.
+;; ;;
+;; (define (server:get-best srvlst)
+;; (let* ((nums (server:get-num-servers))
+;; (now (current-seconds))
+;; (slst (sort
+;; (filter (lambda (rec)
+;; (if (and (list? rec)
+;; (> (length rec) 2))
+;; (let ((start-time (list-ref rec 3))
+;; (mod-time (list-ref rec 0)))
+;; ;; (print "start-time: " start-time " mod-time: " mod-time)
+;; (and start-time mod-time
+;; (> (- now start-time) 0) ;; been running at least 0 seconds
+;; (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds
+;; (or (not (configf:lookup *configdat* "server" "runtime")) ;; skip if not set
+;; (< (- now start-time)
+;; (+ (- (string->number (configf:lookup *configdat* "server" "runtime"))
+;; 180)
+;; (random 360)))) ;; under one hour running time +/- 180
+;; ))
+;; #f))
+;; srvlst)
+;; (lambda (a b)
+;; (< (list-ref a 3)
+;; (list-ref b 3))))))
+;; (if (> (length slst) nums)
+;; (take slst nums)
+;; slst)))
+
+;; ;; switch from server:get-list to server:get-servers-info
+;; ;;
+;; (define (server:get-first-best areapath)
+;; (let ((srvrs (server:get-best (server:get-list areapath))))
+;; (if (and srvrs
+;; (not (null? srvrs)))
+;; (car srvrs)
+;; #f)))
+;;
+;; (define (server:get-rand-best areapath)
+;; (let ((srvrs (server:get-best (server:get-list areapath))))
+;; (if (and (list? srvrs)
+;; (not (null? srvrs)))
+;; (let* ((len (length srvrs))
+;; (idx (random len)))
+;; (list-ref srvrs idx))
+;; #f)))
(define (server:record->id servr)
(handle-exceptions
exn
(begin
(debug:print-info 0 *default-log-port* "Unable to get server id from " servr ", exn=" exn)
#f)
- (match-let (((mod-time host port start-time server-id pid)
+ (match-let (((host port start-time server-id pid)
servr))
(if server-id
server-id
#f))))
@@ -360,28 +376,22 @@
(handle-exceptions
exn
(begin
(debug:print-info 0 *default-log-port* "Unable to get server url from " servr ", exn=" exn)
#f)
- (match-let (((mod-time host port start-time server-id pid)
+ (match-let (((host port start-time server-id pid)
servr))
(if (and host port)
(conc host ":" port)
#f))))
-(define (server:get-client-signature) ;; BB> why is this proc named "get-"? it returns nothing -- set! has not return value.
- (if *my-client-signature* *my-client-signature*
- (let ((sig (server:mk-signature)))
- (set! *my-client-signature* sig)
- *my-client-signature*)))
-
;; if server-start-last exists, and wasn't old enough, wait + 1, then call this function recursively until it is old enough.
;; if it is old enough, overwrite it and wait 0.25 seconds.
;; if it then has the wrong server key, wait + 1 and call this function recursively.
;;
-(define (server:wait-for-server-start-last-flag areapath)
+#;(define (server:wait-for-server-start-last-flag areapath)
(let* ((start-flag (conc areapath "/logs/server-start-last"))
;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds)
(idletime (configf:lookup-number *configdat* "server" "idletime" default: 4))
(server-key (conc (get-host-name) "-" (current-process-id))))
(if (file-exists? start-flag)
@@ -405,20 +415,114 @@
(seconds->time-string fmodtime) ", time since last start: " delta ", required idletime: " idletime ", gating reason:" (if old-enough "another job started a server" "too soon to start another server"))
(thread-sleep! ( + 1 idletime))
(server:wait-for-server-start-last-flag areapath)))))))
+;; oldest server alive determines host then choose random of youngest
+;; five servers on that host
+;;
+(define (server:get-servers-info areapath)
+ (let* ((servinfodir (conc *toppath*"/.servinfo")))
+ (if (not (file-exists? servinfodir))
+ (create-directory servinfodir))
+ (let* ((allfiles (glob (conc servinfodir"/*")))
+ (res (make-hash-table)))
+ (for-each
+ (lambda (f)
+ (let* ((hostport (pathname-strip-directory f))
+ (serverdat (server:logf-get-start-info f)))
+ (match serverdat
+ ((host port start server-id pid)
+ (if (and host port start server-id pid)
+ (hash-table-set! res hostport serverdat)
+ (debug:print-info 0 *default-log-port* "bad server info for "f": "serverdat)))
+ (else
+ (debug:print-info 0 *default-log-port* "bad server info for "f": "serverdat)))))
+ allfiles)
+ res)))
+
+;; oldest server alive determines host then choose random of youngest
+;; five servers on that host
+;;
+;; mode:
+;; best - get best server (random of newest five)
+;; home - get home host based on oldest server
+;; info - print info
+(define (server:choose-server areapath #!optional (mode 'best))
+ ;; age is current-starttime
+ ;; find oldest alive
+ ;; 1. sort by age ascending and ping until good
+ ;; find alive rand from youngest
+ ;; 1. sort by age descending
+ ;; 2. take five
+ ;; 3. check alive, discard if not and repeat
+ (let* ((serversdat (server:get-servers-info areapath))
+ (servkeys (hash-table-keys serversdat))
+ (by-time-asc (if (not (null? servkeys))
+ (sort servkeys ;; list of "host:port"
+ (lambda (a b)
+ (>= (list-ref (hash-table-ref serversdat a) 2)
+ (list-ref (hash-table-ref serversdat b) 2))))
+ '())))
+ (if (not (null? by-time-asc))
+ (let* ((oldest (last by-time-asc))
+ (oldest-dat (hash-table-ref serversdat oldest))
+ (host (list-ref oldest-dat 0))
+ (all-valid (filter (lambda (x)
+ (equal? host (list-ref (hash-table-ref serversdat x) 0)))
+ by-time-asc))
+ (best-five (lambda ()
+ (if (> (length all-valid) 5)
+ (take all-valid 5)
+ all-valid)))
+ (names->dats (lambda (names)
+ (map (lambda (x)
+ (hash-table-ref serversdat x))
+ names)))
+ (am-home? (lambda ()
+ (let* ((currhost (get-host-name))
+ (bestadrs (server:get-best-guess-address currhost)))
+ (or (equal? host currhost)
+ (equal? host bestadrs))))))
+ (case mode
+ ((info)
+ (print "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid)
+ (print "youngest: "(hash-table-ref serversdat (car all-valid))))
+ ((home) host)
+ ((homehost) (cons host (am-home?))) ;; shut up old code
+ ((home?) (am-home?))
+ ((best-five)(names->dats (best-five)))
+ ((all-valid)(names->dats all-valid))
+ ((best) (let* ((best-five (best-five))
+ (len (length best-five)))
+ (hash-table-ref serversdat (list-ref best-five (random len)))))
+ ((count)(length all-valid))
+ (else
+ (debug:print 0 *default-log-port* "ERROR: invalid command "mode)
+ #f)))
+ (begin
+ (server:run areapath)
+ (thread-sleep! 3)
+ (case mode
+ ((homehost) (cons #f #f))
+ (else #f))))))
-
+;; would like to eventually get rid of this
+;;
+(define (common:on-homehost?)
+ (server:choose-server *toppath* 'home?))
+
;; kind start up of server, wait before allowing another server for a given
;; area to be launched
;;
(define (server:kind-run areapath)
;; look for $MT_RUN_AREA_HOME/logs/server-start-last
;; and wait for it to be at least seconds old
- (server:wait-for-server-start-last-flag areapath)
- (if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
+ ;; (server:wait-for-server-start-last-flag areapath)
+ (if (< (server:choose-server areapath 'count) 10)
+ (server:run areapath))
+ #;(if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
(let* ((lock-file (conc areapath "/logs/server-start.lock")))
(let* ((start-flag (conc areapath "/logs/server-start-last")))
(common:simple-file-lock-and-wait lock-file expire-time: 25)
(debug:print-info 2 *default-log-port* "server:kind-run: touching " start-flag)
(system (conc "touch " start-flag)) ;; lazy but safe
@@ -434,35 +538,33 @@
(let loop ((server-info (server:check-if-running areapath))
(try-num 0))
(if (or server-info
(> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
(server:record->url server-info)
- (let ((num-ok (length (server:get-best (server:get-list areapath)))))
+ (let ((num-ok (length (server:choose-server areapath 'all-valid))))
(if (and (> try-num 0) ;; first time through simply wait a little while then try again
(< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one
- (server:kind-run areapath))
+ (server:run areapath))
(thread-sleep! 5)
(loop (server:check-if-running areapath)
(+ try-num 1)))))))
-(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.
-
(define (server:get-num-servers #!key (numservers 2))
(let ((ns (string->number
(or (configf:lookup *configdat* "server" "numservers") "notanumber"))))
(or ns numservers)))
;; no longer care if multiple servers are started by accident. older servers will drop off in time.
;;
(define (server:check-if-running areapath) ;; #!key (numservers "2"))
(let* ((ns (server:get-num-servers)) ;; get the setting the for maximum number of servers allowed
- (servers (server:get-best (server:get-list areapath))))
+ (servers (server:choose-server areapath 'best-five))) ;; (server:get-best (server:get-list areapath))))
(if (or (and servers
(null? servers))
- (not servers)
- (and (list? servers)
- (< (length servers) (+ 1 (random ns))))) ;; somewhere between 1 and numservers
+ (not servers))
+ ;; (and (list? servers)
+ ;; (< (length servers) (+ 1 (random ns))))) ;; somewhere between 1 and numservers
#f
(let loop ((hed (car servers))
(tal (cdr servers)))
(let ((res (server:check-server hed)))
(if res
@@ -473,15 +575,12 @@
;; ping the given server
;;
(define (server:check-server server-record)
(let* ((server-url (server:record->url server-record))
- (server-id (server:record->id server-record))
- (res (case *transport-type*
- ((http)(server:ping server-url server-id))
- ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
- )))
+ (server-id (server:record->id server-record))
+ (res (server:ping server-url server-id)))
(if res
server-url
#f)))
(define (server:kill servr)
@@ -569,11 +668,11 @@
(define (server:expiration-timeout)
(let ((tmo (configf:lookup *configdat* "server" "timeout")))
(if (and (string? tmo)
(common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below
(* 3600 (string->number tmo))
- 1200)))
+ 60)))
(define (server:get-best-guess-address hostname)
(let ((res #f))
(for-each
(lambda (adr)
ADDED treemod.scm
Index: treemod.scm
==================================================================
--- /dev/null
+++ treemod.scm
@@ -0,0 +1,167 @@
+;;======================================================================
+;; Copyright 2006-2013, Matthew Welland.
+;;
+;; This file is part of Megatest.
+;;
+;; Megatest is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; Megatest is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with Megatest. If not, see .
+;;
+;;======================================================================
+
+(declare (unit treemod))
+;; (declare (uses margs))
+;; (declare (uses launch))
+;; ;; (declare (uses megatest-version))
+;; (declare (uses gutils))
+;; (declare (uses db))
+;; (declare (uses server))
+;; ;; (declare (uses synchash))
+;; (declare (uses dcommon))
+;;
+;; (include "megatest-version.scm")
+;; (include "common_records.scm")
+;; (include "db_records.scm")
+;; (include "key_records.scm")
+
+(module treemod
+ *
+
+(import
+ scheme
+ chicken
+ data-structures
+
+ (prefix iup iup:)
+ canvas-draw
+ iup
+ regex
+ srfi-1
+ srfi-13
+ format
+ )
+
+;;======================================================================
+;; T R E E S T U F F
+;;======================================================================
+
+;; path is a list of nodes, each the child of the previous
+;; this routine returns the id so another node can be added
+;; either as a leaf or as a branch
+;;
+;; BUG: This needs a stop sensor for when a branch is exhausted
+;;
+(define (tree:find-node obj path)
+ ;; start at the base of the tree
+ (if (null? path)
+ #f ;; or 0 ????
+ (let loop ((hed (car path))
+ (tal (cdr path))
+ (depth 0)
+ (nodenum 0))
+ ;; nodes in iup tree are 100% sequential so iterate over nodenum
+ (if (iup:attribute obj (conc "DEPTH" nodenum)) ;; end when no more nodes
+ (let ((node-depth (string->number (iup:attribute obj (conc "DEPTH" nodenum))))
+ (node-title (iup:attribute obj (conc "TITLE" nodenum))))
+ (if (and (equal? depth node-depth)
+ (equal? hed node-title)) ;; yep, this is the one!
+ (if (null? tal) ;; end of the line
+ nodenum
+ (loop (car tal)(cdr tal)(+ depth 1)(+ 1 nodenum)))
+ ;; this is the case where we found part of the hierarchy but not
+ ;; all of it, i.e. the node-depth went from deep to less deep
+ (if (> depth node-depth) ;; (+ 1 node-depth))
+ #f
+ (loop hed tal depth (+ nodenum 1)))))
+ #f))))
+
+;; top is the top node name zeroeth node VALUE=0
+(define (tree:add-node obj top nodelst #!key (userdata #f))
+ (let ((curr-top (iup:attribute obj "TITLE0")))
+ (if (or (not (string? curr-top))
+ (string-null? curr-top)
+ (string-match "^\\s*$" curr-top))
+ (iup:attribute-set! obj "ADDBRANCH0" top))
+
+
+
+ (cond
+ ((not (equal? top (iup:attribute obj "TITLE0")))
+ (print "ERROR: top name " top " doesn't match " (iup:attribute obj "TITLE0")))
+ ((null? nodelst))
+ (else
+ (let loop ((hed (car nodelst))
+ (tal (cdr nodelst))
+ (depth 1)
+ (pathl (list top)))
+ ;; Because the tree dialog changes node numbers when
+ ;; nodes are added or removed we must look up nodes
+ ;; each and every time. 0 is the top node so default
+ ;; to that.
+ (let* ((newpath (append pathl (list hed)))
+ (parentnode (tree:find-node obj pathl))
+ (nodenum (tree:find-node obj newpath)))
+ ;; Add the branch under lastnode if not found
+ (if (not nodenum)
+ (begin
+ (iup:attribute-set! obj (conc "ADDBRANCH" parentnode) hed)
+ ;; ERROR? ADDING DATA TO PARENT, DONT WE WANT IT ON CREATED NODE?
+ (if userdata
+ (iup:attribute-set! obj (conc "USERDATA" parentnode) userdata))
+ (if (null? tal)
+ #t
+ ;; reset to top
+ (loop (car nodelst)(cdr nodelst) 1 (list top))))
+ (if (null? tal) ;; if null here then this path has already been added
+ #t
+ (loop (car tal)(cdr tal)(+ depth 1) newpath)))))))))
+
+(define (tree:node->path obj nodenum)
+ (let loop ((currnode 0)
+ (path '()))
+ (let* ((node-depth (string->number (iup:attribute obj (conc "DEPTH" currnode))))
+ (node-title (iup:attribute obj (conc "TITLE" currnode)))
+ (trimpath (if (and (not (null? path))
+ (> (length path) node-depth))
+ (take path node-depth)
+ path))
+ (newpath (append trimpath (list node-title))))
+ (if (>= currnode nodenum)
+ newpath
+ (loop (+ currnode 1)
+ newpath)))))
+
+(define (tree:delete-node obj top node-path) ;; node-path is a list of strings
+ (let ((id (tree:find-node obj (cons top node-path))))
+ (print "Found node to remove " id " for path " top " " node-path)
+ (iup:attribute-set! obj (conc "DELNODE" id) "SELECTED")))
+
+)
+#|
+
+ (let* ((tb (iup:treebox
+ #:value 0
+ #:name "Runs"
+ #:expand "YES"
+ #:addexpanded "NO"
+ #:selection-cb
+ (lambda (obj id state)
+ ;; (print "obj: " obj ", id: " id ", state: " state)
+ (let* ((run-path (tree:node->path obj id))
+ (run-id (tree-path->run-id (cdr run-path))))
+ (if run-id
+ (begin
+ (dboard:data-curr-run-id-set! data run-id)
+ (dashboard:update-run-summary-tab)))
+ ;; (print "path: " (tree:node->path obj id) " run-id: " run-id)
+ ))))
+|#