Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -28,16 +28,18 @@ ezsteps.scm lock-queue.scm rmt.scm api.scm \ subrun.scm portlogger.scm archive.scm env.scm \ diff-report.scm cgisetup/models/pgdb.scm # module source files -MSRCFILES = dbmod.scm dbfile.scm debugprint.scm mtargs.scm commonmod.scm +MSRCFILES = dbmod.scm dbfile.scm debugprint.scm mtargs.scm commonmod.scm \ + treemod.scm -all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut tcmt +all : $(PREFIX)/bin/.$(ARCHSTR) mtest ndboard dboard mtut tcmt # dbmod.import.o is just a hack here -mofiles/dbfile.o : mofiles/debugprint.o dbmod.import.o +mofiles/dbfile.o : mofiles/debugprint.o mofiles/commonmod.o dbmod.import.o +db.o : dbmod.import.o mofiles/debugprint.o : mofiles/mtargs.o # ftail.scm rmtmod.scm commonmod.scm removed # MSRCFILES = ducttape-lib.scm pkts.scm stml2.scm cookie.scm mutils.scm \ # mtargs.scm commonmod.scm dbmod.scm adjutant.scm ulex.scm \ @@ -86,18 +88,21 @@ # ARCHSTR=$(shell bash -c "echo \$$MACHTYPE") PNGFILES = $(shell cd docs/manual;ls *png) -mtest: $(OFILES) readline-fix.scm megatest.o $(MOFILES) $(MOIMPFILES) megatest-version.scm +mtest: $(OFILES) readline-fix.scm $(MOFILES) $(MOIMPFILES) megatest.o megatest-version.scm csc $(CSCOPTS) $(OFILES) $(MOFILES) $(MOIMPFILES) megatest.o -o mtest showmtesthash: @echo $(MTESTHASH) dboard : $(OFILES) $(GOFILES) dashboard.scm $(MOFILES) $(MOIMPFILES) megatest-version.scm megatest-fossil-hash.scm csc $(CSCOPTS) $(OFILES) dashboard.scm $(GOFILES) $(MOFILES) $(MOIMPFILES) -o dboard + +ndboard : $(OFILES) $(GOFILES) newdashboard.scm $(MOFILES) $(MOIMPFILES) megatest-version.scm megatest-fossil-hash.scm + csc $(CSCOPTS) $(OFILES) newdashboard.scm $(GOFILES) $(MOFILES) $(MOIMPFILES) -o ndboard mtut: $(OFILES) $(MOFILES) megatest-fossil-hash.scm mtut.scm megatest-version.scm csc $(CSCOPTS) $(OFILES) $(MOFILES) mtut.scm -o mtut # include makefile.inc @@ -132,11 +137,11 @@ ezsteps.o # mofiles/rmtmod.o \ # mofiles/commonmod.o \ -tcmt : $(TCMTOBJS) tcmt.scm megatest-version.scm +tcmt : $(TCMTOBJS) tcmt.scm megatest-version.scm $(MOFILES) $(MOIMPFILES) csc $(CSCOPTS) $(TCMTOBJS) $(MOFILES) $(MOIMPFILES) tcmt.scm -o tcmt # install documentation to $(PREFIX)/docs # DOES NOT REBUILD DOCS # @@ -169,17 +174,20 @@ monitor.o dashboard.o archive.o megatest.o : db_records.scm megatest-fossil-hash.scm tests.o runs.o dashboard.o dashboard-tests.o dashboard-context-menu.o dashboard-main.o : run_records.scm db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm -db.o api.o : mofiles/dbmod.o mofiles/dbfile.o mofiles/debugprint.o mofiles/commonmod.o +db.o api.o : mofiles/dbmod.o mofiles/dbfile.o mofiles/debugprint.o mofiles/commonmod.o dbmod.import.o tests.o tasks.o dashboard-tasks.o : task_records.scm runs.o : test_records.scm -megatest.o : megatest-fossil-hash.scm megatest-version.scm +mofiles-made : $(MOFILES) + make $(MOIMPFILES) + +megatest.o : mofiles-made megatest-fossil-hash.scm megatest-version.scm $(MOFILES) $(MOIMPFILES) rmt.scm client.scm common.scm configf.scm dashboard-guimonitor.scm dashboard-tests.scm dashboard.scm db.scm dcommon.scm ezsteps.scm http-transport.scm index-tree.scm items.scm keys.scm launch.scm megatest.scm monitor.scm mt.scm newdashboard.scm runconfig.scm runs.scm server.scm tdb.scm tests.scm tree.scm : common_records.scm megatest-version.scm common_records.scm : altdb.scm @@ -223,121 +231,20 @@ @echo Installing to PREFIX=$(PREFIX) $(INSTALL) mtest $(PREFIX)/bin/.$(ARCHSTR)/mtest utils/mk_wrapper $(PREFIX) mtest $(PREFIX)/bin/megatest chmod a+x $(PREFIX)/bin/megatest -$(PREFIX)/bin/.$(ARCHSTR)/ndboard : ndboard - $(INSTALL) ndboard $(PREFIX)/bin/.$(ARCHSTR)/ndboard - -$(PREFIX)/bin/newdashboard : $(PREFIX)/bin/.$(ARCHSTR)/ndboard utils/mk_wrapper - utils/mk_wrapper $(PREFIX) ndboard $(PREFIX)/bin/newdashboard - chmod a+x $(PREFIX)/bin/newdashboard - -# mtutil - -$(PREFIX)/bin/.$(ARCHSTR)/mtut : mtut - $(INSTALL) mtut $(PREFIX)/bin/.$(ARCHSTR)/mtut - -install-mtut : mtut - $(INSTALL) mtut $(PREFIX)/bin/mtut - -$(PREFIX)/bin/mtutil : $(PREFIX)/bin/.$(ARCHSTR)/mtut utils/mk_wrapper - utils/mk_wrapper $(PREFIX) mtut $(PREFIX)/bin/mtutil - chmod a+x $(PREFIX)/bin/mtutil - -# mtexec - -mtexec: $(OFILES) $(MOFILES) megatest-fossil-hash.scm mtexec.scm - csc $(CSCOPTS) $(OFILES) $(MOFILES) mtexec.scm -o mtexec - -$(PREFIX)/bin/.$(ARCHSTR)/mtexec : mtexec - $(INSTALL) mtexec $(PREFIX)/bin/.$(ARCHSTR)/mtexec - -$(PREFIX)/bin/mtexec : $(PREFIX)/bin/.$(ARCHSTR)/mtexec utils/mk_wrapper - utils/mk_wrapper $(PREFIX) mtexec $(PREFIX)/bin/mtexec - chmod a+x $(PREFIX)/bin/mtexec - -# tcmt - -$(PREFIX)/bin/.$(ARCHSTR)/tcmt : tcmt - $(INSTALL) tcmt $(PREFIX)/bin/.$(ARCHSTR)/tcmt - -$(PREFIX)/bin/tcmt : $(PREFIX)/bin/.$(ARCHSTR)/tcmt utils/mk_wrapper - utils/mk_wrapper $(PREFIX) tcmt $(PREFIX)/bin/tcmt - chmod a+x $(PREFIX)/bin/tcmt - -$(PREFIX)/bin/mt_laststep : utils/mt_laststep - $(INSTALL) $< $@ - chmod a+x $@ - -$(PREFIX)/bin/mt_runstep : utils/mt_runstep - $(INSTALL) $< $@ - chmod a+x $@ - -$(PREFIX)/bin/serialize-env: serialize-env.scm - csc serialize-env.scm - $(INSTALL) serialize-env $@ - -$(PREFIX)/bin/mt_ezstep : utils/mt_ezstep - $(INSTALL) $< $@ - chmod a+x $@ - -$(PREFIX)/bin/mt_xterm : utils/mt_xterm - $(INSTALL) $< $@ - chmod a+x $@ - -$(PREFIX)/bin/nbfake : utils/nbfake - $(INSTALL) $< $@ - chmod a+x $@ - -$(PREFIX)/bin/remrun : utils/remrun - $(INSTALL) $< $@ - chmod a+x $@ - -$(PREFIX)/bin/viewscreen : utils/viewscreen - $(INSTALL) $< $@ - chmod a+x $@ - -$(PREFIX)/bin/nbfind : utils/nbfind - $(INSTALL) $< $@ - chmod a+x $@ - -$(PREFIX)/bin/mtrunner : utils/mtrunner - $(INSTALL) $< $@ - chmod a+x $@ - -$(PREFIX)/bin/mt-old-to-new.sh : utils/mt-old-to-new.sh - $(INSTALL) $< $@ - chmod a+x $@ - -$(PREFIX)/bin/mt-new-to-old.sh : utils/mt-new-to-old.sh - $(INSTALL) $< $@ - chmod a+x $@ - - -deploytarg/nbfake : utils/nbfake - $(INSTALL) $< $@ - chmod a+x $@ - -deploytarg/viewscreen : utils/viewscreen - $(INSTALL) $< $@ - chmod a+x $@ - -deploytarg/nbfind : utils/nbfind - $(INSTALL) $< $@ - chmod a+x $@ - -$(PREFIX)/bin/mtest-reaper: helpers/mtest-reaper.scm helpers/ducttape-lib.scm helpers/inteldate.scm helpers/mimetypes.scm - make -C helpers $@ PREFIX=$(PREFIX) INSTALL=$(INSTALL) ARCHSTR=$(ARCHSTR) - -mtest-reaper: $(PREFIX)/bin/mtest-reaper - -# install dashboard as dboard so wrapper script can be called dashboard $(PREFIX)/bin/.$(ARCHSTR)/dboard : dboard $(FILES) utils/mk_wrapper utils/mk_wrapper $(PREFIX) dboard $(PREFIX)/bin/dashboard chmod a+x $(PREFIX)/bin/dashboard $(INSTALL) dboard $(PREFIX)/bin/.$(ARCHSTR)/dboard + +# install dashboard as dboard so wrapper script can be called dashboard +$(PREFIX)/bin/.$(ARCHSTR)/ndboard : ndboard $(FILES) utils/mk_wrapper + utils/mk_wrapper $(PREFIX) ndboard $(PREFIX)/bin/newdashboard + chmod a+x $(PREFIX)/bin/newdashboard + $(INSTALL) ndboard $(PREFIX)/bin/.$(ARCHSTR)/ndboard $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so : lib/libpangox-1.0.so if [[ $(ARCHSTR) == 12.5 ]]; then \ mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib; \ $(INSTALL) lib/libpangox-1.0.so $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so; \ @@ -354,11 +261,12 @@ mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib; \ $(INSTALL) lib/libxcb-xlib.so.0 $(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0; \ fi install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \ - $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \ + $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/.$(ARCHSTR)/ndboard $(PREFIX)/bin/dashboard \ + $(HELPERS) $(PREFIX)/bin/nbfake \ $(PREFIX)/bin/.$(ARCHSTR)/mtexec $(PREFIX)/bin/mtexec $(PREFIX)/bin/serialize-env \ $(PREFIX)/bin/nbfind $(PREFIX)/bin/mtrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \ $(PREFIX)/bin/mt-old-to-new.sh $(PREFIX)/bin/mt-new-to-old.sh \ $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun \ $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \ @@ -365,11 +273,10 @@ $(PREFIX)/bin/tcmt $(PREFIX)/share/db/mt-pg.sql \ $(PREFIX)/share/js/jquery-3.1.0.slim.min.js \ $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so \ $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0 \ $(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0 -# $(PREFIX)/bin/.$(ARCHSTR)/ndboard # $(PREFIX)/bin/newdashboard $(PREFIX)/bin/.$(ARCHSTR) : mkdir -p $(PREFIX)/bin/.$(ARCHSTR) @@ -390,11 +297,11 @@ $(PREFIX)/megatest $(PREFIX)/dashboard mtest mtutil mtut \ tcmt readline-fix.scm serialize-env dboard *.o \ megatest-fossil-hash.* altdb.scm mofiles/*.o \ mofiles/*.o vg.o cookie.o dashboard-main.o \ ducttape-lib.o ftail.o mutils.o pkts.o rmtmod.o stml2.o \ - tcmt.o *.import.scm *.import.o + tcmt.o *.import.scm *.import.o ndboard rm -f $(OFILES) $(GOFILES) $(MOFILES) $(TCMTOBJS) \ $(PREFIX)/megatest $(PREFIX)/dashboard mtest mtutil mtut \ tcmt ftail.import.scm readline-fix.scm serialize-env \ dboard dboard.o megatest.o dashboard.o \ megatest-fossil-hash.* altdb.scm mofiles/*.o vg.o Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -429,8 +429,8 @@ ;; (boolean? res)) ;; res ;; (list "ERROR, not string, list, number or boolean" 1 cmd params res))))) (db:obj->string res transport: 'http))) (begin - (debug:print 0 *default-log-port* "Server refused to process request. Sever id mismatch. recived " key " expected: " *server-id* ".\nOther arguments recived: cmd=" cmd " params = " params) + (debug:print 0 *default-log-port* "Server refused to process request. Server id mismatch. recived " key " expected: " *server-id* ".\nOther arguments recived: cmd=" cmd " params = " params) (db:obj->string (conc "Server refused to process request server-id mismatch: " key ", " *server-id*) transport: 'http))))) Index: archive.scm ================================================================== --- archive.scm +++ archive.scm @@ -346,11 +346,11 @@ (rsync-exe (or (configf:lookup *configdat* "archive" "rsync") "rsync")) (print-prefix "Running: ") (archive-info (archive:allocate-new-archive-block blockid-cache *toppath* tsname min-space target-patt run-patt "megatest-db")) (archive-dir (if archive-info (cdr archive-info) #f)) (archive-id (if archive-info (car archive-info) -1)) - (home-host (common:get-homehost)) + (home-host (server:choose-server *toppath* 'homehost)) (archive-time (seconds->std-time-str (current-seconds))) (archive-staging-db (conc *toppath* "/.db-snapshot/archive_" archive-time)) (tmp-db-path (conc (common:get-db-tmp-area) "/megatest.db")) (dbfile (conc archive-staging-db "/megatest.db"))) (create-directory archive-staging-db #t) Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -44,24 +44,12 @@ #;(define (client:logout serverdat) (let ((ok (and (socket? serverdat) (cdb:logout serverdat *toppath* (client:get-signature))))) ok)) -#;(define (client:connect iface port) - (http-transport:client-connect iface port) - #;(case (server:get-transport) - ((rpc) (rpc:client-connect iface port)) - ((http) (http:client-connect iface port)) - ((zmq) (zmq:client-connect iface port)) - (else (rpc:client-connect iface port)))) - (define (client:setup areapath #!key (remaining-tries 100) (failed-connects 0)) - (client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects) - #;(case (server:get-transport) - ((rpc) (rpc-transport:client-setup remaining-tries: remaining-tries failed-connects: failed-connects)) ;;(client:setup-rpc run-id)) - ((http)(client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects)) - (else (rpc-transport:client-setup remaining-tries: remaining-tries failed-connects: failed-connects)))) ;; (client:setup-rpc run-id)))) + (client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects)) ;; Do all the connection work, look up the transport type and set up the ;; connection if required. ;; ;; There are two scenarios. @@ -89,53 +77,52 @@ (exit 1)) ;; ;; Alternatively here, we can get the list of candidate servers and work our way ;; through them searching for a good one. ;; - (let* ((server-dat (server:get-rand-best areapath)) ;; (server:get-first-best areapath)) + (let* ((server-dat (server:choose-server areapath 'best)) (runremote (or area-dat *runremote*))) (if (not server-dat) ;; no server found (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)) - (let ((host (cadr server-dat)) - (port (caddr server-dat)) - (server-id (caddr (cddr server-dat)))) - (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) - (if (and (not area-dat) - (not *runremote*)) - (begin - (set! *runremote* (make-remote)) - (let* ((server-info (remote-server-info *runremote*))) - (if server-info - (begin - (remote-server-url-set! *runremote* (server:record->url server-info)) - (remote-server-id-set! *runremote* (server:record->id server-info))))))) - (if (and host port server-id) - (let* ((start-res (case *transport-type* - ((http)(http-transport:client-connect host port server-id)))) - (ping-res (case *transport-type* - ((http)(rmt:login-no-auto-client-setup start-res))))) - (if (and start-res - ping-res) - (let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago - (if runremote - (begin - (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res) - (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res)) - start-res) - (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)))) - (begin ;; login failed but have a server record, clean out the record and try again - (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid. Fixes part of Randy;s ticket 1405717332 - (case *transport-type* - ((http)(http-transport:close-connections))) - (if *runremote* - (remote-conndat-set! runremote #f) ;; (hash-table-delete! runremote run-id) - ) - (thread-sleep! 1) - (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)) - ))) - (begin ;; no server registered - ;; (server:kind-run areapath) - (server:start-and-wait areapath) - (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries) - (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms. - (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))))))))) + (match server-dat + ((host port start-time server-id pid) + (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) + (if (and (not area-dat) + (not *runremote*)) + (begin + (set! *runremote* (make-remote)) + (let* ((server-info (remote-server-info *runremote*))) + (if server-info + (begin + (remote-server-url-set! *runremote* (server:record->url server-info)) + (remote-server-id-set! *runremote* (server:record->id server-info))))))) + (if (and host port server-id) + (let* ((start-res (http-transport:client-connect host port server-id)) + (ping-res (rmt:login-no-auto-client-setup start-res))) + (if (and start-res + ping-res) + (let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago + (if runremote + (begin + (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res) + (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res)) + start-res) + (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)))) + (begin ;; login failed but have a server record, clean out the record and try again + (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid. Fixes part of Randy;s ticket 1405717332 + (case *transport-type* + ((http)(http-transport:close-connections))) + (if *runremote* + (remote-conndat-set! runremote #f) ;; (hash-table-delete! runremote run-id) + ) + (thread-sleep! 1) + (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)) + ))) + (begin ;; no server registered + ;; (server:kind-run areapath) + (server:start-and-wait areapath) + (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries) + (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms. + (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))))) + (else + (debug:print 0 *default-log-port* "ERROR: malformed server-dat="server-dat))))))) Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -142,10 +142,11 @@ (define *passnum* 0) ;; when running track calls to run-tests or similar ;; (define *alt-log-file* #f) ;; used by -log ;; (define *common:denoise* (make-hash-table)) ;; for low noise printing (define *default-log-port* (current-error-port)) (define *time-zero* (current-seconds)) ;; for the watchdog +(define *on-exit-procs* '()) ;; add procs to this list to be executed on exit (define *default-area-tag* "local") ;; DATABASE ;; (define *dbstruct-dbs* #f) ;; used to cache the dbstruct in db:setup. Goal is to remove this. ;; db stats @@ -315,11 +316,14 @@ (define (common:logpro-exit-code->test-status exit-code) (status-sym->string (common:logpro-exit-code->status-sym exit-code))) (defstruct remote - (hh-dat (common:get-homehost)) ;; homehost record ( addr . hhflag ) + (hh-dat (let ((res (or (server:choose-server *toppath* 'homehost) + (cons #f #f)))) + (assert (pair? res)(conc "FATAL: hh-dat should be a pair, got "res)) + res)) (server-url #f) ;; (server:check-if-running *toppath*) #f)) (server-id #f) (server-info (if *toppath* (server:check-if-running *toppath*) #f)) (last-server-check 0) ;; last time we checked to see if the server was alive (connect-time (current-seconds)) @@ -1305,72 +1309,10 @@ ;;====================================================================== ;; logic for getting homehost. Returns (host . at-home) ;; IF *toppath* is not set, wait up to five seconds trying every two seconds ;; (this is to accomodate the watchdog) ;; -(define (common:get-homehost #!key (trynum 5)) - ;; called often especially at start up. use mutex to eliminate collisions - (mutex-lock! *homehost-mutex*) - (cond - (*home-host* - (mutex-unlock! *homehost-mutex*) - *home-host*) - ((not *toppath*) - (mutex-unlock! *homehost-mutex*) - (launch:setup) ;; safely mutexed now - (if (> trynum 0) - (begin - (thread-sleep! 2) - (common:get-homehost trynum: (- trynum 1))) - #f)) - (else - (let* ((currhost (get-host-name)) - (bestadrs (server:get-best-guess-address currhost)) - ;; first look in config, then look in file .homehost, create it if not found - (homehost (or (configf:lookup *configdat* "server" "homehost" ) - (handle-exceptions - exn - (if (> trynum 0) - (let ((delay-time (* (- 5 trynum) 5))) - (mutex-unlock! *homehost-mutex*) - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying " - delay-time " seconds and trying again, message: " ((condition-property-accessor 'exn 'message) exn) - ", exn=" exn) - (thread-sleep! delay-time) - (common:get-homehost trynum: (- trynum 1))) - (begin - (mutex-unlock! *homehost-mutex*) - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time) - "] Failed to read .homehost file after trying five times. Giving up and exiting, message: " - ((condition-property-accessor 'exn 'message) exn)) - (exit 1))) - (let ((hhf (conc *toppath* "/.homehost"))) - (if (common:file-exists? hhf) - (with-input-from-file hhf read-line) - (if (file-write-access? *toppath*) - (begin - (with-output-to-file hhf - (lambda () - (print bestadrs))) - (begin - (mutex-unlock! *homehost-mutex*) - (car (common:get-homehost)))) - #f)))))) - (at-home (or (equal? homehost currhost) - (equal? homehost bestadrs)))) - (set! *home-host* (cons homehost at-home)) - (mutex-unlock! *homehost-mutex*) - *home-host*)))) - -;;====================================================================== -;; am I on the homehost? -;; -(define (common:on-homehost?) - (let ((hh (common:get-homehost))) - (if hh - (cdr hh) - #f))) ;;====================================================================== ;; do we honor the caches of the config files? ;; (define (common:use-cache?) @@ -2049,11 +1991,11 @@ (if (null? tal) #f (loop (car tal)(cdr tal) best-host))))))))) (define (common:wait-for-homehost-load maxnormload msg) (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. #f - (common:get-homehost))) + (server:choose-server *toppath* 'homehost))) (hh (if hh-dat (car hh-dat) #f))) (common:wait-for-normalized-load maxnormload msg hh))) (define (common:get-num-cpus remote-host) (let* ((actual-host (or remote-host (get-host-name)))) @@ -3344,11 +3286,11 @@ pktsdirs)) ;;====================================================================== ;; use-lt is use linktree "lt" link to find pkts dir (define (common:save-pkt pktalist-in mtconf use-lt #!key (add-only #f)) ;; add-only saves the pkt only if there is a parent already - (if (or add-only + (if (or (not add-only) (hash-table-exists? *pkts-info* 'last-parent)) (let* ((parent (hash-table-ref/default *pkts-info* 'last-parent #f)) (pktalist (if parent (cons `(parent . ,parent) pktalist-in) @@ -3359,10 +3301,11 @@ (let ((pktsdir (or (hash-table-ref/default *pkts-info* 'pkts-dir #f) (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt)) (pktsdir (car pktsdirs))) ;; assume it is there (hash-table-set! *pkts-info* 'pkts-dir pktsdir) pktsdir)))) + (debug:print 0 *default-log-port* "pktsdir: "pktsdir) (handle-exceptions exn (debug:print-info 0 "failed to write out packet to " pktsdir ", exn=" exn) ;; don't care if this failed for now but MUST FIX - BUG!! (if (not (file-exists? pktsdir)) (create-directory pktsdir #t)) Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -16,10 +16,28 @@ ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;; ;;====================================================================== +(declare (uses common)) +(declare (uses margs)) +(declare (uses keys)) +(declare (uses items)) +(declare (uses db)) +(declare (uses configf)) +(declare (uses process)) +(declare (uses launch)) +(declare (uses runs)) +(declare (uses dashboard-tests)) +(declare (uses treemod)) +(declare (uses dcommon)) +(declare (uses dashboard-context-menu)) +(declare (uses vg)) +(declare (uses subrun)) +(declare (uses mt)) +(declare (uses dbfile)) + (use format) (require-library iup) (import (prefix iup iup:)) @@ -27,28 +45,11 @@ (import canvas-draw-iup) (use ducttape-lib) (use sqlite3 srfi-1 posix regex regex-case srfi-69 typed-records sparse-vectors) ;; defstruct (import (prefix sqlite3 sqlite3:)) (import dbfile) - -(declare (uses common)) -(declare (uses margs)) -(declare (uses keys)) -(declare (uses items)) -(declare (uses db)) -(declare (uses configf)) -(declare (uses process)) -(declare (uses launch)) -(declare (uses runs)) -(declare (uses dashboard-tests)) -(declare (uses tree)) -(declare (uses dcommon)) -(declare (uses dashboard-context-menu)) -(declare (uses vg)) -(declare (uses subrun)) -(declare (uses mt)) -(declare (uses dbfile)) +(import treemod) (include "common_records.scm") (include "db_records.scm") (include "run_records.scm") (include "task_records.scm") @@ -637,11 +638,11 @@ ;; (define (dboard:get-tests-for-run-duplicate tabdat run-id run testnamepatt key-vals) (let* ((start-time (current-seconds)) (access-mode (dboard:tabdat-access-mode tabdat)) (num-to-get (string->number (or (configf:lookup *configdat* "setup" "num-tests-to-get") - "200"))) + "400"))) (states (hash-table-keys (dboard:tabdat-state-ignore-hash tabdat))) (statuses (hash-table-keys (dboard:tabdat-status-ignore-hash tabdat))) (do-not-use-db-file-timestamps #t) ;; (configf:lookup *configdat* "setup" "do-not-use-db-file-timestamps")) ;; this still hosts runs-summary-tab (do-not-use-query-timestamps #t) ;; (configf:lookup *configdat* "setup" "do-not-use-query-timestamps")) ;; this no longer troubles runs-summary-tab (sort-info (get-curr-sort)) @@ -3809,13 +3810,13 @@ (debug:print 0 *default-log-port* "Failed to find megatest.config, exiting") (exit 1) ) ) - (if (not (common:on-homehost?)) + #;(if (not (common:on-homehost?)) (begin - (debug:print 0 *default-log-port* "WARNING: You are starting the dashboard on a machine that is not the homehost:" (common:get-homehost)) + (debug:print 0 *default-log-port* "WARNING: You are starting the dashboard on a machine that is not the homehost:" (server:get-homehost)) (debug:print 0 *default-log-port* "It will be slower.") )) (if (and (common:file-exists? mtdb-path) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -477,11 +477,11 @@ (data-synced 0) ;; count of changed records (tmp-area (common:get-db-tmp-area)) (old2new (member 'old2new options)) (dejunk (member 'dejunk options)) (killservers (member 'killservers options)) - (servers (server:get-list *toppath*)) + (servers (server:choose-server *toppath* 'all-valid)) ;; (server:get-list *toppath*)) (src-area (if old2new *toppath* tmp-area)) (dest-area (if old2new tmp-area *toppath*)) (dbfiles (if old2new (glob (conc *toppath* "/.megatest/*.db")) (glob (conc tmp-area "/.megatest/*.db")))) (keys (db:get-keys dbstruct)) (sync-durations (make-hash-table))) @@ -4586,31 +4586,41 @@ )) (define (std-exit-procedure) ;;(common:telemetry-log-close) - (on-exit (lambda () 0)) + (on-exit (lambda () 0)) ;; why is this here? ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*) (let ((no-hurry (if *time-to-exit* ;; hurry up #f (begin (set! *time-to-exit* #t) #t)))) (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.") - (if (and no-hurry (debug:debug-mode 18)) + (if (and no-hurry + (debug:debug-mode 18)) (rmt:print-db-stats)) (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds (if *dbstruct-dbs* (db:close-all *dbstruct-dbs*)) ;; one second allocated + (if (list? *on-exit-procs*) + (for-each + (lambda (proc) + (proc)) + *on-exit-procs*)) (if *task-db* (let ((db (cdr *task-db*))) (if (sqlite3:database? db) (begin (sqlite3:interrupt! db) (sqlite3:finalize! db #t) ;; (vector-set! *task-db* 0 #f) (set! *task-db* #f))))) - (http-client#close-all-connections!) + (if (and (not (args:get-arg "-server")) + *runremote*) + (begin + (debug:print-info 0 *default-log-port* "Closing all client connections...") + (http-client#close-all-connections!))) ;; (if (and *runremote* ;; (remote-conndat *runremote*)) ;; (begin ;; (http-client#close-all-connections!))) ;; for http-client (if (not (eq? *default-log-port* (current-error-port))) Index: dbfile.scm ================================================================== --- dbfile.scm +++ dbfile.scm @@ -1015,11 +1015,11 @@ (dbfile:print-err "INFO: "jfile" exists, delaying to reduce database load") (thread-sleep! 0.2))) (if (and use-mutex (common:low-noise-print 120 "over-50-parallel-api-requests")) (dbfile:print-err *api-process-request-count* " parallel api requests being processed in process " - (current-process-id) ", throttling access")) + (current-process-id))) ;; ", throttling access")) (condition-case (begin (if use-mutex (mutex-lock! *db-with-db-mutex*)) (let ((res (apply proc dbdat db params))) ;; the actual call is here. (if use-mutex (mutex-unlock! *db-with-db-mutex*)) Index: dcommon.scm ================================================================== --- dcommon.scm +++ dcommon.scm @@ -704,11 +704,11 @@ #:numlin-visible 5 )) (colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId")) (updater (lambda () (if (dashboard:monitor-changed? commondat tabdat) - (let ((servers (server:get-list *toppath* limit: 10))) + (let ((servers (server:choose-server *toppath* 'all-valid))) ;; (server:get-list *toppath* limit: 10))) (iup:attribute-set! servers-matrix "NUMLIN" (length servers)) ;; (set! colnum 0) ;; (for-each (lambda (colname) ;; ;; (print "colnum: " colnum " colname: " colname) ;; (iup:attribute-set! servers-matrix (conc "0:" colnum) colname) @@ -716,26 +716,27 @@ ;; colnames) (set! rownum 1) (for-each (lambda (server) (set! colnum 0) - (match-let (((mod-time host port start-time server-id pid) + ;; (("192.168.0.127" 60215 1669088591.0 "c85484f764df7a8550b0224409bd4bcd") + (match-let (((host port start-time server-id pid) server)) - (let* ((uptime (- (current-seconds) mod-time)) + (let* (;; (uptime (- (current-seconds) mod-time)) (runtime (if start-time - (- mod-time start-time) + (- (current-seconds) start-time) 0)) (vals (list "-" ;; (vector-ref server 0) ;; Id "-" ;; (vector-ref server 9) ;; MT-Ver pid ;; (vector-ref server 1) ;; Pid host ;; (vector-ref server 2) ;; Hostname (conc host ":" port) ;; (conc (vector-ref server 3) ":" (vector-ref server 4)) ;; IP:Port (seconds->hr-min-sec runtime) ;; (- (current-seconds) start-time)) ;; (vector-ref server 6))) - (cond - ((< uptime 5) "alive") - ((< uptime 16) "probably alive");; less than 15 seconds since mod, call it alive (vector-ref server 8) ;; State - (else "dead")) + "-" #;(cond + ((< uptime 5) "alive") + ((< uptime 16) "probably alive");; less than 15 seconds since mod, call it alive (vector-ref server 8) ;; State + (else "dead")) "-" ;; (vector-ref server 12) ;; RunId ))) (for-each (lambda (val) (let* ((row-col (conc rownum ":" colnum)) (curr-val (iup:attribute servers-matrix row-col))) @@ -745,11 +746,11 @@ (iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum)))) (set! colnum (+ 1 colnum)))) vals) (set! rownum (+ rownum 1))) (iup:attribute-set! servers-matrix "REDRAW" "ALL"))) - (sort servers (lambda (a b)(> (car a)(car b)))))))))) + (sort servers (lambda (a b)(> (caddr a)(caddr b)))))))))) (set! colnum 0) (for-each (lambda (colname) (iup:attribute-set! servers-matrix (conc "0:" colnum) colname) (iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum)) (set! colnum (+ colnum 1))) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -285,10 +285,11 @@ (begin (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".") (debug:print 0 *default-log-port* " message: " msg ", exn=" exn) (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey")) (debug:print 0 *default-log-port* " call-chain: " call-chain))) + ;; what if another thread is communicating ok? Can't happen due to mutex (set! *runremote* #f) (set! runremote #f) ;; (if runremote ;; (remote-conndat-set! runremote #f)) ;; Killing associated server to allow clean retry.") @@ -391,11 +392,12 @@ (debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!")))) ;; ;; connect ;; -(define (http-transport:client-connect iface port server-id) +(define (http-transport:client-connect iface port server-id) + (debug:print-info 0 *default-log-port* "Connecting to client at "iface":"port", with server-id "server-id) (let* ((api-url (conc "http://" iface ":" port "/api")) (api-uri (uri-reference (conc "http://" iface ":" port "/api"))) (api-req (make-request method: 'POST uri: api-uri)) (server-dat (vector iface port api-uri api-url api-req (current-seconds) server-id))) server-dat)) @@ -409,11 +411,12 @@ (define (http-transport:keep-running) ;; if none running or if > 20 seconds since ;; server last used then start shutdown ;; This thread waits for the server to come alive (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") - (let* ((sdat #f) + (let* ((servinfofile #f) + (sdat #f) (no-sync-db (db:open-no-sync-db)) (tmp-area (common:get-db-tmp-area)) (started-file (conc tmp-area "/.server-started")) (server-start-time (current-seconds)) (server-info (let loop ((start-time (current-seconds)) @@ -426,13 +429,30 @@ (set! sdat *server-info*) (mutex-unlock! *heartbeat-mutex*) (if (and sdat (not changed) (> (- (current-seconds) start-time) 2)) - (begin + (let* ((servinfodir (conc *toppath*"/.servinfo")) + (ipaddr (car sdat)) + (port (cadr sdat)) + (servinf (conc servinfodir"/"ipaddr":"port))) + (set! servinfofile servinf) + (if (not (file-exists? servinfodir)) + (create-directory servinfodir #t)) + (with-output-to-file servinf + (lambda () + (let* ((serv-id (server:mk-signature))) + (set! *server-id* serv-id) + (print "SERVER STARTED: "ipaddr":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)) + (print "started: "(seconds->year-week/day-time (current-seconds)))))) + (set! *on-exit-procs* (cons + (lambda () + (delete-file* servinf)) + *on-exit-procs*)) + ;; put data about this server into a simple flat file host.port (debug:print-info 0 *default-log-port* "Received server alive signature") - (common:save-pkt `((action . alive) + #;(common:save-pkt `((action . alive) (T . server) (pid . ,(current-process-id)) (ipaddr . ,(car sdat)) (port . ,(cadr sdat))) *configdat* #t) @@ -439,13 +459,16 @@ sdat) (begin (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) (sleep 4) (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes - (begin + (let* ((ipaddr (car sdat)) + (port (cadr sdat)) + (servinf (conc *toppath*"/.servinfo/"ipaddr":"port))) (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server") - (common:save-pkt `((action . died) + ;; (delete-file* servinf) ;; handled by on-exit, can be removed + #;(common:save-pkt `((action . died) (T . server) (pid . ,(current-process-id)) (ipaddr . ,(car sdat)) (port . ,(cadr sdat)) (msg . "Transport died?")) @@ -475,29 +498,17 @@ (if (not server-going) ;; *dbstruct-dbs* (begin (debug:print 0 *default-log-port* "SERVER: dbprep") (set! *dbstruct-dbs* (db:setup #t)) ;; run-id)) FIXME!!! (set! server-going #t) - (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine. - - ;; (thread-start! *watchdog*) - ) + (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version))) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine. (if (and no-sync-db - (common:low-noise-print 5 "sync-all")) ;; cheesy way to reduce frequency of running sync :) + (common:low-noise-print 10 "sync-all")) ;; cheesy way to reduce frequency of running sync :) (begin - (debug:print 0 *default-log-port* "keep-running calling db:all-db-sync at " (time->string (seconds->local-time) "%H:%M:%S")) - - ;; This is tougher than it seems - have to deal with multiple dbs - ;; (db:process-transaction-queue *dbstruct-dbs*) - - (db:all-db-sync *dbstruct-dbs*) - - ;; (db:do-sync no-sync-db) - ;; (db:run-lock-and-sync *no-sync-db*) - ) - ) - ) + (if (common:low-noise-print 120 "sync-all-print") + (debug:print 0 *default-log-port* "keep-running calling db:all-db-sync at " (time->string (seconds->local-time) "%H:%M:%S"))) + (db:all-db-sync *dbstruct-dbs*)))) ;; when things go wrong we don't want to be doing the various queries too often ;; so we strive to run this stuff only every four seconds or so. (let* ((sync-time (- (current-milliseconds) start-time)) (rem-time (quotient (- 4000 sync-time) 1000))) @@ -518,11 +529,11 @@ (new-port (cadr sdat))) (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info") (set! iface new-iface) (set! port new-port) (if (not *server-id*) - (set! *server-id* (server:mk-signature))) + (set! *server-id* (server:mk-signature))) (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*) (flush-output *default-log-port*))) ;; Transfer *db-last-access* to last-access to use in checking that we are still alive (mutex-lock! *heartbeat-mutex*) @@ -540,37 +551,32 @@ (begin (debug:print 0 *default-log-port* "Server stats:") (db:print-current-query-stats))) (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) (cond - #;((and *server-run* - (> (- (current-seconds) server-start-time) 420)) ;; let's try server replacement - ;; ((adj-proc-load . 0.056875) (adj-core-load . 0.11375) (1m-load . 0.91) (5m-load . 0.77) (15m-load . 1.0) (proc . 16) (core . 8) (phys . 1)) - (let* ((loaddat (common:get-normalized-cpu-load #f)) - (adj-proc-load (alist-ref 'adj-proc-load loaddat)) - (adj-core-load (alist-ref 'adj-core-load loaddat)) - (adj-load (max adj-proc-load adj-core-load))) - (if (< adj-load 2) ;; reduce chance of runaway - (server:run *toppath*)) - (db:all-db-sync *dbstruct-dbs*) - (thread-sleep! 30) - (http-transport:server-shutdown port))) ((and *server-run* (> (+ last-access server-timeout) (current-seconds))) (if (common:low-noise-print 120 "server continuing") (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) (let ((curr-time (current-seconds))) (handle-exceptions exn - (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk? exn=" exn) + (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on info file " servinfofile ". Are you out of space on that disk? exn=" exn) (if (and (< (- (current-seconds) server-start-time) 600) ;; run for ten minutes for experiment, 3600 thereafter - (not *server-overloaded*)) - (change-file-times server-log-file curr-time curr-time) - (if (common:low-noise-print 120 "start new server") - (server:kind-run *toppath*) ;; server:kind-run uses [servers] numservers - ))))) + (not *server-overloaded*) + (file-exists? servinfofile)) + (change-file-times servinfofile curr-time curr-time))) + (if (or (common:low-noise-print 120 "start new server") + (> *api-process-request-count* 50)) ;; if this server is kind of busy start up another + (begin + (debug:print-info 0 *default-log-port* "Server is busy, parallel-api-count "*api-process-request-count*", start another if possible...") + (server:kind-run *toppath*) + (if (> *api-process-request-count* 100) + (begin + (debug:print-info 0 *default-log-port* "Server is overloaded at parallel-api-count="*api-process-request-count*", removing "servinfofile) + (delete-file* servinfofile))))))) (loop 0 server-state bad-sync-count (current-milliseconds))) (else (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) (http-transport:server-shutdown port))))))) @@ -600,14 +606,17 @@ ;; (/ *total-non-write-delay* ;; *number-non-write-queries*)) ;; " ms") (db:print-current-query-stats) - (common:save-pkt `((action . exit) + #;(common:save-pkt `((action . exit) (T . server) (pid . ,(current-process-id))) - *configdat* #t) + *configdat* #t) + + ;; remove .servinfo file(s) here + (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting") (exit))) ;; all routes though here end in exit ... ;; @@ -640,14 +649,14 @@ #;(let* ((num-alive (server:get-num-alive (server:get-list *toppath*)))) (if (> num-alive 3) (begin (cleanup-proc (conc "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up")) (exit)))) - (common:save-pkt `((action . start) - (T . server) - (pid . ,(current-process-id))) - *configdat* #t) + #;(common:save-pkt `((action . start) + (T . server) + (pid . ,(current-process-id))) + *configdat* #t) (let* ((th2 (make-thread (lambda () (debug:print-info 0 *default-log-port* "Server run thread started") (http-transport:run (if (args:get-arg "-server") (args:get-arg "-server") Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -1563,11 +1563,11 @@ (with-output-to-string (lambda () ;; (list 'hosts hosts) (write (list (list 'testpath test-path) ;; (list 'transport (conc *transport-type*)) ;; (list 'serverinf *server-info*) - (list 'homehost (let* ((hhdat (common:get-homehost))) + #;(list 'homehost (let* ((hhdat (server:get-homehost))) (if hhdat (car hhdat) #f))) (list 'serverurl (if *runremote* (remote-server-url *runremote*) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -656,13 +656,13 @@ (if (args:any? "-run" "-runall" "-remove-runs" "-set-state-status" "-kill-runs" "-kill-rerun") (debug:print 0 *default-log-port* (string-intersperse (argv) " "))) ;; some switches imply homehost. Exit here if not on homehost ;; -(let ((homehost-required (list "-cleanup-db" "-server"))) +(let ((homehost-required (list "-cleanup-db"))) (if (apply args:any? homehost-required) - (if (not (common:on-homehost?)) + (if (not (server:choose-server *toppath* 'home?)) (for-each (lambda (switch) (if (args:get-arg switch) (begin (debug:print 0 *default-log-port* "ERROR: you must be on the homehost to run with " switch @@ -951,11 +951,11 @@ (if (or (args:get-arg "-list-servers") (args:get-arg "-kill-servers")) (let ((tl (launch:setup))) (if tl ;; all roads from here exit - (let* ((servers (server:get-list *toppath*)) + (let* ((servers (server:choose-server *toppath* 'all-valid)) ;; (server:get-list *toppath*)) (fmtstr "~33a~22a~20a~20a~8a\n")) (format #t fmtstr "ID" "host:port" "age (hms)" "Last mod" "State") (format #t fmtstr "==" "=========" "=========" "========" "=====") (for-each ;; ( mod-time host port start-time pid ) (lambda (server) @@ -2279,12 +2279,11 @@ (if (not (launch:setup)) (begin (debug:print 0 *default-log-port* "Failed to setup, exiting") (exit 1))) (set! keys (rmt:get-keys)) ;; db)) - (debug:print 1 *default-log-port* "Keys: " (string-intersperse keys ", ")) - (if (sqlite3:database? db)(sqlite3:finalize! db)) + (print (string-intersperse keys " ")) (set! *didsomething* #t))) (if (args:get-arg "-gui") (begin (debug:print 0 *default-log-port* "Look at the dashboard for now") @@ -2379,11 +2378,11 @@ (if (or (getenv "MT_RUNSCRIPT") (args:get-arg "-repl") (args:get-arg "-load")) (let* ((toppath (launch:setup)) (dbstructs (if (and toppath - (common:on-homehost?)) + (server:choose-server toppath 'home?)) (db:setup #t) #f))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f))) (if *toppath* (cond ((getenv "MT_RUNSCRIPT") Index: newdashboard.scm ================================================================== --- newdashboard.scm +++ newdashboard.scm @@ -16,31 +16,54 @@ ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;;====================================================================== -(use format) - -(use (prefix iup iup:)) +;; (declare (uses common)) +;; (declare (uses megatest-version)) +(declare (uses mtargs)) +(declare (uses treemod)) -(use canvas-draw) +(use srfi-1 + posix regex regex-case srfi-69 typed-records sparse-vectors + format + extras + (prefix iup iup:) + canvas-draw + sqlite3) + (import canvas-draw-iup) -(use sql-de-lite srfi-1 posix regex regex-case srfi-69 typed-records sparse-vectors ;; defstruct - (prefix dbi dbi:)) +(module ndboard + * + +(import scheme + chicken + data-structures + extras + format + (prefix iup iup:) + canvas-draw + canvas-draw-iup + matchable + srfi-1 posix regex regex-case + srfi-69 typed-records sparse-vectors ;; defstruct + sqlite3 + + treemod + (prefix mtargs args:) + ) + -(declare (uses common)) -(declare (uses megatest-version)) -(declare (uses margs)) +(include "megatest-version.scm") ;; (declare (uses launch)) ;; (declare (uses gutils)) ;; (declare (uses db)) ;; (declare (uses server)) ;; (declare (uses synchash)) -(declare (uses dcommon)) -;; (declare (uses tree)) +;; (declare (uses dcommon)) ;; ;; (include "common_records.scm") ;; (include "db_records.scm") ;; (include "key_records.scm") @@ -81,21 +104,159 @@ (begin (print help) (exit))) ;; ease debugging by loading ~/.dashboardrc -(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.dashboardrc"))) - (if (common:file-exists? debugcontrolf) +(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.newdashboardrc"))) + (if (file-exists? debugcontrolf) (load debugcontrolf))) -(debug:setup) - (define *tim* (iup:timer)) (define *ord* #f) (iup:attribute-set! *tim* "TIME" 300) (iup:attribute-set! *tim* "RUN" "YES") + +;; areas +;; + +(define *areas* (make-hash-table)) +(defstruct area + path + keys + targets + targets-update-time + (dbhs (make-hash-table)) + ) + +(define (area-get-path area-name) + (let* ((adat (get-area-info area-name))) + (if adat + (area-path adat) + #f))) + +(define (get-areas-file) + (conc (get-environment-variable "HOME")"/.ndboard/areas.scm")) + +(define (get-areas) + (let* ((areas-file (get-areas-file))) + (if (file-exists? areas-file) + (with-input-from-file areas-file read)))) + +(define (register-area areadat) + (hash-table-set! *areas* (car areadat) + (make-area path: (cdr areadat)))) + +(define (get-area-info area-name) + (hash-table-ref/default *areas* area-name #f)) + +(define (area-save-dbh area-name dbname mtdbh) + (hash-table-set! (area-dbhs (get-area-info area-name)) dbname mtdbh)) + +(define (area-get-dbh area-name dbname) + (hash-table-ref/default (area-dbhs (get-area-info area-name)) dbname #f)) + +;; megatest calls, run in "area" +;; + +;; TODO store the last time the query was run +;; and clear cache based on timestamp on main.db +;; +(define (megatest-get-targets area-name) + (let* ((ainfo (get-area-info area-name)) + (targets (area-targets ainfo))) + (if targets + targets + (let* ((path (area-get-path area-name)) + (raw-targs (with-input-from-pipe + (conc "megatest -list-targets -start-dir "path) + read-lines)) + (clean-targs (filter (lambda (x) + (not (equal? x "default"))) + raw-targs))) + (area-targets-set! ainfo clean-targs) + (area-targets-update-time-set! ainfo (current-seconds)) + clean-targs)))) + +(define (megatest-get-keys area-name) + (let* ((ainfo (get-area-info area-name)) + (keys (area-keys ainfo))) + (if keys + keys + (let* ((path (area-path ainfo)) + (keysstr (with-input-from-pipe + (conc "megatest -show-keys -start-dir "path) + read-line))) + (if (not (string? keysstr)) + (print "Unknown error getting keys for area "area-name", path: "path) + (let* ((keys (string-split keysstr))) + (area-keys-set! ainfo keys) + keys)))))) + +;; megatest area database access functions +;; + +(defstruct mtdb + name + db + path) + +;; fall back to old megatest db if .megatest/dbname not found +;; +(define (megatest-find-db path dbname) + (let ((newpath (conc path"/.megatest/"dbname)) + (oldpath (conc path"/megatest.db"))) + (if (file-exists? newpath) + newpath + (if (file-exists? oldpath) + oldpath + #f)))) + +;; dbname is main.db, 1.db ... +(define (megatest-open-db area-name dbname) + (let* ((mtdbh (area-get-dbh area-name dbname))) + (if mtdbh + mtdbh + (let* ((ainfo (get-area-info area-name)) + (path (area-path ainfo)) + (dbpath (megatest-find-db path dbname)) + (dbexists (and dbpath + (file-exists? dbpath) + (file-read-access? dbpath)))) + (if dbexists + (let* ((db (open-database dbpath))) + (set-busy-handler! db (make-busy-timeout 136000)) + (execute db "PRAGMA synchronous = 0;") + (let* ((mtdbh (make-mtdb db: db path: dbpath))) + (area-save-dbh area-name dbname mtdbh) + mtdbh)) + #f))))) + +;; ADD on-exit to close the opened dbs + +;; keys is list, targpatts is list, both same length +;; and *fully* specified +;; returns targvals and runname +(define (megatest-get-run-names area-name keys targpatts) + (let* ((mtdbh (megatest-open-db area-name "main.db")) + (selector (string-intersperse + (map (lambda (k v)(conc k" like '"v"'")) keys targpatts) + " AND ")) + (field-sel (string-intersperse keys ",")) + (fullqry (conc "SELECT "field-sel",runname FROM runs WHERE "selector";"))) + (print "fullqry="fullqry) + (fold-row ;; proc init db-or-stmt . params) + (lambda (res . row) + (cons row res)) + '() + (mtdb-db mtdbh) ;; get the db handle + fullqry))) + + + +;; gui utils +;; (define (message-window msg) (iup:show (iup:dialog (iup:vbox @@ -112,605 +273,95 @@ (iup:attribute-set! lb "VALUE" item))) ;; (number->string i)))) (set! i (+ i 1))) items) i)) +;; simple widget registration and finding +(define *widgets* (make-hash-table)) +(define (add-widget name wgt) + (hash-table-set! *widgets* name wgt) + wgt) +(define (get-widget name) + (hash-table-ref/default *widgets* name #f)) + (define (pad-list l n)(append l (make-list (- n (length l))))) - -(define (mkstr . x) - (string-intersperse (map conc x) ",")) - -(define (update-search x val) - (hash-table-set! *searchpatts* x val)) - - -;; data for each specific tab goes here -;; -(defstruct dboard:tabdat - ;; runs - ((allruns '()) : list) ;; list of dboard:rundat records - ((allruns-by-id (make-hash-table)) : hash-table) ;; hash of run-id -> dboard:rundat records - ((done-runs '()) : list) ;; list of runs already drawn - ((not-done-runs '()) : list) ;; list of runs not yet drawn - (header #f) ;; header for decoding the run records - (keys #f) ;; keys for this run (i.e. target components) - ((numruns (string->number (or (args:get-arg "-cols") "10"))) : number) ;; - ((tot-runs 0) : number) - ((last-data-update 0) : number) ;; last time the data in allruns was updated - ((last-runs-update 0) : number) ;; last time we pulled the runs info to update the tree - (runs-mutex (make-mutex)) ;; use to prevent parallel access to draw objects - ((run-update-times (make-hash-table)) : hash-table) ;; update times indexed by run-id - ((last-test-dat (make-hash-table)) : hash-table) ;; cache last tests dat by run-id - ((run-db-paths (make-hash-table)) : hash-table) ;; cache the paths to the run db files - - ;; Runs view - ((buttondat (make-hash-table)) : hash-table) ;; - ((item-test-names '()) : list) ;; list of itemized tests - ((run-keys (make-hash-table)) : hash-table) - (runs-matrix #f) ;; used in newdashboard - ((start-run-offset 0) : number) ;; left-right slider value - ((start-test-offset 0) : number) ;; up-down slider value - ((runs-btn-height (or (configf:lookup *configdat* "dashboard" "btn-height") "x16")) : string) ;; was 12 - ((runs-btn-fontsz (or (configf:lookup *configdat* "dashboard" "btn-fontsz") "10")) : string) ;; was 8 - ((runs-cell-width (or (configf:lookup *configdat* "dashboard" "cell-width") "60")) : string) ;; was 50 - ((all-test-names '()) : list) - - ;; Canvas and drawing data - (cnv #f) - (cnv-obj #f) - (drawing #f) - ((run-start-row 0) : number) - ((max-row 0) : number) - ((running-layout #f) : boolean) - (originx #f) - (originy #f) - ((layout-update-ok #t) : boolean) - ((compact-layout #t) : boolean) - - ;; Run times layout - ;; (graph-button-box #f) ;; RA => Think it is not referenced anywhere - (graph-matrix #f) - ((graph-matrix-table (make-hash-table)) : hash-table) ;; graph-dats referenced thru graph name info - ((graph-cell-table (make-hash-table)) : hash-table) ;; graph-dats referenced thru matrix cell info - ((graph-matrix-row 1) : number) - ((graph-matrix-col 1) : number) - - ;; Controls used to launch runs etc. - ((command "") : string) ;; for run control this is the command being built up - (command-tb #f) ;; widget for the type of command; run, remove-runs etc. - (test-patterns-textbox #f) ;; text box widget for editing a list of test patterns - (key-listboxes #f) - (key-lbs #f) - run-name ;; from run name setting widget - states ;; states for -state s1,s2 ... - statuses ;; statuses for -status s1,s2 ... - - ;; Selector variables - curr-run-id ;; current row to display in Run summary view - prev-run-id ;; previous runid selected before current runid was selected (used in xor-two-runs runs summary mode - curr-test-ids ;; used only in dcommon:run-update which is used in newdashboard - ((filters-changed #t) : boolean) ;; to indicate that the user changed filters for this tab - ((last-filter-str "") : string) ;; conc the target runname and testpatt for a signature of changed filters - ((hide-empty-runs #f) : boolean) - ((hide-not-hide #t) : boolean) ;; toggle for hide/not hide empty runs - (hide-not-hide-button #f) - ((searchpatts (make-hash-table)) : hash-table) ;; - ((state-ignore-hash (make-hash-table)) : hash-table) ;; hash of STATE => #t/#f for display control - ((status-ignore-hash (make-hash-table)) : hash-table) ;; hash of STATUS => #t/#f - (target #f) - (test-patts #f) - - ;; db info to file the .db files for the area - (access-mode (db:get-access-mode)) ;; use cached db or not - (dbdir #f) - (dbfpath #f) - (dbkeys #f) - ((last-db-update (make-hash-table)) : hash-table) ;; last db file timestamp - (monitor-db-path #f) ;; where to find monitor.db - ro ;; is the database read-only? - - ;; tests data - ((num-tests 10) : number) ;; total number of tests to show (used in the old runs display) - - ;; runs tree - ((path-run-ids (make-hash-table)) : hash-table) ;; path (target / runname) => id - (runs-tree #f) - ((runs-tree-ht (make-hash-table)) : hash-table) ;; track which targets added to tree (merge functionality with path-run-ids?) - - ;; tab data - ((view-changed #t) : boolean) - ((xadj 0) : number) ;; x slider number (if using canvas) - ((yadj 0) : number) ;; y slider number (if using canvas) - ;; runs-summary tab state - ((runs-summary-modes '((one-run . "Show One Run") (xor-two-runs . "XOR Two Runs") (xor-two-runs-hide-clean . "XOR; Hide Clean")) ) : list) - ((runs-summary-mode-buttons '()) : list) - ((runs-summary-mode 'one-run) : symbol) - ((runs-summary-mode-change-callbacks '()) : list) - (runs-summary-source-runname-label #f) - (runs-summary-dest-runname-label #f) - ;; runs summary view - - tests-tree ;; used in newdashboard - ) - - - -;; mtest is actually the megatest.config file -;; -(define (mtest toppath window-id) - (let* ((curr-row-num 0) - ;; (rawconfig (read-config (conc toppath "/megatest.config") #f 'return-string)) - (keys-matrix (iup:matrix)) ;; (dcommon:keys-matrix rawconfig)) - (setup-matrix (iup:matrix)) ;; (dcommon:section-matrix rawconfig "setup" "Varname" "Value")) - (jobtools-matrix (iup:matrix - #:expand "YES" - #:numcol 1 - #:numlin 5 - #:numcol-visible 1 - #:numlin-visible 3)) - (validvals-matrix (iup:matrix - #:expand "YES" - #:numcol 1 - #:numlin 2 - #:numcol-visible 1 - #:numlin-visible 2)) - (envovrd-matrix (iup:matrix - #:expand "YES" - #:numcol 1 - #:numlin 20 - #:numcol-visible 1 - #:numlin-visible 8)) - (disks-matrix (iup:matrix - #:expand "YES" - #:numcol 1 - #:numlin 20 - #:numcol-visible 1 - #:numlin-visible 8)) - ) - (iup:attribute-set! disks-matrix "0:0" "Disk Name") - (iup:attribute-set! disks-matrix "0:1" "Disk Path") - (iup:attribute-set! disks-matrix "WIDTH1" "120") - (iup:attribute-set! disks-matrix "WIDTH0" "100") - (iup:attribute-set! disks-matrix "ALIGNMENT1" "ALEFT") - (iup:attribute-set! disks-matrix "FIXTOTEXT" "C1") - (iup:attribute-set! disks-matrix "RESIZEMATRIX" "YES") - - ;; fill in existing info - (for-each - (lambda (mat fname) - (set! curr-row-num 1) - (for-each - (lambda (var) - (iup:attribute-set! mat (conc curr-row-num ":0") var) - ;; (iup:attribute-set! mat (conc curr-row-num ":1") (config-lookup rawconfig fname var)) - (set! curr-row-num (+ curr-row-num 1))) - '()));; (configf:section-vars rawconfig fname))) - (list setup-matrix jobtools-matrix validvals-matrix envovrd-matrix disks-matrix) - (list "setup" "jobtools" "validvalues" "env-override" "disks")) - - (for-each - (lambda (mat) - (iup:attribute-set! mat "0:1" "Value") - (iup:attribute-set! mat "0:0" "Var") - (iup:attribute-set! mat "ALIGNMENT1" "ALEFT") - (iup:attribute-set! mat "FIXTOTEXT" "C1") - (iup:attribute-set! mat "RESIZEMATRIX" "YES") - (iup:attribute-set! mat "WIDTH1" "120") - (iup:attribute-set! mat "WIDTH0" "100") - ) - (list setup-matrix jobtools-matrix validvals-matrix envovrd-matrix)) - - (iup:attribute-set! validvals-matrix "WIDTH1" "290") - (iup:attribute-set! envovrd-matrix "WIDTH1" "290") - - (iup:vbox - (iup:hbox - - (iup:vbox - (let ((tabs (iup:tabs - ;; The required tab - (iup:hbox - ;; The keys - (iup:frame - #:title "Keys (required)" - (iup:vbox - (iup:label (conc "Set the fields for organising your runs\n" - "here. Note: can only be changed before\n" - "running the first run when megatest.db\n" - "is created.")) - keys-matrix)) - (iup:vbox - ;; The setup section - (iup:frame - #:title "Setup" - (iup:vbox - (iup:label (conc "max_concurrent_jobs : limits total concurrent jobs (optional)\n" - "linktree : directory where linktree will be created.")) - setup-matrix)) - ;; The jobtools - (iup:frame - #:title "Jobtools" - (iup:vbox - (iup:label (conc "launcher : tool or script to run jobs (try nbfake)\n" - "useshell : use system to run your launcher\n" - "workhosts : spread jobs out on these hosts")) - jobtools-matrix)) - ;; The disks - (iup:frame - #:title "Disks" - (iup:vbox - (iup:label (conc "Enter names and existing paths of locations to run tests")) - disks-matrix)))) - ;; The optional tab - (iup:vbox - ;; The Environment Overrides - (iup:frame - #:title "Env override" - envovrd-matrix) - ;; The valid values - (iup:frame - #:title "Validvalues" - validvals-matrix) - )))) - (iup:attribute-set! tabs "TABTITLE0" "Required settings") - (iup:attribute-set! tabs "TABTITLE1" "Optional settings") - tabs)) - )))) - -;; The runconfigs.config file -;; -(define (rconfig window-id) - (iup:vbox - (iup:frame #:title "Default"))) - -;;====================================================================== -;; T E S T S -;;====================================================================== - -(define (tree-path->test-id path) - (if (not (null? path)) - (hash-table-ref/default (dboard:data-path-test-ids *data*) path #f) - #f)) - -(define (test-panel window-id) - (let* ((curr-row-num 0) - (viewlog (lambda (x) - (if (common:file-exists? logfile) - ;(system (conc "firefox " logfile "&")) - (iup:send-url logfile) - (message-window (conc "File " logfile " not found"))))) - (xterm (lambda (x) - (if (directory-exists? rundir) - (let ((shell (if (get-environment-variable "SHELL") - (conc "-e " (get-environment-variable "SHELL")) - ""))) - (system (conc "cd " rundir - ";xterm -T \"" (string-translate testfullname "()" " ") "\" " shell "&"))) - (message-window (conc "Directory " rundir " not found"))))) - (command-text-box (iup:textbox #:expand "HORIZONTAL" #:font "Courier New, -12")) - (command-launch-button (iup:button "Execute!" - ;; #:expand "HORIZONTAL" - #:size "50x" - #:action (lambda (x) - (let ((cmd (iup:attribute command-text-box "VALUE"))) - (system (conc cmd " &")))))) - (run-test (lambda (x) - (iup:attribute-set! - command-text-box "VALUE" - (conc "xterm -geometry 180x20 -e \"megatest -target " keystring " :runname " runname - " -runtests " (conc testname "/" (if (equal? item-path "") - "%" - item-path)) - ";echo Press any key to continue;bash -c 'read -n 1 -s'\"")))) - (remove-test (lambda (x) - (iup:attribute-set! - command-text-box "VALUE" - (conc "xterm -geometry 180x20 -e \"megatest -remove-runs -target " keystring " :runname " runname - " -testpatt " (conc testname "/" (if (equal? item-path "") - "%" - item-path)) - " -v;echo Press any key to continue;bash -c 'read -n 1 -s'\"")))) - (run-info-matrix (iup:matrix - #:expand "YES" - ;; #:scrollbar "YES" - #:numcol 1 - #:numlin 4 - #:numcol-visible 1 - #:numlin-visible 4 - #:click-cb (lambda (obj lin col status) - #f - ;;(print "obj: " obj " lin: " lin " col: " col " status: " status) - ))) - (test-info-matrix (iup:matrix - #:expand "YES" - #:numcol 1 - #:numlin 7 - #:numcol-visible 1 - #:numlin-visible 7)) - (test-run-matrix (iup:matrix - #:expand "YES" - #:numcol 1 - #:numlin 5 - #:numcol-visible 1 - #:numlin-visible 5)) - (meta-dat-matrix (iup:matrix - #:expand "YES" - #:numcol 1 - #:numlin 5 - #:numcol-visible 1 - #:numlin-visible 5)) - (steps-matrix (iup:matrix - #:expand "YES" - #:numcol 6 - #:numlin 50 - #:numcol-visible 6 - #:numlin-visible 8)) - (data-matrix (iup:matrix - #:expand "YES" - #:numcol 8 - #:numlin 50 - #:numcol-visible 8 - #:numlin-visible 8)) - (updater (lambda (testdat) - (test-update window-id testdat run-info-matrix test-info-matrix test-run-matrix meta-dat-matrix steps-matrix data-matrix)))) - - ;; Set the updater in updaters - ;; (hash-table-set! (dboard:data-updaters *data*) window-id updater) - ;; - (for-each - (lambda (mat) - ;; (iup:attribute-set! mat "0:1" "Value") - ;; (iup:attribute-set! mat "0:0" "Var") - (iup:attribute-set! mat "HEIGHT0" 0) - (iup:attribute-set! mat "ALIGNMENT1" "ALEFT") - ;; (iup:attribute-set! mat "FIXTOTEXT" "C1") - (iup:attribute-set! mat "RESIZEMATRIX" "YES")) - ;; (iup:attribute-set! mat "WIDTH1" "120") - ;; (iup:attribute-set! mat "WIDTH0" "100")) - (list run-info-matrix test-info-matrix test-run-matrix meta-dat-matrix)) - - ;; Steps matrix - (iup:attribute-set! steps-matrix "0:1" "Step Name") - (iup:attribute-set! steps-matrix "0:2" "Start") - (iup:attribute-set! steps-matrix "WIDTH2" "40") - (iup:attribute-set! steps-matrix "0:3" "End") - (iup:attribute-set! steps-matrix "WIDTH3" "40") - (iup:attribute-set! steps-matrix "0:4" "Status") - (iup:attribute-set! steps-matrix "WIDTH4" "40") - (iup:attribute-set! steps-matrix "0:5" "Duration") - (iup:attribute-set! steps-matrix "WIDTH5" "40") - (iup:attribute-set! steps-matrix "0:6" "Log File") - (iup:attribute-set! steps-matrix "ALIGNMENT1" "ALEFT") - ;; (iup:attribute-set! steps-matrix "FIXTOTEXT" "C1") - (iup:attribute-set! steps-matrix "RESIZEMATRIX" "YES") - ;; (iup:attribute-set! steps-matrix "WIDTH1" "120") - ;; (iup:attribute-set! steps-matrix "WIDTH0" "100") - - ;; Data matrix - ;; - (let ((rownum 1)) - (for-each - (lambda (x) - (iup:attribute-set! data-matrix (conc "0:" rownum) x) - (iup:attribute-set! data-matrix (conc "WIDTH" rownum) "50") - (set! rownum (+ rownum 1))) - (list "Category" "Variable" "Value" "Expected" "Tolerance" "Status" "Units" "Type" "Comment"))) - (iup:attribute-set! data-matrix "REDRAW" "ALL") - - (for-each - (lambda (data) - (let ((mat (car data)) - (keys (cadr data)) - (rownum 1)) - (for-each - (lambda (key) - (iup:attribute-set! mat (conc rownum ":0") key) - (set! rownum (+ rownum 1))) - keys) - (iup:attribute-set! mat "REDRAW" "ALL"))) - (list - (list run-info-matrix '("Run Id" "Target" "Runname" "Run Start Time" )) - (list test-info-matrix '("Test Id" "Testname" "Itempath" "State" "Status" "Test Start Time" "Comment")) - (list test-run-matrix '("Hostname" "Host info" "Disk Free" "CPU Load" "Run Duration")) - (list meta-dat-matrix '("Author" "Owner" "Last Reviewed" "Tags" "Description")))) - - (iup:split - #:orientation "HORIZONTAL" - (iup:vbox - (iup:hbox - (iup:vbox - run-info-matrix - test-info-matrix) - ;; test-info-matrix) - (iup:vbox - test-run-matrix - meta-dat-matrix)) - (iup:vbox - (iup:vbox - (iup:hbox - (iup:button "View Log" #:action viewlog #:size "60x" ) ;; #:size "30x" - (iup:button "Start Xterm" #:action xterm #:size "60x" )) ;; #:size "30x" - (iup:hbox - (iup:button "Run Test" #:action run-test #:size "60x" ) ;; #:size "30x" - (iup:button "Clean Test" #:action remove-test #:size "60x" ))) ;; #:size "30x" - (iup:hbox - ;; hiup:split ;; hbox - ;; #:orientation "HORIZONTAL" - ;; #:value 300 - command-text-box - command-launch-button))) - (iup:vbox - (let ((tabs (iup:tabs - steps-matrix - data-matrix))) - (iup:attribute-set! tabs "TABTITLE0" "Test Steps") - (iup:attribute-set! tabs "TABTITLE1" "Test Data") - tabs))))) - -;; Test browser -(define (tests window-id) - (iup:split - (let* ((tb (iup:treebox - #:selection-cb - (lambda (obj id state) - ;; (print "obj: " obj ", id: " id ", state: " state) - (let* ((run-path (tree:node->path obj id)) - (test-id (tree-path->test-id (cdr run-path)))) - ;; (if test-id - ;; (hash-table-set! (dboard:data-curr-test-ids *data*) - ;; window-id test-id)) - ;; (print "path: " (tree:node->path obj id) " test-id: " test-id) - ))))) - (iup:attribute-set! tb "VALUE" "0") - (iup:attribute-set! tb "NAME" "Runs") - ;;(iup:attribute-set! tb "ADDEXPANDED" "NO") - ;; (dboard:data-tests-tree-set! *data* tb) - tb) - (test-panel window-id))) - -;; The function to update the fields in the test view panel -(define (test-update window-id testdat run-info-matrix test-info-matrix test-run-matrix meta-dat-matrix steps-matrix data-matrix) - ;; get test-id - ;; then get test record - (if testdat - (let* ((test-id 0) ;; (hash-table-ref/default (dboard:data-curr-test-ids *data*) window-id #f)) - (test-data (hash-table-ref/default testdat test-id #f)) - (run-id (db:test-get-run_id test-data)) - (targ/runname (hash-table-ref/default (dboard:data-run-keys *data*) - run-id - '())) - (target (if (null? targ/runname) "" (string-intersperse (reverse (cdr (reverse targ/runname))) "/"))) - (runname (if (null? targ/runname) "" (car (cdr targ/runname)))) - (steps-dat (tests:get-compressed-steps *dbstruct-local* run-id test-id))) - - (if test-data - (begin - ;; - (for-each - (lambda (data) - (let ((mat (car data)) - (vals (cadr data)) - (rownum 1)) - (for-each - (lambda (key) - (let ((cell (conc rownum ":1"))) - (if (not (equal? (iup:attribute mat cell)(conc key))) - (begin - ;; (print "setting cell " cell " in matrix " mat " to value " key) - (iup:attribute-set! mat cell (conc key)) - (iup:attribute-set! mat "REDRAW" cell))) - (set! rownum (+ rownum 1)))) - vals))) - (list - (list run-info-matrix - (if test-id - (list (db:test-get-run_id test-data) - target - runname - "n/a") - (make-list 4 ""))) - (list test-info-matrix - (if test-id - (list test-id - (db:test-get-testname test-data) - (db:test-get-item-path test-data) - (db:test-get-state test-data) - (db:test-get-status test-data) - (seconds->string (db:test-get-event_time test-data)) - (db:test-get-comment test-data)) - (make-list 7 ""))) - (list test-run-matrix - (if test-id - (list (db:test-get-host test-data) - (db:test-get-uname test-data) - (db:test-get-diskfree test-data) - (db:test-get-cpuload test-data) - (seconds->hr-min-sec (db:test-get-run_duration test-data))) - (make-list 5 ""))) - )) - (dcommon:populate-steps steps-dat steps-matrix)))))) - ;;(list meta-dat-matrix - ;; (if test-id - ;; (list ( - - -;; db:test-get-id -;; db:test-get-run_id -;; db:test-get-testname -;; db:test-get-state -;; db:test-get-status -;; db:test-get-event_time -;; db:test-get-host -;; db:test-get-cpuload -;; db:test-get-diskfree -;; db:test-get-uname -;; db:test-get-rundir -;; db:test-get-item-path -;; db:test-get-run_duration -;; db:test-get-final_logf -;; db:test-get-comment -;; db:test-get-fullname - - -;;====================================================================== -;; R U N C O N T R O L -;;====================================================================== - -;; Overall runs browser -;; -(define (runs window-id) - (let* ((runs-matrix (iup:matrix - #:expand "YES" - ;; #:fittosize "YES" - #:scrollbar "YES" - #:numcol 100 - #:numlin 100 - #:numcol-visible 7 - #:numlin-visible 7 - #:click-cb (lambda (obj lin col status) - #f - ;; (print "obj: " obj " lin: " lin " col: " col " status: " status) - )))) - - (iup:attribute-set! runs-matrix "RESIZEMATRIX" "YES") - (iup:attribute-set! runs-matrix "WIDTH0" "100") - - ;; (dboard:data-runs-matrix-set! *data* runs-matrix) - (iup:hbox - (iup:frame - #:title "Runs browser" - (iup:vbox - runs-matrix))))) +;; the main tree, everything starts from here +;; +(define (main-tree) + (iup:treebox + #:value 0 + #:title "Areas" + #:expand "YES" + #:addexpanded "YES" + #:size "10x" + #:selection-cb + (lambda (obj id state) + (let* ((path (tree:node->path obj id))) + (match path + ((treename) #f) ;;(print "nothing to do here")) + ((treename area) + (let ((tb (get-widget "main-tree"))) ;; wait, isn't this just "obj"? + (refresh-targets tb area))) + ((treename area . target) + (let* ((keys (megatest-get-keys area))) + (if (eq? (length keys)(length target)) + (let* ((runnames (megatest-get-run-names area keys target))) + (for-each + (lambda (runnamedat) + (tree:add-node obj "Areas" (cons area runnamedat))) + runnames))))) + (else + (print "path: "path)) + ) + #;(print "obj: "obj", id: "id", state: "state", path: "path))))) + +(define (refresh-targets tb area) + (let* ((targets (megatest-get-targets area))) + (for-each + (lambda (target) + (let* ((t-path (string-split target "/"))) + (tree:add-node tb "Areas" (cons area t-path)))) + targets))) + +(define (runs window-id) + (iup:hbox + (add-widget "main-tree" (main-tree)) + )) + +(define (runs-init) + (let* ((areas (get-areas)) + (tb (get-widget "main-tree"))) + (for-each + (lambda (areadat) + (tree:add-node tb "Areas" `(,(car areadat))) + (register-area areadat)) + areas))) ;; Browse and control a single run ;; (define (runcontrol window-id) (iup:hbox)) -;;====================================================================== -;; D A S H B O A R D -;;====================================================================== - ;; Main Panel (define (main-panel window-id) (iup:dialog #:title "Megatest Control Panel" - #:menu (dcommon:main-menu) + ;; #:menu (dcommon:main-menu) #:shrink "YES" (let ((tabtop (iup:tabs - (runs window-id) - (tests window-id) + (add-widget "runs" (runs window-id)) + ;; (tests window-id) (runcontrol window-id) - (mtest *toppath* window-id) - (rconfig window-id) + ;; (mtest *toppath* window-id) + ;; (rconfig window-id) ))) (iup:attribute-set! tabtop "TABTITLE0" "Runs") - (iup:attribute-set! tabtop "TABTITLE1" "Tests") - (iup:attribute-set! tabtop "TABTITLE2" "Run Control") - (iup:attribute-set! tabtop "TABTITLE3" "megatest.config") - (iup:attribute-set! tabtop "TABTITLE4" "runconfigs.config") + ;; (iup:attribute-set! tabtop "TABTITLE1" "Tests") + (iup:attribute-set! tabtop "TABTITLE1" "Run Control") + ;; (iup:attribute-set! tabtop "TABTITLE3" "megatest.config") + ;; (iup:attribute-set! tabtop "TABTITLE4" "runconfigs.config") tabtop))) (define *current-window-id* 0) (define (newdashboard dbstruct) @@ -724,24 +375,34 @@ (nextmintime (current-milliseconds)) (my-window-id *current-window-id*)) (set! *current-window-id* (+ 1 *current-window-id*)) ;; (dboard:data-runs-set! *data* data) ;; make this data available to the rest of the application (iup:show (main-panel my-window-id)) + (runs-init) ;; Yes, running iup:show will pop up a new panel ;; (iup:show (main-panel my-window-id)) (iup:callback-set! *tim* "ACTION_CB" (lambda (x) ;; Want to dedicate no more than 50% of the time to this so skip if ;; 2x delta time has not passed since last query - (if (< nextmintime (current-milliseconds)) + #t + #;(if (< nextmintime (current-milliseconds)) (let* ((starttime (current-milliseconds)) ;; (changes (dcommon:run-update keys data runname keypatts testpatt states statuses 'full my-window-id)) (endtime (current-milliseconds))) (set! nextmintime (+ endtime (* 2 (- endtime starttime)))) ;; (debug:print 11 *default-log-port* "CHANGE(S): " (car changes) "...")) ) - (debug:print-info 11 *default-log-port* "Server overloaded")))))) + (print "Server overloaded")))))) + +) + + +;;====================================================================== +;; D A S H B O A R D +;;====================================================================== + -;; (dboard:data-updaters-set! *data* (make-hash-table)) -(newdashboard #f) ;; *dbstruct-local*) +(import ndboard) +(newdashboard #f) (iup:main-loop) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -52,10 +52,19 @@ (if cinfo cinfo (if (server:check-if-running areapath) (client:setup areapath) #f)))) + +(define (rmt:on-homehost? runremote) + (let* ((hh-dat (remote-hh-dat runremote))) + (if (pair? hh-dat) + (cdr hh-dat) + (begin + (debug:print-info 0 *default-log-port* "hh-dat="hh-dat) + #f)))) + ;;====================================================================== (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id @@ -116,11 +125,12 @@ ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"]; ;; DOT SET_HOMEHOST -> MUTEXLOCK; ;; ensure we have a homehost record (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little - (remote-hh-dat-set! runremote (common:get-homehost))) + (let ((hh-data (server:choose-server areapath 'homehost))) + (remote-hh-dat-set! runremote (or hh-data (cons #f #f))))) ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) (cond #;((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.") @@ -176,21 +186,24 @@ ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5}; ;;DOT CASE5 -> "rmt:open-qry-close-locally"; ;; on homehost and this is a read ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required - (cdr (remote-hh-dat runremote)) ;; on homehost + (rmt:on-homehost? runremote) (member cmd api:read-only-queries)) ;; this is a read (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") (rmt:open-qry-close-locally cmd 0 params)) ;;DOT CASE6 [label="init\nremote"]; ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6}; ;;DOT CASE6 -> "rmt:send-receive"; ;; on homehost and this is a write, we already have a server, but server has died - ((and (cdr (remote-hh-dat runremote)) ;; on homehost + + ;; reinstate this keep-alive section but inject a time condition into the (add ... + + #;((and (cdr (remote-hh-dat runremote)) ;; on homehost (not (member cmd api:read-only-queries)) ;; this is a write (remote-server-url runremote) ;; have a server (not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6") (http-transport:close-connections area-dat: runremote) ;; make sure to clean up Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -99,10 +99,22 @@ (with-output-to-string (lambda () (write (list (current-directory) (current-process-id) (argv))))))) + +(define (server:get-client-signature) + (if *my-client-signature* *my-client-signature* + (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic + (set! *my-client-signature* sig) + *my-client-signature*))) + +(define (server:get-server-id) + (if *server-id* *server-id* + (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic + (set! *server-id* sig) + *server-id*))) ;; When using zmq this would send the message back (two step process) ;; with spiffy or rpc this simply returns the return data to be returned ;; (define (server:reply return-addr query-sig success/fail result) @@ -121,25 +133,26 @@ ;; if the target-host is set ;; try running on that host ;; incidental: rotate logs in logs/ dir. ;; (define (server:run areapath) ;; areapath is *toppath* for a given testsuite area - (let* ((curr-host (get-host-name)) + (let* (;; (curr-host (get-host-name)) ;; (attempt-in-progress (server:start-attempted? areapath)) ;; (dot-server-url (server:check-if-running areapath)) - (curr-ip (server:get-best-guess-address curr-host)) - (curr-pid (current-process-id)) - (homehost (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" )) - (target-host (car homehost)) + ;; (curr-ip (server:get-best-guess-address curr-host)) + ;; (curr-pid (current-process-id)) + ;; (homehost (server:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" )) + ;; (target-host (car homehost)) (testsuite (common:get-testsuite-name)) (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log")) (profile-mode (or (configf:lookup *configdat* "misc" "profilesw") "")) (cmdln (conc (common:get-megatest-exe) - " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") - " -daemonize " - "") + " -server - ";; (or target-host "-") + (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") + " -daemonize " + "") ;; " -log " logfile " -m testsuite:" testsuite " " profile-mode )) ;; (conc " >> " logfile " 2>&1 &"))))) (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) ;; why are we rotating logs here? This is a sensitive location with a lot going on!? @@ -148,134 +161,135 @@ (push-directory areapath) (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...") (thread-start! log-rotate) ;; host.domain.tld match host? - (if (and target-host - ;; look at target host, is it host.domain.tld or ip address and does it - ;; match current ip or hostname - (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) - (not (equal? curr-ip target-host))) - (begin - (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) - (setenv "TARGETHOST" target-host))) - + ;; (if (and target-host + ;; ;; look at target host, is it host.domain.tld or ip address and does it + ;; ;; match current ip or hostname + ;; (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) + ;; (not (equal? curr-ip target-host))) + ;; (begin + ;; (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) + ;; (setenv "TARGETHOST" target-host))) + ;; (setenv "TARGETHOST_LOGF" logfile) (thread-sleep! (/ (random 3000) 1000)) ;; add a random initial delay. It seems pretty common that many running tests request a server at the same time (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time)) (system (conc "nbfake " cmdln)) (unsetenv "TARGETHOST_LOGF") - (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) + ;; (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) (thread-join! log-rotate) (pop-directory))) ;; given a path to a server log return: host port startseconds server-id ;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which use match let ;; example of what it's looking for in the log file: ;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 (define (server:logf-get-start-info logf) - (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id - (dbprep-rx (regexp "^SERVER: dbprep")) - (dbprep-found 0)) + (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+)")) ;; SERVER STARTED: host:port AT timesecs server id + (dbprep-rx (regexp "^SERVER: dbprep")) + (dbprep-found 0) + (bad-dat (list #f #f #f #f #f))) (handle-exceptions exn (begin (debug:print-info 0 *default-log-port* "Unable to get server info from " logf ", exn=" exn) - (list #f #f #f #f)) ;; no idea what went wrong, call it a bad server + bad-dat) ;; no idea what went wrong, call it a bad server (with-input-from-file logf (lambda () (let loop ((inl (read-line)) (lnum 0)) (if (not (eof-object? inl)) (let ((mlst (string-match server-rx inl)) - (dbprep (string-match dbprep-rx inl)) - ) - (if dbprep - (set! dbprep-found 1) - ) + (dbprep (string-match dbprep-rx inl))) + (if dbprep (set! dbprep-found 1)) (if (not mlst) (if (< lnum 500) ;; give up if more than 500 lines of server log read (loop (read-line)(+ lnum 1)) (begin (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf ) - (list #f #f #f #f))) - (let ((dat (cdr mlst))) - (list (car dat) ;; host - (string->number (cadr dat)) ;; port - (string->number (caddr dat)) - (cadr (cddr dat)))))) + bad-dat)) + (match mlst + ((_ host port start server-id pid) + (list host + (string->number port) + (string->number start) + server-id + (string->number pid))) + (else + (debug:print 0 *current-log-port* "ERROR: did not recognise SERVER line info "mlst) + bad-dat)))) (begin - (if dbprep-found + (if dbprep-found (begin (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time)) - (thread-sleep! 0.5) ;; was 25 sec but that blocked things from starting? - ) - (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds))) - ) - (list #f #f #f #f))))))))) - -;; get a list of servers from the log files, with all relevant data -;; ( mod-time host port start-time pid ) -;; -(define (server:get-list areapath #!key (limit #f)) - (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$")) - (day-seconds (* 24 60 60))) - ;; if the directory exists continue to get the list - ;; otherwise attempt to create the logs dir and then - ;; continue - (if (if (directory-exists? (conc areapath "/logs")) - '() - (if (file-write-access? areapath) - (begin - (condition-case - (create-directory (conc areapath "/logs") #t) - (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs"))) - (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list. exn=" exn))) - (directory-exists? (conc areapath "/logs"))) - '())) - - ;; Get the list of server logs. - (let* ( - ;; For some reason, when I uncomment the below line, ext-tests sometimes starts 1000's of servers. - ;; (exiting-servers (system (conc "bash -c 'rm -f `grep -il exiting " areapath "/logs/server-*-*.log 2> /dev/null`'"))) - (server-logs (glob (conc areapath "/logs/server-*-*.log"))) - (num-serv-logs (length server-logs))) - (if (or (null? server-logs) (= num-serv-logs 0)) - (let () - (debug:print 2 *default-log-port* "There are no servers running at " (common:human-time)) - '() - ) - (let loop ((hed (string-chomp (car server-logs))) - (tal (cdr server-logs)) - (res '())) - (let* ((mod-time (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "server:get-list: failed to get modification time on " hed ", exn=" exn) - (current-seconds)) ;; 0 - (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted - (down-time (- (current-seconds) mod-time)) - (serv-dat (if (or (< num-serv-logs 10) - (< down-time 900)) ;; day-seconds)) - (server:logf-get-start-info hed) - '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at - (serv-rec (cons mod-time serv-dat)) - (fmatch (string-match fname-rx hed)) - (pid (if fmatch (string->number (list-ref fmatch 2)) #f)) - (new-res (if (null? serv-dat) - res - (cons (append serv-rec (list pid)) res)))) ;; any changes to number of elements in new-res will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let - (if (null? tal) - (if (and limit - (> (length new-res) limit)) - new-res ;; (take new-res limit) <= need intelligent sorting before this will work - new-res) - (loop (string-chomp (car tal)) (cdr tal) new-res))))))))) - -(define (server:get-num-alive srvlst) + (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting? + (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds)))) + bad-dat)))))))) + +;; ;; get a list of servers from the log files, with all relevant data +;; ;; ( mod-time host port start-time pid ) +;; ;; +;; (define (server:get-list areapath #!key (limit #f)) +;; (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$")) +;; (day-seconds (* 24 60 60))) +;; ;; if the directory exists continue to get the list +;; ;; otherwise attempt to create the logs dir and then +;; ;; continue +;; (if (if (directory-exists? (conc areapath "/logs")) +;; '() +;; (if (file-write-access? areapath) +;; (begin +;; (condition-case +;; (create-directory (conc areapath "/logs") #t) +;; (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs"))) +;; (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list. exn=" exn))) +;; (directory-exists? (conc areapath "/logs"))) +;; '())) +;; +;; ;; Get the list of server logs. +;; (let* ( +;; ;; For some reason, when I uncomment the below line, ext-tests sometimes starts 1000's of servers. +;; ;; (exiting-servers (system (conc "bash -c 'rm -f `grep -il exiting " areapath "/logs/server-*-*.log 2> /dev/null`'"))) +;; (server-logs (glob (conc areapath "/logs/server-*-*.log"))) +;; (num-serv-logs (length server-logs))) +;; (if (or (null? server-logs) (= num-serv-logs 0)) +;; (let () +;; (debug:print 2 *default-log-port* "There are no servers running at " (common:human-time)) +;; '() +;; ) +;; (let loop ((hed (string-chomp (car server-logs))) +;; (tal (cdr server-logs)) +;; (res '())) +;; (let* ((mod-time (handle-exceptions +;; exn +;; (begin +;; (debug:print 0 *default-log-port* "server:get-list: failed to get modification time on " hed ", exn=" exn) +;; (current-seconds)) ;; 0 +;; (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted +;; (down-time (- (current-seconds) mod-time)) +;; (serv-dat (if (or (< num-serv-logs 10) +;; (< down-time 900)) ;; day-seconds)) +;; (server:logf-get-start-info hed) +;; '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at +;; (serv-rec (cons mod-time serv-dat)) +;; (fmatch (string-match fname-rx hed)) +;; (pid (if fmatch (string->number (list-ref fmatch 2)) #f)) +;; (new-res (if (null? serv-dat) +;; res +;; (cons (append serv-rec (list pid)) res)))) ;; any changes to number of elements in new-res will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let +;; (if (null? tal) +;; (if (and limit +;; (> (length new-res) limit)) +;; new-res ;; (take new-res limit) <= need intelligent sorting before this will work +;; new-res) +;; (loop (string-chomp (car tal)) (cdr tal) new-res))))))))) + +#;(define (server:get-num-alive srvlst) (let ((num-alive 0)) (for-each (lambda (server) (handle-exceptions exn @@ -289,70 +303,72 @@ 0))) (if (< uptime 5)(set! num-alive (+ num-alive 1))))))) srvlst) num-alive)) -;; given a list of servers get a list of valid servers, i.e. at least -;; 10 seconds old, has started and is less than 1 hour old and is -;; active (i.e. mod-time < 10 seconds -;; -;; mod-time host port start-time pid -;; -;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off -;; and servers should stick around for about two hours or so. -;; -(define (server:get-best srvlst) - (let* ((nums (server:get-num-servers)) - (now (current-seconds)) - (slst (sort - (filter (lambda (rec) - (if (and (list? rec) - (> (length rec) 2)) - (let ((start-time (list-ref rec 3)) - (mod-time (list-ref rec 0))) - ;; (print "start-time: " start-time " mod-time: " mod-time) - (and start-time mod-time - (> (- now start-time) 0) ;; been running at least 0 seconds - (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds - (or (not (configf:lookup *configdat* "server" "runtime")) ;; skip if not set - (< (- now start-time) - (+ (- (string->number (configf:lookup *configdat* "server" "runtime")) - 180) - (random 360)))) ;; under one hour running time +/- 180 - )) - #f)) - srvlst) - (lambda (a b) - (< (list-ref a 3) - (list-ref b 3)))))) - (if (> (length slst) nums) - (take slst nums) - slst))) - -(define (server:get-first-best areapath) - (let ((srvrs (server:get-best (server:get-list areapath)))) - (if (and srvrs - (not (null? srvrs))) - (car srvrs) - #f))) - -(define (server:get-rand-best areapath) - (let ((srvrs (server:get-best (server:get-list areapath)))) - (if (and (list? srvrs) - (not (null? srvrs))) - (let* ((len (length srvrs)) - (idx (random len))) - (list-ref srvrs idx)) - #f))) +;; ;; given a list of servers get a list of valid servers, i.e. at least +;; ;; 10 seconds old, has started and is less than 1 hour old and is +;; ;; active (i.e. mod-time < 10 seconds +;; ;; +;; ;; mod-time host port start-time pid +;; ;; +;; ;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off +;; ;; and servers should stick around for about two hours or so. +;; ;; +;; (define (server:get-best srvlst) +;; (let* ((nums (server:get-num-servers)) +;; (now (current-seconds)) +;; (slst (sort +;; (filter (lambda (rec) +;; (if (and (list? rec) +;; (> (length rec) 2)) +;; (let ((start-time (list-ref rec 3)) +;; (mod-time (list-ref rec 0))) +;; ;; (print "start-time: " start-time " mod-time: " mod-time) +;; (and start-time mod-time +;; (> (- now start-time) 0) ;; been running at least 0 seconds +;; (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds +;; (or (not (configf:lookup *configdat* "server" "runtime")) ;; skip if not set +;; (< (- now start-time) +;; (+ (- (string->number (configf:lookup *configdat* "server" "runtime")) +;; 180) +;; (random 360)))) ;; under one hour running time +/- 180 +;; )) +;; #f)) +;; srvlst) +;; (lambda (a b) +;; (< (list-ref a 3) +;; (list-ref b 3)))))) +;; (if (> (length slst) nums) +;; (take slst nums) +;; slst))) + +;; ;; switch from server:get-list to server:get-servers-info +;; ;; +;; (define (server:get-first-best areapath) +;; (let ((srvrs (server:get-best (server:get-list areapath)))) +;; (if (and srvrs +;; (not (null? srvrs))) +;; (car srvrs) +;; #f))) +;; +;; (define (server:get-rand-best areapath) +;; (let ((srvrs (server:get-best (server:get-list areapath)))) +;; (if (and (list? srvrs) +;; (not (null? srvrs))) +;; (let* ((len (length srvrs)) +;; (idx (random len))) +;; (list-ref srvrs idx)) +;; #f))) (define (server:record->id servr) (handle-exceptions exn (begin (debug:print-info 0 *default-log-port* "Unable to get server id from " servr ", exn=" exn) #f) - (match-let (((mod-time host port start-time server-id pid) + (match-let (((host port start-time server-id pid) servr)) (if server-id server-id #f)))) @@ -360,28 +376,22 @@ (handle-exceptions exn (begin (debug:print-info 0 *default-log-port* "Unable to get server url from " servr ", exn=" exn) #f) - (match-let (((mod-time host port start-time server-id pid) + (match-let (((host port start-time server-id pid) servr)) (if (and host port) (conc host ":" port) #f)))) -(define (server:get-client-signature) ;; BB> why is this proc named "get-"? it returns nothing -- set! has not return value. - (if *my-client-signature* *my-client-signature* - (let ((sig (server:mk-signature))) - (set! *my-client-signature* sig) - *my-client-signature*))) - ;; if server-start-last exists, and wasn't old enough, wait + 1, then call this function recursively until it is old enough. ;; if it is old enough, overwrite it and wait 0.25 seconds. ;; if it then has the wrong server key, wait + 1 and call this function recursively. ;; -(define (server:wait-for-server-start-last-flag areapath) +#;(define (server:wait-for-server-start-last-flag areapath) (let* ((start-flag (conc areapath "/logs/server-start-last")) ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds) (idletime (configf:lookup-number *configdat* "server" "idletime" default: 4)) (server-key (conc (get-host-name) "-" (current-process-id)))) (if (file-exists? start-flag) @@ -405,20 +415,114 @@ (seconds->time-string fmodtime) ", time since last start: " delta ", required idletime: " idletime ", gating reason:" (if old-enough "another job started a server" "too soon to start another server")) (thread-sleep! ( + 1 idletime)) (server:wait-for-server-start-last-flag areapath))))))) +;; oldest server alive determines host then choose random of youngest +;; five servers on that host +;; +(define (server:get-servers-info areapath) + (let* ((servinfodir (conc *toppath*"/.servinfo"))) + (if (not (file-exists? servinfodir)) + (create-directory servinfodir)) + (let* ((allfiles (glob (conc servinfodir"/*"))) + (res (make-hash-table))) + (for-each + (lambda (f) + (let* ((hostport (pathname-strip-directory f)) + (serverdat (server:logf-get-start-info f))) + (match serverdat + ((host port start server-id pid) + (if (and host port start server-id pid) + (hash-table-set! res hostport serverdat) + (debug:print-info 0 *default-log-port* "bad server info for "f": "serverdat))) + (else + (debug:print-info 0 *default-log-port* "bad server info for "f": "serverdat))))) + allfiles) + res))) + +;; oldest server alive determines host then choose random of youngest +;; five servers on that host +;; +;; mode: +;; best - get best server (random of newest five) +;; home - get home host based on oldest server +;; info - print info +(define (server:choose-server areapath #!optional (mode 'best)) + ;; age is current-starttime + ;; find oldest alive + ;; 1. sort by age ascending and ping until good + ;; find alive rand from youngest + ;; 1. sort by age descending + ;; 2. take five + ;; 3. check alive, discard if not and repeat + (let* ((serversdat (server:get-servers-info areapath)) + (servkeys (hash-table-keys serversdat)) + (by-time-asc (if (not (null? servkeys)) + (sort servkeys ;; list of "host:port" + (lambda (a b) + (>= (list-ref (hash-table-ref serversdat a) 2) + (list-ref (hash-table-ref serversdat b) 2)))) + '()))) + (if (not (null? by-time-asc)) + (let* ((oldest (last by-time-asc)) + (oldest-dat (hash-table-ref serversdat oldest)) + (host (list-ref oldest-dat 0)) + (all-valid (filter (lambda (x) + (equal? host (list-ref (hash-table-ref serversdat x) 0))) + by-time-asc)) + (best-five (lambda () + (if (> (length all-valid) 5) + (take all-valid 5) + all-valid))) + (names->dats (lambda (names) + (map (lambda (x) + (hash-table-ref serversdat x)) + names))) + (am-home? (lambda () + (let* ((currhost (get-host-name)) + (bestadrs (server:get-best-guess-address currhost))) + (or (equal? host currhost) + (equal? host bestadrs)))))) + (case mode + ((info) + (print "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid) + (print "youngest: "(hash-table-ref serversdat (car all-valid)))) + ((home) host) + ((homehost) (cons host (am-home?))) ;; shut up old code + ((home?) (am-home?)) + ((best-five)(names->dats (best-five))) + ((all-valid)(names->dats all-valid)) + ((best) (let* ((best-five (best-five)) + (len (length best-five))) + (hash-table-ref serversdat (list-ref best-five (random len))))) + ((count)(length all-valid)) + (else + (debug:print 0 *default-log-port* "ERROR: invalid command "mode) + #f))) + (begin + (server:run areapath) + (thread-sleep! 3) + (case mode + ((homehost) (cons #f #f)) + (else #f)))))) - +;; would like to eventually get rid of this +;; +(define (common:on-homehost?) + (server:choose-server *toppath* 'home?)) + ;; kind start up of server, wait before allowing another server for a given ;; area to be launched ;; (define (server:kind-run areapath) ;; look for $MT_RUN_AREA_HOME/logs/server-start-last ;; and wait for it to be at least seconds old - (server:wait-for-server-start-last-flag areapath) - (if (not (server:check-if-running areapath)) ;; why try if there is already a server running? + ;; (server:wait-for-server-start-last-flag areapath) + (if (< (server:choose-server areapath 'count) 10) + (server:run areapath)) + #;(if (not (server:check-if-running areapath)) ;; why try if there is already a server running? (let* ((lock-file (conc areapath "/logs/server-start.lock"))) (let* ((start-flag (conc areapath "/logs/server-start-last"))) (common:simple-file-lock-and-wait lock-file expire-time: 25) (debug:print-info 2 *default-log-port* "server:kind-run: touching " start-flag) (system (conc "touch " start-flag)) ;; lazy but safe @@ -434,35 +538,33 @@ (let loop ((server-info (server:check-if-running areapath)) (try-num 0)) (if (or server-info (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available. (server:record->url server-info) - (let ((num-ok (length (server:get-best (server:get-list areapath))))) + (let ((num-ok (length (server:choose-server areapath 'all-valid)))) (if (and (> try-num 0) ;; first time through simply wait a little while then try again (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one - (server:kind-run areapath)) + (server:run areapath)) (thread-sleep! 5) (loop (server:check-if-running areapath) (+ try-num 1))))))) -(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG. - (define (server:get-num-servers #!key (numservers 2)) (let ((ns (string->number (or (configf:lookup *configdat* "server" "numservers") "notanumber")))) (or ns numservers))) ;; no longer care if multiple servers are started by accident. older servers will drop off in time. ;; (define (server:check-if-running areapath) ;; #!key (numservers "2")) (let* ((ns (server:get-num-servers)) ;; get the setting the for maximum number of servers allowed - (servers (server:get-best (server:get-list areapath)))) + (servers (server:choose-server areapath 'best-five))) ;; (server:get-best (server:get-list areapath)))) (if (or (and servers (null? servers)) - (not servers) - (and (list? servers) - (< (length servers) (+ 1 (random ns))))) ;; somewhere between 1 and numservers + (not servers)) + ;; (and (list? servers) + ;; (< (length servers) (+ 1 (random ns))))) ;; somewhere between 1 and numservers #f (let loop ((hed (car servers)) (tal (cdr servers))) (let ((res (server:check-server hed))) (if res @@ -473,15 +575,12 @@ ;; ping the given server ;; (define (server:check-server server-record) (let* ((server-url (server:record->url server-record)) - (server-id (server:record->id server-record)) - (res (case *transport-type* - ((http)(server:ping server-url server-id)) - ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server) - ))) + (server-id (server:record->id server-record)) + (res (server:ping server-url server-id))) (if res server-url #f))) (define (server:kill servr) @@ -569,11 +668,11 @@ (define (server:expiration-timeout) (let ((tmo (configf:lookup *configdat* "server" "timeout"))) (if (and (string? tmo) (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below (* 3600 (string->number tmo)) - 1200))) + 60))) (define (server:get-best-guess-address hostname) (let ((res #f)) (for-each (lambda (adr) ADDED treemod.scm Index: treemod.scm ================================================================== --- /dev/null +++ treemod.scm @@ -0,0 +1,167 @@ +;;====================================================================== +;; Copyright 2006-2013, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . +;; +;;====================================================================== + +(declare (unit treemod)) +;; (declare (uses margs)) +;; (declare (uses launch)) +;; ;; (declare (uses megatest-version)) +;; (declare (uses gutils)) +;; (declare (uses db)) +;; (declare (uses server)) +;; ;; (declare (uses synchash)) +;; (declare (uses dcommon)) +;; +;; (include "megatest-version.scm") +;; (include "common_records.scm") +;; (include "db_records.scm") +;; (include "key_records.scm") + +(module treemod + * + +(import + scheme + chicken + data-structures + + (prefix iup iup:) + canvas-draw + iup + regex + srfi-1 + srfi-13 + format + ) + +;;====================================================================== +;; T R E E S T U F F +;;====================================================================== + +;; path is a list of nodes, each the child of the previous +;; this routine returns the id so another node can be added +;; either as a leaf or as a branch +;; +;; BUG: This needs a stop sensor for when a branch is exhausted +;; +(define (tree:find-node obj path) + ;; start at the base of the tree + (if (null? path) + #f ;; or 0 ???? + (let loop ((hed (car path)) + (tal (cdr path)) + (depth 0) + (nodenum 0)) + ;; nodes in iup tree are 100% sequential so iterate over nodenum + (if (iup:attribute obj (conc "DEPTH" nodenum)) ;; end when no more nodes + (let ((node-depth (string->number (iup:attribute obj (conc "DEPTH" nodenum)))) + (node-title (iup:attribute obj (conc "TITLE" nodenum)))) + (if (and (equal? depth node-depth) + (equal? hed node-title)) ;; yep, this is the one! + (if (null? tal) ;; end of the line + nodenum + (loop (car tal)(cdr tal)(+ depth 1)(+ 1 nodenum))) + ;; this is the case where we found part of the hierarchy but not + ;; all of it, i.e. the node-depth went from deep to less deep + (if (> depth node-depth) ;; (+ 1 node-depth)) + #f + (loop hed tal depth (+ nodenum 1))))) + #f)))) + +;; top is the top node name zeroeth node VALUE=0 +(define (tree:add-node obj top nodelst #!key (userdata #f)) + (let ((curr-top (iup:attribute obj "TITLE0"))) + (if (or (not (string? curr-top)) + (string-null? curr-top) + (string-match "^\\s*$" curr-top)) + (iup:attribute-set! obj "ADDBRANCH0" top)) + + + + (cond + ((not (equal? top (iup:attribute obj "TITLE0"))) + (print "ERROR: top name " top " doesn't match " (iup:attribute obj "TITLE0"))) + ((null? nodelst)) + (else + (let loop ((hed (car nodelst)) + (tal (cdr nodelst)) + (depth 1) + (pathl (list top))) + ;; Because the tree dialog changes node numbers when + ;; nodes are added or removed we must look up nodes + ;; each and every time. 0 is the top node so default + ;; to that. + (let* ((newpath (append pathl (list hed))) + (parentnode (tree:find-node obj pathl)) + (nodenum (tree:find-node obj newpath))) + ;; Add the branch under lastnode if not found + (if (not nodenum) + (begin + (iup:attribute-set! obj (conc "ADDBRANCH" parentnode) hed) + ;; ERROR? ADDING DATA TO PARENT, DONT WE WANT IT ON CREATED NODE? + (if userdata + (iup:attribute-set! obj (conc "USERDATA" parentnode) userdata)) + (if (null? tal) + #t + ;; reset to top + (loop (car nodelst)(cdr nodelst) 1 (list top)))) + (if (null? tal) ;; if null here then this path has already been added + #t + (loop (car tal)(cdr tal)(+ depth 1) newpath))))))))) + +(define (tree:node->path obj nodenum) + (let loop ((currnode 0) + (path '())) + (let* ((node-depth (string->number (iup:attribute obj (conc "DEPTH" currnode)))) + (node-title (iup:attribute obj (conc "TITLE" currnode))) + (trimpath (if (and (not (null? path)) + (> (length path) node-depth)) + (take path node-depth) + path)) + (newpath (append trimpath (list node-title)))) + (if (>= currnode nodenum) + newpath + (loop (+ currnode 1) + newpath))))) + +(define (tree:delete-node obj top node-path) ;; node-path is a list of strings + (let ((id (tree:find-node obj (cons top node-path)))) + (print "Found node to remove " id " for path " top " " node-path) + (iup:attribute-set! obj (conc "DELNODE" id) "SELECTED"))) + +) +#| + + (let* ((tb (iup:treebox + #:value 0 + #:name "Runs" + #:expand "YES" + #:addexpanded "NO" + #:selection-cb + (lambda (obj id state) + ;; (print "obj: " obj ", id: " id ", state: " state) + (let* ((run-path (tree:node->path obj id)) + (run-id (tree-path->run-id (cdr run-path)))) + (if run-id + (begin + (dboard:data-curr-run-id-set! data run-id) + (dashboard:update-run-summary-tab))) + ;; (print "path: " (tree:node->path obj id) " run-id: " run-id) + )))) +|#