Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -4,14 +4,21 @@ INSTALL=install SRCFILES = common.scm items.scm launch.scm \ ods.scm runconfig.scm server.scm configf.scm \ db.scm keys.scm margs.scm megatest-version.scm \ process.scm runs.scm tasks.scm tests.scm genexample.scm \ - http-transport.scm filedb.scm \ + http-transport.scm nmsg-transport.scm filedb.scm \ client.scm gutils.scm synchash.scm daemon.scm mt.scm dcommon.scm \ tree.scm ezsteps.scm lock-queue.scm sdb.scm \ - rmt.scm api.scm tdb.scm rpc-transport.scm + rmt.scm api.scm tdb.scm rpc-transport.scm \ + portlogger.scm + +# Eggs to install (straightforward ones) +EGGS=matchable readline apropos base64 regex-literals format regex-case test coops trace csv \ + dot-locking posix-utils posix-extras directory-utils hostinfo tcp-server rpc csv-xml fmt \ + json md5 awful http-client spiffy uri-common intarweb spiffy-request-vars \ + spiffy-directory-listing ssax sxml-serializer sxml-modifications iup canvas-draw sqlite3 GUISRCF = dashboard-tests.scm dashboard-guimonitor.scm OFILES = $(SRCFILES:%.scm=%.o) GOFILES = $(GUISRCF:%.scm=%.o) @@ -21,12 +28,17 @@ DEPLOYHELPERS=$(addprefix deploytarg/,$(ADTLSCR)) MTESTHASH=$(shell fossil info|grep checkout:| awk '{print $$2}') CSIPATH=$(shell which csi) CKPATH=$(shell dirname $(shell dirname $(CSIPATH))) +# ARCHSTR=$(shell uname -m)_$(shell uname -r) +# BASH_MACHTYPE=$(shell bash -c "echo \$$MACHTYPE") +# ARCHSTR=$(BASH_MACHTYPE)_$(shell lsb_release -sr) +ARCHSTR=$(shell lsb_release -sr) +# ARCHSTR=$(shell bash -c "echo \$$MACHTYPE") -all : mtest dboard newdboard txtdb +all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard txtdb refdb : txtdb/txtdb.scm csc -I txtdb txtdb/txtdb.scm -o refdb mtest: $(OFILES) megatest.o @@ -33,31 +45,16 @@ csc $(CSCOPTS) $(OFILES) megatest.o -o mtest dboard : $(OFILES) $(GOFILES) dashboard.scm csc $(OFILES) dashboard.scm $(GOFILES) -o dboard -newdboard : newdashboard.scm $(OFILES) 
$(GOFILES) - csc $(OFILES) $(GOFILES) newdashboard.scm -o newdboard - -$(PREFIX)/bin/revtagfsl : utils/revtagfsl.scm - csc utils/revtagfsl.scm -o $(PREFIX)/bin/revtagfsl - -deploytarg/libiupcd.so : $(CKPATH)/lib/libiupcd.so - for i in iup im cd av call sqlite; do \ - cp $(CKPATH)/lib/lib$$i* deploytarg/ ; \ - done - cp $(CKPATH)/include/*.h deploytarg - -# puts deployed megatest in directory "megatest" -deploytarg/megatest : $(OFILES) megatest.o - csc -deploy $(CSCOPTS) $(OFILES) megatest.scm - rsync -av megatest/ deploytarg/ - -deploytarg/dashboard : $(OFILES) $(GOFILES) - csc -deploy $(OFILES) $(GOFILES) dashboard.scm - rsync -av dashboard/ deploytarg/ - +ndboard : newdashboard.scm $(OFILES) $(GOFILES) + csc $(OFILES) $(GOFILES) newdashboard.scm -o ndboard + +# +# $(PREFIX)/bin/revtagfsl : utils/revtagfsl.scm +# csc utils/revtagfsl.scm -o $(PREFIX)/bin/revtagfsl # Special dependencies for the includes tests.o db.o launch.o runs.o dashboard-tests.o dashboard-guimonitor.o dashboard-main.o monitor.o dashboard.o megatest.o : db_records.scm tests.o runs.o dashboard.o dashboard-tests.o dashboard-main.o : run_records.scm db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm @@ -76,29 +73,27 @@ $(OFILES) $(GOFILES) : common_records.scm %.o : %.scm csc $(CSCOPTS) -c $< -$(PREFIX)/bin/mtest : mtest +$(PREFIX)/bin/.$(ARCHSTR)/mtest : mtest @echo Installing to PREFIX=$(PREFIX) - $(INSTALL) mtest $(PREFIX)/bin/mtest - utils/mk_wrapper $(PREFIX) mtest > $(PREFIX)/bin/megatest + $(INSTALL) mtest $(PREFIX)/bin/.$(ARCHSTR)/mtest + utils/mk_wrapper $(PREFIX) mtest $(PREFIX)/bin/megatest chmod a+x $(PREFIX)/bin/megatest -$(PREFIX)/bin/newdboard : newdboard - $(INSTALL) newdboard $(PREFIX)/bin/newdboard - utils/mk_wrapper $(PREFIX) newdboard > $(PREFIX)/bin/newdashboard +$(PREFIX)/bin/.$(ARCHSTR)/ndboard : ndboard + $(INSTALL) ndboard $(PREFIX)/bin/.$(ARCHSTR)/ndboard + +$(PREFIX)/bin/newdashboard : $(PREFIX)/bin/.$(ARCHSTR)/ndboard + 
utils/mk_wrapper $(PREFIX) ndboard $(PREFIX)/bin/newdashboard chmod a+x $(PREFIX)/bin/newdashboard $(HELPERS) : utils/mt_* $(INSTALL) $< $@ chmod a+x $@ -$(DEPLOYHELPERS) : utils/mt_* - $(INSTALL) $< $@ - chmod a+X $@ - $(PREFIX)/bin/mt_xterm : utils/mt_xterm $(INSTALL) $< $@ chmod a+x $@ $(PREFIX)/bin/nbfake : utils/nbfake @@ -125,33 +120,67 @@ $(INSTALL) $< $@ chmod a+x $@ # install dashboard as dboard so wrapper script can be called dashboard -$(PREFIX)/bin/dboard : dboard $(FILES) - $(INSTALL) dboard $(PREFIX)/bin/dboard - utils/mk_wrapper $(PREFIX) dboard > $(PREFIX)/bin/dashboard +$(PREFIX)/bin/.$(ARCHSTR)/dboard : dboard $(FILES) + utils/mk_wrapper $(PREFIX) dboard $(PREFIX)/bin/dashboard chmod a+x $(PREFIX)/bin/dashboard - -install : bin $(PREFIX)/bin/mtest $(PREFIX)/bin/megatest $(PREFIX)/bin/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \ - $(PREFIX)/bin/nbfind $(PREFIX)/bin/loadrunner $(PREFIX)/bin/newdboard $(PREFIX)/bin/refdb $(PREFIX)/bin/mt_xterm $(PREFIX)/bin/revtagfsl - -deploytarg/apropos.so : Makefile - for i in apropos base64 canvas-draw csv-xml directory-utils dot-locking extras fmt format hostinfo http-client intarweb json md5 message-digest posix posix-extras readline regex regex-case s11n spiffy spiffy-request-vars sqlite3 srfi-1 srfi-18 srfi-69 tcp test uri-common check-errors synch matchable sql-null tcp-server rpc blob-utils string-utils variable-item defstruct uri-generic sendfile opensll openssl lookup-table list-utils stack; do \ - chicken-install -prefix deploytarg -deploy $$i;done - -deploytarg/libsqlite3.so : - CSC_OPTIONS="-Ideploytarg -Ldeploytarg" $CHICKEN_INSTALL -prefix deploytarg -deploy sqlite3 - - - -deploy : deploytarg/megatest deploytarg/dashboard $(DEPLOYHELPERS) deploytarg/nbfake deploytarg/nbfind deploytarg/libiupcd.so deploytarg/apropos.so - - -bin : - mkdir -p $(PREFIX)/bin + $(INSTALL) dboard $(PREFIX)/bin/.$(ARCHSTR)/dboard + +install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest 
$(PREFIX)/bin/megatest \ + $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \ + $(PREFIX)/bin/nbfind $(PREFIX)/bin/loadrunner $(PREFIX)/bin/refdb $(PREFIX)/bin/mt_xterm \ + $(PREFIX)/bin/newdashboard + +$(PREFIX)/bin/.$(ARCHSTR) : + mkdir -p $(PREFIX)/bin/.$(ARCHSTR) test: tests/tests.scm cd tests;csi -I .. -b -n tests.scm clean : rm -f $(OFILES) $(GOFILES) megatest dboard dboard.o megatest.o dashboard.o + +# Deploy section (not complete yet) +# +$(DEPLOYHELPERS) : utils/mt_* + $(INSTALL) $< $@ + chmod a+X $@ + +deploytarg/apropos.so : Makefile + chicken-install -p deploytarg -deploy $(EGGS) + +# for i in apropos base64 canvas-draw csv-xml directory-utils dot-locking extras fmt format hostinfo http-client intarweb json md5 message-digest posix posix-extras readline regex regex-case s11n spiffy spiffy-request-vars sqlite3 srfi-1 srfi-18 srfi-69 tcp test uri-common check-errors synch matchable sql-null tcp-server rpc blob-utils string-utils variable-item defstruct uri-generic sendfile opensll openssl lookup-table list-utils stack; do \ +# chicken-install -prefix deploytarg -deploy $$i;done + +# deploytarg/libsqlite3.so : +# CSC_OPTIONS="-Ideploytarg -Ldeploytarg" $CHICKEN_INSTALL -prefix deploytarg -deploy sqlite3 + +deploy : deploytarg/mtest deploytarg/dboard $(DEPLOYHELPERS) deploytarg/nbfake deploytarg/nbfind deploytarg/apropos.so + +# deploytarg/libiupcd.so : $(CKPATH)/lib/libiupcd.so +# for i in iup im cd av call sqlite; do \ +# cp $(CKPATH)/lib/lib$$i* deploytarg/ ; \ +# done +# cp $(CKPATH)/include/*.h deploytarg + +# puts deployed megatest in directory "megatest" +deploytarg/mtest : $(OFILES) megatest.o deploytarg/apropos.so + csc -deploy $(CSCOPTS) $(OFILES) megatest.scm -o deploytarg + mv deploytarg/deploytarg deploytarg/mtest + +deploytarg/dboard : $(OFILES) $(GOFILES) dashboard.scm deploytarg/apropos.so + csc -deploy $(OFILES) $(GOFILES) dashboard.scm -o deploytarg + mv deploytarg/deploytarg deploytarg/dboard + +# 
DATASHAREO=configf.o common.o process.o tree.o dcommon.o margs.o launch.o gutils.o db.o synchash.o server.o \ +# megatest-version.o tdb.o ods.o mt.o keys.o +datashare-testing/sd : datashare.scm $(OFILES) + csc datashare.scm $(OFILES) -o datashare-testing/sd + +sd : datashare-testing/sd + mkdir -p /tmp/$(USER)/datashare/disk1 /tmp/$(USER)/basepath + +xterm : sd + (export BASEPATH=/tmp/$(USER)/basepath ; export PATH="$(PWD)/datashare-testing:$(PATH)" ; xterm &) + Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -10,102 +10,144 @@ ;;====================================================================== (declare (unit api)) (declare (uses rmt)) (declare (uses db)) + +;; allow these queries through without starting a server +;; +(define api:read-only-queries + '(get-key-val-pairs + get-keys + test-toplevel-num-items + get-test-info-by-id + test-get-rundir-from-test-id + get-count-tests-running + get-count-tests-running-in-jobgroup + get-previous-test-run-record + get-matching-previous-test-run-records + test-get-logfile-info + test-get-records-for-index-file + get-testinfo-state-status + test-get-top-process-pid + test-get-paths-matching-keynames-target-new + get-prereqs-not-met + get-count-tests-running-for-run-id + get-run-info + get-run-status + register-run + get-tests-for-run + get-test-id + get-tests-for-runs-mindata + get-run-name-from-id + get-runs + get-all-run-ids + get-prev-run-ids + get-run-ids-matching-target + get-runs-by-patt + get-steps-data + login + testmeta-get-record)) ;; These are called by the server on recipt of /api calls - -(define (api:execute-requests dbstruct cmd params) - (case (string->symbol cmd) - ;; SERVERS - ((start-server) (apply server:kind-run params)) - ;; ((kill-server) - ;; (db:sync-tables (db:tbls *inmemdb*) *inmemdb* *db*) ;; (db:sync-to *inmemdb* *db*) - ;; (let ((hostname (car *runremote*)) - ;; (port (cadr *runremote*)) - ;; (pid (if (null? 
params) #f (car params))) - ;; (th1 (make-thread (lambda ()(thread-sleep! 3)(debug:print 0 "Server exiting!")(exit 0)) "Server exit thread"))) - ;; (debug:print 0 "WARNING: Server on " hostname ":" port " going down by user request!") - ;; (debug:print-info 1 "current pid=" (current-process-id)) - ;; (open-run-close tasks:server-deregister tasks:open-db - ;; hostname - ;; port: port) - ;; (set! *server-run* #f) - ;; (thread-sleep! 3) - ;; (if pid - ;; (process-signal pid signal/kill) - ;; (thread-start! th1)) - ;; '(#t "exit process started"))) - - ;; KEYS - ((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params)) - ((get-keys) (db:get-keys dbstruct)) - - ;; TESTS - ;; json doesn't do vectors, convert to list - ((get-test-info-by-id) (apply db:get-test-info-by-id dbstruct params)) - ((test-get-rundir-from-test-id) (apply db:test-get-rundir-from-test-id dbstruct params)) - ((test-set-state-status-by-id) (apply db:test-set-state-status-by-id dbstruct params)) - ((get-count-tests-running) (apply db:get-count-tests-running dbstruct params)) - ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params)) - ((delete-test-records) (apply db:delete-test-records dbstruct params)) - ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params)) - ((test-set-status-state) (apply db:test-set-status-state dbstruct params)) - ((get-previous-test-run-record) (apply db:get-previous-test-run-record dbstruct params)) - ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params)) - ((db:test-get-logfile-info) (apply db:test-get-logfile-info dbstruct params)) - ((test-get-records-for-index-file (apply db:test-get-records-for-index-file dbstruct params))) - ((get-testinfo-state-status) (apply db:get-testinfo-state-status dbstruct params)) - ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params)) 
- ((get-prereqs-not-met) (apply db:get-prereqs-not-met dbstruct params)) - ((roll-up-pass-fail-counts) (apply db:roll-up-pass-fail-counts dbstruct params)) - ((update-fail-pass-counts) (apply db:general-call dbstruct 'update-pass-fail-counts params)) - ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params)) - - ;; RUNS - ((get-run-info) (apply db:get-run-info dbstruct params)) - ((register-run) (apply db:register-run dbstruct params)) - ((set-tests-state-status) (apply db:set-tests-state-status dbstruct params)) - ((get-tests-for-run) (apply db:get-tests-for-run dbstruct params)) - ((get-test-id) (apply db:get-test-id dbstruct params)) - ((get-tests-for-runs-mindata) (apply db:get-tests-for-runs-mindata dbstruct params)) - ((get-run-name-from-id) (apply db:get-run-name-from-id dbstruct params)) - ((delete-run) (apply db:delete-run dbstruct params)) - ((get-runs) (apply db:get-runs dbstruct params)) - ((get-all-run-ids) (db:get-all-run-ids dbstruct)) - ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params)) - ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params)) - ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params)) - ((lock/unlock-run) (apply db:lock/unlock-run dbstruct params)) - ((update-run-event_time) (apply db:update-run-event_time dbstruct params)) - - ;; STEPS - ((teststep-set-status!) (apply db:teststep-set-status! 
dbstruct params)) - - ;; TEST DATA - ((test-data-rollup) (apply db:test-data-rollup dbstruct params)) - ((csv->test-data) (apply db:csv->test-data dbstruct params)) - ((get-steps-data) (apply db:get-steps-data dbstruct params)) - - ;; MISC - ((login) (apply db:login dbstruct params)) - ((general-call) (let ((stmtname (car params)) - (run-id (cadr params)) - (realparams (cddr params))) - (db:with-db dbstruct run-id #t ;; these are all for modifying the db - (lambda (db) - (db:general-call db stmtname realparams))))) - ((sync-inmem->db) (db:sync-touched dbstruct run-id force-sync: #t)) - ((sdb-qry) (apply sdb:qry params)) - - ;; TESTMETA - ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params)) - ((testmeta-add-record) (apply db:testmeta-add-record dbstruct params)) - ((testmeta-update-field) (apply db:testmeta-update-field dbstruct params)) - (else - (list "ERROR" 0)))) +;; - keep it simple, only return the actual result of the call, i.e. no meta info here +;; +;; - returns #( flag result ) +;; +(define (api:execute-requests dbstruct dat) + (handle-exceptions + exn + (let ((call-chain (get-call-chain))) + (print-call-chain (current-error-port)) + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens + (if (not (vector? dat)) ;; it is an error to not receive a vector + (vector #f #f "remote must be called with a vector") + (vector ;; return a vector + the returned data structure + #t + (let ((cmd (vector-ref dat 0)) + (params (vector-ref dat 1))) + (case (if (symbol? cmd) + cmd + (string->symbol cmd)) + ;; SERVERS + ((start-server) (apply server:kind-run params)) + ((kill-server) (set! 
*server-run* #f)) + + ;; KEYS + ((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params)) + ((get-keys) (db:get-keys dbstruct)) + + ;; TESTS + ((test-toplevel-num-items) (apply db:test-toplevel-num-items dbstruct params)) + ((get-test-info-by-id) (apply db:get-test-info-by-id dbstruct params)) + ((test-get-rundir-from-test-id) (apply db:test-get-rundir-from-test-id dbstruct params)) + ((test-set-state-status-by-id) (apply db:test-set-state-status-by-id dbstruct params)) + ((get-count-tests-running) (apply db:get-count-tests-running dbstruct params)) + ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params)) + ((delete-test-records) (apply db:delete-test-records dbstruct params)) + ;; ((delete-test-step-records) (apply db:delete-test-step-records dbstruct params)) + ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params)) + ((test-set-status-state) (apply db:test-set-status-state dbstruct params)) + ((get-previous-test-run-record) (apply db:get-previous-test-run-record dbstruct params)) + ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params)) + ((test-get-logfile-info) (apply db:test-get-logfile-info dbstruct params)) + ((test-get-records-for-index-file) (apply db:test-get-records-for-index-file dbstruct params)) + ((get-testinfo-state-status) (apply db:get-testinfo-state-status dbstruct params)) + ((test-set-top-process-pid) (apply db:test-set-top-process-pid dbstruct params)) + ((test-get-top-process-pid) (apply db:test-get-top-process-pid dbstruct params)) + ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params)) + ((get-prereqs-not-met) (apply db:get-prereqs-not-met dbstruct params)) + ((roll-up-pass-fail-counts) (apply db:roll-up-pass-fail-counts dbstruct params)) + ((update-fail-pass-counts) (apply db:general-call dbstruct 'update-pass-fail-counts 
params)) + ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params)) + + ;; RUNS + ((get-run-info) (apply db:get-run-info dbstruct params)) + ((get-run-status) (apply db:get-run-status dbstruct params)) + ((set-run-status) (apply db:set-run-status dbstruct params)) + ((register-run) (apply db:register-run dbstruct params)) + ((set-tests-state-status) (apply db:set-tests-state-status dbstruct params)) + ((get-tests-for-run) (apply db:get-tests-for-run dbstruct params)) + ((get-test-id) (apply db:get-test-id dbstruct params)) + ((get-tests-for-run-mindata) (apply db:get-tests-for-run-mindata dbstruct params)) + ((get-run-name-from-id) (apply db:get-run-name-from-id dbstruct params)) + ((delete-run) (apply db:delete-run dbstruct params)) + ((get-runs) (apply db:get-runs dbstruct params)) + ((get-all-run-ids) (db:get-all-run-ids dbstruct)) + ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params)) + ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params)) + ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params)) + ((lock/unlock-run) (apply db:lock/unlock-run dbstruct params)) + ((update-run-event_time) (apply db:update-run-event_time dbstruct params)) + ((find-and-mark-incomplete) (apply db:find-and-mark-incomplete dbstruct params)) + + ;; STEPS + ((teststep-set-status!) (apply db:teststep-set-status! 
dbstruct params)) + + ;; TEST DATA + ((test-data-rollup) (apply db:test-data-rollup dbstruct params)) + ((csv->test-data) (apply db:csv->test-data dbstruct params)) + ((get-steps-data) (apply db:get-steps-data dbstruct params)) + + ;; MISC + ((login) (apply db:login dbstruct params)) + ((general-call) (let ((stmtname (car params)) + (run-id (cadr params)) + (realparams (cddr params))) + (db:with-db dbstruct run-id #t ;; these are all for modifying the db + (lambda (db) + (db:general-call db stmtname realparams))))) + ((sync-inmem->db) (let ((run-id (car params))) + (db:sync-touched dbstruct run-id force-sync: #t))) + ((sdb-qry) (apply sdb:qry params)) + ((ping) (current-process-id)) + + ;; TESTMETA + ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params)) + ((testmeta-add-record) (apply db:testmeta-add-record dbstruct params)) + ((testmeta-update-field) (apply db:testmeta-update-field dbstruct params)))))))) ;; http-server send-response ;; api:process-request ;; db:* ;; @@ -112,18 +154,19 @@ ;; NB// Runs on the server as part of the server loop ;; (define (api:process-request dbstruct $) ;; the $ is the request vars proc (let* ((cmd ($ 'cmd)) (paramsj ($ 'params)) - (params (db:string->obj paramsj)) ;; (rmt:json-str->dat paramsj)) - (res (api:execute-requests dbstruct cmd params))) + (params (db:string->obj paramsj transport: 'http)) ;; (rmt:json-str->dat paramsj)) + (resdat (api:execute-requests dbstruct (vector cmd params))) ;; #( flag result ) + (res (vector-ref resdat 1))) ;; This can be here but needs controls to ensure it doesn't run more than every 4 seconds ;; (rmt:dat->json-str ;; (if (or (string? res) ;; (list? res) ;; (number? res) ;; (boolean? 
res)) ;; res ;; (list "ERROR, not string, list, number or boolean" 1 cmd params res))))) - (db:obj->string res))) + (db:obj->string res transport: 'http))) Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -153,96 +153,103 @@ ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; (define (client:setup-http run-id #!key (remaining-tries 10) (failed-connects 0)) - (debug:print 0 "INFO: client:setup remaining-tries=" remaining-tries) - (if (<= remaining-tries 0) - (begin - (debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id) - (exit 1)) - (let ((host-info (hash-table-ref/default *runremote* run-id #f))) - (debug:print-info 0 "client:setup host-info=" host-info ", remaining-tries=" remaining-tries) - (if host-info - (let* ((iface (car host-info)) - (port (cadr host-info)) - (start-res (client:connect iface port)) - ;; (ping-res (server:ping-server run-id iface port)) - (ping-res (client:login-no-auto-setup start-res run-id))) - (if ping-res ;; sucessful login? - (begin - (hash-table-set! *runremote* run-id start-res) - start-res) ;; return the server info - (if (member remaining-tries '(3 4 6)) - (begin ;; login failed - (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" host-info) - (hash-table-delete! *runremote* run-id) - (open-run-close tasks:server-force-clean-run-record - tasks:open-db - run-id - (car host-info) - (cadr host-info) - " client:setup (host-info=#t)") - (thread-sleep! 5) - (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1))) - (begin - (debug:print 25 "INFO: client:setup failed to connect, start-res=" start-res ", run-id=" run-id ", host-info=" host-info) - (thread-sleep! 
5) - (client:setup run-id remaining-tries: (- remaining-tries 1)))))) - ;; YUK: rename server-dat here - (let* ((server-dat (open-run-close tasks:get-server tasks:open-db run-id))) - (debug:print-info 0 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) - (if server-dat - (let* ((iface (tasks:hostinfo-get-interface server-dat)) - (port (tasks:hostinfo-get-port server-dat)) - (start-res (http-transport:client-connect iface port)) - ;; (ping-res (server:ping-server run-id iface port)) - (ping-res (rmt:login-no-auto-client-setup start-res run-id))) - (if start-res - (begin - (hash-table-set! *runremote* run-id start-res) - start-res) - (if (member remaining-tries '(2 5)) - (begin ;; login failed - (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) - (hash-table-delete! *runremote* run-id) - (open-run-close tasks:server-force-clean-run-record - tasks:open-db - run-id - (tasks:hostinfo-get-interface server-dat) - (tasks:hostinfo-get-port server-dat) - " client:setup (server-dat = #t)") - (thread-sleep! 2) - (server:try-running run-id) - (thread-sleep! 10) ;; give server a little time to start up - (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1))) - (begin - (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) - (thread-sleep! 5) - (client:setup run-id remaining-tries: (- remaining-tries 1)))))) - (begin ;; no server registered - (if (eq? remaining-tries 2) - (begin - ;; (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)") - (client:setup run-id remaining-tries: 10)) - (begin - (thread-sleep! 
2) - (debug:print 25 "INFO: client:setup start-res (not defined here), run-id=" run-id ", server-dat=" server-dat) - (if (< (open-run-close tasks:num-in-available-state tasks:open-db run-id) 3) - (begin - ;; (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)") - (server:try-running run-id))) - (thread-sleep! 10) ;; give server a little time to start up - (client:setup run-id remaining-tries: (- remaining-tries 1))))))))))) + (debug:print-info 2 "client:setup remaining-tries=" remaining-tries) + (let* ((tdbdat (tasks:open-db))) + (if (<= remaining-tries 0) + (begin + (debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id) + (exit 1)) + (let* ((server-dat (tasks:get-server (db:delay-if-busy tdbdat) run-id))) + (debug:print-info 4 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) + (if server-dat + (let* ((iface (tasks:hostinfo-get-interface server-dat)) + (hostname (tasks:hostinfo-get-hostname server-dat)) + (port (tasks:hostinfo-get-port server-dat)) + (start-res (case *transport-type* + ((http)(http-transport:client-connect iface port)) + ((nmsg)(nmsg-transport:client-connect hostname port)))) + (ping-res (case *transport-type* + ((http)(rmt:login-no-auto-client-setup start-res run-id)) + ((nmsg)(let ((logininfo (rmt:login-no-auto-client-setup start-res run-id))) + (if logininfo + (car (vector-ref logininfo 1)) + #f)))))) + (if (and start-res + ping-res) + (begin + (hash-table-set! *runremote* run-id start-res) + (debug:print-info 2 "connected to " (http-transport:server-dat-make-url start-res)) + start-res) + (begin ;; login failed but have a server record, clean out the record and try again + (debug:print-info 0 "client:setup, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) + (case *transport-type* + ((http)(http-transport:close-connections run-id))) + (hash-table-delete! 
*runremote* run-id) + (tasks:kill-server-run-id run-id) + (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat) + run-id + (tasks:hostinfo-get-interface server-dat) + (tasks:hostinfo-get-port server-dat) + " client:setup (server-dat = #t)") + (if (> remaining-tries 8) + (thread-sleep! (+ 1 (random 5))) ;; spread out the starts a little + (thread-sleep! (+ 15 (random 20)))) ;; it isn't going well. give it plenty of time + (server:try-running run-id) + (thread-sleep! 5) ;; give server a little time to start up + (client:setup run-id remaining-tries: (- remaining-tries 1)) + ))) + (begin ;; no server registered + (let ((num-available (tasks:num-in-available-state (db:dbdat-get-db tdbdat) run-id))) + (debug:print-info 0 "client:setup, no server registered, remaining-tries=" remaining-tries " num-available=" num-available) + (if (< num-available 2) + (server:try-running run-id)) + (thread-sleep! (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms. + (client:setup run-id remaining-tries: (- remaining-tries 1))))))))) + +;; (let ((host-info (hash-table-ref/default *runremote* run-id #f))) +;; (if host-info ;; this is a bit circular. the host-info *is* the start-res FIXME +;; (let* ((iface (http-transport:server-dat-get-iface host-info)) +;; (port (http-transport:server-dat-get-port host-info)) +;; (start-res (case *transport-type* +;; ((http)(http-transport:client-connect iface port)) +;; ((nmsg)(nmsg-transport:client-connect iface port)) ;; (http-transport:server-dat-get-socket host-info)) +;; (else #f))) +;; (ping-res (case *transport-type* +;; ((http)(rmt:login-no-auto-client-setup start-res run-id)) +;; ((nmsg)(let ((logininfo (rmt:login-no-auto-client-setup start-res run-id))) +;; (if logininfo +;; (vector-ref (vector-ref logininfo 1) 1) +;; #f))) +;; (else #f)))) +;; (if ping-res ;; sucessful login? 
+;; (begin +;; (debug:print-info 2 "client:setup, ping is good using host-info=" host-info ", remaining-tries=" remaining-tries) +;; start-res) ;; return the server info +;; ;; have host info but no ping. shutdown the current connection and try again +;; (begin ;; login failed +;; (debug:print-info 1 "client:setup, ping is bad for start-res=" start-res " and *runremote*=" host-info) +;; (case *transport-type* +;; ((http)(http-transport:close-connections run-id))) +;; (hash-table-delete! *runremote* run-id) +;; (if (< remaining-tries 8) +;; (thread-sleep! 5) +;; (thread-sleep! 1)) +;; (client:setup run-id remaining-tries: (- remaining-tries 1))))) +;; ;; YUK: rename server-dat here +;; ;; keep this as a function to ease future (define (client:start run-id server-info) (http-transport:client-connect (tasks:hostinfo-get-interface server-info) (tasks:hostinfo-get-port server-info))) ;; client:signal-handler (define (client:signal-handler signum) + (signal-mask! signum) (handle-exceptions exn (debug:print " ... exiting ...") (let ((th1 (make-thread (lambda () "") ;; do nothing for now (was flush out last call if applicable) Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -7,11 +7,11 @@ ;; This program is distributed WITHOUT ANY WARRANTY; without even the ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. 
;;====================================================================== -(use sqlite3 srfi-1 posix regex-case base64 format dot-locking csv-xml) +(use sqlite3 srfi-1 posix regex-case base64 format dot-locking csv-xml z3) (require-extension sqlite3 regex posix) (require-extension (srfi 18) extras tcp rpc) (import (prefix sqlite3 sqlite3:)) @@ -44,32 +44,43 @@ (define *already-seen-runconfig-info* #f) (define *waiting-queue* (make-hash-table)) (define *test-meta-updated* (make-hash-table)) (define *globalexitstatus* 0) ;; attempt to work around possible thread issues (define *passnum* 0) ;; when running track calls to run-tests or similar +(define *write-frequency* (make-hash-table)) ;; run-id => (vector (current-seconds) 0)) +(define *alt-log-file* #f) ;; used by -log +(define *common:denoise* (make-hash-table)) ;; for low noise printing ;; DATABASE -(define *open-dbs* (vector #f (make-hash-table))) ;; megatestdb run-id-dbs +(define *dbstruct-db* #f) +(define *db-stats* (make-hash-table)) ;; hash of vectors < count duration-total > +(define *db-stats-mutex* (make-mutex)) +(define *db-sync-mutex* (make-mutex)) +(define *db-multi-sync-mutex* (make-mutex)) +(define *db-local-sync* (make-hash-table)) ;; used to record last touch of db +(define *megatest-db* #f) +(define *last-db-access* (current-seconds)) ;; update when db is accessed via server +(define *db-write-access* #t) +(define *inmemdb* #f) +(define *task-db* #f) ;; (vector db path-to-db) +(define *db-access-allowed* #t) ;; flag to allow access +(define *db-access-mutex* (make-mutex)) ;; SERVER (define *my-client-signature* #f) -(define *transport-type* #f) -(define *megatest-db* #f) -(define *rpc:listener* #f) ;; if set up for server communication this will hold the tcp port +(define *transport-type* 'http) +(define *transport-type* 'http) ;; override with [server] transport http|rpc|nmsg (define *runremote* (make-hash-table)) ;; if set up for server communication this will hold -(define *last-db-access* 
(current-seconds)) ;; update when db is accessed via server (define *max-cache-size* 0) (define *logged-in-clients* (make-hash-table)) (define *client-non-blocking-mode* #f) (define *server-id* #f) (define *server-info* #f) (define *time-to-exit* #f) (define *received-response* #f) (define *default-numtries* 10) (define *server-run* #t) -(define *db-write-access* #t) -(define *inmemdb* #f) (define *run-id* #f) (define *server-kind-run* (make-hash-table)) (define *target* (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN (define *keys* (make-hash-table)) ;; cache the keys here @@ -103,22 +114,61 @@ (set! *test-info* (make-hash-table)) (set! *run-info-cache* (make-hash-table)) (set! *env-vars-by-run-id* (make-hash-table)) (set! *test-id-cache* (make-hash-table))) -;; Generic string database (normalization of sorts) +;; Generic string database (define sdb:qry #f) ;; (make-sdb:qry)) ;; 'init #f) -;; Generic path database (normalization of sorts) +;; Generic path database (define *fdb* #f) + +;;====================================================================== +;; L O C K E R S A N D B L O C K E R S +;;====================================================================== + +;; block further accesses to databases. Call this before shutting db down +(define (common:db-block-further-queries) + (mutex-lock! *db-access-mutex*) + (set! *db-access-allowed* #f) + (mutex-unlock! *db-access-mutex*)) + +(define (common:db-access-allowed?) + (let ((val (begin + (mutex-lock! *db-access-mutex*) + *db-access-allowed* + (mutex-unlock! *db-access-mutex*)))) + val)) ;;====================================================================== ;; U S E F U L S T U F F ;;====================================================================== + +(define (common:low-noise-print waitval . 
keys) + (let* ((key (string-intersperse (map conc keys) "-" )) + (lasttime (hash-table-ref/default *common:denoise* key 0)) + (currtime (current-seconds))) + (if (> (- currtime lasttime) waitval) + (begin + (hash-table-set! *common:denoise* key currtime) + #t) + #f))) (define (common:get-megatest-exe) (if (getenv "MT_MEGATEST") (getenv "MT_MEGATEST") "megatest")) +(define (common:read-encoded-string instr) + (handle-exceptions + exn + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: received bad encoded string \"" instr "\", message: " ((condition-property-accessor 'exn 'message) exn)) + (print-call-chain (current-error-port)) + #f) + (read (open-input-string (base64:base64-decode instr)))) + (read (open-input-string (z3:decode-buffer (base64:base64-decode instr)))))) + ;;====================================================================== ;; S T A T E S A N D S T A T U S E S ;;====================================================================== (define *common:std-states* @@ -159,10 +209,49 @@ (define (assoc/default key lst . default) (let ((res (assoc key lst))) (if res (cadr res)(if (null? default) #f (car default))))) +(define (common:get-testsuite-name) + (or (configf:lookup *configdat* "setup" "testsuite" ) + (pathname-file *toppath*))) + +;;====================================================================== +;; E X I T H A N D L I N G +;;====================================================================== + +(define (std-exit-procedure) + (debug:print-info 2 "starting exit process, finalizing databases.") + (rmt:print-db-stats) + (let ((run-ids (hash-table-keys *db-local-sync*))) + (if (and (not (null? run-ids)) + (configf:lookup *configdat* "setup" "megatest-db")) + (db:multi-db-sync run-ids 'new2old))) + (if *dbstruct-db* (db:close-all *dbstruct-db*)) + (if *inmemdb* (db:close-all *inmemdb*)) + (if (and *megatest-db* + (sqlite3:database? *megatest-db*)) + (begin + (sqlite3:interrupt! *megatest-db*) + (sqlite3:finalize! 
*megatest-db* #t) + (set! *megatest-db* #f))) + (if *task-db* (let ((db (cdr *task-db*))) + (if (sqlite3:database? db) + (begin + (sqlite3:interrupt! db) + (sqlite3:finalize! db #t) + (vector-set! *task-db* 0 #f)))))) + +(define (std-signal-handler signum) + (signal-mask! signum) + (debug:print 0 "ERROR: Received signal " signum " exiting promptly") + ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway + (exit)) + +(set-signal-handler! signal/int std-signal-handler) +(set-signal-handler! signal/term std-signal-handler) + ;;====================================================================== ;; Misc utils ;;====================================================================== ;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5 @@ -224,20 +313,47 @@ (string-split patts ",")) res) #t)) ;; (map print (map car (hash-table->alist (read-config "runconfigs.config" #f #t)))) -(define (common:get-runconfig-targets) +(define (common:get-runconfig-targets #!key (configf #f)) (sort (map car (hash-table->alist - (read-config "runconfigs.config" - #f #t))) stringlist "uptime")) - (load-rx (regexp "load average:\\s+(\\d+)")) - (cpu-load #f)) - (for-each (lambda (l) - (let ((match (string-search load-rx l))) - (if match - (let ((newval (string->number (cadr match)))) - (if (number? newval) - (set! cpu-load newval)))))) - (car load-res)) - cpu-load)) + (car (common:get-cpu-load))) +;; (let* ((load-res (cmd-run->list "uptime")) +;; (load-rx (regexp "load average:\\s+(\\d+)")) +;; (cpu-load #f)) +;; (for-each (lambda (l) +;; (let ((match (string-search load-rx l))) +;; (if match +;; (let ((newval (string->number (cadr match)))) +;; (if (number? newval) +;; (set! 
cpu-load newval)))))) +;; (car load-res)) +;; cpu-load)) + +;; get cpu load by reading from /proc/loadavg, return all three values +;; +(define (common:get-cpu-load) + (with-input-from-file "/proc/loadavg" + (lambda ()(list (read)(read)(read))))) + +(define (common:wait-for-cpuload maxload numcpus waitdelay #!key (count 1000)) + (let* ((loadavg (common:get-cpu-load)) + (first (car loadavg)) + (next (cadr loadavg)) + (adjload (* maxload numcpus)) + (loadjmp (- first next))) + (cond + ((and (> first adjload) + (> count 0)) + (debug:print-info 0 "waiting " waitdelay " seconds due to load " first " exceeding max of " adjload) + (thread-sleep! waitdelay) + (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1))) + ((and (> loadjmp numcpus) + (> count 0)) + (debug:print-info 0 "waiting " waitdelay " seconds due to load jump " loadjmp " > numcpus " numcpus) + (thread-sleep! waitdelay) + (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1)))))) + +(define (common:get-num-cpus) + (with-input-from-file "/proc/cpuinfo" + (lambda () + (let loop ((numcpu 0) + (inl (read-line))) + (if (eof-object? inl) + numcpu + (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl) + (+ numcpu 1) + numcpu) + (read-line))))))) (define (get-uname . params) (let* ((uname-res (cmd-run->list (conc "uname " (if (null? params) "-a" (car params))))) (uname #f)) (if (null? 
(car uname-res)) "unknown" (caar uname-res)))) -(define (save-environment-as-files fname #!key (ignorevars (list "DISPLAY" "LS_COLORS" "XKEYSYMDB" "EDITOR"))) +(define (save-environment-as-files fname #!key (ignorevars (list "USER" "HOME" "DISPLAY" "LS_COLORS" "XKEYSYMDB" "EDITOR" "MAKEFLAGS" "MAKEF"))) (let ((envvars (get-environment-variables)) (whitesp (regexp "[^a-zA-Z0-9_\\-:,.\\/%$]"))) (with-output-to-file (conc fname ".csh") (lambda () - (for-each (lambda (key) - (if (not (member key ignorevars)) - (let* ((val (cdr key)) - (sval (if (string-search whitesp val)(conc "\"" val "\"") val))) - (print "setenv " (car key) " " sval)))) - envvars))) + (for-each (lambda (keyval) + (let* ((key (car keyval)) + (val (cdr keyval)) + (delim (if (string-search whitesp val) + "\"" + ""))) + (print (if (member key ignorevars) + "# setenv " + "setenv ") + key " " delim val delim))) + envvars))) (with-output-to-file (conc fname ".sh") (lambda () - (for-each (lambda (key) - (if (not (member key ignorevars)) - (let* ((val (cdr key)) - (sval (if (string-search whitesp val)(conc "\"" val "\"") val))) - (print "export " (car key) "=" sval)))) + (for-each (lambda (keyval) + (let* ((key (car keyval)) + (val (cdr keyval)) + (delim (if (string-search whitesp val) + "\"" + ""))) + (print (if (member key ignorevars) + "# export " + "export ") + key "=" delim val delim))) envvars))))) ;; set some env vars from an alist, return an alist with original values ;; (("VAR" "value") ...) 
(define (alist->env-vars lst) @@ -428,10 +591,21 @@ (seconds->local-time sec) "%yww%V.%w")) (define (seconds->year-work-week/day-time sec) (time->string (seconds->local-time sec) "%yww%V.%w %H:%M")) + +(define (seconds->quarter sec) + (case (string->number + (time->string + (seconds->local-time sec) + "%m")) + ((1 2 3) 1) + ((4 5 6) 2) + ((7 8 9) 3) + ((10 11 12) 4) + (else #f))) ;;====================================================================== ;; Colors ;;====================================================================== Index: common_records.scm ================================================================== --- common_records.scm +++ common_records.scm @@ -7,11 +7,30 @@ ;; This program is distributed WITHOUT ANY WARRANTY; without even the ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. ;;====================================================================== -(use trace) +;; (use trace) + +;; Some of these routines use: +;; +;; http://www.cs.toronto.edu/~gfb/scheme/simple-macros.html +;; +;; Syntax for defining macros in a simple style similar to function definiton, +;; when there is a single pattern for the argument list and there are no keywords. +;; +;; (define-simple-syntax (name arg ...) body ...) +;; + +(define-syntax define-simple-syntax + (syntax-rules () + ((_ (name arg ...) body ...) + (define-syntax name (syntax-rules () ((name arg ...) (begin body ...))))))) + +(define-syntax common:handle-exceptions + (syntax-rules () + ((_ exn-in errstmt ...)(handle-exceptions exn-in errstmt ...)))) (define (debug:calc-verbosity vstr) (cond ((number? vstr) vstr) ((not (string? vstr)) 1) Index: configf.scm ================================================================== --- configf.scm +++ configf.scm @@ -61,11 +61,11 @@ (define configf:cont-ln-rx (regexp "^(\\s+)(\\S+.*)$")) ;; read a line and process any #{ ... 
} constructs (define configf:var-expand-regex (regexp "^(.*)#\\{(scheme|system|shell|getenv|get|runconfigs-get|rget)\\s+([^\\}\\{]*)\\}(.*)")) -(define (configf:process-line l ht) +(define (configf:process-line l ht allow-system) (let loop ((res l)) (if (string? res) (let ((matchdat (string-search configf:var-expand-regex res))) (if matchdat (let* ((prestr (list-ref matchdat 1)) @@ -85,14 +85,16 @@ (conc "(lambda (ht)(config-lookup ht \"" sect "\" \"" var "\"))"))) ((runconfigs-get) (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))")) ((rget) (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))")) (else "(lambda (ht)(print \"ERROR\") \"ERROR\")")))) ;; (print "fullcmd=" fullcmd) - (with-input-from-string fullcmd - (lambda () - (set! result ((eval (read)) ht)))) - (loop (conc prestr result poststr))) + (if (or allow-system + (not (member cmdtype '("system" "shell")))) + (with-input-from-string fullcmd + (lambda () + (set! result ((eval (read)) ht)))) + (set! result (conc "#{(" cmdtype ") " cmd "}"))) (loop (conc prestr result poststr))) res)) res))) ;; Run a shell command and return the output as a string (define (shell cmd) @@ -105,22 +107,25 @@ "\n"))) (debug:print-info 4 "shell result:\n" outres) outres) (begin (with-output-to-port (current-error-port) - (print "ERROR: " cmd " returned bad exit code " status)) + (lambda () + (print "ERROR: " cmd " returned bad exit code " status))) "")))) ;; Lookup a value in runconfigs based on -reqtarg or -target (define (runconfigs-get config var) - (let ((targ (or (args:get-arg "-reqtarg")(args:get-arg "-target")))) + (let ((targ (or (args:get-arg "-reqtarg")(args:get-arg "-target")(getenv "MT_TARGET")))) (if targ (or (configf:lookup config targ var) (configf:lookup config "default" var)) (configf:lookup config "default" var)))) -(define-inline (configf:read-line p ht allow-processing) +;; this was inline but I'm pretty sure that is a hold over from when it was *very* simple ... 
+;; +(define (configf:read-line p ht allow-processing) (let loop ((inl (read-line p))) (let ((cont-line (and (string? inl) (not (string-null? inl)) (equal? "\\" (string-take-right inl 1))))) (if cont-line ;; last character is \ @@ -128,14 +133,18 @@ (if (not (eof-object? nextl)) (loop (string-append (if cont-line (string-take inl (- (string-length inl) 1)) inl) nextl)))) - (if (and allow-processing - (not (eq? allow-processing 'return-string))) - (configf:process-line inl ht) - inl))))) + (case allow-processing ;; if (and allow-processing + ;; (not (eq? allow-processing 'return-string))) + ((#t #f) + (configf:process-line inl ht allow-processing)) + ((return-string) + inl) + (else + (configf:process-line inl ht allow-processing))))))) ;; read a config file, returns hash table of alists ;; read a config file, returns hash table of alists ;; adds to ht if given (must be #f otherwise) @@ -269,10 +278,11 @@ #f)) )) #f)) (define configf:lookup config-lookup) +(define configf:read-file read-config) (define (configf:section-vars cfgdat section) (let ((sectdat (hash-table-ref/default cfgdat section '()))) (if (null? sectdat) '() @@ -339,11 +349,11 @@ (res '())) (if (eof-object? inl) (begin (close-input-port inp) (reverse res)) - (loop (read-line inp)(cons inl))))) + (loop (read-line inp)(cons inl res))))) '())) ;;====================================================================== ;; Write a config ;; 0. Given a refererence data structure "indat" @@ -427,5 +437,58 @@ (for-each (lambda (line) (print line)) (configf:expand-multi-lines fdat)))))) +;;====================================================================== +;; refdb +;;====================================================================== + +;; reads a refdb into an assoc array of assoc arrays +;; returns (list dat msg) +(define (configf:read-refdb refdb-path) + (let ((sheets-file (conc refdb-path "/sheet-names.cfg"))) + (if (not (file-exists? 
sheets-file)) + (list #f (conc "ERROR: no refdb found at " refdb-path)) + (if (not (file-read-access? sheets-file)) + (list #f (conc "ERROR: refdb file not readable at " refdb-path)) + (let* ((sheets (with-input-from-file sheets-file + (lambda () + (let loop ((inl (read-line)) + (res '())) + (if (eof-object? inl) + (reverse res) + (loop (read-line)(cons inl res))))))) + (data '())) + (for-each + (lambda (sheet-name) + (let* ((dat-path (conc refdb-path "/" sheet-name ".dat")) + (ref-dat (configf:read-file dat-path #f #t)) + (ref-assoc (map (lambda (key) + (list key (hash-table-ref ref-dat key))) + (hash-table-keys ref-dat)))) + ;; (hash-table->alist ref-dat))) + (set! data (append data (list (list sheet-name ref-assoc)))))) + sheets) + (list data "NO ERRORS")))))) + +;; map over all pairs in a three level hierarchial alist and apply a function to the keys/val +;; +(define (configf:map-all-hier-alist data proc #!key (initproc1 #f)(initproc2 #f)(initproc3 #f)) + (for-each + (lambda (sheetname) + (let* ((sheettmp (assoc sheetname data)) + (sheetdat (if sheettmp (cadr sheettmp) '()))) + (if initproc1 (initproc1 sheetname)) + (for-each + (lambda (sectionname) + (let* ((sectiontmp (assoc sectionname sheetdat)) + (sectiondat (if sectiontmp (cadr sectiontmp) '()))) + (if initproc2 (initproc2 sheetname sectionname)) + (for-each + (lambda (varname) + (let* ((valtmp (assoc varname sectiondat)) + (val (if valtmp (cadr valtmp) ""))) + (proc sheetname sectionname varname val))) + (map car sectiondat)))) + (map car sheetdat)))) + (map car data))) Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -32,10 +32,25 @@ ;; (declare (uses filedb)) (include "common_records.scm") (include "db_records.scm") (include "run_records.scm") + +;;====================================================================== +;; C O M M O N +;;====================================================================== + 
+(define *dashboard-comment-share-slot* #f) + +(define (dtests:get-pre-command #!key (default-override #f)) + (let ((cfg-ovrd (configf:lookup *configdat* "dashboard" "pre-command"))) + (or cfg-ovrd default-override "xterm -geometry 180x20 -e \""))) + +(define (dtests:get-post-command #!key (default-override #f)) + (let ((cfg-ovrd (configf:lookup *configdat* "dashboard" "post-command"))) + (or cfg-ovrd default-override ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) + (define (test-info-panel testdat store-label widgets) (iup:frame #:title "Test Info" ; #:expand "YES" (iup:hbox ; #:expand "YES" @@ -115,12 +130,11 @@ )) (list "Author: " "Owner: " "Reviewed: " "Tags: " - "Description: " - )) + "Description: ")) (list (iup:label "" #:expand "VERTICAL")))) (apply iup:vbox ; #:expand "YES" (list (store-meta "author" (iup:label (db:testmeta-get-author testmeta) #:expand "HORIZONTAL") @@ -184,11 +198,12 @@ (list "Hostname: " "Uname -a: " "Disk free: " "CPU Load: " "Run duration: " - "Logfile: ")) + "Logfile: " + "Top process id: ")) (iup:label "" #:expand "VERTICAL"))) (apply iup:vbox ; #:expand "YES" (list ;; NOTE: Yes, the host can change! 
(store-label "HostName" @@ -207,13 +222,17 @@ (iup:label (conc (db:test-get-cpuload testdat)) #:expand "HORIZONTAL") (lambda (testdat)(conc (db:test-get-cpuload testdat)))) (store-label "RunDuration" (iup:label (conc (seconds->hr-min-sec (db:test-get-run_duration testdat))) #:expand "HORIZONTAL") (lambda (testdat)(conc (seconds->hr-min-sec (db:test-get-run_duration testdat))))) - (store-label "CPULoad" + (store-label "LogFile" (iup:label (conc (db:test-get-final_logf testdat)) #:expand "HORIZONTAL") - (lambda (testdat)(conc (db:test-get-final_logf testdat))))))))) + (lambda (testdat)(conc (db:test-get-final_logf testdat)))) + (store-label "ProcessId" + (iup:label (conc (db:test-get-process_id testdat)) #:expand "HORIZONTAL") + (lambda (testdat)(conc (db:test-get-process_id testdat)))) + ))))) ;; use a global for setting the buttons colors ;; state status teststeps (define *state-status* (vector #f #f #f)) (define (update-state-status-buttons testdat) @@ -274,11 +293,11 @@ (let ((btn (iup:button status #:expand "HORIZONTAL" #:size "50x" #:font "Courier New, -10" #:action (lambda (x) (let ((t (iup:attribute x "TITLE"))) (if (equal? t "WAIVED") - (iup:show (dashboard-tests:waiver testdat + (iup:show (dashboard-tests:waiver run-id testdat (if wtxtbox (iup:attribute wtxtbox "VALUE") #f) (lambda (c) (set! newcomment c) (if wtxtbox (begin @@ -334,11 +353,11 @@ ;; #:expand "HORIZONTAL" ;; #:action (lambda (obj) ;; (print "Refresh test data " stepname)) ))) -(define (dashboard-tests:waiver testdat ovrdval cmtcmd) +(define (dashboard-tests:waiver run-id testdat ovrdval cmtcmd) (let* ((wpatt (configf:lookup *configdat* "setup" "waivercommentpatt")) (wregx (if (string? wpatt)(regexp wpatt) #f)) (wmesg (iup:label (if wpatt (conc "Comment must match pattern " wpatt) ""))) (comnt (iup:textbox #:action (lambda (val a b) (if wpatt @@ -376,105 +395,17 @@ #:expand "HORIZONTAL" #:action (lambda (obj) (iup:destroy! dlog))))))) dlog)) -;; CHECK - WAS THIS ADDED OR REMOVED? 
MANUAL MERGE WITH API STUFF!!! -;; -;; get a pretty table to summarize steps -;; -(define (dashboard-tests:process-steps-table steps);; db test-id #!key (work-area #f)) -;; (let ((steps (db:get-steps-for-test db test-id work-area: work-area))) - ;; organise the steps for better readability - (let ((res (make-hash-table))) - (for-each - (lambda (step) - (debug:print 6 "step=" step) - (let ((record (hash-table-ref/default - res - (tdb:step-get-stepname step) - ;; stepname start end status Duration Logfile - (vector (tdb:step-get-stepname step) "" "" "" "" "")))) - (debug:print 6 "record(before) = " record - "\nid: " (tdb:step-get-id step) - "\nstepname: " (tdb:step-get-stepname step) - "\nstate: " (tdb:step-get-state step) - "\nstatus: " (tdb:step-get-status step) - "\ntime: " (tdb:step-get-event_time step)) - (case (string->symbol (tdb:step-get-state step)) - ((start)(vector-set! record 1 (tdb:step-get-event_time step)) - (vector-set! record 3 (if (equal? (vector-ref record 3) "") - (tdb:step-get-status step))) - (if (> (string-length (tdb:step-get-logfile step)) - 0) - (vector-set! record 5 (tdb:step-get-logfile step)))) - ((end) - (vector-set! record 2 (any->number (tdb:step-get-event_time step))) - (vector-set! record 3 (tdb:step-get-status step)) - (vector-set! record 4 (let ((startt (any->number (vector-ref record 1))) - (endt (any->number (vector-ref record 2)))) - (debug:print 4 "record[1]=" (vector-ref record 1) - ", startt=" startt ", endt=" endt - ", get-status: " (tdb:step-get-status step)) - (if (and (number? startt)(number? endt)) - (seconds->hr-min-sec (- endt startt)) "-1"))) - (if (> (string-length (tdb:step-get-logfile step)) - 0) - (vector-set! record 5 (tdb:step-get-logfile step)))) - (else - (vector-set! record 2 (tdb:step-get-state step)) - (vector-set! record 3 (tdb:step-get-status step)) - (vector-set! record 4 (tdb:step-get-event_time step)))) - (hash-table-set! 
res (tdb:step-get-stepname step) record) - (debug:print 6 "record(after) = " record - "\nid: " (tdb:step-get-id step) - "\nstepname: " (tdb:step-get-stepname step) - "\nstate: " (tdb:step-get-state step) - "\nstatus: " (tdb:step-get-status step) - "\ntime: " (tdb:step-get-event_time step)))) - ;; (else (vector-set! record 1 (tdb:step-get-event_time step))) - (sort steps (lambda (a b) - (cond - ((< (tdb:step-get-event_time a)(tdb:step-get-event_time b)) #t) - ((eq? (tdb:step-get-event_time a)(tdb:step-get-event_time b)) - (< (tdb:step-get-id a) (tdb:step-get-id b))) - (else #f))))) - res)) - -(define (dashboard-tests:get-compressed-steps dbstruct run-id test-id) - (let* ((steps-data (db:get-steps-for-test dbstruct run-id test-id)) - (comprsteps (dashboard-tests:process-steps-table steps-data))) ;; (open-run-close db:get-steps-table #f test-id work-area: work-area))) - (map (lambda (x) - ;; take advantage of the \n on time->string - (vector - (vector-ref x 0) - (let ((s (vector-ref x 1))) - (if (number? s)(seconds->time-string s) s)) - (let ((s (vector-ref x 2))) - (if (number? s)(seconds->time-string s) s)) - (vector-ref x 3) ;; status - (vector-ref x 4) - (vector-ref x 5))) ;; time delta - (sort (hash-table-values comprsteps) - (lambda (a b) - (let ((time-a (vector-ref a 1)) - (time-b (vector-ref b 1))) - (if (and (number? time-a)(number? time-b)) - (if (< time-a time-b) - #t - (if (eq? time-a time-b) - (string rownum max-row)(set! max-row rownum)) - (let ((val (vector-ref hed (- colnum 1))) - (mtrx-rc (conc rownum ":" colnum))) - (iup:attribute-set! steps-matrix mtrx-rc (if val (conc val) "")) - (if (< colnum 6) - (loop hed tal rownum (+ colnum 1)) - (if (not (null? 
tal)) - (loop (car tal)(cdr tal)(+ rownum 1) 1)))))) - (if (> max-row 0) - (begin - ;; we are going to speculatively clear rows until we find a row that is already cleared - (let loop ((rownum (+ max-row 1)) - (colnum 0) - (deleted #f)) - ;; (debug:print-info 0 "cleaning " rownum ":" colnum) - (let* ((next-row (if (eq? colnum 6) (+ rownum 1) rownum)) - (next-col (if (eq? colnum 6) 1 (+ colnum 1))) - (mtrx-rc (conc rownum ":" colnum)) - (curr-val (iup:attribute steps-matrix mtrx-rc))) - ;; (debug:print-info 0 "cleaning " rownum ":" colnum " currval= " curr-val) - (if (and (string? curr-val) - (not (equal? curr-val ""))) - (begin - (iup:attribute-set! steps-matrix mtrx-rc "") - (loop next-row next-col #t)) - (if (eq? colnum 6) ;; not done, didn't get a full blank row - (if deleted (loop next-row next-col #f)) ;; exit on this not met - (loop next-row next-col deleted))))) - (iup:attribute-set! steps-matrix "REDRAW" "ALL"))))))) + (dcommon:populate-steps teststeps steps-matrix)))) (hash-table-set! 
widgets "StepsMatrix" proc) (proc testdat)) steps-matrix) ;; populate the Test Data panel (iup:frame Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -16,11 +16,10 @@ (use canvas-draw) (import canvas-draw-iup) (use sqlite3 srfi-1 posix regex regex-case srfi-69) (import (prefix sqlite3 sqlite3:)) -(use trace) (declare (uses common)) (declare (uses margs)) (declare (uses keys)) (declare (uses items)) @@ -81,27 +80,27 @@ (if (args:get-arg "-h") (begin (print help) (exit))) -(if (not (setup-for-run)) +(if (not (launch:setup-for-run)) (begin (print "Failed to find megatest.config, exiting") (exit 1))) -(define *dbstruct-local* (make-dbr:dbstruct path: *toppath* local: #t)) +(define *dbdir* (conc (configf:lookup *configdat* "setup" "linktree") "/.db")) +(define *dbstruct-local* (make-dbr:dbstruct path: *dbdir* + local: #t)) +(define *db-file-path* (db:dbfile-path 0)) ;; HACK ALERT: this is a hack, please fix. -(define *read-only* (not (file-read-access? (conc *toppath* "db/main.db")))) -;; (client:setup *dbstruct-local*) +(define *read-only* (not (file-read-access? 
*db-file-path*))) (define toplevel #f) (define dlg #f) (define max-test-num 0) (define *keys* (db:get-keys *dbstruct-local*)) -;; (define *keys* (cdb:remote-run db:get-keys #f)) -;; (define *keys* (db:get-keys *dbstruct-local*)) (define *dbkeys* (append *keys* (list "runname"))) (define *header* #f) (define *allruns* '()) @@ -131,30 +130,41 @@ (define *examine-test-dat* (make-hash-table)) (define *exit-started* #f) (define *status-ignore-hash* (make-hash-table)) (define *state-ignore-hash* (make-hash-table)) -(define *db-file-path* (conc *toppath* "/db/main.db")) - (define *tests-sort-options* (vector (vector "Sort +a" 'testname "ASC") (vector "Sort -a" 'testname "DESC") (vector "Sort +t" 'event_time "ASC") (vector "Sort -t" 'event_time "DESC") (vector "Sort +s" 'statestatus "ASC") - (vector "Sort -s" 'statestatus "DESC"))) + (vector "Sort -s" 'statestatus "DESC") + (vector "Sort +a" 'testname "ASC"))) + +(define *tests-sort-type-index* '(("+testname" 0) + ("-testname" 1) + ("+event_time" 2) + ("-event_time" 3) + ("+statestatus" 4) + ("-statestatus" 5))) ;; Don't forget to adjust the >= below if you add to the sort-options above (define (next-sort-option) (if (>= *tests-sort-reverse* 5) (set! *tests-sort-reverse* 0) (set! 
*tests-sort-reverse* (+ *tests-sort-reverse* 1))) *tests-sort-reverse*) + +(define *tests-sort-reverse* + (let ((t-sort (assoc (configf:lookup *configdat* "dashboard" "testsort") *tests-sort-type-index*))) + (if t-sort + (cadr t-sort) + 3))) (define (get-curr-sort) (vector-ref *tests-sort-options* *tests-sort-reverse*)) -(define *tests-sort-reverse* 3) (define *hide-empty-runs* #f) (define *hide-not-hide* #t) ;; toggle for hide/not hide (define *hide-not-hide-button* #f) (define *hide-not-hide-tabs* #f) @@ -167,10 +177,12 @@ (define-inline (dboard:uidat-get-keycol vec)(vector-ref vec 0)) (define-inline (dboard:uidat-get-lftcol vec)(vector-ref vec 1)) (define-inline (dboard:uidat-get-header vec)(vector-ref vec 2)) (define-inline (dboard:uidat-get-runsvec vec)(vector-ref vec 3)) + +(if (get-environment-variable "MT_RUN_AREA_HOME")(change-directory (get-environment-variable "MT_RUN_AREA_HOME"))) (define (message-window msg) (iup:show (iup:dialog (iup:vbox @@ -336,13 +348,16 @@ (let ((tnames '())) (for-each (lambda (tdat) (let ((tname (vector-ref tdat 0)) ;; (db:test-get-testname tdat)) (ipath (vector-ref tdat 1))) ;; (db:test-get-item-path tdat))) (if (not (equal? ipath "")) - (if (not (member tname tnames)) + (if (and (list? tnames) + (string? tname) + (not (member tname tnames))) (set! tnames (append tnames (list tname))))))) - test-dats))) + test-dats) + tnames)) ;; Bubble up the top tests to above the items, collect the items underneath ;; all while preserving the sort order from the SQL query as best as possible. ;; (define (bubble-up test-dats #!key (priority 'itempath)) @@ -458,12 +473,18 @@ (itempath (db:test-get-item-path test)) (testfullname (test:test-get-fullname test)) (teststatus (db:test-get-status test)) (teststate (db:test-get-state test)) ;;(teststart (db:test-get-event_time test)) - (runtime (db:test-get-run_duration test)) - (buttontxt (if (equal? 
teststate "COMPLETED") teststatus teststate)) + ;;(runtime (db:test-get-run_duration test)) + (buttontxt (cond + ((equal? teststate "COMPLETED") teststatus) + ((and (equal? teststate "NOT_STARTED") + (member teststatus '("ZERO_ITEMS" "BLOCKED" "PREQ_FAIL" "PREQ_DISCARDED" "TIMED_OUT" "KEEP_TRYING" "TEN_STRIKES"))) + teststatus) + (else + teststate))) (button (vector-ref columndat rown)) (color (car (gutils:get-color-for-state-status teststate teststatus))) (curr-color (vector-ref buttondat 1)) ;; (iup:attribute button "BGCOLOR")) (curr-title (vector-ref buttondat 2))) ;; (iup:attribute button "TITLE"))) (if (not (equal? curr-color color)) @@ -644,16 +665,16 @@ (set! full-cmd (conc full-cmd " -runtests " test-patt " -target " target - " :runname " + " -runname " run-name ))) ((remove-runs) (set! full-cmd (conc full-cmd - " -remove-runs :runname " + " -remove-runs -runname " run-name " -target " target " -testpatt " test-patt @@ -678,47 +699,14 @@ (hash-table-set! tests-draw-state 'scalef 8) (hash-table-set! tests-draw-state 'tests-info (make-hash-table)) (hash-table-set! tests-draw-state 'selected-tests (make-hash-table)) ;; set these (hash-table-set! tests-draw-state 'test-browse-xoffset 20) ;; (- 0 (* (/ sizex 2) (* 8 xadj)))) - (hash-table-set! 
tests-draw-state 'test-browse-yoffset 20))) ;; (- 0 (* (/ sizey 2) (* 8 (- 1 yadj))))))) - (let* ((scalef (hash-table-ref/default tests-draw-state 'scalef 8)) - (test-browse-xoffset (hash-table-ref tests-draw-state 'test-browse-xoffset)) - (test-browse-yoffset (hash-table-ref tests-draw-state 'test-browse-yoffset)) - (xtorig (+ test-browse-xoffset (* (/ sizex 2) scalef (- 0.5 xadj)))) ;; (- xadj 1)))) - (ytorig (+ test-browse-yoffset (* (/ sizey 2) scalef (- yadj 0.5)))) - (boxw 90) - (boxh 25) - (gapx 20) - (gapy 30) - (tests-hash (hash-table-ref tests-draw-state 'tests-info)) - (selected-tests (hash-table-ref tests-draw-state 'selected-tests ))) - ;; (print "sizex: " sizex " sizey: " sizey " font: " (canvas-font cnv) " originx: " originx " originy: " originy " xtorig: " xtorig " ytorig: " ytorig " xadj: " xadj " yadj: " yadj) - (let loop ((hed (car (reverse sorted-testnames))) - (tal (cdr (reverse sorted-testnames))) - (llx xtorig) - (lly ytorig) - (urx (+ xtorig boxw)) - (ury (+ ytorig boxh))) - ; (print "hed " hed " llx " llx " lly " lly " urx " urx " ury " ury) - (canvas-text! cnv (+ llx 5)(+ lly 5) hed) ;; (conc testname " (" xtorig "," ytorig ")")) - (canvas-rectangle! cnv llx urx lly ury) - (if (hash-table-ref/default selected-tests hed #f) - (canvas-box! cnv llx (+ llx 5) lly (+ lly 5))) - (hash-table-set! tests-hash hed (list llx urx (- sizey ury)(- sizey lly))) ;; NB// Swap ury and lly - (if (not (null? tal)) - ;; leave a column of space to the right to list items - (let ((have-room - (if #t ;; put "auto" here where some form of auto rearanging can be done - (> (* 3 (+ boxw gapx)) (- urx xtorig)) - (< urx (- sizex boxw gapx boxw))))) ;; is there room for another column? - (loop (car tal) - (cdr tal) - (if have-room (+ llx boxw gapx) xtorig) ;; have room, - (if have-room lly (+ lly boxh gapy)) - (if have-room (+ urx boxw gapx) (+ xtorig boxw)) - (if have-room ury (+ ury boxh gapy))))))))) + (hash-table-set! 
tests-draw-state 'test-browse-yoffset 20) ;; (- 0 (* (/ sizey 2) (* 8 (- 1 yadj))))))) + (dcommon:initial-draw-tests cnv xadj yadj sizex sizey sizexmm sizeymm originx originy tests-draw-state sorted-testnames)) + (dcommon:redraw-tests cnv xadj yadj sizex sizey sizexmm sizeymm originx originy tests-draw-state sorted-testnames)) + )) ;;====================================================================== ;; R U N C O N T R O L S ;;====================================================================== ;; @@ -801,11 +789,11 @@ (dboard:data-set-command! *data* default-cmd) lb))) (iup:frame #:title "Runname" - (let* ((default-run-name (conc "ww" (seconds->work-week/day (current-seconds)))) + (let* ((default-run-name (seconds->work-week/day (current-seconds))) (tb (iup:textbox #:expand "HORIZONTAL" #:action (lambda (obj val txt) ;; (print "obj: " obj " val: " val " unk: " unk) (dboard:data-set-run-name! *data* txt) ;; (iup:attribute obj "VALUE")) (dashboard:update-run-command)) @@ -883,65 +871,76 @@ (iup:frame #:title "Tests and Tasks" (let* ((updater #f) (last-xadj 0) (last-yadj 0) - (canvas-obj - (iup:canvas #:action (make-canvas-action - (lambda (cnv xadj yadj) - (if (not updater) - (set! updater (lambda (xadj yadj) - ;; (print "cnv: " cnv " x: " x " y: " y) - (dashboard:draw-tests cnv xadj yadj tests-draw-state sorted-testnames)))) - (updater xadj yadj) - (set! last-xadj xadj) - (set! 
last-yadj yadj))) - ;; Following doesn't work - ;; #:wheel-cb (make-canvas-action - ;; (lambda (cnv xadj yadj) - ;; ;; (print "cnv: " cnv " x: " x " y: " y) - ;; (dashboard:draw-tests cnv xadj yadj tests-draw-state sorted-testnames))) - ;; #:size "50x50" - #:expand "YES" - #:scrollbar "YES" - #:posx "0.5" - #:posy "0.5" - #:button-cb (lambda (obj btn pressed x y status) - ;; (print "obj: " obj) - (let ((tests-info (hash-table-ref tests-draw-state 'tests-info)) - (selected-tests (hash-table-ref tests-draw-state 'selected-tests))) - ;; (print "x\ty\tllx\tlly\turx\tury") - (for-each (lambda (test-name) - (let* ((rec-coords (hash-table-ref tests-info test-name)) - (llx (list-ref rec-coords 0)) - (urx (list-ref rec-coords 1)) - (lly (list-ref rec-coords 2)) - (ury (list-ref rec-coords 3))) - ;; (print x "\t" y "\t" llx "\t" lly "\t" urx "\t" ury "\t" test-name " " - (if (and (eq? pressed 1) - (> x llx) - (> y lly) - (< x urx) - (< y ury)) - (let ((patterns (string-split (iup:attribute test-patterns-textbox "VALUE")))) - (let* ((selected (not (member test-name patterns))) - (newpatt-list (if selected - (cons test-name patterns) - (delete test-name patterns))) - (newpatt (string-intersperse newpatt-list "\n"))) - ;; (if cnv-obj - ;; (dashboard:draw-tests cnv-obj 0 0 tests-draw-state sorted-testnames)) - (iup:attribute-set! obj "REDRAW" "ALL") - (hash-table-set! selected-tests test-name selected) - (iup:attribute-set! test-patterns-textbox "VALUE" newpatt) - (dboard:data-set-test-patts! *data* (dboard:lines->test-patt newpatt)) - (dashboard:update-run-command) - (if updater (updater last-xadj last-yadj))))))) - (hash-table-keys tests-info))))))) + (the-cnv #f) + (canvas-obj + (iup:canvas #:action (make-canvas-action + (lambda (cnv xadj yadj) + (if (not updater) + (set! updater (lambda (xadj yadj) + ;; (print "cnv: " cnv " xadj: " xadj " yadj: " yadj) + (dashboard:draw-tests cnv xadj yadj tests-draw-state sorted-testnames) + (set! last-xadj xadj) + (set! 
last-yadj yadj)))) + (updater xadj yadj) + (set! the-cnv cnv) + )) + ;; Following doesn't work + #:wheel-cb (lambda (obj step x y dir) ;; dir is 4 for up and 5 for down. I think. + (let ((xadj last-xadj) + (yadj (+ last-yadj (if (> step 0) + -0.01 + 0.01)))) + ;; (print "step: " step " x: " x " y: " y " dir: \"" dir "\"") + ;; (print "the-cnv: " the-cnv " obj: " obj " xadj: " xadj " yadj: " yadj " dir: " dir) + (if the-cnv + (dashboard:draw-tests the-cnv xadj yadj tests-draw-state sorted-testnames)) + (set! last-xadj xadj) + (set! last-yadj yadj) + )) + ;; #:size "50x50" + #:expand "YES" + #:scrollbar "YES" + #:posx "0.5" + #:posy "0.5" + #:button-cb (lambda (obj btn pressed x y status) + ;; (print "obj: " obj) + (let ((tests-info (hash-table-ref tests-draw-state 'tests-info)) + (selected-tests (hash-table-ref tests-draw-state 'selected-tests))) + ;; (print "x\ty\tllx\tlly\turx\tury") + (for-each (lambda (test-name) + (let* ((rec-coords (hash-table-ref tests-info test-name)) + (llx (list-ref rec-coords 0)) + (urx (list-ref rec-coords 1)) + (lly (list-ref rec-coords 2)) + (ury (list-ref rec-coords 3))) + ;; (print x "\t" y "\t" llx "\t" lly "\t" urx "\t" ury "\t" test-name " " + (if (and (eq? pressed 1) + (> x llx) + (> y lly) + (< x urx) + (< y ury)) + (let ((patterns (string-split (iup:attribute test-patterns-textbox "VALUE")))) + (let* ((selected (not (member test-name patterns))) + (newpatt-list (if selected + (cons test-name patterns) + (delete test-name patterns))) + (newpatt (string-intersperse newpatt-list "\n"))) + ;; (if cnv-obj + ;; (dashboard:draw-tests cnv-obj 0 0 tests-draw-state sorted-testnames)) + (iup:attribute-set! obj "REDRAW" "ALL") + (hash-table-set! selected-tests test-name selected) + (iup:attribute-set! test-patterns-textbox "VALUE" newpatt) + (dboard:data-set-test-patts! 
*data* (dboard:lines->test-patt newpatt)) + (dashboard:update-run-command) + (if updater (updater last-xadj last-yadj))))))) + (hash-table-keys tests-info))))))) canvas-obj))) ;; (print "obj: " obj " btn: " btn " pressed: " pressed " x: " x " y: " y " status: " status)) - + (iup:frame #:title "Logs" ;; To be replaced with tabs (let ((logs-tb (iup:textbox #:expand "YES" #:multiline "YES"))) (dboard:data-set-logs-textbox! *data* logs-tb) @@ -977,11 +976,11 @@ ;; S U M M A R Y ;;====================================================================== ;; ;; General info about the run(s) and megatest area (define (dashboard:summary db) - (let ((rawconfig (read-config (conc *toppath* "/megatest.config") #f 'return-string))) + (let ((rawconfig (read-config (conc *toppath* "/megatest.config") #f #f))) ;; changed to #f since I want #{} to be expanded by [system ...] to NOT be expanded. WAS: 'return-string))) (iup:vbox (iup:split #:value 500 (iup:frame #:title "General Info" @@ -1038,12 +1037,20 @@ (begin (dboard:data-set-curr-run-id! *data* run-id) (dashboard:update-run-summary-tab))) ;; (print "path: " (tree:node->path obj id) " run-id: " run-id) )))) + (cell-lookup (make-hash-table)) (run-matrix (iup:matrix - #:expand "YES")) + #:expand "YES" + #:click-cb + (lambda (obj lin col status) + (let* ((toolpath (car (argv))) + (key (conc lin ":" col)) + (test-id (hash-table-ref/default cell-lookup key -1)) + (cmd (conc toolpath " -test " (dboard:data-get-curr-run-id *data*) "," test-id "&"))) + (system cmd))))) (updater (lambda () (let* ((runs-dat (db:get-runs-by-patt db *keys* "%" #f #f #f)) (runs-header (vector-ref runs-dat 0)) ;; 0 is header, 1 is list of records (run-id (dboard:data-get-curr-run-id *data*)) (tests-dat (let ((tdat (db:get-tests-for-run db run-id @@ -1107,12 +1114,13 @@ userdata: (conc "run-id: " run-id)) (hash-table-set! (dboard:data-get-path-run-ids *data*) run-path run-id) ;; (set! colnum (+ colnum 1)) )))) run-ids) - (iup:attribute-set! 
run-matrix "CLEARVALUE" "CONTENTS") + (iup:attribute-set! run-matrix "CLEARVALUE" "ALL") ;; NOTE: Was CONTENTS (iup:attribute-set! run-matrix "CLEARATTRIB" "CONTENTS") + (iup:attribute-set! run-matrix "RESIZEMATRIX" "YES") (iup:attribute-set! run-matrix "NUMCOL" max-col ) (iup:attribute-set! run-matrix "NUMLIN" (if (< max-row max-visible) max-visible max-row)) ;; min of 20 ;; (iup:attribute-set! run-matrix "NUMCOL_VISIBLE" max-col) ;; (iup:attribute-set! run-matrix "NUMLIN_VISIBLE" (if (> max-row max-visible) max-visible max-row)) @@ -1125,22 +1133,10 @@ (begin (set! changed #t) (iup:attribute-set! run-matrix key name))))) row-indices) - ;; Col labels - (for-each (lambda (ind) - (let* ((name (car ind)) - (num (cadr ind)) - (key (conc "0:" num))) - (if (not (equal? (iup:attribute run-matrix key) name)) - (begin - (set! changed #t) - (iup:attribute-set! run-matrix key name) - (iup:attribute-set! run-matrix "FITTOTEXT" (conc "C" num)))))) - col-indices) - ;; Cell contents (for-each (lambda (entry) (let* ((row-name (cadr entry)) (col-name (car entry)) (valuedat (caddr entry)) @@ -1151,17 +1147,32 @@ (status (list-ref valuedat 2)) (value (gutils:get-color-for-state-status state status)) (row-num (cadr (assoc row-name row-indices))) (col-num (cadr (assoc col-name col-indices))) (key (conc row-num ":" col-num))) + (hash-table-set! cell-lookup key test-id) (if (not (equal? (iup:attribute run-matrix key) (cadr value))) (begin (set! changed #t) (iup:attribute-set! run-matrix key (cadr value)) (iup:attribute-set! run-matrix (conc "BGCOLOR" key) (car value)))))) tests-mindat) + + ;; Col labels - do after setting Cell contents so they are accounted for in the size calc. + + (for-each (lambda (ind) + (let* ((name (car ind)) + (num (cadr ind)) + (key (conc "0:" num))) + (if (not (equal? (iup:attribute run-matrix key) name)) + (begin + (set! changed #t) + (iup:attribute-set! run-matrix key name) + (iup:attribute-set! 
run-matrix "FITTOTEXT" (conc "C" num)))))) + col-indices) (if changed (iup:attribute-set! run-matrix "REDRAW" "ALL")))))) + (set! dashboard:update-run-summary-tab updater) (dboard:data-set-runs-tree! *data* tb) (iup:split tb run-matrix))) @@ -1198,14 +1209,25 @@ ;; (mark-for-update) ;; (update-search "item-name" val)) )) (iup:vbox (iup:hbox - (iup:button "Sort -t" #:action (lambda (obj) - (next-sort-option) - (iup:attribute-set! obj "TITLE" (vector-ref (vector-ref *tests-sort-options* *tests-sort-reverse*) 0)) - (mark-for-update))) + (let* ((cmds-list '("+testname" "-testname" "+event_time" "-event_time" "+statestatus" "-statestatus")) + (lb (iup:listbox #:expand "HORIZONTAL" + #:dropdown "YES" + #:action (lambda (obj val index lbstate) + (set! *tests-sort-reverse* index) + (mark-for-update)))) + (default-cmd (car (list-ref *tests-sort-type-index* *tests-sort-reverse*)))) + (iuplistbox-fill-list lb cmds-list selected-item: default-cmd) + (mark-for-update) + ;; (set! *tests-sort-reverse* *tests-sort-reverse*0) + lb) + ;; (iup:button "Sort -t" #:action (lambda (obj) + ;; (next-sort-option) + ;; (iup:attribute-set! obj "TITLE" (vector-ref (vector-ref *tests-sort-options* *tests-sort-reverse*) 0)) + ;; (mark-for-update))) (iup:button "HideEmpty" #:action (lambda (obj) (set! *hide-empty-runs* (not *hide-empty-runs*)) (iup:attribute-set! obj "TITLE" (if *hide-empty-runs* "+HideE" "-HideE")) (mark-for-update))) (let ((hideit (iup:button "HideTests" #:action (lambda (obj) @@ -1213,11 +1235,13 @@ (iup:attribute-set! obj "TITLE" (if *hide-not-hide* "HideTests" "NotHide")) (mark-for-update))))) (set! 
*hide-not-hide-button* hideit) hideit)) (iup:hbox - (iup:button "Quit" #:action (lambda (obj)(if *dbstruct-local* (db:close-all *dbstruct-local*))(exit))) + (iup:button "Quit" #:action (lambda (obj) + ;; (if *dbstruct-local* (db:close-all *dbstruct-local*)) + (exit))) (iup:button "Refresh" #:action (lambda (obj) (mark-for-update))) (iup:button "Collapse" #:action (lambda (obj) (let ((myname (iup:attribute obj "TITLE"))) (if (equal? myname "Collapse") @@ -1410,40 +1434,46 @@ (iup:attribute-set! *tim* "TIME" 300) (iup:attribute-set! *tim* "RUN" "YES") ;; Move this stuff to db.scm? I'm not sure that is the right thing to do... ;; -(define *last-db-update-time* (file-modification-time (conc *toppath* "/db/main.db"))) +(define *last-db-update-time* (file-modification-time *db-file-path*)) ;; (conc *toppath* "/db/main.db"))) (define *last-recalc-ended-time* 0) (define (dashboard:been-changed) - (> (file-modification-time (conc *toppath* "/db/main.db")) *last-db-update-time*)) + (> (file-modification-time *db-file-path*) *last-db-update-time*)) (define (dashboard:set-db-update-time) - (set! *last-db-update-time* (file-modification-time (conc *toppath* "/db/main.db")))) + (set! *last-db-update-time* (file-modification-time *db-file-path*))) (define (dashboard:recalc modtime please-update-buttons last-db-update-time) (or please-update-buttons (and (> (current-milliseconds)(+ *last-recalc-ended-time* 150)) (> modtime last-db-update-time) (> (current-seconds)(+ last-db-update-time 1))))) -(define *monitor-db-path* (conc *toppath* "/db/monitor.db")) +(define *monitor-db-path* (conc *dbdir* "/monitor.db")) (define *last-monitor-update-time* 0) ;; Force creation of the db in case it isn't already there. -(let ((db (tasks:open-db))) - (sqlite3:finalize! 
db)) +(tasks:open-db) (define (dashboard:get-youngest-run-db-mod-time) - (apply max (map (lambda (filen) - (file-modification-time filen)) - (glob (conc *toppath* "/db/*.db"))))) + (handle-exceptions + exn + (begin + (debug:print 0 "WARNING: error in accessing databases in get-youngest-run-db-mod-time: " ((condition-property-accessor 'exn 'message) exn)) + (current-seconds)) ;; something went wrong - just print an error and return current-seconds + (apply max (map (lambda (filen) + (file-modification-time filen)) + (glob (conc *dbdir* "/*.db")))))) (define (dashboard:run-update x) (let* ((modtime (dashboard:get-youngest-run-db-mod-time)) ;; (file-modification-time *db-file-path*)) - (monitor-modtime (file-modification-time *monitor-db-path*)) + (monitor-modtime (if (file-exists? *monitor-db-path*) + (file-modification-time *monitor-db-path*) + -1)) (run-update-time (current-seconds)) (recalc (dashboard:recalc modtime *please-update-buttons* *last-db-update-time*))) (if (and (eq? *current-tab-number* 0) (> monitor-modtime *last-monitor-update-time*)) (begin @@ -1489,12 +1519,11 @@ ((args:get-arg "-run") (let ((runid (string->number (args:get-arg "-run")))) (if runid (begin (lambda (x) - (on-exit (lambda () - (if *dbstruct-local* (db:close-all *dbstruct-local*)))) + (on-exit std-exit-procedure) (examine-run *dbstruct-local* runid))) (begin (print "ERROR: runid is not a number " (args:get-arg "-run")) (exit 1))))) ((args:get-arg "-test") ;; run-id,test-id @@ -1527,7 +1556,27 @@ (mutex-lock! *update-mutex*) (set! *update-is-running* #f) (mutex-unlock! *update-mutex*)))) 1)))) -(iup:main-loop) -(db:close-all *dbstruct-local*) +(let ((th1 (make-thread (lambda () + (thread-sleep! 1) + (set! *please-update-buttons* #t) + (dashboard:run-update 1)) "update buttons once")) + ;; need to wait for first *update-is-running* #t + ;; (let loop () + ;; (mutex-lock! *update-mutex*) + ;; (if *update-is-running* + ;; (begin + ;; (set! 
*please-update-buttons* #t) + ;; (mark-for-update) + ;; (print "Did redraw trigger")) "First update after startup") + ;; (mutex-unlock! *update-mutex*) + ;; (thread-sleep! 1) + ;; (if (not *please-update-buttons*) + ;; (loop)))))) + (th2 (make-thread iup:main-loop "Main loop"))) + (thread-start! th1) + (thread-start! th2) + (thread-join! th2)) + +;; (iup:main-loop)(db:close-all *dbstruct-local*) ADDED datashare-testing/.sd.config Index: datashare-testing/.sd.config ================================================================== --- /dev/null +++ datashare-testing/.sd.config @@ -0,0 +1,35 @@ +# Read in the users vars first (so the offical data cannot be overridden +[include ~/.datashare.config] + +# Read in local overrides +[include datashare.config] + +# Replace [storage] with settings entry - more secure +[settings] + +storage /tmp/#{getenv USER}/datashare/disk1 \ + /tmp/#{getenv USER}/datashare/disk2 + +basepath #{getenv BASEPATH} + +[areas] +synthesis asic/synthesis +verilog asic/verilog +customlibs custom/oalibs +megatest tools/megatest + +[quality] +0 untested +1 lightly tested +2 tested +3 full QA + +[database] +location /tmp/#{getenv USER}/datashare + +[pathmaps] +SHELF /tmp/#{getenv USER}/theshelf + +[buildmethods] +customlibs make setup;make install + ADDED datashare.scm Index: datashare.scm ================================================================== --- /dev/null +++ datashare.scm @@ -0,0 +1,817 @@ + +;; Copyright 2006-2013, Matthew Welland. +;; +;; This program is made available under the GNU GPL version 2.0 or +;; greater. See the accompanying file COPYING for details. +;; +;; This program is distributed WITHOUT ANY WARRANTY; without even the +;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +;; PURPOSE. 
+ +(use ssax) +(use sxml-serializer) +(use sxml-modifications) +(use regex) +(use srfi-69) +(use regex-case) +(use posix) +(use json) +(use csv) +(use srfi-18) +(use format) + +(require-library iup) +(import (prefix iup iup:)) +(require-library ini-file) +(import (prefix ini-file ini:)) + +(use canvas-draw) +(import canvas-draw-iup) + +(use sqlite3 srfi-1 posix regex regex-case srfi-69) +(import (prefix sqlite3 sqlite3:)) + +(declare (uses configf)) +(declare (uses tree)) +(declare (uses margs)) +;; (declare (uses dcommon)) +;; (declare (uses launch)) +;; (declare (uses gutils)) +;; (declare (uses db)) +;; (declare (uses synchash)) +;; (declare (uses server)) +;; (declare (uses megatest-version)) +;; (declare (uses tbd)) + +(include "megatest-fossil-hash.scm") + +;; +;; GLOBALS +;; +(define *datashare:current-tab-number* 0) +(define *args-hash* (make-hash-table)) +(define datashare:help (conc "Usage: datashare [action [params ...]] + +Note: run datashare without parameters to start the gui. + + list-areas : List the allowed areas + + list-versions : List versions available in + options : -full, -vpatt patt + + publish : Publish data for area and with version + + get : Get a link to data, put the link in destpath + options : -i iteration + + update : Update the link to data to the latest iteration. + +Part of the Megatest tool suite. 
+Learn more at http://www.kiatoa.com/fossils/megatest + +Version: " megatest-fossil-hash)) ;; " + +;;====================================================================== +;; RECORDS +;;====================================================================== + +;; make-vector-record "testing" datastore pkg id area version_name store_type copied source_path iteration submitter datetime storegrp datavol quality disk_id comment +;; testing +(define (make-datashare:pkg)(make-vector 15)) +(define-inline (datashare:pkg-get-id vec) (vector-ref vec 0)) +(define-inline (datashare:pkg-get-area vec) (vector-ref vec 1)) +(define-inline (datashare:pkg-get-version_name vec) (vector-ref vec 2)) +(define-inline (datashare:pkg-get-store_type vec) (vector-ref vec 3)) +(define-inline (datashare:pkg-get-copied vec) (vector-ref vec 4)) +(define-inline (datashare:pkg-get-source_path vec) (vector-ref vec 5)) +(define-inline (datashare:pkg-get-iteration vec) (vector-ref vec 6)) +(define-inline (datashare:pkg-get-submitter vec) (vector-ref vec 7)) +(define-inline (datashare:pkg-get-datetime vec) (vector-ref vec 8)) +(define-inline (datashare:pkg-get-storegrp vec) (vector-ref vec 9)) +(define-inline (datashare:pkg-get-datavol vec) (vector-ref vec 10)) +(define-inline (datashare:pkg-get-quality vec) (vector-ref vec 11)) +(define-inline (datashare:pkg-get-disk_id vec) (vector-ref vec 12)) +(define-inline (datashare:pkg-get-comment vec) (vector-ref vec 13)) +(define-inline (datashare:pkg-get-stored_path vec) (vector-ref vec 14)) +(define-inline (datashare:pkg-set-id! vec val)(vector-set! vec 0 val)) +(define-inline (datashare:pkg-set-area! vec val)(vector-set! vec 1 val)) +(define-inline (datashare:pkg-set-version_name! vec val)(vector-set! vec 2 val)) +(define-inline (datashare:pkg-set-store_type! vec val)(vector-set! vec 3 val)) +(define-inline (datashare:pkg-set-copied! vec val)(vector-set! vec 4 val)) +(define-inline (datashare:pkg-set-source_path! vec val)(vector-set! 
vec 5 val)) +(define-inline (datashare:pkg-set-iteration! vec val)(vector-set! vec 6 val)) +(define-inline (datashare:pkg-set-submitter! vec val)(vector-set! vec 7 val)) +(define-inline (datashare:pkg-set-datetime! vec val)(vector-set! vec 8 val)) +(define-inline (datashare:pkg-set-storegrp! vec val)(vector-set! vec 9 val)) +(define-inline (datashare:pkg-set-datavol! vec val)(vector-set! vec 10 val)) +(define-inline (datashare:pkg-set-quality! vec val)(vector-set! vec 11 val)) +(define-inline (datashare:pkg-set-disk_id! vec val)(vector-set! vec 12 val)) +(define-inline (datashare:pkg-set-comment! vec val)(vector-set! vec 13 val)) +(define-inline (datashare:pkg-set-stored_path! vec val)(vector-set! vec 14 val)) + +;;====================================================================== +;; DB +;;====================================================================== + +(define (datashare:initialize-db db) + (for-each + (lambda (qry) + (sqlite3:execute db qry)) + (list + "CREATE TABLE pkgs + (id INTEGER PRIMARY KEY, + area TEXT, + version_name TEXT, + store_type TEXT DEFAULT 'copy', + copied INTEGER DEFAULT 0, + source_path TEXT, + stored_path TEXT, + iteration INTEGER DEFAULT 0, + submitter TEXT, + datetime TIMESTAMP DEFAULT (strftime('%s','now')), + storegrp TEXT, + datavol INTEGER, + quality TEXT, + disk_id INTEGER, + comment TEXT);" + "CREATE TABLE refs + (id INTEGER PRIMARY KEY, + pkg_id INTEGER, + destlink TEXT);" + "CREATE TABLE disks + (id INTEGER PRIMARY KEY, + storegrp TEXT, + path TEXT);"))) + +(define (datashare:register-data db area version-name store-type submitter quality source-path comment) + (let ((iter-qry (sqlite3:prepare db "SELECT max(iteration) FROM pkgs WHERE area=? AND version_name=?;")) + (next-iteration 0)) + (sqlite3:with-transaction + db + (lambda () + (sqlite3:for-each-row + (lambda (iteration) + (if (and (number? iteration) + (>= iteration next-iteration)) + (set! 
next-iteration (+ iteration 1))))
+        iter-qry area version-name)
+       ;; now store the data
+       (sqlite3:execute db "INSERT INTO pkgs (area,version_name,iteration,store_type,submitter,source_path,quality,comment)
+                                 VALUES (?,?,?,?,?,?,?,?);"
+                        area version-name next-iteration (conc store-type) submitter source-path quality comment)))
+    (sqlite3:finalize! iter-qry)
+    next-iteration))
+
+;; Return the id of the pkgs row matching area, version-name and iteration,
+;; or #f when no row matches.
+(define (datashare:get-id db area version-name iteration)
+  (let ((res #f))
+    (sqlite3:for-each-row
+     (lambda (id)
+       (set! res id))
+     db
+     "SELECT id FROM pkgs WHERE area=? AND version_name=? AND iteration=?;"
+     area version-name iteration)
+    res))
+
+;; Record where the data for package id is stored.
+(define (datashare:set-stored-path db id path)
+  (sqlite3:execute db "UPDATE pkgs SET stored_path=? WHERE id=?;" path id))
+
+;; Update the copied column for package id; value is stored as given.
+(define (datashare:set-copied db id value)
+  (sqlite3:execute db "UPDATE pkgs SET copied=? WHERE id=?;" value id))
+
+;; Fetch the pkgs row for area, version-name and iteration as a vector, or #f
+;; when no row matches.
+;;
+;; The columns are listed explicitly, in the order the datashare:pkg-get-*
+;; accessors index them (iteration at 6, stored_path last at 14). "SELECT *"
+;; yields the columns in table-definition order, where stored_path comes
+;; before iteration, so every accessor from iteration onward read the wrong
+;; field (datashare:pkg-get-stored_path returned the comment column).
+(define (datashare:get-pkg-record db area version-name iteration)
+  (let ((res #f))
+    (sqlite3:for-each-row
+     (lambda (a . b)
+       (set! res (apply vector a b)))
+     db
+     (conc "SELECT id,area,version_name,store_type,copied,source_path,iteration,submitter,datetime,storegrp,datavol,quality,disk_id,comment,stored_path "
+           " FROM pkgs WHERE area=? AND version_name=? AND iteration=?;")
+     area
+     version-name
+     iteration)
+    res))
+
+;; take version-name iteration and register or update "latest/0"
+;;
+;; The "latest" entry of an area is the row with version_name "latest" and
+;; iteration 0; its stored_path is pointed at the stored_path of the given
+;; version/iteration. The id parameter is accepted but not used here.
+(define (datashare:set-latest db id area version-name iteration)
+  (let* ((rec         (datashare:get-pkg-record db area version-name iteration))
+         (latest-id   (datashare:get-id db area "latest" 0))
+         (stored-path (datashare:pkg-get-stored_path rec)))
+    (if latest-id ;; have a record - bump the link pointer
+        (datashare:set-stored-path db latest-id stored-path)
+        (datashare:register-data db area "latest" 'link "auto" "na" stored-path "latest data"))))
+
+;; set a package ref, this is the location where the link back to the stored data
+;; is put. 
+;;
+;; if there is nothing at that location then the record can be removed
+;; if there are no refs for a particular pkg-id then that pkg-id is a 
+;; candidate for removal
+;;
+(define (datashare:record-pkg-ref db pkg-id dest-link)
+  (sqlite3:execute db "INSERT INTO refs (pkg_id,destlink) VALUES (?,?);" pkg-id dest-link))
+
+;; Return the number of rows in refs that point at pkg-id.
+(define (datashare:count-refs db pkg-id)
+  (let ((res 0))
+    (sqlite3:for-each-row
+     (lambda (count)
+       (set! res count))
+     db
+     "SELECT count(id) FROM refs WHERE pkg_id=?;"
+     pkg-id)
+    res))
+
+;; Create the sqlite db
+;;
+;; Opens datashare.db inside the directory named by the "location" entry of
+;; the [database] config section, creates the tables when the file did not
+;; exist beforehand, and returns the handle. When that directory is unset,
+;; not a directory or not readable, an error is printed and the value of that
+;; print call is returned instead of a handle.
+;;
+;; NOTE(review): db is not bound by any let in this procedure, so the set!
+;; below assigns a variable that lives outside it. Confirm nothing depends on
+;; that before turning it into a local binding.
+;; NOTE(review): writeable is computed but never read; the busy handler is
+;; gated on *db-write-access*, which is not defined in the visible part of
+;; this file - confirm which of the two was intended.
+(define (datashare:open-db configdat)
+  (let ((path (configf:lookup configdat "database" "location")))
+    (if (and path
+             (directory? path)
+             (file-read-access? path))
+        (let* ((dbpath    (conc path "/datashare.db"))
+               (writeable (file-write-access? dbpath))
+               (dbexists  (file-exists? dbpath))
+               (handler   (make-busy-timeout 136000)))
+          (handle-exceptions
+           exn
+           (begin
+             (debug:print 2 "ERROR: problem accessing db " dbpath
+                          ((condition-property-accessor 'exn 'message) exn))
+             ;; exit non-zero: failing to open the database is an error and
+             ;; must not be reported to the calling shell as success
+             (exit 1))
+           (set! db (sqlite3:open-database dbpath)))
+          (if *db-write-access* (sqlite3:set-busy-handler! db handler))
+          (if (not dbexists)
+              (begin
+                (datashare:initialize-db db)))
+          db)
+        (print "ERROR: invalid path for storing database: " path))))
+
+(define (open-run-close-exception-handling proc idb . params)
+  (handle-exceptions
+   exn
+   (let ((sleep-time (random 30))
+         (err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
+     (case err-status
+       ((busy)
+        (thread-sleep! sleep-time))
+       (else
+        (print "EXCEPTION: database overloaded or unreadable.")
+        (print " message: " ((condition-property-accessor 'exn 'message) exn))
+        (print "exn=" (condition->list exn))
+        (print " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
+        (print-call-chain (current-error-port))
+        (thread-sleep! 
sleep-time)
+        (print "trying db call one more time....this may never recover, if necessary kill process " (current-process-id) " on host " (get-host-name) " to clean up")))
+     (apply open-run-close-exception-handling proc idb params))
+   (apply open-run-close-no-exception-handling proc idb params)))
+
+;; Apply proc to a database handle followed by params and return its result.
+;;
+;; idb may be an open sqlite3 database (used as is) or a procedure of no
+;; arguments that returns one. Passing #f, or anything else, only prints an
+;; error; proc is then applied to whatever that print call returned.
+;;
+;; The finalize step used to name dbstruct, an identifier that is not bound
+;; anywhere in this procedure. It now refers to db and only runs when db
+;; really is a database handle.
+(define (open-run-close-no-exception-handling proc idb . params)
+  ;; (print "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params)
+  (let* ((db (cond
+              ((sqlite3:database? idb)     idb)
+              ((not idb)                   (print "ERROR: cannot open-run-close with #f anymore"))
+              ((procedure? idb)            (idb))
+              (else                        (print "ERROR: cannot open-run-close with #f anymore"))))
+         (res #f))
+    (set! res (apply proc db params))
+    (if (and (not idb) (sqlite3:database? db))
+        (sqlite3:finalize! db))
+    ;; (print "open-run-close-no-exception-handling END" )
+    res))
+
+(define open-run-close open-run-close-no-exception-handling)
+
+;; Return the pkgs rows matching the filters as a list of vectors laid out for
+;; the datashare:pkg-get-* accessors, in the order sqlite returned them.
+;;
+;; area-filter and version-filter are bound as LIKE patterns.
+;; NOTE(review): iter-filter is spliced into the SQL text after the word
+;; "iteration", so presumably it is a trusted comparison fragment such as
+;; "> 0" - confirm against the callers.
+(define (datashare:get-pkgs db area-filter version-filter iter-filter)
+  (let ((res '()))
+    (sqlite3:for-each-row ;; replace with fold ...
+     (lambda (a . b)
+       (set! res (cons (list->vector (cons a b)) res)))
+     db 
+     (conc "SELECT id,area,version_name,store_type,copied,source_path,iteration,submitter,datetime,storegrp,datavol,quality,disk_id,comment,stored_path "
+           " FROM pkgs WHERE area like ? AND version_name LIKE ? AND iteration " iter-filter ";")
+     area-filter version-filter)
+    (reverse res)))
+
+(define (datashare:get-pkg db area-name version-name #!key (iteration #f))
+  (let ((dat '())
+        (res #f))
+    (sqlite3:for-each-row ;; replace with fold ...
+     (lambda (a . b)
+       (set! dat (cons (list->vector (cons a b)) dat)))
+     db 
+     (conc "SELECT id,area,version_name,store_type,copied,source_path,iteration,submitter,datetime,storegrp,datavol,quality,disk_id,comment,stored_path "
+           " FROM pkgs WHERE area=? AND version_name=? ORDER BY iteration ASC;")
+     area-name version-name)
+    ;; now filter for iteration, either max if #f or specific one
+    (if (null? 
dat)
+        #f
+        ;; dat was built by consing each fetched row onto the front, so it
+        ;; is in reverse fetch order; walk it in fetch order (ascending
+        ;; iteration) so that running off the end yields the highest
+        ;; iteration. That row is returned when iteration is #f and also
+        ;; when the requested iteration does not exist.
+        ;; (The previous loop declared three variables but was re-entered
+        ;; with two arguments.)
+        (let loop ((rows (reverse dat)))
+          (let ((hed (car rows))
+                (tal (cdr rows)))
+            (if (or (equal? (datashare:pkg-get-iteration hed) iteration) ;; this is the one if iteration is specified
+                    (null? tal))
+                hed
+                (loop tal)))))))
+
+;; Return one vector #(version-name submitter iteration datetime comment) per
+;; distinct version name in area-name, leaving out the "latest" entry.
+;;
+;; version-patt is an optional LIKE pattern on the version name (default "%").
+;; Rows are read in ascending datetime order and later rows replace earlier
+;; ones in the table, so each version reports its most recent row. The result
+;; is sorted by version name with string-ci>=.
+;;
+;; The area used to be hard-coded as 'megatest' in the query; it is now bound
+;; from area-name.
+(define (datashare:get-versions-for-area db area-name #!key (version-patt #f))
+  (let ((data (make-hash-table)))
+    (sqlite3:for-each-row
+     (lambda (version-name submitter iteration submitted-time comment)
+       ;;                                              0           1         2           3           4
+       (hash-table-set! data version-name (vector version-name submitter iteration submitted-time comment)))
+     db 
+     "SELECT version_name,submitter,iteration,datetime,comment FROM pkgs WHERE area=? AND version_name != 'latest' AND version_name LIKE ? ORDER BY datetime asc;"
+     area-name
+     (or version-patt "%"))
+    (map (lambda (x)(hash-table-ref data x))(sort (hash-table-keys data) string-ci>=))))
+
+;;======================================================================
+;; DATA IMPORT/EXPORT
+;;======================================================================
+
+;; Copy source-path into the storage area for area/version/iteration.
+;;
+;; dest-path is a pair (space-avail . disk-path). When space-avail exceeds
+;; 10000 the target directory is created, recorded as the stored path, and an
+;; rsync is started in a background thread which marks the package as copied
+;; and closes the db handle once rsync has finished; #t is returned without
+;; waiting for it. Otherwise the package is marked as not copied.
+;;
+;; db is bound before id: the id lookup needs the handle, and in the former
+;; order it referred to a db from outside this let*.
+(define (datashare:import-data configdat source-path dest-path area version iteration)
+  (let* ((space-avail (car dest-path))
+         (disk-path   (cdr dest-path))
+         (targ-path   (conc disk-path "/" area "/" version "/" iteration))
+         (db          (datashare:open-db configdat))
+         (id          (datashare:get-id db area version iteration)))
+    (if (> space-avail 10000) ;; dumb heuristic
+        (begin
+          (create-directory targ-path #t)
+          (datashare:set-stored-path db id targ-path)
+          (print "Running command: rsync -av " source-path "/ " targ-path "/")
+          (let ((th1 (make-thread (lambda ()
+                                    (let ((pid (process-run "rsync" (list "-av" (conc source-path "/") (conc targ-path "/")))))
+                                      (process-wait pid)
+                                      (datashare:set-copied db id "yes")
+                                      (sqlite3:finalize! db)))
+                                  "Data copy")))
+            (thread-start! th1))
+          #t)
+        (begin
+          (print "ERROR: Not enough space in storage area " dest-path)
+          (datashare:set-copied db id "no")
+          (sqlite3:finalize! 
db)
+          #f))))
+
+;; Return the names (first element of each entry) of the [areas] config
+;; section, or the empty list when the section is missing.
+(define (datashare:get-areas configdat)
+  (let* ((areadat (configf:get-section configdat "areas"))
+         (areas   (if areadat (map car areadat) '())))
+    areas))
+
+;; Register the data at spath as a new iteration of area-name/version and make
+;; it the "latest" entry for the area.
+;;
+;; publish-type 'copy imports the data into the best storage area; any other
+;; type records spath itself as the stored path and marks copied as "n/a".
+;; Returns (#f . message) for an unknown area name, otherwise
+;; (#t . "Successfully saved data").
+;;
+;; The bindings use let* because iteration is computed from db; with a plain
+;; let the db in that initialiser referred to a variable from outside this
+;; procedure instead of the handle opened here.
+(define (datashare:publish configdat publish-type area-name version comment spath submitter quality)
+  ;; input checks
+  (cond
+   ((not (member area-name (datashare:get-areas configdat)))
+    (cons #f (conc "Illegal area name \"" area-name "\"")))
+   (else
+    (let* ((db         (datashare:open-db configdat))
+           (iteration  (datashare:register-data db area-name version publish-type submitter quality spath comment))
+           (dest-store (datashare:get-best-storage configdat)))
+      (if iteration
+          (if (eq? 'copy publish-type)
+              (begin
+                (datashare:import-data configdat spath dest-store area-name version iteration)
+                (let ((id (datashare:get-id db area-name version iteration)))
+                  (datashare:set-latest db id area-name version iteration)))
+              (let ((id (datashare:get-id db area-name version iteration)))
+                (datashare:set-stored-path db id spath)
+                ;; a "yes" was written here and overwritten on the very next
+                ;; line; only the value that ends up stored is written now
+                (datashare:set-copied db id "n/a")
+                (datashare:set-latest db id area-name version iteration)))
+          (print "ERROR: Failed to get an iteration number"))
+      (sqlite3:finalize! db)
+      (cons #t "Successfully saved data")))))
+
+;; Split the "storage" entry of the [settings] section on whitespace and
+;; return the result of datashare:find-most-space over those paths.
+(define (datashare:get-best-storage configdat)
+  (let* ((storage     (configf:lookup configdat "settings" "storage"))
+         (store-areas (if storage (string-split storage) '())))
+    (print "Looking for available space in " store-areas)
+    (datashare:find-most-space store-areas)))
+
+;; (string->number (list-ref (with-input-from-pipe "df -B1000000 /tmp" (lambda ()(read-line)(string-split (read-line)))) 3))
+
+(define (datashare:find-most-space paths)
+  (fold (lambda (area res)
+          ;; (print "area=" area " res=" res)
+          (let ((maxspace (car res))
+                (currpath (cdr res)))
+            ;; (print currpath " " maxspace)
+            (if (file-write-access? 
area)
+                (let ((currspace (string->number
+                                  (list-ref
+                                   (with-input-from-pipe 
+                                    ;; (conc "df --output=avail " area)
+                                    (conc "df -B1000000 " area)
+                                    ;; (lambda ()(read)(read))
+                                    (lambda ()(read-line)(string-split (read-line))))
+                                   3))))
+                  (if (> currspace maxspace) 
+                      (cons currspace area)
+                      res))
+                res)))
+        (cons 0 #f)
+        paths))
+
+;; remove existing link and if possible ...
+;; create path to next of tip of target, create link back to source
+(define (datashare:build-dir-make-link source target)
+  (if (file-exists? target)(datashare:backup-move target))
+  (create-directory (pathname-directory target) #t)
+  (create-symbolic-link source target))
+
+;; Move path out of the way into a .trash directory next to it.
+;;
+;; The trash entry is named from the current time in seconds, a dash, and
+;; (pathname-file path). Directories are moved by running mv through the
+;; shell, anything else with file-move.
+;;
+;; The file-move call used to name trash-file, which is not bound anywhere in
+;; this procedure; it now uses the trashfile binding computed above it.
+;; NOTE(review): path and trashfile are passed to the shell unquoted, so a
+;; path containing spaces or shell metacharacters would break the mv.
+(define (datashare:backup-move path)
+  (let* ((trashdir  (conc (pathname-directory path) "/.trash"))
+         (trashfile (conc trashdir "/" (current-seconds) "-" (pathname-file path))))
+    (create-directory trashdir #t)
+    (if (directory? path)
+        (system (conc "mv " path " " trashfile))
+        (file-move path trashfile))))
+
+;;======================================================================
+;; GUI
+;;======================================================================
+
+;; The main menu
+(define (datashare:main-menu)
+  (iup:menu ;; a menu is a special attribute to a dialog (think Gnome putting the menu at screen top)
+   (iup:menu-item "Files" (iup:menu   ;; Note that you can use either #:action or action: for options
+                           (iup:menu-item "Open"  action: (lambda (obj)
+                                                            (iup:show (iup:file-dialog))
+                                                            (print "File->open " obj)))
+                           (iup:menu-item "Save"  #:action (lambda (obj)(print "File->save " obj)))
+                           (iup:menu-item "Exit"  #:action (lambda (obj)(exit)))))
+   (iup:menu-item "Tools" (iup:menu
+                           (iup:menu-item "Create new blah" #:action (lambda (obj)(print "Tools->new blah")))
+                           ;; (iup:menu-item "Show dialog"     #:action (lambda (obj)
+                           ;;                                          (show message-window
+                           ;;                                            #:modal? 
#t + ;; ;; set positon using coordinates or center, start, top, left, end, bottom, right, parent-center, current + ;; ;; #:x 'mouse + ;; ;; #:y 'mouse + ;; ) + )))) + +(define (datashare:publish-view configdat) + ;; (pp (hash-table->alist configdat)) + (let* ((areas (configf:get-section configdat "areas")) + (label-size "70x") + (areas-sel (iup:listbox #:expand "HORIZONTAL" #:dropdown "YES")) + (version-tb (iup:textbox #:expand "HORIZONTAL")) ;; #:size "50x")) + (areas-sel (iup:listbox #:expand "HORIZONTAL" #:dropdown "YES")) + (component (iup:listbox #:expand "HORIZONTAL" #:dropdown "YES" )) + (version-val (iup:textbox #:expand "HORIZONTAL" #:size "50x")) + ;; (copy-link (iup:toggle #:expand "HORIZONTAL")) + ;; (iteration (iup:textbox #:expand "YES" #:size "20x")) + ;; (iteration (iup:textbox #:expand "HORIZONTAL" #:size "20x")) + (area-filter (iup:textbox #:expand "HORIZONTAL" #:value "%")) + (comment-tb (iup:textbox #:expand "YES" #:multiline "YES")) + (source-tb (iup:textbox #:expand "HORIZONTAL" + #:value (or (configf:lookup configdat "settings" "basepath") + ""))) + (publish (lambda (publish-type) + (let* ((area-num (or (string->number (iup:attribute areas-sel "VALUE")) 0)) + (area-dat (if (> area-num 0)(list-ref areas (- area-num 1))'("NOT SELECTED" "NOT SELECTED"))) + (area-path (cadr area-dat)) + (area-name (car area-dat)) + (version (iup:attribute version-tb "VALUE")) + (comment (iup:attribute comment-tb "VALUE")) + (spath (iup:attribute source-tb "VALUE")) + (submitter (current-user-name)) + (quality 2)) + (datashare:publish configdat publish-type area-name version comment spath submitter quality)))) + (copy (iup:button "Copy and Publish" + #:expand "HORIZONTAL" + #:action (lambda (obj) + (publish 'copy)))) + (link (iup:button "Link and Publish" + #:expand "HORIZONTAL" + #:action (lambda (obj) + (publish 'link)))) + (browse-btn (iup:button "Browse" + #:size "40x" + #:action (lambda (obj) + (let* ((fd (iup:file-dialog #:dialogtype "DIR")) + (top (iup:show 
fd #:modal? "YES"))) + (iup:attribute-set! source-tb "VALUE" + (iup:attribute fd "VALUE")) + (iup:destroy! fd)))))) + (print "areas") + ;; (pp areas) + (fold (lambda (areadat num) + ;; (print "Adding num=" num ", areadat=" areadat) + (iup:attribute-set! areas-sel (conc num) (car areadat)) + (+ 1 num)) + 1 areas) + (iup:vbox + (iup:hbox (iup:label "Area:" #:size label-size) ;; area-filter + areas-sel) + (iup:hbox (iup:label "Version:" #:size label-size) version-tb) + ;; (iup:hbox (iup:label "Link only" #:size label-size) copy-link) + ;; (iup:label "Iteration:") iteration) + (iup:hbox (iup:label "Comment:" #:size label-size) comment-tb) + (iup:hbox (iup:label "Source base path:" #:size label-size) source-tb browse-btn) + (iup:hbox copy link)))) + +(define (datashare:lst->path pathlst) + (conc "/" (string-intersperse (map conc pathlst) "/"))) + +(define (datashare:path->lst path) + (string-split path "/")) + +(define (datashare:pathdat-apply-heuristics configdat path) + (cond + ((file-exists? 
path) "found") + (else (conc path " not installed")))) + +(define (datashare:get-view configdat) + (iup:vbox + (iup:hbox + (let* ((label-size "60x") + ;; filter elements + (area-filter "%") + (version-filter "%") + (iter-filter ">= 0") + ;; reverse lookup from path to data for src and installed + (srcdat (make-hash-table)) ;; reverse lookup + (installed-dat (make-hash-table)) + ;; config values + (basepath (configf:lookup configdat "settings" "basepath")) + ;; gui elements + (submitter (iup:label "" #:expand "HORIZONTAL")) + (date-submitted (iup:label "" #:expand "HORIZONTAL")) + (comment (iup:label "" #:expand "HORIZONTAL")) + (copy-link (iup:label "" #:expand "HORIZONTAL")) + (quality (iup:label "" #:expand "HORIZONTAL")) + (installed-status (iup:label "" #:expand "HORIZONTAL")) + ;; misc + (curr-record #f) + ;; (source-data (iup:label "" #:expand "HORIZONTAL")) + (tb (iup:treebox + #:value 0 + #:name "Packages" + #:expand "YES" + #:addexpanded "NO" + #:selection-cb + (lambda (obj id state) + ;; (print "obj: " obj ", id: " id ", state: " state) + (let* ((path (datashare:lst->path (cdr (tree:node->path obj id)))) + (record (hash-table-ref/default srcdat path #f))) + (if record + (begin + (set! curr-record record) + (iup:attribute-set! submitter "TITLE" (datashare:pkg-get-submitter record)) + (iup:attribute-set! date-submitted "TITLE" (time->string (seconds->local-time (datashare:pkg-get-datetime record)))) + (iup:attribute-set! comment "TITLE" (datashare:pkg-get-comment record)) + (iup:attribute-set! quality "TITLE" (datashare:pkg-get-quality record)) + (iup:attribute-set! 
copy-link "TITLE" (datashare:pkg-get-store_type record)) + )) + ;; (print "id=" id " path=" path " record=" record);; (tree:node->path obj id) " run-id: " run-id) + )))) + (tb2 (iup:treebox + #:value 0 + #:name "Installed" + #:expand "YES" + #:addexpanded "NO" + #:selection-cb + (lambda (obj id state) + ;; (print "obj: " obj ", id: " id ", state: " state) + (let* ((path (datashare:lst->path (cdr (tree:node->path obj id)))) + (status (hash-table-ref/default installed-dat path #f))) + (iup:attribute-set! installed-status "TITLE" (if status status "")) + )))) + (refresh (lambda (obj) + (let* ((db (datashare:open-db configdat)) + (areas (or (configf:get-section configdat "areas") '()))) + ;; + ;; first update the Sources + ;; + (for-each + (lambda (pkgitem) + (let* ((pkg-path (list (datashare:pkg-get-area pkgitem) + (datashare:pkg-get-version_name pkgitem) + (datashare:pkg-get-iteration pkgitem))) + (pkg-id (datashare:pkg-get-id pkgitem)) + (path (datashare:lst->path pkg-path))) + ;; (print "tree:add-node tb=" tb ", pkg-path=" pkg-path ", pkg-id=" pkg-id) + (if (not (hash-table-ref/default srcdat path #f)) + (tree:add-node tb "Packages" pkg-path userdata: (conc "pkg-id: " pkg-id))) + ;; (print "path=" path " pkgitem=" pkgitem) + (hash-table-set! srcdat path pkgitem))) + (datashare:get-pkgs db area-filter version-filter iter-filter)) + ;; + ;; then update the installed + ;; + (for-each + (lambda (area) + (let* ((path (conc "/" (cadr area))) + (fullpath (conc basepath path))) + (if (not (hash-table-ref/default installed-dat path #f)) + (tree:add-node tb2 "Installed" (datashare:path->lst path))) + (hash-table-set! installed-dat path (datashare:pathdat-apply-heuristics configdat fullpath)))) + areas) + (sqlite3:finalize! 
db)))) + (apply (iup:button "Apply" + #:action + (lambda (obj) + (if curr-record + (let* ((area (datashare:pkg-get-area curr-record)) + (stored-path (datashare:pkg-get-stored_path curr-record)) + (source-type (datashare:pkg-get-store_type curr-record)) + (source-path (case source-type ;; (equal? source-type "link")) + ((link)(datashare:pkg-get-source-path curr-record)) + ((copy)stored-path) + (else #f))) + (dest-stub (configf:lookup configdat "areas" area)) + (target-path (conc basepath "/" dest-stub))) + (datashare:build-dir-make-link stored-path target-path) + (print "Creating link from " stored-path " to " target-path))))))) + (iup:vbox + (iup:hbox tb tb2) + (iup:frame + #:title "Source Info" + (iup:vbox + (iup:hbox (iup:button "Refresh" #:action refresh) apply) + (iup:hbox (iup:label "Submitter: ") ;; #:size label-size) + submitter + (iup:label "Submitted on: ") ;; #:size label-size) + date-submitted) + (iup:hbox (iup:label "Data stored: ") + copy-link + (iup:label "Quality: ") + quality) + (iup:hbox (iup:label "Comment: ") + comment))) + (iup:frame + #:title "Installed Info" + (iup:vbox + (iup:hbox (iup:label "Installed status/path: ") installed-status))) + ))))) + +(define (datashare:manage-view configdat) + (iup:vbox + (iup:hbox + (iup:button "Pushme" + #:expand "YES" + )))) + +(define (datashare:gui configdat) + (iup:show + (iup:dialog + #:title (conc "DataShare dashboard " (current-user-name) ":" (current-directory)) + #:menu (datashare:main-menu) + (let* ((tabs (iup:tabs + #:tabchangepos-cb (lambda (obj curr prev) + (set! *datashare:current-tab-number* curr)) + (datashare:publish-view configdat) + (datashare:get-view configdat) + (datashare:manage-view configdat) + ))) + ;; (set! (iup:callback tabs tabchange-cb:) (lambda (a b c)(print "SWITCHED TO TAB: " a " " b " " c))) + (iup:attribute-set! tabs "TABTITLE0" "Publish") + (iup:attribute-set! tabs "TABTITLE1" "Get") + (iup:attribute-set! tabs "TABTITLE2" "Manage") + ;; (iup:attribute-set! 
tabs "BGCOLOR" "190 190 190") + tabs))) + (iup:main-loop)) + +;;====================================================================== +;; MISC +;;====================================================================== + + +;; Call the thunk proc with the effective user id set to the real user id +;; and return its result. The ids are only switched when they differ (i.e. +;; when running suid). dynamic-wind puts the original effective id back even +;; if proc leaves through an exception or another non-local exit. +(define (datashare:do-as-calling-user proc) + (let* ((eid (current-effective-user-id)) + (cid (current-user-id)) + (suid? (not (eq? eid cid)))) ;; running suid + (dynamic-wind + (lambda () (if suid? (set! (current-effective-user-id) cid))) + ;; (print "running as " (current-effective-user-id)) + proc + (lambda () (if suid? (set! (current-effective-user-id) eid)))))) + +;; Return the first entry of paths for which (conc entry "/" name) exists, +;; or #f when there is none or paths is empty. +(define (datashare:find name paths) + (if (null? paths) + #f + (let loop ((hed (car paths)) + (tal (cdr paths))) + (if (file-exists? (conc hed "/" name)) + hed + (if (null? tal) + #f + (loop (car tal)(cdr tal))))))) + +;;====================================================================== +;; MAIN +;;====================================================================== + +;; Read the config file exe-dir/.exe-name.config with read-config, or return +;; an empty hash table when that file does not exist. +(define (datashare:load-config exe-dir exe-name) + (let* ((fname (conc exe-dir "/." exe-name ".config"))) + (ini:property-separator-patt " * *") + (ini:property-separator #\space) + (if (file-exists? fname) + ;; (ini:read-ini fname) + (read-config fname #f #t) + (make-hash-table)))) + +(define (datashare:process-action configdat action . 
args) + (case (string->symbol action) + ((get) + (if (< (length args) 2) + (begin + (print "ERROR: Missing arguments; " (string-intersperse args ", ")) + (exit 1)) + (let* ((basepath (configf:lookup configdat "settings" "basepath")) + (db (datashare:open-db configdat)) + (area (car args)) + (version (cadr args)) ;; iteration + (remargs (args:get-args args '("-i") '() args:arg-hash 0)) + (iteration (if (args:get-arg "-i")(string->number (args:get-arg "-i")) #f)) + (curr-record (datashare:get-pkg db area version iteration: iteration))) + (if (not curr-record) + (begin + (print "ERROR: No matching record found; area=" area ", version=" version ", iteration=" (if iteration iteration "(max)")) + (exit 1)) + (let* ((stored-path (datashare:pkg-get-stored_path curr-record)) + (source-type (datashare:pkg-get-store_type curr-record)) + (source-path (case source-type ;; (equal? source-type "link")) + ((link) (datashare:pkg-get-source-path curr-record)) + ((copy) stored-path) + (else #f))) + (dest-stub (configf:lookup configdat "areas" area)) + (target-path (conc basepath "/" dest-stub))) + (datashare:build-dir-make-link stored-path target-path) + (datashare:record-pkg-ref db (datashare:pkg-get-id curr-record) target-path) + (sqlite3:finalize! db) + (print "Creating link from " stored-path " to " target-path)))))) + ((publish) + (if (< (length args) 3) + (begin + (print "ERROR: Missing arguments; " (string-intersperse args ", ")) + (exit 1)) + (let* ((srcpath (list-ref args 0)) + (areaname (list-ref args 1)) + (version (list-ref args 2)) + (remargs (args:get-args (drop args 2) + '("-type" ;; link or copy (default is copy) + "-m") + '() + args:arg-hash + 0)) + (publish-type (if (equal? 
(args:get-arg "-type") "link") 'link 'copy)) + (comment (or (args:get-arg "-m") "")) + (submitter (current-user-name)) + (quality (args:get-arg "-quality")) + (publish-res (datashare:publish configdat publish-type areaname version comment srcpath submitter quality))) + (if (not (car publish-res)) + (begin + (print "ERROR: " (cdr publish-res)) + (exit 1)))))) + ;; list-versions: print the versions recorded for an area, one per line; + ;; with -full print the first five fields of each record in columns + ((list-versions) + ;; let* rather than let: versions needs db, and args:get-args has to run + ;; before -vpatt is looked up (presumably it fills args:arg-hash) + (let* ((area-name (car args)) ;; version patt full print + (remargs (args:get-args args '("-vpatt") '("-full") args:arg-hash 0)) + (db (datashare:open-db configdat)) + (versions (datashare:get-versions-for-area db area-name version-patt: (args:get-arg "-vpatt")))) + ;; (print "area-name=" area-name " args=" args " *args-hash*=" (hash-table->alist *args-hash*)) + ;; for-each, not map: the printing is done only for its side effect + (for-each (lambda (x) + (if (args:get-arg "-full") + (format #t + "~10a~10a~4a~27a~30a\n" + (vector-ref x 0) + (vector-ref x 1) + (vector-ref x 2) + (conc "\"" (time->string (seconds->local-time (vector-ref x 3))) "\"") + (conc "\"" (vector-ref x 4) "\"")) + (print (vector-ref x 0)))) + versions) + (sqlite3:finalize! db))))) + +;; ease debugging by loading ~/.datasharerc - REMOVE FROM PRODUCTION! +(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.datasharerc"))) + (if (file-exists? debugcontrolf) + (load debugcontrolf))) + +(define (main) + (let* ((args (argv)) + (prog (car args)) + (rema (cdr args)) + (exe-name (pathname-file (car (argv)))) + (exe-dir (or (pathname-directory prog) + (datashare:find exe-name (string-split (get-environment-variable "PATH") ":")))) + (configdat (datashare:load-config exe-dir exe-name))) + (cond + ;; one-word commands + ((eq? (length rema) 1) + (case (string->symbol (car rema)) + ((help -h -help --h --help) + (print datashare:help)) + ((list-areas) + (map print (datashare:get-areas configdat))) + (else + (print "ERROR: Unrecognised command. Try \"datashare help\"")))) + ;; multi-word commands + ((null? 
rema)(datashare:gui configdat)) + ((>= (length rema) 2) + (apply datashare:process-action configdat (car rema)(cdr rema))) + (else (print "ERROR: Unrecognised command. Try \"datashare help\""))))) + +(main) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -12,11 +12,11 @@ ;;====================================================================== ;; Database access ;;====================================================================== (require-extension (srfi 18) extras tcp) -(use sqlite3 srfi-1 posix regex regex-case srfi-69 csv-xml s11n md5 message-digest base64 format) +(use sqlite3 srfi-1 posix regex regex-case srfi-69 csv-xml s11n md5 message-digest base64 format dot-locking z3) (import (prefix sqlite3 sqlite3:)) (import (prefix base64 base64:)) (declare (unit db)) (declare (uses common)) @@ -32,10 +32,28 @@ (define *rundb-mutex* (make-mutex)) ;; prevent problems opening/closing rundb's (define *number-of-writes* 0) (define *number-non-write-queries* 0) +;;====================================================================== +;; SQLITE3 HELPERS +;;====================================================================== + +;; convert to -inline +(define (db:first-result-default db stmt default . params) + (handle-exceptions + exn + (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) + ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn) + (if (eq? 
err-status 'done) + default + (begin + (debug:print 0 "ERROR: query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn)) + (print-call-chain (current-error-port)) + default))) + (apply sqlite3:first-result db stmt params))) + ;; Get/open a database ;; if run-id => get run specific db ;; if #f => get main db ;; if db already open - return inmem ;; if db not open, open inmem, rundb and sync then return inmem @@ -44,16 +62,28 @@ (define (db:get-db dbstruct run-id) (if (sqlite3:database? dbstruct) ;; pass sqlite3 databases on through dbstruct (begin (mutex-lock! *rundb-mutex*) - (let ((db (if run-id - (db:open-rundb dbstruct run-id) - (db:open-main dbstruct)))) + (let ((dbdat (if (or (not run-id) + (eq? run-id 0)) + (db:open-main dbstruct) + (db:open-rundb dbstruct run-id) + ))) ;; db prunning would go here (mutex-unlock! *rundb-mutex*) - db)))) + dbdat)))) + +(define (db:dbdat-get-db dbdat) + (if (pair? dbdat) + (car dbdat) + dbdat)) + +(define (db:dbdat-get-path dbdat) + (if (pair? dbdat) + (cdr dbdat) + #f)) ;; mod-read: ;; 'mod modified data ;; 'read read data ;; @@ -69,22 +99,23 @@ ;; (db:with-db dbstruct run-id sqlite3:exec "select blah from blaz;") ;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no ;; (define (db:with-db dbstruct run-id r/w proc . params) - (let* ((db (db:get-db dbstruct run-id)) - ) - ;; (proc2 (lambda () - (let ((res (apply proc db params))) - (db:done-with dbstruct run-id r/w) - res))) -;; (handle-exceptions -;; exn -;; (begin -;; (thread-sleep! 10) -;; (proc2)) -;; (proc2)))) + (let* ((dbdat (if (vector? 
dbstruct) + (db:get-db dbstruct run-id) + dbstruct)) ;; cheat, allow for passing in a dbdat + (db (db:dbdat-get-db dbdat))) + (db:delay-if-busy dbdat) + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: sqlite3 issue in db:with-db, dbstruct=" dbstruct ", run-id=" run-id ", proc=" proc ", params=" params " error: " ((condition-property-accessor 'exn 'message) exn)) + (print-call-chain (current-error-port))) + (let ((res (apply proc db params))) + (if (vector? dbstruct)(db:done-with dbstruct run-id r/w)) + res)))) ;;====================================================================== ;; K E E P F I L E D B I N dbstruct ;;====================================================================== @@ -97,172 +128,280 @@ ;; fdb)))) ;; ;; ;; Can also be used to save arbitrary strings ;; ;; ;; (define (db:save-path dbstruct path) -;; (let ((fdb (db:get-filedb dbstruct))) +;; (let ((fdb (db:get-filedb dbstruct)))b ;; (filedb:register-path fdb path))) ;; ;; ;; Use to get a path. To get an arbitrary string see next define ;; ;; ;; (define (db:get-path dbstruct id) ;; (let ((fdb (db:get-filedb dbstruct))) ;; (filedb:get-path db id))) + +;; NB// #f => zeroth db with name=main.db +;; +(define (db:dbfile-path run-id) + (let* (;; (toppath (dbr:dbstruct-get-path dbstruct)) + (link-tree-path (configf:lookup *configdat* "setup" "linktree")) + (fname (if (eq? run-id 0) "main.db" (conc run-id ".db"))) + (dbdir (conc link-tree-path "/.db/"))) + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: Couldn't create path to " dbdir) + (exit 1)) + (if (not (directory? dbdir))(create-directory dbdir #t))) + (conc dbdir fname))) + +(define (db:set-sync db) + (let ((syncprag (configf:lookup *configdat* "setup" "sychronous"))) + (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 1) ";")))) + +;; open an sql database inside a file lock +;; +;; returns: db existed-prior-to-opening +;; +(define (db:lock-create-open fname initproc) + (if (file-exists? 
fname) + (let ((db (sqlite3:open-database fname))) + (sqlite3:set-busy-handler! db (make-busy-timeout 136000)) + (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = 0;") + db) + (let* ((parent-dir (pathname-directory fname)) + (dir-writable (file-write-access? parent-dir))) + (if dir-writable + (let ((exists (file-exists? fname)) + (lock (obtain-dot-lock fname 1 5 10)) + (db (sqlite3:open-database fname))) + (sqlite3:set-busy-handler! db (make-busy-timeout 136000)) + (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = 0;") + (if (not exists)(initproc db)) + (release-dot-lock fname) + db) + (begin + (debug:print 0 "ERROR: no such db in non-writable dir " fname) + (sqlite3:open-database fname)))))) ;; This routine creates the db. It is only called if the db is not already opened ;; -(define (db:open-rundb dbstruct run-id) ;; (conc *toppath* "/megatest.db") (car *configinfo*))) +(define (db:open-rundb dbstruct run-id #!key (attemptnum 0)(do-not-open #f)) ;; (conc *toppath* "/megatest.db") (car *configinfo*))) (let* ((local (dbr:dbstruct-get-local dbstruct)) (rdb (if local (dbr:dbstruct-get-localdb dbstruct run-id) (dbr:dbstruct-get-inmem dbstruct)))) ;; (dbr:dbstruct-get-runrec dbstruct run-id 'inmem))) - (if rdb + (if (or rdb + do-not-open) rdb - (let* ((toppath (dbr:dbstruct-get-path dbstruct)) - (dbpath (conc toppath "/db/" run-id ".db")) + (let* ((dbpath (db:dbfile-path run-id)) ;; (conc toppath "/db/" run-id ".db")) (dbexists (file-exists? 
dbpath)) (inmem (if local #f (db:open-inmem-db))) (refdb (if local #f (db:open-inmem-db))) - (db (sqlite3:open-database dbpath)) + (db (db:lock-create-open dbpath ;; this is the database physically on disk + (lambda (db) + (handle-exceptions + exn + (begin + (release-dot-lock dbpath) + (if (> attemptnum 2) + (debug:print 0 "ERROR: tried twice, cannot create/initialize db for run-id " run-id ", at path " dbpath) + (db:open-rundb dbstruct run-id attemptnum (+ attemptnum 1)))) + (db:initialize-run-id-db db) + (sqlite3:execute + db + "INSERT OR IGNORE INTO tests (id,run_id,testname,event_time,item_path,state,status) VALUES (?,?,'bogustest',strftime('%s','now'),'nowherepath','DELETED','n/a');" + (* run-id 30000) ;; allow for up to 30k tests per run + run-id) + ;; do a dummy query to test that the table exists and the db is truly readable + (sqlite3:execute db "SELECT * FROM tests WHERE id=?;" (* run-id 30000)) + )))) ;; add strings db to rundb, not in use yet + ;; )) ;; (sqlite3:open-database dbpath)) + (olddb (if *megatest-db* + *megatest-db* + (let ((db (db:open-megatest-db))) + (set! *megatest-db* db) + db))) (write-access (file-write-access? dbpath)) - (handler (make-busy-timeout 136000))) + ;; (handler (make-busy-timeout 136000)) + ) (if (and dbexists (not write-access)) (set! *db-write-access* #f)) ;; only unset so other db's also can use this control - (if write-access - (begin - (if (not dbexists) - (begin - (db:initialize-run-id-db db) - ;; (sdb:initialize db) - )) ;; add strings db to rundb, not in use yet - (sqlite3:set-busy-handler! db handler) - (sqlite3:execute db "PRAGMA synchronous = 1;"))) ;; was 0 but 0 is a gamble - (dbr:dbstruct-set-rundb! dbstruct db) - (dbr:dbstruct-set-inuse! dbstruct #t) + (dbr:dbstruct-set-rundb! dbstruct (cons db dbpath)) + (dbr:dbstruct-set-inuse! dbstruct #t) + (dbr:dbstruct-set-olddb! dbstruct olddb) + ;; (dbr:dbstruct-set-run-id! dbstruct run-id) (if local (begin (dbr:dbstruct-set-localdb! 
dbstruct run-id db) ;; (dbr:dbstruct-set-inmem! dbstruct db) ;; direct access ... db) (begin - (dbr:dbstruct-set-inmem! dbstruct inmem) + (dbr:dbstruct-set-inmem! dbstruct inmem) + (sqlite3:execute db "DELETE FROM tests WHERE state='DELETED';") ;; they just slow us down in this context (db:sync-tables db:sync-tests-only db inmem) - (dbr:dbstruct-set-refdb! dbstruct refdb) + (db:delay-if-busy refdb) ;; dbpath: (db:dbdat-get-path refdb)) + (dbr:dbstruct-set-refdb! dbstruct refdb) + (db:sync-tables db:sync-tests-only db refdb) + ;; sync once more to deal with delays + (db:sync-tables db:sync-tests-only db inmem) (db:sync-tables db:sync-tests-only db refdb) inmem)))))) -;; This routine creates the db. It is only called if the db is not already opened +;; This routine creates the db. It is only called if the db is not already ls opened ;; (define (db:open-main dbstruct) ;; (conc *toppath* "/megatest.db") (car *configinfo*))) (let ((mdb (dbr:dbstruct-get-main dbstruct))) (if mdb mdb - (let* ((toppath (dbr:dbstruct-get-path dbstruct)) - (dbpath (let ((dbdir (conc *toppath* "/db"))) ;; use this opportunity to create our db dir - (if (not (directory-exists? dbdir)) - (create-direcory dbdir)) - (conc *toppath* "/db/main.db"))) + (let* ((dbpath (db:dbfile-path 0)) (dbexists (file-exists? dbpath)) - (db (sqlite3:open-database dbpath)) + (db (db:lock-create-open dbpath db:initialize-main-db)) + (olddb (db:open-megatest-db)) (write-access (file-write-access? dbpath)) - (handler (make-busy-timeout 136000))) + (dbdat (cons db dbpath))) (if (and dbexists (not write-access)) (set! *db-write-access* #f)) - (if write-access - (begin - (sqlite3:set-busy-handler! db handler) - (sqlite3:execute db "PRAGMA synchronous = 0;"))) - (if (not dbexists) - (db:initialize-main-db db)) - (dbr:dbstruct-set-main! dbstruct db) - db)))) + (dbr:dbstruct-set-main! dbstruct dbdat) + (dbr:dbstruct-set-olddb! 
dbstruct olddb) ;; olddb is already a (cons db path) + dbdat)))) ;; Make the dbstruct, setup up auxillary db's and call for main db at least once ;; (define (db:setup run-id #!key (local #f)) - (let ((dbstruct (make-dbr:dbstruct path: *toppath* local: local))) - (db:get-db dbstruct #f) ;; force one call to main + (let* ((dbdir (conc (configf:lookup *configdat* "setup" "linktree") "/.db")) + (dbstruct (make-dbr:dbstruct path: dbdir local: local))) dbstruct)) ;; Open the classic megatest.db file in toppath ;; (define (db:open-megatest-db) (let* ((dbpath (conc *toppath* "/megatest.db")) (dbexists (file-exists? dbpath)) - (db (sqlite3:open-database dbpath)) - (write-access (file-write-access? dbpath)) - (handler (make-busy-timeout 136000))) + (db (db:lock-create-open dbpath + (lambda (db) + (db:initialize-main-db db) + (db:initialize-run-id-db db)))) + (write-access (file-write-access? dbpath))) (if (and dbexists (not write-access)) (set! *db-write-access* #f)) - (if write-access - (begin - (sqlite3:set-busy-handler! db handler) - (sqlite3:execute db "PRAGMA synchronous = 0;"))) - (if (not dbexists) - (begin - (db:initialize-main-db db) - (db:initialize-run-id-db db))) - db)) - -;; sync all touched runs to disk -;; -(define (db:sync-touched dbstruct #!key (force-sync #f)) - (let ((tot-synced 0)) - (for-each - (lambda (runvec) - (let ((mtime (vector-ref runvec (dbr:dbstruct-field-name->num 'mtime))) - (stime (vector-ref runvec (dbr:dbstruct-field-name->num 'stime))) - (rundb (vector-ref runvec (dbr:dbstruct-field-name->num 'rundb))) - (inmem (vector-ref runvec (dbr:dbstruct-field-name->num 'inmem))) - (refdb (vector-ref runvec (dbr:dbstruct-field-name->num 'refdb)))) - (if (or (> mtime stime) force-sync) - (let ((num-synced (db:sync-tables db:sync-tests-only inmem refdb rundb))) - (set! tot-synced (+ tot-synced num-synced)) - (vector-set! 
runvec (dbr:dbstruct-field-name->num 'stime) (current-milliseconds)))))) - (hash-table-values (vector-ref dbstruct 1))) - tot-synced)) + (cons db dbpath))) ;; sync run to disk if touched ;; -(define (db:sync-touched dbstruct #!key (force-sync #f)) - (let ((mtime (dbr:dbstruct-get-mtime dbstruct)) - (stime (dbr:dbstruct-get-stime dbstruct)) - (rundb (dbr:dbstruct-get-rundb dbstruct)) - (inmem (dbr:dbstruct-get-inmem dbstruct)) - (refdb (dbr:dbstruct-get-refdb dbstruct))) - (if (or (not (number? mtime)) - (not (number? stime)) - (> mtime stime) - force-sync) - (let ((num-synced (db:sync-tables db:sync-tests-only inmem refdb rundb))) - (dbr:dbstruct-set-stime! dbstruct (current-milliseconds)) - num-synced) - 0))) +(define (db:sync-touched dbstruct run-id #!key (force-sync #f)) + (let ((mtime (dbr:dbstruct-get-mtime dbstruct)) + (stime (dbr:dbstruct-get-stime dbstruct)) + (rundb (dbr:dbstruct-get-rundb dbstruct)) + (inmem (dbr:dbstruct-get-inmem dbstruct)) + (maindb (dbr:dbstruct-get-main dbstruct)) + (refdb (dbr:dbstruct-get-refdb dbstruct)) + (olddb (dbr:dbstruct-get-olddb dbstruct)) + ;; (runid (dbr:dbstruct-get-run-id dbstruct)) + ) + (debug:print-info 4 "Syncing for run-id: " run-id) + ;; (mutex-lock! *http-mutex*) + (if (eq? run-id 0) + ;; runid equal to 0 is main.db + (if maindb + (if (or (not (number? mtime)) + (not (number? stime)) + (> mtime stime) + force-sync) + (begin + (db:delay-if-busy maindb) + (db:delay-if-busy olddb) + (let ((num-synced (db:sync-tables (db:sync-main-list maindb) maindb olddb))) + (dbr:dbstruct-set-stime! dbstruct (current-milliseconds)) + num-synced) + 0)) + (begin + ;; this can occur when using local access (i.e. not in a server) + ;; need a flag to turn it off. + ;; + (debug:print 3 "WARNING: call to sync main.db to megatest.db but main not initialized") + 0)) + ;; any other runid is a run + (if (or (not (number? mtime)) + (not (number? 
stime)) + (> mtime stime) + force-sync) + (begin + (db:delay-if-busy rundb) + (db:delay-if-busy olddb) + (dbr:dbstruct-set-stime! dbstruct (current-milliseconds)) + (let ((num-synced (db:sync-tables db:sync-tests-only inmem refdb rundb olddb))) + ;; (mutex-unlock! *http-mutex*) + num-synced) + (begin + ;; (mutex-unlock! *http-mutex*) + 0)))))) + +(define (db:close-main dbstruct) + (let ((maindb (dbr:dbstruct-get-main dbstruct))) + (if maindb + (begin + (sqlite3:finalize! (db:dbdat-get-db maindb)) + (dbr:dbstruct-set-main! dbstruct #f))))) + +(define (db:close-run-db dbstruct run-id) + (let ((rdb (db:open-rundb dbstruct run-id do-not-open: #t))) + (if (and rdb + (sqlite3:database? rdb)) + (begin + (sqlite3:finalize! rdb) + (dbr:dbstruct-set-localdb! dbstruct run-id #f) + (dbr:dbstruct-set-inmem! dbstruct #f))))) ;; close all opened run-id dbs (define (db:close-all dbstruct) ;; finalize main.db - (db:sync-touched dbstruct force-sync: #t) - (sqlite3:finalize! (db:get-db dbstruct #f)) - (let* ((local (dbr:dbstruct-get-local dbstruct)) - (rundb (dbr:dbstruct-get-rundb dbstruct))) - (if local - (for-each - (lambda (db) - (if (sqlite3:database? db) - (sqlite3:finalize! db))) - (hash-table-values (dbr:dbstruct-get-locdbs dbstruct))) - (if (sqlite3:database? rundb) - (sqlite3:finalize! rundb) - (debug:print 0 "WARNING: attempting to close databases but got " rundb " instead of a database"))))) + (db:sync-touched dbstruct 0 force-sync: #t) + ;;(common:db-block-further-queries) + ;; (mutex-lock! *db-sync-mutex*) ;; with this perhaps it isn't necessary to use the block-further-queries mechanism? + + (db:close-main dbstruct) + + (let ((locdbs (dbr:dbstruct-get-locdbs dbstruct))) + (if (hash-table? 
locdbs) + (for-each (lambda (run-id) + (db:close-run-db dbstruct run-id)) + (hash-table-keys locdbs)))) + + ;; (let* ((local (dbr:dbstruct-get-local dbstruct)) + ;; (rundb (db:dbdat-get-db (dbr:dbstruct-get-rundb dbstruct)))) + ;; (if local + ;; (for-each + ;; (lambda (dbdat) + ;; (let ((db (db:dbdat-get-db dbdat))) + ;; (if (sqlite3:database? db) + ;; (begin + ;; (sqlite3:interrupt! db) + ;; (sqlite3:finalize! db #t))))) + ;; ;; TODO: Come back to this and rework to delete from hashtable when finalized + ;; (hash-table-values (dbr:dbstruct-get-locdbs dbstruct)))) + ;; (thread-sleep! 3) + ;; (if (and rundb + ;; (sqlite3:database? rundb)) + ;; (handle-exceptions + ;; exn + ;; (begin + ;; (debug:print 0 "WARNING: database files may not have been closed correctly. Consider running -cleanup-db") + ;; (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + ;; (debug:print 0 " db: " rundb) + ;; (print-call-chain (current-error-port)) + ;; #f) + ;; (sqlite3:interrupt! rundb) + ;; (sqlite3:finalize! rundb #t)))) + ;; ;; (mutex-unlock! *db-sync-mutex*) + ) (define (db:open-inmem-db) (let* ((db (sqlite3:open-database ":memory:")) - (handler (make-busy-timeout 3600))) + (handler (make-busy-timeout 3600))) + (sqlite3:set-busy-handler! db handler) (db:initialize-run-id-db db) - ;; (sdb:initialize db) ;; for future use - (sqlite3:set-busy-handler! db handler) - db)) + (cons db #f))) ;; just tests, test_steps and test_data tables (define db:sync-tests-only (list ;; (list "strs" @@ -338,107 +477,245 @@ '("avg_disk" #f) '("tags" #f) '("jobgroup" #f))))) ;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) ) +;; db's are dbdat's +;; (define (db:sync-tables tbls fromdb todb . slave-dbs) - (cond - ((not fromdb) (debug:print 0 "ERROR: db:sync-tables called with fromdb missing") -1) - ((not todb) (debug:print 0 "ERROR: db:sync-tables called with todb missing") -2) - ((not (sqlite3:database? 
fromdb)) - (debug:print 0 "ERROR: db:sync-tables called with fromdb not a database " fromdb) -3) - ((not (sqlite3:database? todb)) - (debug:print 0 "ERROR: db:sync-tables called with todb not a database " todb) -4) - (else - (let ((stmts (make-hash-table)) ;; table-field => stmt - (all-stmts '()) ;; ( ( stmt1 value1 ) ( stml2 value2 )) - (numrecs (make-hash-table)) - (start-time (current-milliseconds)) - (tot-count 0)) - (for-each ;; table - (lambda (tabledat) - (let* ((tablename (car tabledat)) - (fields (cdr tabledat)) - (num-fields (length fields)) - (field->num (make-hash-table)) - (num->field (apply vector (map car fields))) - (full-sel (conc "SELECT " (string-intersperse (map car fields) ",") - " FROM " tablename ";")) - (full-ins (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) " - " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );")) - (fromdat '()) - (todat (make-hash-table)) - (count 0)) - - ;; set up the field->num table - (for-each - (lambda (field) - (hash-table-set! field->num field count) - (set! count (+ count 1))) - fields) - - ;; read the source table - (sqlite3:for-each-row - (lambda (a . b) - (set! fromdat (cons (apply vector a b) fromdat))) - fromdb - full-sel) - - (debug:print 0 "INFO: found " (length fromdat) " records to sync") - - ;; read the target table - (sqlite3:for-each-row - (lambda (a . b) - (hash-table-set! todat a (apply vector a b))) - todb - full-sel) - - ;; first pass implementation, just insert all changed rows - (for-each - (lambda (targdb) - (let ((stmth (sqlite3:prepare targdb full-ins))) - (sqlite3:with-transaction - targdb - (lambda () - (for-each ;; - (lambda (fromrow) - (let* ((a (vector-ref fromrow 0)) - (curr (hash-table-ref/default todat a #f)) - (same #t)) - (let loop ((i 0)) - (if (or (not curr) - (not (equal? (vector-ref fromrow i)(vector-ref curr i)))) - (set! 
same #f)) - (if (and same - (< i (- num-fields 1))) - (loop (+ i 1)))) - (if (not same) - (begin - (apply sqlite3:execute stmth (vector->list fromrow)) - (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0))))))) - fromdat))) - (sqlite3:finalize! stmth))) - (append (list todb) slave-dbs)))) - tbls) - (let ((runtime (- (current-milliseconds) start-time))) - (debug:print 0 "INFO: db sync, total run time " runtime " ms") - (for-each - (lambda (dat) - (let ((tblname (car dat)) - (count (cdr dat))) - (set! tot-count (+ tot-count count)) - (if (> count 0) - (debug:print 0 (format #f " ~10a ~5a" tblname count))))) - (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b)))))) - tot-count)))) + (mutex-lock! *db-sync-mutex*) + (handle-exceptions + exn + (begin + (debug:print 0 "EXCEPTION: database probably overloaded or unreadable in db:sync-tables.") + (print-call-chain (current-error-port)) + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (print "exn=" (condition->list exn)) + (debug:print 0 " status: " ((condition-property-accessor 'sqlite3 'status) exn)) + (debug:print 0 " src db: " (db:dbdat-get-path fromdb)) + (for-each (lambda (dbdat) + (debug:print 0 " dbpath: " (db:dbdat-get-path dbdat))) + (cons todb slave-dbs)) + (if *server-run* ;; we are inside a server + (set! *time-to-exit* #t) ;; let watch dog know that it is time to die. + (exit 1))) + (cond + ((not fromdb) (debug:print 3 "WARNING: db:sync-tables called with fromdb missing") -1) + ((not todb) (debug:print 3 "WARNING: db:sync-tables called with todb missing") -2) + ((not (sqlite3:database? (db:dbdat-get-db fromdb))) + (debug:print 0 "ERROR: db:sync-tables called with fromdb not a database " fromdb) -3) + ((not (sqlite3:database? 
(db:dbdat-get-db todb))) + (debug:print 0 "ERROR: db:sync-tables called with todb not a database " todb) -4) + (else + (let ((stmts (make-hash-table)) ;; table-field => stmt + (all-stmts '()) ;; ( ( stmt1 value1 ) ( stml2 value2 )) + (numrecs (make-hash-table)) + (start-time (current-milliseconds)) + (tot-count 0)) + (for-each ;; table + (lambda (tabledat) + (let* ((tablename (car tabledat)) + (fields (cdr tabledat)) + (num-fields (length fields)) + (field->num (make-hash-table)) + (num->field (apply vector (map car fields))) + (full-sel (conc "SELECT " (string-intersperse (map car fields) ",") + " FROM " tablename ";")) + (full-ins (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) " + " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );")) + (fromdat '()) + (fromdats '()) + (totrecords 0) + (batch-len (string->number (or (configf:lookup *configdat* "sync" "batchsize") "10"))) + (todat (make-hash-table)) + (count 0)) + + ;; set up the field->num table + (for-each + (lambda (field) + (hash-table-set! field->num field count) + (set! count (+ count 1))) + fields) + + ;; read the source table + (sqlite3:for-each-row + (lambda (a . b) + (set! fromdat (cons (apply vector a b) fromdat)) + (if (> (length fromdat) batch-len) + (begin + (set! fromdats (cons fromdat fromdats)) + (set! fromdat '()) + (set! totrecords (+ totrecords 1))))) + (db:dbdat-get-db fromdb) + full-sel) + + (debug:print-info 2 "found " totrecords " records to sync") + + ;; read the target table + (sqlite3:for-each-row + (lambda (a . b) + (hash-table-set! 
todat a (apply vector a b))) + (db:dbdat-get-db todb) + full-sel) + + ;; first pass implementation, just insert all changed rows + (for-each + (lambda (targdb) + (let* ((db (db:dbdat-get-db targdb)) + (stmth (sqlite3:prepare db full-ins))) + ;; (db:delay-if-busy targdb) ;; NO WAITING + (for-each + (lambda (fromdat-lst) + (sqlite3:with-transaction + db + (lambda () + (for-each ;; + (lambda (fromrow) + (let* ((a (vector-ref fromrow 0)) + (curr (hash-table-ref/default todat a #f)) + (same #t)) + (let loop ((i 0)) + (if (or (not curr) + (not (equal? (vector-ref fromrow i)(vector-ref curr i)))) + (set! same #f)) + (if (and same + (< i (- num-fields 1))) + (loop (+ i 1)))) + (if (not same) + (begin + (apply sqlite3:execute stmth (vector->list fromrow)) + (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0))))))) + fromdat-lst)) + )) + fromdats) + (sqlite3:finalize! stmth))) + (append (list todb) slave-dbs)))) + tbls) + (let* ((runtime (- (current-milliseconds) start-time)) + (should-print (common:low-noise-print 30 "db sync" (> runtime 500)))) ;; low and high sync times treated as separate. + (if should-print (debug:print 0 "INFO: db sync, total run time " runtime " ms")) + (for-each + (lambda (dat) + (let ((tblname (car dat)) + (count (cdr dat))) + (set! tot-count (+ tot-count count)) + (if (> count 0) + (if should-print (debug:print 0 (format #f " ~10a ~5a" tblname count)))))) + (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b)))))) + tot-count))) + (mutex-unlock! *db-sync-mutex*))) + +;; options: +;; +;; 'killservers - kills all servers +;; 'dejunk - removes junk records +;; 'adj-testids - move test-ids into correct ranges +;; 'old2new - sync megatest.db records to .db/{main,1,2 ...}.db +;; 'new2old - sync .db/{main,1,2,3 ...}.db to megatest.db +;; 'closeall - close all opened dbs +;; +;; run-ids: '(1 2 3 ...) or #f (for all) +;; +(define (db:multi-db-sync run-ids . 
options) + (let* ((toppath (launch:setup-for-run)) + (dbstruct (if toppath (make-dbr:dbstruct path: toppath) #f)) + (mtdb (if toppath (db:open-megatest-db))) + (allow-cleanup (if run-ids #f #t)) + (run-ids (if run-ids + run-ids + (if toppath (begin + (db:delay-if-busy mtdb) + (db:get-all-run-ids mtdb))))) + (tdbdat (tasks:open-db)) + (servers (tasks:get-all-servers (db:delay-if-busy tdbdat)))) + + ;; kill servers + (if (member 'killservers options) + (for-each + (lambda (server) + (tasks:server-delete-record (db:delay-if-busy tdbdat) (vector-ref server 0) "dbmigration") + (tasks:kill-server (vector-ref server 2)(vector-ref server 1))) + servers)) + + ;; clear out junk records + ;; + (if (member 'dejunk options) + (begin + (db:delay-if-busy mtdb) + (db:clean-up mtdb))) + + ;; adjust test-ids to fit into proper range + ;; + (if (member 'adj-testids options) + (begin + (db:delay-if-busy mtdb) + (db:prep-megatest.db-for-migration mtdb))) + + ;; sync runs, test_meta etc. + ;; + (if (member 'old2new options) + (begin + (db:sync-tables (db:sync-main-list mtdb) mtdb (db:get-db dbstruct #f)) + (for-each + (lambda (run-id) + (db:delay-if-busy mtdb) + (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id)) + (dbstruct (if toppath (make-dbr:dbstruct path: toppath local: #t) #f))) + (debug:print 0 "INFO: Propagating " (length testrecs) " records for run-id=" run-id " to run specific db") + (db:replace-test-records dbstruct run-id testrecs) + (sqlite3:finalize! 
(db:dbdat-get-db (dbr:dbstruct-get-rundb dbstruct))))) + run-ids))) + + ;; now ensure all newdb data are synced to megatest.db + ;; do not use the run-ids list passed in to the function + ;; + (if (member 'new2old options) + (let* ((maindb (make-dbr:dbstruct path: toppath local: #t)) + (src-run-ids (db:get-all-run-ids (db:dbdat-get-db (db:get-db maindb 0)))) + (all-run-ids (sort (delete-duplicates (cons 0 src-run-ids)) <)) + (count 1) + (total (length all-run-ids)) + (dead-runs '())) + (for-each + (lambda (run-id) + (debug:print 0 "Processing run " (if (eq? run-id 0) " main.db " run-id) ", " count " of " total) + (set! count (+ count 1)) + (let* ((fromdb (if toppath (make-dbr:dbstruct path: toppath local: #t) #f)) + (frundb (db:dbdat-get-db (db:get-db fromdb run-id)))) + ;; (db:delay-if-busy frundb) + ;; (db:delay-if-busy mtdb) + ;; (db:clean-up frundb) + (if (eq? run-id 0) + (begin + (db:sync-tables (db:sync-main-list dbstruct) (db:get-db fromdb #f) mtdb) + (set! dead-runs (db:clean-up-maindb (db:get-db fromdb #f)))) + (begin + ;; NB// must sync first to ensure deleted tests get marked as such in megatest.db + (db:sync-tables db:sync-tests-only (db:get-db fromdb run-id) mtdb) + (db:clean-up-rundb (db:get-db fromdb run-id)) + )))) + all-run-ids) + ;; removed deleted runs + (let ((dbdir (tasks:get-task-db-path))) + (for-each (lambda (run-id) + (let ((fullname (conc dbdir "/" run-id ".db"))) + (if (file-exists? fullname) + (begin + (debug:print 0 "Removing database file for deleted run " fullname) + (delete-file fullname))))) + dead-runs)))) + ;; (db:close-all dbstruct) + ;; (sqlite3:finalize! mdb) + )) ;; keeping it around for debugging purposes only (define (open-run-close-no-exception-handling proc idb . params) (debug:print-info 11 "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params) (if (or *db-write-access* (not (member proc *db:all-write-procs*))) (let* ((db (cond + ((pair? 
idb) (db:dbdat-get-db idb)) ((sqlite3:database? idb) idb) ((not idb) (debug:print 0 "ERROR: cannot open-run-close with #f anymore")) ((procedure? idb) (idb)) (else (debug:print 0 "ERROR: cannot open-run-close with #f anymore")))) (res #f)) @@ -449,58 +726,68 @@ #f)) (define (open-run-close-exception-handling proc idb . params) (handle-exceptions exn - (begin - (debug:print 0 "EXCEPTION: database probably overloaded or unreadable.") - (debug:print 0 " " ((condition-property-accessor 'exn 'message) exn)) - (print-call-chain) - (thread-sleep! (random 120)) - (debug:print-info 0 "trying db call one more time....") - (apply open-run-close-no-exception-handling proc idb params)) + (let ((sleep-time (random 30)) + (err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) + (case err-status + ((busy) + (thread-sleep! sleep-time)) + (else + (debug:print 0 "EXCEPTION: database probably overloaded or unreadable.") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (print "exn=" (condition->list exn)) + (debug:print 0 " status: " ((condition-property-accessor 'sqlite3 'status) exn)) + (print-call-chain (current-error-port)) + (thread-sleep! sleep-time) + (debug:print-info 0 "trying db call one more time....this may never recover, if necessary kill process " (current-process-id) " on host " (get-host-name) " to clean up"))) + (apply open-run-close-exception-handling proc idb params)) (apply open-run-close-no-exception-handling proc idb params))) ;; (define open-run-close -(define open-run-close ;; (if (debug:debug-mode 2) +(define open-run-close open-run-close-exception-handling) ;; open-run-close-no-exception-handling - open-run-close-exception-handling) +;; open-run-close-exception-handling) ;;) -(define (db:initialize-main-db db) +(define (db:initialize-main-db dbdat) (let* ((configdat (car *configinfo*)) ;; tut tut, global warning... 
(keys (keys:config-get-fields configdat)) (havekeys (> (length keys) 0)) (keystr (keys->keystr keys)) - (fieldstr (keys->key/field keys))) + (fieldstr (keys->key/field keys)) + (db (db:dbdat-get-db dbdat))) (for-each (lambda (key) (let ((keyn key)) (if (member (string-downcase keyn) (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count")) (begin - (print "ERROR: your key cannot be named " keyn " as this conflicts with the same named field in the runs table") - (system (conc "rm -f " dbpath)) + (print "ERROR: your key cannot be named " keyn " as this conflicts with the same named field in the runs table, you must remove your megatest.db and /.db before trying again.") (exit 1))))) keys) - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS keys (id INTEGER PRIMARY KEY, fieldname TEXT, fieldtype TEXT, CONSTRAINT keyconstraint UNIQUE (fieldname));") - (for-each (lambda (key) - (sqlite3:execute db "INSERT INTO keys (fieldname,fieldtype) VALUES (?,?);" key "TEXT")) - keys) - (sqlite3:execute db (conc - "CREATE TABLE IF NOT EXISTS runs (id INTEGER PRIMARY KEY, \n " - fieldstr (if havekeys "," "") " + (sqlite3:with-transaction + db + (lambda () + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS keys (id INTEGER PRIMARY KEY, fieldname TEXT, fieldtype TEXT, CONSTRAINT keyconstraint UNIQUE (fieldname));") + (for-each (lambda (key) + (sqlite3:execute db "INSERT OR REPLACE INTO keys (fieldname,fieldtype) VALUES (?,?);" key "TEXT")) + keys) + (sqlite3:execute db (conc + "CREATE TABLE IF NOT EXISTS runs (id INTEGER PRIMARY KEY, \n " + fieldstr (if havekeys "," "") " runname TEXT DEFAULT 'norun', state TEXT DEFAULT '', status TEXT DEFAULT '', owner TEXT DEFAULT '', event_time TIMESTAMP DEFAULT (strftime('%s','now')), comment TEXT DEFAULT '', fail_count INTEGER DEFAULT 0, pass_count INTEGER DEFAULT 0, CONSTRAINT runsconstraint UNIQUE (runname" (if havekeys "," "") keystr "));")) - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_meta ( + 
(sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_meta ( id INTEGER PRIMARY KEY, testname TEXT DEFAULT '', author TEXT DEFAULT '', owner TEXT DEFAULT '', description TEXT DEFAULT '', @@ -509,27 +796,30 @@ avg_runtime REAL, avg_disk REAL, tags TEXT DEFAULT '', jobgroup TEXT DEFAULT 'default', CONSTRAINT test_meta_constraint UNIQUE (testname));") - (sqlite3:execute db (conc "CREATE INDEX runs_index ON runs (runname" (if havekeys "," "") keystr ");")) - ;; (sqlite3:execute db "CREATE VIEW runs_tests AS SELECT * FROM runs INNER JOIN tests ON runs.id=tests.run_id;") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS extradat (id INTEGER PRIMARY KEY, run_id INTEGER, key TEXT, val TEXT);") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS metadat (id INTEGER PRIMARY KEY, var TEXT, val TEXT, + (sqlite3:execute db (conc "CREATE INDEX IF NOT EXISTS runs_index ON runs (runname" (if havekeys "," "") keystr ");")) + ;; (sqlite3:execute db "CREATE VIEW runs_tests AS SELECT * FROM runs INNER JOIN tests ON runs.id=tests.run_id;") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS extradat (id INTEGER PRIMARY KEY, run_id INTEGER, key TEXT, val TEXT);") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS metadat (id INTEGER PRIMARY KEY, var TEXT, val TEXT, CONSTRAINT metadat_constraint UNIQUE (var));") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS access_log (id INTEGER PRIMARY KEY, user TEXT, accessed TIMESTAMP, args TEXT);") - ;; Must do this *after* running patch db !! No more. - ;; cannot use db:set-var since it will deadlock, hardwire the code here - (sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" "MEGATEST_VERSION" megatest-version) - (debug:print-info 11 "db:initialize END"))) + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS access_log (id INTEGER PRIMARY KEY, user TEXT, accessed TIMESTAMP, args TEXT);") + ;; Must do this *after* running patch db !! No more. 
+ ;; cannot use db:set-var since it will deadlock, hardwire the code here + (sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" "MEGATEST_VERSION" megatest-version) + (debug:print-info 11 "db:initialize END"))))) ;;====================================================================== ;; R U N S P E C I F I C D B ;;====================================================================== (define (db:initialize-run-id-db db) - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS tests + (sqlite3:with-transaction + db + (lambda () + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS tests (id INTEGER PRIMARY KEY, run_id INTEGER DEFAULT -1, testname TEXT DEFAULT 'noname', host TEXT DEFAULT 'n/a', cpuload REAL DEFAULT -1, @@ -548,31 +838,31 @@ event_time TIMESTAMP DEFAULT (strftime('%s','now')), fail_count INTEGER DEFAULT 0, pass_count INTEGER DEFAULT 0, archived INTEGER DEFAULT 0, -- 0=no, 1=in progress, 2=yes CONSTRAINT testsconstraint UNIQUE (run_id, testname, item_path));") - (sqlite3:execute db "CREATE INDEX tests_index ON tests (run_id, testname, item_path);") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_steps + (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_index ON tests (run_id, testname, item_path);") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_steps (id INTEGER PRIMARY KEY, test_id INTEGER, stepname TEXT, state TEXT DEFAULT 'NOT_STARTED', status TEXT DEFAULT 'n/a', event_time TIMESTAMP, comment TEXT DEFAULT '', logfile TEXT DEFAULT '', CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state));") -;; (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_data -;; (id INTEGER PRIMARY KEY, -;; reviewed TIMESTAMP DEFAULT (strftime('%s','now')), -;; iterated TEXT DEFAULT '', -;; avg_runtime REAL DEFAULT -1, -;; avg_disk REAL DEFAULT -1, -;; tags TEXT DEFAULT '', -;; jobgroup TEXT DEFAULT 'default', -;; CONSTRAINT test_meta_constraint UNIQUE (testname));") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_data 
(id INTEGER PRIMARY KEY, + ;; (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_data + ;; (id INTEGER PRIMARY KEY, + ;; reviewed TIMESTAMP DEFAULT (strftime('%s','now')), + ;; iterated TEXT DEFAULT '', + ;; avg_runtime REAL DEFAULT -1, + ;; avg_disk REAL DEFAULT -1, + ;; tags TEXT DEFAULT '', + ;; jobgroup TEXT DEFAULT 'default', + ;; CONSTRAINT test_meta_constraint UNIQUE (testname));") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_data (id INTEGER PRIMARY KEY, test_id INTEGER, category TEXT DEFAULT '', variable TEXT, value REAL, expected REAL, @@ -580,18 +870,20 @@ units TEXT, comment TEXT DEFAULT '', status TEXT DEFAULT 'n/a', type TEXT DEFAULT '', CONSTRAINT test_data_constraint UNIQUE (test_id,category,variable));") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_rundat ( + ;; Why use FULL here? This data is not that critical + ;; (sqlite3:execute db "PRAGMA synchronous = FULL;") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_rundat ( id INTEGER PRIMARY KEY, test_id INTEGER, update_time TIMESTAMP, cpuload INTEGER DEFAULT -1, diskfree INTEGER DEFAULT -1, diskusage INTGER DEFAULT -1, - run_duration INTEGER DEFAULT 0);") + run_duration INTEGER DEFAULT 0);"))) db) ;;====================================================================== ;; L O G G I N G D B ;;====================================================================== @@ -605,11 +897,12 @@ 136000)))) ;; 136000))) (sqlite3:set-busy-handler! db handler) (if (not dbexists) (begin (sqlite3:execute db "CREATE TABLE IF NOT EXISTS log (id INTEGER PRIMARY KEY,event_time TIMESTAMP DEFAULT (strftime('%s','now')),logline TEXT,pwd TEXT,cmdline TEXT,pid INTEGER);") - (sqlite3:execute db (conc "PRAGMA synchronous = 0;")))) + (db:set-sync db) ;; (sqlite3:execute db (conc "PRAGMA synchronous = 0;")) + )) db)) (define (db:log-local-event . 
loglst) (let ((logline (apply conc loglst))) (db:log-event logline))) @@ -635,59 +928,89 @@ ;; select end_time-now from ;; (select testname,item_path,event_time+run_duration as ;; end_time,strftime('%s','now') as now from tests where state in ;; ('RUNNING','REMOTEHOSTSTART','LAUNCED')); - -(define (db:find-and-mark-incomplete db #!key (ovr-deadtime #f)) - (let* ((incompleted '()) +(define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime) + (let* ((dbdat (db:get-db dbstruct run-id)) + (db (db:dbdat-get-db dbdat)) + (incompleted '()) + (oldlaunched '()) + (toplevels '()) (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) (deadtime (if (and deadtime-str (string->number deadtime-str)) (string->number deadtime-str) - 7200)) ;; two hours - (run-ids (db:get-all-run-ids db))) ;; iterate over runs to divy up the calls + 7200))) ;; two hours (if (number? ovr-deadtime)(set! deadtime ovr-deadtime)) - (for-each - (lambda (run-id) - - ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes - ;; - ;; THIS CANNOT WORK. The run_duration is not updated in the central db due to performance concerns. - ;; The testdat.db file must be consulted. - ;; - ;; HOWEVER: this code in run:test seems to work fine - ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) - ;; (db:test-get-run_duration testdat))) - ;; 600) - (sqlite3:for-each-row - (lambda (test-id) - (set! incompleted (cons test-id incompleted))) - db - "SELECT id FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time - run_duration) > ? AND state IN ('RUNNING','REMOTEHOSTSTART');" - run-id deadtime) - - ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config - ;; - (sqlite3:for-each-row - (lambda (test-id) - (set! incompleted (cons test-id incompleted))) - db - "SELECT id FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time - run_duration) > ? 
AND state IN ('LAUNCHED');" - run-id (* 60 60 24))) - run-ids) - + + ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes + ;; + ;; HOWEVER: this code in run:test seems to work fine + ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) + ;; (db:test-get-run_duration testdat))) + ;; 600) + (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (begin + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (debug:print-info 0 "Found old toplevel test in RUNNING state, test-id=" test-id)) + (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) + db + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');" + run-id deadtime) + + ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config + ;; + (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) + db + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? 
AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');" + run-id) + + (debug:print-info 18 "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") + ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE. ;; - (if (> (length incompleted) 0) - (begin - (debug:print 0 "WARNING: Marking test(s); " (string-intersperse (map conc incompleted) ", ") " as INCOMPLETE") - (sqlite3:execute - db - (conc "UPDATE tests SET state='INCOMPLETE' WHERE id IN (" - (string-intersperse (map conc incompleted) ",") - ");")))))) + (db:delay-if-busy dbdat) + (let* (;; (min-incompleted (filter (lambda (x) + ;; (let* ((testpath (cadr x)) + ;; (tdatpath (conc testpath "/testdat.db")) + ;; (dbexists (file-exists? tdatpath))) + ;; (or (not dbexists) ;; if no file then something wrong - mark as incomplete + ;; (> (- (current-seconds)(file-modification-time tdatpath)) 600)))) ;; no change in 10 minutes to testdat.db - she's dead Jim + ;; incompleted)) + (min-incompleted-ids (map car incompleted)) ;; do 'em all + (all-ids (append min-incompleted-ids (map car oldlaunched)))) + (if (> (length all-ids) 0) + (begin + (debug:print 0 "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as INCOMPLETE") + (sqlite3:execute + db + (conc "UPDATE tests SET state='INCOMPLETE' WHERE id IN (" + (string-intersperse (map conc all-ids) ",") + ");"))))) + + ;; Now do rollups for the toplevel tests + ;; + (db:delay-if-busy dbdat) + (for-each + (lambda (toptest) + (let ((test-name (list-ref toptest 3))) +;; (run-id (list-ref toptest 5))) + (db:general-call db 'top-test-set-per-pf-counts (list test-name run-id test-name test-name test-name)))) ;; (list run-id test-name)))) + toplevels))) ;; Clean out old junk and vacuum the database ;; ;; Ultimately do something like this: ;; @@ -696,15 +1019,13 @@ ;; b. 
If test dir gone, delete the test record ;; 2. Look at run records ;; a. If have tests that are not deleted, set state='unknown' ;; b. .... ;; -(define (db:clean-up dbstruct) - - (debug:print 0 "ERROR: db clean up not ported yet") - - (let* ((db (db:get-db dbstruct #f)) +(define (db:clean-up dbdat) + ;; (debug:print 0 "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db") + (let* ((db (db:dbdat-get-db dbdat)) (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests)+(SELECT count(id) FROM runs);")) (statements (map (lambda (stmt) (sqlite3:prepare db stmt)) (list @@ -717,10 +1038,99 @@ ;; delete all runs that are state='deleted' "DELETE FROM runs WHERE state='deleted';" ;; delete empty runs "DELETE FROM runs WHERE id NOT IN (SELECT DISTINCT r.id FROM runs AS r INNER JOIN tests AS t ON t.run_id=r.id);" )))) + (db:delay-if-busy dbdat) + (sqlite3:with-transaction + db + (lambda () + (sqlite3:for-each-row (lambda (tot) + (debug:print-info 0 "Records count before clean: " tot)) + count-stmt) + (map sqlite3:execute statements) + (sqlite3:for-each-row (lambda (tot) + (debug:print-info 0 "Records count after clean: " tot)) + count-stmt))) + (map sqlite3:finalize! statements) + (sqlite3:finalize! count-stmt) + ;; (db:find-and-mark-incomplete db) + (db:delay-if-busy dbdat) + (sqlite3:execute db "VACUUM;"))) + +;; Clean out old junk and vacuum the database +;; +;; Ultimately do something like this: +;; +;; 1. Look at test records either deleted or part of deleted run: +;; a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown' +;; b. If test dir gone, delete the test record +;; 2. Look at run records +;; a. If have tests that are not deleted, set state='unknown' +;; b. .... 
+;; +(define (db:clean-up-rundb dbdat) + ;; (debug:print 0 "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db") + (let* ((db (db:dbdat-get-db dbdat)) + (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests);")) + (statements + (map (lambda (stmt) + (sqlite3:prepare db stmt)) + (list + ;; delete all tests that belong to runs that are 'deleted' + ;; (conc "DELETE FROM tests WHERE run_id NOT IN (" (string-intersperse (map conc valid-runs) ",") ");") + ;; delete all tests that are 'DELETED' + "DELETE FROM tests WHERE state='DELETED';" + )))) + (db:delay-if-busy dbdat) + (sqlite3:with-transaction + db + (lambda () + (sqlite3:for-each-row (lambda (tot) + (debug:print-info 0 "Records count before clean: " tot)) + count-stmt) + (map sqlite3:execute statements) + (sqlite3:for-each-row (lambda (tot) + (debug:print-info 0 "Records count after clean: " tot)) + count-stmt))) + (map sqlite3:finalize! statements) + (sqlite3:finalize! count-stmt) + ;; (db:find-and-mark-incomplete db) + (db:delay-if-busy dbdat) + (sqlite3:execute db "VACUUM;"))) + +;; Clean out old junk and vacuum the database +;; +;; Ultimately do something like this: +;; +;; 1. Look at test records either deleted or part of deleted run: +;; a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown' +;; b. If test dir gone, delete the test record +;; 2. Look at run records +;; a. If have tests that are not deleted, set state='unknown' +;; b. .... 
+;; +(define (db:clean-up-maindb dbdat) + ;; (debug:print 0 "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db") + (let* ((db (db:dbdat-get-db dbdat)) + (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM runs);")) + (statements + (map (lambda (stmt) + (sqlite3:prepare db stmt)) + (list + ;; delete all tests that belong to runs that are 'deleted' + ;; (conc "DELETE FROM tests WHERE run_id NOT IN (" (string-intersperse (map conc valid-runs) ",") ");") + ;; delete all tests that are 'DELETED' + "DELETE FROM runs WHERE state='deleted';" + ))) + (dead-runs '())) + (sqlite3:for-each-row + (lambda (run-id) + (set! dead-runs (cons run-id dead-runs))) + db + "SELECT id FROM runs WHERE state='deleted';") + (db:delay-if-busy dbdat) (sqlite3:with-transaction db (lambda () (sqlite3:for-each-row (lambda (tot) (debug:print-info 0 "Records count before clean: " tot)) @@ -729,12 +1139,14 @@ (sqlite3:for-each-row (lambda (tot) (debug:print-info 0 "Records count after clean: " tot)) count-stmt))) (map sqlite3:finalize! statements) (sqlite3:finalize! count-stmt) - (db:find-and-mark-incomplete db) - (sqlite3:execute db "VACUUM;"))) + ;; (db:find-and-mark-incomplete db) + (db:delay-if-busy dbdat) + (sqlite3:execute db "VACUUM;") + dead-runs)) ;;====================================================================== ;; M E T A G E T A N D S E T V A R S ;;====================================================================== @@ -745,15 +1157,18 @@ ;; (define (db:get-var dbstruct var) (let* ((start-ms (current-milliseconds)) (throttle (let ((t (config-lookup *configdat* "setup" "throttle"))) (if t (string->number t) t))) - (res #f)) + (res #f) + (dbdat (db:get-db dbstruct #f)) + (db (db:dbdat-get-db dbdat))) + (db:delay-if-busy dbdat) (sqlite3:for-each-row (lambda (val) (set! res val)) - (db:get-db dbstruct #f) + db "SELECT val FROM metadat WHERE var=?;" var) ;; convert to number if can (if (string? 
res) (let ((valnum (string->number res))) (if valnum (set! res valnum)))) @@ -766,14 +1181,20 @@ (debug:print-info 4 "launch throttle factor=" *global-delta*) (set! *last-global-delta-printed* *global-delta*))) res)) (define (db:set-var dbstruct var val) - (sqlite3:execute (db:get-db dbstruct #f) "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" var val)) + (let ((dbdat (db:get-db dbstruct #f)) + (db (db:dbdat-get-db dbdat))) + (db:delay-if-busy dbdat) + (sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" var val))) (define (db:del-var dbstruct var) - (sqlite3:execute (db:get-db dbstruct #f) "DELETE FROM metadat WHERE var=?;" var)) + ;; (db:delay-if-busy) + (db:with-db dbstruct #f #t + (lambda (db) + (sqlite3:execute db "DELETE FROM metadat WHERE var=?;" var)))) ;; use a global for some primitive caching, it is just silly to ;; re-read the db over and over again for the keys since they never ;; change @@ -786,11 +1207,11 @@ (db:with-db dbstruct #f #f (lambda (db) (sqlite3:for-each-row (lambda (key) (set! res (cons key res))) - (db:get-db dbstruct #f) + db "SELECT fieldname FROM keys ORDER BY id DESC;"))) (set! *db-keys* res) res))) ;; look up values in a header/data structure @@ -811,28 +1232,38 @@ ;;====================================================================== ;; R U N S ;;====================================================================== (define (db:get-run-name-from-id dbstruct run-id) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (runname) - (set! res runname)) - (db:get-db dbstruct #f) - "SELECT runname FROM runs WHERE id=?;" - run-id) - res)) + (db:with-db + dbstruct + #f ;; this is for the main runs db + #f ;; does not modify db + (lambda (db) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (runname) + (set! res runname)) + db + "SELECT runname FROM runs WHERE id=?;" + run-id) + res)))) (define (db:get-run-key-val dbstruct run-id key) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (val) - (set! 
res val)) - (db:get-db dbstruct #f) - (conc "SELECT " key " FROM runs WHERE id=?;") - run-id) - res)) + (db:with-db + dbstruct + #f + #f + (lambda (db) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (val) + (set! res val)) + db + (conc "SELECT " key " FROM runs WHERE id=?;") + run-id) + res)))) ;; keys list to key1,key2,key3 ... (define (runs:get-std-run-fields keys remfields) (let* ((header (append keys remfields)) (keystr (conc (keys->keystr keys) "," @@ -855,11 +1286,12 @@ ;; register a test run with the db, this accesses the main.db and does NOT ;; use server api ;; (define (db:register-run dbstruct keyvals runname state status user) - (let* ((db (db:get-db dbstruct #f)) + (let* ((dbdat (db:get-db dbstruct #f)) + (db (db:dbdat-get-db dbdat)) (keys (map car keyvals)) (keystr (keys->keystr keys)) (comma (if (> (length keys) 0) "," "")) (andstr (if (> (length keys) 0) " AND " "")) (valslots (keys->valslots keys)) ;; ?,?,? ... @@ -868,20 +1300,23 @@ (key=?str (string-intersperse (map (lambda (k)(conc k "=?")) keys) " AND "))) (debug:print 3 "keys: " keys " allvals: " allvals " keyvals: " keyvals " key=?str is " key=?str) (debug:print 2 "NOTE: using target " (string-intersperse (map cadr keyvals) "/") " for this run") (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and" (let ((res #f)) + (db:delay-if-busy dbdat) (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time" comma keystr ") VALUES (?,?,?,?,strftime('%s','now')" comma valslots ");") allvals) + (db:delay-if-busy dbdat) (apply sqlite3:for-each-row (lambda (id) (set! res id)) db (let ((qry (conc "SELECT id FROM runs WHERE (runname=? " andstr key=?str ");"))) ;(debug:print 4 "qry: " qry) qry) qryvals) + (db:delay-if-busy dbdat) (sqlite3:execute db "UPDATE runs SET state=?,status=?,event_time=strftime('%s','now') WHERE id=? 
AND state='deleted';" state status res) res) (begin (debug:print 0 "ERROR: Called without all necessary keys") #f)))) @@ -981,76 +1416,105 @@ (keys (db:get-keys dbstruct)) (header keys) ;; (map key:get-fieldname keys)) (keystr (keys->keystr keys)) (qrystr (conc "SELECT " keystr " FROM runs WHERE state != 'deleted';")) (seen (make-hash-table))) - (sqlite3:for-each-row - (lambda (a . x) - (let ((targ (cons a x))) - (if (not (hash-table-ref/default seen targ #f)) - (begin - (hash-table-set! seen targ #t) - (set! res (cons (apply vector targ) res)))))) - (db:get-db dbstruct #f) - qrystr) - (debug:print-info 11 "db:get-targets END qrystr: " qrystr ) - (vector header res))) + (db:with-db + dbstruct + #f + #f + (lambda (db) + (sqlite3:for-each-row + (lambda (a . x) + (let ((targ (cons a x))) + (if (not (hash-table-ref/default seen targ #f)) + (begin + (hash-table-set! seen targ #t) + (set! res (cons (apply vector targ) res)))))) + db + qrystr) + (debug:print-info 11 "db:get-targets END qrystr: " qrystr ) + (vector header res))))) ;; just get count of runs (define (db:get-num-runs dbstruct runpatt) - (let ((numruns 0)) - (debug:print-info 11 "db:get-num-runs START " runpatt) - (sqlite3:for-each-row - (lambda (count) - (set! numruns count)) - (db:get-db dbstruct #f) - "SELECT COUNT(id) FROM runs WHERE runname LIKE ? AND state != 'deleted';" runpatt) - (debug:print-info 11 "db:get-num-runs END " runpatt) - numruns)) + (db:with-db + dbstruct + #f + #f + (lambda (db) + (let ((numruns 0)) + (debug:print-info 11 "db:get-num-runs START " runpatt) + (sqlite3:for-each-row + (lambda (count) + (set! numruns count)) + db + "SELECT COUNT(id) FROM runs WHERE runname LIKE ? AND state != 'deleted';" runpatt) + (debug:print-info 11 "db:get-num-runs END " runpatt) + numruns)))) (define (db:get-all-run-ids dbstruct) - (let ((run-ids '())) - (sqlite3:for-each-row - (lambda (run-id) - (set! 
run-ids (cons run-id run-ids))) - (db:get-db dbstruct #f) - "SELECT id FROM runs WHERE state != 'deleted';") - (reverse run-ids))) + (db:with-db + dbstruct + #f + #f + (lambda (db) + (let ((run-ids '())) + (sqlite3:for-each-row + (lambda (run-id) + (set! run-ids (cons run-id run-ids))) + db + "SELECT id FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;") + (reverse run-ids))))) ;; get some basic run stats ;; ;; ( (runname (( state count ) ... )) ;; ( ... (define (db:get-run-stats dbstruct) - (let ((totals (make-hash-table)) - (res '()) - (runs-info '())) + (let* ((dbdat (db:get-db dbstruct #f)) + (db (db:dbdat-get-db dbdat)) + (totals (make-hash-table)) + (curr (make-hash-table)) + (res '()) + (runs-info '())) ;; First get all the runname/run-ids + (db:delay-if-busy dbdat) (sqlite3:for-each-row (lambda (run-id runname) (set! runs-info (cons (list run-id runname) runs-info))) - (db:get-db dbstruct #f) + db "SELECT id,runname FROM runs WHERE state != 'deleted';") ;; for each run get stats data (for-each (lambda (run-info) - (let ((run-id (car run-info)) - (run-name (cadr run-info))) - (sqlite3:for-each-row - (lambda (state count) - (if (string? state) - (let* ((stateparts (string-split state "|")) - (newstate (conc (car stateparts) "\n" (cadr stateparts)))) - (hash-table-set! totals newstate (+ (hash-table-ref/default totals newstate 0) count)) - (set! res (cons (list run-name newstate count) res))))) - (db:get-db dbstruct run-id) - "SELECT state||'|'||status AS s,count(id) FROM tests AS t ORDER BY s DESC;" ) - ;; (set! res (reverse res)) + ;; get the net state/status counts for this run + (let* ((run-id (car run-info)) + (run-name (cadr run-info))) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (sqlite3:for-each-row + (lambda (state status count) + (let ((netstate (if (equal? state "COMPLETED") status state))) + (if (string? netstate) + (begin + (hash-table-set! 
totals netstate (+ (hash-table-ref/default totals netstate 0) count)) + (hash-table-set! curr netstate (+ (hash-table-ref/default curr netstate 0) count)))))) + db + "SELECT state,status,count(id) FROM tests AS t GROUP BY state,status ORDER BY state,status DESC;") + ;; add the per run counts to res + (for-each (lambda (state) + (set! res (cons (list run-name state (hash-table-ref curr state)) res))) + (sort (hash-table-keys curr) string>=)) + (set! curr (make-hash-table)))))) + runs-info) (for-each (lambda (state) (set! res (cons (list "Totals" state (hash-table-ref totals state)) res))) - (sort (hash-table-keys totals) string>=)))) - runs-info) + (sort (hash-table-keys totals) string>=)) res)) ;; db:get-runs-by-patt ;; get runs by list of criteria ;; register a test run with the db @@ -1086,109 +1550,145 @@ (db:with-db dbstruct #f #f ;; reads db, does not write to it. (lambda (db) (sqlite3:for-each-row (lambda (a . r) (set! res (cons (list->vector (cons a r)) res))) - (db:get-db dbstruct #f) + db qry-str runnamepatt))) (vector header res))) ;; use (get-value-by-header (db:get-header runinfo)(db:get-rows runinfo)) (define (db:get-run-info dbstruct run-id) ;;(if (hash-table-ref/default *run-info-cache* run-id #f) ;; (hash-table-ref *run-info-cache* run-id) - (let* ((res (vector #f #f #f #f)) + (let* ((dbdat (db:get-db dbstruct #f)) + (db (db:dbdat-get-db dbdat)) + (res (vector #f #f #f #f)) (keys (db:get-keys dbstruct)) (remfields (list "id" "runname" "state" "status" "owner" "event_time")) (header (append keys remfields)) (keystr (conc (keys->keystr keys) "," (string-intersperse remfields ",")))) (debug:print-info 11 "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr) + (db:delay-if-busy dbdat) (sqlite3:for-each-row (lambda (a . x) (set! res (apply vector a x))) - (db:get-db dbstruct #f) + db (conc "SELECT " keystr " FROM runs WHERE id=? 
AND state != 'deleted';") run-id) (debug:print-info 11 "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr) (let ((finalres (vector header res))) ;; (hash-table-set! *run-info-cache* run-id finalres) finalres))) (define (db:set-comment-for-run dbstruct run-id comment) - (sqlite3:execute (db:get-db dbstruct #f) "UPDATE runs SET comment=? WHERE id=?;" comment ;; (sdb:qry 'getid comment) - run-id)) + (db:with-db + dbstruct + #f + #f + (lambda (db) + (sqlite3:execute db "UPDATE runs SET comment=? WHERE id=?;" comment ;; (sdb:qry 'getid comment) + run-id)))) ;; does not (obviously!) removed dependent data. But why not!!? (define (db:delete-run dbstruct run-id) ;; First set any related tests to DELETED - (let ((db (db:get-db dbstruct run-id))) - (sqlite3:execute db "UPDATE tests SET state='DELETED',comment='';") - (sqlite3:execute db "DELETE FROM test_steps;") - (sqlite3:execute db "DELETE FROM test_data;") - (sqlite3:execute (db:get-db dbstruct #f) "UPDATE runs SET state='deleted',comment='' WHERE id=?;" run-id))) + (let* ((rdbdat (db:get-db dbstruct run-id)) + (rdb (db:dbdat-get-db rdbdat)) + (dbdat (db:get-db dbstruct #f)) + (db (db:dbdat-get-db dbdat))) + (db:delay-if-busy rdbdat) + (sqlite3:execute rdb "UPDATE tests SET state='DELETED',comment='';") + (sqlite3:execute rdb "DELETE FROM test_steps;") + (sqlite3:execute rdb "DELETE FROM test_data;") + (db:delay-if-busy dbdat) + (sqlite3:execute db "UPDATE runs SET state='deleted',comment='' WHERE id=?;" run-id))) (define (db:update-run-event_time dbstruct run-id) - (sqlite3:execute (db:get-db dbstruct #f) "UPDATE runs SET event_time=strftime('%s','now') WHERE id=?;" run-id)) + (db:with-db + dbstruct + #f + #t + (lambda (db) + (sqlite3:execute db "UPDATE runs SET event_time=strftime('%s','now') WHERE id=?;" run-id)))) (define (db:lock/unlock-run dbstruct run-id lock unlock user) - (let ((newlockval (if lock "locked" - (if unlock - "unlocked" - "locked")))) ;; semi-failsafe - (sqlite3:execute 
(db:get-db dbstruct #f) "UPDATE runs SET state=? WHERE id=?;" newlockval run-id) - (sqlite3:execute (db:get-db dbstruct #f) "INSERT INTO access_log (user,accessed,args) VALUES(?,strftime('%s','now'),?);" - user (conc newlockval " " run-id)) - (debug:print-info 1 "" newlockval " run number " run-id))) - -(define (db:set-run-status db run-id status #!key (msg #f)) - (if msg - (sqlite3:execute db "UPDATE runs SET status=?,comment=? WHERE id=?;" status msg run-id) - (sqlite3:execute db "UPDATE runs SET status=? WHERE id=?;" status run-id))) - -(define (db:get-run-status db run-id) + (db:with-db + dbstruct + #f + #t + (lambda (db) + (let ((newlockval (if lock "locked" + (if unlock + "unlocked" + "locked")))) ;; semi-failsafe + (sqlite3:execute db "UPDATE runs SET state=? WHERE id=?;" newlockval run-id) + (sqlite3:execute db "INSERT INTO access_log (user,accessed,args) VALUES(?,strftime('%s','now'),?);" + user (conc newlockval " " run-id)) + (debug:print-info 1 "" newlockval " run number " run-id))))) + +(define (db:set-run-status dbstruct run-id status msg) + (let* ((dbdat (db:get-db dbstruct #f)) + (db (db:dbdat-get-db dbdat))) + (db:delay-if-busy dbdat) + (if msg + (sqlite3:execute db "UPDATE runs SET status=?,comment=? WHERE id=?;" status msg run-id) + (sqlite3:execute db "UPDATE runs SET status=? WHERE id=?;" status run-id)))) + +(define (db:get-run-status dbstruct run-id) (let ((res "n/a")) - (sqlite3:for-each-row - (lambda (status) - (set! res status)) - db - "SELECT status FROM runs WHERE id=?;" - run-id) - res)) + (db:with-db + dbstruct + #f + #f + (lambda (db) + (sqlite3:for-each-row + (lambda (status) + (set! res status)) + db + "SELECT status FROM runs WHERE id=?;" + run-id) + res)))) ;;====================================================================== ;; K E Y S ;;====================================================================== ;; get key val pairs for a given run-id ;; ( (FIELDNAME1 keyval1) (FIELDNAME2 keyval2) ... 
) (define (db:get-key-val-pairs dbstruct run-id) (let* ((keys (db:get-keys dbstruct)) - (res '())) + (res '()) + (dbdat (db:get-db dbstruct #f)) + (db (db:dbdat-get-db dbdat))) (for-each (lambda (key) (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) - ;; (debug:print 0 "qry: " qry) + (db:delay-if-busy dbdat) (sqlite3:for-each-row (lambda (key-val) (set! res (cons (list key key-val) res))) - (db:get-db dbstruct #f) qry run-id))) + db qry run-id))) keys) (reverse res))) ;; get key vals for a given run-id (define (db:get-key-vals dbstruct run-id) (let* ((keys (db:get-keys dbstruct)) - (res '())) + (res '()) + (dbdat (db:get-db dbstruct #f)) + (db (db:dbdat-get-db dbdat))) (for-each (lambda (key) (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) + (db:delay-if-busy dbdat) (sqlite3:for-each-row (lambda (key-val) (set! res (cons key-val res))) - (db:get-db dbstruct #f) qry run-id))) + db qry run-id))) keys) (let ((final-res (reverse res))) (hash-table-set! *keyvals* run-id final-res) final-res))) @@ -1223,13 +1723,13 @@ ;; i.e. these lists define what to NOT show. ;; states and statuses are required to be lists, empty is ok ;; not-in #t = above behaviour, #f = must match (define (db:get-tests-for-run dbstruct run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals) (if (not (number? 
run-id)) - (begin - (debug:print 0 "ERROR: call to db:get-tests-for-run with bad run-id=" run-id) - (print-call-chain) + (begin ;; no need to treat this as an error by default + (debug:print 4 "WARNING: call to db:get-tests-for-run with bad run-id=" run-id) + ;; (print-call-chain (current-error-port)) '()) (let* ((qryvalstr (case qryvals ((shortlist) "id,run_id,testname,item_path,state,status") ((#f) db:test-record-qry-selector) ;; "id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment") (else qryvals))) @@ -1336,14 +1836,25 @@ test-id))) res)) ;; get a useful subset of the tests data (used in dashboard ;; use db:mintests-get-{id ,run_id,testname ...} +;; (define (db:get-tests-for-runs-mindata dbstruct run-ids testpatt states statuses not-in) - (db:get-tests-for-runs dbstruct run-ids testpatt states statuses not-in: not-in qryvals: "id,run_id,testname,state,status,event_time,item_path")) + (debug:print 0 "ERROR: BROKN!") + ;; (db:get-tests-for-runs dbstruct run-ids testpatt states statuses not-in: not-in qryvals: "id,run_id,testname,state,status,event_time,item_path")) +) +;; get a useful subset of the tests data (used in dashboard +;; +(define (db:get-tests-for-run-mindata dbstruct run-id testpatt states statuses not-in) + (db:get-tests-for-run dbstruct run-id testpatt states statuses #f #f not-in #f #f "id,run_id,testname,state,status,event_time,item_path")) + +;; do not use. +;; (define (db:get-tests-for-runs dbstruct run-ids testpatt states statuses #!key (not-in #f)(qryvals #f)) + ;; (db:delay-if-busy) (let ((res '())) (for-each (lambda (run-id) (set! 
res (append res @@ -1355,151 +1866,234 @@ ;; Convert calling routines to get list of run-ids and loop, do not use the get-tests-for-runs ;; (define (db:delete-test-records dbstruct run-id test-id) - (let ((db (db:get-db dbstruct run-id))) - (db:general-call db 'delete-test-step-records (list test-id)) - (db:general-call db 'delete-test-data-records (list test-id)) + (let* ((dbdat (db:get-db dbstruct run-id)) + (db (db:dbdat-get-db dbdat))) + (db:general-call dbdat 'delete-test-step-records (list test-id)) + ;; (db:delay-if-busy) + (db:general-call dbdat 'delete-test-data-records (list test-id)) (sqlite3:execute db "UPDATE tests SET state='DELETED',status='n/a',comment='' WHERE id=?;" test-id))) -(define (db:delete-tests-for-run dbdbstruct run-id) - (let ((db (db:get-db dbstruct run-id))) - (sqlite3:execute db "DELETE FROM tests WHERE run_id=?;" run-id))) - (define (db:delete-old-deleted-test-records dbstruct) (let ((run-ids (db:get-all-run-ids dbstruct)) (targtime (- (current-seconds)(* 30 24 60 60)))) ;; one month in the past (for-each (lambda (run-id) - (sqlite3:execute (db:get-db dbstruct run-id) "DELETE FROM tests WHERE state='DELETED' AND event_timenumber fieldname fields) + (if (null? fields) + #f + (let loop ((hed (car fields)) + (tal (cdr fields)) + (indx 0)) + (if (equal? fieldname hed) + indx + (if (null? tal) + #f + (loop (car tal)(cdr tal)(+ indx 1))))))) (define db:test-record-qry-selector (string-intersperse db:test-record-fields ",")) + ;; NOTE: Use db:test-get* to access records ;; NOTE: This needs rundir decoding? Decide, decode here or where used? For the moment decode where used. (define (db:get-all-tests-info-by-run-id dbstruct run-id) - (let ((db (db:get-db dbstruct run-id)) - (res '())) + (let* ((dbdat (if (vector? 
dbstruct) + (db:get-db dbstruct run-id) + dbstruct)) ;; still settling on when to use dbstruct or dbdat + (db (db:dbdat-get-db dbdat)) + (res '())) + (db:delay-if-busy dbdat) (sqlite3:for-each-row - (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir) - ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 - (set! res (cons (vector id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir) + (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir attemptnum) + ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + (set! res (cons (vector id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir attemptnum) res))) - (db:get-db dbstruct run-id) + db (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE state != 'DELETED' AND run_id=?;") run-id) res)) (define (db:replace-test-records dbstruct run-id testrecs) @@ -1506,107 +2100,173 @@ (db:with-db dbstruct run-id #t (lambda (db) (let* ((qmarks (string-intersperse (make-list (length db:test-record-fields) "?") ",")) (qrystr (conc "INSERT OR REPLACE INTO tests (" db:test-record-qry-selector ") VALUES (" qmarks ");")) (qry (sqlite3:prepare db qrystr))) - ;; (debug:print 8 "INFO: replace-test-records, qrystr=" qrystr) - (for-each - (lambda (rec) - (debug:print 0 "INFO: Inserting values: " (string-intersperse (map conc (vector->list rec)) ", ")) - (apply sqlite3:execute qry (vector->list rec))) - testrecs) + (debug:print 0 "INFO: migrating test records for run with id " run-id) + (sqlite3:with-transaction + db + (lambda () + (for-each + (lambda (rec) + ;; (debug:print 0 "INFO: Inserting values: " (string-intersperse (map conc (vector->list rec)) ",") "\n") + (apply sqlite3:execute qry (vector->list rec))) + testrecs))) (sqlite3:finalize! 
qry))))) + +;; map a test-id into the proper range +;; +(define (db:adj-test-id mtdb min-test-id test-id) + (if (>= test-id min-test-id) + test-id + (let loop ((new-id min-test-id)) + (let ((test-id-found #f)) + (sqlite3:for-each-row + (lambda (id) + (set! test-id-found id)) + (db:dbdat-get-db mtdb) + "SELECT id FROM tests WHERE id=?;" + new-id) + ;; if test-id-found then need to try again + (if test-id-found + (loop (+ new-id 1)) + (begin + (debug:print-info 0 "New test id " new-id " selected for test with id " test-id) + (sqlite3:execute mtdb "UPDATE tests SET id=? WHERE id=?;" new-id test-id))))))) + +;; move test ids into the 30k * run_id range +;; +(define (db:prep-megatest.db-adj-test-ids mtdb run-id testrecs) + (debug:print-info 0 "Adjusting test ids in megatest.db for run " run-id) + (let ((min-test-id (* run-id 30000))) + (for-each + (lambda (testrec) + (let* ((test-id (vector-ref testrec (db:field->number "id" db:test-record-fields)))) + (db:adj-test-id (db:dbdat-get-db mtdb) min-test-id test-id))) + testrecs))) +;; 1. move test ids into the 30k * run_id range +;; 2. move step ids into the 30k * run_id range +;; +(define (db:prep-megatest.db-for-migration mtdb) + (let* ((run-ids (db:get-all-run-ids mtdb))) + (for-each + (lambda (run-id) + (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id))) + (db:prep-megatest.db-adj-test-ids (db:dbdat-get-db mtdb) run-id testrecs))) + run-ids))) ;; Get test data using test_id (define (db:get-test-info-by-id dbstruct run-id test-id) - (let ((db (db:get-db dbstruct run-id)) - (res #f)) - (sqlite3:for-each-row - (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id) - ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 - (set! 
res (vector id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id))) - (db:get-db dbstruct run-id) - (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id=?;") - test-id) - res)) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (let ((res #f)) + (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test + (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum) + ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 + (set! res (vector id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum))) + db + (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id=?;") + test-id) + res)))) ;; Use db:test-get* to access ;; Get test data using test_ids. NB// Only works within a single run!! ;; (define (db:get-test-info-by-ids dbstruct run-id test-ids) - (let ((db (db:get-db dbstruct run-id)) - (res '())) - (sqlite3:for-each-row - (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id) - ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 - (set! res (cons (vector id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id) - res))) - (db:get-db dbstruct run-id) - (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id in (" - (string-intersperse (map conc test-ids) ",") ");")) - res)) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (let ((res '())) + (sqlite3:for-each-row + (lambda (a . b) + ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 + (set! 
res (cons (apply vector a b) res))) + db + (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id in (" + (string-intersperse (map conc test-ids) ",") ");")) + res)))) (define (db:get-test-info dbstruct run-id testname item-path) - (let ((db (db:get-db dbstruct run-id)) - (res #f)) - (sqlite3:for-each-row - (lambda (a . b) - (set! res (apply vector a b))) - (db:get-db dbstruct run-id) - (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE testname=? AND item_path=?;") - test-name item-path) - res)) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (a . b) + (set! res (apply vector a b))) + db + (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE testname=? AND item_path=?;") + test-name item-path) + res)))) (define (db:test-get-rundir-from-test-id dbstruct run-id test-id) - (let ((db (db:get-db dbstruct run-id)) - (res #f)) - (sqlite3:for-each-row - (lambda (tpath) - (set! res tpath)) - (db:get-db dbstruct run-id) - "SELECT rundir FROM tests WHERE id=?;" - test-id) - res)) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (db:first-result-default + db + "SELECT rundir FROM tests WHERE id=?;" + #f ;; default result + test-id)))) ;;====================================================================== ;; S T E P S ;;====================================================================== (define (db:teststep-set-status! 
dbstruct run-id test-id teststep-name state-in status-in comment logfile) - (let ((db (db:get-db dbstruct run-id))) - (sqlite3:execute - db - "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);" - test-id teststep-name state-in status-in (current-seconds) - ;; (sdb:qry 'getid - (if comment comment "") ;; ) - ;; (sdb:qry 'getid - (if logfile logfile "")))) ;; ) + (db:with-db + dbstruct + run-id + #t + (lambda (db) + (sqlite3:execute + db + "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);" + test-id teststep-name state-in status-in (current-seconds) + (if comment comment "") + (if logfile logfile ""))))) ;; db-get-test-steps-for-run (define (db:get-steps-for-test dbstruct run-id test-id) - (let* ((db (db:get-db dbstruct run-id)) - (res '())) - (sqlite3:for-each-row - (lambda (id test-id stepname state status event-time logfile) - (set! res (cons (vector id test-id stepname state status event-time (if (string? logfile) logfile "")) res))) - db - "SELECT id,test_id,stepname,state,status,event_time,logfile FROM test_steps WHERE status != 'DELETED' AND test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; - test-id) - (reverse res))) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (let* ((res '())) + (sqlite3:for-each-row + (lambda (id test-id stepname state status event-time logfile) + (set! res (cons (vector id test-id stepname state status event-time (if (string? logfile) logfile "")) res))) + db + "SELECT id,test_id,stepname,state,status,event_time,logfile FROM test_steps WHERE status != 'DELETED' AND test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; + test-id) + (reverse res))))) (define (db:get-steps-data dbstruct run-id test-id) - (let ((db (db:get-db dbstruct run-id)) - (res '())) - (sqlite3:for-each-row - (lambda (id test-id stepname state status event-time logfile) - (set! 
res (cons (vector id test-id stepname state status event-time (if (string? logfile) logfile "")) res))) - db - "SELECT id,test_id,stepname,state,status,event_time,logfile FROM test_steps WHERE status != 'DELETED' AND test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; - test-id) - (reverse res))) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (let ((res '())) + (sqlite3:for-each-row + (lambda (id test-id stepname state status event-time logfile) + (set! res (cons (vector id test-id stepname state status event-time (if (string? logfile) logfile "")) res))) + db + "SELECT id,test_id,stepname,state,status,event_time,logfile FROM test_steps WHERE status != 'DELETED' AND test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; + test-id) + (reverse res))))) ;;====================================================================== ;; T E S T D A T A ;;====================================================================== @@ -1614,33 +2274,36 @@ ;; Roll up test_data pass/fail results ;; look at the test_data status field, ;; if all are pass (any case) and the test status is PASS or NULL or '' then set test status to PASS. ;; if one or more are fail (any case) then set test status to PASS, non "pass" or "fail" are ignored (define (db:test-data-rollup dbstruct run-id test-id status) - (let ((db (db:get-db dbstruct run-id)) - (fail-count 0) - (pass-count 0)) + (let* ((dbdat (db:get-db dbstruct run-id)) + (db (db:dbdat-get-db dbdat)) + (fail-count 0) + (pass-count 0)) + (db:delay-if-busy dbdat) (sqlite3:for-each-row (lambda (fcount pcount) (set! fail-count fcount) (set! pass-count pcount)) db "SELECT (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'fail') AS fail_count, (SELECT count(id) FROM test_data WHERE test_id=? 
AND status like 'pass') AS pass_count;" test-id test-id) ;; Now rollup the counts to the central megatest.db - (db:general-call db 'pass-fail-counts (list pass-count fail-count test-id)) + (db:general-call dbdat 'pass-fail-counts (list pass-count fail-count test-id)) ;; if the test is not FAIL then set status based on the fail and pass counts. - (db:general-call db 'test_data-pf-rollup (list test-id test-id test-id test-id)))) + (db:general-call dbdat 'test_data-pf-rollup (list test-id test-id test-id test-id)))) (define (db:csv->test-data dbstruct run-id test-id csvdata) (debug:print 4 "test-id " test-id ", csvdata: " csvdata) - (let ((db (db:get-db dbstruct run-id)) - (csvlist (csv->list (make-csv-reader - (open-input-string csvdata) - '((strip-leading-whitespace? #t) - (strip-trailing-whitespace? #t)) )))) ;; (csv->list csvdata))) + (let* ((dbdat (db:get-db dbstruct run-id)) + (db (db:dbdat-get-db dbdat)) + (csvlist (csv->list (make-csv-reader + (open-input-string csvdata) + '((strip-leading-whitespace? #t) + (strip-trailing-whitespace? #t)))))) ;; (csv->list csvdata))) (for-each (lambda (csvrow) (let* ((padded-row (take (append csvrow (list #f #f #f #f #f #f #f #f #f)) 9)) (category (list-ref padded-row 0)) (variable (list-ref padded-row 1)) @@ -1684,28 +2347,31 @@ ((>=) (if (>= value expected) "pass" "fail")) ((<=) (if (<= value expected) "pass" "fail")) (else (conc "ERROR: bad tol comparator " tol)))))) (debug:print 4 "AFTER2: category: " category " variable: " variable " value: " value ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment) + (db:delay-if-busy dbdat) (sqlite3:execute db "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment,status,type) VALUES (?,?,?,?,?,?,?,?,?,?);" test-id category variable value expected tol units (if comment comment "") status type))) csvlist))) ;;====================================================================== ;; Misc. 
test related queries ;;====================================================================== (define (db:get-run-ids-matching-target dbstruct keynames target res runname testpatt statepatt statuspatt) - (let* ((row-ids '()) + (let* ((dbdat (db:get-db dbstruct #f)) + (db (db:dbdat-get-db dbdat)) + (row-ids '()) (keystr (string-intersperse (map (lambda (key val) (conc key " like '" val "'")) keynames (string-split target "/")) " AND ")) ;; (testqry (tests:match->sqlqry testpatt)) - (runsqry (sqlite3:prepare (db:get-db dbstruct #f)(conc "SELECT id FROM runs WHERE " keystr " AND runname LIKE '" runname "';")))) + (runsqry (sqlite3:prepare db (conc "SELECT id FROM runs WHERE " keystr " AND runname LIKE '" runname "';")))) ;; (debug:print 8 "db:test-get-paths-matching-keynames-target-new\n runsqry=" runsqry "\n tstsqry=" testqry) (sqlite3:for-each-row (lambda (rid) (set! row-ids (cons rid row-ids))) runsqry) @@ -1713,84 +2379,123 @@ row-ids)) (define (db:test-get-paths-matching-keynames-target-new dbstruct run-id keynames target res testpatt statepatt statuspatt runname) (let* ((testqry (tests:match->sqlqry testpatt)) (tstsqry (conc "SELECT rundir FROM tests WHERE " testqry " AND state LIKE '" statepatt "' AND status LIKE '" statuspatt "' ORDER BY event_time ASC;"))) - (sqlite3:for-each-row - (lambda (p) - (set! res (cons p res))) - (db:get-db dbstruct run-id) - tstsqry) - res)) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (sqlite3:for-each-row + (lambda (p) + (set! res (cons p res))) + db + tstsqry) + res)))) + +(define (db:test-toplevel-num-items dbstruct run-id testname) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (let ((res 0)) + (sqlite3:for-each-row + (lambda (num-items) + (set! res num-items)) + db + "SELECT count(id) FROM tests WHERE run_id=? AND testname=? 
AND item_path != '' AND state NOT IN ('DELETED');" + run-id + testname) + res)))) ;;====================================================================== ;; QUEUE UP META, TEST STATUS AND STEPS REMOTE ACCESS ;;====================================================================== ;; NOTE: Can remove the regex and base64 encoding for zmq -(define (db:obj->string obj) - (case *transport-type* +(define (db:obj->string obj #!key (transport 'http)) + (case transport ;; ((fs) obj) ((http fs) (string-substitute (regexp "=") "_" - (base64:base64-encode (with-output-to-string (lambda ()(serialize obj)))) + (base64:base64-encode + (z3:encode-buffer + (with-output-to-string + (lambda ()(serialize obj))))) #t)) - ((zmq)(with-output-to-string (lambda ()(serialize obj)))) + ((zmq nmsg)(with-output-to-string (lambda ()(serialize obj)))) (else obj))) -(define (db:string->obj msg) - (case *transport-type* +(define (db:string->obj msg #!key (transport 'http)) + (case transport ;; ((fs) msg) ((http fs) (if (string? msg) (with-input-from-string - (base64:base64-decode - (string-substitute - (regexp "_") "=" msg #t)) + (z3:decode-buffer + (base64:base64-decode + (string-substitute + (regexp "_") "=" msg #t))) (lambda ()(deserialize))) - (vector #f #f #f))) ;; crude reply for when things go awry - ((zmq)(with-input-from-string msg (lambda ()(deserialize)))) + (begin + (debug:print 0 "ERROR: reception failed. 
Received " msg " but cannot translate it.") + msg))) ;; crude reply for when things go awry + ((zmq nmsg)(with-input-from-string msg (lambda ()(deserialize)))) (else msg))) (define (db:test-set-status-state dbstruct run-id test-id status state msg) - (let ((db (db:get-db dbstruct run-id))) - (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) - (db:general-call db 'set-test-start-time (list test-id))) - (if msg - (db:general-call db 'state-status-msg (list state status msg test-id)) - (db:general-call db 'state-status (list state status test-id))))) + (let ((dbdat (db:get-db dbstruct run-id))) + (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) + (db:general-call dbdat 'set-test-start-time (list test-id))) + (if msg + (db:general-call dbdat 'state-status-msg (list state status msg test-id)) + (db:general-call dbdat 'state-status (list state status test-id))))) (define (db:roll-up-pass-fail-counts dbstruct run-id test-name item-path status) (if (and (not (equal? item-path "")) - (member status '("PASS" "WARN" "FAIL" "WAIVED" "RUNNING" "CHECK" "SKIP"))) - (let ((db (db:get-db dbstruct run-id))) - (db:general-call db 'update-pass-fail-counts (list test-name test-name test-name)) + (member status '("PASS" "WARN" "FAIL" "WAIVED" "RUNNING" "CHECK" "SKIP" "LAUNCHED"))) + (let ((dbdat (db:get-db dbstruct run-id))) + (db:general-call dbdat 'update-pass-fail-counts (list test-name test-name test-name)) (if (equal? status "RUNNING") - (db:general-call db 'top-test-set-running (list test-name)) - (db:general-call db 'top-test-set-per-pf-counts (list test-name test-name test-name))) + (db:general-call dbdat 'top-test-set-running (list test-name)) + (if (equal? 
status "LAUNCHED") + (db:general-call dbdat 'top-test-set (list "LAUNCHED" test-name)) + (db:general-call dbdat 'top-test-set-per-pf-counts (list test-name run-id test-name test-name test-name)))) #f) #f)) (define (db:tests-register-test dbstruct run-id test-name item-path) - (sqlite3:execute (db:get-db dbstruct run-id) 'register-test run-id test-name item-path)) + (db:with-db + dbstruct + run-id + #t + (lambda (db) + (sqlite3:execute db 'register-test run-id test-name item-path)))) (define (db:test-get-logfile-info dbstruct run-id test-name) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (path final_logf) - ;; (let ((path (sdb:qry 'getstr path-id)) - ;; (final_logf (sdb:qry 'getstr final_logf-id))) - (set! logf final_logf) - (set! res (list path final_logf)) - (if (directory? path) - (debug:print 2 "Found path: " path) - (debug:print 2 "No such path: " path))) ;; ) - (db:get-db dbstruct run-id) - "SELECT rundir,final_logf FROM tests WHERE testname=? AND item_path='';" - test-name) - res)) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (path final_logf) + ;; (let ((path (sdb:qry 'getstr path-id)) + ;; (final_logf (sdb:qry 'getstr final_logf-id))) + (set! logf final_logf) + (set! res (list path final_logf)) + (if (directory? path) + (debug:print 2 "Found path: " path) + (debug:print 2 "No such path: " path))) ;; ) + db + "SELECT rundir,final_logf FROM tests WHERE testname=? AND item_path='';" + test-name) + res)))) ;;====================================================================== ;; A G R E G A T E D T R A N S A C T I O N D B W R I T E S ;;====================================================================== @@ -1819,11 +2524,12 @@ END WHERE id=?;") ;; DONE '(test-set-log "UPDATE tests SET final_logf=? WHERE id=?;") ;; DONE ;; '(test-set-rundir-by-test-id "UPDATE tests SET rundir=? WHERE id=?") ;; DONE ;; '(test-set-rundir "UPDATE tests SET rundir=? AND testname=? 
AND item_path=?;") ;; DONE '(test-set-rundir-shortdir "UPDATE tests SET rundir=?,shortdir=? WHERE testname=? AND item_path=?;") - '(delete-tests-in-state "DELETE FROM tests WHERE state=?;") ;; DONE + '(delete-tests-in-state ;; "DELETE FROM tests WHERE state=?;") ;; DONE + "UPDATE tests SET state='DELETED' WHERE state=?") '(tests:test-set-toplog "UPDATE tests SET final_logf=? WHERE run_id=? AND testname=? AND item_path='';") '(update-cpuload-diskfree "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;") ;; DONE '(update-uname-host "UPDATE tests SET uname=?,host=? WHERE id=?;") ;; DONE '(update-test-state "UPDATE tests SET state=? WHERE state=? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") '(update-test-status "UPDATE tests SET status=? WHERE status like ? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") @@ -1830,19 +2536,25 @@ ;; stuff for roll-up-pass-fail-counts '(update-pass-fail-counts "UPDATE tests SET fail_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('FAIL','CHECK')), pass_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('PASS','WARN','WAIVED')) WHERE testname=? AND item_path='';") ;; DONE + '(top-test-set "UPDATE tests SET state=? WHERE testname=? AND item_path='';") ;; DONE '(top-test-set-running "UPDATE tests SET state='RUNNING' WHERE testname=? AND item_path='';") ;; DONE '(top-test-set-per-pf-counts "UPDATE tests SET state=CASE WHEN (SELECT count(id) FROM tests WHERE testname=? AND item_path != '' + AND status NOT IN ('TEN_STRIKES','BLOCKED') AND state in ('RUNNING','NOT_STARTED','LAUNCHED','REMOTEHOSTSTART')) > 0 THEN 'RUNNING' ELSE 'COMPLETED' END, status=CASE + WHEN (SELECT count(id) FROM tests + WHERE run_id=? AND testname=? 
+ AND item_path != '' + AND state IN ('NOT_STARTED','BLOCKED')) > 0 THEN 'FAIL' WHEN fail_count > 0 THEN 'FAIL' WHEN pass_count > 0 AND fail_count=0 THEN 'PASS' WHEN (SELECT count(id) FROM tests WHERE testname=? AND item_path != '' @@ -1849,12 +2561,12 @@ AND status = 'SKIP') > 0 THEN 'SKIP' ELSE 'UNKNOWN' END WHERE testname=? AND item_path='';") ;; DONE ;; STEPS - '(delete-test-step-records "UPDATE test_steps SET status='DELETED' WHERE id=?;") - '(delete-test-data-records "UPDATE test_data SET status='DELETED' WHERE id=?;") ;; using status since no state field + '(delete-test-step-records "UPDATE test_steps SET status='DELETED' WHERE test_id=?;") + '(delete-test-data-records "UPDATE test_data SET status='DELETED' WHERE test_id=?;") ;; using status since no state field )) (define (db:lookup-query qry-name) (let ((q (alist-ref qry-name db:queries))) (if q (car q) #f))) @@ -1880,33 +2592,36 @@ (list #f "Login failed due to mismatch megatest version: " calling-version ", " megatest-version)) (else (hash-table-set! *logged-in-clients* client-signature (current-seconds)) '(#t "successful login")))) -(define (db:general-call db stmtname params) +(define (db:general-call dbdat stmtname params) (let ((query (let ((q (alist-ref (if (string? stmtname) (string->symbol stmtname) stmtname) db:queries))) (if q (car q) #f)))) - (apply sqlite3:execute db query params) - #t)) + (db:delay-if-busy dbdat) + (apply sqlite3:execute (db:dbdat-get-db dbdat) query params) + #t)) ;; BUG or Sillyness, why do I return #t instead of the query result? ;; get the previous records for when these tests were run where all keys match but runname ;; NB// Merge this with test:get-previous-test-run-records? This one looks for all matching tests ;; can use wildcards. Also can likely be factored in with get test paths? ;; ;; Run this remotely!! 
;; (define (db:get-matching-previous-test-run-records dbstruct run-id test-name item-path) - (let* ((db (db:get-db dbstruct #f)) + (let* ((dbdat (db:get-db dbstruct #f)) + (db (db:dbdat-get-db dbdat)) (keys (db:get-keys db)) (selstr (string-intersperse (map (lambda (x)(vector-ref x 0)) keys) ",")) (qrystr (string-intersperse (map (lambda (x)(conc (vector-ref x 0) "=?")) keys) " AND ")) (keyvals #f) (tests-hash (make-hash-table))) ;; first look up the key values from the run selected by run-id + (db:delay-if-busy dbdat) (sqlite3:for-each-row (lambda (a . b) (set! keyvals (cons a b))) db (conc "SELECT " selstr " FROM runs WHERE id=? ORDER BY event_time DESC;") run-id) @@ -1940,58 +2655,129 @@ (hash-table-set! tests-hash full-testname testdat)))) results) (if (null? tal) (map cdr (hash-table->alist tests-hash)) ;; return a list of the most recent tests (loop (car tal)(cdr tal)))))))))) + +(define (db:delay-if-busy dbdat #!key (count 6)) + (if (not (configf:lookup *configdat* "server" "delay-on-busy")) + (and dbdat (db:dbdat-get-db dbdat)) + (if dbdat + (let* ((dbpath (db:dbdat-get-path dbdat)) + (db (db:dbdat-get-db dbdat)) ;; we'll return this so (db:delay--if-busy can be called inline + (dbfj (conc dbpath "-journal"))) + (if (handle-exceptions + exn + (begin + (debug:print-info 0 "WARNING: failed to test for existance of " dbfj) + (thread-sleep! 1) + (db:delay-if-busy count (- count 1))) + (file-exists? dbfj)) + (case count + ((6) + (thread-sleep! 0.2) + (db:delay-if-busy count: 5)) + ((5) + (thread-sleep! 0.4) + (db:delay-if-busy count: 4)) + ((4) + (thread-sleep! 0.8) + (db:delay-if-busy count: 3)) + ((3) + (thread-sleep! 1.6) + (db:delay-if-busy count: 2)) + ((2) + (thread-sleep! 3.2) + (db:delay-if-busy count: 1)) + ((1) + (thread-sleep! 6.4) + (db:delay-if-busy count: 0)) + (else + (debug:print-info 0 "delaying db access due to high database load.") + (thread-sleep! 
12.8)))) + db) + "bogus result from db:delay-if-busy"))) (define (db:test-get-records-for-index-file dbstruct run-id test-name) (let ((res '())) - (sqlite3:for-each-row - (lambda (id itempath state status run_duration logf-id comment-id) - (let ((logf (db:get-string dbstruct logf-id)) - (comment (db:get-string dbstruct comment-id))) - (set! res (cons (vector id itempath state status run_duration logf comment) res))) - (db:get-db dbstruct run-id) - "SELECT id,item_path,state,status,run_duration,final_logf,comment FROM tests WHERE testname=? AND item_path != '';" - test-name) - res))) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (sqlite3:for-each-row + (lambda (id itempath state status run_duration logf comment) + (set! res (cons (vector id itempath state status run_duration logf comment) res))) + db + "SELECT id,item_path,state,status,run_duration,final_logf,comment FROM tests WHERE testname=? AND item_path != '';" + test-name) + res)))) ;;====================================================================== ;; Tests meta data ;;====================================================================== ;; read the record given a testname (define (db:testmeta-get-record dbstruct testname) (let ((res #f)) - (sqlite3:for-each-row - (lambda (id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup) - (set! res (vector id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup))) - (db:get-db dbstruct #f) - "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta WHERE testname=?;" - testname) - res)) + (db:with-db + dbstruct + #f + #f + (lambda (db) + (sqlite3:for-each-row + (lambda (id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup) + (set! 
res (vector id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup))) + db + "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta WHERE testname=?;" + testname) + res)))) ;; create a new record for a given testname (define (db:testmeta-add-record dbstruct testname) - (sqlite3:execute (db:get-db dbstruct #f) "INSERT OR IGNORE INTO test_meta (testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags) VALUES (?,'','','','','','','','');" testname)) + (db:with-db dbstruct #f #f + (lambda (db) + (sqlite3:execute + db + "INSERT OR IGNORE INTO test_meta (testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags) VALUES (?,'','','','','','','','');" testname)))) ;; update one of the testmeta fields (define (db:testmeta-update-field dbstruct testname field value) - (sqlite3:execute (db:get-db dbstruct #f) (conc "UPDATE test_meta SET " field "=? WHERE testname=?;") value testname)) + (db:with-db dbstruct #f #f + (lambda (db) + (sqlite3:execute + db + (conc "UPDATE test_meta SET " field "=? WHERE testname=?;") value testname)))) (define (db:testmeta-get-all dbstruct) - (let ((res '())) - (sqlite3:for-each-row - (lambda (a . b) - (set! res (cons (apply vector a b) res))) - (db:get-db dbstruct run-id) - "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta;") - res)) + (db:with-db dbstruct #f #f + (lambda (db) + (let ((res '())) + (sqlite3:for-each-row + (lambda (a . b) + (set! 
res (cons (apply vector a b) res))) + db + "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta;") + res)))) ;;====================================================================== ;; M I S C M A N A G E M E N T I T E M S ;;====================================================================== + +;; A routine to map itempaths using a itemmap +(define (db:compare-itempaths patha pathb itemmap) + (debug:print-info 6 "ITEMMAP is " itemmap) + (if itemmap + (let* ((mapparts (string-split itemmap)) + (pattern (car mapparts)) + (replacement (if (> (length mapparts) 1) (cadr mapparts) ""))) + (if replacement + (equal? (string-substitute pattern replacement patha) + (string-substitute pattern replacement pathb)) + (equal? (string-substitute pattern "" patha) + (string-substitute pattern "" pathb)))) + (equal? patha pathb))) ;; the new prereqs calculation, looks also at itempath if specified ;; all prereqs must be met: ;; if prereq test with itempath='' is COMPLETED and PASS, WARN, CHECK, or WAIVED then prereq is met ;; if prereq test with itempath=ref-item-path and COMPLETED with PASS, WARN, CHECK, or WAIVED then prereq is met @@ -1998,11 +2784,12 @@ ;; ;; Note: mode 'normal means that tests must be COMPLETED and ok (i.e. PASS, WARN, CHECK, SKIP or WAIVED) ;; mode 'toplevel means that tests must be COMPLETED only ;; mode 'itemmatch or 'itemwait means that tests items must be COMPLETED and (PASS|WARN|WAIVED|CHECK) [[ NB// NOT IMPLEMENTED YET ]] ;; -(define (db:get-prereqs-not-met dbstruct run-id waitons ref-item-path mode) +;; (define (db:get-prereqs-not-met dbstruct run-id waitons ref-item-path mode) +(define (db:get-prereqs-not-met dbstruct run-id waitons ref-item-path #!key (mode '(normal))(itemmap #f)) (if (or (not waitons) (null? waitons)) '() (let* ((unmet-pre-reqs '()) (result '())) @@ -2023,25 +2810,25 @@ (item-path (db:test-get-item-path test)) (is-completed (equal? state "COMPLETED")) (is-running (equal? 
state "RUNNING")) (is-killed (equal? state "KILLED")) (is-ok (member status '("PASS" "WARN" "CHECK" "WAIVED" "SKIP"))) - (same-itempath (equal? ref-item-path item-path))) + (same-itempath (db:compare-itempaths ref-item-path item-path itemmap))) ;; (equal? ref-item-path item-path))) (set! ever-seen #t) (cond ;; case 1, non-item (parent test) is - ((and (equal? item-path "") ;; this is the parent test + ((and (equal? item-path "") ;; this is the parent test of the waiton being examined is-completed - (or is-ok (member mode '(toplevel itemmatch itemwait)))) + (or is-ok (not (null? (lset-intersection eq? mode '(toplevel)))))) ;; itemmatch itemwait)))))) (set! parent-waiton-met #t)) ;; Special case for toplevel and KILLED ((and (equal? item-path "") ;; this is the parent test is-killed - (eq? mode 'toplevel)) + (member 'toplevel mode)) (set! parent-waiton-met #t)) ;; For itemwait mode IFF the previous matching item is good the set parent-waiton-met - ((and (member mode '(itemmatch itemwait)) + ((and (not (null? (lset-intersection eq? mode '(itemmatch itemwait)))) ;; (not (equal? item-path "")) ;; this applies to both top level (to allow launching of next batch) and items same-itempath) (if (and is-completed is-ok) (set! item-waiton-met #t)) (if (and (equal? item-path "") @@ -2048,14 +2835,14 @@ (or is-completed is-running));; this is the parent, set it to run if completed or running (set! parent-waiton-met #t))) ;; normal checking of parent items, any parent or parent item not ok blocks running ((and is-completed (or is-ok - (eq? mode 'toplevel)) ;; toplevel does not block on FAIL - (and is-ok (eq? mode 'itemmatch))) ;; itemmatch blocks on not ok + (member 'toplevel mode)) ;; toplevel does not block on FAIL + (and is-ok (member 'itemmatch mode))) ;; itemmatch blocks on not ok (set! 
item-waiton-met #t))))) - tests) + tests) ;; both requirements, parent and item-waiton must be met to NOT add item to ;; prereq's not met list (if (not (or parent-waiton-met item-waiton-met)) (set! result (append (if (null? tests) (list waitontest-name) tests) result))) ;; if the test is not found then clearly the waiton is not met... Index: db_records.scm ================================================================== --- db_records.scm +++ db_records.scm @@ -13,22 +13,26 @@ ;; ;; ;; Accessors for a dbstruct ;; -(define-inline (dbr:dbstruct-get-main vec) (vector-ref vec 0)) -(define-inline (dbr:dbstruct-get-strdb vec) (vector-ref vec 1)) -(define-inline (dbr:dbstruct-get-path vec) (vector-ref vec 2)) +(define-inline (dbr:dbstruct-get-main vec) (vector-ref vec 0)) ;; ( db path ) +(define-inline (dbr:dbstruct-get-strdb vec) (vector-ref vec 1)) ;; ( db path ) +(define-inline (dbr:dbstruct-get-path vec) (vector-ref vec 2)) (define-inline (dbr:dbstruct-get-local vec) (vector-ref vec 3)) -(define-inline (dbr:dbstruct-get-rundb vec) (vector-ref vec 4)) -(define-inline (dbr:dbstruct-get-inmem vec) (vector-ref vec 5)) +(define-inline (dbr:dbstruct-get-rundb vec) (vector-ref vec 4)) ;; ( db path ) +(define-inline (dbr:dbstruct-get-inmem vec) (vector-ref vec 5)) ;; ( db #f ) (define-inline (dbr:dbstruct-get-mtime vec) (vector-ref vec 6)) (define-inline (dbr:dbstruct-get-rtime vec) (vector-ref vec 7)) (define-inline (dbr:dbstruct-get-stime vec) (vector-ref vec 8)) (define-inline (dbr:dbstruct-get-inuse vec) (vector-ref vec 9)) -(define-inline (dbr:dbstruct-get-refdb vec) (vector-ref vec 10)) +(define-inline (dbr:dbstruct-get-refdb vec) (vector-ref vec 10)) ;; ( db path ) (define-inline (dbr:dbstruct-get-locdbs vec) (vector-ref vec 11)) +(define-inline (dbr:dbstruct-get-olddb vec) (vector-ref vec 12)) ;; ( db path ) +;; (define-inline (dbr:dbstruct-get-main-path vec) (vector-ref vec 13)) +;; (define-inline (dbr:dbstruct-get-rundb-path vec) (vector-ref vec 14)) +;; 
(define-inline (dbr:dbstruct-get-run-id vec) (vector-ref vec 13)) (define-inline (dbr:dbstruct-set-main! vec val)(vector-set! vec 0 val)) (define-inline (dbr:dbstruct-set-strdb! vec val)(vector-set! vec 1 val)) (define-inline (dbr:dbstruct-set-path! vec val)(vector-set! vec 2 val)) (define-inline (dbr:dbstruct-set-local! vec val)(vector-set! vec 3 val)) @@ -38,15 +42,20 @@ (define-inline (dbr:dbstruct-set-rtime! vec val)(vector-set! vec 7 val)) (define-inline (dbr:dbstruct-set-stime! vec val)(vector-set! vec 8 val)) (define-inline (dbr:dbstruct-set-inuse! vec val)(vector-set! vec 9 val)) (define-inline (dbr:dbstruct-set-refdb! vec val)(vector-set! vec 10 val)) (define-inline (dbr:dbstruct-set-locdbs! vec val)(vector-set! vec 11 val)) +(define-inline (dbr:dbstruct-set-olddb! vec val)(vector-set! vec 12 val)) +(define-inline (dbr:dbstruct-set-main-path! vec val)(vector-set! vec 13 val)) +(define-inline (dbr:dbstruct-set-rundb-path! vec val)(vector-set! vec 14 val)) + +; (define-inline (dbr:dbstruct-set-run-id! vec val)(vector-set! vec 13 val)) ;; constructor for dbstruct ;; (define (make-dbr:dbstruct #!key (path #f)(local #f)) - (let ((v (make-vector 12 #f))) + (let ((v (make-vector 15 #f))) (dbr:dbstruct-set-path! v path) (dbr:dbstruct-set-local! v local) (dbr:dbstruct-set-locdbs! v (make-hash-table)) v)) @@ -54,67 +63,10 @@ (hash-table-ref/default (dbr:dbstruct-get-locdbs v) run-id #f)) (define (dbr:dbstruct-set-localdb! v run-id db) (hash-table-set! (dbr:dbstruct-get-locdbs v) run-id db)) -;; ;; get and set main db -;; (define-inline (dbr:dbstruct-get-main vec) (vector-ref vec 0)) -;; (define-inline (dbr:dbstruct-set-main! vec db)(vector-set! vec 0 db)) -;; ;; get the runs hash -;; (define-inline (dbr:dbstruct-get-dbhash vec) (vector-ref vec 1)) -;; ;; the string db -;; (define-inline (dbr:dbstruct-get-strdb vec) (vector-ref vec 2)) -;; (define-inline (dbr:dbstruct-set-strdb! vec db)(vector-set! 
vec 2 db)) -;; ;; path -;; (define-inline (dbr:dbstruct-get-path vec) (vector-ref vec 3)) -;; (define-inline (dbr:dbstruct-set-path! vec path)(vector-set! vec 3)) -;; ;; local -;; (define-inline (dbr:dbstruct-get-local vec) (vector-ref vec 4)) -;; (define-inline (dbr:dbstruct-set-local! vec val)(vector-set! vec 4 val)) -;; -;; ;; get a rundb vector, create it if not already existing -;; (define (dbr:dbstruct-get-rundb-rec vec run-id) -;; (let* ((dbhash (dbr:dbstruct-get-dbhash vec)) ;; get the runs hash -;; (runvec (hash-table-ref/default dbhash run-id #f))) ;; get the vector for run-id -;; (if (vector? runvec) -;; runvec ;; rundb inmemdb last-mod last-read last-sync in-use refdb -;; (let ((nvec (vector #f #f -1 -1 -1 #f #f))) -;; (hash-table-set! dbhash run-id nvec) -;; nvec)))) -;; -;; ;; [ rundb inmemdb last-mod last-read last-sync ] -;; (define-inline (dbr:dbstruct-field-name->num field-name) -;; (case field-name -;; ((rundb) 0) ;; the on-disk db -;; ((inmem) 1) ;; the in-memory db -;; ((mtime) 2) ;; last modification time -;; ((rtime) 3) ;; last read time -;; ((stime) 4) ;; last sync time -;; ((inuse) 5) ;; is the db currently in use, #t yes, #f no. -;; ((refdb) 6) ;; the db used for reference (can be on disk or inmem) -;; (else -1))) -;; -;; ;; get/set rundb fields -;; (define (dbr:dbstruct-get-runvec-val vec run-id field-name) -;; (let ((runvec (dbr:dbstruct-get-rundb-rec vec run-id)) -;; (fieldnum (dbr:dbstruct-field-name->num field-name))) -;; ;; (vector-set! runvec (dbr:dbstruct-field-name->num 'inuse) #t) -;; (vector-ref runvec fieldnum))) -;; -;; (define (dbr:dbstruct-set-runvec-val! vec run-id field-name val) -;; (let ((runvec (dbr:dbstruct-get-rundb-rec vec run-id))) -;; (vector-set! runvec (dbr:dbstruct-field-name->num field-name) val))) -;; -;; ;; get/set inmemdb -;; (define (dbr:dbstruct-get-inmemdb vec run-id) -;; (let ((runvec (dbr:dbstruct-get-rundb-rec vec run-id))) -;; (vector-ref runvec 1))) -;; -;; (define (dbr:dbstruct-set-inmemdb! 
vec run-id inmemdb) -;; (let ((runvec (dbr:dbstruct-get-rundb-rec vec run-id))) -;; (vector-set! runvec 1 inmemdb))) - (define (make-db:test)(make-vector 20)) (define-inline (db:test-get-id vec) (vector-ref vec 0)) (define-inline (db:test-get-run_id vec) (vector-ref vec 1)) (define-inline (db:test-get-testname vec) (vector-ref vec 2)) @@ -129,12 +81,13 @@ (define-inline (db:test-get-rundir vec) (vector-ref vec 10)) (define-inline (db:test-get-item-path vec) (vector-ref vec 11)) (define-inline (db:test-get-run_duration vec) (vector-ref vec 12)) (define-inline (db:test-get-final_logf vec) (vector-ref vec 13)) (define-inline (db:test-get-comment vec) (vector-ref vec 14)) -(define-inline (db:test-get-pass_count vec) (vector-ref vec 15)) -(define-inline (db:test-get-fail_count vec) (vector-ref vec 16)) +(define-inline (db:test-get-process_id vec) (vector-ref vec 16)) +;; (define-inline (db:test-get-pass_count vec) (vector-ref vec 15)) +;; (define-inline (db:test-get-fail_count vec) (vector-ref vec 16)) (define-inline (db:test-get-fullname vec) (conc (db:test-get-testname vec) "/" (db:test-get-item-path vec))) (define-inline (db:test-get-first_err vec) (printable (vector-ref vec 15))) (define-inline (db:test-get-first_warn vec) (printable (vector-ref vec 16))) @@ -145,10 +98,18 @@ (define-inline (db:test-set-state! vec val)(vector-set! vec 3 val)) (define-inline (db:test-set-status! vec val)(vector-set! vec 4 val)) (define-inline (db:test-set-run_duration! vec val)(vector-set! vec 12 val)) (define-inline (db:test-set-final_logf! vec val)(vector-set! vec 13 val)) +;; Test record utility functions + +;; Is a test a toplevel? +;; +(define (db:test-get-is-toplevel vec) + (and (equal? (db:test-get-item-path vec) "") ;; test is not an item + (equal? 
(db:test-get-uname vec) "n/a"))) ;; test has never been run + ;; make-vector-record "" db mintest id run_id testname state status event_time item_path ;; (define (make-db:mintest)(make-vector 7)) (define-inline (db:mintest-get-id vec) (vector-ref vec 0)) (define-inline (db:mintest-get-run_id vec) (vector-ref vec 1)) Index: dcommon.scm ================================================================== --- dcommon.scm +++ dcommon.scm @@ -127,11 +127,11 @@ ;; 1. Make "data" hash-table hierarchial store of all displayed data ;; 2. Update synchash to understand "get-runs", "get-tests" etc. ;; 3. Add extraction of filters to synchash calls ;; ;; Mode is 'full or 'incremental for full refresh or incremental refresh -(define (run-update keys data runname keypatts testpatt states statuses mode window-id) +(define (dcommon:run-update keys data runname keypatts testpatt states statuses mode window-id) (let* (;; count and offset => #f so not used ;; the synchash calls modify the "data" hash (get-runs-sig (conc (client:get-signature) " get-runs")) (get-tests-sig (conc (client:get-signature) " get-tests")) (get-details-sig (conc (client:get-signature) " get-test-details")) @@ -139,11 +139,11 @@ ;; test-ids to get and display are indexed on window-id in curr-test-ids hash (test-ids (hash-table-values (dboard:data-get-curr-test-ids *data*))) (run-changes (synchash:client-get 'db:get-runs get-runs-sig (length keypatts) data runname #f #f keypatts)) (tests-detail-changes (if (not (null? test-ids)) - (synchash:client-get 'db:get-test-info-by-ids get-details-sig 0 data test-ids) + (synchash:client-get 'db:get-test-info-by-ids get-details-sig 0 data #f test-ids) '())) ;; Now can calculate the run-ids (run-hash (hash-table-ref/default data get-runs-sig #f)) (run-ids (if run-hash (filter number? (hash-table-keys run-hash)) '())) @@ -228,14 +228,20 @@ (fullname (conc testname "/" itempath)) (dispname (if (string=? 
itempath "") testname (conc " " itempath))) (rownum (hash-table-ref/default testname-to-row fullname #f)) (test-path (append run-path (if (equal? itempath "") (list testname) - (list testname itempath))))) + (list testname itempath)))) + (tb (dboard:data-get-tests-tree *data*))) + (print "INFONOTE: run-path: " run-path) (tree:add-node (dboard:data-get-tests-tree *data*) "Runs" test-path userdata: (conc "test-id: " test-id)) + (let ((node-num (tree:find-node tb (cons "Runs" test-path))) + (color (car (gutils:get-color-for-state-status state status)))) + (debug:print 0 "node-num: " node-num ", color: " color) + (iup:attribute-set! tb (conc "COLOR" node-num) color)) (hash-table-set! (dboard:data-get-path-test-ids *data*) test-path test-id) (if (not rownum) (let ((rownums (hash-table-values testname-to-row))) (set! rownum (if (null? rownums) 1 @@ -440,20 +446,21 @@ (iup:vbox ;; (iup:label "Run statistics" #:expand "HORIZONTAL") stats-matrix))) (define (dcommon:servers-table) - (let* ((colnum 0) + (let* ((tdbdat (tasks:open-db)) + (colnum 0) (rownum 0) (servers-matrix (iup:matrix #:expand "YES" #:numcol 7 #:numcol-visible 7 #:numlin-visible 5 )) (colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId")) (updater (lambda () - (let ((servers (open-run-close tasks:get-all-servers tasks:open-db))) + (let ((servers (tasks:get-all-servers (db:delay-if-busy tdbdat)))) (iup:attribute-set! servers-matrix "NUMLIN" (length servers)) ;; (set! colnum 0) ;; (for-each (lambda (colname) ;; ;; (print "colnum: " colnum " colname: " colname) ;; (iup:attribute-set! 
servers-matrix (conc "0:" colnum) colname) @@ -546,5 +553,220 @@ ;; ;; #:x 'mouse ;; ;; #:y 'mouse ;; ) )))) +;;====================================================================== +;; CANVAS STUFF FOR TESTS +;;====================================================================== + +(define (dcommon:draw-test cnv x y w h name selected) + (let* ((llx x) + (lly y) + (urx (+ x w)) + (ury (+ y h))) + (canvas-text! cnv (+ llx 5)(+ lly 5) name) ;; (conc testname " (" xtorig "," ytorig ")")) + (canvas-rectangle! cnv llx urx lly ury) + (if selected (canvas-box! cnv llx (+ llx 5) lly (+ lly 5))))) + +(define (dcommon:initial-draw-tests cnv xadj yadj sizex sizey sizexmm sizeymm originx originy tests-draw-state sorted-testnames) + (let* ((scalef (hash-table-ref/default tests-draw-state 'scalef 8)) + (test-browse-xoffset (hash-table-ref tests-draw-state 'test-browse-xoffset)) + (test-browse-yoffset (hash-table-ref tests-draw-state 'test-browse-yoffset)) + (xtorig (+ test-browse-xoffset (* (/ sizex 2) scalef (- 0.5 xadj)))) ;; (- xadj 1)))) + (ytorig (+ test-browse-yoffset (* (/ sizey 2) scalef (- yadj 0.5)))) + (boxw 90) ;; default, overriden by length estimate below + (boxh 25) + (gapx 20) + (gapy 30) + (tests-hash (hash-table-ref tests-draw-state 'tests-info)) + (selected-tests (hash-table-ref tests-draw-state 'selected-tests ))) + (hash-table-set! tests-draw-state 'xtorig xtorig) + (hash-table-set! tests-draw-state 'ytorig ytorig) + (let ((longest-str (if (null? sorted-testnames) " " (car (sort sorted-testnames (lambda (a b)(>= (string-length a)(string-length b)))))))) + (let-values (((x-max y-max) (canvas-text-size cnv longest-str))) + (if (> x-max boxw)(set! boxw (+ 10 x-max))))) + ;; (print "sizex: " sizex " sizey: " sizey " font: " (canvas-font cnv) " originx: " originx " originy: " originy " xtorig: " xtorig " ytorig: " ytorig " xadj: " xadj " yadj: " yadj) + (if (not (null? 
sorted-testnames)) + (let loop ((hed (car (reverse sorted-testnames))) + (tal (cdr (reverse sorted-testnames))) + (llx xtorig) + (lly ytorig) + (urx (+ xtorig boxw)) + (ury (+ ytorig boxh))) + ; (print "hed " hed " llx " llx " lly " lly " urx " urx " ury " ury) + (dcommon:draw-test cnv llx lly boxw boxh hed (hash-table-ref/default selected-tests hed #f)) + ;; data used by mouse click calc. keep the wacky order for now. + (hash-table-set! tests-hash hed (list llx urx (- sizey ury)(- sizey lly) lly boxw boxh)) + ;; (list llx lly boxw boxh)) ;; NB// Swap ury and lly + (if (not (null? tal)) + ;; leave a column of space to the right to list items + (let ((have-room + (if #t ;; put "auto" here where some form of auto rearanging can be done + (> (* 3 (+ boxw gapx)) (- urx xtorig)) + (< urx (- sizex boxw gapx boxw))))) ;; is there room for another column? + (loop (car tal) + (cdr tal) + (if have-room (+ llx boxw gapx) xtorig) ;; have room, + (if have-room lly (+ lly boxh gapy)) + (if have-room (+ urx boxw gapx) (+ xtorig boxw)) + (if have-room ury (+ ury boxh gapy))))))))) + +(define (dcommon:redraw-tests cnv xadj yadj sizex sizey sizexmm sizeymm originx originy tests-draw-state sorted-testnames) + (let* ((scalef (hash-table-ref/default tests-draw-state 'scalef 8)) + (test-browse-xoffset (hash-table-ref tests-draw-state 'test-browse-xoffset)) + (test-browse-yoffset (hash-table-ref tests-draw-state 'test-browse-yoffset)) + (xtorig (+ test-browse-xoffset (* (/ sizex 2) scalef (- 0.5 xadj)))) ;; (- xadj 1)))) + (ytorig (+ test-browse-yoffset (* (/ sizey 2) scalef (- yadj 0.5)))) + (xdelta (- (hash-table-ref tests-draw-state 'xtorig) xtorig)) + (ydelta (- (hash-table-ref tests-draw-state 'ytorig) ytorig)) + (tests-hash (hash-table-ref tests-draw-state 'tests-info)) + (selected-tests (hash-table-ref tests-draw-state 'selected-tests ))) + (hash-table-set! tests-draw-state 'xtorig xtorig) + (hash-table-set! tests-draw-state 'ytorig ytorig) + (if (not (null? 
sorted-testnames)) + (let loop ((hed (car (reverse sorted-testnames))) + (tal (cdr (reverse sorted-testnames)))) + (let* ((tvals (hash-table-ref tests-hash hed)) + (llx (+ xdelta (list-ref tvals 0))) + (lly (+ ydelta (list-ref tvals 4))) + (boxw (list-ref tvals 5)) + (boxh (list-ref tvals 6)) + (urx (+ llx boxw)) + (ury (+ lly boxh))) + (dcommon:draw-test cnv llx lly boxw boxh hed (hash-table-ref/default selected-tests hed #f)) + (hash-table-set! tests-hash hed (list llx urx (- sizey ury)(- sizey lly) lly boxw boxh)) + (if (not (null? tal)) + ;; leave a column of space to the right to list items + (loop (car tal) + (cdr tal)))))))) + +;;====================================================================== +;; S T E P S +;;====================================================================== + +;; CHECK - WAS THIS ADDED OR REMOVED? MANUAL MERGE WITH API STUFF!!! +;; +;; get a pretty table to summarize steps +;; +(define (dcommon:process-steps-table steps);; db test-id #!key (work-area #f)) +;; (let ((steps (db:get-steps-for-test db test-id work-area: work-area))) + ;; organise the steps for better readability + (let ((res (make-hash-table))) + (for-each + (lambda (step) + (debug:print 6 "step=" step) + (let ((record (hash-table-ref/default + res + (tdb:step-get-stepname step) + ;; stepname start end status Duration Logfile + (vector (tdb:step-get-stepname step) "" "" "" "" "")))) + (debug:print 6 "record(before) = " record + "\nid: " (tdb:step-get-id step) + "\nstepname: " (tdb:step-get-stepname step) + "\nstate: " (tdb:step-get-state step) + "\nstatus: " (tdb:step-get-status step) + "\ntime: " (tdb:step-get-event_time step)) + (case (string->symbol (tdb:step-get-state step)) + ((start)(vector-set! record 1 (tdb:step-get-event_time step)) + (vector-set! record 3 (if (equal? (vector-ref record 3) "") + (tdb:step-get-status step))) + (if (> (string-length (tdb:step-get-logfile step)) + 0) + (vector-set! record 5 (tdb:step-get-logfile step)))) + ((end) + (vector-set! 
record 2 (any->number (tdb:step-get-event_time step))) + (vector-set! record 3 (tdb:step-get-status step)) + (vector-set! record 4 (let ((startt (any->number (vector-ref record 1))) + (endt (any->number (vector-ref record 2)))) + (debug:print 4 "record[1]=" (vector-ref record 1) + ", startt=" startt ", endt=" endt + ", get-status: " (tdb:step-get-status step)) + (if (and (number? startt)(number? endt)) + (seconds->hr-min-sec (- endt startt)) "-1"))) + (if (> (string-length (tdb:step-get-logfile step)) + 0) + (vector-set! record 5 (tdb:step-get-logfile step)))) + (else + (vector-set! record 2 (tdb:step-get-state step)) + (vector-set! record 3 (tdb:step-get-status step)) + (vector-set! record 4 (tdb:step-get-event_time step)))) + (hash-table-set! res (tdb:step-get-stepname step) record) + (debug:print 6 "record(after) = " record + "\nid: " (tdb:step-get-id step) + "\nstepname: " (tdb:step-get-stepname step) + "\nstate: " (tdb:step-get-state step) + "\nstatus: " (tdb:step-get-status step) + "\ntime: " (tdb:step-get-event_time step)))) + ;; (else (vector-set! record 1 (tdb:step-get-event_time step))) + (sort steps (lambda (a b) + (cond + ((< (tdb:step-get-event_time a)(tdb:step-get-event_time b)) #t) + ((eq? (tdb:step-get-event_time a)(tdb:step-get-event_time b)) + (< (tdb:step-get-id a) (tdb:step-get-id b))) + (else #f))))) + res)) + +(define (dcommon:get-compressed-steps dbstruct run-id test-id) + (let* ((steps-data (db:get-steps-for-test dbstruct run-id test-id)) + (comprsteps (dcommon:process-steps-table steps-data))) ;; (open-run-close db:get-steps-table #f test-id work-area: work-area))) + (map (lambda (x) + ;; take advantage of the \n on time->string + (vector + (vector-ref x 0) + (let ((s (vector-ref x 1))) + (if (number? s)(seconds->time-string s) s)) + (let ((s (vector-ref x 2))) + (if (number? 
s)(seconds->time-string s) s)) + (vector-ref x 3) ;; status + (vector-ref x 4) + (vector-ref x 5))) ;; time delta + (sort (hash-table-values comprsteps) + (lambda (a b) + (let ((time-a (vector-ref a 1)) + (time-b (vector-ref b 1))) + (if (and (number? time-a)(number? time-b)) + (if (< time-a time-b) + #t + (if (eq? time-a time-b) + (string rownum max-row)(set! max-row rownum)) + (let ((val (vector-ref hed (- colnum 1))) + (mtrx-rc (conc rownum ":" colnum))) + (iup:attribute-set! steps-matrix mtrx-rc (if val (conc val) "")) + (if (< colnum 6) + (loop hed tal rownum (+ colnum 1)) + (if (not (null? tal)) + (loop (car tal)(cdr tal)(+ rownum 1) 1)))))) + (if (> max-row 0) + (begin + ;; we are going to speculatively clear rows until we find a row that is already cleared + (let loop ((rownum (+ max-row 1)) + (colnum 0) + (deleted #f)) + ;; (debug:print-info 0 "cleaning " rownum ":" colnum) + (let* ((next-row (if (eq? colnum 6) (+ rownum 1) rownum)) + (next-col (if (eq? colnum 6) 1 (+ colnum 1))) + (mtrx-rc (conc rownum ":" colnum)) + (curr-val (iup:attribute steps-matrix mtrx-rc))) + ;; (debug:print-info 0 "cleaning " rownum ":" colnum " currval= " curr-val) + (if (and (string? curr-val) + (not (equal? curr-val ""))) + (begin + (iup:attribute-set! steps-matrix mtrx-rc "") + (loop next-row next-col #t)) + (if (eq? colnum 6) ;; not done, didn't get a full blank row + (if deleted (loop next-row next-col #f)) ;; exit on this not met + (loop next-row next-col deleted))))) + (iup:attribute-set! steps-matrix "REDRAW" "ALL"))))) Index: docs/manual/megatest_manual.html ================================================================== --- docs/manual/megatest_manual.html +++ docs/manual/megatest_manual.html @@ -1,1196 +1,1361 @@ - - - - - -The Megatest Users Manual - - - - - -
-
-

Preface

-
-

This book is organised as three sub-books; getting started, writing tests and reference.

-
-

Why Megatest?

-

The Megatest project was started for two reasons, the first was an -immediate and pressing need for a generalized tool to manage a suite -of regression tests and the second was the fact that the author had -written or maintained several such tools at different companies over -the years and it seemed a good thing to have a single open source -tool, flexible enough to meet the needs of any team doing continuous -integrating and or running a complex suite of tests for release -qualification.

-
-
-

Megatest Design Philosophy

-

Megatest is intended to provide the minimum needed resources to make -writing a suite of tests and tasks for implementing continuous build -for software, design engineering or process control (via owlfs for -example) without being specialized for any specific problem -space. Megatest in of itself does not know what constitutes a PASS or -FAIL of a test. In most cases megatest is best used in conjunction -with logpro or a similar tool to parse, analyze and decide on the test -outcome.

-
-
-

Megatest Architecture

-

All data to specify the tests and configure the system is stored in -plain text files. All system state is stored in an sqlite3 -database. Tests are launched using the launching system available for -the distributed compute platform in use. A template script is provided -which can launch jobs on local and remote Linux hosts. Currently -megatest uses the network filesystem to call home to your master -sqlite3 database.

-
-
-
-

Road Map

-

Note 1: This road-map is tentative and subject to change without notice.

-

Note 2: Starting over. Old plan is commented out.

-
-

Current Items

-
-
-

ww05 - migrate to inmem-db

-

Keep as much the same as possible. Add internal reference to almost -eliminate contention on db(s).

-
    -
  1. -

    -Add internal reference db -

    -
  2. -
  3. -

    -Verify that actions are accessing correct db -

    -
      -
    1. -

      --runtests - inmem -

      -
    2. -
    3. -

      --list-runs - local (but not megatest.db) -

      -
    4. -
    5. -

      -dashboard - local (but not megatest.db) -

      -
    6. -
    -
  4. -
  5. -

    -Mirror db to /var/tmp… -

    -
  6. -
  7. -

    -Dashboard read db from per-run db. -

    -
  8. -
  9. -

    -Dashboard read db from /var/tmp -

    -
  10. -
  11. -

    -Runs register in tasks table in monitor.db -

    -
  12. -
  13. -

    -Server polls tasks table for next action (in addition?) -

    -
  14. -
  15. -

    -Change run loop to execute in server, triggered by call to polling of tasks table -

    -
  16. -
-
-
-
-

Getting Started

-
-
Getting started with Megatest
-
-

How to install Megatest and set it up for running your regressions and continuous integration process.

-
-
-

Installation

-
-
-

Dependencies

-

Chicken scheme and a number of "eggs" are required for building -Megatest. See the script installall.sch in the utils directory of the -distribution for a mostly automated way to install everything needed -for building Megatest on Linux.

-


[An example footnote.]

-

And now for something completely different: monkeys, lions and -tigers (Bengal and Siberian) using the alternative syntax index -entries. - - - -Note that multi-entry terms generate separate index entries.

-

Here are a couple of image examples: an -images/smallnew.png - -example inline image followed by an example block image:

-
-
-Tiger image -
-
Figure 1. Tiger block image
-
-

Followed by an example table:

-
- - --- - - - - - - - - - - - - - - - -
Table 1. An example table
Option Description

-a USER GROUP

Add USER to GROUP.

-R GROUP

Disables access to GROUP.

-
-
-
Example 1. An example example
-
-

Lorum ipum…

-
-
-
-

Sub-section with Anchor

-

Sub-section at level 2.

-
-

Chapter Sub-section

-

Sub-section at level 3.

-
-
Chapter Sub-section
-

Sub-section at level 4.

-

This is the maximum sub-section depth supported by the distributed -AsciiDoc configuration. -
[A second example footnote.]

-
-
-
-
-
-
-

The Second Chapter

-
-

An example link to anchor at start of the first sub-section.

-

An example link to a bibliography entry [taoup].

-
-
-

Writing Tests

-
-

The First Chapter of the Second Part

-
-

Chapters grouped into book parts are at level 1 and can contain -sub-sections.

-
-
-

How To Do Things

-
-

Tricks

-
-

This section is a compendium of a various useful tricks for debugging, -configuring and generally getting the most out of Megatest.

-
-
-
-

Debugging Tricks

-
-
-

Examining The Environment

-
-

During Config File Processing

-
-
-

Organising Your Tests and Tasks

-
-
-
[tests-paths]
-1 #{get misc parent}/simplerun/tests
-
-
-
-
[setup]
-
-

The runscript method is a brute force way to run scripts where the -user is responsible for setting STATE and STATUS

-
-
-
runscript main.csh
-
-
-
-
-
-
-

Tricks

-
-

This section is a compendium of a various useful tricks for debugging, -configuring and generally getting the most out of Megatest.

-
-
-
-

Debugging Tricks

-
-
-

Examining The Environment

-
-

During Config File Processing

-
-
-

Organising Your Tests and Tasks

-

/nfs/ch/disks/ch_unienv_disk005/qa_mrwellan/interim/src/megatest/tests/fdktestqa/testqa

-
-
-
[tests-paths]
-1 #{get misc parent}/simplerun/tests
-
-
-
-
[setup]
-
-

The runscript method is a brute force way to run scripts where the -user is responsible for setting STATE and STATUS

-
-
-
runscript main.csh
-
-

ww30.2 -cellname/LVS/cellname.LAYOUT_ERRORS

-

Error: text open

-

ww31.3 -cellname/LVS/cellname.LAYOUT_ERRORS

-

Error: text open -Reference

-
-
-

Chapters grouped into book parts are at level 1 and can contain -sub-sections.

-
-
-
[setup]
-
-

The runscript method is a brute force way to run scripts where the -user is responsible for setting STATE and STATUS

-
-
-
runscript main.csh
-
-
-
-
[requirements]
-
-
-
-
# A normal waiton waits for the prior tests to be COMPLETED
-# and PASS, CHECK or WAIVED
-waiton test1 test2
-
-

The default (i.e. if mode is not specified) is normal. All pre-dependent tests -must be COMPLETED and PASS, CHECK or WAIVED before the test will start

-
-
-
mode   normal
-
-

The toplevel mode requires only that the prior tests are COMPLETED.

-
-
-
mode toplevel
-
-

A item based waiton will start items in a test when the -same-named item is COMPLETED and PASS, CHECK or WAIVED -in the prior test

-
-
-
mode itemmatch
-
-
-
-
# With a toplevel test you may wish to generate your list
-# of tests to run dynamically
-#
-# waiton #{shell get-valid-tests-to-run.sh}
-
-
-
-
runtimelim 1h 2m 3s  # this will automatically kill the test if it runs for more than 1h 2m and 3s
-
-
-
-
[skip]
-
-
-
-
# NB// If the prevrunning line exists with *any* value the test will
-# automatically SKIP if the same-named test is currently RUNNING
-
-prevrunning x
-
-
-
-
fileexists /path/to/a/file # skip if /path/to/a/file exists
-
-

If test is FAIL and previous test in run with same MT_TARGET is WAIVED then apply the following rules from the testconfig: -If a waiver check is specified in the testconfig apply the check and if it passes then set this FAIL to WAIVED

-

Waiver check has two parts, 1) a list of waiver, rulename, filepatterns and 2) the rulename script spec (note that "diff" and "logpro" are predefined)

-
-
-
###### EXAMPLE FROM testconfig #########
-# matching file(s) will be diff'd with previous run and logpro applied
-# if PASS or WARN result from logpro then WAIVER state is set
-#
-[waivers]
-# logpro_file    rulename      input_glob
-waiver_1         logpro        lookittmp.log
-
-[waiver_rules]
-
-# This builtin rule is the default if there is no <waivername>.logpro file
-# diff   diff %file1% %file2%
-
-# This builtin rule is applied if a <waivername>.logpro file exists
-# logpro diff %file1% %file2% | logpro %waivername%.logpro %waivername%.html
-
-

To transfer the environment to the next step you can do the following:

-
-
-
$MT_MEGATEST -env2file .ezsteps/${stepname}
-
-
-

Megatest Internals

-
-
-server.png -
-
-
-

One or more optional appendixes go here at section level zero.

-
- - - -
-
Note
-
Preface and appendix subsections start out of sequence at level -2 (level 1 is skipped). This only applies to multi-part book -documents.
-
-

The bibliography list is a style of AsciiDoc bulleted list.

-
    -
  • -

    -[taoup] Eric Steven Raymond. The Art of Unix - Programming. Addison-Wesley. ISBN 0-13-142901-9. -

    -
  • -
  • -

    -[walsh-muellner] Norman Walsh & Leonard Muellner. - DocBook - The Definitive Guide. O’Reilly & Associates. 1999. - ISBN 1-56592-580-7. -

    -
  • -
-

Glossaries are optional. Glossaries entries are an example of a style -of AsciiDoc labeled lists.

-
-
-A glossary term -
-
-

- The corresponding (indented) definition. -

-
-
-A second glossary term -
-
-

- The corresponding (indented) definition. -

-
-
-

Text at the end of a book describing facts about its production.

-
-
-
-
-
-
-

- - - + + + + + +The Megatest Users Manual + + + + + +
+
+

Preface

+
+

This book is organised as three sub-books: getting started, writing tests and reference.

+
+

Why Megatest?

+

The Megatest project was started for two reasons, the first was an +immediate and pressing need for a generalized tool to manage a suite +of regression tests and the second was the fact that the author had +written or maintained several such tools at different companies over +the years and it seemed a good thing to have a single open source +tool, flexible enough to meet the needs of any team doing continuous +integration and/or running a complex suite of tests for release +qualification.

+
+
+

Megatest Design Philosophy

+

Megatest is intended to provide the minimum needed resources to make +writing a suite of tests and tasks for implementing continuous build +for software, design engineering or process control (via owlfs for +example) possible without being specialized for any specific problem +space. Megatest in and of itself does not know what constitutes a PASS or +FAIL of a test. In most cases megatest is best used in conjunction +with logpro or a similar tool to parse, analyze and decide on the test +outcome.

+
+
+

Megatest Architecture

+

All data to specify the tests and configure the system is stored in +plain text files. All system state is stored in an sqlite3 +database. Tests are launched using the launching system available for +the distributed compute platform in use. A template script is provided +which can launch jobs on local and remote Linux hosts. Currently +megatest uses the network filesystem to call home to your master +sqlite3 database.

+
+
+
+

Road Map

+

Note 1: This road-map is tentative and subject to change without notice.

+

Note 2: Starting over. Old plan is commented out.

+
+

Current Items

+
+
+

ww05 - migrate to inmem-db

+

Keep as much the same as possible. Add internal reference to almost +eliminate contention on db(s).

+
    +
  1. +

    +Add internal reference db +

    +
  2. +
  3. +

    +Verify that actions are accessing correct db +

    +
      +
    1. +

      +-runtests - inmem +

      +
    2. +
    3. +

      +-list-runs - local (but not megatest.db) +

      +
    4. +
    5. +

      +dashboard - local (but not megatest.db) +

      +
    6. +
    +
  4. +
  5. +

    +Mirror db to /var/tmp… +

    +
  6. +
  7. +

    +Dashboard read db from per-run db. +

    +
  8. +
  9. +

    +Dashboard read db from /var/tmp +

    +
  10. +
  11. +

    +Runs register in tasks table in monitor.db +

    +
  12. +
  13. +

    +Server polls tasks table for next action (in addition?) +

    +
  14. +
  15. +

    +Change run loop to execute in server, triggered by call to polling of tasks table +

    +
  16. +
+
+
+
+

Getting Started

+
+
Getting started with Megatest
+
+

How to install Megatest and set it up for running your regressions and continuous integration process.

+
+
+

Installation

+
+
+

Dependencies

+

Chicken scheme and a number of "eggs" are required for building +Megatest. See the script installall.sch in the utils directory of the +distribution for a mostly automated way to install everything needed +for building Megatest on Linux.

+


[An example footnote.]

+

And now for something completely different: monkeys, lions and +tigers (Bengal and Siberian) using the alternative syntax index +entries. + + + +Note that multi-entry terms generate separate index entries.

+

Here are a couple of image examples: an +images/smallnew.png + +example inline image followed by an example block image:

+
+
+Tiger image +
+
Figure 1. Tiger block image
+
+

Followed by an example table:

+
+ + +++ + + + + + + + + + + + + + + + +
Table 1. An example table
Option Description

-a USER GROUP

Add USER to GROUP.

-R GROUP

Disables access to GROUP.

+
+
+
Example 1. An example example
+
+

Lorum ipum…

+
+
+
+

Sub-section with Anchor

+

Sub-section at level 2.

+
+

Chapter Sub-section

+

Sub-section at level 3.

+
+
Chapter Sub-section
+

Sub-section at level 4.

+

This is the maximum sub-section depth supported by the distributed +AsciiDoc configuration. +
[A second example footnote.]

+
+
+
+
+
+
+

The Second Chapter

+
+

An example link to anchor at start of the first sub-section.

+

An example link to a bibliography entry [taoup].

+
+
+

Writing Tests

+
+

The First Chapter of the Second Part

+
+

Chapters grouped into book parts are at level 1 and can contain +sub-sections.

+
+
+

How To Do Things

+
+

Tricks

+
+

This section is a compendium of various useful tricks for debugging, +configuring and generally getting the most out of Megatest.

+
+
+
+

Limiting your running jobs

+
+

The following example will limit tests in the jobgroup "group1" to no more than 10 running simultaneously.

+

In your testconfig:

+
+
+
[test_meta]
+jobgroup group1
+
+

In your megatest.config:

+
+
+
[jobgroups]
+group1 10
+custdes 4
+
+
+
+
+

Debugging Tricks

+
+
+

Examining The Environment

+
+

During Config File Processing

+
+
+

Organising Your Tests and Tasks

+
+
+
[tests-paths]
+1 #{get misc parent}/simplerun/tests
+
+
+
+
[setup]
+
+

The runscript method is a brute force way to run scripts where the +user is responsible for setting STATE and STATUS

+
+
+
runscript main.csh
+
+
+
+
+

Debugging Server Problems

+
+
+
sudo lsof -i
+sudo netstat -lptu
+sudo netstat -tulpn
+
+
+
+
+

Reference

+
+

The testconfig File

+
+
+

Setup section

+
+

Header

+
+
+
[setup]
+
+

The runscript method is a brute force way to run scripts where the +user is responsible for setting STATE and STATUS

+
+
+
runscript main.csh
+
+
+
+
+

Requirements section

+
+

Header

+
+
+
[requirements]
+
+
+
+

Wait on Other Tests

+
+
+
# A normal waiton waits for the prior tests to be COMPLETED
+# and PASS, CHECK or WAIVED
+waiton test1 test2
+
+
+
+

Mode

+

The default (i.e. if mode is not specified) is normal. All pre-dependent tests +must be COMPLETED and PASS, CHECK or WAIVED before the test will start

+
+
+
mode   normal
+
+

The toplevel mode requires only that the prior tests are COMPLETED.

+
+
+
mode toplevel
+
+

An item-based waiton will start items in a test when the +same-named item is COMPLETED and PASS, CHECK or WAIVED +in the prior test

+
+
+
mode itemmatch
+
+
+
+
# With a toplevel test you may wish to generate your list
+# of tests to run dynamically
+#
+# waiton #{shell get-valid-tests-to-run.sh}
+
+
+
+

Run time limit

+
+
+
runtimelim 1h 2m 3s  # this will automatically kill the test if it runs for more than 1h 2m and 3s
+
+
+
+

Skip

+
+
+

Header

+
+
+
[skip]
+
+
+
+

Skip on Still-running Tests

+
+
+
# NB// If the prevrunning line exists with *any* value the test will
+# automatically SKIP if the same-named test is currently RUNNING
+
+prevrunning x
+
+
+
+

Skip if a File Exists

+
+
+
fileexists /path/to/a/file # skip if /path/to/a/file exists
+
+
+
+

Controlled waiver propagation

+

If test is FAIL and previous test in run with same MT_TARGET is WAIVED then apply the following rules from the testconfig: +If a waiver check is specified in the testconfig apply the check and if it passes then set this FAIL to WAIVED

+

Waiver check has two parts, 1) a list of waiver, rulename, filepatterns and 2) the rulename script spec (note that "diff" and "logpro" are predefined)

+
+
+
###### EXAMPLE FROM testconfig #########
+# matching file(s) will be diff'd with previous run and logpro applied
+# if PASS or WARN result from logpro then WAIVER state is set
+#
+[waivers]
+# logpro_file    rulename      input_glob
+waiver_1         logpro        lookittmp.log
+
+[waiver_rules]
+
+# This builtin rule is the default if there is no <waivername>.logpro file
+# diff   diff %file1% %file2%
+
+# This builtin rule is applied if a <waivername>.logpro file exists
+# logpro diff %file1% %file2% | logpro %waivername%.logpro %waivername%.html
+
+
+
+
+

Ezsteps

+

To transfer the environment to the next step you can do the following:

+
+
+
$MT_MEGATEST -env2file .ezsteps/${stepname}
+
+
+
+

Triggers

+

In your testconfig triggers can be specified

+
+
+
[triggers]
+
+# Call script running.sh when test goes to state=RUNNING, status=PASS
+RUNNING/PASS running.sh
+
+# Call script running.sh any time state goes to RUNNING
+RUNNING/ running.sh
+
+# Call script onpass.sh any time status goes to PASS
+PASS/ onpass.sh
+
+

Scripts called will have test-id, test-rundir and trigger added to the command line.

+

HINT

+

To start an xterm (useful for debugging), use a command line like the following:

+
+
+
[triggers]
+COMPLETED/ xterm -e bash -s --
+
+
+ + + +
+
Note
+
There is a trailing space after the --
+
+
+
+
+
+

Programming API

+
+

These routines can be called from the megatest repl.

+
+ + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 2. API Server Management Calls
API Call Purpose comments Returns Comments

(rmt:login run-id)

Verify that the version, testsuite area etc. are correct.

#( #t "successful login" )

(rmt:start-server run-id)

#( success/fail n/a )

(rmt:kill-server run-id)

#( success/fail n/a )

Works only if the server is still reachable

+
+
+ + +++++ + + + + + + + + + + + + + + + + + + + + + + + +
Table 3. API Keys Related Calls
API Call Purpose comments Returns Comments

(rmt:get-key-val-pairs run-id)

#t=success/#f=fail

Works only if the server is still reachable

(rmt:get-keys run-id)

( key1 key2 … )

+
+
+

Megatest Internals

+
+
+server.png +
+
+
+
+
+
+

Appendix A: Example Appendix

+
+

One or more optional appendixes go here at section level zero.

+
+

Appendix Sub-section

+
+ + + +
+
Note
+
Preface and appendix subsections start out of sequence at level +2 (level 1 is skipped). This only applies to multi-part book +documents.
+
+
+
+
+
+

Example Bibliography

+
+

The bibliography list is a style of AsciiDoc bulleted list.

+
    +
  • +

    +[taoup] Eric Steven Raymond. The Art of Unix + Programming. Addison-Wesley. ISBN 0-13-142901-9. +

    +
  • +
  • +

    +[walsh-muellner] Norman Walsh & Leonard Muellner. + DocBook - The Definitive Guide. O’Reilly & Associates. 1999. + ISBN 1-56592-580-7. +

    +
  • +
+
+
+
+

Example Glossary

+
+

Glossaries are optional. Glossary entries are an example of a style +of AsciiDoc labeled lists.

+
+
+A glossary term +
+
+

+ The corresponding (indented) definition. +

+
+
+A second glossary term +
+
+

+ The corresponding (indented) definition. +

+
+
+
+
+
+

Example Colophon

+
+

Text at the end of a book describing facts about its production.

+
+
+
+

Example Index

+
+
+
+
+

+ + + Index: docs/manual/reference.txt ================================================================== --- docs/manual/reference.txt +++ docs/manual/reference.txt @@ -1,15 +1,9 @@ Reference ========= -The First Chapter of the Second Part ------------------------------------- - -Chapters grouped into book parts are at level 1 and can contain -sub-sections. - The testconfig File ------------------- Setup section ~~~~~~~~~~~~~ @@ -147,8 +141,62 @@ To transfer the environment to the next step you can do the following: ---------------------------- $MT_MEGATEST -env2file .ezsteps/${stepname} ---------------------------- + +Triggers +~~~~~~~~ + +In your testconfig triggers can be specified + +----------------- +[triggers] + +# Call script running.sh when test goes to state=RUNNING, status=PASS +RUNNING/PASS running.sh + +# Call script running.sh any time state goes to RUNNING +RUNNING/ running.sh + +# Call script onpass.sh any time status goes to PASS +PASS/ onpass.sh +----------------- + +Scripts called will have; test-id test-rundir trigger, added to the commandline. + +HINT + +To start an xterm (useful for debugging), use a command line like the following: + +----------------- +[triggers] +COMPLETED/ xterm -e bash -s -- +----------------- + +NOTE: There is a trailing space after the -- + +Programming API +--------------- + +These routines can be called from the megatest repl. + +.API Server Management Calls +[width="70%",cols="^,2m,2m,2m",frame="topbot",options="header,footer"] +|====================== +|API Call | Purpose comments | Returns | Comments +|(rmt:start-server run-id) | | #( success/fail n/a ) | +|(rmt:kill-server run-id) | | #( success/fail n/a ) | Works only if the server is still reachable +|(rmt:login run-id) | Verify the the version, testsuite area etc. are correct. 
| #( #t "successful login" ) | +|====================== + +.API Keys Related Calls +[width="70%",cols="^,2m,2m,2m",frame="topbot",options="header,footer"] +|====================== +|API Call | Purpose comments | Returns | Comments +|(rmt:get-keys run-id) | | ( key1 key2 ... ) | +| (rmt:get-key-val-pairs run-id) | | #t=success/#f=fail | Works only if the server is still reachable +|====================== + :numbered!: Index: docs/manual/server.png ================================================================== --- docs/manual/server.png +++ docs/manual/server.png cannot compute difference between binary files ADDED docs/megatest-state-status.dot Index: docs/megatest-state-status.dot ================================================================== --- /dev/null +++ docs/megatest-state-status.dot @@ -0,0 +1,47 @@ +digraph megatest_state_status { + ranksep=0.05 + // rankdir=LR + +node [shape=box,style=filled]; + +// subgraph cluster_notstarted { +// label="Not started"; + + "NOT_STARTED FAILS" [ + label = "{ NOT_STARTED/FAILS |{ NO_ITEMS | FAIL_PREREQ | FAIL_TIMEOUT }}"; + shape= "record"; + ] + +"NOT_STARTED n/a" -> "LAUNCHED n/a" [label=" launch"]; +"NOT_STARTED WAIT" -> "LAUNCHED n/a" + + "NOT_STARTED n/a"; + "NOT_STARTED WAIT" [ + label = "{NOT_STARTED WAIT|{ NO_SLOTS | WAIT_PREREQ}}"; + shape = "record"; +] + +// struct3 [shape=record,label="hello\nworld |{ b |{c| d|e}| f}| g | h"]; + + "NOT_STARTED n/a" -> "NOT_STARTED FAILS"; + "NOT_STARTED n/a" -> "NOT_STARTED WAIT"; + + "RUNNING" [ + shape="record"; + label="{RUNNING|{n/a| PASS | FAIL}}"; + ] + + "COMPLETED" [ + shape="record"; + label = "{COMPLETED|{PASS | FAIL | CHECK| SKIP}}"; + ] + + +"RUNNING" -> "COMPLETED"; +"RUNNING" -> "INCOMPLETE" [label="test dead for > 24hrs"]; + + +"LAUNCHED n/a" -> "REMOTEHOSTSTART n/a" -> "RUNNING"; + +} + Index: docs/megatest-training.odp ================================================================== --- docs/megatest-training.odp +++ docs/megatest-training.odp cannot 
compute difference between binary files Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -25,10 +25,11 @@ (declare (uses db)) (declare (uses tests)) (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. (declare (uses server)) (declare (uses daemon)) +(declare (uses portlogger)) (include "common_records.scm") (include "db_records.scm") (define (http-transport:make-server-url hostport) @@ -67,13 +68,14 @@ (ipaddrstr (let ((ipstr (if (string=? "-" hostn) ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") (server:get-best-guess-address hostname) #f))) (if ipstr ipstr hostn))) ;; hostname))) - (start-port (open-run-close tasks:server-get-next-port tasks:open-db)) + (start-port (portlogger:open-run-close portlogger:find-port)) (link-tree-path (configf:lookup *configdat* "setup" "linktree"))) - (set! db *inmemdb*) + ;; (set! db *inmemdb*) + (debug:print-info 0 "portlogger recommended port: " start-port) (root-path (if link-tree-path link-tree-path (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP! (handle-directory spiffy-directory-listing) ;; http-transport:handle-directory) ;; simple-directory-handler) @@ -86,11 +88,11 @@ (dat ($ 'dat)) (res #f)) (cond ((equal? (uri-path (request-uri (current-request))) '(/ "api")) - (send-response body: (api:process-request db $) ;; the $ is the request vars proc + (send-response body: (api:process-request *inmemdb* $) ;; the $ is the request vars proc headers: '((content-type text/plain))) (mutex-lock! *heartbeat-mutex*) (set! *last-db-access* (current-seconds)) (mutex-unlock! *heartbeat-mutex*)) ((equal? 
(uri-path (request-uri (current-request))) @@ -111,38 +113,52 @@ (http-transport:try-start-server run-id ipaddrstr start-port server-id))) ;; This is recursively run by http-transport:run until sucessful ;; (define (http-transport:try-start-server run-id ipaddrstr portnum server-id) - (handle-exceptions - exn - (begin - (print-error-message exn) - (if (< portnum 9000) - (begin - (debug:print 0 "WARNING: failed to start on portnum: " portnum ", trying next port") - (thread-sleep! 0.1) - - ;; get_next_port goes here - - (http-transport:try-start-server run-id ipaddrstr (+ portnum 1) server-id)) - (begin - (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id ipaddrstr portnum " http-transport:try-start-server") - (print "ERROR: Tried and tried but could not start the server")))) - ;; any error in following steps will result in a retry - (set! *server-info* (list ipaddrstr portnum)) - (open-run-close tasks:server-set-interface-port - tasks:open-db - server-id - ipaddrstr portnum) - (debug:print 1 "INFO: Trying to start server on " ipaddrstr ":" portnum) - ;; This starts the spiffy server - ;; NEED WAY TO SET IP TO #f TO BIND ALL - ;; (start-server bind-address: ipaddrstr port: portnum) - (start-server port: portnum) - (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id ipaddrstr portnum " http-transport:try-start-server") - (debug:print 1 "INFO: server has been stopped"))) + (let ((config-hostname (configf:lookup *configdat* "server" "hostname")) + (tdbdat (tasks:open-db))) + (debug:print-info 0 "http-transport:try-start-server run-id=" run-id " ipaddrsstr=" ipaddrstr " portnum=" portnum " server-id=" server-id " config-hostname=" config-hostname) + (handle-exceptions + exn + (begin + (print-error-message exn) + (if (< portnum 64000) + (begin + (debug:print 0 "WARNING: attempt to start server failed. 
Trying again ...") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 "exn=" (condition->list exn)) + (portlogger:open-run-close portlogger:set-failed portnum) + (debug:print 0 "WARNING: failed to start on portnum: " portnum ", trying next port") + (thread-sleep! 0.1) + + ;; get_next_port goes here + (http-transport:try-start-server run-id + ipaddrstr + (portlogger:open-run-close portlogger:find-port) + server-id)) + (begin + (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat) run-id ipaddrstr portnum " http-transport:try-start-server") + (print "ERROR: Tried and tried but could not start the server")))) + ;; any error in following steps will result in a retry + (set! *server-info* (list ipaddrstr portnum)) + (tasks:server-set-interface-port + (db:delay-if-busy tdbdat) + server-id + ipaddrstr portnum) + (debug:print 0 "INFO: Trying to start server on " ipaddrstr ":" portnum) + ;; This starts the spiffy server + ;; NEED WAY TO SET IP TO #f TO BIND ALL + ;; (start-server bind-address: ipaddrstr port: portnum) + (if config-hostname ;; this is a hint to bind directly + (start-server port: portnum bind-address: (if (equal? config-hostname "-") + ipaddrstr + config-hostname)) + (start-server port: portnum)) + ;; (portlogger:open-run-close portlogger:set-port portnum "released") + (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat) run-id ipaddrstr portnum " http-transport:try-start-server") + (debug:print 1 "INFO: server has been stopped")))) ;;====================================================================== ;; S E R V E R U T I L I T I E S ;;====================================================================== @@ -199,43 +215,72 @@ (mutex-lock! *http-mutex*) (set! 
*http-requests-in-progress* (+ 1 *http-requests-in-progress*))) ;; Send "cmd" with json payload "params" to serverdat and receive result ;; -(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 30)) - (let* ((fullurl (if (list? serverdat) - (list-ref serverdat 4) ;; (cadddr serverdat) ;; this is the uri for /api +(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3)) + (let* ((fullurl (if (vector? serverdat) + (http-transport:server-dat-get-api-req serverdat) (begin (debug:print 0 "FATAL ERROR: http-transport:client-api-send-receive called with no server info") (exit 1)))) - (res #f)) - (handle-exceptions - exn - (if (> numretries 0) - (begin - (mutex-unlock! *http-mutex*) - (thread-sleep! 10) - (http-transport:client-api-send-receive run-id serverdat cmd params (- numretries 1))) - #f) - (begin + (res #f) + (success #t) + (sparams (db:obj->string params transport: 'http))) +;; (condition-case +;; handle-exceptions +;; exn +;; (if (> numretries 0) +;; (begin +;; (mutex-unlock! *http-mutex*) +;; (thread-sleep! 1) +;; (handle-exceptions +;; exn +;; (debug:print 0 "WARNING: closing connections failed. Server at " fullurl " almost certainly dead") +;; (close-all-connections!)) +;; (debug:print 0 "WARNING: Failed to communicate with server, trying again, numretries left: " numretries) +;; (http-transport:client-api-send-receive run-id serverdat cmd sparams numretries: (- numretries 1))) +;; (begin +;; (mutex-unlock! *http-mutex*) +;; (tasks:kill-server-run-id run-id) +;; #f)) +;; (begin (debug:print-info 11 "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n") ;; set up the http-client here - (max-retry-attempts 5) + (max-retry-attempts 1) ;; consider all requests indempotent (retry-request? (lambda (request) - #t)) + #f)) ;; send the data and get the response ;; extract the needed info from the http data and ;; process and return it. 
(let* ((send-recieve (lambda () (mutex-lock! *http-mutex*) - (set! res (with-input-from-request ;; was dat - fullurl - (list (cons 'key "thekey") - (cons 'cmd cmd) - (cons 'params params)) - read-string)) + ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines) + ;; ((exn http client-error) e (print e))) + (set! res (vector + success + (db:string->obj + (handle-exceptions + exn + (begin + (set! success #f) + (debug:print 0 "WARNING: failure in with-input-from-request to " fullurl ".") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (hash-table-delete! *runremote* run-id) + ;; Killing associated server to allow clean retry.") + (tasks:kill-server-run-id run-id) ;; better to kill the server in the logic that called this routine? + ;; (signal (make-composite-condition + ;; (make-property-condition 'commfail 'message "failed to connect to server"))) + "communications failed") + (with-input-from-request ;; was dat + fullurl + (list (cons 'key "thekey") + (cons 'cmd cmd) + (cons 'params sparams)) + read-string)) + transport: 'http))) ;; Shouldn't this be a call to the managed call-all-connections stuff above? (close-all-connections!) (mutex-unlock! *http-mutex*) )) (time-out (lambda () @@ -246,89 +291,127 @@ (thread-start! th1) (thread-start! th2) (thread-join! th1) (thread-terminate! th2) (debug:print-info 11 "got res=" res) - res))))) + (if (vector? 
res) + (if (vector-ref res 0) + res + (begin ;; note: this code also called in nmsg-transport - consider consolidating it + (debug:print 0 "ERROR: error occured at server, info=" (vector-ref res 2)) + (debug:print 0 " client call chain:") + (print-call-chain (current-error-port)) + (debug:print 0 " server call chain:") + (pp (vector-ref res 1) (current-error-port)) + (signal (vector-ref result 0)))) + (signal (make-composite-condition + (make-property-condition + 'timeout + 'message "nmsg-transport:client-api-send-receive-raw timed out talking to server"))))))) + +;; careful closing of connections stored in *runremote* +;; +(define (http-transport:close-connections run-id) + (let* ((server-dat (hash-table-ref/default *runremote* run-id #f))) + (if (vector? server-dat) + (let ((api-dat (http-transport:server-dat-get-api-uri server-dat))) + (close-connection! api-dat) + #t) + #f))) + + +(define (make-http-transport:server-dat)(make-vector 6)) +(define (http-transport:server-dat-get-iface vec) (vector-ref vec 0)) +(define (http-transport:server-dat-get-port vec) (vector-ref vec 1)) +(define (http-transport:server-dat-get-api-uri vec) (vector-ref vec 2)) +(define (http-transport:server-dat-get-api-url vec) (vector-ref vec 3)) +(define (http-transport:server-dat-get-api-req vec) (vector-ref vec 4)) +(define (http-transport:server-dat-get-last-access vec) (vector-ref vec 5)) +(define (http-transport:server-dat-get-socket vec) (vector-ref vec 6)) + +(define (http-transport:server-dat-make-url vec) + (if (and (http-transport:server-dat-get-iface vec) + (http-transport:server-dat-get-port vec)) + (conc "http://" + (http-transport:server-dat-get-iface vec) + ":" + (http-transport:server-dat-get-port vec)) + #f)) + +(define (http-transport:server-dat-update-last-access vec) + (vector-set! 
vec 5 (current-seconds))) ;; ;; connect ;; (define (http-transport:client-connect iface port) - (let* ((api-url (conc "http://" iface ":" port "/api")) - (uri-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/ctrl")))) - ;; (uri-dat (make-request method: 'GET uri: (uri-reference (conc "http://" iface ":" port "/ctrl")))) - (uri-api-dat (make-request method: 'POST uri: api-url)) ;; (uri-reference (conc "http://" iface ":" port "/api")))) - ;; (uri-api-dat (make-request method: 'GET uri: (uri-reference (conc "http://" iface ":" port "/api")))) - (server-dat (list iface port uri-dat uri-api-dat api-url))) -;; (login-res (server:ping-server run-id server-dat))) ;; login-no-auto-client-setup server-dat run-id))) + (let* ((api-url (conc "http://" iface ":" port "/api")) + (api-uri (uri-reference (conc "http://" iface ":" port "/api"))) + (api-req (make-request method: 'POST uri: api-uri)) + (server-dat (vector iface port api-uri api-url api-req (current-seconds)))) server-dat)) -;; (if (and (list? login-res) -;; (car login-res)) -;; (begin -;; (hash-table-set! *runremote* run-id server-dat) -;; (debug:print-info 2 "Logged in and connected to " iface ":" port) -;; (hash-table-set! *runremote* run-id server-dat) -;; server-dat) -;; (begin -;; (debug:print-info 0 "ERROR: Failed to login or connect to " iface ":" port) -;; #f)))) ;; run http-transport:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. 
;; -(define (http-transport:keep-running server-id) +(define (http-transport:keep-running server-id run-id) ;; if none running or if > 20 seconds since ;; server last used then start shutdown ;; This thread waits for the server to come alive - (let* ((server-info (let loop ((start-time (current-seconds)) + (debug:print-info 0 "Starting the sync-back, keep alive thread in server for run-id=" run-id) + (let* ((tdbdat (tasks:open-db)) + (server-info (let loop ((start-time (current-seconds)) (changed #t) (last-sdat "not this")) (let ((sdat #f)) + (thread-sleep! 0.01) + (debug:print-info 0 "Waiting for server alive signature") (mutex-lock! *heartbeat-mutex*) (set! sdat *server-info*) (mutex-unlock! *heartbeat-mutex*) (if (and sdat (not changed) (> (- (current-seconds) start-time) 2)) sdat (begin + (debug:print-info 0 "Still waiting, last-sdat=" last-sdat) (sleep 4) - (loop start-time - (equal? sdat last-sdat) - sdat)))))) + (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes + (begin + (debug:print 0 "ERROR: transport appears to have died, exiting server " server-id " for run " run-id) + (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id "failed to start, never received server alive signature") + (exit)) + (loop start-time + (equal? sdat last-sdat) + sdat))))))) (iface (car server-info)) (port (cadr server-info)) (last-access 0) - (tdb (tasks:open-db)) - (server-timeout (let ((tmo (configf:lookup *configdat* "server" "timeout"))) - (if (and (string? 
tmo) - (string->number tmo)) - (* 60 60 (string->number tmo)) - ;; (* 3 24 60 60) ;; default to three days - ;; (* 60 1) ;; default to one minute - (* 60 60 25) ;; default to 25 hours - )))) + (server-timeout (server:get-timeout))) (let loop ((count 0) (server-state 'available)) ;; Use this opportunity to sync the inmemdb to db (let ((start-time (current-milliseconds)) (sync-time #f) (rem-time #f)) - - (if *inmemdb* (db:sync-touched *inmemdb* force-sync: #t)) + ;; inmemdb is a dbstruct + (if *inmemdb* (db:sync-touched *inmemdb* *run-id* force-sync: #t)) (set! sync-time (- (current-milliseconds) start-time)) (set! rem-time (quotient (- 4000 sync-time) 1000)) - (debug:print 0 "SYNC: time= " sync-time ", rem-time=" rem-time) + (debug:print 2 "SYNC: time= " sync-time ", rem-time=" rem-time) ;; - ;; set_running after our first pass through + ;; set_running after our first pass through and start the db ;; (if (eq? server-state 'available) - (tasks:server-set-state! tdb server-id "running")) + (begin + (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep") + (thread-sleep! 5) ;; give some margin for queries to complete before switching from file based access to server based access + (set! *inmemdb* (db:setup run-id)) + (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running"))) (if (and (<= rem-time 4) (> rem-time 0)) (thread-sleep! rem-time) (thread-sleep! 4))) ;; fallback for if the math is changed ... @@ -353,11 +436,13 @@ (set! last-access *last-db-access*) (mutex-unlock! 
*heartbeat-mutex*) ;; (debug:print 11 "last-access=" last-access ", server-timeout=" server-timeout) ;; - ;; no_traffic + ;; no_traffic, no running tests, if server 0, no running servers + ;; + ;; (let ((wait-on-running (configf:lookup *configdat* "server" "wait-on-running"))) ;; wait on running tasks (if not true then exit on time out) ;; (if (and *server-run* (> (+ last-access server-timeout) (current-seconds))) (begin @@ -368,81 +453,92 @@ ;; ;; (if (tasks:server-am-i-the-server? tdb run-id) ;; (tasks:server-set-state! tdb server-id "running")) ;; (loop 0 server-state)) - (begin - (debug:print-info 0 "Starting to shutdown the server.") - ;; need to delete only *my* server entry (future use) - (set! *time-to-exit* #t) - (if *inmemdb* (db:sync-touched *inmemdb* force-sync: #t)) - ;; - ;; start_shutdown - ;; - ( tasks:server-set-state! tdb server-id "shutting-down") - (thread-sleep! 5) - (debug:print-info 0 "Max cached queries was " *max-cache-size*) - (debug:print-info 0 "Number of cached writes " *number-of-writes*) - (debug:print-info 0 "Average cached write time " - (if (eq? *number-of-writes* 0) - "n/a (no writes)" - (/ *writes-total-delay* - *number-of-writes*)) - " ms") - (debug:print-info 0 "Number non-cached queries " *number-non-write-queries*) - (debug:print-info 0 "Average non-cached time " - (if (eq? *number-non-write-queries* 0) - "n/a (no queries)" - (/ *total-non-write-delay* - *number-non-write-queries*)) - " ms") - (debug:print-info 0 "Server shutdown complete. Exiting") - (tasks:server-delete-record tdb server-id " http-transport:keep-running") - (exit)))))) + (http-transport:server-shutdown server-id port))))) + +(define (http-transport:server-shutdown server-id port) + (let ((tdbdat (tasks:open-db))) + (debug:print-info 0 "Starting to shutdown the server.") + ;; need to delete only *my* server entry (future use) + (set! 
*time-to-exit* #t) + (if *inmemdb* (db:sync-touched *inmemdb* *run-id* force-sync: #t)) + ;; + ;; start_shutdown + ;; + (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down") + (portlogger:open-run-close portlogger:set-port port "released") + (thread-sleep! 5) + (debug:print-info 0 "Max cached queries was " *max-cache-size*) + (debug:print-info 0 "Number of cached writes " *number-of-writes*) + (debug:print-info 0 "Average cached write time " + (if (eq? *number-of-writes* 0) + "n/a (no writes)" + (/ *writes-total-delay* + *number-of-writes*)) + " ms") + (debug:print-info 0 "Number non-cached queries " *number-non-write-queries*) + (debug:print-info 0 "Average non-cached time " + (if (eq? *number-non-write-queries* 0) + "n/a (no queries)" + (/ *total-non-write-delay* + *number-non-write-queries*)) + " ms") + (debug:print-info 0 "Server shutdown complete. Exiting") + (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running complete") + (exit))) ;; all routes though here end in exit ... ;; ;; start_server? ;; (define (http-transport:launch run-id) - (set! *run-id* run-id) - (if (args:get-arg "-daemonize") - (daemon:ize)) - (if (server:check-if-running run-id) - (begin - (debug:print 0 "INFO: Server for run-id " run-id " already running") - (exit 0))) - (let loop ((server-id (open-run-close tasks:server-lock-slot tasks:open-db run-id)) - (remtries 4)) - (if (not server-id) - (if (> remtries 0) - (begin - (thread-sleep! 
2) - (loop (open-run-close tasks:server-lock-slot tasks:open-db run-id) - (- remtries 1))) - (begin - ;; since we didn't get the server lock we are going to clean up and bail out - (debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") - (open-run-close tasks:server-delete-records-for-this-pid tasks:open-db " http-transport:launch") - )) - (let* ((th2 (make-thread (lambda () - (http-transport:run - (if (args:get-arg "-server") - (args:get-arg "-server") - "-") - run-id - server-id)) "Server run")) - (th3 (make-thread (lambda () - (http-transport:keep-running server-id)) - "Keep running"))) - ;; Database connection - (set! *inmemdb* (db:setup run-id)) - (thread-start! th2) - (thread-start! th3) - (set! *didsomething* #t) - (thread-join! th2) - (exit))))) + (let* ((tdbdat (tasks:open-db))) + (set! *run-id* run-id) + (if (args:get-arg "-daemonize") + (begin + (daemon:ize) + (if *alt-log-file* ;; we should re-connect to this port, I think daemon:ize disrupts it + (begin + (current-error-port *alt-log-file*) + (current-output-port *alt-log-file*))))) + (if (server:check-if-running run-id) + (begin + (debug:print 0 "INFO: Server for run-id " run-id " already running") + (exit 0))) + (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id)) + (remtries 4)) + (if (not server-id) + (if (> remtries 0) + (begin + (thread-sleep! 
2) + (loop (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id) + (- remtries 1))) + (begin + ;; since we didn't get the server lock we are going to clean up and bail out + (debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") + (tasks:server-delete-records-for-this-pid (db:delay-if-busy tdbdat) " http-transport:launch") + )) + (let* ((th2 (make-thread (lambda () + (debug:print-info 0 "Server run thread started") + (http-transport:run + (if (args:get-arg "-server") + (args:get-arg "-server") + "-") + run-id + server-id)) "Server run")) + (th3 (make-thread (lambda () + (debug:print-info 0 "Server monitor thread started") + (http-transport:keep-running server-id run-id)) + "Keep running"))) + (thread-start! th2) + (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. + (thread-start! th3) + (set! *didsomething* #t) + (thread-join! th2) + (exit)))))) (define (http:ping run-id host-port) (let* ((server-dat (http-transport:client-connect (car host-port)(cadr host-port))) (login-res (rmt:login-no-auto-client-setup server-dat run-id))) (if (and (list? login-res) @@ -453,10 +549,11 @@ (begin (print "LOGIN_FAILED") (exit 1))))) (define (http-transport:server-signal-handler signum) + (signal-mask! signum) (handle-exceptions exn (debug:print " ... exiting ...") (let ((th1 (make-thread (lambda () (thread-sleep! 
1)) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -11,11 +11,11 @@ ;;====================================================================== ;; launch a task - this runs on the originating host, tests themselves ;; ;;====================================================================== -(use regex regex-case base64 sqlite3 srfi-18 directory-utils posix-extras) +(use regex regex-case base64 sqlite3 srfi-18 directory-utils posix-extras z3) (import (prefix base64 base64:)) (import (prefix sqlite3 sqlite3:)) (declare (unit launch)) (declare (uses common)) @@ -48,15 +48,15 @@ ;; if handed a string, process it, else look for MT_CMDINFO (define (launch:get-cmdinfo-assoc-list #!key (encoded-cmd #f)) (let ((enccmd (if encoded-cmd encoded-cmd (getenv "MT_CMDINFO")))) (if enccmd - (read (open-input-string (base64:base64-decode enccmd))) + (common:read-encoded-string enccmd) '()))) (define (launch:execute encoded-cmd) - (let* ((cmdinfo (read (open-input-string (base64:base64-decode encoded-cmd))))) + (let* ((cmdinfo (common:read-encoded-string encoded-cmd))) (setenv "MT_CMDINFO" encoded-cmd) (if (list? 
cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed) ;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1)) (let* ((testpath (assoc/default 'testpath cmdinfo)) ;; testpath is the test spec area (top-path (assoc/default 'toppath cmdinfo)) @@ -64,11 +64,11 @@ (test-name (assoc/default 'test-name cmdinfo)) (runscript (assoc/default 'runscript cmdinfo)) (ezsteps (assoc/default 'ezsteps cmdinfo)) ;; (runremote (assoc/default 'runremote cmdinfo)) (transport (assoc/default 'transport cmdinfo)) - (serverinf (assoc/default 'serverinf cmdinfo)) + ;; (serverinf (assoc/default 'serverinf cmdinfo)) (port (assoc/default 'port cmdinfo)) (run-id (assoc/default 'run-id cmdinfo)) (test-id (assoc/default 'test-id cmdinfo)) (target (assoc/default 'target cmdinfo)) (itemdat (assoc/default 'itemdat cmdinfo)) @@ -75,10 +75,11 @@ (env-ovrd (assoc/default 'env-ovrd cmdinfo)) (set-vars (assoc/default 'set-vars cmdinfo)) ;; pre-overrides from -setvar (runname (assoc/default 'runname cmdinfo)) (megatest (assoc/default 'megatest cmdinfo)) (runtlim (assoc/default 'runtlim cmdinfo)) + (item-path (item-list->path itemdat)) (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo)) (keys #f) (keyvals #f) (fullrunscript (if (not runscript) #f @@ -89,12 +90,54 @@ (file-execute-access? fulln)) fulln runscript))))) ;; assume it is on the path (rollup-status 0)) (change-directory top-path) + + ;; (set-signal-handler! signal/int (lambda () + + ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART, + ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY* + ;; + (let ((test-info (rmt:get-testinfo-state-status run-id test-id))) + (if (not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))) + (tests:test-force-state-status! 
run-id test-id "REMOTEHOSTSTART" "n/a") + (begin + (debug:print 0 "ERROR: test state is " (db:test-get-state test-info) ", cannot proceed") + (exit)))) + (debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name)) (set! keys (rmt:get-keys)) + ;; (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) ;; these may be needed by the launching process + ;; one of these is defunct/redundant ... + (if (not (launch:setup-for-run force: #t)) + (begin + (debug:print 0 "Failed to setup, exiting") + ;; (sqlite3:finalize! db) + ;; (sqlite3:finalize! tdb) + (exit 1))) + (change-directory *toppath*) + + ;; NOTE: Current order is to process runconfigs *before* setting the MT_ vars. This + ;; seems non-ideal but could well break stuff + ;; BUG? BUG? BUG? + + (let ((rconfig (full-runconfigs-read))) ;; (read-config (conc *toppath* "/runconfigs.config") #f #t sections: (list "default" target)))) + ;; (setup-env-defaults (conc *toppath* "/runconfigs.config") run-id (make-hash-table) keyvals target) + ;; (set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id)) + ;; Now have runconfigs data loaded, set environment vars + (for-each (lambda (section) + (for-each (lambda (varval) + (let ((var (car varval)) + (val (cadr varval))) + (if (and (string? var)(string? val)) + (begin + (setenv var (config:eval-string-in-environment val))) ;; val) + (debug:print 0 "ERROR: bad variable spec, " var "=" val)))) + (configf:get-section rconfig section))) + (list "default" target))) + (change-directory work-area) (set! keyvals (keys:target->keyval keys target)) ;; apply pre-overrides before other variables. The pre-override vars must not ;; clobbers things from the official sources such as megatest.config and runconfigs.config (if (string? 
set-vars) (let ((varpairs (string-split set-vars ","))) @@ -118,40 +161,33 @@ (exit))))) (list (list "MT_TEST_RUN_DIR" work-area) (list "MT_TEST_NAME" test-name) (list "MT_ITEM_INFO" (conc itemdat)) + (list "MT_ITEMPATH" item-path) (list "MT_RUNNAME" runname) (list "MT_MEGATEST" megatest) (list "MT_TARGET" target) - (list "MT_LINKTREE" (configf:lookup *configdat* "setup" "linktree")))) + (list "MT_LINKTREE" (configf:lookup *configdat* "setup" "linktree")) + (list "MT_TESTSUITENAME" (common:get-testsuite-name)))) + (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path))) ;; (change-directory top-path) - (if (not (setup-for-run)) - (begin - (debug:print 0 "Failed to setup, exiting") - ;; (sqlite3:finalize! db) - ;; (sqlite3:finalize! tdb) - (exit 1))) ;; Can setup as client for server mode now ;; (client:setup) - (change-directory *toppath*) - (set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) ;; these may be needed by the launching process - (change-directory work-area) - - (set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id)) + ;; environment overrides are done *before* the remaining critical envars. (alist->env-vars env-ovrd) - (set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) + (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) (set-item-env-vars itemdat) (save-environment-as-files "megatest") ;; open-run-close not needed for test-set-meta-info - (tests:set-full-meta-info test-id run-id 0 work-area) + ;; (tests:set-full-meta-info #f test-id run-id 0 work-area) + ;; (tests:set-full-meta-info test-id run-id 0 work-area) + (tests:set-full-meta-info #f test-id run-id 0 work-area 10) - ;; (tests:test-set-status! test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f) - (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") (thread-sleep! 0.3) ;; NFS slowness has caused grief here (if (args:get-arg "-xterm") (set! 
fullrunscript "xterm") (if (and fullrunscript (not (file-execute-access? fullrunscript))) @@ -176,15 +212,17 @@ ;; force RUNNING/n/a (thread-sleep! 0.3) (tests:test-force-state-status! run-id test-id "RUNNING" "n/a") + (rmt:roll-up-pass-fail-counts run-id test-name item-path "RUNNING") (thread-sleep! 0.3) ;; NFS slowness has caused grief here ;; if there is a runscript do it first (if fullrunscript (let ((pid (process-run fullrunscript))) + (rmt:test-set-top-process-pid run-id test-id pid) (let loop ((i 0)) (let-values (((pid-val exit-status exit-code) (process-wait pid #t))) (mutex-lock! m) (vector-set! exit-info 0 pid) @@ -236,10 +274,11 @@ (debug:print 4 "script: " script) (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f) ;; now launch (let ((pid (process-run script))) + (rmt:test-set-top-process-pid run-id test-id pid) (let processloop ((i 0)) (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) (mutex-lock! m) (vector-set! exit-info 0 pid) (vector-set! exit-info 1 exit-status) @@ -302,11 +341,13 @@ (round (- (current-seconds) start-seconds))))) (kill-tries 0)) - (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) + ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area) + ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) + (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10) (let loop ((minutes (calc-minutes))) (begin (set! kill-job? (or (test-get-kill-request run-id test-id) ;; run-id test-name itemdat)) (and runtlim (let* ((run-seconds (- (current-seconds) start-seconds)) (time-exceeded (> run-seconds runtlim))) @@ -313,45 +354,45 @@ (if time-exceeded (begin (debug:print-info 0 "KILLING TEST DUE TO TIME LIMIT EXCEEDED! 
Runtime=" run-seconds " seconds, limit=" runtlim) #t) #f))))) - ;; open-run-close not needed for test-set-meta-info (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f) - ;; (tests:set-partial-meta-info test-id run-id minutes work-area) (if kill-job? (begin (mutex-lock! m) ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this ;; section and the runit section? Or add a loop that tries three times with a 1/4 second ;; between tries? - (let* ((pid (vector-ref exit-info 0))) - (if (number? pid) - (process-signal pid signal/kill) - ;; (begin - ;; (debug:print 0 "WARNING: Request received to kill job (attempt # " kill-tries ")") - ;; (let ((processes (cmd-run->list (conc "pgrep -l -P " pid)))) - ;; (for-each - ;; (lambda (p) - ;; (let* ((parts (string-split p)) - ;; (p-id (if (> (length parts) 0) - ;; (string->number (car parts)) - ;; #f))) - ;; (if p-id - ;; (begin - ;; (debug:print 0 "Killing " (cadr parts) "; kill -9 " p-id) - ;; (system (conc "kill -9 " p-id)))))) - ;; (car processes)) - ;; (system (conc "kill -9 -" pid)))) + (let* ((pid1 (vector-ref exit-info 0)) + (pid2 (rmt:test-get-top-process-pid run-id test-id)) + (pids (delete-duplicates (filter number? (list pid1 pid2))))) + (if (not (null? pids)) + (begin + (for-each + (lambda (pid) + (handle-exceptions + exn + (begin + (debug:print-info 0 "Unable to kill process with pid " pid ", possibly already killed.") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))) + (debug:print 0 "WARNING: Request received to kill job " pid) ;; " (attempt # " kill-tries ")") + (if (process:alive? pid) + (begin + (process-signal pid signal/int) + (thread-sleep! 5) + (if (process:process-alive? pid) + (process-signal pid signal/kill)))))) + pids) + (tests:test-set-status! 
run-id test-id "KILLED" "KILLED" (args:get-arg "-m") #f)) (begin - (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process") - (tests:test-set-status! run-id test-id "KILLED" "FAIL" - (args:get-arg "-m") #f) - (exit 1) ;; IS THIS NECESSARY OR WISE??? + (debug:print 0 "ERROR: Nothing to kill, pid1=" pid1 ", pid2=" pid2) + (tests:test-set-status! run-id test-id "KILLED" "FAILED TO KILL" (args:get-arg "-m") #f) ))) - (set! kill-tries (+ 1 kill-tries)) - (mutex-unlock! m))) + (mutex-unlock! m) + ;; no point in sticking around. Exit now. + (exit))) (if keep-going (begin (thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses (if keep-going (loop (calc-minutes))))))) @@ -360,10 +401,11 @@ (th2 (make-thread runit "run job"))) (set! job-thread th2) (thread-start! th1) (thread-start! th2) (thread-join! th2) + (debug:print-info 0 "Megatest exectute of test " test-name ", item path " item-path " complete. Notifying the db ...") (set! keep-going #f) (thread-join! th1) (thread-sleep! 1) ;; give thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec. (mutex-lock! m) (let* ((item-path (item-list->path itemdat)) @@ -402,32 +444,63 @@ work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n") (if (not (vector-ref exit-info 1)) (exit 4))))))) ;; set up the very basics needed for doing anything here. -(define (setup-for-run) +(define (launch:setup-for-run #!key (force #f)) ;; would set values for KEYS in the environment here for better support of env-override but ;; have chicken/egg scenario. need to read megatest.config then read it again. Going to ;; pass on that idea for now ;; special case - (if (not (hash-table? *configdat*)) ;; no need to re-open on every call + (if (or force (not (hash-table? *configdat*))) ;; no need to re-open on every call (begin (set! 
*configinfo* (find-and-read-config (if (args:get-arg "-config")(args:get-arg "-config") "megatest.config") environ-patt: "env-override" given-toppath: (get-environment-variable "MT_RUN_AREA_HOME") pathenvvar: "MT_RUN_AREA_HOME")) (set! *configdat* (if (car *configinfo*)(car *configinfo*) #f)) (set! *toppath* (if (car *configinfo*)(cadr *configinfo*) #f)) - (if *toppath* - (let ((dbdir (conc *toppath* "/db"))) - (handle-exceptions - exn - (debug:print 0 "ERROR: failed to create the " dbdir " area for your database files") - (if (not (directory-exists? dbdir))(create-directory dbdir))) - (setenv "MT_RUN_AREA_HOME" *toppath*)) - (debug:print 0 "ERROR: failed to find the top path to your Megatest area.")))) + (let* ((tmptransport (configf:lookup *configdat* "server" "transport")) + (transport (if tmptransport (string->symbol tmptransport) 'http))) + (if (member transport '(http rpc nmsg)) + (set! *transport-type* transport) + (begin + (debug:print 0 "ERROR: Unrecognised transport " transport) + (exit)))) + (let ((linktree (configf:lookup *configdat* "setup" "linktree"))) ;; link tree is critical + (if linktree + (if (not (file-exists? linktree)) + (begin + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: Something went wrong when trying to create linktree dir at " linktree) + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (exit 1)) + (create-directory linktree #t)))) + (begin + (debug:print 0 "ERROR: linktree not defined in [setup] section of megatest.config") + (exit 1))) + (if linktree + (let ((dbdir (conc linktree "/.db"))) + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: failed to create the " dbdir " area for your database files") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))) + (if (not (directory-exists? 
dbdir))(create-directory dbdir))) + (setenv "MT_LINKTREE" linktree)) + (begin + (debug:print 0 "ERROR: linktree is required in your megatest.config [setup] section") + (exit 1))) + (if (and *toppath* + (directory-exists? *toppath*)) + (setenv "MT_RUN_AREA_HOME" *toppath*) + (begin + (debug:print 0 "ERROR: failed to find the top path to your Megatest area.") + (exit 1)))))) *toppath*) (define (get-best-disk confdat) (let* ((disks (hash-table-ref/default confdat "disks" #f)) (best #f) @@ -434,25 +507,35 @@ (bestsize 0)) (if disks (for-each (lambda (disk-num) (let* ((dirpath (cadr (assoc disk-num disks))) - (freespc (if (and (directory? dirpath) - (file-write-access? dirpath)) - (get-df dirpath) - (begin - (debug:print 0 "WARNING: path " dirpath " in [disks] section not valid or writable") - 0)))) + (freespc (cond + ((not (directory? dirpath)) + (if (common:low-noise-print 50 "disks not a dir " disk-num) + (debug:print 0 "WARNING: disk " disk-num " at path " dirpath " is not a directory - ignoring it.")) + -1) + ((not (file-write-access? dirpath)) + (if (common:low-noise-print 50 "disks not writeable " disk-num) + (debug:print 0 "WARNING: disk " disk-num " at path " dirpath " is not writeable - ignoring it.")) + -1) + ((not (eq? (string-ref dirpath 0) #\/)) + (if (common:low-noise-print 50 "disks not a proper path " disk-num) + (debug:print 0 "WARNING: disk " disk-num " at path " dirpath " is not a fully qualified path - ignoring it.")) + -1) + (else + (get-df dirpath))))) (if (> freespc bestsize) (begin (set! best dirpath) (set! bestsize freespc))))) (map car disks))) - (if best + (if (and best (> bestsize 0)) best (begin - (debug:print 0 "ERROR: No valid disks found in megatest.config. Please add some to your [disks] section") + (if (common:low-noise-print 20 "no valid disks") + (debug:print 0 "ERROR: No valid disks found in megatest.config. 
Please add some to your [disks] section and ensure the directory exists!")) (exit 1))))) ;; Desired directory structure: ;; ;; - - -. @@ -466,11 +549,11 @@ ;; ;; All log file links should be stored relative to the top of link path ;; ;; - [ - ] ;; -(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat) +(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat #!key (remtries 2)) (let* ((item-path (item-list->path itemdat)) (runname (db:get-value-by-header (db:get-rows run-info) (db:get-header run-info) "runname")) ;; convert back to db: from rdb: - this is always run at server end @@ -488,15 +571,17 @@ ;; ensure this exists first as links to subtests must be created there (linktree (let ((rd (config-lookup *configdat* "setup" "linktree"))) (if rd rd (conc *toppath* "/runs")))) - (lnkbase (conc linktree "/" target "/" runname)) - (lnkpath (conc lnkbase "/" testname)) - (lnkpathf (conc lnkpath (if not-iterated "" "/") item-path))) + (lnkbase (conc linktree "/" target "/" runname)) + (lnkpath (conc lnkbase "/" testname)) + (lnkpathf (conc lnkpath (if not-iterated "" "/") item-path)) + (lnktarget (conc lnkpath "/" item-path))) - ;; Update the rundir path in the test record for all + ;; Update the rundir path in the test record for all, rundir=physical, shortdir=logical + ;; rundir shortdir (rmt:general-call 'test-set-rundir-shortdir run-id lnkpathf test-path testname item-path) (debug:print 2 "INFO:\n lnkbase=" lnkbase "\n lnkpath=" lnkpath "\n toptest-path=" toptest-path "\n test-path=" test-path) (if (not (file-exists? linktree)) (begin @@ -572,11 +657,11 @@ (hash-table-set! *toptest-paths* testname toptest-path))))) ;; The toptest path has been created, the link to the test in the linktree has ;; been created. 
Now, if this is an iterated test the real test dir must be created (if (not not-iterated) ;; this is an iterated test - (let ((lnktarget (conc lnkpath "/" item-path))) + (begin ;; (let ((lnktarget (conc lnkpath "/" item-path))) (debug:print 2 "Setting up sub test run area") (debug:print 2 " - creating run area in " test-path) (handle-exceptions exn (begin @@ -589,15 +674,18 @@ ;; If there is already a symlink delete it and recreate it. (handle-exceptions exn (begin - (debug:print 0 "ERROR: Failed to re-create link " linktarget ((condition-property-accessor 'exn 'message) exn) ", exiting") + (debug:print 0 "ERROR: Failed to re-create link " lnktarget ((condition-property-accessor 'exn 'message) exn) ", exiting") (exit)) (if (symbolic-link? lnktarget) (delete-file lnktarget)) (if (not (file-exists? lnktarget)) (create-symbolic-link test-path lnktarget))))) + (if (not (directory? test-path)) + (create-directory test-path #t)) ;; this is a hack, I don't know why out of the blue this path does not exist sometimes + (if (directory? test-path) (begin (let* ((ovrcmd (let ((cmd (config-lookup *configdat* "setup" "testcopycmd"))) (if cmd ;; substitute the TEST_SRC_PATH and TEST_TARG_PATH @@ -610,11 +698,16 @@ " >> " test-path "/mt_launch.log 2>> " test-path "/mt_launch.log"))) (status (system cmd))) (if (not (eq? status 0)) (debug:print 2 "ERROR: problem with running \"" cmd "\""))) (list lnkpathf lnkpath )) - (list #f #f)))) + (if (> remtries 0) + (begin + (debug:print 0 "ERROR: Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries) + ;; + (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat remtries: (- remtries 1))) + (list #f #f))))) ;; 1. look though disks list for disk with most space ;; 2. create run dir on disk, path name is meaningful ;; 3. create link from run dir to megatest runs area ;; 4. 
remotely run the test on allocated host @@ -653,22 +746,22 @@ (local-megatest (let* ((lm (car (argv))) (dir (pathname-directory lm)) (exe (pathname-strip-directory lm))) (conc (if dir (conc dir "/") "") (case (string->symbol exe) - ((dboard) "megatest") - ((mtest) "megatest") + ((dboard) "../megatest") + ((mtest) "../megatest") ((dashboard) "megatest") (else exe))))) - (test-sig (conc test-name ":" (item-list->path itemdat))) ;; test-path is the full path including the item-path + (item-path (item-list->path itemdat)) + (test-sig (conc test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path (work-area #f) (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all (diskpath #f) (cmdparms #f) (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) (mt-bindir-path #f) - (item-path (item-list->path itemdat)) ;; (test-id (cdb:remote-run db:get-test-id #f run-id test-name item-path)) (testinfo (rmt:get-test-info-by-id run-id test-id)) (mt_target (string-intersperse (map cadr keyvals) "/")) (debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '()) (if (args:get-arg "-logging")(list "-logging") '())))) @@ -677,10 +770,20 @@ ;; set the megatest to be called on the remote host (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest")) (set! mt-bindir-path (pathname-directory remote-megatest)) (if launcher (set! 
launcher (string-split launcher))) ;; set up the run work area for this test + (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run + (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir + (begin + (debug:print-info 0 "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path) + (runs:remove-test-directory #f testinfo #t))) ;; remove data only, do not perturb the record + + ;; prevent overlapping actions - set to LAUNCHED as early as possible + ;; + (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) + (rmt:roll-up-pass-fail-counts run-id test-name item-path "LAUNCHED") (set! diskpath (get-best-disk *configdat*)) (if diskpath (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat))) (set! work-area (car dat)) (set! toptest-work-area (cadr dat)) @@ -688,35 +791,36 @@ (begin (set! work-area (conc test-path "/tmp_run")) (create-directory work-area #t) (debug:print 0 "WARNING: No disk work area specified - running in the test directory under tmp_run"))) (set! 
cmdparms (base64:base64-encode - (with-output-to-string - (lambda () ;; (list 'hosts hosts) - (write (list (list 'testpath test-path) - (list 'transport (conc *transport-type*)) - (list 'serverinf *server-info*) - (list 'toppath *toppath*) - (list 'work-area work-area) - (list 'test-name test-name) - (list 'runscript runscript) - (list 'run-id run-id ) - (list 'test-id test-id ) - (list 'itemdat itemdat ) - (list 'megatest remote-megatest) - (list 'ezsteps ezsteps) - (list 'target mt_target) - (list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f)) - (list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '())) - (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) - (list 'runname runname) - (list 'mt-bindir-path mt-bindir-path))))))) + (z3:encode-buffer + (with-output-to-string + (lambda () ;; (list 'hosts hosts) + (write (list (list 'testpath test-path) + (list 'transport (conc *transport-type*)) + ;; (list 'serverinf *server-info*) + (list 'toppath *toppath*) + (list 'work-area work-area) + (list 'test-name test-name) + (list 'runscript runscript) + (list 'run-id run-id ) + (list 'test-id test-id ) + ;; (list 'item-path item-path ) + (list 'itemdat itemdat ) + (list 'megatest remote-megatest) + (list 'ezsteps ezsteps) + (list 'target mt_target) + (list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f)) + (list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '())) + (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) + (list 'runname runname) + (list 'mt-bindir-path mt-bindir-path)))))))) + ;; clean out step records from previous run if they exist - ;; (debug:print-info 4 "FIXMEEEEE!!!! 
This can be removed some day, perhaps move all test records to the test db?") - ;; (open-run-close db:delete-test-step-records db test-id) + ;; (rmt:delete-test-step-records run-id test-id) (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir - (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) (cond ((and launcher hosts) ;; must be using ssh hostname (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms) debug-param))) ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms)))) (launcher Index: lock-queue.scm ================================================================== --- lock-queue.scm +++ lock-queue.scm @@ -5,84 +5,124 @@ ;; ;; This program is distributed WITHOUT ANY WARRANTY; without even the ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. -;;====================================================================== -;; launch a task - this runs on the originating host, tests themselves -;; -;;====================================================================== - (use sqlite3 srfi-18) (import (prefix sqlite3 sqlite3:)) (declare (unit lock-queue)) (declare (uses common)) +(declare (uses tasks)) ;;====================================================================== ;; attempt to prevent overlapping updates of rollup files by queueing ;; update requests in an sqlite db ;;====================================================================== -(define (lock-queue:open-db fname) +;;====================================================================== +;; db record, +;;====================================================================== + +(define (make-lock-queue:db-dat)(make-vector 3)) +(define-inline (lock-queue:db-dat-get-db vec) (vector-ref vec 0)) +(define-inline (lock-queue:db-dat-get-path vec) (vector-ref vec 1)) +(define-inline 
(lock-queue:db-dat-set-db! vec val)(vector-set! vec 0 val)) +(define-inline (lock-queue:db-dat-set-path! vec val)(vector-set! vec 1 val)) + +(define (lock-queue:open-db fname #!key (count 10)) (let* ((actualfname (conc fname ".lockdb")) (dbexists (file-exists? actualfname)) (db (sqlite3:open-database actualfname)) (handler (make-busy-timeout 136000))) (if dbexists - db + (vector db actualfname) (begin - (sqlite3:execute - db - "CREATE TABLE IF NOT EXISTS queue ( - id INTEGER PRIMARY KEY, - test_id INTEGER, - start_time INTEGER, - state TEXT, - CONSTRAINT queue_constraint UNIQUE (test_id));") - (sqlite3:execute - db - "CREATE TABLE IF NOT EXISTS runlocks ( - id INTEGER PRIMARY KEY, - test_id INTEGER, - run_lock TEXT, - CONSTRAINT runlock_constraint UNIQUE (run_lock));"))) + (handle-exceptions + exn + (begin + (thread-sleep! 10) + (if (> count 0) + (lock-queue:open-db fname count: (- count 1)) + (vector db actualfname))) + (sqlite3:with-transaction + db + (lambda () + (sqlite3:execute + db + "CREATE TABLE IF NOT EXISTS queue ( + id INTEGER PRIMARY KEY, + test_id INTEGER, + start_time INTEGER, + state TEXT, + CONSTRAINT queue_constraint UNIQUE (test_id));") + (sqlite3:execute + db + "CREATE TABLE IF NOT EXISTS runlocks ( + id INTEGER PRIMARY KEY, + test_id INTEGER, + run_lock TEXT, + CONSTRAINT runlock_constraint UNIQUE (run_lock));")))))) (sqlite3:set-busy-handler! db handler) - db)) + (vector db actualfname))) -(define (lock-queue:set-state db test-id newstate) +(define (lock-queue:set-state dbdat test-id newstate #!key (remtries 10)) + (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200) (handle-exceptions exn - (thread-sleep! 30) - (lock-queue:set-state db test-id newstate) - (sqlite3:execute db "UPDATE queue SET state=? WHERE test_id=?;" + (if (> remtries 0) + (begin + (debug:print 0 "WARNING: exception on lock-queue:set-state. 
Trying again in 30 seconds.") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (thread-sleep! 30) + (lock-queue:set-state dbdat test-id newstate remtries: (- remtries 1))) + (begin + (debug:print 0 "ERROR: Failed to set lock state for test with id " test-id ", error: " ((condition-property-accessor 'exn 'message) exn) ", giving up.") + #f)) + (sqlite3:execute (lock-queue:db-dat-get-db dbdat) "UPDATE queue SET state=? WHERE test_id=?;" newstate test-id))) -(define (lock-queue:any-younger? db mystart test-id) +(define (lock-queue:any-younger? dbdat mystart test-id #!key (remtries 10)) + (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200) (handle-exceptions exn - (thread-sleep! 30) - (lock-queue:any-younger? db mystart test-id) + (if (> remtries 0) + (begin + (debug:print 0 "WARNING: exception on lock-queue:any-younger. Trying again in 30 seconds.") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (thread-sleep! 30) + (lock-queue:any-younger? dbdat mystart test-id remtries: (- remtries 1))) + (begin + (debug:print 0 "ERROR: Failed to find younger locks for test with id " test-id ", error: " ((condition-property-accessor 'exn 'message) exn) ", giving up.") + #f)) (let ((res #f)) (sqlite3:for-each-row (lambda (tid) ;; Actually this should not be needed as mystart cannot be simultaneously less than and test-id same as (if (not (equal? tid test-id)) (set! 
res tid))) - db + (lock-queue:db-dat-get-db dbdat) "SELECT test_id FROM queue WHERE start_time > ?;" mystart) res))) -(define (lock-queue:get-lock db test-id) - (let ((res #f) - (lckqry (sqlite3:prepare db "SELECT test_id,run_lock FROM runlocks WHERE run_lock='locked';")) - (mklckqry (sqlite3:prepare db "INSERT INTO runlocks (test_id,run_lock) VALUES (?,'locked');"))) +(define (lock-queue:get-lock dbdat test-id #!key (count 10)(waiting-msg #f)) + (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 remove: #t waiting-msg: "lock-queue:get-lock, waiting on journal") + (let* ((res #f) + (db (lock-queue:db-dat-get-db dbdat)) + (lckqry (sqlite3:prepare db "SELECT test_id,run_lock FROM runlocks WHERE run_lock='locked';")) + (mklckqry (sqlite3:prepare db "INSERT INTO runlocks (test_id,run_lock) VALUES (?,'locked');"))) (let ((result (handle-exceptions exn - #f + (begin + (debug:print 0 "WARNING: failed to get queue lock. Will try again in a few seconds") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (thread-sleep! 10) + (if (> count 0) + (lock-queue:get-lock dbdat test-id count: (- count 1))) + #f) (sqlite3:with-transaction db (lambda () (sqlite3:for-each-row (lambda (tid lockstate) (set! res (list tid lockstate))) @@ -97,46 +137,93 @@ #t))))))) (sqlite3:finalize! lckqry) (sqlite3:finalize! mklckqry) result))) -(define (lock-queue:release-lock fname test-id) - (let ((db (lock-queue:open-db fname))) - (sqlite3:execute db "DELETE FROM runlocks WHERE test_id=?;" test-id) - (sqlite3:finalize! db))) - -(define (lock-queue:steal-lock db test-id) - (sqlite3:execute db "DELETE FROM runlocks WHERE run_lock='locked';") - (lock-queue:get-lock db test-it)) +(define (lock-queue:release-lock fname test-id #!key (count 10)) + (let* ((dbdat (lock-queue:open-db fname))) + (handle-exceptions + exn + (begin + (debug:print 0 "WARNING: Failed to release queue lock. 
Will try again in few seconds") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (thread-sleep! (/ count 10)) + (if (> count 0) + (begin + (sqlite3:finalize! (lock-queue:db-dat-get-db dbdat)) + (lock-queue:release-lock fname test-id count: (- count 1))) + (let ((journal (conc fname "-journal"))) + ;; If we've tried ten times and failed there is a serious problem + ;; try to remove the lock db and allow it to be recreated + (handle-exceptions + exn + #f + (if (file-exists? journal)(delete-file journal)) + (if (file-exists? fname) (delete-file fname)) + #f)))) + (sqlite3:execute (lock-queue:db-dat-get-db dbdat) "DELETE FROM runlocks WHERE test_id=?;" test-id) + (sqlite3:finalize! (lock-queue:db-dat-get-db dbdat))))) + +(define (lock-queue:steal-lock dbdat test-id #!key (count 10)) + (debug:print-info 0 "Attempting to steal lock at " (lock-queue:db-dat-get-path dbdat)) + (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 "lock-queue:steal-lock; waiting on journal") + (handle-exceptions + exn + (begin + (debug:print 0 "WARNING: Failed to steal queue lock. Will try again in few seconds") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (thread-sleep! 10) + (if (> count 0) + (lock-queue:steal-lock dbdat test-id count: (- count 1)) + #f)) + (sqlite3:execute (lock-queue:db-dat-get-db dbdat) "DELETE FROM runlocks WHERE run_lock='locked';")) + (lock-queue:get-lock dbdat test-it)) ;; returns #f if ok to skip the task ;; returns #t if ok to proceed with task ;; otherwise waits ;; -(define (lock-queue:wait-turn fname test-id) - (let ((db (lock-queue:open-db fname)) - (mystart (current-seconds))) - (sqlite3:execute - db - "INSERT OR REPLACE INTO queue (test_id,start_time,state) VALUES (?,?,'waiting');" - test-id mystart) - (thread-sleep! 1) ;; give other tests a chance to register - (let ((result - (let loop ((younger-waiting (lock-queue:any-younger? 
db mystart test-id))) - (if younger-waiting - (begin - ;; no need for us to wait. mark in the lock queue db as skipping - (lock-queue:set-state db test-id "skipping") - #f) ;; let the calling process know that nothing needs to be done - (if (lock-queue:get-lock db test-id) - #t - (if (> (- (current-seconds) mystart) 36000) ;; waited too long, steal the lock - (lock-queue:steal-lock db test-id) - (begin - (thread-sleep! 1) - (loop (lock-queue:any-younger? db mystart test-id))))))))) - (sqlite3:finalize! db) - result))) +(define (lock-queue:wait-turn fname test-id #!key (count 10)(waiting-msg #f)) + (let* ((dbdat (lock-queue:open-db fname)) + (mystart (current-seconds)) + (db (lock-queue:db-dat-get-db dbdat))) + (handle-exceptions + exn + (begin + (debug:print 0 "WARNING: Failed to find out if it is ok to skip the wait queue. Will try again in few seconds") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (print-call-chain (current-error-port)) + (thread-sleep! 10) + (if (> count 0) + (begin + (sqlite3:finalize! db) + (lock-queue:wait-turn fname test-id count: (- count 1))) + (begin + (debug:print 0 "Giving up calls to lock-queue:wait-turn for test-id " test-id " at path " fname ", printing call chain") + (print-call-chain (current-error-port)) + #f))) + (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 waiting-msg: "lock-queue:wait-turn; waiting on journal file") + (sqlite3:execute + db + "INSERT OR REPLACE INTO queue (test_id,start_time,state) VALUES (?,?,'waiting');" + test-id mystart) + (thread-sleep! 1) ;; give other tests a chance to register + (let ((result + (let loop ((younger-waiting (lock-queue:any-younger? dbdat mystart test-id))) + (if younger-waiting + (begin + ;; no need for us to wait. 
mark in the lock queue db as skipping + (lock-queue:set-state dbdat test-id "skipping") + #f) ;; let the calling process know that nothing needs to be done + (if (lock-queue:get-lock dbdat test-id) + #t + (if (> (- (current-seconds) mystart) 36000) ;; waited too long, steal the lock + (lock-queue:steal-lock dbdat test-id) + (begin + (thread-sleep! 1) + (loop (lock-queue:any-younger? dbdat mystart test-id))))))))) + (sqlite3:finalize! db) + result)))) ;; (use trace) ;; (trace lock-queue:get-lock lock-queue:release-lock lock-queue:wait-turn lock-queue:any-younger? lock-queue:set-state) Index: megatest-version.scm ================================================================== --- megatest-version.scm +++ megatest-version.scm @@ -1,7 +1,7 @@ -;; Always use two digit decimal -;; 1.01, 1.02...1.10,1.11 ... 1.99,2.00.. +;; Always use two or four digit decimal +;; 1.01, 1.02...1.10,1.11,1,1101 ... 1.99,2.00.. (declare (unit megatest-version)) -(define megatest-version 1.6001) +(define megatest-version 1.6006) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -8,11 +8,13 @@ ;; PURPOSE. ;; (include "common.scm") ;; (include "megatest-version.scm") -(use sqlite3 srfi-1 posix regex regex-case srfi-69 base64 format readline apropos json http-client directory-utils rpc) ;; (srfi 18) extras) +(use sqlite3 srfi-1 posix regex regex-case srfi-69 base64 format readline apropos json http-client directory-utils rpc ;; (srfi 18) extras) + http-client srfi-18) ;; zmq extras) + (import (prefix sqlite3 sqlite3:)) (import (prefix base64 base64:)) (import (prefix rpc rpc:)) ;; (use zmq) @@ -61,27 +63,28 @@ Launching and managing runs -runall : run all tests that are not state COMPLETED and status PASS, CHECK or KILLED -runtests tst1,tst2 ... 
: run tests - -remove-runs : remove the data for a run, requires :runname and -testpatt + -remove-runs : remove the data for a run, requires -runname and -testpatt Optionally use :state and :status -set-state-status X,Y : set state to X and status to Y, requires controls per -remove-runs -rerun FAIL,WARN... : force re-run for tests with specificed status(s) -lock : lock run specified by target and runname -unlock : unlock run specified by target and runname - -set-run-status status : sets status for run to status, requires -target and :runname + -set-run-status status : sets status for run to status, requires -target and -runname -get-run-status : gets status for run specified by target and runname -run-wait : wait on run specified by target and runname + -preclean : remove the existing test directory before running the test Selectors (e.g. use for -runtests, -remove-runs, -set-state-status, -list-runs etc.) -target key1/key2/... : run for key1, key2, etc. -reqtarg key1/key2/... : run for key1, key2, etc. but key1/key2 must be in runconfig -testpatt patt1/patt2,patt3/... 
: % is wildcard - :runname : required, name for this particular test run - :state : Applies to runs, tests or steps depending on context - :status : Applies to runs, tests or steps depending on context + -runname : required, name for this particular test run + -state : Applies to runs, tests or steps depending on context + -status : Applies to runs, tests or steps depending on context Test helpers (for use inside tests) -step stepname -test-status : set the state and status of a test (use :state and :status) -setlog logfname : set the path/filename to the final log relative to the test @@ -114,31 +117,41 @@ -list-db-targets : list the target combinations used in the db -show-config : dump the internal representation of the megatest.config file -show-runconfig : dump the internal representation of the runconfigs.config file -dumpmode json : dump in json format instead of sexpr -show-cmdinfo : dump the command info for a test (run in test environment) + -section sectionName + -var varName : for config and runconfig lookup value for sectionName varName Misc -start-dir path : switch to this directory before running megatest -rebuild-db : bring the database schema up to date -cleanup-db : remove any orphan records, vacuum the db + -import-megatest.db : migrate a database from v1.55 series to v1.60 series + -sync-to-megatest.db : migrate data back to megatest.db -update-meta : update the tests metadata for all tests - -env2file fname : write the environment to fname.csh and fname.sh -setvars VAR1=val1,VAR2=val2 : Add environment variables to a run NB// these are overwritten by values set in config files. 
-server -|hostname : start the server (reduces contention on megatest.db), use - to automatically figure out hostname -transport http|zmq : use http or zmq for transport (default is http) -daemonize : fork into background and disconnect from stdin/out + -log logfile : send stdout and stderr to logfile -list-servers : list the servers -stop-server id : stop server specified by id (see output of -list-servers), use 0 to kill all -repl : start a repl (useful for extending megatest) -load file.scm : load and run file.scm -mark-incompletes : find and mark incomplete tests -ping run-id|host:port : ping server, exit with 0 if found +Utilities + -env2file fname : write the environment to fname.csh and fname.sh + -refdb2dat refdb : convert refdb to sexp or to format specified by -dumpmode + formats: perl, ruby, sqlite3 + -o : output file for refdb2dat (defaults to stdout) + Spreadsheet generation -extract-ods fname.ods : extract an open document spreadsheet from the database -pathmod path : insert path, i.e. path/runame/itempath/logfile.html will clear the field if no rundir/testname/itempath/logfile if it contains forward slashes the path will be converted @@ -148,11 +161,11 @@ -gen-megatest-test tname : create a skeleton megatest test. You will be prompted for info Examples # Get test path, use '.' 
to get a single path or a specific path/file pattern -megatest -test-files 'logs/*.log' -target ubuntu/n%/no% :runname w49% -testpatt test_mt% +megatest -test-files 'logs/*.log' -target ubuntu/n%/no% -runname w49% -testpatt test_mt% Called as " (string-intersperse (argv) " ") " Version " megatest-version ", built from " megatest-fossil-hash )) ;; -gui : start a gui interface @@ -163,11 +176,10 @@ (argv) (list "-runtests" ;; run a specific test "-config" ;; override the config file name "-execute" ;; run the command encoded in the base64 parameter "-step" - ":runname" "-target" "-reqtarg" ":runname" "-runname" ":state" @@ -209,15 +221,20 @@ "-debug" ;; for *verbosity* > 2 "-gen-megatest-test" "-override-timeout" "-test-files" ;; -test-paths is for listing all "-load" ;; load and exectute a scheme file + "-section" + "-var" "-dumpmode" "-run-id" "-ping" + "-refdb2dat" + "-o" + "-log" ) - (list "-h" + (list "-h" "-help" "--help" "-version" "-force" "-xterm" "-showkeys" "-show-keys" @@ -225,10 +242,11 @@ "-set-values" "-load-test-data" "-summarize-items" "-gui" "-daemonize" + "-preclean" ;; misc "-archive" "-repl" "-lock" "-unlock" @@ -257,19 +275,77 @@ "-mark-incompletes" "-convert-to-norm" "-convert-to-old" "-import-megatest.db" + "-sync-to-megatest.db" "-logging" "-v" ;; verbose 2, more than normal (normal is 1) "-q" ;; quiet 0, errors/warnings only ) args:arg-hash 0)) -(if (args:get-arg "-h") +;; The watchdog is to keep an eye on things like db sync etc. +;; +(define *time-zero* (current-seconds)) +(define *watchdog* + (make-thread + (lambda () + (thread-sleep! 0.05) ;; delay for startup + (let ((legacy-sync (configf:lookup *configdat* "setup" "megatest-db")) + (debug-mode (debug:debug-mode 1)) + (last-time (current-seconds))) + (let loop () + ;; sync for filesystem local db writes + ;; + (let ((start-time (current-seconds)) + (servers-started (make-hash-table))) + (for-each + (lambda (run-id) + (mutex-lock! 
*db-multi-sync-mutex*) + (if (and legacy-sync + (hash-table-ref/default *db-local-sync* run-id #f)) + ;; (if (> (- start-time last-write) 5) ;; every five seconds + (begin ;; let ((sync-time (- (current-seconds) start-time))) + (db:multi-db-sync (list run-id) 'new2old) + (if (common:low-noise-print 30 "sync new to old") + (let ((sync-time (- (current-seconds) start-time))) + (debug:print-info 0 "Sync of newdb to olddb for run-id " run-id " completed in " sync-time " seconds"))) + ;; (if (> sync-time 10) ;; took more than ten seconds, start a server for this run + ;; (begin + ;; (debug:print-info 0 "Sync is taking a long time, start up a server to assist for run " run-id) + ;; (server:kind-run run-id))))) + (hash-table-delete! *db-local-sync* run-id))) + (mutex-unlock! *db-multi-sync-mutex*)) + (hash-table-keys *db-local-sync*)) + (if (and debug-mode + (> (- start-time last-time) 14)) + (begin + (set! last-time start-time) + (debug:print-info 0 "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) + + ;; keep going unless time to exit + ;; + (if (not *time-to-exit*) + (begin + (thread-sleep! 5) ;; wait five seconds before syncing again, we'll also sync on exit + (loop))))) + "Watchdog thread"))) + +(thread-start! *watchdog*) + +(if (args:get-arg "-log") + (let ((oup (open-output-file (args:get-arg "-log")))) + (debug:print-info 0 "Sending log output to " (args:get-arg "-log")) + (current-error-port oup) + (current-output-port oup))) + +(if (or (args:get-arg "-h") + (args:get-arg "-help") + (args:get-arg "--help")) (begin (print help) (exit))) (if (args:get-arg "-start-dir") @@ -324,10 +400,12 @@ (if (args:get-arg "-itempatt") (let ((newval (conc (args:get-arg "-testpatt") "/" (args:get-arg "-itempatt")))) (debug:print 0 "WARNING: -itempatt has been deprecated, please use -testpatt testpatt/itempatt method, new testpatt is "newval) (hash-table-set! 
args:arg-hash "-testpatt" newval) (hash-table-delete! args:arg-hash "-itempatt"))) + +(on-exit std-exit-procedure) ;;====================================================================== ;; Misc general calls ;;====================================================================== @@ -335,45 +413,90 @@ (begin (save-environment-as-files (args:get-arg "-env2file")) (set! *didsomething* #t))) (if (args:get-arg "-list-disks") - (begin + (let ((toppath (launch:setup-for-run))) (print (string-intersperse (map (lambda (x) (string-intersperse x " => ")) - (common:get-disks) ) + (common:get-disks *configdat*)) "\n")) (set! *didsomething* #t))) + +(if (args:get-arg "-refdb2dat") + (let* ((input-db (args:get-arg "-refdb2dat")) + (out-file (args:get-arg "-o")) + (out-fmt (or (args:get-arg "-dumpmode") "scheme")) + (out-port (if (and out-file + (not (equal? out-fmt "sqlite3"))) + (open-output-file out-file) + (current-output-port))) + (res-data (configf:read-refdb input-db)) + (data (car res-data)) + (msg (cadr res-data))) + (if (not data) + (debug:print 0 data) ;; some error occurred + (with-output-to-port out-port + (lambda () + (case (string->symbol out-fmt) + ((scheme)(pp data)) + ((perl) + ;; (print "%hash = (") + ;; key1 => 'value1', + ;; key2 => 'value2', + ;; key3 => 'value3', + ;; ); + (configf:map-all-hier-alist + data + (lambda (sheetname sectionname varname val) + (print "$data{\"" sheetname "\"}{\"" sectionname "\"}{\"" varname "\"} = \"" val "\";")))) + ((python ruby) + (print "data={}") + (configf:map-all-hier-alist + data + (lambda (sheetname sectionname varname val) + (print "data[\"" sheetname "\"][\"" sectionname "\"][\"" varname "\"] = \"" val "\"")) + initproc1: + (lambda (sheetname) + (print "data[\"" sheetname "\"] = {}")) + initproc2: + (lambda (sheetname sectionname) + (print "data[\"" sheetname "\"][\"" sectionname "\"] = {}")))) + ((sqlite3) + (let* ((db-file (or out-file (pathname-file input-db))) + (db-exists (file-exists? 
db-file)) + (db (sqlite3:open-database db-file))) + (if (not db-exists)(sqlite3:execute db "CREATE TABLE data (sheet,section,var,val);")) + (configf:map-all-hier-alist + data + (lambda (sheetname sectionname varname val) + (sqlite3:execute db + "INSERT OR REPLACE INTO data (sheet,section,var,val) VALUES (?,?,?,?);" + sheetname sectionname varname val))) + (sqlite3:finalize! db))) + (else + (pp data)))))) + (if out-file (close-output-port out-port)) + (exit) ;; yes, bending the rules here - need to exit since this is a utility + )) (if (args:get-arg "-ping") - (let* ((run-id (string->number (args:get-arg "-run-id"))) - (host-port (let ((slst (string-split (args:get-arg "-ping") ":"))) - (if (eq? (length slst) 2) - (list (car slst)(string->number (cadr slst))) - #f))) - (toppath (setup-for-run))) - (set! *did-something* #t) - (if (not run-id) - (begin - (debug:print 0 "ERROR: must specify run-id when doing ping, -run-id n") - (print "ERROR: No run-id") - (exit 1)) - (if (not host-port) - (begin - (debug:print 0 "ERROR: argument to -ping is host:port, got " (args:get-arg "-ping")) - (print "ERROR: bad host:port") - (exit 1)) - (begin - (print ((rpc:procedure 'testing (car host-port)(cadr host-port)))) - (case (server:get-transport) - ((http)(http:ping run-id host-port)) - ((rpc) ((rpc:procedure 'server:login (car host-port)(cadr host-port)) *toppath*)) ;; (rpc-transport:ping run-id (car host-port)(cadr host-port))) - (else (debug:print 0 "ERROR: No transport set")(exit)))))))) + (let* ((run-id (string->number (args:get-arg "-run-id"))) + (host:port (args:get-arg "-ping"))) + (server:ping run-id host:port))) + +;; (set! 
*did-something* #t) +;; (begin +;; (print ((rpc:procedure 'testing (car host-port)(cadr host-port)))) +;; (case (server:get-transport) +;; ((http)(http:ping run-id host-port)) +;; ((rpc) (rpc:procedure 'server:login (car host-port)(cadr host-port));; *toppath*)) ;; (rpc-transport:ping run-id (car host-port)(cadr host-port))) +;; (else (debug:print 0 "ERROR: No transport set")(exit))))) ;;====================================================================== ;; Start the server - can be done in conjunction with -runall or -runtests (one day...) ;; we start the server if not running else start the client thread ;;====================================================================== @@ -380,11 +503,11 @@ (if (args:get-arg "-server") ;; Server? Start up here. ;; - (let ((tl (setup-for-run)) + (let ((tl (launch:setup-for-run)) (run-id (and (args:get-arg "-run-id") (string->number (args:get-arg "-run-id"))))) (if run-id (begin (server:launch run-id) @@ -400,32 +523,34 @@ '("-list-servers" "-stop-server" "-show-cmdinfo" "-list-runs" "-ping"))) - (if (setup-for-run) + (if (launch:setup-for-run) (let ((run-id (and (args:get-arg "-run-id") (string->number (args:get-arg "-run-id"))))) ;; (set! *fdb* (filedb:open-db (conc *toppath* "/db/paths.db"))) ;; if not list or kill then start a client (if appropriate) (if (or (args-defined? "-h" "-version" "-gen-megatest-area" "-gen-megatest-test") (eq? (length (hash-table-keys args:arg-hash)) 0)) (debug:print-info 1 "Server connection not needed") (begin - (if run-id - (client:launch run-id) - (client:launch 0) ;; without run-id we'll start a server for "0" - ))))))) + ;; (if run-id + ;; (client:launch run-id) + ;; (client:launch 0) ;; without run-id we'll start a server for "0" + #t + )))))) ;; MAY STILL NEED THIS ;; (set! 
*megatest-db* (make-dbr:dbstruct path: *toppath* local: #t)))))))))) (if (or (args:get-arg "-list-servers") (args:get-arg "-stop-server")) - (let ((tl (setup-for-run))) + (let ((tl (launch:setup-for-run))) (if tl - (let* ((servers (open-run-close tasks:get-all-servers tasks:open-db)) + (let* ((tdbdat (tasks:open-db)) + (servers (tasks:get-all-servers (db:delay-if-busy tdbdat))) (fmtstr "~5a~12a~8a~20a~24a~10a~10a~10a~10a\n") (servers-to-kill '()) (killinfo (args:get-arg "-stop-server")) (khost-port (if killinfo (if (substring-index ":" killinfo)(string-split ":") #f) #f)) (sid (if killinfo (if (substring-index ":" killinfo) #f (string->number killinfo)) #f))) @@ -450,13 +575,13 @@ ;; (zmq-sockets (if status (server:client-connect hostname port) #f))) ;; no need to login as status of #t indicates we are connecting to correct ;; server (if (equal? state "dead") (if (> last-update (* 25 60 60)) ;; keep records around for slighly over a day. - (open-run-close tasks:server-deregister tasks:open-db hostname pullport: pullport pid: pid action: 'delete)) + (tasks:server-deregister (db:delay-if-busy tdbdat) hostname pullport: pullport pid: pid action: 'delete)) (if (> last-update 20) ;; Mark as dead if not updated in last 20 seconds - (open-run-close tasks:server-deregister tasks:open-db hostname pullport: pullport pid: pid))) + (tasks:server-deregister (db:delay-if-busy tdbdat) hostname pullport: pullport pid: pid))) (format #t fmtstr id mt-ver pid hostname (conc interface ":" pullport) pubport last-update (if status "alive" "dead") transport) (if (or (equal? id sid) (equal? sid 0)) ;; kill all/any (begin @@ -481,33 +606,33 @@ targets) (set! 
*didsomething* #t))) (define (full-runconfigs-read) (let* ((keys (rmt:get-keys)) - (target (if (args:get-arg "-reqtarg") - (args:get-arg "-reqtarg") - (if (args:get-arg "-target") - (args:get-arg "-target") - #f))) + (target (common:args-get-target)) (key-vals (if target (keys:target->keyval keys target) #f)) (sections (if target (list "default" target) #f)) (data (begin (setenv "MT_RUN_AREA_HOME" *toppath*) (if key-vals (for-each (lambda (kt) (setenv (car kt) (cadr kt))) key-vals)) - (read-config "runconfigs.config" #f #t sections: sections)))) + (read-config (conc *toppath* "/runconfigs.config") #f #t sections: sections)))) data)) (if (args:get-arg "-show-runconfig") - (let ((tl (setup-for-run))) + (let ((tl (launch:setup-for-run))) (push-directory *toppath*) (let ((data (full-runconfigs-read))) ;; keep this one local (cond + ((and (args:get-arg "-section") + (args:get-arg "-var")) + (let ((val (configf:lookup data (args:get-arg "-section")(args:get-arg "-var")))) + (if val (print val)))) ((not (args:get-arg "-dumpmode")) (pp (hash-table->alist data))) ((string=? (args:get-arg "-dumpmode") "json") (json-write data)) (else @@ -514,15 +639,19 @@ (debug:print 0 "ERROR: -dumpmode of " (args:get-arg "-dumpmode") " not recognised"))) (set! *didsomething* #t)) (pop-directory))) (if (args:get-arg "-show-config") - (let ((tl (setup-for-run)) + (let ((tl (launch:setup-for-run)) (data *configdat*)) ;; (read-config "megatest.config" #f #t))) (push-directory *toppath*) ;; keep this one local (cond + ((and (args:get-arg "-section") + (args:get-arg "-var")) + (let ((val (configf:lookup data (args:get-arg "-section")(args:get-arg "-var")))) + (if val (print val)))) ((not (args:get-arg "-dumpmode")) (pp (hash-table->alist data))) ((string=? (args:get-arg "-dumpmode") "json") (json-write data)) (else @@ -530,11 +659,11 @@ (set! 
*didsomething* #t) (pop-directory))) (if (args:get-arg "-show-cmdinfo") (if (getenv "MT_CMDINFO") - (let ((data (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO")))))) + (let ((data (common:read-encoded-string (getenv "MT_CMDINFO")))) (if (equal? (args:get-arg "-dumpmode") "json") (json-write data) (pp data)) (set! *didsomething* #t)) (debug:print-info 0 "environment variable MT_CMDINFO is not set"))) @@ -545,18 +674,18 @@ ;; since several actions can be specified on the command line the removal ;; is done first (define (operate-on action) (let* ((runrec (runs:runrec-make-record)) - (target (or (args:get-arg "-reqtarg") - (args:get-arg "-target")))) + (target (common:args-get-target))) (cond ((not target) (debug:print 0 "ERROR: Missing required parameter for " action ", you must specify -target or -reqtarg") (exit 1)) - ((not (args:get-arg ":runname")) - (debug:print 0 "ERROR: Missing required parameter for " action ", you must specify the run name pattern with :runname patt") + ((not (or (args:get-arg ":runname") + (args:get-arg "-runname"))) + (debug:print 0 "ERROR: Missing required parameter for " action ", you must specify the run name pattern with -runname patt") (exit 2)) ((not (args:get-arg "-testpatt")) (debug:print 0 "ERROR: Missing required parameter for " action ", you must specify the test pattern with -testpatt") (exit 3)) (else @@ -565,14 +694,14 @@ (debug:print 0 "ERROR: Attempted " action "on test(s) but run area config file not found") (exit 1)) ;; put test parameters into convenient variables (runs:operate-on action target - (args:get-arg ":runname") + (or (args:get-arg "-runname")(args:get-arg ":runname")) (args:get-arg "-testpatt") - state: (args:get-arg ":state") - status: (args:get-arg ":status") + state: (or (args:get-arg "-state")(args:get-arg ":state") ) + status: (or (args:get-arg "-status")(args:get-arg ":status")) new-state-status: (args:get-arg "-set-state-status"))) (set! 
*didsomething* #t))))) (if (args:get-arg "-remove-runs") (general-run-call @@ -592,23 +721,24 @@ (args:get-arg "-get-run-status")) (general-run-call "-set-run-status" "set run status" (lambda (target runname keys keyvals) - (let* ((runsdat (cdb:remote-run db:get-runs-by-patt #f keys runname (or (args:get-arg "-target") - (args:get-arg "-reqtarg")) #f #f)) + (let* ((runsdat (rmt:get-runs-by-patt keys runname + (common:args-get-target) + #f #f)) (header (vector-ref runsdat 0)) (rows (vector-ref runsdat 1))) (if (null? rows) (begin (debug:print-info 0 "No matching run found.") (exit 1)) (let* ((row (car (vector-ref runsdat 1))) (run-id (db:get-value-by-header row header "id"))) (if (args:get-arg "-set-run-status") - (cdb:remote-run db:set-run-status #f run-id (args:get-arg "-set-run-status") msg: (args:get-arg "-m")) - (print (open-run-close db:get-run-status #f run-id)) + (rmt:set-run-status run-id (args:get-arg "-set-run-status") msg: (args:get-arg "-m")) + (print (rmt:get-run-status run-id)) ))))))) ;;====================================================================== ;; Query runs ;;====================================================================== @@ -615,20 +745,20 @@ ;; NOTE: list-runs and list-db-targets operate on local db!!! 
;; (if (or (args:get-arg "-list-runs") (args:get-arg "-list-db-targets")) - (if (setup-for-run) + (if (launch:setup-for-run) (let* ((dbstruct (make-dbr:dbstruct path: *toppath* local: #t)) (runpatt (args:get-arg "-list-runs")) (testpatt (if (args:get-arg "-testpatt") (args:get-arg "-testpatt") "%")) (keys (db:get-keys dbstruct)) ;; (runsdat (db:get-runs dbstruct runpatt #f #f '())) - (runsdat (db:get-runs-by-patt dbstruct keys runpatt (or (args:get-arg "-target") - (args:get-arg "-reqtarg")) #f #f)) + (runsdat (db:get-runs-by-patt dbstruct keys (or runpatt "%") (common:args-get-target) + #f #f)) ;; (cdb:remote-run db:get-runs #f runpatt #f #f '())) (runs (db:get-rows runsdat)) (header (db:get-header runsdat)) (db-targets (args:get-arg "-list-db-targets")) (seen (make-hash-table))) @@ -686,11 +816,11 @@ (tdb:step-get-status step) (tdb:step-get-event_time step))) steps))))) tests))))) runs) - (db:close-all dbstruct) + ;; (db:close-all dbstruct) (set! *didsomething* #t)))) ;;====================================================================== ;; full run ;;====================================================================== @@ -769,11 +899,11 @@ "-rollup" "rollup tests" (lambda (target runname keys keyvals) (runs:rollup-run keys keyvals - (args:get-arg ":runname") + (or (args:get-arg "-runname")(args:get-arg ":runname") ) user)))) ;;====================================================================== ;; Lock or unlock a run ;;====================================================================== @@ -784,11 +914,11 @@ "lock/unlock tests" (lambda (target runname keys keyvals) (runs:handle-locking target keys - (args:get-arg ":runname") + (or (args:get-arg "-runname")(args:get-arg ":runname") ) (args:get-arg "-lock") (args:get-arg "-unlock") user)))) ;;====================================================================== @@ -797,12 +927,11 @@ ;; Get test paths matching target, runname, and testpatt (if (or (args:get-arg "-test-files")(args:get-arg "-test-paths")) 
;; if we are in a test use the MT_CMDINFO data (if (getenv "MT_CMDINFO") (let* ((startingdir (current-directory)) - (cmdinfo (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO"))))) - ;; (runremote (assoc/default 'runremote cmdinfo)) + (cmdinfo (common:read-encoded-string (getenv "MT_CMDINFO"))) (transport (assoc/default 'transport cmdinfo)) (testpath (assoc/default 'testpath cmdinfo)) (test-name (assoc/default 'test-name cmdinfo)) (runscript (assoc/default 'runscript cmdinfo)) (db-host (assoc/default 'db-host cmdinfo)) @@ -815,11 +944,11 @@ (change-directory toppath) (if (not target) (begin (debug:print 0 "ERROR: -target is required.") (exit 1))) - (if (not (setup-for-run)) + (if (not (launch:setup-for-run)) (begin (debug:print 0 "Failed to setup, giving up on -test-paths or -test-files, exiting") (exit 1))) (let* ((keys (rmt:get-keys)) ;; db:test-get-paths must not be run remote @@ -846,12 +975,11 @@ ;; Archive tests matching target, runname, and testpatt (if (args:get-arg "-archive") ;; if we are in a test use the MT_CMDINFO data (if (getenv "MT_CMDINFO") (let* ((startingdir (current-directory)) - (cmdinfo (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO"))))) - ;; (runremote (assoc/default 'runremote cmdinfo)) + (cmdinfo (common:read-encoded-string (getenv "MT_CMDINFO"))) (transport (assoc/default 'transport cmdinfo)) (testpath (assoc/default 'testpath cmdinfo)) (test-name (assoc/default 'test-name cmdinfo)) (runscript (assoc/default 'runscript cmdinfo)) (db-host (assoc/default 'db-host cmdinfo)) @@ -863,11 +991,11 @@ (change-directory testpath) (if (not target) (begin (debug:print 0 "ERROR: -target is required.") (exit 1))) - (if (not (setup-for-run)) + (if (not (launch:setup-for-run)) (begin (debug:print 0 "Failed to setup, giving up on -archive, exiting") (exit 1))) (let* ((keys (rmt:get-keys)) (paths (tests:test-get-paths-matching keys target))) @@ -896,11 +1024,11 @@ "-extract-ods" "Make ods spreadsheet" (lambda (target 
runname keys keyvals) (let ((dbstruct (make-dbr:dbstruct path: *toppath* local: #t)) (outputfile (args:get-arg "-extract-ods")) - (runspatt (args:get-arg ":runname")) + (runspatt (or (args:get-arg "-runname")(args:get-arg ":runname"))) (pathmod (args:get-arg "-pathmod"))) ;; (keyvalalist (keys->alist keys "%"))) (debug:print 2 "Extract ods, outputfile: " outputfile " runspatt: " runspatt " keyvals: " keyvals) (db:extract-ods-file dbstruct outputfile keyvals (if runspatt runspatt "%") pathmod) (db:close-all dbstruct) @@ -926,12 +1054,11 @@ (define (megatest:step step state status logfile msg) (if (not (getenv "MT_CMDINFO")) (begin (debug:print 0 "ERROR: MT_CMDINFO env var not set, -step must be called *inside* a megatest invoked environment!") (exit 5)) - (let* ((cmdinfo (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO"))))) - ;; (runremote (assoc/default 'runremote cmdinfo)) + (let* ((cmdinfo (common:read-encoded-string (getenv "MT_CMDINFO"))) (transport (assoc/default 'transport cmdinfo)) (testpath (assoc/default 'testpath cmdinfo)) (test-name (assoc/default 'test-name cmdinfo)) (runscript (assoc/default 'runscript cmdinfo)) (db-host (assoc/default 'db-host cmdinfo)) @@ -939,11 +1066,11 @@ (test-id (assoc/default 'test-id cmdinfo)) (itemdat (assoc/default 'itemdat cmdinfo)) (work-area (assoc/default 'work-area cmdinfo)) (db #f)) (change-directory testpath) - (if (not (setup-for-run)) + (if (not (launch:setup-for-run)) (begin (debug:print 0 "Failed to setup, exiting") (exit 1))) (if (and state status) (rmt:teststep-set-status! 
run-id test-id step state status msg logfile) @@ -974,12 +1101,11 @@ (if (not (getenv "MT_CMDINFO")) (begin (debug:print 0 "ERROR: MT_CMDINFO env var not set, commands -test-status, -runstep and -setlog must be called *inside* a megatest environment!") (exit 5)) (let* ((startingdir (current-directory)) - (cmdinfo (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO"))))) - ;; (runremote (assoc/default 'runremote cmdinfo)) + (cmdinfo (common:read-encoded-string (getenv "MT_CMDINFO"))) (transport (assoc/default 'transport cmdinfo)) (testpath (assoc/default 'testpath cmdinfo)) (test-name (assoc/default 'test-name cmdinfo)) (runscript (assoc/default 'runscript cmdinfo)) (db-host (assoc/default 'db-host cmdinfo)) @@ -988,11 +1114,11 @@ (itemdat (assoc/default 'itemdat cmdinfo)) (work-area (assoc/default 'work-area cmdinfo)) (db #f) ;; (open-db)) (state (args:get-arg ":state")) (status (args:get-arg ":status"))) - (if (not (setup-for-run)) + (if (not (launch:setup-for-run)) (begin (debug:print 0 "Failed to setup, exiting") (exit 1))) (if (args:get-arg "-runstep")(debug:print-info 1 "Running -runstep, first change to directory " work-area)) @@ -1023,11 +1149,14 @@ (logprofile (args:get-arg "-logpro")) (logfile (conc stepname ".log")) (cmd (if (null? remargs) #f (car remargs))) (params (if cmd (cdr remargs) '())) (exitstat #f) - (shell (last (string-split (get-environment-variable "SHELL") "/"))) + (shell (let ((sh (get-environment-variable "SHELL") )) + (if sh + (last (string-split sh "/")) + "bash"))) (redir (case (string->symbol shell) ((tcsh csh ksh) ">&") ((zsh bash sh ash) "2>&1 >") (else ">&"))) (fullcmd (conc "(" (string-intersperse @@ -1090,11 +1219,11 @@ (if (or (args:get-arg "-showkeys") (args:get-arg "-show-keys")) (let ((db #f) (keys #f)) - (if (not (setup-for-run)) + (if (not (launch:setup-for-run)) (begin (debug:print 0 "Failed to setup, exiting") (exit 1))) (set! 
keys (cdb:remote-run db:get-keys db)) (debug:print 1 "Keys: " (string-intersperse keys ", ")) @@ -1121,57 +1250,53 @@ ;; Update the database schema, clean up the db ;;====================================================================== (if (args:get-arg "-rebuild-db") (begin - (if (not (setup-for-run)) + (if (not (launch:setup-for-run)) (begin (debug:print 0 "Failed to setup, exiting") (exit 1))) ;; keep this one local (open-run-close patch-db #f) (set! *didsomething* #t))) (if (args:get-arg "-cleanup-db") (begin - (if (not (setup-for-run)) + (if (not (launch:setup-for-run)) (begin (debug:print 0 "Failed to setup, exiting") (exit 1))) ;; keep this one local - (open-run-close db:clean-up #f) + ;; (open-run-close db:clean-up #f) + (db:multi-db-sync + #f ;; do all run-ids + ;; 'new2old + 'killservers + 'dejunk + ;; 'adj-testids + ;; 'old2new + 'new2old + ) (set! *didsomething* #t))) (if (args:get-arg "-mark-incompletes") (begin - (if (not (setup-for-run)) + (if (not (launch:setup-for-run)) (begin - (debug:print 0 "Failed to setup, exiting") + (debug:print 0 "Failed to setup, exiting") b (exit 1))) (open-run-close db:find-and-mark-incomplete #f) - (set! *didsomething* #t))) - -;;====================================================================== -;; Wait on a run to complete -;;====================================================================== - -(if (args:get-arg "-run-wait") - (begin - (if (not (setup-for-run)) - (begin - (debug:print 0 "Failed to setup, exiting") - (exit 1))) - (operate-on 'run-wait) (set! 
*didsomething* #t))) ;;====================================================================== ;; Update the tests meta data from the testconfig files ;;====================================================================== (if (args:get-arg "-update-meta") (begin - (if (not (setup-for-run)) + (if (not (launch:setup-for-run)) (begin (debug:print 0 "Failed to setup, exiting") (exit 1))) ;; now can find our db ;; keep this one local @@ -1182,11 +1307,11 @@ ;; Start a repl ;;====================================================================== (if (or (args:get-arg "-repl") (args:get-arg "-load")) - (let* ((toppath (setup-for-run)) + (let* ((toppath (launch:setup-for-run)) (dbstruct (if toppath (make-dbr:dbstruct path: toppath local: #t) #f))) (if dbstruct (begin (set! *db* dbstruct) (set! *client-non-blocking-mode* #t) @@ -1202,56 +1327,72 @@ (load (args:get-arg "-load"))) (db:close-all dbstruct)) (exit)) (set! *didsomething* #t))) -;; Not converted to use dbstruct yet -;; -(if (args:get-arg "-convert-to-norm") - (let* ((toppath (setup-for-run)) - (dbstruct (if toppath (make-dbr:dbstruct path: toppath local: #t)))) - (for-each - (lambda (field) - (let ((dat '())) - (debug:print-info 0 "Getting data for field " field) - (sqlite3:for-each-row - (lambda (id val) - (set! dat (cons (list id val) dat))) - (get-db db run-id) - (conc "SELECT id," field " FROM tests;")) - (debug:print-info 0 "found " (length dat) " items for field " field) - (let ((qry (sqlite3:prepare db (conc "UPDATE tests SET " field "=? WHERE id=?;")))) - (for-each - (lambda (item) - (let ((newval ;; (sdb:qry 'getid - (cadr item))) ;; ) - (if (not (equal? newval (cadr item))) - (debug:print-info 0 "Converting " (cadr item) " to " newval " for test #" (car item))) - (sqlite3:execute qry newval (car item)))) - dat) - (sqlite3:finalize! 
qry)))) - (db:close-all dbstruct) - (list "uname" "rundir" "final_logf" "comment")) +;;====================================================================== +;; Wait on a run to complete +;;====================================================================== + +(if (and (args:get-arg "-run-wait") + (not (args:get-arg "-runtests"))) ;; run-wait is built into runtests now + (begin + (if (not (launch:setup-for-run)) + (begin + (debug:print 0 "Failed to setup, exiting") + (exit 1))) + (operate-on 'run-wait) (set! *didsomething* #t))) + +;; ;; ;; redo me ;; Not converted to use dbstruct yet +;; ;; ;; redo me ;; +;; ;; ;; redo me (if (args:get-arg "-convert-to-norm") +;; ;; ;; redo me (let* ((toppath (setup-for-run)) +;; ;; ;; redo me (dbstruct (if toppath (make-dbr:dbstruct path: toppath local: #t)))) +;; ;; ;; redo me (for-each +;; ;; ;; redo me (lambda (field) +;; ;; ;; redo me (let ((dat '())) +;; ;; ;; redo me (debug:print-info 0 "Getting data for field " field) +;; ;; ;; redo me (sqlite3:for-each-row +;; ;; ;; redo me (lambda (id val) +;; ;; ;; redo me (set! dat (cons (list id val) dat))) +;; ;; ;; redo me (db:get-db db run-id) +;; ;; ;; redo me (conc "SELECT id," field " FROM tests;")) +;; ;; ;; redo me (debug:print-info 0 "found " (length dat) " items for field " field) +;; ;; ;; redo me (let ((qry (sqlite3:prepare db (conc "UPDATE tests SET " field "=? WHERE id=?;")))) +;; ;; ;; redo me (for-each +;; ;; ;; redo me (lambda (item) +;; ;; ;; redo me (let ((newval ;; (sdb:qry 'getid +;; ;; ;; redo me (cadr item))) ;; ) +;; ;; ;; redo me (if (not (equal? newval (cadr item))) +;; ;; ;; redo me (debug:print-info 0 "Converting " (cadr item) " to " newval " for test #" (car item))) +;; ;; ;; redo me (sqlite3:execute qry newval (car item)))) +;; ;; ;; redo me dat) +;; ;; ;; redo me (sqlite3:finalize! qry)))) +;; ;; ;; redo me (db:close-all dbstruct) +;; ;; ;; redo me (list "uname" "rundir" "final_logf" "comment")) +;; ;; ;; redo me (set! 
*didsomething* #t))) (if (args:get-arg "-import-megatest.db") - (let* ((toppath (setup-for-run)) - (dbstruct (if toppath (make-dbr:dbstruct path: toppath) #f)) - (mtdb (if toppath (db:open-megatest-db))) - (run-ids (if toppath (db:get-all-run-ids mtdb)))) - ;; sync runs, test_meta etc. - (db:sync-tables (db:sync-main-list mtdb) mtdb (db:get-db dbstruct #f)) - (for-each - (lambda (run-id) - (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id))) - (debug:print 0 "INFO: Updating " (length testrecs) " records for run-id=" run-id) - (db:replace-test-records dbstruct run-id testrecs))) - run-ids) - (set! *didsomething* #t) - (db:close-all dbstruct))) - - + (begin + (db:multi-db-sync + #f ;; do all run-ids + 'killservers + 'dejunk + 'adj-testids + 'old2new + ;; 'new2old + ) + (set! *didsomething* #t))) + +(if (args:get-arg "-sync-to-megatest.db") + (begin + (db:multi-db-sync + #f ;; do all run-ids + 'new2old + ) + (set! *didsomething* #t))) ;;====================================================================== ;; Exit and clean up ;;====================================================================== @@ -1258,11 +1399,12 @@ (if *runremote* (close-all-connections!)) (if (not *didsomething*) (debug:print 0 help)) -;; (if *runremote* (rpc:close-all-connections!)) +(set! *time-to-exit* #t) +(thread-join! *watchdog*) (if (not (eq? 
*globalexitstatus* 0)) (if (or (args:get-arg "-runtests")(args:get-arg "-runall")) (begin (debug:print 0 "NOTE: Subprocesses with non-zero exit code detected: " *globalexitstatus*) Index: mt.scm ================================================================== --- mt.scm +++ mt.scm @@ -82,11 +82,11 @@ full-list new-offset limit)) full-list)))) -(define (mt:lazy-get-prereqs-not-met run-id waitons ref-item-path #!key (mode 'normal)) +(define (mt:lazy-get-prereqs-not-met run-id waitons ref-item-path #!key (mode '(normal))(itemmap #f) ) (let* ((key (list run-id waitons ref-item-path mode)) (res (hash-table-ref/default *pre-reqs-met-cache* key #f)) (useres (let ((last-time (if (vector? res) (vector-ref res 0) #f))) (if last-time (< (current-seconds)(+ last-time 5)) @@ -93,11 +93,12 @@ #f)))) (if useres (let ((result (vector-ref res 1))) (debug:print 4 "Using lazy value res: " result) result) - (let ((newres (rmt:get-prereqs-not-met run-id waitons ref-item-path mode: mode))) + (let ((newres (rmt:get-prereqs-not-met run-id waitons ref-item-path mode: mode itemmap: itemmap))) +;; (let ((newres (db:get-prereqs-not-met run-id waitons ref-item-path mode: mode itemmap: itemmap))) (hash-table-set! *pre-reqs-met-cache* key (vector (current-seconds) newres)) newres)))) (define (mt:get-run-stats dbstruct run-id) ;; Get run stats from local access, move this ... but where? @@ -109,20 +110,23 @@ (begin (debug:print-info 1 "Discarding tests from " tests " that are waiting on " failed-test) (let loop ((testn (car tests)) (remt (cdr tests)) (res '())) - (let ((waitons (vector-ref (hash-table-ref/default test-records testn (vector #f #f '())) 2))) + (let* ((test-dat (hash-table-ref/default test-records testn (vector #f #f '()))) + (waitons (vector-ref test-dat 2))) ;; (print "mt:discard-blocked-tests run-id: " run-id " failed-test: " failed-test " testn: " testn " with waitons: " waitons) (if (null? 
remt) (let ((new-res (reverse res))) ;; (print " new-res: " new-res) new-res) (loop (car remt) (cdr remt) (if (member failed-test waitons) - res + (begin + (debug:print 0 "Discarding test " testn "(" test-dat ") due to " failed-test) + res) (cons testn res))))))))) ;;====================================================================== ;; T R I G G E R S ;;====================================================================== @@ -161,36 +165,53 @@ ;; S T A T E A N D S T A T U S F O R T E S T S ;;====================================================================== ;; speed up for common cases with a little logic (define (mt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment) - (cond - ((and newstate newstatus newcomment) - (rmt:general-call 'state-status-msg run-id newstate newstatus newcomment test-id)) - ((and newstate newstatus) - (rmt:general-call 'state-status run-id newstate newstatus test-id)) - (else - (if newstate (rmt:general-call 'set-test-state run-id newstate test-id)) - (if newstatus (rmt:general-call 'set-test-status run-id newstatus test-id)) - (if newcomment (rmt:general-call 'set-test-comment run-id newcomment test-id)))) - (mt:process-triggers run-id test-id newstate newstatus) - #t) + (if (not (and run-id test-id)) + (begin + (debug:print 0 "ERROR: bad data handed to mt:test-set-state-status-by-id, run-id=" run-id ", test-id=" test-id ", newstate=" newstate) + (print-call-chain (current-error-port)) + #f) + (begin + (cond + ((and newstate newstatus newcomment) + (rmt:general-call 'state-status-msg run-id newstate newstatus newcomment test-id)) + ((and newstate newstatus) + (rmt:general-call 'state-status run-id newstate newstatus test-id)) + (else + (if newstate (rmt:general-call 'set-test-state run-id newstate test-id)) + (if newstatus (rmt:general-call 'set-test-status run-id newstatus test-id)) + (if newcomment (rmt:general-call 'set-test-comment run-id newcomment test-id)))) + (mt:process-triggers run-id test-id 
newstate newstatus) + #t))) + +(define (mt:test-set-state-status-by-testname run-id test-name item-path new-state new-status new-comment) + (let ((test-id (cdb:remote-run db:get-test-id-cached #f run-id test-name item-path))) + (mt:test-set-state-status-by-id test-id new-state new-status new-comment))) (define (mt:lazy-read-test-config test-name) (let ((tconf (hash-table-ref/default *testconfigs* test-name #f))) (if tconf tconf (let ((test-dirs (tests:get-tests-search-path *configdat*))) (let loop ((hed (car test-dirs)) (tal (cdr test-dirs))) + ;; Setting MT_LINKTREE here is almost certainly unnecessary. (let ((tconfig-file (conc hed "/" test-name "/testconfig"))) (if (and (file-exists? tconfig-file) (file-read-access? tconfig-file)) - (let ((newtcfg (read-config tconfig-file #f #f))) ;; NOTE: Does NOT run [system ...] - (hash-table-set! *testconfigs* test-name newtcfg) - newtcfg) + (let ((link-tree-path (configf:lookup *configdat* "setup" "linktree")) + (old-link-tree (get-environment-variable "MT_LINKTREE"))) + (if link-tree-path (setenv "MT_LINKTREE" link-tree-path)) + (let ((newtcfg (read-config tconfig-file #f #f))) ;; NOTE: Does NOT run [system ...] + (hash-table-set! *testconfigs* test-name newtcfg) + (if old-link-tree + (setenv "MT_LINKTREE" old-link-tree) + (unsetenv "MT_LINKTREE")) + newtcfg)) (if (null? tal) (begin (debug:print 0 "ERROR: No readable testconfig found for " test-name) #f) (loop (car tal)(cdr tal)))))))))) Index: newdashboard.scm ================================================================== --- newdashboard.scm +++ newdashboard.scm @@ -7,11 +7,11 @@ ;; This program is distributed WITHOUT ANY WARRANTY; without even the ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. 
;;====================================================================== -(use format) +(use format numbers) (require-library iup) (import (prefix iup iup:)) (use canvas-draw) (use sqlite3 srfi-1 posix regex regex-case srfi-69) @@ -67,21 +67,33 @@ (if (args:get-arg "-h") (begin (print help) (exit))) -(if (not (setup-for-run)) +(if (not (launch:setup-for-run)) (begin (print "Failed to find megatest.config, exiting") (exit 1))) -(if (args:get-arg "-host") - (begin - (set! *runremote* (string-split (args:get-arg "-host" ":"))) - (client:launch)) - (client:launch)) +;; (if (args:get-arg "-host") +;; (begin +;; (set! *runremote* (string-split (args:get-arg "-host" ":"))) +;; (client:launch)) +;; (client:launch)) + +;; ease debugging by loading ~/.dashboardrc +(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.dashboardrc"))) + (if (file-exists? debugcontrolf) + (load debugcontrolf))) + +(define *dbdir* (conc (configf:lookup *configdat* "setup" "linktree") "/.db")) +(define *dbstruct-local* (make-dbr:dbstruct path: *dbdir* + local: #t)) +(define *db-file-path* (db:dbfile-path 0)) +;; HACK ALERT: this is a hack, please fix. +(define *read-only* (not (file-read-access? *db-file-path*))) (debug:setup) (define *tim* (iup:timer)) (define *ord* #f) @@ -267,14 +279,17 @@ (conc "-e " (get-environment-variable "SHELL")) ""))) (system (conc "cd " rundir ";xterm -T \"" (string-translate testfullname "()" " ") "\" " shell "&"))) (message-window (conc "Directory " rundir " not found"))))) - (command-text-box (iup:textbox #:expand "HORIZONTAL" #:font "Courier New, -10")) - (command-launch-button (iup:button "Execute!" #:action (lambda (x) - (let ((cmd (iup:attribute command-text-box "VALUE"))) - (system (conc cmd " &")))))) + (command-text-box (iup:textbox #:expand "HORIZONTAL" #:font "Courier New, -12")) + (command-launch-button (iup:button "Execute!" 
+ ;; #:expand "HORIZONTAL" + #:size "50x" + #:action (lambda (x) + (let ((cmd (iup:attribute command-text-box "VALUE"))) + (system (conc cmd " &")))))) (run-test (lambda (x) (iup:attribute-set! command-text-box "VALUE" (conc "xterm -geometry 180x20 -e \"megatest -target " keystring " :runname " runname " -runtests " (conc testname "/" (if (equal? item-path "") @@ -316,13 +331,13 @@ #:numlin 5 #:numcol-visible 1 #:numlin-visible 5)) (steps-matrix (iup:matrix #:expand "YES" - #:numcol 5 + #:numcol 6 #:numlin 50 - #:numcol-visible 5 + #:numcol-visible 6 #:numlin-visible 8)) (data-matrix (iup:matrix #:expand "YES" #:numcol 8 #:numlin 50 @@ -347,15 +362,18 @@ (list run-info-matrix test-info-matrix test-run-matrix meta-dat-matrix)) ;; Steps matrix (iup:attribute-set! steps-matrix "0:1" "Step Name") (iup:attribute-set! steps-matrix "0:2" "Start") + (iup:attribute-set! steps-matrix "WIDTH2" "40") (iup:attribute-set! steps-matrix "0:3" "End") - (iup:attribute-set! steps-matrix "WIDTH3" "50") + (iup:attribute-set! steps-matrix "WIDTH3" "40") (iup:attribute-set! steps-matrix "0:4" "Status") - (iup:attribute-set! steps-matrix "WIDTH4" "50") - (iup:attribute-set! steps-matrix "0:5" "Log File") + (iup:attribute-set! steps-matrix "WIDTH4" "40") + (iup:attribute-set! steps-matrix "0:5" "Duration") + (iup:attribute-set! steps-matrix "WIDTH5" "40") + (iup:attribute-set! steps-matrix "0:6" "Log File") (iup:attribute-set! steps-matrix "ALIGNMENT1" "ALEFT") ;; (iup:attribute-set! steps-matrix "FIXTOTEXT" "C1") (iup:attribute-set! steps-matrix "RESIZEMATRIX" "YES") ;; (iup:attribute-set! steps-matrix "WIDTH1" "120") ;; (iup:attribute-set! 
steps-matrix "WIDTH0" "100") @@ -386,38 +404,46 @@ (list run-info-matrix '("Run Id" "Target" "Runname" "Run Start Time" )) (list test-info-matrix '("Test Id" "Testname" "Itempath" "State" "Status" "Test Start Time" "Comment")) (list test-run-matrix '("Hostname" "Host info" "Disk Free" "CPU Load" "Run Duration")) (list meta-dat-matrix '("Author" "Owner" "Last Reviewed" "Tags" "Description")))) - (iup:vbox - (iup:hbox - run-info-matrix - test-info-matrix) - (iup:hbox - test-run-matrix - meta-dat-matrix) - (iup:vbox - (iup:vbox - (iup:hbox - (iup:button "View Log" #:action viewlog #:size "80x") - (iup:button "Start Xterm" #:action xterm #:size "80x") - (iup:button "Run Test" #:action run-test #:size "80x") - (iup:button "Clean Test" #:action remove-test #:size "80x")) - (apply - iup:hbox - (list command-text-box command-launch-button)))) - (iup:vbox - (let ((tabs (iup:tabs - steps-matrix - data-matrix))) - (iup:attribute-set! tabs "TABTITLE0" "Test Steps") - (iup:attribute-set! tabs "TABTITLE1" "Test Data") - tabs))))) + (iup:split + #:orientation "HORIZONTAL" + (iup:vbox + (iup:hbox + (iup:vbox + run-info-matrix + test-info-matrix) + ;; test-info-matrix) + (iup:vbox + test-run-matrix + meta-dat-matrix)) + (iup:vbox + (iup:vbox + (iup:hbox + (iup:button "View Log" #:action viewlog #:size "60x" ) ;; #:size "30x" + (iup:button "Start Xterm" #:action xterm #:size "60x" )) ;; #:size "30x" + (iup:hbox + (iup:button "Run Test" #:action run-test #:size "60x" ) ;; #:size "30x" + (iup:button "Clean Test" #:action remove-test #:size "60x" ))) ;; #:size "30x" + (iup:hbox + ;; hiup:split ;; hbox + ;; #:orientation "HORIZONTAL" + ;; #:value 300 + command-text-box + command-launch-button))) + (iup:vbox + (let ((tabs (iup:tabs + steps-matrix + data-matrix))) + (iup:attribute-set! tabs "TABTITLE0" "Test Steps") + (iup:attribute-set! 
tabs "TABTITLE1" "Test Data") + tabs))))) ;; Test browser (define (tests window-id) - (iup:hbox + (iup:split (let* ((tb (iup:treebox #:selection-cb (lambda (obj id state) ;; (print "obj: " obj ", id: " id ", state: " state) (let* ((run-path (tree:node->path obj id)) @@ -438,14 +464,18 @@ ;; get test-id ;; then get test record (if testdat (let* ((test-id (hash-table-ref/default (dboard:data-get-curr-test-ids *data*) window-id #f)) (test-data (hash-table-ref/default testdat test-id #f)) + (run-id (db:test-get-run_id test-data)) (targ/runname (hash-table-ref/default (dboard:data-get-run-keys *data*) - (db:test-get-run_id test-data) '())) + run-id + '())) (target (if (null? targ/runname) "" (string-intersperse (reverse (cdr (reverse targ/runname))) "/"))) - (runname (if (null? targ/runname) "" (car (cdr targ/runname))))) + (runname (if (null? targ/runname) "" (car (cdr targ/runname)))) + (steps-dat (dcommon:get-compressed-steps *dbstruct-local* run-id test-id))) + (if test-data (begin ;; (for-each (lambda (data) @@ -486,14 +516,15 @@ (db:test-get-uname test-data) (db:test-get-diskfree test-data) (db:test-get-cpuload test-data) (seconds->hr-min-sec (db:test-get-run_duration test-data))) (make-list 5 ""))) + )) + (dcommon:populate-steps steps-dat steps-matrix)))))) ;;(list meta-dat-matrix ;; (if test-id ;; (list ( - ))))))) ;; db:test-get-id ;; db:test-get-run_id ;; db:test-get-testname @@ -552,10 +583,11 @@ ;; Main Panel (define (main-panel window-id) (iup:dialog #:title "Megatest Control Panel" #:menu (dcommon:main-menu) + #:shrink "YES" (let ((tabtop (iup:tabs (runs window-id) (tests window-id) (runcontrol window-id) (mtest window-id) @@ -590,13 +622,14 @@ (lambda (x) ;; Want to dedicate no more than 50% of the time to this so skip if ;; 2x delta time has not passed since last query (if (< nextmintime (current-milliseconds)) (let* ((starttime (current-milliseconds)) - (changes (run-update keys data runname keypatts testpatt states statuses 'full my-window-id)) + 
(changes (dcommon:run-update keys data runname keypatts testpatt states statuses 'full my-window-id)) (endtime (current-milliseconds))) (set! nextmintime (+ endtime (* 2 (- endtime starttime)))) (debug:print 11 "CHANGE(S): " (car changes) "...")) (debug:print-info 11 "Server overloaded")))))) -(newdashboard) +(dboard:data-set-updaters! *data* (make-hash-table)) +(newdashboard *dbstruct-local*) (iup:main-loop) ADDED nmsg-transport.scm Index: nmsg-transport.scm ================================================================== --- /dev/null +++ nmsg-transport.scm @@ -0,0 +1,358 @@ + +;; Copyright 2006-2012, Matthew Welland. +;; +;; This program is made available under the GNU GPL version 2.0 or +;; greater. See the accompanying file COPYING for details. +;; +;; This program is distributed WITHOUT ANY WARRANTY; without even the +;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +;; PURPOSE. + +(require-extension (srfi 18) extras tcp s11n) + +(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest) +(import (prefix sqlite3 sqlite3:)) + +(use nanomsg) + +(declare (unit nmsg-transport)) + +(declare (uses common)) +(declare (uses db)) +(declare (uses tests)) +(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. +(declare (uses server)) + +(include "common_records.scm") +(include "db_records.scm") + +;; Transition to pub --> sub with pull <-- push +;; +;; 1. client sends request to server via push to the pull port +;; 2. server puts request in queue or processes immediately as appropriate +;; 3. server puts responses from completed requests into pub port +;; +;; TODO +;; +;; Done Tested +;; [x] [ ] 1. Add columns pullport pubport to servers table +;; [x] [ ] 2. Add rm of monitor.db if older than 11/12/2012 +;; [x] [ ] 3. Add create of pullport and pubport with finding of available ports +;; [x] [ ] 4. 
;;                Add client compose of request
;;  [x]  [ ]        - name of client: testname/itempath-test_id-hostname
;;  [x]  [ ]        - name of request: callname, params
;;  [x]  [ ]        - request key: f(clientname, callname, params)
;;  [x]  [ ]    5. Add processing of subscription hits
;;  [x]  [ ]        - done when get key
;;  [x]  [ ]        - return results
;;  [x]  [ ]    6. Add timeout processing
;;  [x]  [ ]        - after 60 seconds
;;  [ ]  [ ]            i. check server alive, connect to new if necessary
;;  [ ]  [ ]           ii. resend request
;;  [ ]  [ ]    7. Turn self ping back on

;; Build a "tcp://host:port" url from hostport, a list of (host port).
;; Returns #f when hostport is #f. With bindall: #t the host part is "*".
;;
(define (nmsg-transport:make-server-url hostport #!key (bindall #f))
  (if (not hostport)
      #f
      (conc "tcp://" (if bindall "*" (car hostport)) ":" (cadr hostport))))

(define *server-loop-heart-beat* (current-seconds))
(define *heartbeat-mutex* (make-mutex))

;;======================================================================
;; S E R V E R
;;======================================================================

;; Start the nanomsg server for run-id on a port handed out by portlogger.
;;
;; The reply loop (nmsg-transport:try-start-server) runs in its own thread. The
;; server is then pinged on the chosen port, expecting this process id back as
;; the key. On success the interface/port and state are recorded through the
;; tasks db, nmsg-transport:keep-running is started in a second thread and this
;; call blocks joining the server thread. On failure the server record is
;; deleted, the port is marked failed and another attempt is made, at most
;; retrynum more times; after that the process exits with status 1.
;;
(define (nmsg-transport:run dbstruct hostn run-id server-id #!key (retrynum 1000))
  (debug:print 2 "Attempting to start the server ...")
  (let* ((start-port    (portlogger:open-run-close portlogger:find-port))
         (server-thread (make-thread (lambda ()
                                       (nmsg-transport:try-start-server dbstruct run-id start-port server-id))
                                     "server thread"))
         (tdbdat        (tasks:open-db)))
    (thread-start! server-thread)
    (thread-sleep! 0.1)
    ;; return-socket: #f - only the yes/no answer is needed here, so let ping
    ;; close the request socket it opened instead of handing it back to be dropped.
    (if (nmsg-transport:ping hostn start-port timeout: 2 expected-key: (current-process-id) return-socket: #f)
        (let ((interface (if (equal? hostn "-")(get-host-name) hostn)))
          (tasks:server-set-interface-port (db:delay-if-busy tdbdat) server-id interface start-port)
          (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep")
          (set! *server-info* (list hostn start-port)) ;; probably not needed anymore? currently used by keep-running
          (thread-sleep! 3) ;; give some margin for queries to complete before switching from file based access to server based access
          ;; (set! *inmemdb* dbstruct)
          (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running")
          (thread-start! (make-thread
                          (lambda ()(nmsg-transport:keep-running server-id run-id))
                          "keep running"))
          (thread-join! server-thread))
        (if (> retrynum 0)
            (begin
              (debug:print 0 "WARNING: Failed to connect to server (self) on host " hostn ":" start-port ", trying again.")
              (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id "failed to start, never received server alive signature")
              (portlogger:open-run-close portlogger:set-failed start-port)
              ;; count the attempt down; without this the "Giving up" branch below is never reached
              (nmsg-transport:run dbstruct hostn run-id server-id retrynum: (- retrynum 1)))
            (begin
              (debug:print 0 "ERROR: could not find an open port to start server on. Giving up")
              (exit 1))))))

;; Bind a nanomsg 'rep socket on tcp://*:portnum and serve requests forever:
;; each received message is decoded with db:string->obj, handed to
;; api:execute-requests and the encoded result is sent back on the same socket.
;; run-id and server-id are accepted but not used in the loop.
;;
(define (nmsg-transport:try-start-server dbstruct run-id portnum server-id)
  (let ((repsoc (nn-socket 'rep)))
    (nn-bind repsoc (conc "tcp://*:" portnum))
    (let loop ((msg-in (nn-recv repsoc)))
      (let* ((dat (db:string->obj msg-in transport: 'nmsg)))
        (debug:print 0 "server, received: " dat)
        (let ((result (api:execute-requests dbstruct dat)))
          (debug:print 0 "server, sending: " result)
          (nn-send repsoc (db:obj->string result transport: 'nmsg)))
        (loop (nn-recv repsoc))))))

;; all routes though here end in exit ...
;;
;; Exits 0 if a server for run-id is already running. Otherwise tries to lock a
;; server slot (re-trying up to 4 times, 2 seconds apart) and, once a slot is
;; held, hands over to nmsg-transport:run.
;;
(define (nmsg-transport:launch run-id)
  (let* ((tdbdat   (tasks:open-db))
         (dbstruct (db:setup run-id))
         (hostn    (or (args:get-arg "-server") "-")))
    (set! *run-id*   run-id)
    (set! *inmemdb*  dbstruct)
    ;; with nbfake daemonize isn't really needed
    ;;
    ;; (if (args:get-arg "-daemonize")
    ;;     (begin
    ;;       (daemon:ize)
    ;;       (if *alt-log-file* ;; we should re-connect to this port, I think daemon:ize disrupts it
    ;;           (begin
    ;;             (current-error-port *alt-log-file*)
    ;;             (current-output-port *alt-log-file*)))))
    (if (server:check-if-running run-id)
        (begin
          (debug:print-info 0 "Server for run-id " run-id " already running")
          (exit 0)))
    (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id))
               (remtries  4))
      (if (not server-id)
          (if (> remtries 0)
              (begin
                (thread-sleep! 2)
                (if (not (server:check-if-running run-id))
                    (loop (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id)
                          (- remtries 1))
                    (begin
                      (debug:print-info 0 "Another server took the slot, exiting")
                      (exit 0))))
              (begin
                ;; since we didn't get the server lock we are going to clean up and bail out
                (debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue")
                (tasks:server-delete-records-for-this-pid (db:delay-if-busy tdbdat) " http-transport:launch")
                ))
          ;; locked in a server id, try to start up
          (nmsg-transport:run dbstruct hostn run-id server-id))
      (set! *didsomething* #t)
      (exit))))

;;======================================================================
;; S E R V E R   U T I L I T I E S
;;======================================================================

;; md5 digest (as a string) of the written form of
;; (list (current-directory) (argv)).
;;
(define (nmsg-transport:mk-signature)
  (message-digest-string (md5-primitive)
                         (with-output-to-string
                           (lambda ()
                             (write (list (current-directory)
                                          (argv)))))))

;;======================================================================
;; C L I E N T S
;;======================================================================

;; ping the server at host:port
;;   return the open socket if successful (return-socket == #t)
;;   expect the key expected-key returned in payload
;;   send our-key or #f as payload
;;
;; hostn of #f or "-" means this host (get-host-name). If socket is given it is
;; used as is and never closed here; otherwise a 'req socket is opened and it is
;; closed again unless it is being returned to the caller.
;; Returns the socket (return-socket #t) or #t on success, #f on failure.
;;
(define (nmsg-transport:ping hostn port #!key (timeout 3)(return-socket #t)(expected-key #f)(our-key #f)(socket #f))
  ;; send #("ping" our-key) and compare element 1 of the reply with expected-key
  (let* ((host    (if (or (not hostn)
                          (equal? hostn "-")) ;; use localhost
                      (get-host-name)
                      hostn))
         (req     (or socket
                      (let ((soc (nn-socket 'req)))
                        (nn-connect soc (conc "tcp://" host ":" port))
                        soc)))
         (success #t)
         (dat     (vector "ping" our-key))
         (result  (condition-case
                   (nmsg-transport:client-api-send-receive-raw req dat timeout: timeout)
                   ((timeout)(set! success #f) #f)))
         (key     (if success
                      (vector-ref result 1)
                      #f)))
    (debug:print 0 "success=" success ", key=" key ", expected-key=" expected-key ", equal? " (equal? key expected-key))
    (if (and success
             (or (not expected-key) ;; just getting a reply is good enough then
                 (equal? key expected-key)))
        (if return-socket
            req
            (begin
              (if (not socket)(nn-close req)) ;; don't want a side effect of closing socket if handed it
              #t))
        (begin
          (if (not socket)(nn-close req)) ;; failed to ping, close socket as side effect
          #f))))

;; send data to server, wait max of timeout seconds for a response.
;; return #( success/fail result )
;;
;; for efficiency it is easier to do the obj->string and string->obj here.
;;
;;   socreq      - request socket, handed to nn-send / nn-recv
;;   indat       - request object, encoded with db:obj->string transport: 'nmsg
;;   enable-send - accepted for callers, not used by this routine
;;   timeout     - approximate number of seconds to wait for the reply
;;
;; Returns the decoded reply when it is a vector whose first element is true.
;; Signals a condition of kind 'timeout when no reply was received, and a
;; condition of kinds (exn nmsg-server-error) when the server replied with
;; anything else (the raw reply is in property 'result of nmsg-server-error).
;;
(define (nmsg-transport:client-api-send-receive-raw socreq indat #!key (enable-send #t)(timeout 25))
  (let* ((success     #f)
         (result      #f)
         (keepwaiting #t)
         (dat         (db:obj->string indat transport: 'nmsg))
         (send-recv   (make-thread
                       (lambda ()
                         (nn-send socreq dat)
                         (let* ((res (nn-recv socreq)))
                           (set! success #t)
                           (set! result (db:string->obj res transport: 'nmsg))))
                       "send-recv"))
         ;; bound as watchdog (not timeout) so that the timeout read inside the
         ;; lambda is plainly the keyword argument, i.e. the number of seconds
         (watchdog    (make-thread
                       (lambda ()
                         (let loop ((count 0))
                           (thread-sleep! 1)
                           (debug:print-info 1 "send-receive-raw, still waiting after " count " seconds...")
                           (if (and keepwaiting (< count timeout)) ;; yes, this is very approximate
                               (loop (+ count 1))))
                         (if keepwaiting
                             (begin
                               (print "timeout waiting for ping")
                               (thread-terminate! send-recv))))
                       "timeout")))
    ;; replace with condition-case?
    ;; thread-join! raises if send-recv was terminated by the watchdog or died
    ;; with an exception of its own; either way success stays #f.
    (handle-exceptions
     exn
     (set! result "timeout")
     (thread-start! watchdog)
     (thread-start! send-recv)
     (thread-join! send-recv))
    ;; the exchange is over one way or the other: stop the watchdog so it can
    ;; neither linger nor report a timeout for a call that has already ended
    (set! keepwaiting #f)
    (thread-terminate! watchdog)
    (cond
     ;; raise timeout error if timed out
     ((not success)
      (signal (make-composite-condition
               (make-property-condition 'timeout 'message "nmsg-transport:client-api-send-receive-raw timed out talking to server"))))
     ;; did it fail at the server? nope, all good
     ((and (vector? result)
           (> (vector-length result) 0)
           (vector-ref result 0))
      result)
     (else
      ;; a failure reply is read as #( #f call-chain info ); only touch the
      ;; slots that are really there so a malformed reply is still reported
      (let ((info  (if (and (vector? result) (> (vector-length result) 2))
                       (vector-ref result 2)
                       result))
            (chain (if (and (vector? result) (> (vector-length result) 1))
                       (vector-ref result 1)
                       #f)))
        (debug:print 0 "ERROR: error occured at server, info=" info)
        (debug:print 0 "       client call chain:")
        (print-call-chain (current-error-port))
        (debug:print 0 "       server call chain:")
        (pp chain (current-error-port))
        ;; signal a real condition object rather than the #f status flag
        (signal (make-composite-condition
                 (make-property-condition 'exn
                                          'message "nmsg-transport:client-api-send-receive-raw: server reported a failure"
                                          'arguments (list info))
                 (make-property-condition 'nmsg-server-error 'result result))))))))

;; run nmsg-transport:keep-running in a parallel thread to monitor that the db is being
;; used and to shutdown after sometime if it is not.
+;; +(define (nmsg-transport:keep-running server-id run-id) + ;; if none running or if > 20 seconds since + ;; server last used then start shutdown + ;; This thread waits for the server to come alive + (let* ((server-info (let loop () + (let ((sdat #f)) + (mutex-lock! *heartbeat-mutex*) + (set! sdat *server-info*) + (mutex-unlock! *heartbeat-mutex*) + (if sdat + (begin + (debug:print-info 0 "keep-running got sdat=" sdat) + sdat) + (begin + (thread-sleep! 0.5) + (loop)))))) + (iface (car server-info)) + (port (cadr server-info)) + (last-access 0) + (tdbdat (tasks:open-db)) + (server-timeout (let ((tmo (configf:lookup *configdat* "server" "timeout"))) + (if (and (string? tmo) + (string->number tmo)) + (* 60 60 (string->number tmo)) + ;; (* 3 24 60 60) ;; default to three days + (* 60 1) ;; default to one minute + ;; (* 60 60 25) ;; default to 25 hours + )))) + (print "Keep-running got server pid " server-id ", using iface " iface " and port " port) + (let loop ((count 0)) + (thread-sleep! 4) ;; no need to do this very often + ;; NB// sync currently does NOT return queue-length + (let () ;; (queue-len (cdb:client-call server-info 'sync #t 1))) + ;; (print "Server running, count is " count) + (if (< count 1) ;; 3x3 = 9 secs aprox + (loop (+ count 1))) + + (mutex-lock! *heartbeat-mutex*) + (set! last-access *last-db-access*) + (mutex-unlock! *heartbeat-mutex*) + (db:sync-touched *inmemdb* run-id force-sync: #t) + (if (and *server-run* + (> (+ last-access server-timeout) + (current-seconds))) + (begin + (debug:print-info 0 "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) + (loop 0)) + (begin + (debug:print-info 0 "Starting to shutdown the server.") + (set! *time-to-exit* #t) + (db:sync-touched *inmemdb* run-id force-sync: #t) + (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running") + (debug:print-info 0 "Server shutdown complete. 
Exiting") + (exit) + )))))) + +;;====================================================================== +;; C L I E N T S +;;====================================================================== + +(define (nmsg-transport:client-connect iface portnum) + (let* ((reqsoc (nmsg-transport:ping iface portnum return-socket: #t))) + (vector iface portnum #f #f #f (current-seconds) reqsoc))) + +;; returns result, there is no sucess/fail flag - handled via excpections +;; +(define (nmsg-transport:client-api-send-receive run-id connection-info cmd param #!key (remtries 5)) + ;; NB// In the html version of this routine there is a call to + ;; tasks:kill-server-run-id when there is an exception + (mutex-lock! *http-mutex*) + (let* ((packet (vector cmd param)) + (reqsoc (http-transport:server-dat-get-socket connection-info)) + (res (nmsg-transport:client-api-send-receive-raw reqsoc packet))) +;; (status (vector-ref rawres 0)) +;; (result (vector-ref rawres 1))) + (mutex-unlock! *http-mutex*) + res)) ;; (vector status (if status (db:string->obj result transport: 'nmsg) result)))) + +;;====================================================================== +;; J U N K +;;====================================================================== + +;; DO NOT USE +;; +(define (nmsg-transport:client-signal-handler signum) + (handle-exceptions + exn + (debug:print " ... exiting ...") + (let ((th1 (make-thread (lambda () + (if (not *received-response*) + (receive-message* *runremote*))) ;; flush out last call if applicable + "eat response")) + (th2 (make-thread (lambda () + (debug:print 0 "ERROR: Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.") + (thread-sleep! 3) ;; give the flush three seconds to do it's stuff + (debug:print 0 " Done.") + (exit 4)) + "exit on ^C timer"))) + (thread-start! th2) + (thread-start! th1) + (thread-join! 
th2)))) + ADDED portlogger.scm Index: portlogger.scm ================================================================== --- /dev/null +++ portlogger.scm @@ -0,0 +1,178 @@ + +;; Copyright 2006-2014, Matthew Welland. +;; +;; This program is made available under the GNU GPL version 2.0 or +;; greater. See the accompanying file COPYING for details. +;; +;; This program is distributed WITHOUT ANY WARRANTY; without even the +;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +;; PURPOSE. + +(require-extension (srfi 18) extras tcp s11n) + +(use sqlite3 srfi-1 posix srfi-69 hostinfo dot-locking z3) +(import (prefix sqlite3 sqlite3:)) + +(declare (unit portlogger)) +(declare (uses db)) + +;; lsof -i + + +(define (portlogger:open-db fname) + (let* ((avail (tasks:wait-on-journal fname 5 remove: #t)) ;; wait up to about 10 seconds for the journal to go away + (exists (file-exists? fname)) + (db (if avail + (sqlite3:open-database fname) + (begin + (system (conc "rm -f " fname)) + (sqlite3:open-database fname)))) + (handler (make-busy-timeout 136000)) + (canwrite (file-write-access? fname))) + ;; (db-init (lambda () + ;; (sqlite3:execute + ;; db + ;; "CREATE TABLE IF NOT EXISTS ports ( + ;; port INTEGER PRIMARY KEY, + ;; state TEXT DEFAULT 'not-used', + ;; fail_count INTEGER DEFAULT 0, + ;; update_time TIMESTAMP DEFAULT (strftime('%s','now')) );")))) + (sqlite3:set-busy-handler! db handler) + (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = 0;") + ;; (if (not exists) ;; needed with IF NOT EXISTS? + (sqlite3:execute + db + "CREATE TABLE IF NOT EXISTS ports ( + port INTEGER PRIMARY KEY, + state TEXT DEFAULT 'not-used', + fail_count INTEGER DEFAULT 0, + update_time TIMESTAMP DEFAULT (strftime('%s','now')) );") + db)) + +(define (portlogger:open-run-close proc . params) + (let* ((fname (conc "/tmp/." 
(current-user-name) "-portlogger.db")) + (avail (tasks:wait-on-journal fname 10))) ;; wait up to about 10 seconds for the journal to go away + (handle-exceptions + exn + (begin + ;; (release-dot-lock fname) + (debug:print 0 "ERROR: portlogger:open-run-close failed. " proc " " params) + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 "exn=" (condition->list exn)) + (if (file-exists? fname)(delete-file fname)) ;; brutally get rid of it + (print-call-chain (current-error-port))) + (let* (;; (lock (obtain-dot-lock fname 2 9 10)) + (db (portlogger:open-db fname)) + (res (apply proc db params))) + (sqlite3:finalize! db) + ;; (release-dot-lock fname) + res)))) + +;; (fold-row PROC INIT DATABASE SQL . PARAMETERS) +(define (portlogger:take-port db portnum) + (let* ((qry1 (sqlite3:prepare db "INSERT INTO ports (port,state) VALUES (?,?);")) + (qry2 (sqlite3:prepare db "UPDATE ports SET state=?,update_time=strftime('%s','now') WHERE port=?;")) + (qry3 (sqlite3:prepare db "SELECT state FROM ports WHERE port=?;")) + (res (sqlite3:with-transaction + db + (lambda () + ;; (fold-row (lambda (var curr) (or var curr)) #f db "SELECT var FROM foo WHERE id=100;") + (let* ((curr #f) + (res #f)) + (set! curr (sqlite3:fold-row + (lambda (var curr) + (or curr var curr)) + "not-tried" + qry3 + portnum)) + ;; (print "curr=" curr) + (set! res (case (string->symbol curr) + ((released) (sqlite3:execute qry2 "taken" portnum) 'taken) + ((not-tried) (sqlite3:execute qry1 portnum "taken") 'taken) + ((taken) 'already-taken) + ((failed) 'failed) + (else 'error))) + ;; (print "res=" res) + res))))) + (sqlite3:finalize! qry1) + (sqlite3:finalize! qry2) + (sqlite3:finalize! qry3) + res)) + +(define (portlogger:get-prev-used-port db) + (handle-exceptions + exn + (begin + (debug:print 0 "EXCEPTION: portlogger database probably overloaded or unreadable. 
If you see this message again remove /tmp/.$USER-portlogger.db") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 "exn=" (condition->list exn)) + (print-call-chain (current-error-port)) + (debug:print 0 "Continuing anyway.") + #f) + (sqlite3:fold-row + (lambda (var curr) + (or curr var curr)) + #f + db + "SELECT (port) FROM ports WHERE state='released' LIMIT 1;"))) + +(define (portlogger:find-port db) + (let* ((lowport (let ((val (configf:lookup *configdat* "server" "lowport"))) + (if (and val + (string->number val)) + (string->number val) + 32768))) + (portnum (or (portlogger:get-prev-used-port db) + (+ lowport ;; top of registered ports is 49152 but lets use ports in the registered range + (random (- 64000 lowport)))))) + (handle-exceptions + exn + (begin + (debug:print 0 "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 "exn=" (condition->list exn)) + (print-call-chain (current-error-port)) + (debug:print 0 "Continuing anyway.")) + (portlogger:take-port db portnum)) + portnum)) + +;; set port to "released", "failed" etc. +;; +(define (portlogger:set-port db portnum value) + (sqlite3:execute db "UPDATE ports SET state=?,update_time=strftime('%s','now') WHERE port=?;" value portnum)) + +;; set port to failed (attempted to take but got error) +;; +(define (portlogger:set-failed db portnum) + (sqlite3:execute db "UPDATE ports SET state='failed',fail_count=fail_count+1,update_time=strftime('%s','now') WHERE port=?;" portnum)) + +;;====================================================================== +;; MAIN +;;====================================================================== + +(define (portlogger:main . args) + (let* ((dbfname (conc "/tmp/." 
(current-user-name) "-portlogger.db")) + (db (portlogger:open-db dbfname)) + (numargs (length args)) + (result + (handle-exceptions + exn + (begin + (debug:print 0 "EXCEPTION: portlogger database at " dbfname " probably overloaded or unreadable. Try removing it.") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (print "exn=" (condition->list exn)) + (debug:print 0 " status: " ((condition-property-accessor 'sqlite3 'status) exn)) + (print-call-chain (current-error-port))) + (cond + ((> numargs 1) ;; most commands + (case (string->symbol (car args)) ;; commands with two or more params + ((take)(portlogger:take-port db (string->number (cadr args)))) + ((set) (portlogger:set-port db + (string->number (cadr args)) + (caddr args)) + (caddr args)) + ((failed)(portlogger:set-failed db (string->number (cadr args))) 'failed))))))) + (sqlite3:finalize! db) + result)) + +;; (print (apply portlogger:main (cdr (argv)))) Index: process.scm ================================================================== --- process.scm +++ process.scm @@ -51,10 +51,12 @@ ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params) (handle-exceptions exn (begin (print "ERROR: Failed to run command: " cmd " " (string-intersperse params " ")) + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (print "exn=" (condition->list exn)) #f) (let-values (((fh fho pid) (if (null? params) (process cmd) (process cmd params)))) (let loop ((curr (read-line fh)) @@ -124,6 +126,15 @@ (if (eof-object? inl) (reverse res) (let ((pid (string->number inl))) (if proc (proc pid)) (loop (read-line) (cons pid res)))))))) - + +(define (process:alive? pid) + (handle-exceptions + exn + ;; possibly pid is a process not a child, look in /proc to see if it is running still + (file-exists? (conc "/proc/" pid)) + (let-values (((rpid exit-type exit-signal)(process-wait pid #t))) + (and (number? rpid) + (equal? 
rpid pid))))) + Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -7,16 +7,17 @@ ;; This program is distributed WITHOUT ANY WARRANTY; without even the ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. ;;====================================================================== -(use json) +(use json format) (declare (unit rmt)) (declare (uses api)) (declare (uses tdb)) (declare (uses http-transport)) +(declare (uses nmsg-transport)) ;; ;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!! ;; @@ -40,45 +41,208 @@ ((http) (http-transport:client-api-send-receive run-id connection-info cmd jparams)) ((fs) ( fs-transport:client-api-send-receive run-id connection-info cmd jparams)) ((zmq) (zmq-transport:client-api-send-receive run-id connection-info cmd jparams)) (else ( rpc-transport:client-api-send-receive run-id connection-info cmd jparams)))) -;; cmd is a symbol -;; vars is a json string encoding the parameters for the call ;; -(define (rmt:send-receive cmd rid params) - (let* ((run-id (if rid rid 0)) - (connection-info (let ((cinfo (hash-table-ref/default *runremote* run-id #f))) - (if cinfo - cinfo - (let loop ((numtries 100)) - (let ((res (client:setup run-id))) - (if res - (hash-table-ref/default *runremote* run-id #f) ;; client:setup filled this in (hopefully) - (if (> numtries 0) - (begin - (thread-sleep! 10) - (loop (- numtries 1))) - (begin - (debug:print 0 "ERROR: 100 tries and no server, giving up") - (exit 1))))))))) - (jparams (db:obj->string params)) - (res (rmt:call-transport connection-info cmd jparams))) - (if res - (db:string->obj res) ;; (rmt:json-str->dat res) - (let ((new-connection-info (client:setup run-id))) - (debug:print 0 "WARNING: Communication failed, trying call to http-transport:client-api-send-receive again.") - (rmt:send-receive cmd run-id params))))) +(define (rmt:write-frequency-over-limit? 
cmd run-id) + (and (not (member cmd api:read-only-queries)) + (let* ((tmprec (hash-table-ref/default *write-frequency* run-id #f)) + (record (if tmprec tmprec + (let ((v (vector (current-seconds) 0))) + (hash-table-set! *write-frequency* run-id v) + v))) + (count (+ 1 (vector-ref record 1))) + (start (vector-ref record 0)) + (queries-per-second (/ (* count 1.0) + (max (- (current-seconds) start) 1)))) + (vector-set! record 1 count) + (if (and (> count 10) + (> queries-per-second 10)) + (begin + (debug:print-info 1 "db write rate too high, starting a server, count=" count " start=" start " run-id=" run-id " queries-per-second=" queries-per-second) + #t) + #f)))) + +(define (rmt:get-connection-info run-id) + (let ((cinfo (hash-table-ref/default *runremote* run-id #f))) + (if cinfo + cinfo + ;; NB// can cache the answer for server running for 10 seconds ... + ;; ;; (and (not (rmt:write-frequency-over-limit? cmd run-id)) + (if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id) + (client:setup run-id) + #f)))) + +(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id +(define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected + ;; clean out old connections + (mutex-lock! *db-multi-sync-mutex*) + (let ((expire-time (- (current-seconds) (server:get-timeout) 10))) ;; don't forget the 10 second margin + (for-each + (lambda (run-id) + (let ((connection (hash-table-ref/default *runremote* run-id #f))) + (if (and connection + (< (http-transport:server-dat-get-last-access connection) expire-time)) + (begin + (debug:print-info 0 "Discarding connection to server for run-id " run-id ", too long between accesses") + ;; SHOULD CLOSE THE CONNECTION HERE + (case *transport-type* + ((nmsg)(nn-close (http-transport:server-dat-get-socket + (hash-table-ref *runremote* run-id))))) + (hash-table-delete! 
*runremote* run-id))))) + (hash-table-keys *runremote*))) + (mutex-unlock! *db-multi-sync-mutex*) + ;; (mutex-lock! *send-receive-mutex*) + (let* ((run-id (if rid rid 0)) + (connection-info (rmt:get-connection-info run-id))) + ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also) + (if connection-info + ;; use the server if have connection info + (let* ((dat (case *transport-type* + ((http)(condition-case + (http-transport:client-api-send-receive run-id connection-info cmd params) + ((commfail)(vector #f "communications fail")))) + ((nmsg)(condition-case + (nmsg-transport:client-api-send-receive run-id connection-info cmd params) + ((timeout)(vector #f "timeout talking to server")))) + (else (exit)))) + (success (if (and dat (vector? dat)) (vector-ref dat 0) #f)) + (res (if (and dat (vector? dat)) (vector-ref dat 1) #f))) + (http-transport:server-dat-update-last-access connection-info) + (if success + (begin + ;; (mutex-unlock! *send-receive-mutex*) + (case *transport-type* + ((http) res) ;; (db:string->obj res)) + ((nmsg) res))) ;; (vector-ref res 1))) + (begin ;; let ((new-connection-info (client:setup run-id))) + (debug:print 0 "WARNING: Communication failed, trying call to rmt:send-receive again.") + ;; (case *transport-type* + ;; ((nmsg)(nn-close (http-transport:server-dat-get-socket connection-info)))) + (hash-table-delete! *runremote* run-id) ;; don't keep using the same connection + (if (eq? (modulo attemptnum 5) 0) + (tasks:kill-server-run-id run-id tag: "api-send-receive-failed")) + ;; (mutex-unlock! *send-receive-mutex*) ;; close the mutex here to allow other threads access to communications + (tasks:start-and-wait-for-server (tasks:open-db) run-id 15) + ;; (nmsg-transport:client-api-send-receive run-id connection-info cmd param remtries: (- remtries 1)))))) + + ;; no longer killing the server in http-transport:client-api-send-receive + ;; may kill it here but what are the criteria? 
+ ;; start with three calls then kill server + ;; (if (eq? attemptnum 3)(tasks:kill-server-run-id run-id)) + ;; (thread-sleep! 2) + (rmt:send-receive cmd run-id params attemptnum: (+ attemptnum 1))))) + ;; no connection info? try to start a server + (if (and (< attemptnum 15) + (tasks:need-server run-id)) + (begin + (hash-table-delete! *runremote* run-id) + ;; (mutex-unlock! *send-receive-mutex*) + (tasks:start-and-wait-for-server (db:delay-if-busy (tasks:open-db)) run-id 10) + (client:setup run-id) + (thread-sleep! (random 5)) ;; give some time to settle and minimize collison? + (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) + (begin + (debug:print 0 "ERROR: Communication failed!") + ;; (mutex-unlock! *send-receive-mutex*) + (exit) + ;; (rmt:open-qry-close-locally cmd run-id params)))) + ))))) + +(define (rmt:update-db-stats run-id rawcmd params duration) + (mutex-lock! *db-stats-mutex*) + (handle-exceptions + exn + (begin + (debug:print 0 "WARNING: stats collection failed in update-db-stats") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (print "exn=" (condition->list exn)) + #f) ;; if this fails we don't care, it is just stats + (let* ((cmd (conc "run-id=" run-id " " (if (eq? rawcmd 'general-call) (car params) rawcmd))) + (stat-vec (hash-table-ref/default *db-stats* cmd #f))) + (if (not stat-vec) + (let ((newvec (vector 0 0))) + (hash-table-set! *db-stats* cmd newvec) + (set! stat-vec newvec))) + (vector-set! stat-vec 0 (+ (vector-ref stat-vec 0) 1)) + (vector-set! stat-vec 1 (+ (vector-ref stat-vec 1) duration)))) + (mutex-unlock! 
*db-stats-mutex*)) + + +(define (rmt:print-db-stats) + (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f" + (debug:print 18 "DB Stats\n========") + (debug:print 18 (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg")) + (for-each (lambda (cmd) + (let ((cmd-dat (hash-table-ref *db-stats* cmd))) + (debug:print 18 (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0)))))) + (sort (hash-table-keys *db-stats*) + (lambda (a b) + (> (vector-ref (hash-table-ref *db-stats* a) 0) + (vector-ref (hash-table-ref *db-stats* b) 0))))))) + +(define (rmt:get-max-query-average run-id) + (mutex-lock! *db-stats-mutex*) + (let* ((runkey (conc "run-id=" run-id " ")) + (cmds (filter (lambda (x) + (substring-index runkey x)) + (hash-table-keys *db-stats*))) + (res (if (null? cmds) + (cons 'none 0) + (let loop ((cmd (car cmds)) + (tal (cdr cmds)) + (max-cmd (car cmds)) + (res 0)) + (let* ((cmd-dat (hash-table-ref *db-stats* cmd)) + (tot (vector-ref cmd-dat 0)) + (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction + (currmax (max res curravg)) + (newmax-cmd (if (> curravg res) cmd max-cmd))) + (if (null? tal) + (if (> tot 10) + (cons newmax-cmd currmax) + (cons 'none 0)) + (loop (car tal)(cdr tal) newmax-cmd currmax))))))) + (mutex-unlock! *db-stats-mutex*) + res)) + +(define (rmt:open-qry-close-locally cmd run-id params) + (let* ((dbstruct-local (if *dbstruct-db* + *dbstruct-db* + (let* ((dbdir (conc (configf:lookup *configdat* "setup" "linktree") "/.db")) + (db (make-dbr:dbstruct path: dbdir local: #t))) + (set! *dbstruct-db* db) + db))) + (db-file-path (db:dbfile-path 0))) + ;; (read-only (not (file-read-access? 
db-file-path))) + (let* ((start (current-milliseconds)) + (resdat (api:execute-requests dbstruct-local (symbol->string cmd) params)) + (res (vector-ref resdat 1)) + (duration (- (current-milliseconds) start))) + (rmt:update-db-stats run-id cmd params duration) + ;; mark this run as dirty if this was a write + (if (not (member cmd api:read-only-queries)) + (let ((start-time (current-seconds))) + (mutex-lock! *db-multi-sync-mutex*) + ;; (if (not (hash-table-ref/default *db-local-sync* run-id #f)) + ;; just set it every time. Is a write more expensive than a read and does it matter? + (hash-table-set! *db-local-sync* (or run-id 0) start-time) ;; the oldest "write" + (mutex-unlock! *db-multi-sync-mutex*))) + res))) (define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params) (let* ((run-id (if run-id run-id 0)) - (jparams (db:obj->string params)) ;; (rmt:dat->json-str params)) - (res (http-transport:client-api-send-receive run-id connection-info cmd jparams numretries: 3))) - (if res - (db:string->obj res) ;; (rmt:json-str->dat res) - ;; this one does NOT keep trying - res))) + ;; (jparams (db:obj->string params)) ;; (rmt:dat->json-str params)) + (res (http-transport:client-api-send-receive run-id connection-info cmd params))) + (if (and res (vector-ref res 0)) + res + #f))) +;; (db:string->obj (vector-ref dat 1)) +;; (begin +;; (debug:print 0 "ERROR: rmt:send-receive-no-auto-client-setup failed, attempting to continue. Got " dat) +;; dat)))) ;; Wrap json library for strings (why the ports crap in the first place?) (define (rmt:dat->json-str dat) (with-output-to-string (lambda () @@ -111,14 +275,17 @@ (define (rmt:login run-id) (rmt:send-receive 'login run-id (list *toppath* megatest-version run-id *my-client-signature*))) ;; This login does no retries under the hood - it acts a bit like a ping. +;; Deprecated for nmsg-transport. 
;; (define (rmt:login-no-auto-client-setup connection-info run-id) - (rmt:send-receive-no-auto-client-setup connection-info 'login run-id (list *toppath* megatest-version run-id *my-client-signature*))) - + (case *transport-type* + ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login run-id (list *toppath* megatest-version run-id *my-client-signature*))) + ((nmsg)(nmsg-transport:client-api-send-receive run-id connection-info 'login (list *toppath* megatest-version run-id *my-client-signature*))))) + ;; hand off a call to one of the db:queries statements ;; added run-id to make looking up the correct db possible ;; (define (rmt:general-call stmtname run-id . params) (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params))) @@ -155,12 +322,12 @@ (define (rmt:get-test-info-by-id run-id test-id) (if (and (number? run-id)(number? test-id)) (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id)) (begin - (debug:print 0 "ERROR: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id) - (print-call-chain) + (debug:print 0 "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id) + (print-call-chain (current-error-port)) #f))) (define (rmt:test-get-rundir-from-test-id run-id test-id) (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id))) @@ -181,27 +348,61 @@ (define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals) (if (number? run-id) (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals)) (begin (debug:print "ERROR: rmt:get-tests-for-run called with bad run-id=" run-id) - (print-call-chain) + (print-call-chain (current-error-port)) '()))) +;; IDEA: Threadify these - they spend a lot of time waiting ... 
+;; (define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in) - (let ((run-id-list (if run-ids + (let ((multi-run-mutex (make-mutex)) + (run-id-list (if run-ids run-ids - (rmt:get-all-run-ids)))) - (apply append (map (lambda (run-id) - (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-ids testpatt states status not-in))) - run-id-list)))) + (rmt:get-all-run-ids))) + (result '())) + (if (null? run-id-list) + '() + (for-each + (lambda (th) + (thread-join! th)) ;; I assume that joining completed threads just moves on + (let loop ((hed (car run-id-list)) + (tal (cdr run-id-list)) + (threads '())) + (let* ((newthread (make-thread + (lambda () + (let ((res (rmt:send-receive 'get-tests-for-run-mindata hed (list hed testpatt states status not-in)))) + (if (list? res) + (begin + (mutex-lock! multi-run-mutex) + (set! result (append result res)) + (mutex-unlock! multi-run-mutex)) + (debug:print 0 "ERROR: get-tests-for-run-mindata failed for run-id " hed ", testpatt " testpatt ", states " states ", status " status ", not-in " not-in)))) + (conc "multi-run-thread for run-id " hed))) + (newthreads (cons newthread threads))) + (thread-start! newthread) + (thread-sleep! 0.5) ;; give that thread some time to start + (if (null? 
tal) + newthreads + (loop (car tal)(cdr tal) newthreads)))))) + result)) (define (rmt:delete-test-records run-id test-id) (rmt:send-receive 'delete-test-records run-id (list run-id test-id))) +;; This is not needed as test steps are deleted on test delete call +;; +;; (define (rmt:delete-test-step-records run-id test-id) +;; (rmt:send-receive 'delete-test-step-records run-id (list run-id test-id))) + (define (rmt:test-set-status-state run-id test-id status state msg) (rmt:send-receive 'test-set-status-state run-id (list run-id test-id status state msg))) +(define (rmt:test-toplevel-num-items run-id test-name) + (rmt:send-receive 'test-toplevel-num-items run-id (list run-id test-name))) + ;; (define (rmt:get-previous-test-run-record run-id test-name item-path) ;; (rmt:send-receive 'get-previous-test-run-record run-id (list run-id test-name item-path))) (define (rmt:get-matching-previous-test-run-records run-id test-name item-path) (rmt:send-receive 'get-matching-previous-test-run-records run-id (list run-id test-name item-path))) @@ -216,10 +417,16 @@ (rmt:send-receive 'get-testinfo-state-status run-id (list run-id test-id))) (define (rmt:test-set-log! run-id test-id logf) (if (string? logf)(rmt:general-call 'test-set-log run-id logf test-id))) +(define (rmt:test-set-top-process-pid run-id test-id pid) + (rmt:send-receive 'test-set-top-process-pid run-id (list run-id test-id pid))) + +(define (rmt:test-get-top-process-pid run-id test-id) + (rmt:send-receive 'test-get-top-process-pid run-id (list run-id test-id))) + (define (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt) (rmt:send-receive 'get-run-ids-matching-target #f (list keynames target res runname testpatt statepatt statuspatt))) ;; NOTE: This will open and access ALL run databases. 
;; @@ -231,11 +438,11 @@ run-ids)))) (define (rmt:get-run-ids-matching keynames target res) (rmt:send-receive #f 'get-run-ids-matching (list keynames target res))) -(define (rmt:get-prereqs-not-met run-id waitons ref-item-path #!key (mode 'normal)) +(define (rmt:get-prereqs-not-met run-id waitons ref-item-path #!key (mode '(normal))) (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-item-path mode))) (define (rmt:get-count-tests-running-for-run-id run-id) (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id))) @@ -286,19 +493,36 @@ (rmt:send-receive 'get-prev-run-ids #f (list run-id))) (define (rmt:lock/unlock-run run-id lock unlock user) (rmt:send-receive 'lock/unlock-run #f (list run-id lock unlock user))) +;; set/get status +(define (rmt:get-run-status run-id) + (rmt:send-receive 'get-run-status #f (list run-id))) + +(define (rmt:set-run-status run-id run-status #!key (msg #f)) + (rmt:send-receive 'set-run-status #f (list run-id run-status msg))) + (define (rmt:update-run-event_time run-id) (rmt:send-receive 'update-run-event_time #f (list run-id))) (define (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit) (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit))) +(define (rmt:find-and-mark-incomplete run-id ovr-deadtime) + (rmt:send-receive 'find-and-mark-incomplete run-id (list run-id ovr-deadtime))) + ;;====================================================================== ;; M U L T I R U N Q U E R I E S ;;====================================================================== + +;; Need to move this to multi-run section and make associated changes +(define (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f)) + (let ((run-ids (rmt:get-all-run-ids))) + (for-each (lambda (run-id) + (rmt:find-and-mark-incomplete run-id ovr-deadtime)) + run-ids))) ;; get the previous record for when this test was run where all keys match but runname ;; returns #f if no such test found, 
returns a single test record if found ;; ;; Run this at the client end since we have to connect to multiple run-id dbs Index: runconfig.scm ================================================================== --- runconfig.scm +++ runconfig.scm @@ -11,12 +11,11 @@ (include "common_records.scm") (define (setup-env-defaults fname run-id already-seen keyvals #!key (environ-patt #f)(change-env #t)) (let* ((keys (map car keyvals)) (thekey (if keyvals (string-intersperse (map (lambda (x)(if x x "-na-")) (map cadr keyvals)) "/") - (or (args:get-arg "-reqtarg") - (args:get-arg "-target") + (or (common:args-get-target) (get-environment-variable "MT_TARGET") (begin (debug:print 0 "ERROR: setup-env-defaults called with no run-id or -target or -reqtarg") "nothing matches this I hope")))) ;; Why was system disallowed in the reading of the runconfigs file? @@ -60,12 +59,11 @@ finaldat)) (define (set-run-config-vars run-id keyvals targ-from-db) (push-directory *toppath*) ;; the push/pop doesn't appear to do anything ... (let ((runconfigf (conc *toppath* "/runconfigs.config")) - (targ (or (args:get-arg "-target") - (args:get-arg "-reqtarg") + (targ (or (common:args-get-target) targ-from-db (get-environment-variable "MT_TARGET")))) (pop-directory) (if (file-exists? runconfigf) (setup-env-defaults runconfigf run-id #t keyvals Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -37,20 +37,19 @@ ;; This is the *new* methodology. One record to inform them and in the chaos, organise them. 
;; (define (runs:create-run-record) (let* ((mconfig (if *configdat* *configdat* - (if (setup-for-run) + (if (launch:setup-for-run) *configdat* (begin (debug:print 0 "ERROR: Called setup in a non-megatest area, exiting") (exit 1))))) (runrec (runs:runrec-make-record)) - (target (or (args:get-arg "-reqtarg") - (args:get-arg "-target"))) - (runname (or (args:get-arg ":runname") - (args:get-arg "-runname"))) + (target (common:args-get-target)) + (runname (or (args:get-arg "-runname") + (args:get-arg ":runname"))) (testpatt (or (args:get-arg "-testpatt") (args:get-arg "-runtests"))) (keys (keys:config-get-fields mconfig)) (keyvals (keys:target->keyval keys target)) (toppath *toppath*) @@ -67,10 +66,11 @@ keyvals) ;; Set up various and sundry known vars here (setenv "MT_RUN_AREA_HOME" toppath) (setenv "MT_RUNNAME" runname) (setenv "MT_TARGET" target) + (setenv "MT_TESTSUITENAME" (common:get-testsuite-name)) (set! envdat (append envdat (list (list "MT_RUN_AREA_HOME" toppath) (list "MT_RUNNAME" runname) (list "MT_TARGET" target)))) @@ -89,18 +89,21 @@ (safe-setenv (car varval)(cadr varval))) (configf:get-section runconfig section))) (list "default" target)) (vector target runname testpatt keys keyvals envdat mconfig runconfig serverdat transport db toppath run-id))) -(define (set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)) - (let* ((target (or (args:get-arg "-reqtarg") - (args:get-arg "-target") - (get-environment-variable "MT_TARGET"))) +(define (runs:set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)) + (let* ((target (or (common:args-get-target) + (get-environment-variable "MT_TARGET"))) (keys (if inkeys inkeys (rmt:get-keys))) - (keyvals (if inkeyvals inkeyvals (keys:target->keyval keys target))) - (vals (hash-table-ref/default *env-vars-by-run-id* run-id #f))) + (keyvals (if inkeyvals inkeyvals (keys:target->keyval keys target))) + (vals (hash-table-ref/default *env-vars-by-run-id* run-id #f)) + (link-tree 
(configf:lookup *configdat* "setup" "linktree"))) ;; get the info from the db and put it in the cache + (if link-tree + (setenv "MT_LINKTREE" link-tree) + (debug:print 0 "ERROR: linktree not set, should be set in megatest.config in [setup] section.")) (if (not vals) (let ((ht (make-hash-table))) (hash-table-set! *env-vars-by-run-id* run-id ht) (set! vals ht) (for-each @@ -157,13 +160,13 @@ (hash-table-set! *runs:denoise* key currtime) #t) #f))) (define (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) - (thread-sleep! (cond - ((> *runs:can-run-more-tests-count* 20) 2);; obviously haven't had any work to do for a while - (else 0))) + ;;(thread-sleep! (cond + ;; ((> *runs:can-run-more-tests-count* 20) 2);; obviously haven't had any work to do for a while + ;; (else 0))) (let* ((num-running (rmt:get-count-tests-running run-id)) (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)) (job-group-limit (let ((jobg-count (config-lookup *configdat* "jobgroups" jobgroup))) (if (string? jobg-count) (string->number jobg-count) @@ -192,48 +195,83 @@ (debug:print 1 "WARNING: number of jobs " num-running-in-jobgroup " in jobgroup \"" jobgroup "\" exceeds limit of " job-group-limit)) #t) (else #f)))) (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit))))) + ;; test-names: Comma separated patterns same as test-patts but used in selection ;; of tests to run. The item portions are not respected. 
;; FIXME: error out if /patt specified ;; -(define (runs:run-tests target runname test-patts user flags) ;; test-names +(define (runs:run-tests target runname test-patts user flags #!key (run-count 3)) ;; test-names (let* ((keys (keys:config-get-fields *configdat*)) (keyvals (keys:target->keyval keys target)) (run-id (rmt:register-run keyvals runname "new" "n/a" user)) ;; test-name))) (deferred '()) ;; delay running these since they have a waiton clause (runconfigf (conc *toppath* "/runconfigs.config")) - (required-tests '()) (test-records (make-hash-table)) - (all-tests-registry (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names - (all-test-names (hash-table-keys all-tests-registry)) - (test-names (tests:filter-test-names all-test-names test-patts))) + ;; need to process runconfigs before generating these lists + (all-tests-registry #f) ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names + (all-test-names #f) ;; (hash-table-keys all-tests-registry)) + (test-names #f) ;; (tests:filter-test-names all-test-names test-patts)) + (required-tests #f) ;;(lset-intersection equal? (string-split test-patts ",") test-names))) ;; test-names)) ;; Added test-names as initial for required-tests but that failed to work + (task-key (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id))) + (tdbdat (tasks:open-db))) - (set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process + (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) + + (set-signal-handler! signal/int + (lambda (signum) + (signal-mask! signum) + (print "Received signal " signum ", cleaning up before exit. Please wait...") + (let ((tdbdat (tasks:open-db))) + (tasks:set-state-given-param-key (db:delay-if-busy tdbdat) task-key "killed")) + (print "Killed by signal " signum ". 
Exiting") + (exit))) + + ;; register this run in monitor.db + (tasks:add (db:delay-if-busy tdbdat) "run-tests" user target runname test-patts task-key) ;; params) + (tasks:set-state-given-param-key (db:delay-if-busy tdbdat) task-key "running") + (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process (if (file-exists? runconfigf) - (setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals "pre-launch-env-vars") + (setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target) (debug:print 0 "WARNING: You do not have a run config file: " runconfigf)) + + ;; Now generate all the tests lists + (set! all-tests-registry (tests:get-all)) + (set! all-test-names (hash-table-keys all-tests-registry)) + (set! test-names (tests:filter-test-names all-test-names test-patts)) + (set! required-tests (lset-intersection equal? (string-split test-patts ",") test-names)) ;; look up all tests matching the comma separated list of globs in ;; test-patts (using % as wildcard) ;; (set! test-names (delete-duplicates (tests:get-valid-tests *toppath* test-patts))) - (debug:print-info 0 "test names " test-names) + (debug:print-info 0 "tests search path: " (tests:get-tests-search-path *configdat*)) + (debug:print-info 0 "all tests: " (string-intersperse (sort all-test-names string<) " ")) + (debug:print-info 0 "test names: " (string-intersperse (sort test-names string<) " ")) ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if ;; -keepgoing is specified (if (eq? *passnum* 0) (begin + ;; Is this still necessary? I think not. Unreachable tests are marked as such and + ;; should not cause problems here. + ;; ;; have to delete test records where NOT_STARTED since they can cause -keepgoing to ;; get stuck due to becoming inaccessible from a failed test. I.e. 
if test B depends ;; on test A but test B reached the point on being registered as NOT_STARTED and test ;; A failed for some reason then on re-run using -keepgoing the run can never complete. - (rmt:general-call 'delete-tests-in-state run-id "NOT_STARTED") - (rmt:set-tests-state-status run-id test-names #f "FAIL" "NOT_STARTED" "FAIL"))) + ;; + ;; (rmt:general-call 'delete-tests-in-state run-id "NOT_STARTED") + + ;; Now convert FAIL and anything in allow-auto-rerun to NOT_STARTED + ;; + (for-each (lambda (state) + (rmt:set-tests-state-status run-id test-names state #f "NOT_STARTED" state)) + (string-split (or (configf:lookup *configdat* "setup" "allow-auto-rerun") ""))))) ;; Ensure all tests are registered in the test_meta table (runs:update-all-test_meta #f) ;; now add non-directly referenced dependencies (i.e. waiton) @@ -329,13 +367,40 @@ (debug:print-info 1 "Adding " required-tests " to the run queue")) ;; NOTE: these are all parent tests, items are not expanded yet. (debug:print-info 4 "test-records=" (hash-table->alist test-records)) (let ((reglen (configf:lookup *configdat* "setup" "runqueue"))) (if (> (length (hash-table-keys test-records)) 0) - (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry) + (let* ((keep-going #t) + (th1 (make-thread (lambda () + (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry)) + "runs:run-tests-queue")) + (th2 (make-thread (lambda () + ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ... + (let ((run-ids (rmt:get-all-run-ids))) + (for-each (lambda (run-id) + (if keep-going + (rmt:find-and-mark-incomplete run-id #f))) ;; ovr-deadtime))) + run-ids))) + "runs: mark-incompletes"))) + (thread-start! th1) + (thread-start! th2) + (thread-join! th1) + (set! keep-going #f) + (thread-join! 
th2) + ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD + (if (> run-count 0) + (begin + (if (not (hash-table-ref/default flags "-preclean" #f)) + (hash-table-set! flags "-preclean" #t)) + (if (not (hash-table-ref/default flags "-rerun" #f)) + (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS")) + (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))))) (debug:print-info 0 "No tests to run"))) - (debug:print-info 4 "All done by here"))) + (debug:print-info 4 "All done by here") + (tasks:set-state-given-param-key (db:delay-if-busy tdbdat) task-key "done") + ;; (sqlite3:finalize! tasks-db) + )) ;; loop logic. These are used in runs:run-tests-queue to make it a bit more readable. ;; ;; If reg not full and have items in tal then loop with (car tal)(cdr tal) reg reruns @@ -375,34 +440,39 @@ '() reg))) (define runs:nothing-left-in-queue-count 0) -(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records) +(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmap) (let* ((loop-list (list hed tal reg reruns)) - (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons item-path testmode)) + (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons item-path testmode itemmap: itemmap)) + ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) (fails (runs:calc-fails prereqs-not-met)) - (non-completed (runs:calc-not-completed prereqs-not-met))) + (prereq-fails (runs:calc-prereq-fail prereqs-not-met)) + (non-completed (runs:calc-not-completed prereqs-not-met)) + (runnables (runs:calc-runnable prereqs-not-met))) (debug:print-info 4 "START OF INNER COND #2 " "\n can-run-more: " 
can-run-more "\n testname: " hed "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met) "\n non-completed: " (runs:pretty-string non-completed) + "\n prereq-fails: " (runs:pretty-string prereq-fails) "\n fails: " (runs:pretty-string fails) "\n testmode: " testmode - "\n (eq? testmode 'toplevel): " (eq? testmode 'toplevel) + "\n (member 'toplevel testmode): " (member 'toplevel testmode) "\n (null? non-completed): " (null? non-completed) "\n reruns: " reruns "\n items: " items "\n can-run-more: " can-run-more) (cond ;; all prereqs met, fire off the test ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch - ((member (hash-table-ref/default test-registry (runs:make-full-test-name hed item-path) 'n/a) - '(DONOTRUN removed)) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here + ((and (not (member 'toplevel testmode)) + (member (hash-table-ref/default test-registry (runs:make-full-test-name hed item-path) 'n/a) + '(DONOTRUN removed CANNOTRUN))) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here (debug:print-info 1 "Test " hed " set to \"" (hash-table-ref test-registry (runs:make-full-test-name hed item-path)) "\". Removing it from the queue") (if (or (not (null? tal)) (not (null? reg))) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) @@ -420,27 +490,31 @@ (set! runs:nothing-left-in-queue-count (+ runs:nothing-left-in-queue-count 1))) #f))) ;; ((or (null? prereqs-not-met) - (and (eq? testmode 'toplevel) + (and (member 'toplevel testmode) (null? non-completed))) - (debug:print-info 4 "runs:expand-items: (or (null? prereqs-not-met) (and (eq? testmode 'toplevel)(null? non-completed)))") + (debug:print-info 4 "runs:expand-items: (or (null? prereqs-not-met) (and (member 'toplevel testmode)(null? 
non-completed)))") (let ((test-name (tests:testqueue-get-testname test-record))) (setenv "MT_TEST_NAME" test-name) ;; (setenv "MT_RUNNAME" runname) - (set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process + (runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process (let ((items-list (items:get-items-from-config tconfig))) (if (list? items-list) (begin + (if (null? items-list) + (let ((test-id (rmt:get-test-id run-id test-name ""))) + (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "Failed to run due to failed prerequisites")))) (tests:testqueue-set-items! test-record items-list) (list hed tal reg reruns)) (begin (debug:print 0 "ERROR: The proc from reading the items table did not yield a list - please report this") (exit 1)))))) ((and (null? fails) + (null? prereq-fails) (not (null? non-completed))) (let* ((allinqueue (map (lambda (x)(if (string? x) x (db:test-get-testname x))) (append newtal reruns))) ;; prereqstrs is a list of test names as strings that are prereqs for hed (prereqstrs (delete-duplicates (map (lambda (x)(if (string? x) x (db:test-get-testname x))) @@ -452,154 +526,134 @@ ;; prereqstrs)) (give-up #f)) ;; We can get here when a prereq has not been run due to *it* having a prereq that failed. ;; We need to use this to dequeue this item as CANNOTRUN - (for-each (lambda (prereq) - (if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN) - (set! give-up #t))) - prereqstrs) + ;; + (if (member testmode '(toplevel)) + (for-each (lambda (prereq) + (if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN) + (set! give-up #t))) + prereqstrs)) + (if (and give-up (not (and (null? tal)(null? 
reg)))) (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records)) (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records))) (debug:print 1 "WARNING: test " hed " has discarded prerequisites, removing it from the queue") + + (let ((test-id (rmt:get-test-id run-id hed ""))) + (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites"))) + (if (and (null? trimmed-tal) (null? trimmed-reg)) #f (list (runs:queue-next-hed trimmed-tal trimmed-reg reglen regfull) (runs:queue-next-tal trimmed-tal trimmed-reg reglen regfull) (runs:queue-next-reg trimmed-tal trimmed-reg reglen regfull) reruns))) (list (car newtal)(append (cdr newtal) reg) '() reruns)))) - ;; (debug:print-info 1 "allinqueue: " allinqueue) - ;; (debug:print-info 1 "prereqstrs: " prereqstrs) - ;; (debug:print-info 1 "notinqueue: " notinqueue) - ;; (debug:print-info 1 "tal: " tal) - ;; (debug:print-info 1 "newtal: " newtal) - ;; (debug:print-info 1 "reg: " reg) - -;; == == ;; num-retries code was here -;; == == ;; we use this opportunity to move contents of reg to tal -;; == == ;; but also lets check that the prerequisites are all in the newtal or reruns lists -;; == == -;; == == (let* ((allinqueue (map (lambda (x)(if (string? x) x (db:test-get-testname x))) -;; == == (append newtal reruns))) -;; == == ;; prereqstrs is a list of test names as strings that are prereqs for hed -;; == == (prereqstrs (map (lambda (x)(if (string? x) x (db:test-get-testname x))) -;; == == prereqs-not-met)) -;; == == ;; a prereq that is not found in allinqueue will be put in the notinqueue list -;; == == ;; -;; == == (notinqueue (filter (lambda (x) -;; == == (not (member x allinqueue))) -;; == == prereqstrs))) -;; == == (if (not (null? notinqueue)) -;; == == (if (runs:can-keep-running? 
hed 5) ;; try five times -;; == == (begin -;; == == (debug:print-info 4 "increment cant-run-tests for " hed) -;; == == (runs:inc-cant-run-tests hed) -;; == == (list (car newtal)(append (cdr newtal) reg) '() reruns)) -;; == == (begin -;; == == -;; == == (if (runs:lownoise (conc "no fails prereq, null notinqueue " hed) 30) -;; == == (begin -;; == == (debug:print 1 "WARNING: test " hed " has no failed prerequisites but does have prerequistes that are NOT in the queue: " (string-intersperse notinqueue ", ")) -;; == == (debug:print-info 4 "allinqueue: " allinqueue) -;; == == (debug:print-info 4 "prereqstrs: " prereqstrs) -;; == == (debug:print-info 4 "notinqueue: " notinqueue))) -;; == == (if (and (null? tal)(null? reg)) -;; == == (list (car newtal)(append (cdr newtal) reg) '() reruns) -;; == == (list (runs:queue-next-hed tal reg reglen regfull) -;; == == (runs:queue-next-tal tal reg reglen regfull) -;; == == (runs:queue-next-reg tal reg reglen regfull) -;; == == reruns)))) -;; == == ;; have prereqs in queue, keep going. -;; == == (begin -;; == == (if (runs:lownoise (conc "no fails prereq " hed) 30) -;; == == (debug:print-info 1 "no fails in prerequisites for " hed ", waiting on tests; " -;; == == (string-intersperse (map (lambda (x) -;; == == (if (string? x) -;; == == x -;; == == (runs:make-full-test-name (db:test-get-testname x) -;; == == (db:test-get-item-path x)))) -;; == == non-completed) ", ") -;; == == ". Delaying launch of " hed ".")) -;; == == (list (car newtal)(append (cdr newtal) reg) '() reruns))))) ;; an issue with prereqs not yet met? - ((and (null? fails) + (null? prereq-fails) (null? non-completed)) - (if (runs:can-keep-running? hed 5) + (if (runs:can-keep-running? hed 20) (begin (runs:inc-cant-run-tests hed) (debug:print-info 1 "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. 
Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0)) + ;; getting here likely means the system is way overloaded, kill a full minute before continuing + (thread-sleep! 60) ;; num-retries code was here ;; we use this opportunity to move contents of reg to tal (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met? (begin (debug:print-info 1 "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue") + (let ((test-id (rmt:get-test-id run-id hed ""))) + (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while."))) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns)))) - ((and (not (null? fails))(eq? testmode 'normal)) + ((and + (or (not (null? fails)) + (not (null? prereq-fails))) + (member 'normal testmode)) (debug:print-info 1 "test " hed " (mode=" testmode ") has failed prerequisite(s); " (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") ", removing it from to-do list") + (let ((test-id (rmt:get-test-id run-id hed ""))) + (if test-id + (if (not (null? prereq-fails)) + (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites") + (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))) (if (or (not (null? reg))(not (null? tal))) (begin (hash-table-set! test-registry hed 'CANNOTRUN) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) (cons hed reruns))) #f)) ;; #f flags do not loop - ((and (not (null? fails))(eq? testmode 'toplevel)) + ((and (not (null? 
fails))(member 'toplevel testmode)) (if (or (not (null? reg))(not (null? tal))) (list (car newtal)(append (cdr newtal) reg) '() reruns) #f)) + ((null? runnables) #f) ;; if we get here and non-completed is null the it's all over. (else - (debug:print 1 "WARNING: FAILS or incomplete tests are preventing completion of this run. Dropping test " hed " from the run queue") - (list (runs:queue-next-hed tal reg reglen regfull) - (runs:queue-next-tal tal reg reglen regfull) - (runs:queue-next-reg tal reg reglen regfull) - reruns))))) ;; (list (car newtal)(cdr newtal) reg reruns))))) + (debug:print 0 "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now") + ;; (list (runs:queue-next-hed tal reg reglen regfull) + ;; (runs:queue-next-tal tal reg reglen regfull) + ;; (runs:queue-next-reg tal reg reglen regfull) + ;; reruns) + (list (car newtal)(cdr newtal) reg reruns))))) (define (runs:mixed-list-testname-and-testrec->list-of-strings inlst) - (map (lambda (t) - (cond - ((vector? t) - (conc (db:test-get-state t) "/" (db:test-get-status t))) - ((string? t) - t) - (else - (conc t)))) - inlst)) - -(define (runs:process-expanded-tests hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry) + (if (null? inlst) + '() + (map (lambda (t) + (cond + ((vector? t) + (let ((test-name (db:test-get-testname t)) + (item-path (db:test-get-item-path t)) + (test-state (db:test-get-state t)) + (test-status (db:test-get-status t))) + (conc test-name (if (equal? item-path "") "" "/") item-path ":" test-state "/" test-status))) + ((string? 
t) + t) + (else + (conc t)))) + inlst))) + +(define (runs:process-expanded-tests hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmap) (let* ((run-limits-info (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running (have-resources (car run-limits-info)) (num-running (list-ref run-limits-info 1)) (num-running-in-jobgroup (list-ref run-limits-info 2)) (max-concurrent-jobs (list-ref run-limits-info 3)) (job-group-limit (list-ref run-limits-info 4)) - (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons item-path testmode)) + (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons item-path testmode itemmap: itemmap)) + ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) (fails (runs:calc-fails prereqs-not-met)) (non-completed (runs:calc-not-completed prereqs-not-met)) - (loop-list (list hed tal reg reruns))) + (loop-list (list hed tal reg reruns)) + ;; configure the load runner + (numcpus (common:get-num-cpus)) + (maxload (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3"))) + (waitdelay (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60")))) (debug:print-info 4 "have-resources: " have-resources " prereqs-not-met: (" (string-intersperse (map (lambda (t) (if (vector? t) (conc (db:test-get-state t) "/" (db:test-get-status t)) (conc " WARNING: t is not a vector=" t ))) prereqs-not-met) ", ") ") fails: " fails) - (if (not (null? prereqs-not-met)) - (debug:print-info 1 "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", "))) + (if (and (not (null? 
prereqs-not-met)) + (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) + (debug:print-info 2 "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", "))) ;; Don't know at this time if the test have been launched at some time in the past ;; i.e. is this a re-launch? (debug:print-info 4 "run-limits-info = " run-limits-info) @@ -620,27 +674,24 @@ ;; Register tests ;; ((not (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f)) (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" ) - (if (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs + ;; always do firm registration now in v1.60 and greater ;; (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs + (let register-loop ((numtries 15)) + (rmt:general-call 'register-test run-id run-id test-name item-path) + (thread-sleep! 0.5) + (if (rmt:get-test-id run-id test-name item-path) + (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done) + (if (> numtries 0) + (register-loop (- numtries 1)) + (debug:print 0 "ERROR: failed to register test " (runs:make-full-test-name test-name item-path))))) + (if (not (eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f) 'done)) (begin - (rmt:general-call 'register-test run-id run-id test-name item-path) - (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done)) - (let ((th (make-thread (lambda () - (mutex-lock! registry-mutex) - (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'start) - (mutex-unlock! registry-mutex) - ;; If haven't done it before register a top level test if this is an itemized test - (if (not (eq? 
(hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f) 'done)) - (rmt:general-call 'register-test run-id run-id test-name "")) - (rmt:general-call 'register-test run-id run-id test-name item-path) - (mutex-lock! registry-mutex) - (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done) - (mutex-unlock! registry-mutex)) - (conc test-name "/" item-path)))) - (thread-start! th))) + (rmt:general-call 'register-test run-id run-id test-name "") + (if (rmt:get-test-id run-id test-name "") + (hash-table-set! test-registry (runs:make-full-test-name test-name "") 'done)))) (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) (if (and (null? tal)(null? reg)) (list hed tal (append reg (list hed)) reruns) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) @@ -683,10 +734,14 @@ (null? non-completed)))) ;; (hash-table-delete! *max-tries-hash* (runs:make-full-test-name test-name item-path)) ;; we are going to reset all the counters for test retries by setting a new hash table ;; this means they will increment only when nothing can be run (set! *max-tries-hash* (make-hash-table)) + ;; well, first lets see if cpu load throttling is enabled. If so wait around until the + ;; average cpu load is under the threshold before continuing + (if (configf:lookup *configdat* "jobtools" "maxload") ;; only gate if maxload is specified + (common:wait-for-cpuload maxload numcpus waitdelay)) (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'running) (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! *global-delta*) (if (or (not (null? tal))(not (null? 
reg))) @@ -701,40 +756,117 @@ (else (debug:print 4 "FAILS: " fails) ;; If one or more of the prereqs-not-met are FAIL then we can issue ;; a message and drop hed from the items to be processed. ;; (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) - (if (not (null? prereqs-not-met)) + (if (and (not (null? prereqs-not-met)) + (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) (debug:print-info 1 "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", "))) - (if (null? fails) (begin ;; couldn't run, take a breather - (debug:print-info 0 "Waiting for more work to do...") + (if (runs:lownoise "Waiting for more work to do..." 60) + (debug:print-info 0 "Waiting for more work to do...")) (thread-sleep! 1) (list (car newtal)(cdr newtal) reg reruns)) ;; the waiton is FAIL so no point in trying to run hed ever again (if (or (not (null? reg))(not (null? tal))) (if (vector? hed) - (begin - (debug:print 1 "WARN: Dropping test " (db:test-get-testname hed) "/" (db:test-get-item-path hed) + (begin + (debug:print 1 "WARNING: Dropping test " test-name "/" item-path " from the launch list as it has prerequistes that are FAIL") + (let ((test-id (rmt:get-test-id run-id hed ""))) + (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))) (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! *global-delta*) + ;; This next is for the items + (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'removed) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) - (cons hed reruns))) - (begin - (debug:print 0 "WARNING: Test not processed correctly. 
Could be a race condition in your test implementation? Dropping test " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") - (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) - ;; (list hed tal reg reruns) - (list (car newtal)(cdr newtal) reg reruns) - )))))))) + reruns ;; WAS: (cons hed reruns) ;; but that makes no sense? + )) + (let ((nth-try (hash-table-ref/default test-registry hed 0))) + (cond + ((member "RUNNING" (map db:test-get-state prereqs-not-met)) + (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60) + (debug:print 0 "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet.")) + (thread-sleep! 4) + (list (runs:queue-next-hed newtal reg reglen regfull) + (runs:queue-next-tal newtal reg reglen regfull) + (runs:queue-next-reg newtal reg reglen regfull) + reruns)) + ((or (not nth-try) + (and (number? nth-try) + (< nth-try 10))) + (hash-table-set! test-registry hed (if (number? nth-try) + (+ nth-try 1) + 0)) + (if (runs:lownoise (conc "not removing test " hed) 60) + (debug:print 1 "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites")) + ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") + (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) + ;; (list hed tal reg reruns) + ;; (list (car newtal)(cdr newtal) reg reruns) + ;; (hash-table-set! test-registry hed 'removed) + (list (runs:queue-next-hed newtal reg reglen regfull) + (runs:queue-next-tal newtal reg reglen regfull) + (runs:queue-next-reg newtal reg reglen regfull) + reruns)) + ((symbol? nth-try) + (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW + (if (null? 
tal) + #f ;; yes, really + (list (car tal)(cdr tal) reg reruns)) + (begin + (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60) + (debug:print 0 "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state " nth-try " will be overridden and we'll retry.")) + (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f) + (hash-table-set! test-registry hed 0) + (list (runs:queue-next-hed newtal reg reglen regfull) + (runs:queue-next-tal newtal reg reglen regfull) + (runs:queue-next-reg newtal reg reglen regfull) + reruns)))) + (else + (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60) + (debug:print 0 "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now.")) + ;; (debug:print 0 " prereqs: " prereqs-not-met) + (hash-table-set! test-registry hed 'removed) + (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f) + (mt:roll-up-pass-fail-counts run-id test-name item-path "FAIL") ;; treat as FAIL + (list (if (null? tal)(car newtal)(car tal)) + tal + reg + reruns))))) + ;; can't drop this - maybe running? Just keep trying + (let ((runable-tests (runs:runable-tests prereqs-not-met))) + (if (null? runable-tests) + #f ;; I think we are truly done here + (list (runs:queue-next-hed newtal reg reglen regfull) + (runs:queue-next-tal newtal reg reglen regfull) + (runs:queue-next-reg newtal reg reglen regfull) + reruns))))))))) + +;; scan a list of tests looking to see if any are potentially runnable +(define (runs:runable-tests tests) + (filter (lambda (t) + (if (not (vector? 
t)) + t + (let ((state (db:test-get-state t)) + (status (db:test-get-status t))) + (case (string->symbol state) + ((COMPLETED) #f) + ((NOT_STARTED) + (if (member status '("TEN_STRIKES" "BLOCKED" "PREQ_FAIL" "ZERO_ITEMS" "PREQ_DISCARDED" "TIMED_OUT" )) + #f + t)) + ((DELETED) #f) + (else t))))) + tests)) ;; every time though the loop increment the test/itempatt val. ;; when the min is > max-allowed and none running then force exit ;; (define *max-tries-hash* (make-hash-table)) @@ -759,12 +891,13 @@ (max-concurrent-jobs (let ((mcj (config-lookup *configdat* "setup" "max_concurrent_jobs"))) (if (and mcj (string->number mcj)) (string->number mcj) 1))) ;; length of the register queue ahead (reglen (if (number? reglen-in) reglen-in 1)) - (last-time-incomplete (current-seconds)) - (last-time-some-running (current-seconds))) + (last-time-incomplete (- (current-seconds) 900)) ;; force at least one clean up cycle + (last-time-some-running (current-seconds)) + (tdbdat (tasks:open-db))) ;; Initialize the test-registery hash with tests that already have a record ;; convert state to symbol and use that as the hash value (for-each (lambda (trec) (let ((id (db:test-get-id trec)) @@ -778,25 +911,30 @@ (let loop ((hed (car sorted-test-names)) (tal (cdr sorted-test-names)) (reg '()) ;; registered, put these at the head of tal (reruns '())) + (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns)) ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes - ;; (if (> (current-seconds)(+ last-time-incomplete 900)) - ;; (begin - ;; (set! last-time-incomplete (current-seconds)) - ;; (cdb:remote-run db:find-and-mark-incomplete #f))) + ;; moving this to a parallel thread and just run it once. + ;; + (if (> (current-seconds)(+ last-time-incomplete 900)) + (begin + (set! 
last-time-incomplete (current-seconds)) + ;; (rmt:find-and-mark-incomplete-all-runs) + )) ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) (let* ((test-record (hash-table-ref test-records hed)) (test-name (tests:testqueue-get-testname test-record)) (tconfig (tests:testqueue-get-testconfig test-record)) (jobgroup (config-lookup tconfig "test_meta" "jobgroup")) (testmode (let ((m (config-lookup tconfig "requirements" "mode"))) - (if m (string->symbol m) 'normal))) + (if m (map string->symbol (string-split m)) '(normal)))) + (itemmap (configf:lookup tconfig "requirements" "itemmap")) (waitons (tests:testqueue-get-waitons test-record)) (priority (tests:testqueue-get-priority test-record)) (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f (items (tests:testqueue-get-items test-record)) (item-path (item-list->path itemdat)) @@ -803,14 +941,19 @@ (tfullname (runs:make-full-test-name test-name item-path)) (newtal (append tal (list hed))) (regfull (>= (length reg) reglen)) (num-running (rmt:get-count-tests-running-for-run-id run-id))) - (if (> num-running 0) + ;; every couple minutes verify the server is there for this run + (if (and (common:low-noise-print 60 "try start server" run-id) + (tasks:need-server run-id)) + (tasks:start-and-wait-for-server tdbdat run-id 10)) ;; NOTE: delay and wait is done under the hood + + (if (> num-running 0) (set! last-time-some-running (current-seconds))) - (if (> (current-seconds)(+ last-time-some-running 60)) + (if (> (current-seconds)(+ last-time-some-running 240)) (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1))) ;; (debug:print 0 "max-tries-hash: " (hash-table->alist *max-tries-hash*)) ;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard ;; and it is clear they *should* have run but did not. @@ -822,11 +965,12 @@ ;; Fast skip of tests that are already "COMPLETED" - NO! 
Cannot do that as the items may not have been expanded yet :( ;; (if (member (hash-table-ref/default test-registry tfullname #f) '(DONOTRUN removed)) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) (begin - (debug:print-info 0 "Skipping test " tfullname " as it has been marked do not run due to being completed or not runnable") + (if (runs:lownoise (conc "been marked do not run " tfullname) 60) + (debug:print-info 0 "Skipping test " tfullname " as it has been marked do not run due to being completed or not runnable")) (if (or (not (null? tal))(not (null? reg))) (loop (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns)))) @@ -878,11 +1022,11 @@ ((not items) (debug:print-info 4 "OUTER COND: (not items)") (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) (not (null? tal))) (loop (car tal)(cdr tal) reg reruns)) - (let ((loop-list (runs:process-expanded-tests hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry))) + (let ((loop-list (runs:process-expanded-tests hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmap))) (if loop-list (apply loop loop-list)))) ;; items processed into a list but not came in as a list been processed ;; ((and (list? items) ;; thus we know our items are already calculated @@ -933,11 +1077,11 @@ ;; EXPAND ITEMS ((or (procedure? items)(eq? items 'have-procedure)) (let ((can-run-more (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs))) (if (and (list? 
can-run-more) (car can-run-more)) - (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records))) + (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmap))) (if loop-list (apply loop loop-list))) ;; if can't run more just loop with next possible test (loop (car newtal)(cdr newtal) reg reruns)))) @@ -961,12 +1105,33 @@ ((not (null? reg)) ;; could we get here with leftovers? (debug:print-info 0 "Have leftovers!") (loop (car reg)(cdr reg) '() reruns)) (else (debug:print-info 4 "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) - ))) ;; LET* ((test-record - + ))) + ;; now *if* -run-wait we wait for all tests to be done + ;; Now wait for any RUNNING tests to complete (if in run-wait mode) + (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id)) + (prev-num-running 0)) + ;; (debug:print 0 "num-running=" num-running ", prev-num-running=" prev-num-running) + (if (and (or (args:get-arg "-run-wait") + (equal? (configf:lookup *configdat* "setup" "run-wait") "yes")) + (> num-running 0)) + (begin + ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes + ;; (debug:print 0 "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0)) + (if (> (current-seconds)(+ last-time-incomplete 900)) + (begin + (debug:print-info 0 "Marking stuck tests as INCOMPLETE while waiting for run " run-id ". Running as pid " (current-process-id) " on " (get-host-name)) + (set! last-time-incomplete (current-seconds)) + (rmt:find-and-mark-incomplete run-id #f))) + (if (not (eq? 
num-running prev-num-running)) + (debug:print-info 0 "run-wait specified, waiting on " num-running " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " (time->string (seconds->local-time (current-seconds))))) + (thread-sleep! 5) + ;; (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) + (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) + ;; LET* ((test-record ;; we get here on "drop through". All done! (debug:print-info 1 "All tests launched"))) (define (runs:calc-fails prereqs-not-met) (filter (lambda (test) @@ -973,17 +1138,41 @@ (and (vector? test) ;; not (string? test)) (equal? (db:test-get-state test) "COMPLETED") (not (member (db:test-get-status test) '("PASS" "WARN" "CHECK" "WAIVED" "SKIP"))))) prereqs-not-met)) + +(define (runs:calc-prereq-fail prereqs-not-met) + (filter (lambda (test) + (and (vector? test) ;; not (string? test)) + (equal? (db:test-get-state test) "NOT_STARTED") + (not (member (db:test-get-status test) + '("n/a" "KEEP_TRYING"))))) + prereqs-not-met)) + +(define (runs:calc-not-completed prereqs-not-met) + (filter + (lambda (t) + (or (not (vector? t)) + (not (equal? "COMPLETED" (db:test-get-state t))))) + prereqs-not-met)) (define (runs:calc-not-completed prereqs-not-met) (filter (lambda (t) (or (not (vector? t)) (not (equal? "COMPLETED" (db:test-get-state t))))) prereqs-not-met)) + +(define (runs:calc-runnable prereqs-not-met) + (filter + (lambda (t) + (or (not (vector? t)) + (and (equal? "NOT_STARTED" (db:test-get-state t)) + (member (db:test-get-status t) + '("n/a" "KEEP_TRYING"))))) + prereqs-not-met)) (define (runs:pretty-string lst) (map (lambda (t) (if (not (vector? t)) (conc t) @@ -1002,10 +1191,11 @@ (itemdat (tests:testqueue-get-itemdat test-record)) (test-path (hash-table-ref all-tests-registry test-name)) ;; (conc *toppath* "/tests/" test-name)) ;; could use tests:get-testconfig here ... 
(force (hash-table-ref/default flags "-force" #f)) (rerun (hash-table-ref/default flags "-rerun" #f)) (keepgoing (hash-table-ref/default flags "-keepgoing" #f)) + (incomplete-timeout (string->number (or (configf:lookup *configdat* "setup" "incomplete-timeout") "x"))) (item-path "") (db #f) (full-test-name #f)) ;; setting itemdat to a list if it is #f @@ -1019,11 +1209,11 @@ ) (debug:print 2 "Attempting to launch test " full-test-name) (setenv "MT_TEST_NAME" test-name) ;; (setenv "MT_ITEMPATH" item-path) (setenv "MT_RUNNAME" runname) - (set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process + (runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process (change-directory *toppath*) ;; Here is where the test_meta table is best updated ;; Yes, another use of a global for caching. Need a better way? ;; @@ -1048,11 +1238,11 @@ ;; ;; (open-run-close tests:register-test db run-id test-name item-path) ;; ;; NB// for the above line. I want the test to be registered long before this routine gets called! ;; - (if (not test-id)(set! test-id (rmt:get-test-id-cached run-id test-name item-path))) + (if (not test-id)(set! test-id (rmt:get-test-id run-id test-name item-path))) (if (not test-id) (begin (debug:print 2 "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id) (rmt:general-call 'register-test run-id run-id test-name item-path) (set! test-id (rmt:get-test-id run-id test-name item-path)))) @@ -1111,13 +1301,14 @@ (set! runflag #t)) (else (set! 
runflag #f))) (debug:print 4 "RUNNING => runflag: " runflag " STATE: " (test:get-state testdat) " STATUS: " (test:get-status testdat)) (if (not runflag) (if (not parent-test) - (debug:print 1 "NOTE: Not starting test " full-test-name " as it is state \"" (test:get-state testdat) - "\" and status \"" (test:get-status testdat) "\", use -rerun \"" (test:get-status testdat) - "\" or -force to override")) + (if (runs:lownoise (conc "not starting test" full-test-name) 60) + (debug:print 1 "NOTE: Not starting test " full-test-name " as it is state \"" (test:get-state testdat) + "\" and status \"" (test:get-status testdat) "\", use -rerun \"" (test:get-status testdat) + "\" or -force to override"))) ;; NOTE: No longer be checking prerequisites here! Will never get here unless prereqs are ;; already met. ;; This would be a great place to do the process-fork ;; (let ((skip-test #f) @@ -1146,17 +1337,20 @@ (process-signal (current-process-id) signal/kill)))))))) ((KILLED) (debug:print 1 "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.") (hash-table-set! test-registry (runs:make-full-test-name test-name test-path) 'DONOTRUN)) ;; KILLED)) ((LAUNCHED REMOTEHOSTSTART RUNNING) - (if (> (- (current-seconds)(+ (db:test-get-event_time testdat) - (db:test-get-run_duration testdat))) - 600) ;; i.e. no update for more than 600 seconds - (begin - (debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD") - (tests:test-set-status! run-id test-id "INCOMPLETE" "STUCK/DEAD" "Test is stuck or dead" #f)) - (debug:print 2 "NOTE: " test-name " is already running"))) + (debug:print 2 "NOTE: " test-name " is already running")) + ;; (if (> (- (current-seconds)(+ (db:test-get-event_time testdat) + ;; (db:test-get-run_duration testdat))) + ;; (or incomplete-timeout + ;; 6000)) ;; i.e. 
no update for more than 6000 seconds + ;; (begin + ;; (debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD") + ;; (tests:test-set-status! run-id test-id "INCOMPLETE" "STUCK/DEAD" "" #f)) + ;; ;; (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "" #f)) + ;; (debug:print 2 "NOTE: " test-name " is already running"))) (else (debug:print 0 "ERROR: Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat)) (case (string->symbol (test:get-state testdat)) ((COMPLETED INCOMPLETE) (hash-table-set! test-registry (runs:make-full-test-name test-name test-path) 'DONOTRUN)) @@ -1174,11 +1368,15 @@ (take dparts (- (length dparts) count)) "/")))) (define (runs:recursive-delete-with-error-msg real-dir) (if (> (system (conc "rm -rf " real-dir)) 0) - (debug:print 0 "ERROR: There was a problem removing " real-dir " with rm -f"))) + (begin + ;; FAILED, possibly due to permissions, do chmod a+rwx then try one more time + (system (conc "chmod -R a+rwx " real-dir)) + (if (> (system (conc "rm -rf " real-dir)) 0) + (debug:print 0 "ERROR: There was a problem removing " real-dir " with rm -f"))))) (define (runs:safe-delete-test-dir real-dir) ;; first delete all sub-directories (directory-fold (lambda (f x) @@ -1203,13 +1401,14 @@ ;; 'remove-runs ;; 'set-state-status ;; ;; NB// should pass in keys? 
;; -(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)) +(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(remove-data-only #f)) (common:clear-caches) ;; clear all caches (let* ((db #f) + (tdbdat (tasks:open-db)) (keys (rmt:get-keys)) (rundat (mt:get-runs-by-patt keys runnamepatt target)) (header (vector-ref rundat 0)) (runs (vector-ref rundat 1)) (states (if state (string-split state ",") '())) @@ -1232,21 +1431,28 @@ sort-by: (case action ((remove-runs) 'rundir) (else 'event_time)))))) (let* ((run-id (db:get-value-by-header run header "id")) (run-state (db:get-value-by-header run header "state")) + (run-name (db:get-value-by-header run header "runname")) (tests (if (not (equal? run-state "locked")) (proc-get-tests run-id) '())) (lasttpath "/does/not/exist/I/hope")) (debug:print-info 4 "runs:operate-on run=" run ", header=" header) (if (not (null? tests)) (begin (case action ((remove-runs) + (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) + ;; seek and kill in flight -runtests with % as testpatt here + (if (equal? testpatt "%") + (tasks:kill-runner (db:delay-if-busy tdbdat) target run-name) + (debug:print 0 "not attempting to kill any run launcher processes as testpatt is " testpatt)) (debug:print 1 "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname"))) ((set-state-status) + (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) (debug:print 1 "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname"))) ((print-run) (debug:print 1 "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header) action) ((run-wait) @@ -1258,10 +1464,11 @@ (dirb ;; (rmt:sdb-qry 'getstr (db:test-get-rundir b))) ;; ) ;; ((filedb:get-path *fdb* (db:test-get-rundir b)))) (if (and (string? dira)(string? 
dirb)) (> (string-length dira)(string-length dirb)) #f))))) + (toplevel-retries (make-hash-table)) ;; try three times to loop through and remove top level tests (test-retry-time (make-hash-table)) (allow-run-time 10)) ;; seconds to allow for killing tests before just brutally killing 'em (let loop ((test (car sorted-tests)) (tal (cdr sorted-tests))) (let* ((test-id (db:test-get-id test)) @@ -1274,75 +1481,54 @@ (let* ((item-path (db:test-get-item-path new-test-dat)) (test-name (db:test-get-testname new-test-dat)) (run-dir ;;(filedb:get-path *fdb* ;; (rmt:sdb-qry 'getid (db:test-get-rundir new-test-dat)) ;; ) ;; run dir is from the link tree - (real-dir (if (file-exists? run-dir) - (resolve-pathname run-dir) - #f)) (test-state (db:test-get-state new-test-dat)) - (test-fulln (db:test-get-fullname new-test-dat))) + (test-fulln (db:test-get-fullname new-test-dat)) + (uname (db:test-get-uname new-test-dat)) + (toplevel-with-children (and (db:test-get-is-toplevel test) + (> (rmt:test-toplevel-num-items run-id test-name) 0)))) (case action ((remove-runs) - (debug:print-info 0 "test: " test-name " itest-state: " test-state) - (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) - (begin - (if (not (hash-table-ref/default test-retry-time test-fulln #f)) - (begin - ;; want to set to REMOVING BUT CANNOT do it here? - (hash-table-set! test-retry-time test-fulln (current-seconds)))) - (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) - ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first - ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give - ;; up and blow it away. - (begin - (debug:print 0 "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. 
Forcing state to FAILEDKILL and continuing") - (mt:test-set-state-status-by-id run-id (db:test-get-id test) "FAILEDKILL" "n/a" #f) - (thread-sleep! 1)) - (begin - (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f) - (thread-sleep! 1))) - ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... - (if (null? tal) - (loop new-test-dat tal) - (loop (car tal)(append tal (list new-test-dat))))) - (begin - (mt:test-set-state-status-by-id run-id (db:test-get-id test) "REMOVING" "LOCKED" #f) - (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) - (if (and real-dir - (> (string-length real-dir) 5) - (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. - (begin ;; let* ((realpath (resolve-pathname run-dir))) - (debug:print-info 1 "Recursively removing " real-dir) - (if (file-exists? real-dir) - (runs:safe-delete-test-dir real-dir) - (debug:print 0 "WARNING: test dir " real-dir " appears to not exist or is not readable"))) - (if real-dir - (debug:print 0 "WARNING: directory " real-dir " does not exist") - (debug:print 0 "WARNING: no real directory corrosponding to link " run-dir ", nothing done"))) - (if (symbolic-link? run-dir) - (begin - (debug:print-info 1 "Removing symlink " run-dir) - (handle-exceptions - exn - (debug:print 0 "ERROR: Failed to remove symlink " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") - (delete-file run-dir))) - (if (directory? 
run-dir) - (if (> (directory-fold (lambda (f x)(+ 1 x)) 0 run-dir) 0) - (debug:print 0 "WARNING: refusing to remove " run-dir " as it is not empty") - (handle-exceptions - exn - (debug:print 0 "ERROR: Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") - (delete-directory run-dir))) - (if run-dir - (debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink") - (debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) - )) - ;; Only delete the records *after* removing the directory. If things fail we have a record - (rmt:delete-test-records (db:test-get-run_id test)(db:test-get-id test)) - (if (not (null? tal)) - (loop (car tal)(cdr tal)))))) + ;; if the test is a toplevel-with-children issue an error and do not remove + (if toplevel-with-children + (begin + (debug:print 0 "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests") + (hash-table-set! toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1)) + (if (> (hash-table-ref toplevel-retries test-fulln) 3) + (if (not (null? tal)) + (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries + (let ((newtal (append tal (list test)))) + (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue + (begin + (debug:print-info 0 "test: " test-name " itest-state: " test-state) + (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) + (begin + (if (not (hash-table-ref/default test-retry-time test-fulln #f)) + (begin + ;; want to set to REMOVING BUT CANNOT do it here? + (hash-table-set! test-retry-time test-fulln (current-seconds)))) + (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) + ;; This test is not in a correct state for cleaning up. 
Let's try some graceful shutdown steps first + ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give + ;; up and blow it away. + (begin + (debug:print 0 "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing") + (mt:test-set-state-status-by-id run-id (db:test-get-id test) "FAILEDKILL" "n/a" #f) + (thread-sleep! 1)) + (begin + (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f) + (thread-sleep! 1))) + ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... + (if (null? tal) + (loop new-test-dat tal) + (loop (car tal)(append tal (list new-test-dat))))) + (begin + (runs:remove-test-directory db new-test-dat remove-data-only) + (if (not (null? tal)) + (loop (car tal)(cdr tal)))))))) ((set-state-status) (debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status)) (mt:test-set-state-status-by-id run-id (db:test-get-id test) (car state-status)(cadr state-status) #f) (if (not (null? tal)) (loop (car tal)(cdr tal)))) @@ -1371,38 +1557,79 @@ ;; (begin ;; (debug:print 1 "Removing run dir " runpath) ;; (system (conc "rmdir -p " runpath)))) ))))) )) - runs)) + runs) + ;; (sqlite3:finalize! (db:delay-if-busy tdbdat)) + ) #t) +(define (runs:remove-test-directory db test remove-data-only) + (let* ((run-dir (db:test-get-rundir test)) ;; run dir is from the link tree + (real-dir (if (file-exists? run-dir) + (resolve-pathname run-dir) + #f))) + (if remove-data-only + (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f) + (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f)) + (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) + (if (and real-dir + (> (string-length real-dir) 5) + (file-exists? 
real-dir)) ;; bad heuristic but should prevent /tmp /home etc. + (begin ;; let* ((realpath (resolve-pathname run-dir))) + (debug:print-info 1 "Recursively removing " real-dir) + (if (file-exists? real-dir) + (runs:safe-delete-test-dir real-dir) + (debug:print 0 "WARNING: test dir " real-dir " appears to not exist or is not readable"))) + (if real-dir + (debug:print 0 "WARNING: directory " real-dir " does not exist") + (debug:print 0 "WARNING: no real directory corrosponding to link " run-dir ", nothing done"))) + (if (symbolic-link? run-dir) + (begin + (debug:print-info 1 "Removing symlink " run-dir) + (handle-exceptions + exn + (debug:print 0 "ERROR: Failed to remove symlink " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") + (delete-file run-dir))) + (if (directory? run-dir) + (if (> (directory-fold (lambda (f x)(+ 1 x)) 0 run-dir) 0) + (debug:print 0 "WARNING: refusing to remove " run-dir " as it is not empty") + (handle-exceptions + exn + (debug:print 0 "ERROR: Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") + (delete-directory run-dir))) + (if (and run-dir + (not (member run-dir (list "n/a" "/tmp/badname")))) + (debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink") + (debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) + )) + ;; Only delete the records *after* removing the directory. 
If things fail we have a record + (if remove-data-only + (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "NOT_STARTED" "n/a" #f) + (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test))))) + ;;====================================================================== ;; Routines for manipulating runs ;;====================================================================== ;; Since many calls to a run require pretty much the same setup ;; this wrapper is used to reduce the replication of code (define (general-run-call switchname action-desc proc) - (let ((runname (args:get-arg ":runname")) - (target (if (args:get-arg "-target") - (args:get-arg "-target") - (args:get-arg "-reqtarg")))) - ;; (th1 #f)) + (let ((runname (or (args:get-arg "-runname")(args:get-arg ":runname"))) + (target (common:args-get-target))) (cond ((not target) (debug:print 0 "ERROR: Missing required parameter for " switchname ", you must specify the target with -target") (exit 3)) ((not runname) - (debug:print 0 "ERROR: Missing required parameter for " switchname ", you must specify the run name with :runname runname") + (debug:print 0 "ERROR: Missing required parameter for " switchname ", you must specify the run name with -runname runname") (exit 3)) (else (let ((db #f) - (keys #f) - (target (or (args:get-arg "-reqtarg") - (args:get-arg "-target")))) - (if (not (setup-for-run)) + (keys #f)) + (if (not (launch:setup-for-run)) (begin (debug:print 0 "Failed to setup, exiting") (exit 1))) ;; (if (args:get-arg "-server") ;; (cdb:remote-run server:start db (args:get-arg "-server"))) @@ -1484,11 +1711,11 @@ ;; This could probably be refactored into one complex query ... 
;; NOT PORTED - DO NOT USE YET ;; (define (runs:rollup-run keys runname user keyvals) - (debug:print 4 "runs:rollup-run, keys: " keys " :runname " runname " user: " user) + (debug:print 4 "runs:rollup-run, keys: " keys " -runname " runname " user: " user) (let* ((db #f) ;; register run operates on the main db (new-run-id (rmt:register-run keyvals runname "new" "n/a" user)) (prev-tests (rmt:get-matching-previous-test-run-records new-run-id "%" "%")) (curr-tests (mt:get-tests-for-run new-run-id "%/%" '() '())) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -21,10 +21,12 @@ (declare (uses db)) (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. (declare (uses synchash)) (declare (uses http-transport)) (declare (uses rpc-transport)) +(declare (uses nmsg-transport)) +(declare (uses launch)) (declare (uses daemon)) (include "common_records.scm") (include "db_records.scm") @@ -46,16 +48,17 @@ ;; all routes though here end in exit ... 
;; ;; start_server ;; (define (server:launch run-id) - (let ((transport (server:get-transport))) - (case transport - ((http) (http-transport:launch run-id)) - ((rpc) (rpc-transport:launch run-id)) - (else (debug:print 0 "ERROR: No known transport set, transport=" transport ", using rpc") - (rpc-transport:launch run-id))))) + (case *transport-type* + ((http)(http-transport:launch run-id)) + ((nmsg)(nmsg-transport:launch run-id)) + ((rpc) (rpc-transport:launch run-id)) + (else (debug:print 0 "ERROR: unknown server type " *transport-type*)))) +;; (else (debug:print 0 "ERROR: No known transport set, transport=" transport ", using rpc") +;; (rpc-transport:launch run-id))))) ;;====================================================================== ;; S E R V E R U T I L I T I E S ;;====================================================================== @@ -100,20 +103,37 @@ ;; Given a run id start a server process ### NOTE ### > file 2>&1 ;; if the run-id is zero and the target-host is set ;; try running on that host ;; (define (server:run run-id) - (let* ((target-host (configf:lookup *configdat* "server" "homehost" )) + (let* ((curr-host (get-host-name)) + (curr-ip (server:get-best-guess-address curr-host)) + (target-host (configf:lookup *configdat* "server" "homehost" )) + (testsuite (common:get-testsuite-name)) + (logfile (conc *toppath* "/logs/" run-id ".log")) (cmdln (conc (common:get-megatest-exe) - " -server - -run-id " run-id " >> " *toppath* "/db/" run-id ".log 2>&1 &"))) + " -server " (or target-host "-") " -run-id " run-id (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") + (conc " -daemonize -log " logfile) + "") + " -debug 4 testsuite:" testsuite))) ;; (conc " >> " logfile " 2>&1 &"))))) (debug:print 0 "INFO: Starting server (" cmdln ") as none running ...") (push-directory *toppath*) - (if target-host + (if (not (directory-exists? "logs"))(create-directory "logs")) + ;; host.domain.tld match host? 
+ (if (and target-host + ;; look at target host, is it host.domain.tld or ip address and does it + ;; match current ip or hostname + (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) + (not (equal? curr-ip target-host))) (begin - (set-environment-variable "TARGETHOST" target-host) - (system (conc "nbfake " cmdln))) - (system cmdln)) + (debug:print-info 0 "Starting server on " target-host ", logfile is " logfile) + (setenv "TARGETHOST" target-host))) + (setenv "TARGETHOST_LOGF" logfile) + (system (conc "nbfake " cmdln)) + (unsetenv "TARGETHOST_LOGF") + (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) + ;; (system cmdln) (pop-directory))) (define (server:get-client-signature) (if *my-client-signature* *my-client-signature* (let ((sig (server:mk-signature))) @@ -123,11 +143,11 @@ ;; kind start up of servers, wait 40 seconds before allowing another server for a given ;; run-id to be launched (define (server:kind-run run-id) (let ((last-run-time (hash-table-ref/default *server-kind-run* run-id #f))) (if (or (not last-run-time) - (> (- (current-seconds) last-run-time) 40)) + (> (- (current-seconds) last-run-time) 30)) (begin (server:run run-id) (hash-table-set! *server-kind-run* run-id (current-seconds)))))) ;; The generic run a server command. Dispatches the call to server 0 if run-id != 0 @@ -136,28 +156,71 @@ (if (eq? run-id 0) (server:run run-id) (rmt:start-server run-id))) (define (server:check-if-running run-id) - (let loop ((server (open-run-close tasks:get-server tasks:open-db run-id)) - (trycount 0)) + (let ((tdbdat (tasks:open-db))) + (let loop ((server (tasks:get-server (db:delay-if-busy tdbdat) run-id)) + (trycount 0)) (if server ;; note: client:start will set *runremote*. this needs to be changed ;; also, client:start will login to the server, also need to change that. ;; ;; client:start returns #t if login was successful. 
;; - (let ((res (server:ping-server run-id (vector-ref server 1)(vector-ref server 0)))) + (let ((res (case *transport-type* + ((http)(server:ping-server run-id + (tasks:hostinfo-get-interface server) + (tasks:hostinfo-get-port server))) + ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server) + (tasks:hostinfo-get-port server) + timeout: 2))))) ;; if the server didn't respond we must remove the record (if res #t (begin - (open-run-close tasks:server-force-clean-running-records-for-run-id tasks:open-db run-id + (debug:print-info 0 "server at " server " not responding, removing record") + (tasks:server-force-clean-running-records-for-run-id (db:delay-if-busy tdbdat) run-id " server:check-if-running") res))) - #f))) + #f)))) + +;; called in megatest.scm, host-port is string hostname:port +;; +(define (server:ping run-id host:port) + (let ((tdbdat (tasks:open-db))) + (let* ((host-port (let ((slst (string-split host:port ":"))) + (if (eq? (length slst) 2) + (list (car slst)(string->number (cadr slst))) + #f))) + (toppath (launch:setup-for-run)) + (server-db-dat (if (not host-port)(tasks:get-server (db:delay-if-busy tdbdat) run-id) #f))) + (if (not run-id) + (begin + (debug:print 0 "ERROR: must specify run-id when doing ping, -run-id n") + (print "ERROR: No run-id") + (exit 1)) + (if (and (not host-port) + (not server-db-dat)) + (begin + (print "ERROR: bad host:port") + (exit 1)) + (let* ((iface (if host-port (car host-port) (tasks:hostinfo-get-interface server-db-dat))) + (port (if host-port (cadr host-port)(tasks:hostinfo-get-port server-db-dat))) + (server-dat (http-transport:client-connect iface port)) + (login-res (rmt:login-no-auto-client-setup server-dat run-id))) + (if (and (list? 
login-res) + (car login-res)) + (begin + (print "LOGIN_OK") + (exit 0)) + (begin + (print "LOGIN_FAILED") + (exit 1))))))))) +;; run ping in separate process, safest way in some cases +;; (define (server:ping-server run-id iface port) (with-input-from-pipe (conc (common:get-megatest-exe) " -run-id " run-id " -ping " (conc iface ":" port)) (lambda () (let loop ((inl (read-line)) @@ -177,5 +240,16 @@ ;; (debug:print-info 2 "login successful") #t) (begin ;; (debug:print-info 2 "login failed") #f)))) + +(define (server:get-timeout) + (let ((tmo (configf:lookup *configdat* "server" "timeout"))) + (if (and (string? tmo) + (string->number tmo)) + (* 60 60 (string->number tmo)) + ;; (* 3 24 60 60) ;; default to three days + (* 60 1) ;; default to one minute + ;; (* 60 60 25) ;; default to 25 hours + ))) + Index: synchash.scm ================================================================== --- synchash.scm +++ synchash.scm @@ -12,11 +12,12 @@ ;;====================================================================== ;; A hash of hashes that can be kept in sync by sending minial deltas ;;====================================================================== (use format) -(use srfi-1 srfi-69) +(use srfi-1 srfi-69 sqlite3) +(import (prefix sqlite3 sqlite3:)) (declare (unit synchash)) (declare (uses db)) (declare (uses server)) (include "db_records.scm") @@ -62,11 +63,12 @@ ;; (cdb:remote-run db:get-runs #f runnamepatt numruns *start-run-offset* keypatts) ;; ;; keynum => the field to use as the unique key (usually 0 but can be other field) ;; (define (synchash:client-get proc synckey keynum synchash . 
params) - (let* ((data (apply cdb:remote-run synchash:server-get #f proc synckey keynum params)) + (let* ((data ;; (apply cdb:remote-run synchash:server-get #f proc synckey keynum params)) + (apply synchash:server-get #f proc synckey keynum params)) (newdat (car data)) (removs (cadr data)) (myhash (hash-table-ref/default synchash synckey #f))) (if (not myhash) (begin @@ -87,13 +89,14 @@ ;; data)) ;; return the changed and deleted list (list newdat removs))) ;; synchash)) (define *synchashes* (make-hash-table)) -(define (synchash:server-get db proc synckey keynum . params) +(define (synchash:server-get indb proc synckey keynum . params) ;; (debug:print-info 2 "synckey: " synckey ", keynum: " keynum ", params: " params) - (let* ((synchash (hash-table-ref/default *synchashes* synckey #f)) + (let* ((db (if indb indb (db:open-megatest-db))) + (synchash (hash-table-ref/default *synchashes* synckey #f)) (newdat (apply (case proc ((db:get-runs) db:get-runs) ((db:get-tests-for-runs-mindata) db:get-tests-for-runs-mindata) ((db:get-test-info-by-ids) db:get-test-info-by-ids) (else @@ -114,11 +117,12 @@ (map make-indexed data)))) ;; add each element keyed by the keynum'th val (else ;; (debug:print-info 2 "Non-get runs call") (map make-indexed newdat)))) ;; (debug:print-info 2 "postdat: " postdat) + ;; (if (not indb)(sqlite3:finalize! db)) (if (not synchash) (begin (set! synchash (make-hash-table)) (hash-table-set! *synchashes* synckey synchash))) (synchash:get-delta postdat synchash))) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -20,24 +20,94 @@ ;;====================================================================== ;; Tasks db ;;====================================================================== -(define (tasks:open-db) - (let* ((dbpath (conc *toppath* "/db/monitor.db")) - (exists (file-exists? dbpath)) - (write-access (file-write-access? 
dbpath)) - (mdb (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath)) - (handler (make-busy-timeout 36000))) - (if (and exists - (not write-access)) - (set! *db-write-access* write-access)) ;; only unset so other db's also can use this control - (sqlite3:set-busy-handler! mdb handler) - (sqlite3:execute mdb (conc "PRAGMA synchronous = 0;")) - (if (not exists) - (begin - (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, +;; wait up to approx n seconds for a journal to go away +;; +(define (tasks:wait-on-journal path n #!key (remove #f)(waiting-msg #f)) + (if (not (string? path)) + (debug:print 0 "ERROR: Called tasks:wait-on-journal with path=" path " (not a string)") + (let ((fullpath (conc path "-journal"))) + (handle-exceptions + exn + (begin + (print-call-chain (current-error-port)) + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 " exn=" (condition->list exn)) + (debug:print 0 "tasks:wait-on-journal failed. Continuing on, you can ignore this call-chain") + #t) ;; if stuff goes wrong just allow it to move on + (let loop ((journal-exists (file-exists? fullpath)) + (count n)) ;; wait ten times ... + (if journal-exists + (begin + (if (and waiting-msg + (eq? (modulo count 30) 0)) ;; throttle on the countdown; n is constant for the whole loop + (debug:print 0 waiting-msg)) + (if (> count 0) + (begin + (thread-sleep! 1) + (loop (file-exists? 
fullpath) + (- count 1))) + (begin + (if remove (system (conc "rm -rf " fullpath))) + #f))) + #t)))))) + +(define (tasks:get-task-db-path) + (let* ((linktree (configf:lookup *configdat* "setup" "linktree")) + (dbpath (conc linktree "/.db"))) + dbpath)) + + + +;; If file exists AND +;; file readable +;; ==> open it +;; If file exists AND +;; file NOT readable +;; ==> open in-mem version +;; If file NOT exists +;; ==> open in-mem version +;; +(define (tasks:open-db #!key (numretries 4)) + (if *task-db* + *task-db* + (handle-exceptions + exn + (if (> numretries 0) + (begin + (print-call-chain (current-error-port)) + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 " exn=" (condition->list exn)) + (thread-sleep! 1) + (tasks:open-db numretries: (- numretries 1))) ;; must be passed as a keyword or the retry budget never counts down + (begin + (print-call-chain (current-error-port)) + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 " exn=" (condition->list exn)))) + (let* ((dbpath (tasks:get-task-db-path)) + (dbfile (conc dbpath "/monitor.db")) + (avail (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away + (exists (file-exists? dbpath)) + (write-access (file-write-access? dbpath)) + (mdb (cond ;; what the hek is *toppath* doing here? + ((and (string? *toppath*)(file-write-access? *toppath*)) + (sqlite3:open-database dbfile)) + ((file-read-access? dbpath) (sqlite3:open-database dbfile)) + (else (sqlite3:open-database ":memory:")))) ;; (never-give-up-open-db dbpath)) + (handler (make-busy-timeout 36000))) + (if (and exists + (not write-access)) + (set! *db-write-access* write-access)) ;; only unset so other db's also can use this control + (sqlite3:set-busy-handler! mdb handler) + (db:set-sync mdb) ;; (sqlite3:execute mdb (conc "PRAGMA synchronous = 0;")) + ;; (if (or (and (not exists) + ;; (file-write-access? *toppath*)) + ;; (not (file-read-access? 
dbpath))) + ;; (begin + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, action TEXT DEFAULT '', owner TEXT, state TEXT DEFAULT 'new', target TEXT DEFAULT '', name TEXT DEFAULT '', @@ -44,18 +114,18 @@ testpatt TEXT DEFAULT '', keylock TEXT, params TEXT, creation_time TIMESTAMP, execution_time TIMESTAMP);") - (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS monitors (id INTEGER PRIMARY KEY, + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS monitors (id INTEGER PRIMARY KEY, pid INTEGER, start_time TIMESTAMP, last_update TIMESTAMP, hostname TEXT, username TEXT, CONSTRAINT monitors_constraint UNIQUE (pid,hostname));") - (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS servers (id INTEGER PRIMARY KEY, + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS servers (id INTEGER PRIMARY KEY, pid INTEGER, interface TEXT, hostname TEXT, port INTEGER, pubport INTEGER, @@ -64,23 +134,25 @@ state TEXT, mt_version TEXT, heartbeat TIMESTAMP, transport TEXT, run_id INTEGER);") -;; CONSTRAINT servers_constraint UNIQUE (pid,hostname,port));") - (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS clients (id INTEGER PRIMARY KEY, + ;; CONSTRAINT servers_constraint UNIQUE (pid,hostname,port));") + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS clients (id INTEGER PRIMARY KEY, server_id INTEGER, pid INTEGER, hostname TEXT, cmdline TEXT, login_time TIMESTAMP, logout_time TIMESTAMP DEFAULT -1, CONSTRAINT clients_constraint UNIQUE (pid,hostname));") - - )) - mdb)) - + + ;)) + (sqlite3:execute mdb "DELETE FROM tasks_queue WHERE state='done' AND creation_time < ?;" (- (current-seconds)(* 24 60 60))) ;; remove older than 24 hrs + (set! 
*task-db* (cons mdb dbpath)) + *task-db*)))) + ;;====================================================================== ;; Server and client management ;;====================================================================== ;; make-vector-record tasks hostinfo id interface port pubport transport pid hostname @@ -95,11 +167,11 @@ (define (tasks:server-lock-slot mdb run-id) (tasks:server-clean-out-old-records-for-run-id mdb run-id " tasks:server-lock-slot") (if (< (tasks:num-in-available-state mdb run-id) 4) (begin (tasks:server-set-available mdb run-id) - (thread-sleep! 2) ;; Try removing this. It may not be needed. + ;; (thread-sleep! 2) ;; Try removing this. It may not be needed. (tasks:server-am-i-the-server? mdb run-id)) #f)) ;; register that this server may come online (first to register goes though with the process) (define (tasks:server-set-available mdb run-id) @@ -113,26 +185,36 @@ -1 ;; pubport (random 1000) ;; priority (used a tiebreaker on get-available) "available" ;; state (common:version-signature) ;; mt_version -1 ;; interface - (conc (server:get-transport)) ;; transport + ;; (conc (server:get-transport)) ;; transport + (conc *transport-type*) ;; transport run-id )) (define (tasks:num-in-available-state mdb run-id) (let ((res 0)) (sqlite3:for-each-row (lambda (num-in-queue) (set! res num-in-queue)) mdb - "SELECT count(id) FROM servers WHERE run_id=? AND state = 'available';" + "SELECT count(id) FROM servers WHERE run_id=? AND state = 'available' AND (strftime('%s','now') - start_time) < 30 ;" run-id) res)) + +(define (tasks:num-servers-non-zero-running mdb) + (let ((res 0)) + (sqlite3:for-each-row + (lambda (num-running) + (set! 
res num-running)) + mdb + "SELECT count(id) FROM servers WHERE run_id != 0 AND state = 'running';") + res)) (define (tasks:server-clean-out-old-records-for-run-id mdb run-id tag) - (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state in ('available','shutting-down') AND (strftime('%s','now') - start_time) > 50 AND run_id=?;" + (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state in ('available','dbprep','shutting-down') AND (strftime('%s','now') - start_time) > 50 AND run_id=?;" (conc "defunct" tag) run-id)) (define (tasks:server-force-clean-running-records-for-run-id mdb run-id tag) (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state = 'running' AND run_id=?;" (conc "defunct" tag) run-id)) @@ -147,11 +229,11 @@ (define (tasks:server-delete-record mdb server-id tag) (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE id=?;" (conc "defunct" tag) server-id) ;; use this opportuntity to clean out records over one month old or over 10 minutes old with port = -1 (i.e. a never used placeholder) - (sqlite3:execute mdb "DELETE FROM servers WHERE state not in ('running','shutting-down') AND (strftime('%s','now') - start_time) > 2628000;") + (sqlite3:execute mdb "DELETE FROM servers WHERE state not in ('running','shutting-down','dbprep') AND (strftime('%s','now') - start_time) > 2628000;") (sqlite3:execute mdb "DELETE FROM servers WHERE state like 'defunct%' AND port=-1 AND (strftime('%s','now') - start_time) > 600;") ) (define (tasks:server-set-state! mdb server-id state) (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE id=?;" state server-id)) @@ -223,70 +305,128 @@ (res '())) (sqlite3:for-each-row (lambda (a . b) (set! res (cons (apply vector a b) res))) mdb - (conc "SELECT " selstr " FROM servers WHERE run_id=? 
AND state in ('available','running') ORDER BY start_time DESC;") + (conc "SELECT " selstr " FROM servers WHERE run_id=? AND state in ('available','running','dbprep') ORDER BY start_time DESC;") run-id) (vector header res))) -(define (tasks:get-server mdb run-id) +(define (tasks:get-server mdb run-id #!key (retries 10)) (let ((res #f) (best #f)) - (sqlite3:for-each-row - (lambda (id interface port pubport transport pid hostname) - (set! res (vector id interface port pubport transport pid hostname))) - mdb - ;; removed: - ;; strftime('%s','now')-heartbeat < 10 AND mt_version = ? - "SELECT id,interface,port,pubport,transport,pid,hostname FROM servers + (handle-exceptions + exn + (begin + (print-call-chain (current-error-port)) + (debug:print 0 "WARNING: tasks:get-server db access error.") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 " for run " run-id) + (print-call-chain (current-error-port)) + (if (> retries 0) + (begin + (debug:print 0 " trying call to tasks:get-server again in 10 seconds") + (thread-sleep! 10) + (tasks:get-server mdb run-id retries: (- retries 1))) ;; count down so the retry loop is bounded + (begin (debug:print 0 "10 tries of tasks:get-server all crashed and burned. Giving up and returning \"no server found\"") #f))) ;; #f is the no-server result callers test for + (sqlite3:for-each-row + (lambda (id interface port pubport transport pid hostname) + (set! res (vector id interface port pubport transport pid hostname))) + mdb + ;; removed: + ;; strftime('%s','now')-heartbeat < 10 AND mt_version = ? + "SELECT id,interface,port,pubport,transport,pid,hostname FROM servers WHERE run_id=? AND state='running' ORDER BY start_time DESC LIMIT 1;" run-id) ;; (common:version-signature) run-id) + res))) + +(define (tasks:server-running-or-starting? mdb run-id) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (id) + (set! res id)) + mdb ;; NEEDS dbprep ADDED + "SELECT id FROM servers WHERE run_id=? 
AND (state = 'running' OR (state = 'dbprep' AND (strftime('%s','now') - start_time) < 60));" run-id) + res)) + +(define (tasks:server-running? mdb run-id) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (id) + (set! res id)) + mdb ;; NEEDS dbprep ADDED + "SELECT id FROM servers WHERE run_id=? AND state = 'running';" run-id) res)) +(define (tasks:need-server run-id) + (let ((forced (configf:lookup *configdat* "server" "required")) + (maxqry (cdr (rmt:get-max-query-average run-id))) + (threshold (string->number (or (configf:lookup *configdat* "server" "server-query-threshold") "10")))) + (cond + (forced + (if (common:low-noise-print 60 run-id "server required is set") + (debug:print-info 0 "Server required is set, starting server for run-id " run-id ".")) + #t) + ((> maxqry threshold) + (if (common:low-noise-print 60 run-id "Max query time execeeded") + (debug:print-info 0 "Max avg query time of " maxqry "ms exceeds limit of " threshold "ms, starting server.")) + #t) + (else + #f)))) + +;; try to start a server and wait for it to be available +;; +(define (tasks:start-and-wait-for-server tdbdat run-id delay-max-tries) + ;; ensure a server is running for this run + (let loop ((server-dat (tasks:get-server (db:delay-if-busy tdbdat) run-id)) + (delay-time 0)) + (if (and (not server-dat) + (< delay-time delay-max-tries)) + (begin + (if (common:low-noise-print 60 "tasks:start-and-wait-for-server" run-id) + (debug:print 0 "Try starting server for run-id " run-id)) + (server:kind-run run-id) + (thread-sleep! (min delay-time 5)) + (loop (tasks:get-server (db:delay-if-busy tdbdat) run-id)(+ delay-time 1)))))) + (define (tasks:get-all-servers mdb) (let ((res '())) (sqlite3:for-each-row (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) - ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 + ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 (set! 
res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) res))) mdb "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport,run_id FROM servers WHERE state NOT LIKE 'defunct%' ORDER BY start_time DESC;") res)) -(define (tasks:kill-server status hostname port pid) - (debug:print-info 1 "Removing defunct server record for " hostname ":" port) - (if port - (open-run-close tasks:server-deregister tasks:open-db hostname port: port) - (open-run-close tasks:server-deregister tasks:open-db hostname pid: pid)) - (if status ;; #t means alive - (begin - (if (equal? hostname (get-host-name)) - (handle-exceptions - exn - (debug:print-info 0 "server may or may not be dead, check for megatest -server running as pid " pid "\n" - " EXCEPTION: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 1 "Sending signal/term to " pid " on " hostname) - (process-signal pid signal/term) - (thread-sleep! 5) ;; give it five seconds to die peacefully then do a brutal kill - ;;(process-signal pid signal/kill) - ) ;; local machine, send sig term - (begin - ;;(debug:print-info 1 "Stopping remote servers not yet supported.")))) - (debug:print-info 1 "Telling alive server on " hostname ":" port " to commit servercide") - (let ((serverdat (list hostname port))) - (hash-table-set! *runremote* run-id (http-transport:client-connect hostname port)) - (cdb:kill-server serverdat pid))))) ;; remote machine, try telling server to commit suicide - (begin - (if status - (if (equal? hostname (get-host-name)) - (begin - (debug:print-info 1 "Sending signal/term to " pid " on " hostname) - (process-signal pid signal/term) ;; local machine, send sig term - (thread-sleep! 
5) ;; give it five seconds to die peacefully then do a brutal kill - (process-signal pid signal/kill)) - (debug:print 0 "WARNING: Can't kill frozen server on remote host " hostname)))))) +;; no elegance here ... +;; +(define (tasks:kill-server hostname pid) + (debug:print-info 0 "Attempting to kill server process " pid " on host " hostname) + (setenv "TARGETHOST" hostname) + (setenv "TARGETHOST_LOGF" "server-kills.log") + (system (conc "nbfake kill " pid)) + (unsetenv "TARGETHOST_LOGF") + (unsetenv "TARGETHOST")) + +;; look up a server by run-id and send it a kill, also delete the record for that server +;; +(define (tasks:kill-server-run-id run-id #!key (tag "default")) + (let* ((tdbdat (tasks:open-db)) + (sdat (tasks:get-server (db:delay-if-busy tdbdat) run-id))) + (if sdat + (let ((hostname (vector-ref sdat 6)) + (pid (vector-ref sdat 5)) + (server-id (vector-ref sdat 0))) + (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "killed") + (debug:print-info 0 "Killing server " server-id " for run-id " run-id " on host " hostname " with pid " pid) + (tasks:kill-server hostname pid) + (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id tag) ) + (debug:print-info 0 "No server found for run-id " run-id ", nothing to kill")) + ;; (sqlite3:finalize! 
tdb) + )) + ;;====================================================================== ;; Tasks and Task monitors ;;====================================================================== @@ -321,17 +461,16 @@ res)) ;; register a task (define (tasks:add mdb action owner target runname testpatt params) (sqlite3:execute mdb "INSERT INTO tasks_queue (action,owner,state,target,name,testpatt,params,creation_time,execution_time) - VALUES (?,?,'new',?,?,?,?,?,strftime('%s','now'),0);" + VALUES (?,?,'new',?,?,?,?,strftime('%s','now'),0);" action owner target runname - test - item + testpatt (if params params ""))) (define (keys:key-vals-hash->target keys key-params) (let ((tmp (hash-table-ref/default key-params (vector-ref (car keys) 0) ""))) (if (> (length keys) 1) @@ -411,11 +550,11 @@ ;; (define (tasks:start-monitor db mdb) (if (> (tasks:get-num-alive-monitors mdb) 2) ;; have two running, no need for more (debug:print-info 1 "Not starting monitor, already have more than two running") (let* ((megatestdb (conc *toppath* "/megatest.db")) - (monitordbf (conc *toppath* "/db/monitor.db")) + (monitordbf (conc (configf:lookup *configdat* "setup" "linktree") "/.db/monitor.db")) (last-db-update 0)) ;; (file-modification-time megatestdb))) (task:register-monitor mdb) (let loop ((count 0) (next-touch 0)) ;; next-touch is the time where we need to update last_update ;; if the db has been modified we'd best look at the task queue @@ -509,10 +648,91 @@ (define (tasks:set-state mdb task-id state) (sqlite3:execute mdb "UPDATE tasks_queue SET state=? 
WHERE id=?;" state task-id)) + +;;====================================================================== +;; Access using task key (stored in params; (hash-table->alist flags) hostname pid +;;====================================================================== + +(define (tasks:param-key->id mdb task-params) + (handle-exceptions + exn + #f + (sqlite3:first-result mdb "SELECT id FROM tasks_queue WHERE params LIKE ?;" task-params))) + +(define (tasks:set-state-given-param-key mdb param-key new-state) + (sqlite3:execute mdb "UPDATE tasks_queue SET state=? WHERE params LIKE ?;" new-state param-key)) + +(define (tasks:get-records-given-param-key mdb param-key state-patt action-patt test-patt) ;; first matching tasks_queue row, or '() on any error + (handle-exceptions + exn + '() + (sqlite3:first-row mdb "SELECT id,action,owner,state,target,name,testpatt,keylock,params FROM tasks_queue WHERE + params LIKE ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;" + param-key state-patt action-patt test-patt))) + + +;;====================================================================== +;; Rogue items, no place to put these yet +;;====================================================================== + +(define (tasks:find-task-queue-records mdb target run-name test-patt state-patt action-patt) + ;; (handle-exceptions + ;; exn + ;; '() + ;; (sqlite3:first-row + (let ((res '())) + (sqlite3:for-each-row + (lambda (a . b) + (set! res (cons (cons a b) res))) + mdb "SELECT id,action,owner,state,target,name,testpatt,keylock,params FROM tasks_queue + WHERE + target = ? AND name = ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;" + target run-name state-patt action-patt test-patt) + res)) ;; ) + + +(define (tasks:kill-runner mdb target run-name) + (let ((records (tasks:find-task-queue-records mdb target run-name "%" "running" "run-tests")) + (hostpid-rx (regexp "\\s+(\\w+)\\s+(\\d+)$"))) ;; host pid is at end of param string + (if (null? 
records) + (debug:print 0 "No run launching processes found for " target " / " run-name) + (debug:print 0 "Found " (length records) " run(s) to kill.")) + (for-each + (lambda (record) + (let* ((param-key (list-ref record 8)) + (match-dat (string-search hostpid-rx param-key))) + (if match-dat + (let ((hostname (cadr match-dat)) + (pid (string->number (caddr match-dat)))) + (debug:print 0 "Sending SIGINT to process " pid " on host " hostname) + (if (equal? (get-host-name) hostname) + (if (process:alive? pid) + (begin + (handle-exceptions + exn + (begin + (debug:print 0 "Kill of process " pid " on host " hostname " failed.") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + #t) + (process-signal pid signal/int) + (thread-sleep! 5) + (if (process:alive? pid) + (process-signal pid signal/kill))))) + ;; (call-with-environment-variables + (let ((old-targethost (getenv "TARGETHOST"))) + (setenv "TARGETHOST" hostname) + (setenv "TARGETHOST_LOGF" "server-kills.log") + (system (conc "nbfake kill " pid)) + ;; put the caller's TARGETHOST back, or clear it when it was not set + (if old-targethost + (setenv "TARGETHOST" old-targethost) + (unsetenv "TARGETHOST")) + (unsetenv "TARGETHOST_LOGF")))) + (debug:print 0 "ERROR: no record or improper record for " target "/" run-name " in tasks_queue in monitor.db")))) + records))) + ;;====================================================================== ;; The routines to process tasks ;;====================================================================== Index: tdb.scm ================================================================== --- tdb.scm +++ tdb.scm @@ -22,10 +22,11 @@ (declare (uses common)) (declare (uses keys)) (declare (uses ods)) (declare (uses client)) (declare (uses mt)) +(declare (uses db)) (include "common_records.scm") (include "db_records.scm") (include "key_records.scm") (include "run_records.scm") @@ -53,27 +54,31 @@ (string->number (args:get-arg "-override-timeout")) 136000)))) (handle-exceptions exn (begin + (print-call-chain (current-error-port)) 
(debug:print 2 "ERROR: problem accessing test db " work-area ", you probably should clean and re-run this test" ((condition-property-accessor 'exn 'message) exn)) - (set! db (sqlite3:open-database ":memory:"))) ;; open an in-memory db to allow readonly access + (set! db (sqlite3:open-database ":memory:")) ;; open an in-memory db to allow readonly access + (set! dbexists #f)) ;; must force re-creation of tables, more tom-foolery (set! db (sqlite3:open-database dbpath))) (if *db-write-access* (sqlite3:set-busy-handler! db handler)) (if (not dbexists) (begin - (sqlite3:execute db "PRAGMA synchronous = FULL;") + (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = FULL;") (debug:print-info 11 "Initialized test database " dbpath) (tdb:testdb-initialize db))) ;; (sqlite3:execute db "PRAGMA synchronous = 0;") (debug:print-info 11 "open-test-db END (sucessful)" work-area) ;; now let's test that everything is correct (handle-exceptions exn (begin - (debug:print 0 "ERROR: problem accessing test db " work-area ", you probably should clean and re-run this test" + (print-call-chain (current-error-port)) + (debug:print 0 "ERROR: problem accessing test db " work-area ", you probably should clean and re-run this test or remove the file " + dbpath ".\n " ((condition-property-accessor 'exn 'message) exn)) #f) ;; Is there a cheaper single line operation that will check for existance of a table ;; and raise an exception ? 
(sqlite3:execute db "SELECT id FROM test_data LIMIT 1;")) @@ -108,21 +113,24 @@ (tdb (open-test-db test-path))) (apply proc tdb params))) (define (tdb:testdb-initialize db) (debug:print 11 "db:testdb-initialize START") - (for-each - (lambda (sqlcmd) - (sqlite3:execute db sqlcmd)) - (list "CREATE TABLE IF NOT EXISTS test_rundat ( + (sqlite3:with-transaction + db + (lambda () + (for-each + (lambda (sqlcmd) + (sqlite3:execute db sqlcmd)) + (list "CREATE TABLE IF NOT EXISTS test_rundat ( id INTEGER PRIMARY KEY, update_time TIMESTAMP, cpuload INTEGER DEFAULT -1, diskfree INTEGER DEFAULT -1, diskusage INTGER DEFAULT -1, run_duration INTEGER DEFAULT 0);" - "CREATE TABLE IF NOT EXISTS test_data ( + "CREATE TABLE IF NOT EXISTS test_data ( id INTEGER PRIMARY KEY, test_id INTEGER, category TEXT DEFAULT '', variable TEXT, value REAL, @@ -131,29 +139,29 @@ units TEXT, comment TEXT DEFAULT '', status TEXT DEFAULT 'n/a', type TEXT DEFAULT '', CONSTRAINT test_data_constraint UNIQUE (test_id,category,variable));" - "CREATE TABLE IF NOT EXISTS test_steps ( + "CREATE TABLE IF NOT EXISTS test_steps ( id INTEGER PRIMARY KEY, test_id INTEGER, stepname TEXT, state TEXT DEFAULT 'NOT_STARTED', status TEXT DEFAULT 'n/a', event_time TIMESTAMP, comment TEXT DEFAULT '', logfile TEXT DEFAULT '', CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state));" - ;; test_meta can be used for handing commands to the test - ;; e.g. KILLREQ - ;; the ackstate is set to 1 once the command has been completed - "CREATE TABLE IF NOT EXISTS test_meta ( + ;; test_meta can be used for handing commands to the test + ;; e.g. 
KILLREQ + ;; the ackstate is set to 1 once the command has been completed + "CREATE TABLE IF NOT EXISTS test_meta ( id INTEGER PRIMARY KEY, var TEXT, val TEXT, ackstate INTEGER DEFAULT 0, - CONSTRAINT metadat_constraint UNIQUE (var));")) + CONSTRAINT metadat_constraint UNIQUE (var));")))) (debug:print 11 "db:testdb-initialize END")) (define (tdb:read-test-data tdb test-id categorypatt) (let ((res '())) (sqlite3:for-each-row ADDED testnanomsg/basic-req-rep.scm Index: testnanomsg/basic-req-rep.scm ================================================================== --- /dev/null +++ testnanomsg/basic-req-rep.scm @@ -0,0 +1,3 @@ +(use nanomsg srfi-18 sqlite3 numbers) + +(define resp (nn-socket 'rep)) ADDED testnanomsg/mockupclient.scm Index: testnanomsg/mockupclient.scm ================================================================== --- /dev/null +++ testnanomsg/mockupclient.scm @@ -0,0 +1,42 @@ +(use zmq posix numbers) + +(define cname "Bob") +(define runtime 10) +(let ((args (argv))) + (if (< (length args) 3) + (begin + (print "Usage: mockupclient clientname runtime") + (exit)) + (begin + (set! cname (cadr args)) + (set! runtime (string->number (caddr args)))))) + +;; (define start-delay (/ (random 100) 9)) +;; (define runtime (+ 1 (/ (random 200) 2))) + +(print "Starting client " cname " with runtime " runtime) + +(include "mockupclientlib.scm") + +(set! endtime (+ (current-seconds) runtime)) + +;; first ping the server to ensure we have a connection +(if (server-ping cname 5) + (print "SUCCESS: Client " cname " connected to server") + (begin + (print "ERROR: Client " cname " failed ping of server, exiting") + (exit))) + +(let loop () + (let ((x (random 15)) + (varname (list-ref (list "hello" "goodbye" "saluton" "kiaorana")(random 4)))) + (case x + ;; ((1)(dbaccess cname 'sync "nodat" #f)) + ((2 3 4 5)(dbaccess cname 'set varname (random 999))) + ((6 7 8 9 10)(print cname ": Get \"" varname "\" " (dbaccess cname 'get varname #f))) + (else + (thread-sleep! 
0.011))) + (if (< (current-seconds) endtime) + (loop)))) + +(print "Client " cname " all done!!") ADDED testnanomsg/mockupclientlib.scm Index: testnanomsg/mockupclientlib.scm ================================================================== --- /dev/null +++ testnanomsg/mockupclientlib.scm @@ -0,0 +1,58 @@ +(define reqs (nn-socket 'req)) + +(connect-socket reqs "tcp://localhost:6563") + +(thread-sleep! 0.2) + +(define (server-ping cname timeout) + (let ((msg (conc cname ":ping:" timeout)) + (maxtime (+ (current-seconds) timeout))) + (print "pinging server from " cname " with timeout " timeout) + (let loop ((res #f)) + (if (< maxtime (current-seconds)) + #f ;; failed to ping + (if (equal? res "Got ping") + #t + (begin + (print "Ping received from server " res) + (send-message push msg) + (thread-sleep! 0.1) + (loop (receive-message sub non-blocking: #t)))))))) + +(define (dbaccess cname cmd var val #!key (numtries 20)) + (let* ((msg (conc cname ":" cmd ":" (if val (conc var " " val) var))) + (res #f) + (mtx1 (make-mutex)) + (do-access (lambda () + (let ((tmpres #f)) + (print "Sending msg: " msg) + (send-message push msg) + (print "Message " msg " sent") + (print "Client " cname " waiting for response to " msg) + (print "Client " cname " received address " (receive-message* sub)) + (set! tmpres (receive-message* sub)) + (mutex-lock! mtx1) + (set! res tmpres) + (mutex-unlock! mtx1)))) + (th1 (make-thread do-access "do access")) + (th2 (make-thread (lambda () + (let ((result #f)) + (mutex-lock! mtx1) + (set! result res) + (mutex-unlock! mtx1) + (thread-sleep! 5) + (if (not result) + (if (> numtries 0) + (begin + (print "WARNING: access timed out for " cname ", trying again. Trys remaining=" numtries) + (dbaccess cname cmd var val numtries: (- numtries 1))) + (begin + (print "ERROR: dbaccess timed out. Exiting") + (exit))))) + "timeout thread")))) + (thread-start! th1) + (thread-start! th2) + (thread-join! 
th1) + (if res (print "SUCCESS: received " res " with " numtries " remaining possible attempts")) + res)) + ADDED testnanomsg/mockupserver.scm Index: testnanomsg/mockupserver.scm ================================================================== --- /dev/null +++ testnanomsg/mockupserver.scm @@ -0,0 +1,146 @@ +;; pub/sub with envelope address +;; Note that if you don't insert a sleep, the server will crash with SIGPIPE as soon +;; as a client disconnects. Also a remaining client may receive tons of +;; messages afterward. + +(use nanomsg srfi-18 sqlite3 numbers) + +(define resp (nn-socket 'rep)) +(define cname "server") +(define total-db-accesses 0) +(define start-time (current-seconds)) + +(nn-bind resp "tcp://*:6563") + +(thread-sleep! 0.2) + +(define (open-db) + (let* ((dbpath "mockup.db") + (dbexists (file-exists? dbpath)) + (db (open-database dbpath)) ;; (never-give-up-open-db dbpath)) + (handler (make-busy-timeout 10))) + (set-busy-handler! db handler) + (if (not dbexists) + (for-each + (lambda (stmt) + (execute db stmt)) + (list + "PRAGMA SYNCHRONOUS=0;" + "CREATE TABLE clients (id INTEGER PRIMARY KEY,name TEXT,num_accesses INTEGER DEFAULT 0);" + "CREATE TABLE vars (var TEXT,val TEXT,CONSTRAINT vars_constraint UNIQUE (var));"))) + db)) + +(define cid-cache (make-hash-table)) + +(define (get-client-id db cname) + (let ((cid (hash-table-ref/default cid-cache cname #f))) + (if cid + cid + (begin + (execute db "INSERT OR REPLACE INTO clients (name) VALUES(?);" cname) + (for-each-row + (lambda (id) + (set! cid id)) + db + "SELECT id FROM clients WHERE name=?;" cname) + (hash-table-set! cid-cache cname cid) + (set! total-db-accesses (+ total-db-accesses 2)) + cid)))) + +(define (count-client db cname) + (let ((cid (get-client-id db cname))) + (execute db "UPDATE clients SET num_accesses=num_accesses+1 WHERE id=?;" cid) + (set! 
total-db-accesses (+ total-db-accesses 1)) + )) + +(define db (open-db)) +;; (define queuelst '()) +;; (define mx1 (make-mutex)) + +(define max-queue-len 0) + +(define (process-queue queuelst) + (let ((queuelen (length queuelst))) + (if (> queuelen max-queue-len) + (set! max-queue-len queuelen)) + (for-each + (lambda (item) + (let ((cname (vector-ref item 1)) + (clcmd (vector-ref item 2)) + (cdata (vector-ref item 3))) + (send-message pub cname send-more: #t) + (send-message pub (case clcmd + ((sync) + (conc queuelen)) + ((set) + (set! total-db-accesses (+ total-db-accesses 1)) + (apply execute db "INSERT OR REPLACE INTO vars (var,val) VALUES (?,?);" (string-split cdata)) + "ok") + ((get) + (set! total-db-accesses (+ total-db-accesses 1)) + (let ((res "noval")) + (for-each-row + (lambda (val) + (set! res val)) + db + "SELECT val FROM vars WHERE var=?;" cdata) + res)) + (else (conc "unk cmd: " clcmd)))))) + queuelst))) + +;; SERVER THREAD +(define th1 (make-thread + (lambda () + (let ((last-run 0)) ;; current-seconds when run last + (let loop ((queuelst '())) + (let* ((indat (receive-message* pull)) + (parts (string-split indat ":")) + (cname (car parts)) ;; client name + (clcmd (string->symbol (cadr parts))) ;; client cmd + (cdata (caddr parts)) ;; client data + (svect (vector (current-seconds) cname clcmd cdata))) ;; record for the queue + ;; (print "Server received message: " indat) + (count-client db cname) + (case clcmd + ((ping) + (print "Got ping from " cname) + (send-message pub cname send-more: #t) + (send-message pub "Got ping") + (loop queuelst)) + ((sync) ;; just process the queue + (print "Got sync from " cname) + (process-queue (cons svect queuelst)) + (loop '())) + ((get) + (process-queue (cons svect queuelst)) + (loop '())) + (else + (loop (cons svect queuelst)))))))) + "server thread")) + +(include "mockupclientlib.scm") + +;; SYNC THREAD +;; send a sync to the pull port +(define th2 (make-thread + (lambda () + (let ((last-action-time 
(current-seconds))) + (let loop () + (thread-sleep! 5) + (let ((queuelen (string->number (dbaccess "server" 'sync "nada" #f))) + (last-action-delta #f)) + (if (> queuelen 1)(set! last-action-time (current-seconds))) + (set! last-action-delta (- (current-seconds) last-action-time)) + (print "Server: Got queuelen=" queuelen ", last-action-delta=" last-action-delta) + ;; keep polling until 60 seconds pass without a sync reporting queuelen > 1 + (if (< last-action-delta 60) + (loop) + (print "Server exiting, 60 seconds since last access")))))) + "sync thread")) + +(thread-start! th1) +(thread-start! th2) +(thread-join! th2) + +(let* ((run-time (- (current-seconds) start-time)) + (queries/second (/ total-db-accesses run-time))) + (print "Server exited! Total db accesses=" total-db-accesses " in " run-time " seconds for " queries/second " queries/second with max queue length of: " max-queue-len)) ADDED testnanomsg/pipeline.scm Index: testnanomsg/pipeline.scm ================================================================== --- /dev/null +++ testnanomsg/pipeline.scm @@ -0,0 +1,25 @@ +;; watch nanomsg's pipeline load-balancer in action. +(use nanomsg) + +(define push (nn-socket 'push)) +(define pull1 (nn-socket 'pull)) +(define pull2 (nn-socket 'pull)) + +(nn-bind push "inproc://test") +(nn-connect pull1 "inproc://test") +(nn-connect pull2 "inproc://test") + +(nn-send push "a") +(nn-send push "b") +(nn-send push "c") +(nn-send push "d") + +(define ((th sock)) + (print (current-thread) ": " (nn-recv sock)) + (print (current-thread) ": " (nn-recv sock)) + (print (current-thread) " is done")) + +(thread-start! (th pull1)) +(thread-start! (th pull2)) + +(thread-sleep! 1) ADDED testnanomsg/req-rep-client.scm Index: testnanomsg/req-rep-client.scm ================================================================== --- /dev/null +++ testnanomsg/req-rep-client.scm @@ -0,0 +1,30 @@ +;; watch nanomsg's pipeline load-balancer in action. 
+(use nanomsg) + +(define req (nn-socket 'req)) + +(nn-connect req "tcp://localhost:22022") + +;; (with-output-to-string (lambda ()(serialize obj))) +(define (client-send-receive soc msg) + (nn-send soc msg) + (nn-recv soc)) + +(define ((talk-to-server soc)) + (let loop ((cnt 20)) + (let ((name (list-ref '("Matt" "Tom" "Bob" "Jill" "James" "Jane")(random 6)))) + (print "Sending " name) + (print (client-send-receive req name)) + (if (> cnt 0)(loop (- cnt 1))))) + (print (client-send-receive req "quit")) + (nn-close req) + (exit)) + +;; (thread-start! (lambda () +;; (thread-sleep! 20) +;; (print "Give up on waiting for the server") +;; (nn-close req) +;; (exit))) + +(thread-join! (thread-start! (talk-to-server req))) + ADDED testnanomsg/req-rep-server.scm Index: testnanomsg/req-rep-server.scm ================================================================== --- /dev/null +++ testnanomsg/req-rep-server.scm @@ -0,0 +1,90 @@ +;; watch nanomsg's pipeline load-balancer in action. +(use nanomsg) + +;; (use trace) +;; (trace nn-bind nn-socket nn-assert nn-recv nn-send thread-terminate! nn-close ) + +(define port 22022) +(define host "127.0.0.1") + +(define rep (nn-socket 'rep)) + +(print "connecting, got: " (nn-bind rep (conc "tcp://" "*" ":" port))) + +(define (server soc) + (print "server starting") + (let loop ((msg-in (nn-recv soc))) + (print "server received: " msg-in) + (cond + ((equal? msg-in "quit") + (nn-send soc "Ok, quitting")) + ((and (>= (string-length msg-in) 4) + (equal? (substring msg-in 0 4) "ping")) + (nn-send soc (conc (current-process-id))) + (loop (nn-recv soc))) + ;;((and (>= (string-length msg-in) + (else + (let ((this-task (random 15))) + (thread-sleep! 
this-task) + (nn-send soc (conc "hello " msg-in " this task took " this-task " seconds to complete")) + (loop (nn-recv soc))))))) + +(define (ping-self host port #!key (return-socket #t)) + ;; send the string "ping" and check that the reply is our own process id; + ;; returns the req socket (or #f) when return-socket is true, otherwise closes it and returns the success flag + (let* ((req (nn-socket 'req)) + (key "ping") + (success #f) + (keepwaiting #t) + (ping (make-thread + (lambda () + (print "ping: sending string \"" key "\", expecting " (current-process-id)) + (nn-send req key) + (let ((result (nn-recv req))) + (if (equal? (conc (current-process-id)) result) + (begin + (print "ping, success: received \"" result "\"") + (set! success #t)) + (begin + (print "ping, failed: received key \"" result "\"") + (set! keepwaiting #f) + (set! success #f))))) + "ping")) + (timeout (make-thread (lambda () + (let loop ((count 0)) + (thread-sleep! 1) + ;; count is zero-based, so count + 1 one-second sleeps have elapsed here + (print "still waiting after " (+ count 1) " seconds...") + (if (and keepwaiting (< count 10)) + (loop (+ count 1)))) + (if keepwaiting + (begin + (print "timeout waiting for ping") + (thread-terminate! ping)))) + "timeout"))) + (nn-connect req (conc "tcp://" host ":" port)) + (handle-exceptions + exn + (begin + (print-call-chain) + (print " message: " ((condition-property-accessor 'exn 'message) exn)) + (print "exn=" (condition->list exn)) + (print "ping failed to connect to " host ":" port)) + (thread-start! timeout) + (thread-start! ping) + (thread-join! ping) + (if success (thread-terminate! timeout))) + (if return-socket + (if success req #f) + (begin + (nn-close req) + success)))) + +(let ((server-thread (make-thread (lambda ()(server rep)) "server"))) + (thread-start! server-thread) + ;; (thread-sleep! 1) + (if (ping-self host port) + (begin + (thread-join! 
server-thread) + (nn-close rep)) + (print "ping failed"))) + +(exit) ADDED testnanomsg/req-rep.scm Index: testnanomsg/req-rep.scm ================================================================== --- /dev/null +++ testnanomsg/req-rep.scm @@ -0,0 +1,30 @@ +;; watch nanomsg's pipeline load-balancer in action. +(use nanomsg) + +(define req (nn-socket 'req)) +(define rep (nn-socket 'rep)) + +(nn-bind rep "inproc://test") +(nn-connect req "inproc://test") + +(define (client-send-receive soc msg) + (nn-send soc msg) + (nn-recv soc)) + +(define ((server soc)) + (let loop ((msg-in (nn-recv soc))) + (if (not (equal? msg-in "quit")) + (begin + (nn-send soc (conc "hello " msg-in)) + (loop (nn-recv soc)))))) + +(thread-start! (server rep)) + +(print (client-send-receive req "Matt")) +(print (client-send-receive req "Tom")) + +;; (client-send-receive req "quit") + +(nn-close req) +(nn-close rep) +(exit) Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -31,12 +31,11 @@ (include "run_records.scm") (include "test_records.scm") ;; Call this one to do all the work and get a standardized list of tests (define (tests:get-all) - (let* ((test-search-path (cons (conc *toppath* "/tests") ;; the default - (tests:get-tests-search-path *configdat*)))) + (let* ((test-search-path (tests:get-tests-search-path *configdat*))) (tests:get-valid-tests (make-hash-table) test-search-path))) (define (tests:get-tests-search-path cfgdat) (let ((paths (map cadr (configf:get-section cfgdat "tests-paths")))) (cons (conc *toppath* "/tests") paths))) @@ -68,12 +67,12 @@ (define (tests:glob-like-match patt str) (let ((like (substring-index "%" patt))) (let* ((notpatt (equal? 
(substring-index "~" patt) 0)) (newpatt (if notpatt (substring patt 1) patt)) (finpatt (if like - (string-substitute (regexp "%") ".*" newpatt) - (string-substitute (regexp "\\*") ".*" newpatt))) + (string-substitute (regexp "%") ".*" newpatt #f) + (string-substitute (regexp "\\*") ".*" newpatt #f))) (res #f)) ;; (print "tests:glob-like-match => notpatt: " notpatt ", newpatt: " newpatt ", finpatt: " finpatt) (set! res (string-match (regexp finpatt (if like #t #f)) str)) (if notpatt (not res) res)))) @@ -351,11 +350,16 @@ ">" status "" "" (if (equal? comment "") " " comment) "" "")))) - testdat) + (if (list? testdat) + testdat + (begin + (print "ERROR: failed to get records with rmt:test-get-records-for-index-file run-id=" run-id "test-name=" test-name) + '()))) + (print "
") ;; Print out stats for status (set! tot 0) (print "") (for-each (lambda (state) @@ -474,11 +478,11 @@ #f ;; cannot have a which is waiting on b happening before b (if (and b-waitons (member (tests:testqueue-get-testname a-record) b-waitons)) #t ;; this is the correct order, b is waiting on a and b is before a (if (> a-priority b-priority) #t ;; if a is a higher priority than b then we are good to go - #f)))))))) + (string-compare3 a b))))))))) ;; for each test: ;; (define (tests:filter-non-runnable run-id testkeynames testrecordshash) (let ((runnables '())) @@ -636,23 +640,42 @@ (rmt:general-call 'update-run-duration run-id minutes test-id)) (if (and uname hostname) (rmt:general-call 'update-uname-host run-id uname hostname test-id))) ;; This one is for running with no db access (i.e. via rmt: internally) -(define (tests:set-full-meta-info test-id run-id minutes work-area) - (let* ((num-records 0) - (cpuload (get-cpu-load)) +(define (tests:set-full-meta-info db test-id run-id minutes work-area remtries) +;; (define (tests:set-full-meta-info test-id run-id minutes work-area) +;; (let ((remtries 10)) + (let* ((cpuload (get-cpu-load)) (diskfree (get-df (current-directory))) (uname (get-uname "-srvpio")) (hostname (get-host-name))) - (tdb:remote-update-testdat-meta-info run-id test-id work-area cpuload diskfree minutes) (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname))) - -(define (tests:set-partial-meta-info test-id run-id minutes work-area) + +;; (define (tests:set-partial-meta-info test-id run-id minutes work-area) +(define (tests:set-partial-meta-info test-id run-id minutes work-area remtries) (let* ((cpuload (get-cpu-load)) - (diskfree (get-df (current-directory)))) - (tdb:remote-update-testdat-meta-info run-id test-id work-area cpuload diskfree minutes))) + (diskfree (get-df (current-directory))) + (remtries 10)) + (handle-exceptions + exn + (if (> remtries 0) + (begin + (print-call-chain (current-error-port)) + 
(debug:print-info 0 "WARNING: failed to set meta info. Will try " remtries " more times") + (set! remtries (- remtries 1)) + (thread-sleep! 10) + (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1))) + (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) + (debug:print 0 "ERROR: tried for over a minute to update meta info and failed. Giving up") + (debug:print 0 "EXCEPTION: database probably overloaded or unreadable.") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (print "exn=" (condition->list exn)) + (debug:print 0 " status: " ((condition-property-accessor 'sqlite3 'status) exn)) + (print-call-chain (current-error-port)))) + (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes) + ))) ;;====================================================================== ;; A R C H I V I N G ;;====================================================================== Index: tests/Makefile ================================================================== --- tests/Makefile +++ tests/Makefile @@ -9,170 +9,182 @@ IPADDR := "-" RUNID := 1 SERVER = DEBUG = 1 LOGGING = +ROWS = 20 OS = $(shell grep ID /etc/*-release|cut -d= -f2) FS = $(shell df -T .|tail -1|awk '{print $$2}') VER = $(shell fsl info|grep checkout|awk '{print $$2}'|cut -c 1-5) # The NEWTARGET causes some tests to fail. Do not use until this is fixed. 
NEWTARGET = "$(OS)/$(FS)/$(VER)" TARGET = "ubuntu/nfs/none" -all : test1 test2 test3 test4 test5 test6 test7 test8 test9 +all : unit test1 test2 test3 test4 test5 test6 test7 test8 test9 + +unit : + ./rununittest.sh basicserver $(DEBUG) server : - cd ..;make;make install - cd fullrun;../../bin/megatest -server - -debug $(DEBUG) -run-id $(RUNID) + cd ..;make -j;make install + cd fullrun;$(MEGATEST) -server - -debug $(DEBUG) -run-id $(RUNID) stopserver : - cd ..;make && make install + cd ..;make -j && make install cd fullrun;$(MEGATEST) -stop-server 0 repl : - cd ..;make && make install - cd fullrun;$(MEGATEST) -repl + cd ..;make -j && make install + cd fullrun;$(MEGATEST) -:b -repl test0 : cleanprep cd simplerun ; $(MEGATEST) -server - -debug $(DEBUG) test1 : cleanprep test2 : fullprep - cd fullrun;$(MEGATEST) -runtests ez_pass,runfirst/a/% -reqtarg ubuntu/nfs/none :runname $(RUNNAME) -debug $(DEBUG) $(LOGGING) - cd fullrun;megatest -runtests % -target ubuntu/nfs/none :runname $(RUNNAME)_01 -testpatt %/,%/ai -debug $(DEBUG) - cd fullrun;megatest -runtests %/,%/ai -target ubuntu/nfs/none :runname $(RUNAME)_02 -debug $(DEBUG) - cd fullrun;megatest -runtests runfirst/%,%/ai -target ubuntu/nfs/none :runname $(RUNNAME)_02 -debug $(DEBUG) + cd fullrun;$(MEGATEST) -preclean -runtests ez_pass,runfirst/a/% -reqtarg ubuntu/nfs/none :runname $(RUNNAME) -debug $(DEBUG) $(LOGGING) + cd fullrun;megatest -preclean -runtests % -target ubuntu/nfs/none :runname $(RUNNAME)_01 -testpatt %/,%/ai -debug $(DEBUG) + cd fullrun;megatest -preclean -runtests %/,%/ai -target ubuntu/nfs/none :runname $(RUNAME)_02 -debug $(DEBUG) + cd fullrun;megatest -preclean -runtests runfirst/%,%/ai -target ubuntu/nfs/none :runname $(RUNNAME)_02 -debug $(DEBUG) cd fullrun;megatest -runtests %/,%/winter -target ubuntu/nfs/none :runname $(RUNNAME)_03 -debug $(DEBUG) sleep 40;cd fullrun;megatest -target ubuntu/nfs/none :runname $(RUNNAME) -set-state-status COMPLETED,FORCED :state COMPLETED :status PASS 
-testpatt ez_p%s,runfirst/ -debug $(DEBUG) $(LOGGING) test3 : fullprep test3a test3b test3a : @echo Run runfirst and any waitons. - cd fullrun;$(MEGATEST) -runtests runfirst -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b + cd fullrun;$(MEGATEST) -preclean -runtests runfirst -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b test3b : @echo Run all_toplevel and all waitons - cd fullrun;$(MEGATEST) -runtests all_toplevel -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_c + cd fullrun;$(MEGATEST) -preclean -runtests all_toplevel -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_c test4 : cleanprep @echo "WARNING: No longer running fullprep, test converage may be lessened" - cd fullrun;time $(MEGATEST) -debug $(DEBUG) -runtests % -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -m "This is a comment specific to a run" -v $(LOGGING) + cd fullrun;time $(MEGATEST) -debug $(DEBUG) -run-wait -runtests % -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -m "This is a comment specific to a run" -v $(LOGGING) + +test4a : cleanprep + cd fullrun;time $(MEGATEST) -debug $(DEBUG) -preclean -runtests all_toplevel -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -m "This is a comment specific to a run" -v $(LOGGING) # NOTE: Only one instance can be a server test5 : cleanprep @echo "WARNING: No longer running fullprep, test converage may be lessened" - cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_aa -debug $(DEBUG) $(LOGGING) > aa.log 2> aa.log & - cd fullrun;sleep 3;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ab -debug $(DEBUG) $(LOGGING) > ab.log 2> ab.log & - cd fullrun;sleep 6;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ac -debug $(DEBUG) $(LOGGING) > ac.log 2> ac.log & - cd fullrun;sleep 9;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ad -debug $(DEBUG) $(LOGGING) > ad.log 2> ad.log & - cd fullrun;sleep 12;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ae -debug $(DEBUG) $(LOGGING) > ae.log 2> 
ae.log & - cd fullrun;sleep 15;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_af -debug $(DEBUG) $(LOGGING) > af.log 2> af.log & +# send stderr to the same descriptor as stdout (2>&1) so the two streams do not overwrite each other in the shared log + cd fullrun;sleep 0;$(MEGATEST) -preclean -runtests % -target $(TARGET) :runname $(RUNNAME)_aa -debug $(DEBUG) $(LOGGING) > aa.log 2>&1 & + cd fullrun;sleep 0;$(MEGATEST) -preclean -runtests % -target $(TARGET) :runname $(RUNNAME)_ab -debug $(DEBUG) $(LOGGING) > ab.log 2>&1 & + cd fullrun;sleep 5;$(MEGATEST) -preclean -runtests % -target $(TARGET) :runname $(RUNNAME)_ac -debug $(DEBUG) $(LOGGING) > ac.log 2>&1 & + cd fullrun;sleep 8;$(MEGATEST) -preclean -runtests % -target $(TARGET) :runname $(RUNNAME)_ad -debug $(DEBUG) $(LOGGING) > ad.log 2>&1 & +# cd fullrun;sleep 0;$(MEGATEST) -preclean -runtests % -target $(TARGET) :runname $(RUNNAME)_ae -debug $(DEBUG) $(LOGGING) > ae.log 2> ae.log & +# cd fullrun;sleep 0;$(MEGATEST) -preclean -runtests % -target $(TARGET) :runname $(RUNNAME)_af -debug $(DEBUG) $(LOGGING) > af.log 2> af.log & # MUST ADD THIS BACK IN ASAP!!!! # cd fullrun;sleep 10;$(MEGATEST) -run-wait -target $(TARGET) :runname % -testpatt % :state RUNNING,LAUNCHED,NOT_STARTED,REMOTEHOSTSTART;echo ALL DONE test6: fullprep - cd fullrun;$(MEGATEST) -runtests runfirst -testpatt %/1 -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -v - cd fullrun;$(MEGATEST) -runtests runfirst -testpatt %blahha% -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -debug 10 + cd fullrun;$(MEGATEST) -preclean -runtests runfirst -testpatt %/1 -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -v + cd fullrun;$(MEGATEST) -preclean -runtests runfirst -testpatt %blahha% -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -debug 10 cd fullrun;$(MEGATEST) -rollup :runname newrun -target ubuntu/nfs/none -debug 10 test7: @echo Only a/c testname c should remain. If there is a run a/b/c then there is a cache issue. 
+ cd simplerun;$(DASHBOARD) & (cd simplerun; \ $(MEGATEST) -server - -daemonize; \ $(MEGATEST) -remove-runs -target %/% :runname % -testpatt %; \ - $(MEGATEST) -runtests % -target a/b :runname c; sleep 5; \ + $(MEGATEST) -preclean -runtests % -target a/b :runname c; sleep 5; \ $(MEGATEST) -remove-runs -target a/c :runname c; \ - $(MEGATEST) -runtests % -target a/c :runname c; \ + $(MEGATEST) -preclean -runtests % -target a/c :runname c; \ $(MEGATEST) -remove-runs -target a/b :runname c -testpatt % ; \ - $(MEGATEST) -runtests % -target a/d :runname c;$(MEGATEST) -list-runs %|egrep ^Run:) > test7.log 2> test7.log + $(MEGATEST) -preclean -runtests % -target a/d :runname c;$(MEGATEST) -list-runs %|egrep ^Run:) > test7.log 2> test7.log logpro test7.logpro test7.html < test7.log @echo @echo Run \"firefox test7.html\" to see the results. # This one failed with v1.55 test8a : - cd fullrun;$(MEGATEST) -runtests priority_10_waiton_1 -target ubuntu/nfs/none :runname $(RUNNAME)_waiton_single + cd fullrun;$(MEGATEST) -preclean -runtests priority_10_waiton_1 -target ubuntu/nfs/none :runname $(RUNNAME)_waiton_single test8 : test8a - cd fullrun;$(MEGATEST) -runtests lineitem_fail 1 -target ubuntu/nfs/none :runname $(RUNNAME)_singletest - cd fullrun;$(MEGATEST) -runtests runfirst/fall 1 -target ubuntu/nfs/none :runname $(RUNNAME)_singleitem - cd fullrun;$(MEGATEST) -runtests test_mt_vars/2 -target ubuntu/nfs/none :runname $(RUNNAME)_singleitem_waiton + cd fullrun;$(MEGATEST) -preclean -runtests lineitem_fail 1 -target ubuntu/nfs/none :runname $(RUNNAME)_singletest + cd fullrun;$(MEGATEST) -preclean -runtests runfirst/fall 1 -target ubuntu/nfs/none :runname $(RUNNAME)_singleitem + cd fullrun;$(MEGATEST) -preclean -runtests test_mt_vars/2 -target ubuntu/nfs/none :runname $(RUNNAME)_singleitem_waiton # Some simple checks for bootstrapping and run loop logic test9 : minsetup test9a test9b test9c test9d test9e test9a : @echo Run super-simple mintest e, no waitons. 
- cd mintest;$(MEGATEST) -runtests e -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG) + cd mintest;$(DASHBOARD)& + cd mintest;$(MEGATEST) -preclean -runtests e -target $(VER) -runname $(shell date +%H.%M.%S) -debug $(DEBUG) test9b : @echo Run simple mintest d with one waiton c - cd mintest;$(MEGATEST) -runtests d -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG) + cd mintest;$(MEGATEST) -preclean -runtests d -target $(VER) -runname `date +%H.%M.%S` -debug $(DEBUG) test9c : @echo Run mintest a with full waiton chain a -> b -> c -> d -> e - cd mintest;$(MEGATEST) -runtests a -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG) + cd mintest;$(MEGATEST) -preclean -runtests a -target $(VER) -runname `date +%H.%M.%S` -debug $(DEBUG) test9d : @echo Run an itemized test with no items - cd mintest;$(MEGATEST) -runtests g -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG) + cd mintest;$(MEGATEST) -preclean -runtests g -target $(VER) -runname `date +%H.%M.%S` -debug $(DEBUG) test9e : @echo Run mintest a1 with full waiton chain with d1fail: a1 -> b1 -> c1 -> d1fail -> e1 - cd mintest;$(MEGATEST) -runtests a1 -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG) + cd mintest;$(MEGATEST) -preclean -runtests a1 -target $(VER) -runname `date +%H.%M.%S` -debug $(DEBUG) test10 : @echo Run a bunch of different targets simultaneously (cd fullrun;$(MEGATEST) -server - ;sleep 2)& for targ in mint/btrfs/mintdir sunos/sshfs/loc; do \ - (cd fullrun;$(MEGATEST) -runtests priority_10_waiton_1 -target $$targ :runname $(RUNNAME) &); done + (cd fullrun;$(MEGATEST) -preclean -runtests priority_10_waiton_1 -target $$targ :runname $(RUNNAME) &); done for sys in ubuntu suse redhat debian;do \ for fs in afs nfs zfs; do \ for dpath in none tmp; do \ - (cd fullrun;$(MEGATEST) -runtests priority_10_waiton_1 -target $$sys/$$fs/$$dpath :runname $(RUNNAME) &);\ + (cd fullrun;$(MEGATEST) -preclean -runtests priority_10_waiton_1 -target $$sys/$$fs/$$dpath :runname $(RUNNAME) &);\ 
done;done;done test11 : cd fullrun;time (for a in 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10 ;do (megatest -test-paths -target %/%/% > /dev/null ) & done; wait; ) minsetup : - cd ..;make && make install + cd ..;make -j && make install mkdir -p mintest/runs mintest/links cd mintest;$(MEGATEST) -stop-server 0 cd mintest;$(MEGATEST) -server - -debug $(DEBUG) > server.log 2> server.log & sleep 3 cd mintest;$(DASHBOARD) -rows 18 & cleanprep : ../*.scm Makefile */*.config mkdir -p fullrun/tmp/mt_runs fullrun/tmp/mt_links - cd ..;make;make install + cd ..;make -j;make install rm -f */logging.db touch cleanprep fullprep : cleanprep cd fullrun;$(MEGATEST) -remove-runs :runname $(RUNNAME)% -target %/%/% -testpatt %/% cd fullrun;$(BINPATH)/dashboard -rows 15 & dashboard : cleanprep - cd fullrun && $(BINPATH)/dashboard -rows 20 & + cd fullrun && $(BINPATH)/dashboard -rows $(ROWS) & + +newdashboard : cleanprep + cd fullrun && $(BINPATH)/newdashboard & remove : cd fullrun;$(MEGATEST) -remove-runs :runname $(RUN) -testpatt % -itempatt % :sysname % :fsname % :datapath % clean : rm cleanprep kill : killall -v mtest main.sh dboard || true - rm -rf *run/db/* */megatest.db */logging.db */monitor.db fullrun/tmp/mt_*/* || true + rm -rf /tmp/.$(USER)-portlogger.db *run/db/* */megatest.db */logging.db */monitor.db fullrun/tmp/mt_*/* fullrun/tmp/mt_*/.db* fullrun/logs/*.log fullrun/*.log || true killall -v mtest dboard || true hardkill : kill sleep 2;killall -v mtest main.sh dboard -9 Index: tests/fdktestqa/fdk.config ================================================================== --- tests/fdktestqa/fdk.config +++ tests/fdktestqa/fdk.config @@ -2,11 +2,32 @@ SYSTEM TEXT RELEASE TEXT [setup] # Adjust max_concurrent_jobs to limit how much you load your machines -max_concurrent_jobs 500 +# max_concurrent_jobs 150 +max_concurrent_jobs 1000 # This is your link path, you can move it but it is generally better to keep it stable -linktree #{shell 
readlink -f #{getenv PWD}/../simplelinks} +linktree #{shell readlink -f #{getenv MT_RUN_AREA_HOME}/../simplelinks} [include testqa/configs/megatest.abc.config] + +# timeout 0.025 + +[jobtools] +maxload 4 +launcher nbfake + +[server] +# timeout 0.01 +# homehost xena +# homehost 143.182.225.38 + +# force server +server-query-threshold 0 + + +[jobtools] +# launcher nbq -P ch_vp -C SLES11_EM64T_4G -Q /ciaf/fdk +# launcher nbfake +# maxload 4 Index: tests/fdktestqa/testqa/Makefile ================================================================== --- tests/fdktestqa/testqa/Makefile +++ tests/fdktestqa/testqa/Makefile @@ -1,9 +1,13 @@ BINDIR = $(PWD)/../../../bin PATH := $(BINDIR):$(PATH) MEGATEST = $(BINDIR)/megatest DASHBOARD = $(BINDIR)/dashboard +NEWDASHBOARD = $(BINDIR)/newdashboard +RUNNAME = a + + all : $(MEGATEST) -remove-runs -target a/b :runname c -testpatt %/% $(MEGATEST) -runtests % -target a/b :runname c bigbig : @@ -15,13 +19,20 @@ $(MEGATEST) -runtests bigrun -target a/bigrun :runname a$(shell date +%V) bigrun2 : $(MEGATEST) -runtests bigrun2 -target a/bigrun2 :runname a$(shell date +%V) +bigrun3 : + $(MEGATEST) -runtests bigrun3 -target a/bigrun3 :runname $(RUNNAME) + dashboard : $(DASHBOARD) -rows 20 & +newdashboard : + $(NEWDASHBOARD) & + compile : - (cd ../../..;make && make install) + (cd ../../..;make -j && make install) clean : - rm -rf ../simple*/*/* megatest.db db/* + rm -rf ../simple*/*/* megatest.db db/* ../simple*/.db/* logs/* monitor.db + Index: tests/fdktestqa/testqa/configs/megatest.abc.config ================================================================== --- tests/fdktestqa/testqa/configs/megatest.abc.config +++ tests/fdktestqa/testqa/configs/megatest.abc.config @@ -2,9 +2,8 @@ [validvalues] state start end completed # Job tools are more advanced ways to control how your jobs are launched [jobtools] -useshell yes -launcher nbfake +# useshell yes [include megatest.def.config] Index: tests/fdktestqa/testqa/configs/megatest.def.config 
================================================================== --- tests/fdktestqa/testqa/configs/megatest.def.config +++ tests/fdktestqa/testqa/configs/megatest.def.config @@ -2,7 +2,7 @@ [env-override] EXAMPLE_VAR example value # As you run more tests you may need to add additional disks, the names are arbitrary but must be unique [disks] -disk0 #{scheme (nice-path "#{getenv PWD}/../simpleruns")} +disk0 #{scheme (nice-path "#{getenv MT_RUN_AREA_HOME}/../simpleruns")} Index: tests/fdktestqa/testqa/megatest.config ================================================================== --- tests/fdktestqa/testqa/megatest.config +++ tests/fdktestqa/testqa/megatest.config @@ -1,11 +1,9 @@ [setup] testcopycmd cp --remove-destination -rlv TEST_SRC_PATH/. TEST_TARG_PATH/. >> TEST_TARG_PATH/mt_launch.log 2>> TEST_TARG_PATH/mt_launch.log -launchwait no +# launchwait no [jobtools] -launcher loadrunner +launcher nbfake [include ../fdk.config] -[server] -port 9080 Index: tests/fdktestqa/testqa/tests/bigrun/step1.sh ================================================================== --- tests/fdktestqa/testqa/tests/bigrun/step1.sh +++ tests/fdktestqa/testqa/tests/bigrun/step1.sh @@ -1,9 +1,13 @@ -#!/bin/sh -if [ $NUMBER -lt 15 ];then - sleep 2 - sleep `echo 2 * $NUMBER | bc` +#!/bin/bash +if [ $NUMBER -lt 10 ];then + sleep 20 + sleep `echo 4 * $NUMBER | bc` else - sleep 100 + sleep 130 fi -exit 0 +if [[ $RANDOM -lt 10000 ]];then + exit 1 +else + exit 0 +fi Index: tests/fdktestqa/testqa/tests/bigrun/testconfig ================================================================== --- tests/fdktestqa/testqa/tests/bigrun/testconfig +++ tests/fdktestqa/testqa/tests/bigrun/testconfig @@ -1,8 +1,11 @@ # Add additional steps here. 
Format is "stepname script" +[vars] +step1var step1.sh + [ezsteps] -step1 step1.sh +step1 #{get vars step1var} # Test requirements are specified here [requirements] # waiton setup priority 0 Index: tests/fdktestqa/testqa/tests/bigrun2/testconfig ================================================================== --- tests/fdktestqa/testqa/tests/bigrun2/testconfig +++ tests/fdktestqa/testqa/tests/bigrun2/testconfig @@ -5,15 +5,19 @@ # Test requirements are specified here [requirements] waiton bigrun priority 0 mode itemwait - +itemmap .*/ # Iteration for your tests are controlled by the items section [items] -NUMBER #{scheme (string-intersperse (map number->string (sort (let loop ((a 0)(res '()))(if (<= a (or (any->number (get-environment-variable "NUMTESTS")) 2500))(loop (+ a 1)(cons a res)) res)) <)) " ")} +NUMBER #{scheme (string-intersperse (map (lambda (x)(conc "blah/" x)) \ + (map number->string (sort (let loop ((a 0)(res '())) \ + (if (<= a (or (any->number (get-environment-variable "NUMTESTS")) 2500)) \ + (loop (+ a 1)(cons a res)) res)) <))) " ")} + # test_meta is a section for storing additional data on your test [test_meta] author matt owner matt Index: tests/fdktestqa/testqa/tests/bigrun3/testconfig ================================================================== --- tests/fdktestqa/testqa/tests/bigrun3/testconfig +++ tests/fdktestqa/testqa/tests/bigrun3/testconfig @@ -5,15 +5,18 @@ # Test requirements are specified here [requirements] waiton bigrun2 priority 0 mode itemwait - +itemmap .*/ # Iteration for your tests are controlled by the items section [items] -NUMBER #{scheme (string-intersperse (map number->string (sort (let loop ((a 0)(res '()))(if (<= a (or (any->number (get-environment-variable "NUMTESTS")) 2500))(loop (+ a 1)(cons a res)) res)) <)) " ")} +NUMBER #{scheme (string-intersperse (map (lambda (x)(conc "blah/" x)) \ + (map number->string (sort (let loop ((a 0)(res '())) \ + (if (<= a (or (any->number (get-environment-variable "NUMTESTS")) 
2500)) \ + (loop (+ a 1)(cons a res)) res)) <))) " ")} # test_meta is a section for storing additional data on your test [test_meta] author matt owner matt Index: tests/fullrun/config/mt_include_1.config ================================================================== --- tests/fullrun/config/mt_include_1.config +++ tests/fullrun/config/mt_include_1.config @@ -1,8 +1,8 @@ [setup] # exectutable /path/to/megatest -max_concurrent_jobs 500 +max_concurrent_jobs 50 linktree #{getenv MT_RUN_AREA_HOME}/tmp/mt_links [jobtools] useshell yes @@ -9,15 +9,21 @@ # ## launcher launches jobs, the job is managed on the target host ## by megatest, comment out launcher to run local # workhosts localhost hermes # launcher exec nbfake # launcher nbfake -launcher loadrunner +# launcher loadrunner # launcher echo # launcher nbfind # launcher nodanggood -# launcher nbload +# launcher loadrunner +launcher nbfake +# maxload *per cpu* +maxload 1.5 +# default waitdelay is 60 seconds +waitdelay 15 + ## use "xterm -e csi -- " as a launcher to examine the launch environment. 
## exit with (exit) ## get a shell with (system "bash") # launcher xterm -e csi -- Index: tests/fullrun/megatest.config ================================================================== --- tests/fullrun/megatest.config +++ tests/fullrun/megatest.config @@ -8,10 +8,15 @@ [refareas] area1 /tmp/oldarea/megatest [include config/mt_include_1.config] +[dashboard] +pre-command xterm -geometry 180x20 -e " +post-command |& tee results.log ;echo Press any key to continue;bash -c 'read -n 1 -s'" & +testsort -event_time + [misc] home #{shell readlink -f $MT_RUN_AREA_HOME} parent #{shell readlink -f $MT_RUN_AREA_HOME/..} [tests-paths] @@ -20,14 +25,16 @@ [setup] # Set launchwait to no to use the more agressive code that does not wait for the launch to complete before proceeding # this may save a few milliseconds on launching tests # launchwait no waivercommentpatt ^WW\d+ [a-z].* +incomplete-timeout 1 + +# yes, anything else is no +run-wait yes -# Use http instead of direct filesystem access -# transport http -# transport fs + # If set to "default" the old code is used. Otherwise defaults to 200 or uses # numeric value given. # runqueue 20 @@ -51,11 +58,11 @@ # or for hard links # testcopycmd cp --remove-destination -rlv TEST_SRC_PATH/. TEST_TARG_PATH/. 
# FULL or 2, NORMAL or 1, OFF or 0 -synchronous OFF +synchronous 0 # Throttle roughly scales the db access milliseconds to seconds delay throttle 0.2 # Max retries allows megatest to re-check that a tests status has changed # as tests can have transient FAIL status occasionally maxretries 20 @@ -70,20 +77,25 @@ # override the html viewer launch command # # htmlviewercmd firefox -new-window htmlviewercmd konqueror +# -runtests automatically deletes the records for tests with the listed states on starting up a run allowing them to re-run +# (nb// this is in addition to NOT_STARTED which is automatically re-run) +# +allow-auto-rerun INCOMPLETE ZERO_ITEMS +# could add: STUCK STUCK/DEAD UNKNOWN KILLED KILLREQ PREQ_DISCARD + [validvalues] state start end 0 1 - 2 status pass fail n/a 0 1 running - 2 # These are set before all tests, override them # in the testconfig [pre-launch-env-overrides] section [env-override] # This variable is honored by the loadrunner script. The value is in percent -# a value of 200 will stop new jobs from starting. MAX_ALLOWED_LOAD 200 # MT_XTERM_CMD overrides the terminal command # MT_XTERM_CMD xterm -bg lightgreen -fg black @@ -104,30 +116,49 @@ WRAPPEDVAR This var should have the work blah thrice: \ blah \ blah +MAX_ALLOWED_LOAD 200 # XTERM [system xterm] # RUNDEAD [system exit 56] [server] + +# Use http instead of direct filesystem access +transport http +# transport fs +# transport nmsg + +synchronous 0 # If the server can't be started on this port it will try the next port until # it succeeds port 9080 # This server will keep running this number of hours after last access. # Three minutes is 0.05 hours # timeout 0.025 -timeout 0.25 +timeout 0.061 + +# Server is required - slower but more resistant to Sqlite issues. 
+required yes + +# Start server when average query takes longer than this +# server-query-threshold 55500 +server-query-threshold -1 + +# daemonize yes +# hostname #{scheme (get-host-name)} ## disks are: ## name host:/path/to/area ## -or- ## name /path/to/area [disks] disk0 /foobarbazz +disk1 not-a-disk [include config/mt_include_2.config] [include #{getenv USER}_testing.config] [jobgroups] Index: tests/fullrun/runconfigs.config ================================================================== --- tests/fullrun/runconfigs.config +++ tests/fullrun/runconfigs.config @@ -17,10 +17,11 @@ WACKYVAR2 #{runconfigs-get CURRENT} [ubuntu/nfs/none] WACKYVAR2 #{runconfigs-get CURRENT} SOMEVAR2 This should show up in SOMEVAR4 if the target is ubuntu/nfs/none +VARWITHDOLLARSIGNS The$USER/signs/should/be/replaced/with/variable [default] SOMEVAR3 #{rget SOMEVAR} SOMEVAR4 #{rget SOMEVAR2} SOMEVAR5 #{runconfigs-get SOMEVAR2} Index: tests/fullrun/tests/exit_0/testconfig ================================================================== --- tests/fullrun/tests/exit_0/testconfig +++ tests/fullrun/tests/exit_0/testconfig @@ -6,5 +6,10 @@ owner bob description This test checks that a multi-lineitem test with mix of pass and non-fail rolls up a PASS tags first,single reviewed 09/10/2011, by Matt + +[triggers] +NOT_STARTED/ xterm -e bash -s -- +RUNNING/ xterm -e bash -s -- + ADDED tests/fullrun/tests/test_mt_vars/eval_vars.sh Index: tests/fullrun/tests/test_mt_vars/eval_vars.sh ================================================================== --- /dev/null +++ tests/fullrun/tests/test_mt_vars/eval_vars.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +if env | grep VARWITHDOLLARSIGNS | grep USER;then + exit 1 # fails! +else + exit 0 # good! 
+fi Index: tests/fullrun/tests/test_mt_vars/testconfig ================================================================== --- tests/fullrun/tests/test_mt_vars/testconfig +++ tests/fullrun/tests/test_mt_vars/testconfig @@ -18,10 +18,13 @@ # VACKYVAR should be set to a path vackyvar vackyvar.sh # test-path and test-file test-path test-path-file.sh + +# verify that vars with $ signs get expanded +varwithdollar eval_vars.sh [requirements] waiton runfirst priority 0 Index: tests/mintest/megatest.config ================================================================== --- tests/mintest/megatest.config +++ tests/mintest/megatest.config @@ -1,11 +1,11 @@ [fields] X TEXT [setup] max_concurrent_jobs 50 -linktree #{getenv PWD}/linktree +linktree #{getenv MT_RUN_AREA_HOME}/linktree transport http [server] port 8090 Index: tests/rununittest.sh ================================================================== --- tests/rununittest.sh +++ tests/rununittest.sh @@ -4,14 +4,20 @@ # # Ensure all is made (cd ..;make && make install) +# put megatest on path from correct location +mtbindir=$(readlink -f ../bin) + +export PATH="${mtbindir}:$PATH" + # Clean setup # -rm -f simplerun/megatest.db simplerun/monitor.db simplerun/db/monitor.db -rm -rf simplelinks/ simpleruns/ simplerun/db/ +dbdir=$(cd simplerun;megatest -show-config -section setup -var linktree)/.db +rm -f simplerun/megatest.db simplerun/monitor.db simplerun/db/monitor.db $dbdir/*.db +rm -rf simplelinks/ simpleruns/ simplerun/db/ $dbdir mkdir -p simplelinks simpleruns (cd simplerun;cp ../../*_records.scm .;perl -pi.bak -e 's/define-inline/define/' *_records.scm) # Run the test $1 is the unit test to run cd simplerun;echo '(load "../tests.scm")' | ../../bin/megatest -repl -debug $2 $1 Index: tests/simplerun/megatest.config ================================================================== --- tests/simplerun/megatest.config +++ tests/simplerun/megatest.config @@ -10,11 +10,11 @@ # be aware that some unit tests will fail with 
this due to persistent data # # tmpdb /tmp # This is your link path, you can move it but it is generally better to keep it stable -linktree #{shell readlink -f #{getenv PWD}/../simplelinks} +linktree #{getenv MT_RUN_AREA_HOME}/../simplelinks # Valid values for state and status for steps, NB// It is not recommended you use this [validvalues] state start end completed @@ -27,6 +27,6 @@ [env-override] EXAMPLE_VAR example value # As you run more tests you may need to add additional disks, the names are arbitrary but must be unique [disks] -disk0 #{shell readlink -f #{getenv PWD}/../simpleruns} +disk0 #{getenv MT_RUN_AREA_HOME}/../simpleruns ADDED tests/simplerun/tests/test2/step1.sh Index: tests/simplerun/tests/test2/step1.sh ================================================================== --- /dev/null +++ tests/simplerun/tests/test2/step1.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +# Run your step here DELETED tests/simplerun/tests/test2/step1.sh.sh Index: tests/simplerun/tests/test2/step1.sh.sh ================================================================== --- tests/simplerun/tests/test2/step1.sh.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env bash - -# Run your step here ADDED tests/simplerun/tests/test2/step2.sh Index: tests/simplerun/tests/test2/step2.sh ================================================================== --- /dev/null +++ tests/simplerun/tests/test2/step2.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +# Run your step here DELETED tests/simplerun/tests/test2/step2.sh.sh Index: tests/simplerun/tests/test2/step2.sh.sh ================================================================== --- tests/simplerun/tests/test2/step2.sh.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env bash - -# Run your step here ADDED tests/stats.txt Index: tests/stats.txt ================================================================== --- /dev/null +++ tests/stats.txt @@ -0,0 +1,77 @@ +DB Stats: a1236d6bf92ec5cb8955f490761b21b0d3eea9d3 +======== +Cmd Count TotTime Avg 
+get-count-tests-running-for-run-id 1035 237.0 0.23 +get-count-tests-running-in-jobgroup 884 119.0 0.13 +get-count-tests-running 884 169.0 0.19 +get-prereqs-not-met 884 732.0 0.83 +get-test-info-by-id 673 122.0 0.18 +get-keys 476 1.0 0.00 +get-test-id 356 42.0 0.12 +testmeta-get-record 203 24.0 0.12 +roll-up-pass-fail-counts 159 39.0 0.25 +register-test 140 30.0 0.21 +test-set-rundir-shortdir 128 98.0 0.77 +test-set-status-state 94 45.0 0.48 +find-and-mark-incomplete 32 0.0 0.00 +state-status-msg 25 4.0 0.16 +delete-tests-in-state 12 4.0 0.33 +get-tests-for-run-mindata 8 0.0 0.00 +get-all-run-ids 5 2.0 0.40 +get-run-info 4 0.0 0.00 +register-run 4 5.0 1.25 +set-tests-state-status 4 15.0 3.75 +get-tests-for-run 4 15.0 3.75 + +# After converting first three functions above to sqlite3:first-result +DB Stats +======== +Cmd Count TotTime Avg +get-count-tests-running-for-run-id 1138 179.0 0.16 +get-count-tests-running-in-jobgroup 987 91.0 0.09 +get-count-tests-running 987 171.0 0.17 +get-prereqs-not-met 987 892.0 0.90 +get-test-info-by-id 672 95.0 0.14 +get-keys 476 0.0 0.00 +get-test-id 355 41.0 0.12 +testmeta-get-record 203 15.0 0.07 +roll-up-pass-fail-counts 159 30.0 0.19 +register-test 140 22.0 0.16 +test-set-rundir-shortdir 128 855.0 6.68 +test-set-status-state 94 20.0 0.21 +find-and-mark-incomplete 36 1.0 0.03 +state-status-msg 24 5.0 0.21 +delete-tests-in-state 12 2.0 0.17 +get-tests-for-run-mindata 9 0.0 0.00 +get-all-run-ids 5 1.0 0.20 +register-run 4 1.0 0.25 +get-tests-for-run 4 11.0 2.75 +get-run-info 4 0.0 0.00 +set-tests-state-status 4 17.0 4.25 + +DB Stats another run, converted one or two non-relevant functions to sqlite3:first-result +======== +Cmd Count TotTime Avg +get-count-tests-running-for-run-id 987 157.0 0.16 +get-count-tests-running-in-jobgroup 836 79.0 0.09 +get-count-tests-running 836 121.0 0.14 +get-prereqs-not-met 836 513.0 0.61 +get-test-info-by-id 673 85.0 0.13 +get-keys 476 0.0 0.00 +get-test-id 356 32.0 0.09 +testmeta-get-record 203 19.0 
0.09 +roll-up-pass-fail-counts 159 27.0 0.17 +register-test 140 23.0 0.16 +test-set-rundir-shortdir 128 35.0 0.27 +test-set-status-state 94 20.0 0.21 +find-and-mark-incomplete 40 0.0 0.00 +state-status-msg 25 5.0 0.20 +delete-tests-in-state 12 1.0 0.08 +get-tests-for-run-mindata 10 0.0 0.00 +get-all-run-ids 5 0.0 0.00 +set-tests-state-status 4 15.0 3.75 +register-run 4 2.0 0.50 +get-run-info 4 1.0 0.25 +get-tests-for-run 4 12.0 3.00 + + Index: tests/tests.scm ================================================================== --- tests/tests.scm +++ tests/tests.scm @@ -24,13 +24,17 @@ (lambda (file) (print "Loading " file) (load file)) files)) -(define *runremote* #f) - (let* ((unit-test-name (list-ref (argv) 4)) (fname (conc "../unittests/" unit-test-name ".scm"))) (if (file-exists? fname) (load fname) (print "ERROR: Unit test " unit-test-name " not found in unittests directory"))) + + (list "abc" "abc/%" "ab%/c%" "~abc/c%" "abc/~c%" "a,b/c,%/d" "%/,%/a" "%/,%/a" "%/,%/a" "%" "%" "%/" "%/" "%abc%") + (list "abc" "abc" "abcd" "abc" "abc" "a" "abc" "def" "ghi" "a" "a" "a" "a" "abc") + (list "" "" "cde" "cde" "cde" "" "" "a" "b" "" "b" "" "b" "abc") + (list #t #t #t #f #f #t #t #t #f #t #t #t #f #t)) + Index: tests/unittests/basicserver.scm ================================================================== --- tests/unittests/basicserver.scm +++ tests/unittests/basicserver.scm @@ -2,113 +2,223 @@ ;; S E R V E R ;;====================================================================== ;; Run like this: ;; -;; (cd ..;make && make install) && ./rununittest.sh server 1;(cd simplerun;megatest -stop-server 0) - -(set! *transport-type* 'http) - -(test "setup for run" #t (begin (setup-for-run) - (string? (getenv "MT_RUN_AREA_HOME")))) - -(test "server-register, get-best-server" #t (let ((res #f)) - (open-run-close tasks:server-register tasks:open-db 1 "bob" 1234 100 'live 'http) - (set! res (open-run-close tasks:get-best-server tasks:open-db)) - (number? 
(vector-ref res 3)))) - -(test "de-register server" #f (let ((res #f)) - (open-run-close tasks:server-deregister tasks:open-db "bob" port: 1234) - (vector? (open-run-close tasks:get-best-server tasks:open-db)))) - -(define server-pid #f) - -;; Not sure how the following should work, replacing it with system of megatest -server -;; (test "launch server" #t (let ((pid (process-fork (lambda () -;; ;; (daemon:ize) -;; (server:launch 'http))))) -;; (set! server-pid pid) -;; (number? pid))) -(system "../../bin/megatest -server - -debug 22 > server.log 2> server.log &") - -(let loop ((n 10)) - (thread-sleep! 1) ;; need to wait for server to start. - (let ((res (open-run-close tasks:get-best-server tasks:open-db))) - (print "tasks:get-best-server returned " res) - (if (and (not res) - (> n 0)) - (loop (- n 1))))) - -(test "get-best-server" #t (begin - (client:launch) - (let ((dat (open-run-close tasks:get-best-server tasks:open-db))) - (vector? dat)))) - -(define *keys* (keys:config-get-fields *configdat*)) -(define *keyvals* (keys:target->keyval *keys* "a/b/c")) - -(test #f #t (string? 
(car *runremote*))) -(test #f '(#t "successful login") (rmt:login)) ;; *runremote* *toppath* *my-client-signature*))) - -(test #f #f (rmt:get-test-info-by-id 99)) ;; get non-existant test - -;; RUNS -(test #f 1 (rmt:register-run *keyvals* "firstrun" "new" "n/a" (current-user-name))) -(test "get run info" "firstrun" (let ((rinfo (rmt:get-run-info 1))) - (vector-ref (vector-ref rinfo 1) 3))) -(test "get runname from id" "firstrun" (rmt:get-run-name-from-id 1)) - -;; TESTS -(test "get tests (no data)" '() (rmt:get-tests-for-run 1 "%" '() '() #f #f #f #f #f #f)) -(test "register test" #t (rmt:general-call 'register-test 1 "test1" "")) -(test "get tests (some data)" 1 (length (rmt:get-tests-for-run 1 "%" '() '() #f #f #f #f #f #f))) -(test "get test id" 1 (rmt:get-test-id 1 "test1" "")) -(test "sync back" #t (> (rmt:sync-inmem->db) 0)) -(test "get test id from main" 1 (db:get-test-id *db* 1 "test1" "")) -(test "get keys" #t (list? (rmt:get-keys))) -(test "set comment" #t (begin (rmt:general-call 'set-test-comment "this is a comment" 1) #t)) -(test "get comment" "this is a comment" (let ((trec (rmt:get-test-info-by-id 1))) - (db:test-get-comment trec))) - -;; MORE RUNS -(test "get runs" #t (let* ((runs (rmt:get-runs "%" #f #f '())) - (header (vector-ref runs 0)) - (data (vector-ref runs 1))) - (and (list? header) - (list? data) - (vector? 
(car data))))) - -(test "get local testinfo" "test1" (vector-ref (db:get-testinfo-state-status *db* 1) 2)) -(test "get testinfo" "test1" (vector-ref (rmt:get-testinfo-state-status 1) 2)) - -;;====================================================================== -;; D B -;;====================================================================== - -(test "pass fail counts" #t (rmt:general-call 'pass-fail-counts 10 9 1)) -(test "get pass fail counts" 19 (let ((dat (rmt:get-test-info-by-id 1))) - (+ (db:test-get-pass_count dat) - (db:test-get-fail_count dat)))) - -(define testregistry (make-hash-table)) -(for-each - (lambda (tname) - (for-each - (lambda (itempath) - (let ((tkey (conc tname "/" itempath)) - (rpass (random 10)) - (rfail (random 10))) - (hash-table-set! testregistry tkey (list tname itempath)) - (rmt:general-call 'register-test 1 tname itempath) - (let* ((tid (rmt:get-test-id 1 tname itempath)) - (tdat (rmt:get-test-info-by-id tid))) - (rmt:general-call 'pass-fail-counts rpass rfail (db:test-get-id tdat)) - (let* ((resdat (rmt:get-test-info-by-id tid))) - (test "set/get pass fail counts" (list rpass rfail) - (list (db:test-get-pass_count resdat) - (db:test-get-fail_count resdat))))))) - (list "" "a" "b" "c" "d" "e" "f" "g" "h" "i" "j"))) - (list "test1" "test2" "test3" "test4" "test5")) - - -(test #f '(#t "exit process started") (rmt:kill-server)) ;; *toppath* *my-client-signature* #f))) - +;; ./rununittest.sh server 1;(cd simplerun;megatest -stop-server 0) + +(delete-file* "logs/1.log") +(define run-id 1) + +(test "setup for run" #t (begin (launch:setup-for-run) + (string? (getenv "MT_RUN_AREA_HOME")))) + +;; NON Server tests go here + +(test #f #f (db:dbdat-get-path *db*)) +(test #f #f (db:get-run-name-from-id *db* run-id)) +;; (test #f '("SYSTEM" "RELEASE") (rmt:get-keys)) + +;; (exit) + +;; Server tests go here +(for-each (lambda (run-id) +(test #f #f (tasks:server-running-or-starting? 
(db:delay-if-busy (tasks:open-db)) run-id)) +(server:kind-run run-id) +(test "did server start within 20 seconds?" + #t + (let loop ((remtries 20) + (running (tasks:server-running-or-starting? (db:delay-if-busy + (tasks:open-db)) + run-id))) + (if running + (> running 0) + (if (> remtries 0) + (begin + (thread-sleep! 1) + (loop (- remtries 1) + (tasks:server-running-or-starting? (db:delay-if-busy + (tasks:open-db)) + run-id))))))) + +(test "did server become available" #t + (let loop ((remtries 10) + (res (tasks:get-server (db:delay-if-busy (tasks:open-db)) run-id))) + (if res + (vector? res) + (begin + (if (> remtries 0) + (begin + (thread-sleep! 1.1) + (loop (- remtries 1)(tasks:get-server (db:delay-if-busy (tasks:open-db)) run-id))) + res))))) +) +(list 0 1)) +(define user (current-user-name)) +(define runname "mytestrun") +(define keys (rmt:get-keys)) +(define runinfo #f) +(define keyvals '(("SYSTEM" "abc")("RELEASE" "def"))) +(define header (vector "SYSTEM" "RELEASE" "id" "runname" "state" "status" "owner" "event_time")) + +;; Setup +;; +(test #f #f (not (client:setup run-id))) +(test #f #f (not (hash-table-ref/default *runremote* run-id #f))) + +;; Login +;; +(test #f '(#t "successful login") (rmt:login-no-auto-client-setup (hash-table-ref/default *runremote* run-id #f) run-id)) +(test #f '(#t "successful login") (rmt:login run-id)) + +;; Keys +;; +(test #f '("SYSTEM" "RELEASE") (rmt:get-keys)) + +;; No data in db +;; +(test #f '() (rmt:get-all-run-ids)) +(test #f #f (rmt:get-run-name-from-id run-id)) +(test #f + (let ((runrec (vector #f #f))) + (vector-set! runrec header 0) + (vector-set! 
runrec (vector #f #f #f #f) 1) + runrec) + (rmt:get-run-info run-id)) + +;; Insert data into db +;; +(test #f 1 (rmt:register-run keyvals runname "new" "n/a" user)) +;; (test #f #f (rmt:get-runs-by-patt keys runname)) +(test #f #t (rmt:general-call 'register-test run-id run-id "test-one" "")) + +;; With data in db +;; +(test #f '(1) (rmt:get-all-run-ids)) +(test #f runname (rmt:get-run-name-from-id run-id)) +(test #f + runname + (let ((run-info (rmt:get-run-info run-id))) + (db:get-value-by-header (db:get-rows run-info) + (db:get-header run-info) + "runname"))) + + ;; (vector header (vector "abc" "def" 1 "mytestrun" "new" "n/a" "matt" 1416280640.0)) + +(for-each (lambda (run-id) +;; test killing server +;; +(tasks:kill-server-run-id run-id) + +(test #f #f (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id)) +) +(list 0 1)) +;; (test #f #f (client:setup run-id)) + +;; (set! *transport-type* 'http) +;; +;; (test "setup for run" #t (begin (launch:setup-for-run) +;; (string? (getenv "MT_RUN_AREA_HOME")))) +;; +;; (test "server-register, get-best-server" #t (let ((res #f)) +;; (open-run-close tasks:server-register tasks:open-db 1 "bob" 1234 100 'live 'http) +;; (set! res (open-run-close tasks:get-best-server tasks:open-db)) +;; (number? (vector-ref res 3)))) +;; +;; (test "de-register server" #f (let ((res #f)) +;; (open-run-close tasks:server-deregister tasks:open-db "bob" port: 1234) +;; (vector? (open-run-close tasks:get-best-server tasks:open-db)))) +;; +;; (define server-pid #f) +;; +;; ;; Not sure how the following should work, replacing it with system of megatest -server +;; ;; (test "launch server" #t (let ((pid (process-fork (lambda () +;; ;; ;; (daemon:ize) +;; ;; (server:launch 'http))))) +;; ;; (set! server-pid pid) +;; ;; (number? pid))) +;; (system "../../bin/megatest -server - -debug 22 > server.log 2> server.log &") +;; +;; (let loop ((n 10)) +;; (thread-sleep! 1) ;; need to wait for server to start. 
+;; (let ((res (open-run-close tasks:get-best-server tasks:open-db))) +;; (print "tasks:get-best-server returned " res) +;; (if (and (not res) +;; (> n 0)) +;; (loop (- n 1))))) +;; +;; (test "get-best-server" #t (begin +;; (client:launch) +;; (let ((dat (open-run-close tasks:get-best-server tasks:open-db))) +;; (vector? dat)))) +;; +;; (define *keys* (keys:config-get-fields *configdat*)) +;; (define *keyvals* (keys:target->keyval *keys* "a/b/c")) +;; +;; (test #f #t (string? (car *runremote*))) +;; (test #f '(#t "successful login") (rmt:login)) ;; *runremote* *toppath* *my-client-signature*))) +;; +;; (test #f #f (rmt:get-test-info-by-id 99)) ;; get non-existant test +;; +;; ;; RUNS +;; (test #f 1 (rmt:register-run *keyvals* "firstrun" "new" "n/a" (current-user-name))) +;; (test "get run info" "firstrun" (let ((rinfo (rmt:get-run-info 1))) +;; (vector-ref (vector-ref rinfo 1) 3))) +;; (test "get runname from id" "firstrun" (rmt:get-run-name-from-id 1)) +;; +;; ;; TESTS +;; (test "get tests (no data)" '() (rmt:get-tests-for-run 1 "%" '() '() #f #f #f #f #f #f)) +;; (test "register test" #t (rmt:general-call 'register-test 1 "test1" "")) +;; (test "get tests (some data)" 1 (length (rmt:get-tests-for-run 1 "%" '() '() #f #f #f #f #f #f))) +;; (test "get test id" 1 (rmt:get-test-id 1 "test1" "")) +;; (test "sync back" #t (> (rmt:sync-inmem->db) 0)) +;; (test "get test id from main" 1 (db:get-test-id *db* 1 "test1" "")) +;; (test "get keys" #t (list? (rmt:get-keys))) +;; (test "set comment" #t (begin (rmt:general-call 'set-test-comment "this is a comment" 1) #t)) +;; (test "get comment" "this is a comment" (let ((trec (rmt:get-test-info-by-id 1))) +;; (db:test-get-comment trec))) +;; +;; ;; MORE RUNS +;; (test "get runs" #t (let* ((runs (rmt:get-runs "%" #f #f '())) +;; (header (vector-ref runs 0)) +;; (data (vector-ref runs 1))) +;; (and (list? header) +;; (list? data) +;; (vector? 
(car data))))) +;; +;; (test "get local testinfo" "test1" (vector-ref (db:get-testinfo-state-status *db* 1) 2)) +;; (test "get testinfo" "test1" (vector-ref (rmt:get-testinfo-state-status 1) 2)) +;; +;; ;;====================================================================== +;; ;; D B +;; ;;====================================================================== +;; +;; (test "pass fail counts" #t (rmt:general-call 'pass-fail-counts 10 9 1)) +;; (test "get pass fail counts" 19 (let ((dat (rmt:get-test-info-by-id 1))) +;; (+ (db:test-get-pass_count dat) +;; (db:test-get-fail_count dat)))) +;; +;; (define testregistry (make-hash-table)) +;; (for-each +;; (lambda (tname) +;; (for-each +;; (lambda (itempath) +;; (let ((tkey (conc tname "/" itempath)) +;; (rpass (random 10)) +;; (rfail (random 10))) +;; (hash-table-set! testregistry tkey (list tname itempath)) +;; (rmt:general-call 'register-test 1 tname itempath) +;; (let* ((tid (rmt:get-test-id 1 tname itempath)) +;; (tdat (rmt:get-test-info-by-id tid))) +;; (rmt:general-call 'pass-fail-counts rpass rfail (db:test-get-id tdat)) +;; (let* ((resdat (rmt:get-test-info-by-id tid))) +;; (test "set/get pass fail counts" (list rpass rfail) +;; (list (db:test-get-pass_count resdat) +;; (db:test-get-fail_count resdat))))))) +;; (list "" "a" "b" "c" "d" "e" "f" "g" "h" "i" "j"))) +;; (list "test1" "test2" "test3" "test4" "test5")) +;; +;; +;; (test #f '(#t "exit process started") (rmt:kill-server)) ;; *toppath* *my-client-signature* #f))) +;; + +(exit) Index: tree.scm ================================================================== --- tree.scm +++ tree.scm @@ -65,14 +65,15 @@ (loop hed tal depth (+ nodenum 1))))) #f)))) ;; top is the top node name zeroeth node VALUE=0 (define (tree:add-node obj top nodelst #!key (userdata #f)) - (if (not (iup:attribute obj "TITLE0")) + (if (or (not (string? (iup:attribute obj "TITLE0"))) + (string-null? (iup:attribute obj "TITLE0"))) (iup:attribute-set! 
obj "ADDBRANCH0" top)) (cond - ((not (string=? top (iup:attribute obj "TITLE0"))) + ((not (equal? top (iup:attribute obj "TITLE0"))) (print "ERROR: top name " top " doesn't match " (iup:attribute obj "TITLE0"))) ((null? nodelst)) (else (let loop ((hed (car nodelst)) (tal (cdr nodelst)) @@ -87,10 +88,11 @@ (nodenum (tree:find-node obj newpath))) ;; Add the branch under lastnode if not found (if (not nodenum) (begin (iup:attribute-set! obj (conc "ADDBRANCH" parentnode) hed) + ;; ERROR? ADDING DATA TO PARENT, DONT WE WANT IT ON CREATED NODE? (if userdata (iup:attribute-set! obj (conc "USERDATA" parentnode) userdata)) (if (null? tal) #t ;; reset to top @@ -111,6 +113,30 @@ (newpath (append trimpath (list node-title)))) (if (>= currnode nodenum) newpath (loop (+ currnode 1) newpath))))) + +(define (tree:delete-node obj top node-path) ;; node-path is a list of strings + (let ((id (tree:find-node obj (cons top node-path)))) + (print "Found node to remove " id " for path " top " " node-path) + (iup:attribute-set! obj (conc "DELNODE" id) "SELECTED"))) +#| + + (let* ((tb (iup:treebox + #:value 0 + #:name "Runs" + #:expand "YES" + #:addexpanded "NO" + #:selection-cb + (lambda (obj id state) + ;; (print "obj: " obj ", id: " id ", state: " state) + (let* ((run-path (tree:node->path obj id)) + (run-id (tree-path->run-id (cdr run-path)))) + (if run-id + (begin + (dboard:data-set-curr-run-id! 
*data* run-id) + (dashboard:update-run-summary-tab))) + ;; (print "path: " (tree:node->path obj id) " run-id: " run-id) + )))) +|# Index: txtdb/txtdb.scm ================================================================== --- txtdb/txtdb.scm +++ txtdb/txtdb.scm @@ -15,10 +15,11 @@ (use srfi-69) (use regex-case) (use posix) (use json) (use csv) +(use srfi-18) (include "../megatest-fossil-hash.scm") ;; Read a non-compressed gnumeric file (define (refdb:read-gnumeric-xml fname) @@ -112,21 +113,23 @@ (let ((ref-colnums (map (lambda (c) (list (cdr c)(car c))) (hash-table->alist colnums)))) (with-output-to-file (conc targdir "/" sheet-name ".dat") (lambda () - (print "[" col0title "]") + (if (not (string-null? col0title))(print "[" col0title "]")) (for-each (lambda (colname) (print "[" colname "]") (for-each (lambda (row) (let ((key (car row)) (val (cadr row))) (if (string-search comment-rx key) (print val) (if (string-search blank-rx key) (print) - (print key " " val))))) + (if (string-search " " key) + (print "\"" key "\" " val) + (print key " " val)))))) (reverse (hash-table-ref cols colname))) ;; (print) ) (sort (hash-table-keys cols)(lambda (a b) (let ((colnum-a (assoc a ref-colnums)) @@ -216,10 +219,11 @@ (hash-table-fold ht (lambda (k v res)(if (equal? v val) k res)) #f)) (define (read-dat fname) (let ((section-rx (regexp "^\\[(.*)\\]\\s*$")) (comment-rx (regexp "^#.*")) ;; This means a cell name cannot start with # + (quoted-cell-rx (regexp "^\"([^\"]*)\" (.*)$")) (cell-rx (regexp "^(\\S+) (.*)$")) ;; One space only for the cellname content separator (blank-rx (regexp "^\\s*$")) (continue-rx (regexp ".*\\\\$")) (var-no-val-rx (regexp "^(\\S+)\\s*$")) (inp (open-input-file fname)) @@ -251,10 +255,13 @@ (if (not first-section) (set! 
first-section sname)) (loop (read-line inp) sname res))) + (quoted-cell-rx (x k v)(loop (read-line inp) + section + (cons (list k section v) res))) (cell-rx (x k v) (loop (read-line inp) section (cons (list k section v) res))) (var-no-val-rx (x k) (loop (read-line inp) section @@ -263,10 +270,11 @@ (print "ERROR: Unrecognised line in input file " fname ", ignoring it") (loop (read-line inp) section res)))))))) (define (get-value-type val expressions) (cond + ((not val) '(ValueType "60")) ((string->number val) '(ValueType "40")) ((equal? val "") '(ValueType "60")) ((equal? (substring val 0 1) "=") (let ((exid (hash-table-ref/default expressions val #f))) (if exid @@ -454,13 +462,29 @@ (exit))) (let* ((dbname (pathname-strip-directory path)) (tmpf (conc (create-temporary-file dbname) ".gnumeric"))) (if (file-exists? (conc path "/sheet-names.cfg")) (refdb-export path tmpf)) - (let ((pid (process-run "gnumeric" (list tmpf)))) - (process-wait pid) - (import-gnumeric-file tmpf path)))) + (let* ((pid (process-run "gnumeric" (list tmpf)))) + (let loop ((last-mod-time (current-seconds))) + (let-values (((pid-code exit-status exit-signal)(process-wait pid #t))) + (if (eq? pid-code 0) ;; still going + (if (file-exists? tmpf) + (let ((mod-time (file-modification-time tmpf))) + (if (> mod-time last-mod-time) + (begin + (print "saved data to " path) + (import-gnumeric-file tmpf path))) + (thread-sleep! 0.5) + (loop mod-time)) + (begin + (thread-sleep! 
0.5) + (loop last-mod-time)))))) + ;; all done + (print "all done, writing new data to " path) + (import-gnumeric-file tmpf path) + (print "data written, exiting refdb edit.")))) ;;====================================================================== ;; This routine dispaches or executes most of the commands for refdb ;;====================================================================== ;; Index: utils/Makefile.installall ================================================================== --- utils/Makefile.installall +++ utils/Makefile.installall @@ -1,31 +1,28 @@ -# Copyright 2013, Matthew Welland. +# Copyright 2013,2014 Matthew Welland. # # This program is made available under the GNU GPL version 2.0 or # greater. See the accompanying file COPYING for details. # # This program is distributed WITHOUT ANY WARRANTY; without even the # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR # PURPOSE. +# make PREFIX=/mfs/pkgs/chicken/chicken-core all + help : @echo You may need to do the following first: @echo sudo apt-get install libreadline-dev - @echo sudo apt-get install libwebkitgtk-dev - @echo sudo apt-get install libmotif3 -OR- set KTYPE=26g4 - @echo KTYPE can be 26, 26g4, or 32 - @echo KTYPE=$KTYPE - @echo You are using PREFIX=$PREFIX - @echo You are using proxy="$(proxy)" - @echo If needed set proxy to host.dom:port - @echo - @echo "Set additional_libpath to help find gtk or other libraries, don't forget a leading :" - @echo ADDITIONAL_LIBPATH=$(ADDITIONAL_LIBPATH) - @echo - @echo To use previous IUP libraries set USEOLDIUP to yes - @echo USEOLDIUP=$(USEOLDIUP) + @echo sudo apt-get install libwebkitgtk-dev libfreetype6-dev libx11-dev libxpm-dev libxmu-dev libxft-dev libgtk2.0-dev libgl1-mesa-dev libglu1-mesa-dev libpangox-1.0-dev bison + @echo sudo apt-get install libmotif3 + @echo For IUP set IUPBRANCH, currently $(IUPBRANCH) + @echo You are using PREFIX=$(PREFIX) + @echo You are using PROXY="$(PROXY)" + @echo If needed set PROXY to 
host.dom:port + @echo http_proxy=$(http_proxy) + @echo PROX=$(PROX) @echo @echo To make all do: make all # Put the installation here ifeq ($(PREFIX),) @@ -33,29 +30,34 @@ endif # Set this on the command line of your make call if needed: make PROXY=host.com:1234 PROXY= -# Select IUP library type -KTYPE=26g4 - # Select version of chicken, sqlite3 etc -CHICKEN_VERSION=4.8.0 -SQLITE3_VERSION=3071401 +CHICKEN_VERSION=4.9.0.1 +SQLITE3_VERSION=3080500 +# http://www.sqlite.org/2014/sqlite-autoconf-3080500.tar.gz + +# Override IUPBRANCH to use other than trunk +IUPBRANCH=iup-3.10.1 # Eggs to install (straightforward ones) -EGGS=matchable readline apropos base64 regex-literals format regex-case test coops trace csv dot-locking posix-utils posix-extras directory-utils hostinfo tcp-server rpc csv-xml fmt json md5 awful http-client spiffy uri-common intarweb spiffy-request-vars +EGGS=matchable readline apropos base64 regex-literals format regex-case test coops trace csv \ + dot-locking posix-utils posix-extras directory-utils hostinfo tcp-server rpc csv-xml fmt \ + json md5 awful http-client spiffy uri-common intarweb spiffy-request-vars \ + spiffy-directory-listing ssax sxml-serializer sxml-modifications sql-de-lite \ + srfi-19 refdb ini-file # # Derived variables # ifeq ($(PROXY),) -PROX= +PROX:= else -http_proxy=http://$(PROXY) -PROX="-proxy $(PROXY)" +http_proxy:=http://$(PROXY) +PROX:=-proxy $(PROXY) endif BUILDHOME=$(PWD) PATH:=$(PREFIX)/bin:$(PATH) LIBPATH=$(PREFIX)/lib$(ADDITIONAL_LIBPATH) @@ -77,27 +79,30 @@ ARCHSIZE= else ARCHSIZE=64_ endif -IUPFILES=cd-5.5.1_Linux$(KTYPE)_$(ARCHSIZE)lib.tar.gz im-3.8_Linux$(KTYPE)_$(ARCHSIZE)lib.tar.gz iup-3.6_Linux$(KTYPE)_$(ARCHSIZE)lib.tar.gz CSCLIBS=$(shell echo $(LD_LIBRARY_PATH) | sed 's/:/ -L/g') -CSC_OPTIONS=-I$(PREFIX)/include -L$(CSCLIBS) +CSC_OPTIONS="-I$(PREFIX)/include -L$(CSCLIBS)" +# CSC_OPTIONS=-I$(PREFIX)/include -L$(CSCLIBS) -all : chkn eggs iup +all : chkn eggs libiup logprobin $(PREFIX)/lib/sqlite3.so 
$(PREFIX)/bin/hs chkn : $(CHICKEN_INSTALL) eggs : $(EGGSOFILES) -sqlite3 : $(CHICKEN_EGG_DIR)/sqlite3.so +libiup : $(PREFIX)/lib/libavcall.a $(CHICKEN_EGG_DIR)/iup.so $(CHICKEN_EGG_DIR)/canvas-draw.so + +logprobin : $(PREFIX)/bin/logpro -iup : $(PREFIX)/lib/libavcall.a $(CHICKEN_EGG_DIR)/iup.so $(CHICKEN_EGG_DIR)/canvas-draw.so +$(PREFIX)/bin/logpro : $(CHICKEN_EGG_DIR)/regex-literals.so + $(CHICKEN_INSTALL) logpro # Silly rule to make installing eggs more makeish, I don't understand why I need the basename -$(CHICKEN_EGG_DIR)/%.so : %.flag - $(CHICKEN_INSTALL) $(PROX) $(shell basename $*) +$(CHICKEN_EGG_DIR)/%.so : eggflags/%.flag + $(CHICKEN_INSTALL) $(PROX) -keep-installed $(shell basename $*) $(EGGFLAGS) : # $(CHICKEN_INSTALL) mkdir -p eggflags touch $(EGGFLAGS) @@ -106,79 +111,113 @@ setup-chicken4x.sh : $(EGGFLAGS) (echo "export PATH=$(PATH)" > setup-chicken4x.sh) (echo "export LD_LIBRARY_PATH=$(LD_LIBRARY_PATH)" >> setup-chicken4x.sh) mkdir -p $(PREFIX) -# Download chicken source -chicken-$(CHICKEN_VERSION).tar.gz : - wget http://code.call-cc.org/releases/$(CHICKEN_VERSION)/chicken-$(CHICKEN_VERSION).tar.gz - -# NB// Must touch csi.scm since tar puts original date on it and deps are wrong then -chicken-$(CHICKEN_VERSION)/csi.scm : chicken-$(CHICKEN_VERSION).tar.gz - tar xfvz chicken-$(CHICKEN_VERSION).tar.gz - touch -c chicken-$(CHICKEN_VERSION)/csi.scm - -$(CHICKEN_INSTALL) : chicken-$(CHICKEN_VERSION)/csi.scm setup-chicken4x.sh - cd chicken-$(CHICKEN_VERSION);make PLATFORM=linux PREFIX=$(PREFIX) - cd chicken-$(CHICKEN_VERSION);make PLATFORM=linux PREFIX=$(PREFIX) install - -sqlite-autoconf-$(SQLITE3_VERSION).tar.gz : - wget http://www.sqlite.org/sqlite-autoconf-$(SQLITE3_VERSION).tar.gz - -sqlite-autoconf-$(SQLITE3_VERSION) : sqlite-autoconf-$(SQLITE3_VERSION).tar.gz - tar xfz sqlite-autoconf-$(SQLITE3_VERSION).tar.gz - -$(PREFIX)/bin/sqlite3 : sqlite-autoconf-$(SQLITE3_VERSION) - (cd sqlite-autoconf-$(SQLITE3_VERSION);./configure 
--prefix=$(PREFIX);make;make install) - -$(CHICKEN_EGG_DIR)/sqlite3.so : $(PREFIX)/bin/sqlite3 +chicken-core/chicken.scm : chicken-$(CHICKEN_VERSION).tar.gz + tar xfz chicken-$(CHICKEN_VERSION).tar.gz + ln -sf chicken-$(CHICKEN_VERSION) chicken-core + + +chicken-4.9.0rc1.tar.gz : + wget http://code.call-cc.org/dev-snapshots/2014/04/17/chicken-4.9.0rc1.tar.gz + +chicken-4.9.0.1.tar.gz : + wget http://code.call-cc.org/releases/4.9.0/chicken-4.9.0.1.tar.gz + +# git clone git://code.call-cc.org/chicken-core +# git clone http://code.call-cc.org/git/chicken-core.git + +$(CHICKEN_INSTALL) : chicken-core/chicken.scm setup-chicken4x.sh + cd chicken-core;make PLATFORM=linux PREFIX=$(PREFIX) + cd chicken-core;make PLATFORM=linux PREFIX=$(PREFIX) install + +#====================================================================== +# S Q L I T E 3 +#====================================================================== + +sqlite-autoconf-$(SQLITE3_VERSION).tar.gz : + wget http://www.sqlite.org/2014/sqlite-autoconf-$(SQLITE3_VERSION).tar.gz + +sqlite-autoconf-$(SQLITE3_VERSION)/config.log : sqlite-autoconf-$(SQLITE3_VERSION).tar.gz + tar xfz sqlite-autoconf-$(SQLITE3_VERSION).tar.gz + +$(PREFIX)/bin/sqlite3 : sqlite-autoconf-$(SQLITE3_VERSION)/config.log + cd sqlite-autoconf-$(SQLITE3_VERSION);./configure --prefix=$(PREFIX);make;make install + +$(PREFIX)/lib/sqlite3.so : $(PREFIX)/bin/sqlite3 CSC_OPTIONS="-I$(PREFIX)/include -L$(PREFIX)/lib" $(CHICKEN_INSTALL) $(PROX) sqlite3 +#====================================================================== +# N A N O M S G +#====================================================================== + +nanomsg-0.5-beta.tar.gz : + wget http://download.nanomsg.org/nanomsg-0.5-beta.tar.gz + +nanomsg-0.5-beta/COPYING : nanomsg-0.5-beta.tar.gz + tar xfvz nanomsg-0.5-beta.tar.gz + +$(PREFIX)/bin/nanocat : nanomsg-0.5-beta/COPYING + cd nanomsg-0.5-beta;./configure --prefix=$(PREFIX);make;make install + +$(PREFIX)/lib/nanomsg.so : $(PREFIX)/bin/nanocat 
+ CSC_OPTIONS="-I$(PREFIX)/include -L$(PREFIX)/lib" $(CHICKEN_INSTALL) $(PROX) nanomsg + +#====================================================================== +# M A T T S U T I L S +#====================================================================== + +opensrc.fossil : + fossil clone http://www.kiatoa.com/fossils/opensrc opensrc.fossil -# Get and install my various utilities that haven't been eggified yet. -opensrc/margs/margs.scm opensrc/dbi/dbi.scm opensrc/qtree/qtree.scm : $(CHICKEN_INSTALL) $(CHICKEN_EGG_DIR)/sqlite3.so +opensrc/histstore/histstore.scm : opensrc.fossil mkdir -p opensrc - cd opensrc;if [ ! -e opensrc.fossil ]; then fossil clone http://www.kiatoa.com/fossils/opensrc opensrc.fossil; fi - cd opensrc;if [ -e dbi/dbi.scm ]; then fossil update; else fossil open opensrc.fossil; fi - -$(CHICKEN_EGG_DIR)/dbi.so : opensrc/dbi/dbi.scm - cd opensrc/dbi;chicken-install - -$(CHICKEN_EGG_DIR)/margs.so : opensrc/margs/margs.scm - cd opensrc/margs;chicken-install - -$(CHICKEN_EGG_DIR)/qtree.so : opensrc/qtree/qtree.scm - cd opensrc/qtree;chicken-install - -# $(CHICKEN_EGG_DIR)/dbi.so # Don't include as requires postgres -mattseggs : $(CHICKEN_EGG_DIR)/margs.so $(CHICKEN_EGG_DIR)/qtree.so - -# -# IUP -# - -ffcall.tar.gz : - wget http://www.kiatoa.com/matt/iup/ffcall.tar.gz - -ffcall/README : ffcall.tar.gz - tar xfvz ffcall.tar.gz - touch -c ffcall/README - + cd opensrc;fossil open ../opensrc.fossil + +opensrc/histstore/hs : opensrc/histstore/histstore.scm chkn eggs $(PREFIX)/lib/sqlite3.so + cd opensrc/histstore;$(PREFIX)/bin/csc histstore.scm -o hs + +$(PREFIX)/bin/hs : opensrc/histstore/hs + cp -f opensrc/histstore/hs $(PREFIX)/bin/hs + +#====================================================================== +# I U P +#====================================================================== + +ffcall.fossil : + fossil clone http://www.kiatoa.com/fossils/ffcall ffcall.fossil + +ffcall/README : ffcall.fossil + mkdir -p ffcall + cd ffcall && if [ -e README 
];then fossil update; else fossil open ../ffcall.fossil; fi + +# NOTE: This worked fine *without* the enable-shared +# $(PREFIX)/lib/libavcall.a : ffcall/README cd ffcall;./configure --prefix=$(PREFIX) --enable-shared && make && make install -$(IUPFILES) : - wget http://www.kiatoa.com/matt/iup/$@ - cd $(PREFIX)/lib;tar xfvz $(BUILDHOME)/$@ - mv $(PREFIX)/lib/include/* $(PREFIX)/include +iuplib.fossil : + fossil clone http://www.kiatoa.com/fossils/iuplib iuplib.fossil + +iup/installall.sh : iuplib.fossil + mkdir -p iup + cd iup && if [ -e installall.sh ];then fossil update $(IUPBRANCH); else fossil open ../iuplib.fossil;fossil update $(IUPBRANCH); fi + +iup/alldone : iup/installall.sh $(PREFIX)/include/iup.h $(PREFIX)/lib/libiup.so + cd iup && ./makeall.sh + +$(PREFIX)/lib/libiup.so $(PREFIX)/include/iup.h : iup/installall.sh iup/alldone + cd iup && ./installall.sh -$(PREFIX)/lib/libiup.so : $(IUPFILES) - touch -c $(PREFIX)/lib/libiup.so +# $(PREFIX)/lib/libiup.so : iup/iup/alldone +# touch -c $(PREFIX)/lib/libiup.so $(CHICKEN_EGG_DIR)/iup.so : $(PREFIX)/lib/libiup.so - $(CHICKEN_INSTALL) $(PROX) -D no-library-checks -feature disable-iup-web iup + LD_LIBRARY_PATH=$(LD_LIBRARY_PATH) CSC_OPTIONS=$(CSC_OPTIONS) $(CHICKEN_INSTALL) $(PROX) -D no-library-checks -feature disable-iup-web iup $(CHICKEN_EGG_DIR)/canvas-draw.so : $(PREFIX)/lib/libiup.so - $(CHICKEN_INSTALL) $(PROX) -D no-library-checks canvas-draw + CSC_OPTIONS=$(CSC_OPTIONS) $(CHICKEN_INSTALL) $(PROX) -D no-library-checks canvas-draw + clean : rm -rf chicken-4.8.0 eggflags ffcall sqlite-autoconf-$(SQLITE3_VERSION) ADDED utils/installck.sh Index: utils/installck.sh ================================================================== --- /dev/null +++ utils/installck.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +myhome=$(dirname $0) + +if [[ $proxy == "" ]]; then + echo 'Please set the environment variable "proxy" to host.com:port (e.g. 
foo.com:1234) to use a proxy' + echo PROX="" +else + export http_proxy=http://$proxy + export PROX="-proxy $proxy" +fi + +if [[ -z $PREFIX ]];then + echo "\$PREFIX variable is required" + exit +fi + +export LD_LIBRARY_NAME=$PREFIX/lib + +logname=$(basename $PREFIX) + +script -c "make -f $myhome/Makefile_latest.installall all" $logname.log Index: utils/loadrunner ================================================================== --- utils/loadrunner +++ utils/loadrunner @@ -1,9 +1,42 @@ #!/bin/bash + +LOADRUNNER=$0 # load=`uptime|awk '{print $10}'|cut -d, -f1` -load=`uptime|perl -pe 's/.*: (\d+.\d+),.*/$1/'` +load=$(uptime|perl -pe 's/.*: (\d+.\d+),.*/$1/') +load2=$(uptime|perl -pe 's/.*: (\d+.\d+), (\d+.\d+),.*/$2/') +# echo "load2=$load2, load=$load" + +# Run a job detached from stdin/stdout (i.e. daemonized) +# Launch on remotehost if specified by TARGETHOST +# +function launchjob () { + # Can't always trust $PWD + CURRWD=`pwd` + if [[ $TARGETHOST_LOGF == "" ]]; then + TARGETHOST_LOGF=NBFAKE-`date +%GWW%V.%u_%T` + fi + # echo "#======================================================================" + # echo "# NBFAKE Running command:" + # echo "# \"$*\"" + # echo "#======================================================================" + + if [[ $TARGETHOST == "" ]]; then + sh -c "cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* > $TARGETHOST_LOGF 2>&1 &" + else + ssh -n -f $TARGETHOST "sh -c \"cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* > $TARGETHOST_LOGF 2>&1 &\"" + fi +} + +function get_delay_time () { + RANGE=$1 + number=$RANDOM + let "number %= $RANGE" + echo $number +} + if which cpucheck > /dev/null;then numcpu=`cpucheck|tail -1|awk '{print $6}'` elif which lscpu > /dev/null;then numcpu=`lscpu|grep "CPU.s.:"|awk '{print $2}'` else @@ -10,20 +43,36 @@ numcpu=2 fi # NB// max_load is in units of percent. 
# -lperc=`echo "100 * $load / $numcpu"|bc` +lperc=$(echo "100 * $load / $numcpu"|bc) +lperc2=$(echo "100 * $load2 / $numcpu"|bc) +let "lperc2adj=$lperc2 + $numcpu" if [[ "x$MAX_ALLOWED_LOAD" == "x" ]]; then max_load=100 else max_load=$MAX_ALLOWED_LOAD fi +lfile=/tmp/loadrunner-$USER.lockfile +lockfile -r 5 -l 60 $lfile &> /dev/null + if [[ $lperc -lt $max_load ]];then - echo "Load acceptable: lperc=$lperc %, max_load=$max_load %, load=$load, numcpu=$numcpu, MAX_ALLOWED_LOAD=$MAX_ALLOWED_LOAD %" - echo "Starting command: \"$@\"" - nbfake "$@" + if [[ $lperc -le $lperc2adj ]];then + # echo "Load acceptable: lperc=$lperc %, max_load=$max_load %, load=$load, numcpu=$numcpu, MAX_ALLOWED_LOAD=$MAX_ALLOWED_LOAD % and $lperc2 < $lperc" + # echo "Starting command: \"$@\"" + launchjob "$@" + # we sleep ten seconds here to keep the lock a little longer and give time for + # the uptime to show a response + # sleep 2 + else + echo "$LOADRUNNER $@" | at now + 2 minutes &> /dev/null + # sleep 5 + fi else # echo "Load too high: lperc=$lperc, max_load=$max_load, waiting two minutes before trying to run command: \"$@\"" - echo "loadrunner $@" | at now + 2 minutes 2> /dev/null + echo "$LOADRUNNER $@" | at now + 2 minutes &> /dev/null fi + +sleep $(get_delay_time 3) +rm -f $lfile ADDED utils/loadrunner.scm.notfinished Index: utils/loadrunner.scm.notfinished ================================================================== --- /dev/null +++ utils/loadrunner.scm.notfinished @@ -0,0 +1,192 @@ + +;; Copyright 2006-2013, Matthew Welland. +;; +;; This program is made available under the GNU GPL version 2.0 or +;; greater. See the accompanying file COPYING for details. +;; +;; This program is distributed WITHOUT ANY WARRANTY; without even the +;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +;; PURPOSE. 
+ +(use ssax) +(use sxml-serializer) +(use sxml-modifications) +(use regex) +(use srfi-69) +(use regex-case) +(use posix) +(use json) +(use csv) +(use srfi-18) +(use format) + +(require-library iup) +(import (prefix iup iup:)) +(require-library ini-file) +(import (prefix ini-file ini:)) + +(use canvas-draw) +(import canvas-draw-iup) + +(use sqlite3 srfi-1 posix regex regex-case srfi-69) +(import (prefix sqlite3 sqlite3:)) + +(include "megatest-fossil-hash.scm") + +;; +;; GLOBALS +;; +(define *loadrunner:current-tab-number* 0) +(define loadrunner:unrecognised-command "ERROR: Unrecognised command or missing params. Try \"loadrunner help\"") +(define loadrunner:help (conc "Usage: loadrunner [action [params ...]] + +Note: run loadrunner without parameters to start the gui. + + run cmd [params ..] : Run cmd params ... when system load drops + process : Process the queue + +Part of the Megatest tool suite. Learn more at http://www.kiatoa.com/fossils/megatest + +Version: " megatest-fossil-hash)) ;; " + +;;====================================================================== +;; DB +;;====================================================================== + +(define (loadrunner:initialize-db db) + (for-each + (lambda (qry) + (sqlite3:execute db qry)) + (list + "CREATE TABLE pkgs + (id INTEGER PRIMARY KEY, + cmd TEXT, + datetime TEXT);"))) + +;; Create the sqlite db +(define (loadrunner:open-db path) + (if (and path + (directory? path) + (file-read-access? path)) + (let* ((dbpath (conc path "/loadrunner.db")) + (writeable (file-write-access? dbpath)) + (dbexists (file-exists? dbpath)) + (handler (make-busy-timeout 136000))) + (handle-exceptions + exn + (begin + (debug:print 2 "ERROR: problem accessing db " dbpath + ((condition-property-accessor 'exn 'message) exn)) + (exit)) + (set! db (sqlite3:open-database dbpath))) + (if *db-write-access* (sqlite3:set-busy-handler! 
db handler)) + (if (not dbexists) + (begin + (loadrunner:initialize-db db))) + db))) + +;;====================================================================== +;; GUI +;;====================================================================== + +;; The main menu +(define (loadrunner:main-menu) + (iup:menu ;; a menu is a special attribute to a dialog (think Gnome putting the menu at screen top) + (iup:menu-item "Files" (iup:menu ;; Note that you can use either #:action or action: for options + (iup:menu-item "Open" action: (lambda (obj) + (iup:show (iup:file-dialog)) + (print "File->open " obj))) + (iup:menu-item "Save" #:action (lambda (obj)(print "File->save " obj))) + (iup:menu-item "Exit" #:action (lambda (obj)(exit))))) + (iup:menu-item "Tools" (iup:menu + (iup:menu-item "Create new blah" #:action (lambda (obj)(print "Tools->new blah"))) + ;; (iup:menu-item "Show dialog" #:action (lambda (obj) + ;; (show message-window + ;; #:modal? #t + ;; ;; set positon using coordinates or center, start, top, left, end, bottom, right, parent-center, current + ;; ;; #:x 'mouse + ;; ;; #:y 'mouse + ;; ) + )))) + +(define (loadrunner:publish-view) + (iup:vbox + (iup:hbox + (iup:button "Pushme" + #:expand "YES" + )))) + +(define (loadrunner:get-view) + (iup:vbox + (iup:hbox + (iup:button "Pushme" + #:expand "YES" + )))) + +(define (loadrunner:manage-view) + (iup:vbox + (iup:hbox + (iup:button "Pushme" + #:expand "YES" + )))) + +(define (loadrunner:gui) + (iup:show + (iup:dialog + #:title (conc "Loadrunner dashboard " (current-user-name) ":" (current-directory)) + #:menu (loadrunner:main-menu) + (let* ((tabs (iup:tabs + #:tabchangepos-cb (lambda (obj curr prev) + (set! *loadrunner:current-tab-number* curr)) + (loadrunner:publish-view) + (loadrunner:get-view) + (loadrunner:manage-view) + ))) + ;; (set! (iup:callback tabs tabchange-cb:) (lambda (a b c)(print "SWITCHED TO TAB: " a " " b " " c))) + (iup:attribute-set! tabs "TABTITLE0" "Publish") + (iup:attribute-set! 
tabs "TABTITLE1" "Get") + (iup:attribute-set! tabs "TABTITLE2" "Manage") + ;; (iup:attribute-set! tabs "BGCOLOR" "190 190 190") + tabs))) + (iup:main-loop)) + +;;====================================================================== +;; MAIN +;;====================================================================== + +(define (loadrunner:load-config path) + (let ((fname (conc path "/.loadrunner.config"))) + (ini:property-separator-patt " * *") + (ini:property-separator #\space) + (if (file-exists? fname) + (ini:read fname) + '()))) + +(define (main) + (let* ((args (argv)) + (prog (car args)) + (rema (cdr args)) + (conf (loadrunner:load-config (pathname-directory prog)))) + ;; ( ????? + (cond + ((eq? (length rema) 1) + (case (string->symbol (car rema)) + ((process)(loadrunner:process-queue)) + ((pause) + (loadrunner:pause-queue (cdr rema))) + ((help -h -help --h --help) + (print loadrunner:help)) + (else + (print loadrunner:unrecognised-command)))) + ((null? rema)(loadrunner:gui)) + ((>= (length rema) 2) + (case (string->symbol (car rema)) + ((run) + (loadrunner:process-cmd (cdr rema))) + ((remove) + (loadrunner:remove-cmds (cdr rema))) + (else + (print loadrunner:unrecognised-command)))) + (else (print loadrunner:unrecognised-command))))) + +(main) Index: utils/mk_wrapper ================================================================== --- utils/mk_wrapper +++ utils/mk_wrapper @@ -1,16 +1,26 @@ #!/bin/bash prefix=$1 cmd=$2 +target=$3 -echo "#!/bin/bash" if [ "$LD_LIBRARY_PATH" != "" ];then + cfgfile="$prefix/bin/.$(lsb_release -sr)/cfg.sh" echo "INFO: Using LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >&2 - echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH" +( cat << __EOF +if [ "\$LD_LIBRARY_PATH" != "" ];then + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:\$LD_LIBRARY_PATH +else + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH +fi +__EOF +) > $cfgfile + echo else echo "INFO: LD_LIBRARY_PATH not set" >&2 fi -fullcmd="exec $prefix/bin/$cmd" +echo "#!/bin/bash" > $target +echo "source 
$prefix/bin/.\$(lsb_release -sr)/cfg.sh" >> $target +echo "exec $prefix/bin/.\$(lsb_release -sr)/$cmd \"\$@\"" >> $target -echo "$fullcmd \"\$@\"" Index: utils/nbfake ================================================================== --- utils/nbfake +++ utils/nbfake @@ -1,12 +1,76 @@ #!/bin/bash +############################################################################### +# +# nbfake - capture command output in a logfile +# +# nbfake behavior can be changed by setting the following env vars: +# NBFAKE_HOST SSH to $NBFAKE_HOST and run command +# NBFAKE_LOG Logfile for nbfake output +# +############################################################################### + +if [[ -z "$@" ]]; then + cat <<__EOF + +nbfake usage: + +nbfake + +nbfake behavior can be changed by setting the following env vars: + NBFAKE_HOST SSH to \$NBFAKE_HOST and run command + NBFAKE_LOG Logfile for nbfake output + +__EOF + exit +fi -# ssh localhost "nohup $* > nbfake.log 2> nbfake.err < /dev/null" +#============================================================================== +# Setup +#============================================================================== # Can't always trust $PWD -CURRWD=`pwd` +CURRWD=$(pwd) + +# Make sure nbfake host and logfile are set. 
Fall back to old-style variable names + +if [[ -z "$NBFAKE_HOST" && -n "$TARGETHOST" ]]; then + MY_NBFAKE_HOST=$TARGETHOST + unset TARGETHOST +else + MY_NBFAKE_HOST=$NBFAKE_HOST + unset NBFAKE_HOST +fi + + +if [[ -z "$NBFAKE_LOG" && -n "$TARGETHOST_LOGF" ]]; then + MY_NBFAKE_LOG=$TARGETHOST_LOGF + unset TARGETHOST_LOGF +else + MY_NBFAKE_LOG=$NBFAKE_LOG + unset NBFAKE_LOG +fi + +# Set default nbfake log + +if [[ -z "$MY_NBFAKE_LOG" ]]; then + MY_NBFAKE_LOG=NBFAKE-$(date +%GWW%V.%u_%T) +fi + +#============================================================================== +# Run and log +#============================================================================== + +cat <<__EOF >&2 +#====================================================================== +# NBFAKE logging command to: $MY_NBFAKE_LOG +# $* +#====================================================================== +__EOF -if [[ $TARGETHOST == "" ]]; then - sh -c "cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* > NBFAKE-`date +%GWW%V.%u_%T` 2>&1 &" +if [[ -z "$MY_NBFAKE_HOST" ]]; then + # Run locally + sh -c "cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* >> $MY_NBFAKE_LOG 2>&1 &" else - ssh -n -f $TARGETHOST "sh -c \"cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* > NBFAKE-`date +%GWW%V.%u_%T` 2>&1 &\"" + # run remotely + ssh -n -f $MY_NBFAKE_HOST "sh -c \"cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* >> $MY_NBFAKE_LOG 2>&1 &\"" fi Index: utils/plot-code.scm ================================================================== --- utils/plot-code.scm +++ utils/plot-code.scm @@ -1,7 +1,14 @@ #!/mfs/pkgs/chicken/4.8.0.5/bin/csi -nbq +;; Coming soon (right?) 
Usage: plot-code file1.scm,file2.scm "fun1,fun2,x*" *.scm > plot.dot +;; Usage: plot-code file1.scm,file2.scm *.scm > plot.dot +;; dot -Tpdf plot.dot > plot.pdf +;; first param is comma separated list of files to include in the map, use - to do all +;; second param is list of regexs for functions to include in the map +;; third param is list of files to scan + (use regex srfi-69 srfi-13) (define targs #f) (define files (cddddr (argv))) @@ -15,10 +22,12 @@ (define defn-rx (regexp "^\\s*\\(define\\s+\\(([^\\s\\)]+).*")) (define all-regexs (make-hash-table)) (define all-fns '()) + +;; for the se (define (print-err . data) (with-output-to-port (current-error-port) (lambda () (apply print data)))) @@ -81,11 +90,32 @@ newres))) (if (null? tal) res (loop (car tal)(cdr tal) res))))) '())) - + +;; (define mm-header #< +;; +;; +;; MMHEADER +;; +;; (define (add-node text) +;; +;; ) +;; +;; minimal mindmap file +;; +;; +;; +;; +;; +;; +;; +;; +;; + ;; Gather the usages (print "digraph G {") (define curr-cluster-num 0) (define function-calls '()) ADDED utils/trace/trace.import.scm Index: utils/trace/trace.import.scm ================================================================== --- /dev/null +++ utils/trace/trace.import.scm @@ -0,0 +1,32 @@ +;;;; trace.import.scm - GENERATED BY CHICKEN 4.9.0.1 -*- Scheme -*- + +(eval '(import + scheme + chicken + csi + advice + extras + ports + data-structures + (except srfi-1 break) + miscmacros)) +(##sys#register-compiled-module + 'trace + (list) + '((breakpoint . trace#breakpoint) + (trace . trace#trace) + (untrace . trace#untrace) + (break . trace#break) + (unbreak . trace#unbreak) + (trace-output-port . trace#trace-output-port) + (continue . trace#continue) + (c . trace#c) + (traced? . trace#traced?) + (trace-module . trace#trace-module) + (untrace-module . trace#untrace-module) + (trace-verbose . trace#trace-verbose) + (trace/untrace . 
trace#trace/untrace)) + (list) + (list)) + +;; END OF FILE ADDED utils/trace/trace.meta Index: utils/trace/trace.meta ================================================================== --- /dev/null +++ utils/trace/trace.meta @@ -0,0 +1,10 @@ +;;;; trace.meta -*- Scheme -*- + + +((category tools) + (synopsis "tracing and breakpoints") + (author "felix winkelmann") + (license "public domain") + (needs advice ; don't we all? + miscmacros) + (files "tests/run.scm" "trace.meta" "trace.release-info" "trace.scm" "trace.setup") ) ADDED utils/trace/trace.scm Index: utils/trace/trace.scm ================================================================== --- /dev/null +++ utils/trace/trace.scm @@ -0,0 +1,259 @@ +;;;; trace.scm + + +(module trace (breakpoint + trace untrace + break unbreak + trace-output-port + continue c + traced? + trace-module untrace-module + trace-verbose + trace/untrace) + +(import scheme chicken csi) + +(use advice extras ports data-structures) +(require-library srfi-1) +(import (except srfi-1 break) miscmacros) + + +(define *last-breakpoint* #f) +(define *traced-procedures* '()) +(define *broken-procedures* '()) +(define *trace-indent-level* 0) + +(define trace-output-port (make-parameter (current-output-port))) +(define trace-verbose (make-parameter #t)) + +(define (break-entry name args) + ;; Does _not_ unwind! + (##sys#call-with-current-continuation + (lambda (c) + (let ((exn (##sys#make-structure + 'condition + '(exn breakpoint) + (list '(exn . message) "*** breakpoint ***" + '(exn . arguments) (list (cons name args)) + '(exn . location) name + '(exn . continuation) c) ) ) ) + (set! *last-breakpoint* exn) + (signal exn) ) ) ) ) + +(define (break-resume exn) + (let ((a (member '(exn . 
continuation) (##sys#slot exn 2)))) + (if a + ((cadr a) (void)) + (error "condition has no continuation" exn) ) ) ) + +(define (breakpoint #!optional (name 'breakpoint)) + (break-entry name '()) ) + +(define (trace-indent) + (let ((port (trace-output-port))) + (do ((i (fxmin 3 *trace-indent-level*) (fx- i 1))) + ((fx<= i 0)) + (write-char #\space port) ) + (fprintf port "[~a] " *trace-indent-level*) ) ) + +(define (traced-procedure-entry name args) + (let ((port (trace-output-port))) + (trace-indent) + (set! *trace-indent-level* (fx+ 1 *trace-indent-level*)) + (write (cons name args) port) + (write ", Called from: " port) + (write (conc (car (reverse (get-call-chain))))) + (write-char #\newline port) + (flush-output port) ) ) + +(define (traced-procedure-exit name results) + (let ((port (trace-output-port))) + (set! *trace-indent-level* (fx- *trace-indent-level* 1)) + (trace-indent) + (fprintf port "~a -> " name) + (if results + (for-each + (lambda (x) + (write x port) + (write-char #\space port) ) + results) + (display "(escaping)" port)) + (write-char #\newline port) + (flush-output port) ) ) + +(define (procedure-name proc) + (cond ((procedure-information proc) => + (lambda (info) + (if (pair? info) (car info) info) ) ) + (else ')) ) + +(define (do-trace procs) + (for-each + (lambda (s) + (ensure procedure? s) + (cond ((traced? s) + (warning "procedure already traced" s) ) + (else + (let ((name (procedure-name s))) + (when (trace-verbose) + (fprintf (current-error-port) "; tracing ~a~%" name)) + (set! *traced-procedures* (cons (cons s name) *traced-procedures*)) + (advise + 'around s + (lambda (next args) + (let ((results #f)) + (dynamic-wind + (cut traced-procedure-entry name args) + (lambda () + (call-with-values (cut apply next args) + (lambda rs + (set! 
results rs) + (apply values rs)))) + (cut traced-procedure-exit name results)))) + '*trace*))))) + procs) ) + +(define (do-untrace-all) + (define (unadvise* p) + (ignore-errors (unadvise p '*trace*))) + (for-each + (lambda (proc) + (let ((proc (car proc))) + (when (trace-verbose) + (fprintf (current-error-port) "; untracing ~a~%" (procedure-name proc)) + (unadvise* proc)))) + *traced-procedures*) + (set! *traced-procedures* '())) + +(define (do-untrace procs) + (for-each + (lambda (s) + (ensure procedure? s) + (let ((p (assq s *traced-procedures*)) + (name (procedure-name s))) + (cond ((not p) (warning "procedure not traced" name)) + (else + (when (trace-verbose) + (fprintf (current-error-port) "; untracing ~a~%" name)) + (ignore-errors (unadvise s '*trace*)) + (set! *traced-procedures* + (delete + p *traced-procedures* + eq?)))))) + procs) ) + +(define (do-break procs) + (for-each + (lambda (s) + (let ((name (procedure-name s))) + (ensure procedure? s) + (cond ((assq s *broken-procedures*) + (warning "procedure already has break-point" name)) + (else + (when (trace-verbose) + (fprintf (current-error-port) "; setting break-point in ~a~%" name)) + (set! *broken-procedures* (cons (cons s name) *broken-procedures*)) + (advise + 'before s + (lambda (args) + (break-entry name args) ) + '*break*) ) ))) + procs) ) + +(define (do-unbreak procs) + (for-each + (lambda (s) + (ensure procedure? s) + (let ((p (assq s *broken-procedures*)) + (name (procedure-name s))) + (cond ((not p) (warning "procedure has no breakpoint" name)) + (else + (when (trace-verbose) + (fprintf (current-error-port) "; removing break-point in ~a~%" name)) + (ignore-errors (unadvise s '*break*)) + (set! *broken-procedures* (delete p *broken-procedures* eq?) ) ) ) ) ) + procs) ) + +(define (do-unbreak-all) + (for-each + (lambda (bp) + (ignore-errors (unadvise (car bp) '*break*))) + *broken-procedures*) + (set! *broken-procedures* '()) + (void)) + +(define (trace . procs) + (cond ((null? 
procs) + (when (pair? *traced-procedures*) + (printf "Traced:~%~%") + (for-each (lambda (p) (printf " ~a~%" (cdr p))) *traced-procedures*)) ) + (else + (do-trace procs) ) ) ) + +(define (untrace . procs) + (cond ((null? procs) (do-untrace-all)) + (else (do-untrace procs))) + (void)) + +(define (break . procs) + (cond ((null? procs) + (when (pair? *broken-procedures*) + (printf "Breakpoints:~%~%") + (for-each (lambda (p) (printf " ~a~%" (cdr p))) *broken-procedures*)) ) + (else + (do-break procs) ) ) ) + +(define (unbreak . procs) + (cond ((null? procs) (do-unbreak-all)) + (else (do-unbreak procs)))) + +(define (continue #!optional (bp *last-breakpoint*)) + (cond (*last-breakpoint* + (let ((exn *last-breakpoint*)) + (set! *last-breakpoint* #f) + (break-resume exn) ) ) + (else (display "no breakpoint pending\n") ) ) ) + +(define c continue) + +(define (traced? proc) + (assq proc *traced-procedures*)) + +(define (trace/untrace . procs) + (for-each + (lambda (proc) + ((if (traced? proc) do-untrace do-trace) (list proc))) + procs)) + +(define (walk-module mname proc) + (let* ((m (##sys#find-module mname)) + (exps (nth-value 1 (##sys#module-exports m)))) + (for-each + (lambda (exp) + (let* ((realname (cdr exp)) + (prim (get realname '##core#primitive))) + (if prim + (warning "export is a core-library primitive - not traced" (car exp)) + (when (##sys#symbol-has-toplevel-binding? realname) + (let ((val (##sys#slot realname 0))) + (when (procedure? val) + (proc val))))))) + exps))) + +(define (trace-module . mnames) + (for-each + (lambda (mname) + (walk-module mname trace)) + mnames)) + +(define (untrace-module . mnames) + (for-each + (lambda (mname) + (walk-module + mname + (lambda (proc) + (when (traced? 
proc) + (do-untrace (list proc)))))) + mnames)) + +) ADDED utils/trace/trace.setup Index: utils/trace/trace.setup ================================================================== --- /dev/null +++ utils/trace/trace.setup @@ -0,0 +1,9 @@ +;;;; trace.setup -*- Scheme -*- + + +(compile -s trace.scm -O3 -d1 -j trace) +(compile -s trace.import.scm -O3 -d0) + +(install-extension + 'trace + '("trace.so" "trace.import.so")) ADDED widgets.scm Index: widgets.scm ================================================================== --- /dev/null +++ widgets.scm @@ -0,0 +1,189 @@ +(require-library srfi-4 iup) +(import srfi-4 iup iup-pplot iup-glcanvas) ;; iup-web + +(define (popup dlg . args) + (apply show dlg #:modal? 'yes args) + (destroy! dlg)) + +(define (properties ih) + (popup (element-properties-dialog ih)) + 'default) + +(define dlg + (dialog + (vbox + (hbox ; headline + (fill) + (frame (label " Inspect control and dialog classes " + fontsize: 15)) + (fill) + margin: '0x0) + + (label "") + (label "Dialogs" fontsize: 12) + (hbox + (button "dialog" + action: (lambda (self) (properties (dialog (vbox))))) + (button "color-dialog" + action: (lambda (self) (properties (color-dialog)))) + (button "file-dialog" + action: (lambda (self) (properties (file-dialog)))) + (button "font-dialog" + action: (lambda (self) (properties (font-dialog)))) + (button "message-dialog" + action: (lambda (self) (properties (message-dialog)))) + (fill) + margin: '0x0) + (hbox + (button "layout-dialog" + action: (lambda (self) (properties (layout-dialog)))) + (button "element-properties-dialog" + action: (lambda (self) + (properties + (element-properties-dialog (create 'user))))) + (fill) + margin: '0x0) + + (label "") + (label "Composition widgets" fontsize: 12) + (hbox + (button "fill" + action: (lambda (self) (properties (fill)))) + (button "hbox" + action: (lambda (self) (properties (hbox)))) + (button "vbox" + action: (lambda (self) (properties (vbox)))) + (button "zbox" + action: (lambda 
(self) (properties (zbox)))) + (button "radio" + action: (lambda (self) (properties (radio (vbox))))) + (button "normalizer" + action: (lambda (self) (properties (normalizer)))) + (button "cbox" + action: (lambda (self) (properties (cbox)))) + (button "sbox" + action: (lambda (self) (properties (sbox (vbox))))) + (button "split" + action: (lambda (self) (properties (split (vbox) (vbox))))) + (fill) + margin: '0x0) + + (label "") + (label "Standard widgets" fontsize: 12) + (hbox + (button "button" + action: (lambda (self) (properties (button)))) + (button "canvas" + action: (lambda (self) (properties (canvas)))) + (button "frame" + action: (lambda (self) (properties (frame)))) + (button "label" + action: (lambda (self) (properties (label)))) + (button "listbox" + action: (lambda (self) (properties (listbox)))) + (button "progress-bar" + action: (lambda (self) (properties (progress-bar)))) + (button "spin" + action: (lambda (self) (properties (spin)))) + (fill) + margin: '0x0) + (hbox + (button "tabs" + action: (lambda (self) (properties (tabs)))) + (button "textbox" + action: (lambda (self) (properties (textbox)))) + (button "toggle" + action: (lambda (self) (properties (toggle)))) + (button "treebox" + action: (lambda (self) (properties (treebox)))) + (button "valuator" + action: (lambda (self) (properties (valuator "")))) + (fill) + margin: '0x0) + + (label "") + (label "Additional widgets" fontsize: 12) + (hbox + (button "cells" + action: (lambda (self) (properties (cells)))) + (button "color-bar" + action: (lambda (self) (properties (color-bar)))) + (button "color-browser" + action: (lambda (self) (properties (color-browser)))) + (button "dial" + action: (lambda (self) (properties (dial "")))) + (button "matrix" + action: (lambda (self) (properties (matrix)))) + (fill) + margin: '0x0) + (hbox + (button "pplot" + action: (lambda (self) (properties (pplot)))) + (button "glcanvas" + action: (lambda (self) (properties (glcanvas)))) + ;; (button "web-browser" + ;; 
action: (lambda (self) (properties (web-browser)))) + (fill) + margin: '0x0) + + (label "") + (label "Menu widgets" fontsize: 12) + (hbox + (button "menu" + action: (lambda (self) (properties (menu)))) + (button "menu-item" + action: (lambda (self) (properties (menu-item)))) + (button "menu-separator" + action: (lambda (self) (properties (menu-separator)))) + (fill) + margin: '0x0) + + (label "") + (label "Images" fontsize: 12) + (hbox + (button "image/palette" + action: (lambda (self) + (properties + (image/palette 1 1 (u8vector->blob (u8vector 0)))))) + (button "image/rgb" + action: (lambda (self) + (properties + (image/rgb 1 1 (u8vector->blob (u8vector 0)))))) + (button "image/rgba" + action: (lambda (self) + (properties + (image/rgba 1 1 (u8vector->blob (u8vector 0)))))) + (button "image/file" + action: (lambda (self) + (properties + ;; same attributes as image/palette + (image/palette 1 1 (u8vector->blob (u8vector 0)))))) + ;; needs a file in current directory + ;(image/file "chicken.ico")))) ; ok + ;(image/file "chicken.png")))) ; doesn't work + (fill) + margin: '0x0) + + (label "") + (label "Other widgets" fontsize: 12) + (hbox + (button "clipboard" + action: (lambda (self) (properties (clipboard)))) + (button "timer" + action: (lambda (self) (properties (timer)))) + (button "spinbox" + action: (lambda (self) (properties (spinbox (vbox))))) + (fill) + margin: '0x0) + + (fill) + (button "E&xit" + expand: 'horizontal + action: (lambda (self) 'close)) + ) + margin: '15x15 + title: "Iup inspector")) + +(show dlg) +(main-loop) +(exit 0)

State stats