Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -21,28 +21,16 @@ PREFIX=$(PWD) CSCOPTS= INSTALL=install SRCFILES = -# common.scm items.scm launch.scm ods.scm runconfig.scm \ -# server.scm configf.scm db.scm keys.scm margs.scm \ -# process.scm runs.scm tasks.scm tests.scm genexample.scm \ -# http-transport.scm filedb.scm tdb.scm client.scm mt.scm \ -# ezsteps.scm lock-queue.scm sdb.scm rmt.scm api.scm \ -# subrun.scm portlogger.scm archive.scm env.scm \ -# diff-report.scm cgisetup/models/pgdb.scm - # module source files -# MSRCFILES = -# ftail.scm rmtmod.scm commonmod.scm removed MSRCFILES = autoload.scm dbi.scm ducttape-lib.scm pkts.scm stml2.scm \ - cookie.scm mutils.scm mtargs.scm apimod.scm commonmod.scm \ - dbmod.scm rmtmod.scm debugprint.scm mtver.scm csv-xml.scm \ - servermod.scm hostinfo.scm adjutant.scm - -# commonmod.scm dbmod.scm adjutant.scm ulex.scm \ -# rmtmod.scm apimod.scm + cookie.scm mutils.scm mtargs.scm apimod.scm \ + configfmod.scm commonmod.scm dbmod.scm rmtmod.scm \ + debugprint.scm mtver.scm csv-xml.scm servermod.scm \ + hostinfo.scm adjutant.scm processmod.scm GUISRCF = dashboard-context-menu.scm dashboard-tests.scm \ dashboard-guimonitor.scm gutils.scm dcommon.scm tree.scm \ vg.scm @@ -59,14 +47,20 @@ mofiles/%.o : %.scm mkdir -p mofiles csc $(CSCOPTS) -J -c $< -o mofiles/$*.o # module dependencies -mofiles/stml2.o : mofiles/dbi.o +mofiles/stml2.o : mofiles/cookie.o mofiles/dbi.o mofiles/dbi.o : mofiles/autoload.o + mofiles/apimod.o mofiles/dbmod.o mofiles/rmtmod.o : mofiles/commonmod.o -mofiles/commonmod.o : mofiles/mtargs.o mofiles/debugprint.o mofiles/megatest-version.o + +mofiles/commonmod.o : mofiles/mtargs.o mofiles/debugprint.o \ + mofiles/megatest-version.o mofiles/processmod.o \ + mofiles/configfmod.o + +dashboard.o megatest.o : db_records.scm megatest-fossil-hash.scm ADTLSCR=mt_laststep mt_runstep mt_ezstep HELPERS=$(addprefix $(PREFIX)/bin/,$(ADTLSCR)) DEPLOYHELPERS=$(addprefix deploytarg/,$(ADTLSCR)) MTESTHASH=$(shell fossil info|grep checkout:| awk '{print $$2}') @@ -86,12 +80,12 @@ PNGFILES = $(shell cd docs/manual;ls *png) # all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut ndboard all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut tcmt -mtest: $(OFILES) readline-fix.scm megatest.o $(MOFILES) $(MOIMPFILES) megatest-version.scm - csc $(CSCOPTS) $(OFILES) $(MOFILES) $(MOIMPFILES) megatest.o -o mtest +mtest: readline-fix.scm megatest.scm $(MOFILES) $(MOIMPFILES) megatest-version.scm + csc $(CSCOPTS) $(OFILES) $(MOFILES) $(MOIMPFILES) megatest.scm -o mtest showmtesthash: @echo $(MTESTHASH) dboard : $(OFILES) $(GOFILES) dashboard.scm $(MOFILES) $(MOIMPFILES) megatest-version.scm megatest-fossil-hash.scm @@ -152,67 +146,14 @@ $(PREFIX)/share/db/mt-pg.sql : mt-pg.sql mkdir -p $(PREFIX)/share/db $(INSTALL) mt-pg.sql $(PREFIX)/share/db/mt-pg.sql -# Special dependencies for the includes -$(MOFILE) $(MOIMPFILES) : megatest-fossil-hash.scm - -# common.o : mofiles/commonmod.o megatest-fossil-hash.scm -mofiles/dbmod.o : mofiles/configfmod.o - -# commonmod.o dashboard.o megatest.o tcmt.o apimod.o : megatest-fossil-hash.scm - -tests.o db.o launch.o runs.o dashboard-tests.o \ -dashboard-context-menu.o dashboard-guimonitor.o dashboard-main.o \ -monitor.o dashboard.o archive.o megatest.o : db_records.scm megatest-fossil-hash.scm - -tests.o runs.o dashboard.o dashboard-tests.o dashboard-context-menu.o dashboard-main.o : run_records.scm - -db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm - -tests.o tasks.o dashboard-tasks.o : task_records.scm - -runs.o : test_records.scm - -megatest.o : megatest-fossil-hash.scm megatest-version.scm *scm - -rmt.scm client.scm common.scm configf.scm dashboard-guimonitor.scm dashboard-tests.scm dashboard.scm db.scm dcommon.scm ezsteps.scm fs-transport.scm http-transport.scm index-tree.scm items.scm keys.scm launch.scm megatest.scm monitor.scm mt.scm newdashboard.scm runconfig.scm runs.scm server.scm tdb.scm tests.scm tree.scm : common_records.scm megatest-version.scm - -common_records.scm : altdb.scm - -# mofiles/stml2.o : mofiles/cookie.o -# configf.o : mofiles/commonmod.o - -vg.o dashboard.o : vg_records.scm megatest-version.scm - -dcommon.o : run_records.scm - -mofiles/stml2.o : mofiles/cookie.o - -# # special include based modules -# mofiles/pkts.o : pkts/pkts.scm -# mofiles/stml2.o : cookie.o -# # mofiles/mtargs.o : mtargs/mtargs.scm -# # mofiles/mtconfigf.o : mtconfigf/mtconfigf.scm -# mofiles/ulex.o : ulex/ulex.scm -# mofiles/mutils.o : mutils/mutils.scm -# mofiles/cookie.o : stml2/cookie.scm -# mofiles/stml2.o : stml2/stml2.scm - -# Temporary while transitioning to new routine -# runs.o : run-tests-queue-classic.scm run-tests-queue-new.scm - -# for the modularized stuff -mofiles/rmtmod.o : mofiles/commonmod.o - megatest-fossil-hash.scm : $(SRCFILES) megatest.scm *_records.scm echo "(define megatest-fossil-hash \"$(MTESTHASH)\")" > megatest-fossil-hash.new if ! diff -q megatest-fossil-hash.new megatest-fossil-hash.scm ; then echo copying .new to .scm;cp -f megatest-fossil-hash.new megatest-fossil-hash.scm;fi -$(OFILES) $(GOFILES) : common_records.scm - %.o : %.scm $(MOFILES) csc $(CSCOPTS) -c $< $(MOFILES) $(PREFIX)/bin/.$(ARCHSTR)/mtest : mtest utils/mk_wrapper @echo Installing to PREFIX=$(PREFIX) @@ -390,32 +331,10 @@ #====================================================================== # vg_records.scm : records.sh # ./records.sh -#====================================================================== -# Deploy section (not complete yet) -#====================================================================== - -$(DEPLOYHELPERS) : utils/mt_* - $(INSTALL) $< $@ - chmod a+X $@ - -deploytarg/apropos.so : Makefile - chicken-install -p deploytarg -deploy -keep-installed $(EGGS) - -deploy : deploytarg/mtest deploytarg/dboard $(DEPLOYHELPERS) deploytarg/nbfake deploytarg/remrun deploytarg/viewsceen deploytarg/nbfind deploytarg/apropos.so - -# puts deployed megatest in directory "megatest" -deploytarg/mtest : $(OFILES) megatest.o deploytarg/apropos.so - csc -deploy $(CSCOPTS) $(OFILES) megatest.scm -o deploytarg - mv deploytarg/deploytarg deploytarg/mtest - -deploytarg/dboard : $(OFILES) $(GOFILES) dashboard.scm deploytarg/apropos.so - csc -deploy $(OFILES) $(GOFILES) dashboard.scm -o deploytarg - mv deploytarg/deploytarg deploytarg/dboard - datashare-testing/sd : datashare.scm $(OFILES) csc $(CSCOPTS) datashare.scm $(OFILES) -o datashare-testing/sd datashare-testing/sdat: sharedat.scm $(OFILES) csc $(CSCOPTS) sharedat.scm $(OFILES) -o datashare-testing/sdat @@ -442,45 +361,26 @@ rm datashare-testing/sretrieve rm datashare-testing/spublish sauth : sauth-init datashare-testing/sauthorize datashare-testing/sretrieve datashare-testing/spublish -readline-fix.scm : - if [[ $(shell chicken-status | grep readline | awk '{print $4}' | cut -d. -f1) -gt 3 ]];then \ - echo "(define *use-new-readline* #f)" > readline-fix.scm; \ - else \ - echo "(define *use-new-readline* #t)" > readline-fix.scm;\ - fi - altdb.scm : echo ";; optional alternate db setup" > altdb.scm echo "(define *available-db* (make-hash-table))" >> altdb.scm - if csi -ne '(use mysql-client)';then \ - echo "(use mysql-client)(hash-table-set! *available-db* 'mysql #t)" >> altdb.scm; \ + if csi -ne '(import mysql-client)';then \ + echo "(import mysql-client)(hash-table-set! *available-db* 'mysql #t)" >> altdb.scm; \ fi - if csi -ne '(use postgresql)';then \ - echo "(use postgresql)(hash-table-set! *available-db* 'postgresql #t)" >> altdb.scm;\ + if csi -ne '(import postgresql)';then \ + echo "(import postgresql)(hash-table-set! *available-db* 'postgresql #t)" >> altdb.scm;\ fi portlogger-example : portlogger-example.scm api.o archive.o client.o common.o configf.o dashboard-tests.o dashboard-context-menu.o db.o dcommon.o ezsteps.o filedb.o genexample.o gutils.o http-transport.o items.o keys.o launch.o lock-queue.o margs.o mt.o ods.o portlogger.o process.o rmt.o runconfig.o runs.o sdb.o server.o synchash.o tasks.o tdb.o tests.o tree.o csc $(CSCOPTS) portlogger-example.scm api.o archive.o client.o common.o configf.o dashboard-tests.o dashboard-context-menu.o db.o dcommon.o ezsteps.o filedb.o genexample.o gutils.o http-transport.o items.o keys.o launch.o lock-queue.o margs.o mt.o ods.o portlogger.o process.o rmt.o runconfig.o runs.o sdb.o server.o synchash.o tasks.o tdb.o tests.o tree.o -# create a pdf dot graphviz diagram from notations in rmt.scm -rmt.pdf : rmt.scm - grep ';;DOT' rmt.scm | sed -e 's/.*;;DOT //' > rmt.dot;dot -Tpdf rmt.dot -o rmt.pdf - buildmanual: cd docs/manual && make -wikipage=plan -editwiki: - cd docs/manual && ../../utils/editwiki $(wikipage) - -viewmanual: - arora docs/manual/megatest_manual.html - targets: @grep : Makefile | perl -ne '/^([A-Za-z0-9_-]+):/ && print "$$1\n"' - unit : cd tests;make unit Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -94,11 +94,11 @@ (server-id (caddr (cddr server-dat)))) (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) (if (and (not area-dat) (not *runremote*)) (begin - (set! *runremote* (make-remote)) + (set! *runremote* (make-and-init-remote)) (let* ((server-info (remote-server-info *runremote*))) (if server-info (begin (remote-server-url-set! *runremote* (server:record->url server-info)) (remote-server-id-set! *runremote* (server:record->id server-info))))))) Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -43,3075 +43,64 @@ ;; (if (null? code) ;; (old-exit) ;; (old-exit code))) -;; execute thunk, return value. If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* . -;; arguments - thunk, message -(define (common:fail-safe thunk warning-message-on-exception) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "notable but nonfatal condition - "warning-message-on-exception", exn=" exn) - (debug:print-info 0 *default-log-port* - (string-substitute "\n?Error:" "nonfatal condition:" - (with-output-to-string - (lambda () - (print-error-message exn) )))) - (debug:print-info 0 *default-log-port* " -- continuing after nonfatal condition...") - #f) - (thunk))) - -(define getenv get-environment-variable) -(define (safe-setenv key val) - (if (or (substring-index "!" key) - (substring-index ":" key) ;; variables containing : are for internal use and cannot be environment variables. - (substring-index "." key)) ;; periods are not allowed in environment variables - (debug:print-error 4 *default-log-port* "skip setting internal use only variables containing \":\" or starting with \"!\"") - (if (and (string? val) - (string? key)) - (handle-exceptions - exn - (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val ", exn=" exn) - (setenv key val)) - (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val)))) - -(define home (getenv "HOME")) -(define user (getenv "USER")) - - -;; returns list of fd count, socket count -(define (get-file-descriptor-count #!key (pid (current-process-id ))) - (list - (length (glob (conc "/proc/" pid "/fd/*"))) - (length (filter identity (map socket? (glob (conc "/proc/" pid "/fd/*"))))) - ) -) - - - -;; GLOBALS - -;; CONTEXTS -(defstruct cxt - (taskdb #f) - (cmutex (make-mutex))) -;; (define *contexts* (make-hash-table)) -;; (define *context-mutex* (make-mutex)) - -;; ;; safe method for accessing a context given a toppath -;; ;; -;; (define (common:with-cxt toppath proc) -;; (mutex-lock! *context-mutex*) -;; (let ((cxt (hash-table-ref/default *contexts* toppath #f))) -;; (if (not cxt) -;; (set! cxt (let ((x (make-cxt)))(hash-table-set! *contexts* toppath x) x))) -;; (let ((cxt-mutex (cxt-mutex cxt))) -;; (mutex-unlock! *context-mutex*) -;; (mutex-lock! cxt-mutex) -;; (let ((res (proc cxt))) -;; (mutex-unlock! cxt-mutex) -;; res)))) - -;; A hash table that can be accessed by #{scheme ...} calls in -;; config files. Allows communicating between confgs -;; -(define *user-hash-data* (make-hash-table)) - -(define *db-keys* #f) - -(define *pkts-info* (make-hash-table)) ;; store stuff like the last parent here -(define *configinfo* #f) ;; raw results from setup, includes toppath and table from megatest.config -(define *runconfigdat* #f) ;; run configs data -(define *configdat* #f) ;; megatest.config data -(define *configstatus* #f) ;; status of data; 'fulldata : all processing done, #f : no data yet, 'partialdata : partial read done -(define *toppath* #f) -(define *already-seen-runconfig-info* #f) - -(define *test-meta-updated* (make-hash-table)) -(define *globalexitstatus* 0) ;; attempt to work around possible thread issues -(define *passnum* 0) ;; when running track calls to run-tests or similar -;; (define *alt-log-file* #f) ;; used by -log -(define *common:denoise* (make-hash-table)) ;; for low noise printing -;; (define *default-log-port* (current-error-port)) -(define *time-zero* (current-seconds)) ;; for the watchdog -(define *default-area-tag* "local") - -;; DATABASE -(define *dbstruct-db* #f) ;; used to cache the dbstruct in db:setup. Goal is to remove this. -;; db stats -(define *db-stats* (make-hash-table)) ;; hash of vectors < count duration-total > -(define *db-stats-mutex* (make-mutex)) -;; db access -(define *db-last-access* (current-seconds)) ;; last db access, used in server -(define *db-write-access* #t) -;; db sync -(define *db-last-sync* 0) ;; last time the sync to megatest.db happened -(define *db-sync-in-progress* #f) ;; if there is a sync in progress do not try to start another -(define *db-multi-sync-mutex* (make-mutex)) ;; protect access to *db-sync-in-progress*, *db-last-sync* -;; task db -(define *task-db* #f) ;; (vector db path-to-db) -(define *db-access-allowed* #t) ;; flag to allow access -(define *db-access-mutex* (make-mutex)) -(define *db-transaction-mutex* (make-mutex)) -(define *db-cache-path* #f) -(define *db-with-db-mutex* (make-mutex)) -(define *db-api-call-time* (make-hash-table)) ;; hash of command => (list of times) -;; no sync db -(define *no-sync-db* #f) - -;; SERVER -(define *my-client-signature* #f) -(define *transport-type* 'http) ;; override with [server] transport http|rpc|nmsg -(define *runremote* #f) ;; if set up for server communication this will hold -;; (define *max-cache-size* 0) -(define *logged-in-clients* (make-hash-table)) -(define *server-id* #f) -(define *server-info* #f) ;; good candidate for easily convert to non-global -(define *time-to-exit* #f) -(define *server-run* #t) -(define *run-id* #f) -(define *server-kind-run* (make-hash-table)) -(define *home-host* #f) -;; (define *total-non-write-delay* 0) -(define *heartbeat-mutex* (make-mutex)) -(define *api-process-request-count* 0) -(define *max-api-process-requests* 0) -(define *server-overloaded* #f) -(define *writes-total-delay* 0) - -;; client -(define *rmt-mutex* (make-mutex)) ;; remote access calls mutex - -;; RPC transport -(define *rpc:listener* #f) - -;; KEY info -(define *target* (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN -(define *keys* (make-hash-table)) ;; cache the keys here -(define *keyvals* (make-hash-table)) -(define *toptest-paths* (make-hash-table)) ;; cache toptest path settings here -(define *test-paths* (make-hash-table)) ;; cache test-id to test run paths here -(define *test-ids* (make-hash-table)) ;; cache run-id, testname, and item-path => test-id -(define *test-info* (make-hash-table)) ;; cache the test info records, update the state, status, run_duration etc. from testdat.db - -(define *run-info-cache* (make-hash-table)) ;; run info is stable, no need to reget -(define *launch-setup-mutex* (make-mutex)) ;; need to be able to call launch:setup often so mutex it and re-call the real deal only if *toppath* not set -(define *homehost-mutex* (make-mutex)) - -;; Miscellaneous -(define *triggers-mutex* (make-mutex)) ;; block overlapping processing of triggers -(define *numcpus-cache* (make-hash-table)) - -;; (use posix-extras pathname-expand files) - -;; this plugs a hole in posix-extras in recent chicken versions > 4.9) -;; (let-values (( (chicken-release-number chicken-major-version) -;; (apply values -;; (map string->number -;; (take -;; (string-split (chicken-version) ".") -;; 2))))) -;; (let ((resolve-pathname-broken? -;; (or (> chicken-release-number 4) -;; (and (eq? 4 chicken-release-number) (> chicken-major-version 9))))) -;; (if resolve-pathname-broken? -;; (define ##sys#expand-home-path pathname-expand)))) - -;; (define (realpath x) (resolve-pathname (pathname-expand (or x "/dev/null")) )) -;; (define (realpath x)(pathname-expand (or x "/dev/null")) ) -(define (realpath x) - (with-input-from-pipe - (string-append "readlink -f \""x"\"") - read-line)) - -(define (common:get-this-exe-fullpath #!key (argv (argv))) - (let* ((this-script - (cond - ((and (> (length argv) 2) - (string-match "^(.*/csi|csi)$" (car argv)) - (string-match "^-(s|ss|sx|script)$" (cadr argv))) - (caddr argv)) - (else (car argv)))) - (fullpath (realpath this-script))) - fullpath)) - -;;====================================================================== - -(define *common:this-exe-fullpath* (common:get-this-exe-fullpath)) -(define *common:this-exe-dir* (pathname-directory *common:this-exe-fullpath*)) -(define *common:this-exe-name* (pathname-strip-directory *common:this-exe-fullpath*)) - -(define (common:get-sync-lock-filepath) - (let* ((tmp-area (common:get-db-tmp-area)) - (lockfile (conc tmp-area "/megatest.db.sync-lock"))) - lockfile)) - -;;====================================================================== -;; when called from a wrapper I need sometimes to find the calling -;; wrapper, this is for dashboard to find the correct megatest. -;; -(define (common:find-local-megatest #!optional (progname "megatest")) - (let ((res (filter file-exists? - (map (lambda (updir) - (let* ((lm (car (argv))) - (dir (pathname-directory lm)) - (exe (pathname-strip-directory lm))) - (conc (if dir (conc dir "/") "") - (case (string->symbol exe) - ((dboard) (conc updir progname)) - ((mtest) (conc updir progname)) - ((dashboard) progname) - (else exe))))) - '("../../" "../"))))) - (if (null? res) - (begin - (debug:print 0 *default-log-port* "Failed to find this executable! Using what can be found on the path") - progname) - (car res)))) - -(define *common:logpro-exit-code->status-sym-alist* - '( ( 0 . pass ) - ( 1 . fail ) - ( 2 . warn ) - ( 3 . check ) - ( 4 . waived ) - ( 5 . abort ) - ( 6 . skip ))) - -(define (common:logpro-exit-code->status-sym exit-code) - (or (alist-ref exit-code *common:logpro-exit-code->status-sym-alist*) 'fail)) - -(define (common:worse-status-sym ss1 ss2) - (let loop ((status-syms-remaining '(abort fail check skip warn waived pass))) - (cond - ((null? status-syms-remaining) - 'fail) - ((eq? (car status-syms-remaining) ss1) - ss1) - ((eq? (car status-syms-remaining) ss2) - ss2) - (else - (loop (cdr status-syms-remaining)))))) - -(define (common:steps-can-proceed-given-status-sym status-sym) - (if (member status-sym '(warn waived pass)) - #t - #f)) - -(define (status-sym->string status-sym) - (case status-sym - ((pass) "PASS") - ((fail) "FAIL") - ((warn) "WARN") - ((check) "CHECK") - ((waived) "WAIVED") - ((abort) "ABORT") - ((skip) "SKIP") - (else "FAIL"))) - -(define (common:logpro-exit-code->test-status exit-code) - (status-sym->string (common:logpro-exit-code->status-sym exit-code))) - -(defstruct remote - (hh-dat (common:get-homehost)) ;; homehost record ( addr . hhflag ) - (server-url #f) ;; (server:check-if-running *toppath*) #f)) - (server-id #f) - (server-info (if *toppath* (server:check-if-running *toppath*) #f)) - (last-server-check 0) ;; last time we checked to see if the server was alive - (conndat #f) - (transport *transport-type*) - (server-timeout (server:expiration-timeout)) - (force-server #f) - (ro-mode #f) - (ro-mode-checked #f)) ;; flag that indicates we have checked for ro-mode - -;; launching and hosts -(defstruct host - (reachable #f) - (last-update 0) - (last-used 0) - (last-cpuload 1)) - -(define *host-loads* (make-hash-table)) - -;; cache environment vars for each run here -(define *env-vars-by-run-id* (make-hash-table)) - -;; Testconfig and runconfig caches. -(define *testconfigs* (make-hash-table)) ;; test-name => testconfig -(define *runconfigs* (make-hash-table)) ;; target => runconfig - -;; This is a cache of pre-reqs met, don't re-calc in cases where called with same params less than -;; five seconds ago -(define *pre-reqs-met-cache* (make-hash-table)) - -;; cache of verbosity given string -;; -(define *verbosity-cache* (make-hash-table)) - -(define (common:clear-caches) - (set! *target* (make-hash-table)) - (set! *keys* (make-hash-table)) - (set! *keyvals* (make-hash-table)) - (set! *toptest-paths* (make-hash-table)) - (set! *test-paths* (make-hash-table)) - (set! *test-ids* (make-hash-table)) - (set! *test-info* (make-hash-table)) - (set! *run-info-cache* (make-hash-table)) - (set! *env-vars-by-run-id* (make-hash-table)) - (set! *test-id-cache* (make-hash-table))) - -;; Generic string database -(define sdb:qry #f) ;; (make-sdb:qry)) ;; 'init #f) -;; Generic path database -(define *fdb* #f) - -(define *last-launch* (current-seconds)) ;; use for throttling the launch rate. Would be better to use the db and last time of a test in LAUNCHED state. - -;;====================================================================== -;; V E R S I O N -;;====================================================================== - -(define (common:get-full-version) - (conc megatest-version "-" megatest-fossil-hash)) - -(define (common:version-signature) - (conc megatest-version "-" (substring megatest-fossil-hash 0 4))) - -;;====================================================================== -;; from metadat lookup MEGATEST_VERSION -;; -(define (common:get-last-run-version) ;; RADT => How does this work in send-receive function??; assume it is the value saved in some DB - (rmt:get-var "MEGATEST_VERSION")) - -(define (common:get-last-run-version-number) - (string->number - (substring (common:get-last-run-version) 0 6))) - -(define (common:set-last-run-version) - (rmt:set-var "MEGATEST_VERSION" (common:version-signature))) - -;;====================================================================== -;; postive number if megatest version > db version -;; negative number if megatest version < db version -(define (common:version-db-delta) - (- megatest-version (common:get-last-run-version-number))) - -(define (common:version-changed?) - (not (equal? (common:get-last-run-version) - (common:version-signature)))) - -(define (common:api-changed?) - (not (equal? (substring (->string megatest-version) 0 4) - (substring (conc (common:get-last-run-version)) 0 4)))) - -;;====================================================================== -;; Move me elsewhere ... -;; RADT => Why do we meed the version check here, this is called only if version misma -;; -(define (common:cleanup-db dbstruct #!key (full #f)) - (apply db:multi-db-sync - dbstruct - 'schema - ;; 'new2old - 'killservers - 'adj-target - ;; 'old2new - 'new2old - ;; (if full - '(dejunk) - ;; '()) - ) - (if (common:api-changed?) - (common:set-last-run-version))) - -(define (common:snapshot-file filepath #!key (subdir ".") ) - (if (file-exists? filepath) - (let* ((age-sec (lambda (file) - (if (file-exists? file) - (- (current-seconds) (file-modification-time file)) - 1000000000))) ;; return really old value if file doesn't exist. we want to clobber it if old or not exist. - (ok-flag #t) - (age-mins (lambda (file) (/ (age-sec file) 60))) - (age-hrs (lambda (file) (/ (age-mins file) 60))) - (age-days (lambda (file) (/ (age-hrs file) 24))) - (age-wks (lambda (file) (/ (age-days file) 7))) - (docmd (lambda (cmd) - (cond - (ok-flag - (let ((res (system cmd))) - (cond - ((eq? 0 res) - #t) - (else - (set! ok-flag #f) - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Command failed with exit code " - (if (< res 0) - res - (/ res 8)) " ["cmd"]" ) - #f)))) - (else - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Not runnining command due to prior error. ["cmd"]") - #f)))) - (copy (lambda (src dest) (docmd (conc "/bin/cp '"src"' '"dest"'")))) - (copy+zip (lambda (src dest) (docmd (conc "gzip -c - < '"src"' > '"dest"'")))) - (fullpath (realpath filepath)) - (basedir (pathname-directory fullpath)) - (basefile (pathname-strip-directory fullpath)) - ;;(prevfile (conc filepath ".prev.gz")) - (minsfile (conc basedir "/" subdir "/" basefile ".mins.gz")) - (hrsfile (conc basedir "/" subdir "/" basefile ".hrs.gz")) - (daysfile (conc basedir "/" subdir "/" basefile ".days.gz")) - (wksfile (conc basedir "/" subdir "/" basefile ".weeks.gz"))) - - ;; create subdir it not exists - (if (not (directory-exists? (conc basedir "/" subdir))) - (docmd (conc "/bin/mkdir -p '"(conc basedir "/" subdir)"'"))) - - ;; copy&zip to .mins if not exists - (if (not (file-exists? minsfile)) - (copy+zip filepath minsfile)) - ;; copy .mins to .hrs if not exists - (if (not (file-exists? hrsfile)) - (copy minsfile hrsfile)) - ;; copy .hrs to .days if not exists - (if (not (file-exists? daysfile)) - (copy hrsfile daysfile)) - ;; copy .days to .weeks if not exists - (if (not (file-exists? wksfile)) - (copy daysfile wksfile)) - - - ;; if age(.mins.gz) >= 1h: - ;; copy .mins.gz .hrs.gz - ;; copy .mins.gz - (when (>= (age-mins minsfile) 1) - (copy minsfile hrsfile) - (copy+zip filepath minsfile)) - - ;; if age(.hrs.gz) >= 1d: - ;; copy .hrs.gz .days.gz - ;; copy .mins.gz .hrs.gz - (when (>= (age-days hrsfile) 1) - (copy hrsfile daysfile) - (copy minsfile hrsfile)) - - ;; if age(.days.gz) >= 1w: - ;; copy .days.gz .weeks.gz - ;; copy .hrs.gz .days.gz - (when (>= (age-wks daysfile) 1) - (copy daysfile wksfile) - (copy hrsfile daysfile)) - #t) - #f)) - -;;====================================================================== -;; Rotate logs, logic: -;; if > 500k and older than 1 week: -;; remove previous compressed log and compress this log -;; WARNING: This proc operates assuming that it is in the directory above the -;; logs directory you wish to log-rotate. -;; -(define (common:rotate-logs) - (let* ((all-files (make-hash-table)) - (stats (make-hash-table)) - (inc-stat (lambda (key) - (hash-table-set! stats key (+ (hash-table-ref/default stats key 0) 1)))) - (max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "300")))) ;; name -> age - (if (not (directory-exists? "logs"))(create-directory "logs")) - (directory-fold - (lambda (file rem) - (handle-exceptions - exn - (begin - (debug:print-info 2 *default-log-port* "unable to rotate log " file ", probably handled by another process, this is safe to ignore. exn=" exn) - (debug:print 2 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - ;; (print-call-chain (current-error-port)) ;; - ) - (let* ((fullname (conc "logs/" file)) - (mod-time (file-modification-time fullname)) - (file-age (- (current-seconds) mod-time)) - (file-old (> file-age (* 48 60 60))) - (file-big (> (file-size fullname) 200000))) - (hash-table-set! all-files file mod-time) - (if (or (and (string-match "^.*.log" file) - file-old - file-big) - (and (string-match "^server-.*.log" file) - file-old)) - (let ((gzfile (conc fullname ".gz"))) - (if (common:file-exists? gzfile) - (begin - (debug:print-info 0 *default-log-port* "removing " gzfile) - (delete-file* gzfile) - (hash-table-delete! all-files gzfile) ;; needed? - )) - (debug:print-info 0 *default-log-port* "compressing " file) - (system (conc "gzip " fullname)) - (inc-stat "gzipped") - (hash-table-set! all-files (conc file ".gz") file-age) ;; add the .gz file and remove the base file - (hash-table-delete! all-files file) - ) - (if (and (> file-age (* (string->number (or (configf:lookup *configdat* "setup" "log-expire-days") "30")) 24 3600)) - (file-exists? fullname)) ;; just in case it was gzipped - will get it next time - (handle-exceptions - exn - #f - (if (directory? fullname) - (begin - (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") - (inc-stat "directories")) - (begin - (delete-file* fullname) - (inc-stat "deleted"))) - (hash-table-delete! all-files file))))))) - '() - "logs") - (for-each - (lambda (category) - (let ((quant (hash-table-ref/default stats category 0))) - (if (> quant 0) - (debug:print-info 0 *default-log-port* category " log files: " quant)))) - `("deleted" "gzipped" "directories")) - (let ((num-logs (hash-table-size all-files))) - (if (> num-logs max-allowed) ;; because NFS => don't let number of logs exceed 300 - (let ((files (take (sort (hash-table-keys all-files) - (lambda (a b) - (< (hash-table-ref all-files a)(hash-table-ref all-files b)))) - (- num-logs max-allowed)))) - (for-each - (lambda (file) - (let* ((fullname (conc "logs/" file))) - (if (directory? fullname) - (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") - (handle-exceptions - exn - (debug:print-error 0 *default-log-port* "failed to remove " fullname ", exn=" exn) - (delete-file* fullname))))) - files) - (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files.")))))) - -;;====================================================================== -;; Force a megatest cleanup-db if version is changed and skip-version-check not specified -;; Do NOT check if not on homehost! -;; -(define (common:exit-on-version-changed) - (if (common:on-homehost?) - (if (common:api-changed?) - (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config")) - (dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db")) - (read-only (not (file-writable? dbfile))) - (dbstruct (db:setup #t))) - (debug:print 0 *default-log-port* - "WARNING: Version mismatch!\n" - " expected: " (common:version-signature) "\n" - " got: " (common:get-last-run-version)) - (cond - ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t) - ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only) - (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db - (debug:print 0 *default-log-port* " I see you are the owner of megatest.config, attempting to cleanup and reset to new version") - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "Failed to switch versions. exn=" exn) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (print-call-chain (current-error-port)) - (exit 1)) - (common:cleanup-db dbstruct))) - ((not (common:file-exists? mtconf)) - (debug:print 0 *default-log-port* " megatest.config does not exist in this area. Cannot proceed with megatest version migration.") - (exit 1)) - ((not (common:file-exists? dbfile)) - (debug:print 0 *default-log-port* " megatest.db does not exist in this area. Cannot proceed with megatest version migration.") - (exit 1)) - ((not (eq? (current-user-id)(file-owner mtconf))) - (debug:print 0 *default-log-port* " You do not own megatest.db in this area. Cannot proceed with megatest version migration.") - (exit 1)) - (read-only - (debug:print 0 *default-log-port* " You have read-only access to this area. Cannot proceed with megatest version migration.") - (exit 1)) - (else - (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"") - (exit 1))))))) -;;====================================================================== -;; (begin -;; (debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.") -;; (exit 1)))) - -;;====================================================================== -;; S P A R S E A R R A Y S -;;====================================================================== - -(define (make-sparse-array) - (let ((a (make-sparse-vector))) - (sparse-vector-set! a 0 (make-sparse-vector)) - a)) - -(define (sparse-array? a) - (and (sparse-vector? a) - (sparse-vector? (sparse-vector-ref a 0)))) - -(define (sparse-array-ref a x y) - (let ((row (sparse-vector-ref a x))) - (if row - (sparse-vector-ref row y) - #f))) - -(define (sparse-array-set! a x y val) - (let ((row (sparse-vector-ref a x))) - (if row - (sparse-vector-set! row y val) - (let ((new-row (make-sparse-vector))) - (sparse-vector-set! a x new-row) - (sparse-vector-set! new-row y val))))) - -;;====================================================================== -;; L O C K E R S A N D B L O C K E R S -;;====================================================================== - -;; block further accesses to databases. Call this before shutting db down -(define (common:db-block-further-queries) - (mutex-lock! *db-access-mutex*) - (set! *db-access-allowed* #f) - (mutex-unlock! *db-access-mutex*)) - -(define (common:db-access-allowed?) - (let ((val (begin - (mutex-lock! *db-access-mutex*) - *db-access-allowed* - (mutex-unlock! *db-access-mutex*)))) - val)) - -;;====================================================================== -;; U S E F U L S T U F F -;;====================================================================== - -;; convert things to an alist or assoc list, #f gets converted to "" -;; -(define (common:to-alist dat) - (cond - ((list? dat) (map common:to-alist dat)) - ((vector? dat) - (map common:to-alist (vector->list dat))) - ((pair? dat) - (cons (common:to-alist (car dat)) - (common:to-alist (cdr dat)))) - ((hash-table? dat) - (map common:to-alist (hash-table->alist dat))) - (else - (if dat - dat - "")))) - -(define (common:alist-ref/default key alist default) - (or (alist-ref key alist) default)) - -(define (common:low-noise-print waitval . keys) - (let* ((key (string-intersperse (map conc keys) "-" )) - (lasttime (hash-table-ref/default *common:denoise* key 0)) - (currtime (current-seconds))) - (if (> (- currtime lasttime) waitval) - (begin - (hash-table-set! *common:denoise* key currtime) - #t) - #f))) - -(define (common:get-megatest-exe) - (or (getenv "MT_MEGATEST") "megatest")) - -(define (common:read-encoded-string instr) - (handle-exceptions - exn - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "received bad encoded string \"" instr "\", message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) - (print-call-chain (current-error-port)) - #f) - (read (open-input-string (base64:base64-decode instr)))) - (read (open-input-string (z3:decode-buffer (base64:base64-decode instr)))))) - -;;====================================================================== -;; S T A T E S A N D S T A T U S E S -;;====================================================================== - -;; BBnote: *common:std-states* - dashboard filter control and test control state buttons defined here; used in set-fields-panel and dboard:make-controls -(define *common:std-states* ;; for toggle buttons in dashboard - '( - (0 "ARCHIVED") - (1 "STUCK") - (2 "KILLREQ") - (3 "KILLED") - (4 "NOT_STARTED") - (5 "COMPLETED") - (6 "LAUNCHED") - (7 "REMOTEHOSTSTART") - (8 "RUNNING") - )) - -(define *common:dont-roll-up-states* - '("DELETED" - "REMOVING" - "CLEANING" - "ARCHIVE_REMOVING" - )) - -;;====================================================================== -;; BBnote: *common:std-statuses* dashboard filter control and test control status buttons defined here; used in set-fields-panel and dboard:make-controls -;; note these statuses are sorted from better to worse. -;; This sort order is important to dcommon:status-compare3 and db:set-state-status-and-roll-up-items -(define *common:std-statuses* - '(;; (0 "DELETED") - (1 "n/a") - (2 "PASS") - (3 "SKIP") - (4 "WARN") - (5 "WAIVED") - (6 "CHECK") - (7 "STUCK/DEAD") - (8 "DEAD") - (9 "FAIL") - (10 "PREQ_FAIL") - (11 "PREQ_DISCARDED") - (12 "ABORT"))) - -(define *common:ended-states* ;; states which indicate the test is stopped and will not proceed - '("COMPLETED" "ARCHIVED" "KILLED" "KILLREQ" "STUCK" "INCOMPLETE" )) - -(define *common:badly-ended-states* ;; these roll up as CHECK, i.e. results need to be checked - '("KILLED" "KILLREQ" "STUCK" "INCOMPLETE" "DEAD")) - -(define *common:well-ended-states* ;; an item's prereq in this state allows item to proceed - '("PASS" "WARN" "CHECK" "WAIVED" "SKIP")) - -;; BBnote: *common:running-states* used from db:set-state-status-and-roll-up-items -(define *common:running-states* ;; test is either running or can be run - '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED" "STARTED")) - -(define *common:cant-run-states* ;; These are stopping conditions that prevent a test from being run - '("COMPLETED" "KILLED" "UNKNOWN" "INCOMPLETE" "ARCHIVED")) - -(define *common:not-started-ok-statuses* ;; if not one of these statuses when in not_started state treat as dead - '("n/a" "na" "PASS" "FAIL" "WARN" "CHECK" "WAIVED" "DEAD" "SKIP")) - -;;====================================================================== -;; group tests into buckets corresponding to rollup -;;; Running, completed-pass, completed-non-pass + worst status, not started. -;; filter out -;(define (common:categorize-items-for-rollup in-tests) -; ( - -(define (common:special-sort items order comp) - (let ((items-order (map reverse order)) - (acomp (or comp >))) - (sort items - (lambda (a b) - (let ((a-num (cadr (or (assoc a items-order) '(0 0)))) - (b-num (cadr (or (assoc b items-order) '(0 0))))) - (acomp a-num b-num)))))) - -;;====================================================================== -;; ;; given a toplevel with currstate, currstatus apply state and status -;; ;; => (newstate . newstatus) -;; (define (common:apply-state-status currstate currstatus state status) -;; (let* ((cstate (string->symbol (string-downcase currstate))) -;; (cstatus (string->symbol (string-downcase currstatus))) -;; (sstate (string->symbol (string-downcase state))) -;; (sstatus (string->symbol (string-downcase status))) -;; (nstate #f) -;; (nstatus #f)) -;; (set! nstate -;; (case cstate -;; ((completed not_started killed killreq stuck archived) -;; (case sstate ;; completed -> sstate -;; ((completed killed killreq stuck archived) completed) -;; ((running remotehoststart launched) running) -;; (else unknown-error-1))) -;; ((running remotehoststart launched) -;; (case sstate -;; ((completed killed killreq stuck archived) #f) ;; need to look at all items -;; ((running remotehoststart launched) running) -;; (else unknown-error-2))) -;; (else unknown-error-3))) -;; (set! nstatus -;; (case sstatus -;; ((pass) -;; (case nstate -;; ((pass n/a deleted) pass) -;; ((warn) warn) -;; ((fail) fail) -;; ((check) check) -;; ((waived) waived) -;; ((skip) skip) -;; ((stuck/dead) stuck) -;; ((abort) abort) -;; (else unknown-error-4))) -;; ((warn) -;; (case nstate -;; ((pass warn n/a skip deleted) warn) -;; ((fail) fail) -;; ((check) check) -;; ((waived) waived) -;; ((stuck/dead) stuck) -;; (else unknown-error-5))) -;; ((fail) -;; (case nstate -;; ((pass warn fail check n/a waived skip deleted stuck/dead stuck) fail) -;; ((abort) abort) -;; (else unknown-error-6))) -;; (else unknown-error-7))) -;; (cons -;; (if nstate (symbol->string nstate) nstate) -;; (if nstatus (symbol->string nstatus) nstatus)))) - -;;====================================================================== -;; D E B U G G I N G S T U F F -;;====================================================================== - -(define *verbosity* 1) -(define *logging* #f) - -(define (get-with-default val default) - (let ((val (args:get-arg val))) - (if val val default))) - -(define (assoc/default key lst . default) - (let ((res (assoc key lst))) - (if res (cadr res)(if (null? default) #f (car default))))) - -(define (common:get-testsuite-name) - (or (configf:lookup *configdat* "setup" "area-name") ;; megatest is a flexible tool, testsuite is too limiting a description. - (configf:lookup *configdat* "setup" "testsuite" ) - (getenv "MT_TESTSUITE_NAME") - (pathname-file (or (if (string? *toppath* ) - (pathname-file *toppath*) - #f) - (common:get-toppath #f))) - "please-set-setup-area-name")) ;; (pathname-file (current-directory))))) - -;;====================================================================== -;; safe getting of toppath -(define (common:get-toppath areapath) - (or *toppath* - (if areapath - (begin - (set! *toppath* areapath) - (setenv "MT_RUN_AREA_HOME" areapath) - areapath) - #f) - (if (getenv "MT_RUN_AREA_HOME") - (begin - (set! *toppath* (getenv "MT_RUN_AREA_HOME")) - *toppath*) - #f) - ;; last resort, look for megatest.config - (let loop ((thepath (realpath "."))) - (if (file-exists? (conc thepath "/megatest.config")) - thepath - (if (equal? thepath "/") - (begin - (debug:print-error 0 *default-log-port* "Unable to find megatest home directory.") - #f) - (loop (pathname-directory thepath))))) - )) - -;;====================================================================== -;; redefine for future cleanup (converge on area-name, the more generic -;; -(define common:get-area-name common:get-testsuite-name) - -(define (common:get-db-tmp-area . junk) - (if *db-cache-path* - *db-cache-path* - (if *toppath* ;; common:get-create-writeable-dir - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "Couldn't create path to " *db-cache-path* ", exn=" exn) - (exit 1)) - (let* ((tsname (common:get-testsuite-name)) - (dbpath (common:get-create-writeable-dir - (list (conc "/tmp/" (current-user-name) - "/megatest_localdb/" - tsname "/" - (string-translate *toppath* "/" ".")) - (conc "/tmp/" (current-process-id) ;; just in case we have an issue with the dir by own user name - "/megatest_localdb/" - tsname - (string-translate *toppath* "/" ".")) - )))) - (set! *db-cache-path* dbpath) - dbpath)) - #f))) - -(define (common:get-area-path-signature) - (message-digest-string (md5-primitive) *toppath*)) - -(define (common:get-signature str) - (message-digest-string (md5-primitive) str)) - -;;====================================================================== -;; E X I T H A N D L I N G -;;====================================================================== - -(define (common:run-sync?) - (and (common:on-homehost?) - (args:get-arg "-server"))) - -(define (common:human-time) - (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S")) - -;;====================================================================== -;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp -;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server) -;; -(define (common:readonly-watchdog dbstruct) - (let ((just-testing 0.0501)) - (thread-sleep! just-testing)) ;; (/ 1 20)) ;; 0.051) ;; delay for startup - (debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.") - ;; sync megatest.db to /tmp/.../megatst.db - (let* ((sync-cool-off-duration 3) - (golden-mtdb (dbr:dbstruct-mtdb dbstruct)) - (golden-mtpath (db:dbdat-get-path golden-mtdb)) - (tmp-mtdb (dbr:dbstruct-tmpdb dbstruct)) - (tmp-mtpath (db:dbdat-get-path tmp-mtdb))) - (debug:print-info 0 *default-log-port* "Read-only periodic sync thread started.") - (let loop ((last-sync-time 0)) - (debug:print-info 13 *default-log-port* "loop top tmp-mtpath="tmp-mtpath" golden-mtpath="golden-mtpath) - (let* ((duration-since-last-sync (- (current-seconds) last-sync-time))) - (debug:print-info 13 *default-log-port* "duration-since-last-sync="duration-since-last-sync) - (if (and (not *time-to-exit*) - (< duration-since-last-sync sync-cool-off-duration)) - (thread-sleep! (- sync-cool-off-duration duration-since-last-sync))) - (if (not *time-to-exit*) - (let ((golden-mtdb-mtime (file-modification-time golden-mtpath)) - (tmp-mtdb-mtime (file-modification-time tmp-mtpath))) - (if (> golden-mtdb-mtime tmp-mtdb-mtime) - (if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back - (let ((res (db:multi-db-sync dbstruct 'old2new))) - (debug:print-info 13 *default-log-port* "rosync called, " res " records transferred.")))) - (loop (current-seconds))) - #t))) - (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath))) - -;;====================================================================== -;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage -(define (common:watchdog) - (debug:print-info 13 *default-log-port* "common:watchdog entered.") - (if (launch:setup) - (if (common:on-homehost?) - (let ((dbstruct (db:setup #t))) - (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct) - (cond - ((dbr:dbstruct-read-only dbstruct) - (debug:print-info 13 *default-log-port* "loading read-only watchdog") - (common:readonly-watchdog dbstruct)) - (else - (debug:print-info 13 *default-log-port* "loading writable-watchdog.") - (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "brute-force-sync"))) - (cond - ((equal? syncer "brute-force-sync") - (server:writable-watchdog-bruteforce dbstruct)) - ((equal? syncer "delta-sync") - (server:writable-watchdog-deltasync dbstruct)) - (else - (debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are brute-force-sync and delta-sync.") - (exit 1))) - ;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")") - ))) - (debug:print-info 13 *default-log-port* "watchdog done.")) - (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost")))) - - -(define (std-exit-procedure) - ;;(common:telemetry-log-close) - (on-exit (lambda () 0)) - ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*) - (let ((no-hurry (if *time-to-exit* ;; hurry up - #f - (begin - (set! *time-to-exit* #t) - #t)))) - (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.") - (if (and no-hurry (debug:debug-mode 18)) - (rmt:print-db-stats)) - (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds - (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated - (if *task-db* - (let ((db (cdr *task-db*))) - (if (sqlite3:database? db) - (begin - (sqlite3:interrupt! db) - (sqlite3:finalize! db #t) - ;; (vector-set! *task-db* 0 #f) - (set! *task-db* #f))))) - (http-client#close-idle-connections!) - ;; (if (and *runremote* - ;; (remote-conndat *runremote*)) - ;; (begin - ;; (http-client#close-all-connections!))) ;; for http-client - (if (not (eq? *default-log-port* (current-error-port))) - (close-output-port *default-log-port*)) - (set! *default-log-port* (current-error-port))) "Cleanup db exit thread")) - (th2 (make-thread (lambda () - (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...") - (if no-hurry - (begin - (thread-sleep! 5)) ;; give the clean up few seconds to do it's stuff - (begin - (thread-sleep! 2))) - (debug:print 4 *default-log-port* " ... done") - ) - "clean exit"))) - (thread-start! th1) - (thread-start! th2) - (thread-join! th1) - ) - ) - - 0) - -(define (std-signal-handler signum) - ;; (signal-mask! signum) - (set! *time-to-exit* #t) - ;;(debug:print-info 13 *default-log-port* "got signal "signum) - (debug:print-error 0 *default-log-port* "Received signal " signum " aaa exiting promptly") - ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway - (exit)) - -(define (special-signal-handler signum) - ;; (signal-mask! signum) - (set! *time-to-exit* #t) - ;;(debug:print-info 13 *default-log-port* "got signal "signum) - (debug:print-error 0 *default-log-port* "Received signal " signum " sending email befor exiting!!") - ;;TODO send email to notify admin contact listed in the config that the lisner got killed - ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway - (exit)) - - -(set-signal-handler! signal/int std-signal-handler) ;; ^C -(set-signal-handler! signal/term std-signal-handler) - -;; (set-signal-handler! signal/stop std-signal-handler) ;; ^Z NO, do NOT handle ^Z! - -;;====================================================================== -;; M I S C U T I L S -;;====================================================================== - -;;====================================================================== -;; convert stuff to a number if possible -(define (any->number val) - (cond - ((number? val) val) - ((string? val) (string->number val)) - ((symbol? val) (any->number (symbol->string val))) - (else #f))) - -(define (any->number-if-possible val) - (let ((num (any->number val))) - (if num num val))) - -(define (patt-list-match item patts) - (debug:print-info 8 *default-log-port* "patt-list-match item=" item " patts=" patts) - (if (and item patts) ;; here we are filtering for matches with item patterns - (let ((res #f)) ;; look through all the item-patts if defined, format is patt1,patt2,patt3 ... wildcard is % - (for-each - (lambda (patt) - (let ((modpatt (string-substitute "%" ".*" patt #t))) - (debug:print-info 10 *default-log-port* "patt " patt " modpatt " modpatt) - (if (string-match (regexp modpatt) item) - (set! res #t)))) - (string-split patts ",")) - res) - #t)) - -;;====================================================================== -;; '(print (string-intersperse (map cadr (hash-table-ref/default (read-config "megatest.config" \#f \#t) "disks" '"'"'("none" ""))) "\n"))' -(define (common:get-disks #!key (configf #f)) - (hash-table-ref/default - (or configf (read-config "megatest.config" #f #t)) - "disks" '("none" ""))) - -;;====================================================================== -;; return first command that exists, else #f -;; -(define (common:which cmds) - (if (null? cmds) - #f - (let loop ((hed (car cmds)) - (tal (cdr cmds))) - (let ((res (with-input-from-pipe (conc "which " hed) read-line))) - (if (and (string? res) - (common:file-exists? res)) - res - (if (null? tal) - #f - (loop (car tal)(cdr tal)))))))) - -(define (common:get-install-area) - (let ((exe-path (car (argv)))) - (if (common:file-exists? exe-path) - (handle-exceptions - exn - #f - (pathname-directory - (pathname-directory - (pathname-directory exe-path)))) - #f))) - -;;====================================================================== -;; return first path that can be created or already exists and is writable -;; -(define (common:get-create-writeable-dir dirs) - (if (null? dirs) - #f - (let loop ((hed (car dirs)) - (tal (cdr dirs))) - (let ((res (or (and (directory? hed) - (file-writable? hed) - hed) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "could not create " hed - ", this might cause problems down the road. exn=" exn) - #f) - (create-directory hed #t))))) - (if (and (string? res) - (directory? res)) - res - (if (null? tal) - #f - (loop (car tal)(cdr tal)))))))) - -;;====================================================================== -;; return the youngest timestamp . filename -;; -(define (common:get-youngest glob-list) - (let ((all-files (apply append - (map (lambda (patt) - (handle-exceptions - exn - '() - (glob patt))) - glob-list)))) - (fold (lambda (fname res) - (let ((last-mod (car res)) - (curmod (handle-exceptions - exn - 0 - (file-modification-time fname)))) - (if (> curmod last-mod) - (list curmod fname) - res))) - '(0 "n/a") - all-files))) - -;;====================================================================== -;; use bash to expand a glob. Does NOT handle paths with spaces! -;; -(define (common:bash-glob instr) - (string-split - (with-input-from-pipe - (conc "/bin/bash -c \"echo " instr "\"") - read-line))) - -;;====================================================================== -;; Some safety net stuff -;;====================================================================== - -;;====================================================================== -;; return input if it is a list or return null -(define (common:list-or-null inlst #!key (ovrd #f)(message #f)) - (if (list? inlst) - inlst - (begin - (if message (debug:print-error 0 *default-log-port* message)) - (or ovrd '())))) - -;;====================================================================== -;; T A R G E T S , S T A T E , S T A T U S , -;; R U N N A M E A N D T E S T P A T T -;;====================================================================== - -;;====================================================================== -;; (map print (map car (hash-table->alist (read-config "runconfigs.config" #f #t)))) -;; -(define (common:get-runconfig-targets #!key (configf #f)) - (let ((targs (sort (map car (hash-table->alist - (or configf ;; NOTE: There is no value in using runconfig:read here. - (read-config (conc *toppath* "/runconfigs.config") - #f #t) - (make-hash-table)))) - string trynum 0) - (begin - (thread-sleep! 2) - (common:get-homehost trynum: (- trynum 1))) - #f)) - (else - (let* ((currhost (get-host-name)) - (bestadrs (server:get-best-guess-address currhost)) - ;; first look in config, then look in file .homehost, create it if not found - (homehost (or (configf:lookup *configdat* "server" "homehost" ) - (handle-exceptions - exn - (if (> trynum 0) - (let ((delay-time (* (- 5 trynum) 5))) - (mutex-unlock! *homehost-mutex*) - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying " - delay-time " seconds and trying again, message: " ((condition-property-accessor 'exn 'message) exn) - ", exn=" exn) - (thread-sleep! delay-time) - (common:get-homehost trynum: (- trynum 1))) - (begin - (mutex-unlock! *homehost-mutex*) - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time) - "] Failed to read .homehost file after trying five times. Giving up and exiting, message: " - ((condition-property-accessor 'exn 'message) exn)) - (exit 1))) - (let ((hhf (conc *toppath* "/.homehost"))) - (if (common:file-exists? hhf) - (with-input-from-file hhf read-line) - (if (file-writable? *toppath*) - (begin - (with-output-to-file hhf - (lambda () - (print bestadrs))) - (begin - (mutex-unlock! *homehost-mutex*) - (car (common:get-homehost)))) - #f)))))) - (at-home (or (equal? homehost currhost) - (equal? homehost bestadrs)))) - (set! *home-host* (cons homehost at-home)) - (mutex-unlock! *homehost-mutex*) - *home-host*)))) - -;;====================================================================== -;; am I on the homehost? -;; -(define (common:on-homehost?) - (let ((hh (common:get-homehost))) - (if hh - (cdr hh) - #f))) - -;;====================================================================== -;; do we honor the caches of the config files? -;; -(define (common:use-cache?) - (let ((res #t)) ;; priority by order of evaluation - (if *configdat* ;; sillyness here. can't use setup/use-cache to know if we can use the cached files! - (if (equal? (configf:lookup *configdat* "setup" "use-cache") "no") - (set! res #f) - (if (equal? (configf:lookup *configdat* "setup" "use-cache") "yes") - (set! res #t)))) - (if (args:get-arg "-no-cache")(set! res #f)) ;; overrides setting in "setup" - (if (getenv "MT_USE_CACHE") - (if (equal? (getenv "MT_USE_CACHE") "yes") - (set! res #t) - (if (equal? (getenv "MT_USE_CACHE") "no") - (set! res #f)))) ;; overrides -no-cache switch - res)) - -;;====================================================================== -;; force use of server? -;; -(define (common:force-server?) - (let* ((force-setting (configf:lookup *configdat* "server" "force")) - (force-type (if force-setting (string->symbol force-setting) #f)) - (force-result (case force-type - ((#f) #f) - ((always) #t) - ((test) (if (args:get-arg "-execute") ;; we are in a test - #t - #f)) - (else - (debug:print 0 *default-log-port* "ERROR: Bad server force setting " force-setting ", forcing server.") - #t)))) ;; default to requiring server - (if force-result - (begin - (debug:print-info 0 *default-log-port* "forcing use of server, force setting is \"" force-setting "\".") - #t) - #f))) - -;;====================================================================== -;; M I S C L I S T S -;;====================================================================== - -;;====================================================================== -;; items in lista are matched value and position in listb -;; return the remaining items in listb or #f -;; -(define (common:list-is-sublist lista listb) - (if (null? lista) - listb ;; all items in listb are "remaining" - (if (> (length lista)(length listb)) - #f - (let loop ((heda (car lista)) - (tala (cdr lista)) - (hedb (car listb)) - (talb (cdr listb))) - (if (equal? heda hedb) - (if (null? tala) ;; we are done - talb - (loop (car tala) - (cdr tala) - (car talb) - - (cdr talb))) - #f))))) - -;;====================================================================== -;; Needed for long lists to be sorted where (apply max ... ) dies -;; -(define (common:max inlst) - (let loop ((max-val (car inlst)) - (hed (car inlst)) - (tal (cdr inlst))) - (if (not (null? tal)) - (loop (max hed max-val) - (car tal) - (cdr tal)) - (max hed max-val)))) - -;;====================================================================== -;; get min or max, use > for max and < for min, this works around the limits on apply -;; -(define (common:min-max comp lst) - (if (null? lst) - #f ;; better than an exception for my needs - (fold (lambda (a b) - (if (comp a b) a b)) - (car lst) - lst))) - -;;====================================================================== -;; get min or max, use > for max and < for min, this works around the limits on apply -;; -(define (common:sum lst) - (if (null? lst) - 0 - (fold (lambda (a b) - (+ a b)) - (car lst) - lst))) - -;;====================================================================== -;; path list to hash-table tree -;; ((a b c)(a b d)(e b c)) => ((a (b (d) (c))) (e (b (c)))) -;; -(define (common:list->htree lst) - (let ((resh (make-hash-table))) - (for-each - (lambda (inlst) - (let loop ((ht resh) - (hed (car inlst)) - (tal (cdr inlst))) - (if (hash-table-ref/default ht hed #f) - (if (not (null? tal)) - (loop (hash-table-ref ht hed) - (car tal) - (cdr tal))) - (begin - (hash-table-set! ht hed (make-hash-table)) - (loop ht hed tal))))) - lst) - resh)) - -;;====================================================================== -;; hash-table tree to html list tree -;; -;; tipfunc takes two parameters: y the tip value and path the path to that point -;; -(define (common:htree->html ht path tipfunc) - (let ((datlist (sort (hash-table->alist ht) - (lambda (a b) - (string< (car a)(car b)))))) - (if (null? datlist) - (tipfunc #f path) ;; really shouldn't get here - (s:ul - (map (lambda (x) - (let* ((levelname (car x)) - (y (cdr x)) - (newpath (append path (list levelname))) - (leaf (or (not (hash-table? y)) - (null? (hash-table-keys y))))) - (if leaf - (s:li (tipfunc y newpath)) - (s:li - (list - levelname - (common:htree->html y newpath tipfunc)))))) - datlist))))) - -;;====================================================================== -;; hash-table tree to alist tree -;; -(define (common:htree->atree ht) - (map (lambda (x) - (cons (car x) - (let ((y (cdr x))) - (if (hash-table? y) - (common:htree->atree y) - y)))) - (hash-table->alist ht))) - -;;====================================================================== -;; M U N G E D A T A I N T O N I C E F O R M S -;;====================================================================== - -;;====================================================================== -;; Generate an index for a sparse list of key values -;; ( (rowname1 colname1 val1)(rowname2 colname2 val2) ) -;; -;; => -;; -;; ( (rowname1 0)(rowname2 1)) ;; rownames -> num -;; (colname1 0)(colname2 1)) ) ;; colnames -> num -;; -;; optional apply proc to rownum colnum value -(define (common:sparse-list-generate-index data #!key (proc #f)) - (if (null? data) - (list '() '()) - (let loop ((hed (car data)) - (tal (cdr data)) - (rownames '()) - (colnames '()) - (rownum 0) - (colnum 0)) - (let* ((rowkey (car hed)) - (colkey (cadr hed)) - (value (caddr hed)) - (existing-rowdat (assoc rowkey rownames)) - (existing-coldat (assoc colkey colnames)) - (curr-rownum (if existing-rowdat rownum (+ rownum 1))) - (curr-colnum (if existing-coldat colnum (+ colnum 1))) - (new-rownames (if existing-rowdat rownames (cons (list rowkey curr-rownum) rownames))) - (new-colnames (if existing-coldat colnames (cons (list colkey curr-colnum) colnames)))) - ;; (debug:print-info 0 *default-log-port* "Processing record: " hed ) - (if proc (proc curr-rownum curr-colnum rowkey colkey value)) - (if (null? tal) - (list new-rownames new-colnames) - (loop (car tal) - (cdr tal) - new-rownames - new-colnames - (if (> curr-rownum rownum) curr-rownum rownum) - (if (> curr-colnum colnum) curr-colnum colnum) - )))))) - -;;====================================================================== -;; if it looks like a number -> convert it to a number, else return it -;; -(define (common:lazy-convert inval) - (let* ((as-num (if (string? inval)(string->number inval) #f))) - (or as-num inval))) - -;;====================================================================== -;; convert string a=1; b=2; c=a silly thing; d= -;; to '((a . 1)(b . 2)(c . "a silly thing")(d . "")) -;; -(define (common:val->alist val #!key (convert #f)) - (let ((val-list (string-split-fields ";\\s*" val #:infix))) - (if val-list - (map (lambda (x) - (let ((f (string-split-fields "\\s*=\\s*" x #:infix))) - (case (length f) - ((0) `(,#f)) ;; null string case - ((1) `(,(string->symbol (car f)))) - ((2) `(,(string->symbol (car f)) . ,(let ((inval (cadr f))) - (if convert (common:lazy-convert inval) inval)))) - (else f)))) - val-list) - '()))) - -;;====================================================================== -;; S Y S T E M S T U F F -;;====================================================================== - -;;====================================================================== -;; lazy-safe get file mod time. on any error (file not existing etc.) return 0 -;; -(define (common:lazy-modification-time fpath) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "Failed to get modifcation time for " fpath ", treating it as zero. exn=" exn) - 0) - (if (file-exists? fpath) - (file-modification-time fpath) - 0))) - -;;====================================================================== -;; find timestamp of newest file associated with a sqlite db file -(define (common:lazy-sqlite-db-modification-time fpath) - (let* ((glob-list (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "Failed to glob " fpath "*, exn=" exn) - `(,(conc "/no/such/file, message: " ((condition-property-accessor 'exn 'message) exn)))) - (glob (conc fpath "*")))) - (file-list (if (eq? 0 (length glob-list)) - '("/no/such/file") - glob-list))) - (apply max - (map - common:lazy-modification-time - file-list)))) - -;;====================================================================== -;; return a nice clean pathname made absolute -(define (common:nice-path dir) - (let ((match (string-match "^(~[^\\/]*)(\\/.*|)$" dir))) - (if match ;; using ~ for home? - (common:nice-path (conc (common:read-link-f (cadr match)) "/" (caddr match))) - (normalize-pathname (if (absolute-pathname? dir) - dir - (conc (current-directory) "/" dir)))))) - -;;====================================================================== -;; make "nice-path" available in config files and the repl -(define nice-path common:nice-path) - -(define (common:read-link-f path) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "command \"/bin/readlink -f " path "\" failed. exn=" exn) - path) ;; just give up - (with-input-from-pipe - (conc "/bin/readlink -f " path) - (lambda () - (read-line))))) - -;;====================================================================== -;; returns *effective load* (not normalized) -;; -(define (common:get-intercept onemin fivemin) - (if (< onemin fivemin) ;; load is decreasing, just use the onemin load - onemin - (let* ((load-change (- onemin fivemin)) - (tchange (- 300 60))) - (max (+ onemin (* 60 (/ load-change tchange))) 0)))) - -;;====================================================================== -;; calculate a delay number based on a droop curve -;; inputs are: -;; - load-in, load as from uptime, NOT normalized -;; - numcpus, number of cpus, ideally use the real cpus, not threads -;; -(define (common:get-delay load-in numcpus) - (let* ((ratio (/ load-in numcpus)) - (new-option (configf:lookup *configdat* "load" "new-load-method")) - (paramstr (or (configf:lookup *configdat* "load" "exp-params") - "15 12 1281453987.9543 0.75")) ;; 5 4 10 1")) - (paramlst (map string->number (string-split paramstr)))) - (if new-option - (begin - (cond ((and (>= ratio 0) (< ratio .5)) - 0) - ((and (>= ratio 0.5) (<= ratio .9)) - (* ratio (/ 5 .9))) - ((and (> ratio .9) (<= ratio 1.1)) - (+ 5 (* (- ratio .9) (/ 55 .2)))) - ((> ratio 1.1) - 60))) - (match paramlst - ((r1 r2 s1 s2) - (debug:print 3 *default-log-port* "Using params r1=" r1 " r2=" r2 " s1=" s1 " s2=" s2) - (min (max (/ (expt r1 (* r2 s2 ratio)) s1) 0) 30)) - (else - (debug:print 0 *default-log-port* "BAD exp-params, should be \"r1 r2 s1 s2\" but got " paramstr) - 30))))) - -(define (common:print-delay-table) - (let loop ((x 0)) - (print x "," (common:get-delay x 1)) - (if (< x 2) - (loop (+ x 0.1))))) - -(define (get-cpu-load #!key (remote-host #f)) - (car (common:get-cpu-load remote-host))) - -;;====================================================================== -;; (let* ((load-res (process:cmd-run->list "uptime")) -;; (load-rx (regexp "load average:\\s+(\\d+)")) -;; (cpu-load #f)) -;; (for-each (lambda (l) -;; (let ((match (string-search load-rx l))) -;; (if match -;; (let ((newval (string->number (cadr match)))) -;; (if (number? newval) -;; (set! cpu-load newval)))))) -;; (car load-res)) -;; cpu-load)) - -;;====================================================================== -;; get values from cached info from dropping file in logs dir -;; e.g. key is host and dtype is normalized-load -;; -(define (common:get-cached-info key dtype #!key (age 10)) - (if *toppath* - (let* ((fullpath (conc *toppath* "/.sysdata/" key "-" dtype ".log")) - (delfile (lambda (exn) - (debug:print-info 1 *default-log-port* " removing bad file " fullpath ", exn=" exn) - (delete-file* fullpath) - #f))) - (if (and (file-exists? fullpath) - (file-readable? fullpath)) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "failed to get cached info from " fullpath ", exn=" exn) - #f) - (debug:print 2 *default-log-port* "reading file " fullpath) - (let ((real-age (- (current-seconds) - (handle-exceptions - exn - (begin - (debug:print 1 *default-log-port* "Failed to read mod time on file " - fullpath ", using 0, exn=" exn) - 0) - (file-change-time fullpath))))) - (if (< real-age age) - (handle-exceptions - exn - (delfile exn) - (let* ((res (with-input-from-file fullpath read))) - (if (eof-object? res) - (begin - (delfile "n/a") - #f) - res))) - (begin - (debug:print-info 2 *default-log-port* "file " fullpath - " is too old (" real-age" seconds) to trust, skipping reading it") - #f)))) - (begin - (debug:print 2 *default-log-port* "not reading file " fullpath) - #f))) - #f)) - -(define (common:write-cached-info key dtype dat) - (if *toppath* - (let* ((fulldir (conc *toppath* "/.sysdata")) - (fullpath (conc fulldir "/" key "-" dtype ".log"))) - (if (not (file-exists? fulldir))(create-directory fulldir #t)) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "failed to write file " fullpath ", exn=" exn) - #f) - (with-output-to-file fullpath (lambda ()(pp dat))))) - #f)) - -(define (common:raw-get-remote-host-load remote-host) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "failed to ssh to " remote-host " and get loadavg. exn=" exn) - #f) ;; more specific handling of errors needed - (with-input-from-pipe - (conc "ssh " remote-host " cat /proc/loadavg") - (lambda ()(list (read)(read)(read)))))) - -;;====================================================================== -;; get cpu load by reading from /proc/loadavg, return all three values -;; -(define (common:get-cpu-load remote-host) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "failed to ssh or read loadavg from host " remote-host ", exn=" exn) - '(-99 -99 -99)) - (let* ((actual-hostname (or remote-host (get-host-name) "localhost"))) - (or (common:get-cached-info actual-hostname "cpu-load") - (let ((result (if remote-host - (map (lambda (res) - (if (eof-object? res) 9e99 res)) - (with-input-from-pipe - (conc "ssh " remote-host " cat /proc/loadavg") - (lambda ()(list (read)(read)(read))))) - (with-input-from-file "/proc/loadavg" - (lambda ()(list (read)(read)(read))))))) - (match - result - ((l1 l2 l3) - (if (and (number? l1) - (number? l2) - (number? l3)) - (begin - (common:write-cached-info actual-hostname "cpu-load" result) - result) - '(-1 -1 -1))) ;; -1 is bad result - (else '(-2 -2 -2)))))))) - -;;====================================================================== -;; get normalized cpu load by reading from /proc/loadavg and /proc/cpuinfo return all three values and the number of real cpus and the number of threads -;; returns alist '((adj-cpu-load . normalized-proc-load) ... etc. -;; keys: adj-proc-load, adj-core-load, 1m-load, 5m-load, 15m-load -;; -(define (common:get-normalized-cpu-load remote-host) - (let ((res (common:get-normalized-cpu-load-raw remote-host)) - (default `((adj-proc-load . 2) ;; there is no right answer - (adj-core-load . 2) - (1m-load . 2) - (5m-load . 0) ;; causes a large delta - thus causing default of throttling if stuff goes wrong - (15m-load . 0) - (proc . 1) - (core . 1) - (phys . 1) - (error . #t)))) - (cond - ((and (list? res) - (> (length res) 2)) - res) - ((eq? res #f) default) ;; add messages? - ((eq? res #f) default) ;; this would be the #eof - (else default)))) - -(define (common:get-normalized-cpu-load-raw remote-host) - (let* ((actual-host (or remote-host (get-host-name)))) ;; #f is localhost - (or (common:get-cached-info actual-host "normalized-load") - (let ((data (if remote-host - (with-input-from-pipe - (conc "ssh " remote-host " \"cat /proc/loadavg;cat /proc/cpuinfo;echo end\"") - read-lines) - (append - (with-input-from-file "/proc/loadavg" - read-lines) - (with-input-from-file "/proc/cpuinfo" - read-lines) - (list "end")))) - (load-rx (regexp "^([\\d\\.]+)\\s+([\\d\\.]+)\\s+([\\d\\.]+)\\s+.*$")) - (proc-rx (regexp "^processor\\s+:\\s+(\\d+)\\s*$")) - (core-rx (regexp "^core id\\s+:\\s+(\\d+)\\s*$")) - (phys-rx (regexp "^physical id\\s+:\\s+(\\d+)\\s*$")) - (max-num (lambda (p n)(max (string->number p) n)))) - ;; (print "data=" data) - (if (null? data) ;; something went wrong - #f - (let loop ((hed (car data)) - (tal (cdr data)) - (loads #f) - (proc-num 0) ;; processor includes threads - (phys-num 0) ;; physical chip on motherboard - (core-num 0)) ;; core - ;;; (print hed ", " loads ", " proc-num ", " phys-num ", " core-num) - (if (null? tal) ;; have all our data, calculate normalized load and return result - (let* ((act-proc (+ proc-num 1)) - (act-phys (+ phys-num 1)) - (act-core (+ core-num 1)) - (adj-proc-load (/ (car loads) act-proc)) - (adj-core-load (/ (car loads) act-core)) - (result - (append (list (cons 'adj-proc-load adj-proc-load) - (cons 'adj-core-load adj-core-load)) - (list (cons '1m-load (car loads)) - (cons '5m-load (cadr loads)) - (cons '15m-load (caddr loads))) - (list (cons 'proc act-proc) - (cons 'core act-core) - (cons 'phys act-phys))))) - (common:write-cached-info actual-host "normalized-load" result) - result) - (regex-case - hed - (load-rx ( x l1 l5 l15 ) (loop (car tal)(cdr tal)(map string->number (list l1 l5 l15)) proc-num phys-num core-num)) - (proc-rx ( x p ) (loop (car tal)(cdr tal) loads (max-num p proc-num) phys-num core-num)) - (phys-rx ( x p ) (loop (car tal)(cdr tal) loads proc-num (max-num p phys-num) core-num)) - (core-rx ( x c ) (loop (car tal)(cdr tal) loads proc-num phys-num (max-num c core-num))) - (else - (begin - ;; (print "NO MATCH: " hed) - (loop (car tal)(cdr tal) loads proc-num phys-num core-num))))))))))) - -(define (common:unix-ping hostname) - (let ((res (system (conc "ping -c 1 " hostname " > /dev/null")))) - (eq? res 0))) - -;;====================================================================== -;; ideally put all this info into the db, no need to preserve it across moving homehost -;; -;; return list of -;; ( reachable? cpuload update-time ) -(define (common:get-host-info hostname) - (let* ((loadinfo (rmt:get-latest-host-load hostname)) ;; if this host happens to have been recently used by a test reuse the load data - (load (car loadinfo)) - (load-sample-time (cdr loadinfo)) - (load-sample-age (- (current-seconds) load-sample-time)) - (loadinfo-timeout-seconds 6) ;; this was 20 seconds, seems way too lax. Switch to 6 seconds - (host-last-update-timeout-seconds 4) - (host-rec (hash-table-ref/default *host-loads* hostname #f)) - ) - (cond - ((< load-sample-age loadinfo-timeout-seconds) - (list #t - load-sample-time - load)) - ((and host-rec - (< (current-seconds) (+ (host-last-update host-rec) host-last-update-timeout-seconds))) - (list #t - (host-last-update host-rec) - (host-last-cpuload host-rec ))) - ((common:unix-ping hostname) - (list #t - (current-seconds) - (alist-ref 'adj-core-load (common:get-normalized-cpu-load hostname)))) ;; this is cheaper than you might think. get-normalized-cpu-load is cached for up to 5 seconds - (else - (list #f 0 -1) ;; bad host, don't use! - )))) - -;;====================================================================== -;; see defstruct host at top of file. -;; host: reachable last-update last-used last-cpuload -;; -(define (common:update-host-loads-table hosts-raw) - (let* ((hosts (filter (lambda (x) - (string-match (regexp "^\\S+$") x)) - hosts-raw))) - (for-each - (lambda (hostname) - (let* ((rec (let ((h (hash-table-ref/default *host-loads* hostname #f))) - (if h - h - (let ((h (make-host))) - (hash-table-set! *host-loads* hostname h) - h)))) - (host-info (common:get-host-info hostname)) - (is-reachable (car host-info)) - (last-reached-time (cadr host-info)) - (load (caddr host-info))) - (host-reachable-set! rec is-reachable) - (host-last-update-set! rec last-reached-time) - (host-last-cpuload-set! rec load))) - hosts))) - -;;====================================================================== -;; go through the hosts from least recently used to most recently used, pick the first that meets the load criteral from the -;; [host-rules] section. -;; -(define (common:get-least-loaded-host hosts-raw host-type configdat) - (let* ((rdat (configf:lookup configdat "host-rules" host-type)) - (rules (common:val->alist (or rdat "") convert: #t)) ;; maxnload, maxnjobs, maxjobrate - (maxnload (common:alist-ref/default 'maxnload rules 1.5)) ;; max normalized load - (maxnjobs (common:alist-ref/default 'maxnjobs rules 1.5)) ;; max normalized number of jobs - (maxjobrate (common:alist-ref/default 'maxjobrate rules (/ 1 6))) ;; max rate of submitting jobs to a given host in jobs/second - (hosts (filter (lambda (x) - (string-match (regexp "^\\S+$") x)) - hosts-raw)) - ;; (best-host #f) - (get-rec (lambda (hostname) - ;; (print "get-rec hostname=" hostname) - (let ((h (hash-table-ref/default *host-loads* hostname #f))) - (if h - h - (let ((h (make-host))) - (hash-table-set! *host-loads* hostname h) - h))))) - (best-load 99999) - (curr-time (current-seconds)) - (get-hosts-sorted (lambda (hosts) - (sort hosts (lambda (a b) - (let ((a-rec (get-rec a)) - (b-rec (get-rec b))) - ;; (print "a=" a " a-rec=" a-rec " host-last-used=" (host-last-used a-rec)) - ;; (print "b=" b " b-rec=" b-rec " host-last-used=" (host-last-used b-rec)) - (< (host-last-used a-rec) - (host-last-used b-rec)))))))) - (debug:print 0 *default-log-port* "INFO: hosts-sorted=" (get-hosts-sorted hosts)) - (if (null? hosts) - #f ;; no hosts to select from. All done and giving up now. - (let ((hosts-sorted (get-hosts-sorted hosts))) - (common:update-host-loads-table hosts) - (let loop ((hostname (car hosts-sorted)) - (tal (cdr hosts-sorted)) - (best-host #f)) - (let* ((rec (get-rec hostname)) - (reachable (host-reachable rec)) - (load (host-last-cpuload rec)) - (last-used (host-last-used rec)) - (delta (- curr-time last-used)) - (job-rate (if (> delta 0) - (/ 1 delta) - 999)) ;; jobs per second - (new-best - (cond - ((not reachable) - (debug:print 0 *default-log-port* "Skipping host " hostname " as it cannot be reached.") - best-host) - ((and (< load maxnload) ;; load is acceptable - (< job-rate maxjobrate)) ;; job rate is acceptable - (set! best-load load) - hostname) - (else best-host)))) - (debug:print 0 *default-log-port* "INFO: Trying host " hostname " with load " load ", last used " delta " seconds ago, with job-rate " job-rate " for running a test." ) - (if new-best - (begin ;; found a host, return it - (debug:print 0 *default-log-port* "INFO: Found host: " new-best " load: " load " last-used: " delta " seconds ago, with job-rate: " job-rate) - (host-last-used-set! rec curr-time) - new-best) - (if (null? tal) #f (loop (car tal)(cdr tal) best-host))))))))) - -(define (common:wait-for-homehost-load maxnormload msg) - (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. - #f - (common:get-homehost))) - (hh (if hh-dat (car hh-dat) #f))) - (common:wait-for-normalized-load maxnormload msg hh))) - -(define (common:get-num-cpus remote-host) - (let* ((actual-host (or remote-host (get-host-name)))) - ;; hosts had better not be changing the number of cpus too often! - (or (hash-table-ref/default *numcpus-cache* actual-host #f) - (let* ((numcpus (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (pseudo-random-integer 3600))) - (let* ((proc (lambda () - (let loop ((numcpu 0) - (inl (read-line))) - (if (eof-object? inl) - (if (> numcpu 0) - numcpu - #f) ;; if zero return #f so caller knows that things are not working - (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl) - (+ numcpu 1) - numcpu) - (read-line)))))) - (result (if remote-host - (with-input-from-pipe - (conc "ssh " remote-host " cat /proc/cpuinfo") - proc) - (with-input-from-file "/proc/cpuinfo" proc)))) - (if (and (number? result) - (> result 0)) - (common:write-cached-info actual-host "num-cpus" result)) - result)))) - (hash-table-set! *numcpus-cache* actual-host numcpus) - numcpus)))) - -;;====================================================================== -;; wait for normalized cpu load to drop below maxload -;; -(define (common:wait-for-normalized-load maxnormload msg remote-host #!optional (rem-tries 5)) - (let ((num-cpus (common:get-num-cpus remote-host))) - (if num-cpus - (common:wait-for-cpuload maxnormload num-cpus 15 msg: msg remote-host: remote-host) - (begin - (thread-sleep! (pseudo-random-integer 60)) ;; we failed to get num cpus. wait a bit and try again - (if (> rem-tries 0) - (common:wait-for-normalized-load maxnormload msg remote-host (- rem-tries 1)) - #f))))) - -;;====================================================================== -;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load -;; count - count down to zero, at some point we'd give up if the load never drops -;; num-tries - count down to zero number tries to get numcpus -;; -(define (common:wait-for-cpuload maxnormload numcpus-in - #!key (count 1000) - (msg #f)(remote-host #f)(num-tries 5)) - (let* ((loadavg (common:get-cpu-load remote-host)) - ;; not possible to have zero. If we get 1, it's possible that we got the previous default, and we should check again - (numcpus (if (<= 1 numcpus-in) - (common:get-num-cpus remote-host) - numcpus-in)) - (first (car loadavg)) - (next (cadr loadavg)) - (adjmaxload (* maxnormload (max 1 numcpus))) ;; possible bug - ;; where numcpus - ;; (or could be - ;; maxload) is - ;; zero, crude - ;; fallback is to - ;; at least use 1 - ;; effective load accounts for load jumps, this should elminate all the first-next-avg, adjwait, load-jump-limit - ;; etc. - (effective-load (common:get-intercept first next)) - (recommended-delay (common:get-delay effective-load numcpus)) - (effective-host (or remote-host "localhost")) - (normalized-effective-load (/ effective-load numcpus)) - (will-wait (> normalized-effective-load maxnormload))) - (if (> recommended-delay 1) - (let* ((actual-delay (min recommended-delay 30))) - (if (common:low-noise-print 30 (conc (round actual-delay) "-safe-load")) - (debug:print-info 0 *default-log-port* "Load control, delaying " - actual-delay " seconds to maintain safe load. current normalized effective load is " - normalized-effective-load". maxnormload = " maxnormload " numcpus = " numcpus " loadavg = " loadavg " effective-load = " effective-load)) - (thread-sleep! actual-delay))) - - (cond - ;; bad data, try again to get the data - ((not will-wait) - (if (common:low-noise-print 3600 (conc (round normalized-effective-load) "-load-acceptable-" effective-host)) - (debug:print 0 *default-log-port* "Effective load on " effective-host " is acceptable at " effective-load " continuing."))) - - ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable - (> num-tries 0)) - (debug:print 0 *default-log-port* "WARNING: received bad data from get-cpu-load " - first ", we'll sleep 10s and try " num-tries " more times.") - (thread-sleep! 10) - (common:wait-for-cpuload maxnormload numcpus-in - count: count remote-host: remote-host num-tries: (- num-tries 1))) - - ;; need to wait for load to drop - ((and will-wait ;; (> first adjmaxload) - (> count 0)) - (debug:print-info 0 *default-log-port* - "Delaying 15" ;; adjwait - " seconds due to normalized effective load " normalized-effective-load ;; first - " exceeding max of " adjmaxload - " on server " (or remote-host (get-host-name)) - " (normalized load-limit: " maxnormload ") " (if msg msg "")) - (thread-sleep! 15) ;; adjwait) - (common:wait-for-cpuload maxnormload numcpus count: (- count 1) msg: msg remote-host: remote-host) - ;; put the message here to indicate came out of waiting - (debug:print-info 1 *default-log-port* - "On host: " effective-host - ", effective load: " effective-load - ", numcpus: " numcpus - ", normalized effective load: " normalized-effective-load - )) - ;; overloaded and count expired (i.e. went to zero) - (else - (if (> num-tries 0) ;; should be "num-tries-left". - (if (common:low-noise-print 30 (conc (round effective-load) "-load-acceptable-" effective-host)) - (debug:print 0 *default-log-port* "Load on " effective-host " is acceptable at effective normalized load of " - normalized-effective-load " continuing.")) - (debug:print 0 *default-log-port* "Load on " effective-host ", " - first" could not be retrieved. Giving up and continuing.")))))) - -;;====================================================================== -;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load -;; -;; (define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)(force-maxload #f)(num-tries 5)) -;; (let* ((loadavg (common:get-cpu-load remote-host)) -;; (numcpus (if (<= 1 numcpus-in) ;; not possible to have zero. If we get 1, it's possible that we got the previous default, and we should check again -;; (common:get-num-cpus remote-host) -;; numcpus-in)) -;; (maxload (if force-maxload -;; maxload-in -;; (if (number? maxload-in) -;; (max maxload-in 0.5) -;; 0.5))) ;; so maxload must be greater than 0.5 for now BUG - FIXME? -;; (first (car loadavg)) -;; (next (cadr loadavg)) -;; (adjmaxload (* maxload (max 1 numcpus))) ;; possible bug where -;; ;; numcpus (or could be -;; ;; maxload) is zero, -;; ;; crude fallback is to -;; ;; at least use 1 -;; (loadjmp (- first (if (> next (* numcpus 0.7)) ;; could do something with average of first and next? -;; 0 -;; next))) ;; we will force a conservative calculation any time next is large. -;; (first-next-avg (/ (+ first next) 2)) -;; ;; add some randomness to the time to break any alignment -;; ;; where netbatch dumps many jobs to machines simultaneously -;; (adjwait (min (+ 300 (pseudo-random-integer 10)) (abs (* (+ (pseudo-random-integer 10) -;; (/ (- 1000 count) 10) -;; waitdelay) -;; (- first adjmaxload) )))) -;; (load-jump-limit (configf:lookup-number *configdat* "setup" "load-jump-limit")) -;; ;; effective load accounts for load jumps, this should elminate all the first-next-avg, adjwait, load-jump-limit -;; ;; etc. -;; (effective-load (common:get-intercept first next)) -;; (effective-host (or remote-host "localhost")) -;; (normalized-effective-load (/ effective-load numcpus)) -;; (will-wait (> normalized-effective-load maxload))) -;; -;; ;; let's let the user know once in a long while that load checking -;; ;; is happening but not constantly report it -;; #;(if (common:low-noise-print 30 (conc "cpuload" (or remote-host "localhost"))) ;; (> (pseudo-random-integer 100) 75) ;; about 25% of the time -;; (debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload -;; ", load: " first ", adjmaxload: " adjmaxload ", loadjmp: " loadjmp)) -;; -;; (debug:print-info 1 *default-log-port* -;; "On host: " effective-host -;; ", effective load: " effective-load -;; ", numcpus: " numcpus -;; ", normalized effective load: " normalized-effective-load -;; ) -;; -;; (cond -;; ;; bad data, try again to get the data -;; ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable -;; (> num-tries 0)) -;; (debug:print 0 *default-log-port* "WARNING: received bad data from get-cpu-load " first ", we'll sleep 10s and try " num-tries " more times.") -;; (thread-sleep! 10) -;; (common:wait-for-cpuload maxload-in numcpus-in waitdelay -;; count: count remote-host: remote-host force-maxload: force-maxload num-tries: (- num-tries 1))) -;; ;; need to wait for load to drop -;; ((and will-wait ;; (> first adjmaxload) -;; (> count 0)) -;; (debug:print-info 0 *default-log-port* -;; "Delaying " 15 ;; adjwait -;; " seconds due to normalized effective load " normalized-effective-load ;; first -;; " exceeding max of " adjmaxload -;; " on server " (or remote-host (get-host-name)) -;; " (normalized load-limit: " maxload ") " (if msg msg "")) -;; (thread-sleep! 15) ;; adjwait) -;; (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host)) -;; ((and (> loadjmp (cond -;; (load-jump-limit load-jump-limit) -;; ((> numcpus 8)(/ numcpus 2)) -;; ((> numcpus 4)(/ numcpus 1.2)) -;; (else 0.5))) -;; (> count 0)) -;; (debug:print-info 0 *default-log-port* "waiting " adjwait " seconds due to possible load jump " loadjmp ". " -;; (if msg msg "")) -;; (thread-sleep! adjwait) -;; (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host)) -;; (else -;; (if (> num-tries 0) -;; (if (common:low-noise-print 30 (conc (round first) "-load-acceptable-" (or remote-host "localhost"))) -;; (debug:print 0 *default-log-port* "Load on " (or remote-host "localhost") " is acceptable at " first " continuing.")) -;; (debug:print 0 *default-log-port* "Load on " (or remote-host "localhost") ", "first" could not be retrieved. Giving up and continuing.")))))) -;; -(define (get-uname . params) - (let* ((uname-res (process:cmd-run->list (conc "uname " (if (null? params) "-a" (car params))))) - (uname #f)) - (if (null? (car uname-res)) - "unknown" - (caar uname-res)))) - -;; for reasons I don't understand multiple calls to real-path in parallel threads -;; must be protected by mutexes -;; -(define (common:real-path inpath) - ;; (process:cmd-run-with-stderr->list "readlink" "-f" inpath)) ;; cmd . params) - ;; (let-values - ;; (((inp oup pid) (process "readlink" (list "-f" inpath)))) - ;; (with-input-from-port inp - ;; (let loop ((inl (read-line)) - ;; (res #f)) - ;; (print "inl=" inl) - ;; (if (eof-object? inl) - ;; (begin - ;; (close-input-port inp) - ;; (close-output-port oup) - ;; ;; (process-wait pid) - ;; res) - ;; (loop (read-line) inl)))))) - (with-input-from-pipe (conc "readlink -f " inpath) read-line)) - -;;====================================================================== -;; D I S K S P A C E -;;====================================================================== - -(define (common:get-disk-space-used fpath) - (with-input-from-pipe (conc "/usr/bin/du -s " fpath) read)) - -;;====================================================================== -;; given path get free space, allows override in [setup] -;; with free-space-script /path/to/some/script.sh -;; -(define (get-df path) - (if (configf:lookup *configdat* "setup" "free-space-script") - (with-input-from-pipe - (conc (configf:lookup *configdat* "setup" "free-space-script") " " path) - (lambda () - (let ((res (read-line))) - (if (string? res) - (string->number res))))) - (get-unix-df path))) - -(define (get-free-inodes path) - (if (configf:lookup *configdat* "setup" "free-inodes-script") - (with-input-from-pipe - (conc (configf:lookup *configdat* "setup" "free-inodes-script") " " path) - (lambda () - (let ((res (read-line))) - (if (string? res) - (string->number res))))) - (get-unix-inodes path))) - -(define (get-unix-df path) - (let* ((df-results (process:cmd-run->list (conc "df " path))) - (space-rx (regexp "([0-9]+)\\s+([0-9]+)%")) - (freespc #f)) - ;; (write df-results) - (for-each (lambda (l) - (let ((match (string-search space-rx l))) - (if match - (let ((newval (string->number (cadr match)))) - (if (number? newval) - (set! freespc newval)))))) - (car df-results)) - freespc)) - -(define (get-unix-inodes path) - (let* ((df-results (process:cmd-run->list (conc "df -i " path))) - (space-rx (regexp "([0-9]+)\\s+([0-9]+)%")) - (freenodes 0)) ;; 0 is a better failsafe than #f here. - ;; (write df-results) - (for-each (lambda (l) - (let ((match (string-search space-rx l))) - (if match - (let ((newval (string->number (cadr match)))) - (if (number? newval) - (set! freenodes newval)))))) - (car df-results)) - freenodes)) - -(define (common:check-space-in-dir dirpath required) - (let* ((dbspace (if (directory? dirpath) - (get-df dirpath) - 0))) - (list (> dbspace required) - dbspace - required - dirpath))) - -;;====================================================================== -;; check space in dbdir and in megatest dir -;; returns: ok/not dbspace required-space -;; -(define (common:check-db-dir-space) - (let* ((required (string->number - ;; default is 1GB (or actually a billion bytes) This is the number of 1 kB blocks. - (or (configf:lookup *configdat* "setup" "dbdir-space-required") - "1000000"))) - (dbdir (common:get-db-tmp-area)) ;; (db:get-dbdir)) - (tdbspace (common:check-space-in-dir dbdir required)) - (mdbspace (common:check-space-in-dir *toppath* required))) - (sort (list tdbspace mdbspace) (lambda (a b) - (< (cadr a)(cadr b)))))) - -;;====================================================================== -;; check available space in dbdir, exit if insufficient -;; -(define (common:check-db-dir-and-exit-if-insufficient) - (let* ((spacedat (car (common:check-db-dir-space))) ;; look only at worst for now - (is-ok (car spacedat)) - (dbspace (cadr spacedat)) - (required (caddr spacedat)) - (dbdir (cadddr spacedat))) - (if (not is-ok) - (begin - (debug:print-error 0 *default-log-port* "Insufficient space in " dbdir ", require " required ", have " dbspace ", exiting now.") - (exit 1))))) - -;;====================================================================== -;; paths is list of lists ((name path) ... ) -;; -(define (common:get-disk-with-most-free-space disks minsize) - (let* ((best #f) - (bestsize 0) - (default-min-inodes-string "1000000") - (default-min-inodes (string->number default-min-inodes-string)) - (min-inodes (or (string->number (if (configf:lookup *configdat* "setup" "min_inodes") (configf:lookup *configdat* "setup" "min_inodes") default-min-inodes-string)) default-min-inodes))) - - (for-each - (lambda (disk-num) - (let* ((dirpath (cadr (assoc disk-num disks))) - (freespc (cond - ((not (directory? dirpath)) - (if (common:low-noise-print 300 "disks not a dir " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it.")) - -1) - ((not (file-writable? dirpath)) - (if (common:low-noise-print 300 "disks not writeable " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it.")) - -1) - ((not (eq? (string-ref dirpath 0) #\/)) - (if (common:low-noise-print 300 "disks not a proper path " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it.")) - -1) - (else - (get-df dirpath)))) - (free-inodes (cond - ((not (directory? dirpath)) - (if (common:low-noise-print 300 "disks not a dir " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it.")) - -1) - ((not (file-writable? dirpath)) - (if (common:low-noise-print 300 "disks not writeable " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it.")) - -1) - ((not (eq? (string-ref dirpath 0) #\/)) - (if (common:low-noise-print 300 "disks not a proper path " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it.")) - -1) - (else - (get-free-inodes dirpath)))) - ;;(free-inodes (get-free-inodes dirpath)) - ) - (debug:print 2 *default-log-port* "INFO: disk " disk-num " path " dirpath " free space " freespc " free inodes " free-inodes) - (if (and (> freespc bestsize)(> free-inodes min-inodes )) - (begin - (set! best (cons disk-num dirpath)) - (set! bestsize freespc))) - ;;(print "Processing: " disk-num " bestsize: " bestsize " best: " best " freespc: " freespc " min-inodes: " min-inodes " free-inodes: " free-inodes) - )) - (map car disks)) - (if (and best (> bestsize minsize)) - best - #f))) ;; #f means no disk candidate found - -;;====================================================================== -;; convert a spec string to a list of vectors #( rx action rx-string ) -(define (common:spec-string->list-of-specs spec-string actions) - (let ((spec-strings (string-split-fields "\\s*;\\s*" spec-string #:infix)) - (actions-regex (regexp (conc "^(.*)\\s+(" (string-intersperse (map conc actions) "|") ")")))) - (filter - (lambda (x) x) - (map (lambda (s) - (let ((m (string-match actions-regex s))) - (if m - (vector (regexp (cadr m))(string->symbol (caddr m))(cadr m)) - (begin - (debug:print 0 *default-log-port* "WARNING: Unrecognised rule \"" s "\" in clean-up specification.") - #f)))) - spec-strings)))) - -;;====================================================================== -;; given a list of specs rx . rule and a file return the first matching rule -;; -(define (common:file-find-rule fname rules) ;; rule is vector #( rx action rx-string) - (let loop ((rule (car rules)) - (tail (cdr rules))) - (let ((rx (vector-ref rule 0)) - (rn (vector-ref rule 1))) ;; rule name - (if (string-match rx fname) - rule ;; return the whole rule so regex can be printed etc. - (if (null? tail) - #f - (loop (car tail)(cdr tail))))))) - -;;====================================================================== -;; given a spec apply some rules to a directory -;; -;; WARNING: This function will REMOVE files - be sure your spec and path is correct! -;; -;; spec format: -;; file-regex1 action; file-regex2 action; ... -;; e.g. -;; .*\.log$ keep; .* remove -;; --> keep all .log files, remove everything else -;; limitations: -;; cannot have a rule with ; as part of the spec -;; not very flexible, would be nice to return binned file names? -;; supported rules: -;; keep - keep this file -;; remove - remove this file -;; compress - compress this file -;; -(define (common:dir-clean-up path spec-string #!key (compress "gzip")(actions '(keep remove compress))(remove-empty #f)) - (let* ((specs (common:spec-string->list-of-specs spec-string actions)) - (keepers (make-hash-table)) - (directories (make-hash-table))) - (find-files - path - action: (lambda (p res) - (let ((rule (common:file-find-rule p specs))) - (cond - ((directory? p)(hash-table-set! directories p #t)) - (else - (case (vector-ref rule 1) - ((keep)(hash-table-set! keepers p rule)) - ((remove) - (print "Removing file " p) - (delete-file p)) - ((compress) - (print "Compressing file " p) - (system (conc compress " " p))) - (else - (print "No match for file " p)))))))) - (if remove-empty - (for-each - (lambda (d) - (if (null? (glob (conc d "/.*")(conc d "/*"))) - (begin - (print "Removing empty directory " d) - (delete-directory d)))) - (sort (hash-table-keys directories) (lambda (a b)(> (string-length a)(string-length b)))))) - )) - -;;====================================================================== -;; E N V I R O N M E N T V A R S -;;====================================================================== - -(define (bb-check-path #!key (msg "check-path: ")) - (let ((path (or (get-environment-variable "PATH") "none"))) - (debug:print-info 0 *default-log-port* (conc msg" : $PATH="path)) - (if (string-match "^.*/isoenv-core/.*" path) - (debug:print-error 0 *default-log-port* (conc msg" : !!ISOENV PRESENT!!")) ;; remove for prod - (debug:print-info 1 *default-log-port* (conc msg" : **no isoenv present**"))))) - -(define (save-environment-as-files fname #!key (ignorevars (list "USER" "HOME" "DISPLAY" "LS_COLORS" "XKEYSYMDB" "EDITOR" "MAKEFLAGS" "MAKEF" "MAKEOVERRIDES"))) - ;;(bb-check-path msg: "save-environment-as-files entry") - (let ((envvars (get-environment-variables)) - (whitesp (regexp "[^a-zA-Z0-9_\\-:,\\.\\/%$]")) - (mungeval (lambda (val) - (cond - ((eq? val #t) "") ;; convert #t to empty string - ((eq? val #f) #f) ;; convert #f to itself (still thinking about this one - (else val))))) - (with-output-to-file (conc fname ".csh") - (lambda () - (for-each (lambda (keyval) - (let* ((key (car keyval)) - (val (cdr keyval)) - (delim (if (string-search whitesp val) - "\"" - ""))) - (print (if (or (member key ignorevars) - (string-search whitesp key)) - "# setenv " - "setenv ") - key " " delim (mungeval val) delim))) - envvars))) - (with-output-to-file (conc fname ".sh") - (lambda () - (for-each (lambda (keyval) - (let* ((key (car keyval)) - (val (cdr keyval)) - (delim (if (string-search whitesp val) - "\"" - ""))) - (print (if (or (member key ignorevars) - (string-search whitesp key) - (string-search ":" key)) ;; internal only values to be skipped. - "# export " - "export ") - key "=" delim (mungeval val) delim))) - envvars))))) - -(define (common:get-param-mapping #!key (flavor #f)) - "returns alist mapping string keys in testconfig/subrun to megatest command line switches; if flavor is switch-symbol, maps tcmt symbolic switches to megatest switches" - (let ((default '(("tag-expr" . "-tagexpr") - ("mode-patt" . "-modepatt") - ("run-name" . "-runname") - ("contour" . "-contour") - ("target" . "-target") - ("test-patt" . "-testpatt") - ("msg" . "-m") - ("log" . "-log") - ("start-dir" . "-start-dir") - ("new" . "-set-state-status")))) - (if (eq? flavor 'switch-symbol) - (map (lambda (x) - (cons (string->symbol (conc "-" (car x))) (cdr x))) - default) - default))) - -;;====================================================================== -;; set some env vars from an alist, return an alist with original values -;; (("VAR" "value") ...) -;; a value of #f means "unset this var" -;; -(define (alist->env-vars lst) - (if (list? lst) - (let ((res '())) - (for-each (lambda (p) - (let* ((var (car p)) - (val (cadr p)) - (prv (get-environment-variable var))) - (set! res (cons (list var prv) res)) - (if val - (safe-setenv var (->string val)) - (unsetenv var)))) - lst) - res) - '())) - -;;====================================================================== -;; clear vars matching pattern, run proc, set vars back -;; if proc is a string run that string as a command with -;; system. -;; -(define *common:orig-env* - (let ((envvars (get-environment-variables))) - (if (get-environment-variable "MT_ORIG_ENV") - (with-input-from-string - (z3:decode-buffer (base64:base64-decode (get-environment-variable "MT_ORIG_ENV"))) - read) - (filter-map (lambda (x) - (if (string-match "^MT_.*" (car x)) - #f - x)) - envvars)))) - -(define (common:with-orig-env proc) - (let ((current-env (get-environment-variables))) - (for-each (lambda (x) (unsetenv (car x))) current-env) - (for-each (lambda (x) (setenv (car x) (cdr x))) *common:orig-env*) - (let ((rv (cond - ((string? proc)(system proc)) - (proc (proc))))) - (for-each (lambda (x) (unsetenv (car x))) *common:orig-env*) - (for-each (lambda (x) (setenv (car x) (cdr x))) current-env) - rv))) - -(define (common:without-vars proc . var-patts) - (let ((vars (make-hash-table))) - (for-each - (lambda (vardat) ;; each env var - (for-each - (lambda (var-patt) - (if (string-match var-patt (car vardat)) - (let ((var (car vardat)) - (val (cdr vardat))) - (hash-table-set! vars var val) - (unsetenv var)))) - var-patts)) - (get-environment-variables)) - (cond - ((string? proc)(system proc)) - (proc (proc))) - (hash-table-for-each - vars - (lambda (var val) - (setenv var val))) - vars)) - -(define (common:run-a-command cmd #!key (with-vars #f) (with-orig-env #f)) - (let* ((pre-cmd (dtests:get-pre-command)) - (post-cmd (dtests:get-post-command)) - (fullcmd (if (or pre-cmd post-cmd) - (conc pre-cmd cmd post-cmd) - (conc "viewscreen " cmd)))) - (debug:print-info 02 *default-log-port* "Running command: " fullcmd) - (cond - (with-vars (common:without-vars fullcmd)) - (with-orig-env (common:with-orig-env fullcmd)) - (else (common:without-vars fullcmd "MT_.*"))))) - -;;====================================================================== -;; T I M E A N D D A T E -;;====================================================================== - -;;====================================================================== -;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5 -(define (common:hms-string->seconds tstr) - (let ((parts (string-split-fields "\\w+" tstr)) - (time-secs 0) - ;; s=seconds, m=minutes, h=hours, d=days, M=months, y=years, w=weeks - (trx (regexp "(\\d+)([smhdMyw])"))) - (for-each (lambda (part) - (let ((match (string-match trx part))) - (if match - (let ((val (string->number (cadr match))) - (unt (caddr match))) - (if val - (set! time-secs (+ time-secs (* val - (case (string->symbol unt) - ((s) 1) - ((m) 60) ;; minutes - ((h) 3600) - ((d) 86400) - ((w) 604800) - ((M) 2628000) ;; aproximately one month - ((y) 31536000) - (else #f)))))))))) - parts) - time-secs)) - -(define (seconds->hr-min-sec secs) - (let* ((hrs (quotient secs 3600)) - (min (quotient (- secs (* hrs 3600)) 60)) - (sec (- secs (* hrs 3600)(* min 60)))) - (conc (if (> hrs 0)(conc hrs "hr ") "") - (if (> min 0)(conc min "m ") "") - sec "s"))) - -(define (seconds->time-string sec) - (time->string - (seconds->local-time sec) "%H:%M:%S")) - -(define (seconds->work-week/day-time sec) - (time->string - (seconds->local-time sec) "ww%V.%u %H:%M")) - -(define (seconds->work-week/day sec) - (time->string - (seconds->local-time sec) "ww%V.%u")) - -(define (seconds->year-work-week/day sec) - (time->string - (seconds->local-time sec) "%yww%V.%w")) - -(define (seconds->year-work-week/day-time sec) - (time->string - (seconds->local-time sec) "%Yww%V.%w %H:%M")) - -(define (seconds->year-week/day-time sec) - (time->string - (seconds->local-time sec) "%Yw%V.%w %H:%M")) - -(define (seconds->quarter sec) - (case (string->number - (time->string - (seconds->local-time sec) - "%m")) - ((1 2 3) 1) - ((4 5 6) 2) - ((7 8 9) 3) - ((10 11 12) 4) - (else #f))) - -;;====================================================================== -;; basic ISO8601 format (e.g. "2017-02-28 06:02:54") date time => Unix epoch -;; -(define (common:date-time->seconds datetime) - (local-time->seconds (string->time datetime "%Y-%m-%d %H:%M:%S"))) - -;;====================================================================== -;; given span of seconds tstart to tend -;; find start time to mark and mark delta -;; -(define (common:find-start-mark-and-mark-delta tstart tend) - (let* ((deltat (- (max tend (+ tend 10)) tstart)) ;; can't handle runs of less than 4 seconds. Pad it to 10 seconds ... - (result #f) - (min 60) - (hr (* 60 60)) - (day (* 24 hr)) - (yr (* 365 day)) ;; year - (mo (/ yr 12)) - (wk (* day 7))) - (for-each - (lambda (max-blks) - (for-each - (lambda (span) ;; 5 2 1 - (if (not result) - (for-each - (lambda (timeunit timesym) ;; year month day hr min sec - (if (not result) - (let* ((time-blk (* span timeunit)) - (num-blks (quotient deltat time-blk))) - (if (and (> num-blks 4)(< num-blks max-blks)) - (let ((first (* (quotient tstart time-blk) time-blk))) - (set! result (list span timeunit time-blk first timesym)) - ))))) - (list yr mo wk day hr min 1) - '( y mo w d h m s)))) - (list 8 6 5 2 1))) - '(5 10 15 20 30 40 50 500)) - (if values - (apply values result) - (values 0 day 1 0 'd)))) - -;;====================================================================== -;; given x y lim return the cron expansion -;; -(define (common:expand-cron-slash x y lim) - (let loop ((curr x) - (res `())) - (if (< curr lim) - (loop (+ curr y) (cons curr res)) - (reverse res)))) - -;;====================================================================== -;; expand a complex cron string to a list of cron strings -;; -;; x/y => x, x+y, x+2y, x+3y while x+Ny a, b ,c -;; -;; NOTE: with flatten a lot of the crud below can be factored down. -;; -(define (common:cron-expand cron-str) - (if (list? cron-str) - (flatten - (fold (lambda (x res) - (if (list? x) - (let ((newres (map common:cron-expand x))) - (append x newres)) - (cons x res))) - '() - cron-str)) ;; (map common:cron-expand cron-str)) - (let ((cron-items (string-split cron-str)) - (slash-rx (regexp "(\\d+)/(\\d+)")) - (comma-rx (regexp ".*,.*")) - (max-vals '((min . 60) - (hour . 24) - (dayofmonth . 28) ;;; BUG!!!! This will be a bug for some combinations - (month . 12) - (dayofweek . 7)))) - (if (< (length cron-items) 5) ;; bad spec - cron-str ;; `(,cron-str) ;; just return the string, something downstream will fix it - (let loop ((hed (car cron-items)) - (tal (cdr cron-items)) - (type 'min) - (type-tal '(hour dayofmonth month dayofweek)) - (res '())) - (regex-case - hed - (slash-rx ( _ base incr ) (let* ((basen (string->number base)) - (incrn (string->number incr)) - (expanded-vals (common:expand-cron-slash basen incrn (alist-ref type max-vals))) - (new-list-crons (fold (lambda (x myres) - (cons (conc (if (null? res) - "" - (conc (string-intersperse res " ") " ")) - x " " (string-intersperse tal " ")) - myres)) - '() expanded-vals))) - ;; (print "new-list-crons: " new-list-crons) - ;; (fold (lambda (x res) - ;; (if (list? x) - ;; (let ((newres (map common:cron-expand x))) - ;; (append x newres)) - ;; (cons x res))) - ;; '() - (flatten (map common:cron-expand new-list-crons)))) - ;; (map common:cron-expand (map common:cron-expand new-list-crons)))) - (else (if (null? tal) - cron-str - (loop (car tal)(cdr tal)(car type-tal)(cdr type-tal)(append res (list hed))))))))))) - -;;====================================================================== -;; given a cron string and the last time event was processed return #t to run or #f to not run -;; -;; min hour dayofmonth month dayofweek -;; 0-59 0-23 1-31 1-12 0-6 ### NOTE: dayofweek does not include 7 -;; -;; #t => yes, run the job -;; #f => no, do not run the job -;; -(define (common:cron-event cron-str now-seconds-in last-done) ;; ref-seconds = #f is NOW. - (let* ((cron-items (map string->number (string-split cron-str))) - (now-seconds (or now-seconds-in (current-seconds))) - (now-time (seconds->local-time now-seconds)) - (last-done-time (seconds->local-time last-done)) - (all-times (make-hash-table))) - ;; (print "cron-items: " cron-items "(length cron-items): " (length cron-items)) - (if (not (eq? (length cron-items) 5)) ;; don't even try to figure out junk strings - #f - (match-let ((( cmin chour cdayofmonth cmonth cdayofweek) - cron-items) - ;; 0 1 2 3 4 5 6 - ((nsec nmin nhour ndayofmonth nmonth nyr ndayofweek n7 n8 n9) - (vector->list now-time)) - ((lsec lmin lhour ldayofmonth lmonth lyr ldayofweek l7 l8 l9) - (vector->list last-done-time))) - ;; create all possible time slots - ;; remove invalid slots due to (for example) day of week - ;; get the start and end entries for the ref-seconds (current) time - ;; if last-done > ref-seconds => this is an ERROR! - ;; does the last-done time fall in the legit region? - ;; yes => #f do not run again this command - ;; no => #t ok to run the command - (for-each ;; month - (lambda (month) - (for-each ;; dayofmonth - (lambda (dom) - (for-each - (lambda (hr) ;; hour - (for-each - (lambda (minute) ;; minute - (let ((copy-now (apply vector (vector->list now-time)))) - (vector-set! copy-now 0 0) ;; force seconds to zero - (vector-set! copy-now 1 minute) - (vector-set! copy-now 2 hr) - (vector-set! copy-now 3 dom) ;; dom is already corrected for zero referenced - (vector-set! copy-now 4 month) - (let* ((copy-now-secs (local-time->seconds copy-now)) - (new-copy (seconds->local-time copy-now-secs))) ;; remake the time vector - (if (or (not cdayofweek) - (equal? (vector-ref new-copy 6) - cdayofweek)) ;; if the day is specified and a match OR if the day is NOT specified - (if (or (not cdayofmonth) - (equal? (vector-ref new-copy 3) - (+ 1 cdayofmonth))) ;; if the month is specified and a match OR if the month is NOT specified - (hash-table-set! all-times copy-now-secs new-copy)))))) - (if cmin - `(,cmin) ;; if given cmin, have to use it - (list (- nmin 1) nmin (+ nmin 1))))) ;; minute - (if chour - `(,chour) - (list (- nhour 1) nhour (+ nhour 1))))) ;; hour - (if cdayofmonth - `(,cdayofmonth) - (list (- ndayofmonth 1) ndayofmonth (+ ndayofmonth 1))))) - (if cmonth - `(,cmonth) - (list (- nmonth 1) nmonth (+ nmonth 1)))) - (let ((before #f) - (is-in #f)) - (for-each - (lambda (moment) - (if (and before - (<= before now-seconds) - (>= moment now-seconds)) - (begin - ;; (print) - ;; (print "Before: " (time->string (seconds->local-time before))) - ;; (print "Now: " (time->string (seconds->local-time now-seconds))) - ;; (print "After: " (time->string (seconds->local-time moment))) - ;; (print "Last: " (time->string (seconds->local-time last-done))) - (if (< last-done before) - (set! is-in before)) - )) - (set! before moment)) - (sort (hash-table-keys all-times) <)) - is-in))))) - -(define (common:extended-cron cron-str now-seconds-in last-done) - (let ((expanded-cron (common:cron-expand cron-str))) - (if (string? expanded-cron) - (common:cron-event expanded-cron now-seconds-in last-done) - (let loop ((hed (car expanded-cron)) - (tal (cdr expanded-cron))) - (if (common:cron-event hed now-seconds-in last-done) - #t - (if (null? tal) - #f - (loop (car tal)(cdr tal)))))))) - -;;====================================================================== -;; C O L O R S -;;====================================================================== - -(define (common:name->iup-color name) - (case (string->symbol (string-downcase name)) - ((red) "223 33 49") - ((grey) "192 192 192") - ((orange) "255 172 13") - ((purple) "This is unfinished ..."))) - -;;====================================================================== -;; (define (common:get-color-for-state-status state status) -;; (case (string->symbol state) -;; ((COMPLETED) -;; (case (string->symbol status) -;; ((PASS) "70 249 73") -;; ((WARN WAIVED) "255 172 13") -;; ((SKIP) "230 230 0") -;; (else "223 33 49"))) -;; ((LAUNCHED) "101 123 142") -;; ((CHECK) "255 100 50") -;; ((REMOTEHOSTSTART) "50 130 195") -;; ((RUNNING) "9 131 232") -;; ((KILLREQ) "39 82 206") -;; ((KILLED) "234 101 17") -;; ((NOT_STARTED) "240 240 240") -;; (else "192 192 192"))) - -(define (common:iup-color->rgb-hex instr) - (string-intersperse - (map (lambda (x) - (number->string x 16)) - (map string->number - (string-split instr))) - "/")) - -;;====================================================================== -;; L O C K I N G M E C H A N I S M S -;;====================================================================== - -;;====================================================================== -;; faux-lock is deprecated. Please use simple-lock below -;; -(define (common:faux-lock keyname #!key (wait-time 8)(allow-lock-steal #t)) - (if (rmt:no-sync-get/default keyname #f) ;; do not be tempted to compare to pid. locking is a one-shot action, if already locked for this pid it doesn't actually count - (if (> wait-time 0) - (begin - (thread-sleep! 1) - (if (eq? wait-time 1) ;; only one second left, steal the lock - (begin - (debug:print-info 0 *default-log-port* "stealing lock for " keyname) - (common:faux-unlock keyname force: #t))) - (common:faux-lock keyname wait-time: (- wait-time 1))) - #f) - (begin - (rmt:no-sync-set keyname (conc (current-process-id))) - (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f)))))) - -(define (common:faux-unlock keyname #!key (force #f)) - (if (or force (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f)))) - (begin - (if (rmt:no-sync-get/default keyname #f) (rmt:no-sync-del! keyname)) - #t) - #f)) - -;;====================================================================== -;; simple lock. improve and converge on this one. -;; -(define (common:simple-lock keyname) - (rmt:no-sync-get-lock keyname)) - -(define (common:simple-unlock keyname #!key (force #f)) - (rmt:no-sync-del! keyname)) - -;;====================================================================== -;; -;;====================================================================== - -(define (common:in-running-test?) - (and (args:get-arg "-execute") (get-environment-variable "MT_CMDINFO"))) - -(define (common:get-color-from-status status) - (cond - ((equal? status "PASS") "green") - ((equal? status "FAIL") "red") - ((equal? status "WARN") "orange") - ((equal? status "KILLED") "orange") - ((equal? status "KILLREQ") "purple") - ((equal? status "RUNNING") "blue") - ((equal? status "ABORT") "brown") - (else "black"))) +(define (common:with-queue-db mtconf proc #!key (use-lt #f)(toppath-in #f)) + (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt)) + (pktsdir (if pktsdirs (car pktsdirs) #f)) + (toppath (or (configf:lookup mtconf "scratchdat" "toppath") + toppath-in)) + (pdbpath (or (configf:lookup mtconf "setup" "pdbpath") pktsdir))) + (cond + ((not (and pktsdir toppath pdbpath)) + (debug:print 0 *default-log-port* "ERROR: settings are missing in your megatest.config for area management.") + (debug:print 0 *default-log-port* " you need to have pktsdirs in the [setup] section.")) + ((not (common:file-exists? pktsdir)) + (debug:print 0 *default-log-port* "ERROR: pkts directory not found " pktsdir)) + ((not (equal? (file-owner pktsdir)(current-effective-user-id))) + (debug:print 0 *default-log-port* "ERROR: directory " pktsdir " is not owned by " (current-effective-user-name))) + (else + (let* ((pdb (open-queue-db pdbpath "pkts.db" + schema: '("CREATE TABLE groups (id INTEGER PRIMARY KEY,groupname TEXT, CONSTRAINT group_constraint UNIQUE (groupname));")))) + (proc pktsdirs pktsdir pdb) + (dbi:close pdb)))))) + +(define (common:load-pkts-to-db mtconf #!key (use-lt #f)) + (common:with-queue-db + mtconf + (lambda (pktsdirs pktsdir pdb) + (for-each + (lambda (pktsdir) ;; look at all + (cond + ((not (common:file-exists? pktsdir)) + (debug:print 0 *default-log-port* "ERROR: packets directory " pktsdir " does not exist.")) + ((not (directory? pktsdir)) + (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not a directory.")) + ((not (file-readable? pktsdir)) + (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not readable.")) + (else + (debug:print-info 0 *default-log-port* "Loading packets found in " pktsdir) + (let ((pkts (glob (conc pktsdir "/*.pkt")))) + (for-each + (lambda (pkt) + (let* ((uuid (cadr (string-match ".*/([0-9a-f]+).pkt" pkt))) + (exists (lookup-by-uuid pdb uuid #f))) + (if (not exists) + (let* ((pktdat (string-intersperse + (with-input-from-file pkt read-lines) + "\n")) + (apkt (pkt->alist pktdat)) + (ptype (alist-ref 'T apkt))) + (add-to-queue pdb pktdat uuid (or ptype 'cmd) #f 0) + (debug:print 4 *default-log-port* "Added " uuid " of type " ptype " to queue")) + (debug:print 4 *default-log-port* "pkt: " uuid " exists, skipping...") + ))) + pkts))))) + pktsdirs)) + use-lt: use-lt)) + ;;====================================================================== ;; N A N O M S G C L I E N T ;;====================================================================== ;; @@ -3256,404 +245,448 @@ fallback-launcher (loop (car tal)(cdr tal)))))))) fallback-launcher))) ;;====================================================================== -;; NMSG AND NEW API -;;====================================================================== -;; -;; ;;====================================================================== -;; ;; nm based server experiment, keep around for now. -;; ;; -;; (define (nm:start-server dbconn #!key (given-host-name #f)) -;; (let* ((srvdat (start-raw-server given-host-name: given-host-name)) -;; (host-name (srvdat-host srvdat)) -;; (soc (srvdat-soc srvdat))) -;; -;; ;; start the queue processor (save for second round of development) -;; ;; -;; (thread-start! (make-thread! (lambda ()(queue-processor dbconn) "Queue processor"))) -;; ;; msg is an alist -;; ;; 'r host:port <== where to return the data -;; ;; 'p params <== data to apply the command to -;; ;; 'e j|s|l <== encoding of the params. default is s (sexp), if not specified is assumed to be default -;; ;; 'c command <== look up the function to call using this key -;; ;; -;; (let loop ((msg-in (nn-recv soc))) -;; (if (not (equal? msg-in "quit")) -;; (let* ((dat (decode msg-in)) -;; (host-port (alist-ref 'r dat)) ;; this is for the reverse req rep where the server is a client of the original client -;; (params (alist-ref 'p dat)) -;; (command (let ((c (alist-ref 'c dat)))(if c (string->symbol c) #f))) -;; (all-good (and host-port params command (hash-table-exists? *commands* command)))) -;; (if all-good -;; (let ((cmddat (make-qitem -;; command: command -;; host-port: host-port -;; params: params))) -;; (queue-push cmddat) ;; put request into the queue -;; (nn-send soc "queued")) ;; reply with "queued" -;; (print "ERROR: ["(common:human-time)"] BAD request " dat)) -;; (loop (nn-recv soc))))) -;; (nn-close soc))) - -;;====================================================================== -;; D A S H B O A R D U S E R V I E W S -;;====================================================================== - -;;====================================================================== -;; first read ~/views.config if it exists, then read $MTRAH/views.config if it exists -;; -(define (common:load-views-config) - (let* ((view-cfgdat (make-hash-table)) - (home-cfgfile (conc (get-environment-variable "HOME") "/.mtviews.config")) - (mthome-cfgfile (conc *toppath* "/.mtviews.config"))) - (if (common:file-exists? mthome-cfgfile) - (read-config mthome-cfgfile view-cfgdat #t)) - ;; we load the home dir file AFTER the MTRAH file so the user can clobber settings when running the dashboard in read-only areas - (if (common:file-exists? home-cfgfile) - (read-config home-cfgfile view-cfgdat #t)) - view-cfgdat)) - -;;====================================================================== -;; H I E R A R C H I C A L H A S H T A B L E S -;;====================================================================== -;; -;; Every element including top element is a vector: -;; - -(define (hh:make-hh #!key (ht #f)(value #f)) - (vector (or ht (make-hash-table)) value)) - -;;====================================================================== -;; used internally -(define-inline (hh:set-ht! hh ht) (vector-set! hh 0 ht)) -(define-inline (hh:get-ht hh) (vector-ref hh 0)) -(define-inline (hh:set-value! hh value) (vector-set! hh 1 value)) -(define-inline (hh:get-value hh value) (vector-ref hh 1)) - -;;====================================================================== -;; given a hierarchial hash and some keys look up the value ... -;; -(define (hh:get hh . keys) - (if (null? keys) - (vector-ref hh 1) ;; we have reached the end of the line, return the value sought - (let ((sub-ht (hh:get-ht hh))) - (if sub-ht ;; yes, there is more hierarchy - (let ((sub-hh (hash-table-ref/default sub-ht (car keys) #f))) - (if sub-hh - (apply hh:get sub-hh (cdr keys)) - #f)) - #f)))) - -;;====================================================================== -;; given a hierarchial hash, a value and some keys, add needed hierarcy and insert the value -;; -(define (hh:set! hh value . keys) - (if (null? keys) - (hh:set-value! hh value) ;; we have reached the end of the line, store the value - (let ((sub-ht (hh:get-ht hh))) - (if sub-ht ;; yes, there is more hierarchy - (let ((sub-hh (hash-table-ref/default sub-ht (car keys) #f))) - (if (not sub-hh) ;; we'll need to add the next level of hierarchy - (let ((new-sub-hh (hh:make-hh))) - (hash-table-set! sub-ht (car keys) new-sub-hh) - (apply hh:set! new-sub-hh value (cdr keys))) - (apply hh:set! sub-hh value (cdr keys)))) ;; call the sub-hierhash with remaining keys - (begin - (hh:set-ht! hh (make-hash-table)) - (apply hh:set! hh value keys)))))) - -;;====================================================================== -;; Manage pkts, used in servers, tests and likely other contexts so put -;; in common -;;====================================================================== - -(define common:pkts-spec - '((default . ((parent . P) - (action . a) - (filename . f))) - (configf . ((parent . P) - (action . a) - (filename . f))) - (server . ((action . a) - (pid . d) - (ipaddr . i) - (port . p) - (parent . P))) - - (test . ((cpuuse . c) - (diskuse . d) - (item-path . i) - (runname . r) - (state . s) - (target . t) - (status . u) - (parent . P))))) - -(define (common:get-pkts-dirs mtconf use-lt) - (let* ((pktsdirs-str (or (configf:lookup mtconf "setup" "pktsdirs") - (and use-lt - (conc (or *toppath* - (current-directory)) - "/lt/.pkts")))) - (pktsdirs (if pktsdirs-str - (string-split pktsdirs-str " ") - #f))) - pktsdirs)) - -;;====================================================================== -;; use-lt is use linktree "lt" link to find pkts dir -(define (common:save-pkt pktalist-in mtconf use-lt #!key (add-only #f)) ;; add-only saves the pkt only if there is a parent already - (if (or add-only - (hash-table-exists? *pkts-info* 'last-parent)) - (let* ((parent (hash-table-ref/default *pkts-info* 'last-parent #f)) - (pktalist (if parent - (cons `(parent . ,parent) - pktalist-in) - pktalist-in))) - (let-values (((uuid pkt) - (alist->pkt pktalist common:pkts-spec))) - (hash-table-set! *pkts-info* 'last-parent uuid) - (let ((pktsdir (or (hash-table-ref/default *pkts-info* 'pkts-dir #f) - (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt)) - (pktsdir (car pktsdirs))) ;; assume it is there - (hash-table-set! *pkts-info* 'pkts-dir pktsdir) - pktsdir)))) - (handle-exceptions - exn - (debug:print-info 0 "failed to write out packet to " pktsdir ", exn=" exn) ;; don't care if this failed for now but MUST FIX - BUG!! - (if (not (file-exists? pktsdir)) - (create-directory pktsdir #t)) - (with-output-to-file - (conc pktsdir "/" uuid ".pkt") - (lambda () - (print pkt))))))))) - -(define (common:with-queue-db mtconf proc #!key (use-lt #f)(toppath-in #f)) - (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt)) - (pktsdir (if pktsdirs (car pktsdirs) #f)) - (toppath (or (configf:lookup mtconf "scratchdat" "toppath") - toppath-in)) - (pdbpath (or (configf:lookup mtconf "setup" "pdbpath") pktsdir))) - (cond - ((not (and pktsdir toppath pdbpath)) - (debug:print 0 *default-log-port* "ERROR: settings are missing in your megatest.config for area management.") - (debug:print 0 *default-log-port* " you need to have pktsdirs in the [setup] section.")) - ((not (common:file-exists? pktsdir)) - (debug:print 0 *default-log-port* "ERROR: pkts directory not found " pktsdir)) - ((not (equal? (file-owner pktsdir)(current-effective-user-id))) - (debug:print 0 *default-log-port* "ERROR: directory " pktsdir " is not owned by " (current-effective-user-name))) - (else - (let* ((pdb (open-queue-db pdbpath "pkts.db" - schema: '("CREATE TABLE groups (id INTEGER PRIMARY KEY,groupname TEXT, CONSTRAINT group_constraint UNIQUE (groupname));")))) - (proc pktsdirs pktsdir pdb) - (dbi:close pdb)))))) - -(define (common:load-pkts-to-db mtconf #!key (use-lt #f)) - (common:with-queue-db - mtconf - (lambda (pktsdirs pktsdir pdb) - (for-each - (lambda (pktsdir) ;; look at all - (cond - ((not (common:file-exists? pktsdir)) - (debug:print 0 *default-log-port* "ERROR: packets directory " pktsdir " does not exist.")) - ((not (directory? pktsdir)) - (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not a directory.")) - ((not (file-readable? pktsdir)) - (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not readable.")) - (else - (debug:print-info 0 *default-log-port* "Loading packets found in " pktsdir) - (let ((pkts (glob (conc pktsdir "/*.pkt")))) - (for-each - (lambda (pkt) - (let* ((uuid (cadr (string-match ".*/([0-9a-f]+).pkt" pkt))) - (exists (lookup-by-uuid pdb uuid #f))) - (if (not exists) - (let* ((pktdat (string-intersperse - (with-input-from-file pkt read-lines) - "\n")) - (apkt (pkt->alist pktdat)) - (ptype (alist-ref 'T apkt))) - (add-to-queue pdb pktdat uuid (or ptype 'cmd) #f 0) - (debug:print 4 *default-log-port* "Added " uuid " of type " ptype " to queue")) - (debug:print 4 *default-log-port* "pkt: " uuid " exists, skipping...") - ))) - pkts))))) - pktsdirs)) - use-lt: use-lt)) - -(define (common:get-pkt-alists pkts) - (map (lambda (x) - (alist-ref 'apkt x)) ;; 'pkta pulls out the alist from the read pkt - pkts)) - -;;====================================================================== -;; given list of pkts (alist mode) return list of D cards as Unix epoch, sorted descending -;; also delete duplicates by target i.e. (car pkt) -;; -(define (common:get-pkt-times pkts) - (delete-duplicates - (sort - (map (lambda (x) - `(,(alist-ref 't x) . ,(string->number (alist-ref 'D x)))) - pkts) - (lambda (a b)(> (cdr a)(cdr b)))) ;; sort descending - (lambda (a b)(equal? (car a)(car b))))) ;; remove duplicates by target - -;;====================================================================== -;; accept an alist or hash table containing envvar/env value pairs (value of #f causes unset) -;; execute thunk in context of environment modified as per this list -;; restore env to prior state then return value of eval'd thunk. -;; ** this is not thread safe ** -(define (common:with-env-vars delta-env-alist-or-hash-table thunk) - (let* ((delta-env-alist (if (hash-table? delta-env-alist-or-hash-table) - (hash-table->alist delta-env-alist-or-hash-table) - delta-env-alist-or-hash-table)) - (restore-thunks - (filter - identity - (map (lambda (env-pair) - (let* ((env-var (car env-pair)) - (new-val (let ((tmp (cdr env-pair))) - (if (list? tmp) (car tmp) tmp))) - (current-val (get-environment-variable env-var)) - (restore-thunk - (cond - ((not current-val) (lambda () (unsetenv env-var))) - ((not (string? new-val)) #f) - ((eq? current-val new-val) #f) - (else - (lambda () (setenv env-var current-val)))))) - ;;(when (not (string? new-val)) - ;; (debug:print 0 *default-log-port* " PROBLEM: not a string: "new-val"\n from env-alist:\n"delta-env-alist) - ;; (pp delta-env-alist) - ;; (exit 1)) - - - (cond - ((not new-val) ;; modify env here - (unsetenv env-var)) - ((string? new-val) - (setenv env-var new-val))) - restore-thunk)) - delta-env-alist)))) - (let ((rv (thunk))) - (for-each (lambda (x) (x)) restore-thunks) ;; restore env to original state - rv))) - -(define *common:thread-punchlist* (make-hash-table)) -(define (common:send-thunk-to-background-thread thunk #!key (name #f)) - ;;(BB> "launched thread " name) - ;; we need a unique name for the thread. - (let* ((realname (if name - (if (not (hash-table-ref/default *common:thread-punchlist* name #f)) - name - (conc name"-" (symbol->string (gensym)))) - (conc "anonymous-"(symbol->string (gensym))))) - (realthunk (lambda () - (let ((res (thunk))) - (hash-table-delete! *common:thread-punchlist* realname) - res))) - (thread (make-thread realthunk realname))) - (hash-table-set! *common:thread-punchlist* realname thread) - (thread-start! thread) - )) - -(define (common:join-backgrounded-threads) - ;; may need to trap and ignore exceptions -- dunno how atomic threads are... - (for-each - (lambda (thread-name) - (let* ((thread (hash-table-ref/default *common:thread-punchlist* thread-name #f))) - (if thread - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "joining threads failed. exn=" exn) - #t) ;; just ignore it, it might have died in the meantime so joining it will throw an exception - (thread-join! thread)) - ))) - (hash-table-keys *common:thread-punchlist*))) - -(define (dtests:get-pre-command #!key (default-override #f)) - (let* ((orig-pre-command "export CMD='") - (viewscreen-pre-command "viewscreen ") - (use-viewscreen (configf:lookup *configdat* "dashboard" "use-viewscreen")) - (default-pre-command (if use-viewscreen viewscreen-pre-command orig-pre-command)) - (cfg-ovrd (configf:lookup *configdat* "dashboard" "pre-command"))) - (or cfg-ovrd default-override default-pre-command))) ;; "xterm -geometry 180x20 -e \"")) - - -(define (dtests:get-post-command #!key (default-override #f)) - (let* ((orig-post-command (conc "';xterm -geometry 180x20 -e \"(echo; echo -n START:;date +ww%U.%w-$H:%M:%S;echo;echo $CMD;echo;$CMD)|&" - "tee -a runlog-`date +ww%U.%w-%H:%M`.log;echo Press any key to continue;bash -c 'read -n 1 -s'\" &")) - (viewscreen-post-command "") - (use-viewscreen (configf:lookup *configdat* "dashboard" "use-viewscreen")) - (default-post-command (if use-viewscreen viewscreen-post-command orig-post-command)) - (cfg-ovrd (configf:lookup *configdat* "dashboard" "post-command"))) - (or cfg-ovrd default-override default-post-command))) ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) - -;;====================================================================== -;; (define *common:telemetry-log-state* 'startup) -;; (define *common:telemetry-log-socket* #f) -;; -;; (define (common:telemetry-log-open) -;; (if (eq? *common:telemetry-log-state* 'startup) -;; (let* ((serverhost (configf:lookup *configdat* "telemetry" "host")) -;; (serverport (configf:lookup-number *configdat* "telemetry" "port")) -;; (user (or (get-environment-variable "USER") "unknown")) -;; (host (or (get-environment-variable "HOST") "unknown"))) -;; (set! *common:telemetry-log-state* -;; (handle-exceptions -;; exn -;; (begin -;; (debug:print-info 0 *default-log-port* "common-telemetry-log open udp port failure") -;; 'broken) -;; (if (and serverhost serverport user host) -;; (let* ((s (udp-open-socket))) -;; ;;(udp-bind! s #f 0) -;; (udp-connect! s serverhost serverport) -;; (set! *common:telemetry-log-socket* s) -;; 'open) -;; 'not-needed)))))) -;; -;; (define (common:telemetry-log event #!key (payload '())) -;; (if (eq? *common:telemetry-log-state* 'startup) -;; (common:telemetry-log-open)) -;; -;; (if (eq? 'open *common:telemetry-log-state*) -;; (handle-exceptions -;; exn -;; (begin -;; (debug:print-info 0 *default-log-port* "common-telemetry-log comms failure ; disabled (no server?)") -;; ;;(define *common:telemetry-log-state* 'broken-or-no-server-preclose) -;; ;;(common:telemetry-log-close) -;; (define *common:telemetry-log-state* 'broken-or-no-server) -;; (set! *common:telemetry-log-socket* #f) -;; ) -;; (if (and *common:telemetry-log-socket* event) ;; TODO - filter on event against telemetry.want-events -;; (let* ((user (or (get-environment-variable "USER") "unknown")) -;; (host (or (get-environment-variable "HOST") "unknown")) -;; (start (conc "[megatest "event"]")) -;; (toppath (or *toppath* "/dev/null")) -;; (payload-serialized -;; (base64:base64-encode -;; (z3:encode-buffer -;; (with-output-to-string (lambda () (pp payload)))))) -;; (msg (conc user":"host":"start":"(current-process-id)":"(car (argv))":" -;; toppath":"payload-serialized))) -;; (udp-send *common:telemetry-log-socket* msg)))))) -;; -;; (define (common:telemetry-log-close) -;; (when (or (member *common:telemetry-log-state* '(broken-or-no-server-preclose open)) *common:telemetry-log-socket*) -;; (handle-exceptions -;; exn -;; (begin -;; (define *common:telemetry-log-state* 'closed-fail) -;; (debug:print-info 0 *default-log-port* "common-telemetry-log closure failure") -;; ) -;; (begin -;; (define *common:telemetry-log-state* 'closed) -;; (udp-close-socket *common:telemetry-log-socket*) -;; (set! *common:telemetry-log-socket* #f))))) +;; faux-lock is deprecated. Please use simple-lock below +;; +(define (common:faux-lock keyname #!key (wait-time 8)(allow-lock-steal #t)) + (if (rmt:no-sync-get/default keyname #f) ;; do not be tempted to compare to pid. locking is a one-shot action, if already locked for this pid it doesn't actually count + (if (> wait-time 0) + (begin + (thread-sleep! 1) + (if (eq? wait-time 1) ;; only one second left, steal the lock + (begin + (debug:print-info 0 *default-log-port* "stealing lock for " keyname) + (common:faux-unlock keyname force: #t))) + (common:faux-lock keyname wait-time: (- wait-time 1))) + #f) + (begin + (rmt:no-sync-set keyname (conc (current-process-id))) + (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f)))))) + +(define (common:faux-unlock keyname #!key (force #f)) + (if (or force (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f)))) + (begin + (if (rmt:no-sync-get/default keyname #f) (rmt:no-sync-del! keyname)) + #t) + #f)) + +;;====================================================================== +;; simple lock. improve and converge on this one. +;; +(define (common:simple-lock keyname) + (rmt:no-sync-get-lock keyname)) + +(define (common:simple-unlock keyname #!key (force #f)) + (rmt:no-sync-del! keyname)) + +;;====================================================================== +;; ideally put all this info into the db, no need to preserve it across moving homehost +;; +;; return list of +;; ( reachable? cpuload update-time ) +(define (common:get-host-info hostname) + (let* ((loadinfo (rmt:get-latest-host-load hostname)) ;; if this host happens to have been recently used by a test reuse the load data + (load (car loadinfo)) + (load-sample-time (cdr loadinfo)) + (load-sample-age (- (current-seconds) load-sample-time)) + (loadinfo-timeout-seconds 6) ;; this was 20 seconds, seems way too lax. Switch to 6 seconds + (host-last-update-timeout-seconds 4) + (host-rec (hash-table-ref/default *host-loads* hostname #f)) + ) + (cond + ((< load-sample-age loadinfo-timeout-seconds) + (list #t + load-sample-time + load)) + ((and host-rec + (< (current-seconds) (+ (host-last-update host-rec) host-last-update-timeout-seconds))) + (list #t + (host-last-update host-rec) + (host-last-cpuload host-rec ))) + ((common:unix-ping hostname) + (list #t + (current-seconds) + (alist-ref 'adj-core-load (common:get-normalized-cpu-load hostname)))) ;; this is cheaper than you might think. get-normalized-cpu-load is cached for up to 5 seconds + (else + (list #f 0 -1) ;; bad host, don't use! + )))) + +;;====================================================================== +;; see defstruct host at top of file. +;; host: reachable last-update last-used last-cpuload +;; +(define (common:update-host-loads-table hosts-raw) + (let* ((hosts (filter (lambda (x) + (string-match (regexp "^\\S+$") x)) + hosts-raw))) + (for-each + (lambda (hostname) + (let* ((rec (let ((h (hash-table-ref/default *host-loads* hostname #f))) + (if h + h + (let ((h (make-host))) + (hash-table-set! *host-loads* hostname h) + h)))) + (host-info (common:get-host-info hostname)) + (is-reachable (car host-info)) + (last-reached-time (cadr host-info)) + (load (caddr host-info))) + (host-reachable-set! rec is-reachable) + (host-last-update-set! rec last-reached-time) + (host-last-cpuload-set! rec load))) + hosts))) + +;;====================================================================== +;; go through the hosts from least recently used to most recently used, pick the first that meets the load criteral from the +;; [host-rules] section. +;; +(define (common:get-least-loaded-host hosts-raw host-type configdat) + (let* ((rdat (configf:lookup configdat "host-rules" host-type)) + (rules (common:val->alist (or rdat "") convert: #t)) ;; maxnload, maxnjobs, maxjobrate + (maxnload (common:alist-ref/default 'maxnload rules 1.5)) ;; max normalized load + (maxnjobs (common:alist-ref/default 'maxnjobs rules 1.5)) ;; max normalized number of jobs + (maxjobrate (common:alist-ref/default 'maxjobrate rules (/ 1 6))) ;; max rate of submitting jobs to a given host in jobs/second + (hosts (filter (lambda (x) + (string-match (regexp "^\\S+$") x)) + hosts-raw)) + ;; (best-host #f) + (get-rec (lambda (hostname) + ;; (print "get-rec hostname=" hostname) + (let ((h (hash-table-ref/default *host-loads* hostname #f))) + (if h + h + (let ((h (make-host))) + (hash-table-set! *host-loads* hostname h) + h))))) + (best-load 99999) + (curr-time (current-seconds)) + (get-hosts-sorted (lambda (hosts) + (sort hosts (lambda (a b) + (let ((a-rec (get-rec a)) + (b-rec (get-rec b))) + ;; (print "a=" a " a-rec=" a-rec " host-last-used=" (host-last-used a-rec)) + ;; (print "b=" b " b-rec=" b-rec " host-last-used=" (host-last-used b-rec)) + (< (host-last-used a-rec) + (host-last-used b-rec)))))))) + (debug:print 0 *default-log-port* "INFO: hosts-sorted=" (get-hosts-sorted hosts)) + (if (null? hosts) + #f ;; no hosts to select from. All done and giving up now. + (let ((hosts-sorted (get-hosts-sorted hosts))) + (common:update-host-loads-table hosts) + (let loop ((hostname (car hosts-sorted)) + (tal (cdr hosts-sorted)) + (best-host #f)) + (let* ((rec (get-rec hostname)) + (reachable (host-reachable rec)) + (load (host-last-cpuload rec)) + (last-used (host-last-used rec)) + (delta (- curr-time last-used)) + (job-rate (if (> delta 0) + (/ 1 delta) + 999)) ;; jobs per second + (new-best + (cond + ((not reachable) + (debug:print 0 *default-log-port* "Skipping host " hostname " as it cannot be reached.") + best-host) + ((and (< load maxnload) ;; load is acceptable + (< job-rate maxjobrate)) ;; job rate is acceptable + (set! best-load load) + hostname) + (else best-host)))) + (debug:print 0 *default-log-port* "INFO: Trying host " hostname " with load " load ", last used " delta " seconds ago, with job-rate " job-rate " for running a test." ) + (if new-best + (begin ;; found a host, return it + (debug:print 0 *default-log-port* "INFO: Found host: " new-best " load: " load " last-used: " delta " seconds ago, with job-rate: " job-rate) + (host-last-used-set! rec curr-time) + new-best) + (if (null? tal) #f (loop (car tal)(cdr tal) best-host))))))))) + +(define (std-exit-procedure) + ;;(common:telemetry-log-close) + (on-exit (lambda () 0)) + ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*) + (let ((no-hurry (if *time-to-exit* ;; hurry up + #f + (begin + (set! *time-to-exit* #t) + #t)))) + (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.") + (if (and no-hurry (debug:debug-mode 18)) + (rmt:print-db-stats)) + (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds + (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated + (if *task-db* + (let ((db (cdr *task-db*))) + (if (sqlite3:database? db) + (begin + (sqlite3:interrupt! db) + (sqlite3:finalize! db #t) + ;; (vector-set! *task-db* 0 #f) + (set! *task-db* #f))))) + (http-client#close-idle-connections!) + ;; (if (and *runremote* + ;; (remote-conndat *runremote*)) + ;; (begin + ;; (http-client#close-all-connections!))) ;; for http-client + (if (not (eq? *default-log-port* (current-error-port))) + (close-output-port *default-log-port*)) + (set! *default-log-port* (current-error-port))) "Cleanup db exit thread")) + (th2 (make-thread (lambda () + (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...") + (if no-hurry + (begin + (thread-sleep! 5)) ;; give the clean up few seconds to do it's stuff + (begin + (thread-sleep! 2))) + (debug:print 4 *default-log-port* " ... done") + ) + "clean exit"))) + (thread-start! th1) + (thread-start! th2) + (thread-join! th1) + ) + ) + + 0) + +;;====================================================================== +;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage +(define (common:watchdog) + (debug:print-info 13 *default-log-port* "common:watchdog entered.") + (if (launch:setup) + (if (common:on-homehost?) + (let ((dbstruct (db:setup #t))) + (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct) + (cond + ((dbr:dbstruct-read-only dbstruct) + (debug:print-info 13 *default-log-port* "loading read-only watchdog") + (common:readonly-watchdog dbstruct)) + (else + (debug:print-info 13 *default-log-port* "loading writable-watchdog.") + (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "brute-force-sync"))) + (cond + ((equal? syncer "brute-force-sync") + (server:writable-watchdog-bruteforce dbstruct)) + ((equal? syncer "delta-sync") + (server:writable-watchdog-deltasync dbstruct)) + (else + (debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are brute-force-sync and delta-sync.") + (exit 1))) + ;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")") + ))) + (debug:print-info 13 *default-log-port* "watchdog done.")) + (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost")))) + +;;====================================================================== +;; logic for getting homehost. Returns (host . at-home) +;; IF *toppath* is not set, wait up to five seconds trying every two seconds +;; (this is to accomodate the watchdog) +;; +(define (common:get-homehost #!key (trynum 5)) + ;; called often especially at start up. use mutex to eliminate collisions + (mutex-lock! *homehost-mutex*) + (cond + (*home-host* + (mutex-unlock! *homehost-mutex*) + *home-host*) + ((not *toppath*) + (mutex-unlock! *homehost-mutex*) + (launch:setup) ;; safely mutexed now + (if (> trynum 0) + (begin + (thread-sleep! 2) + (common:get-homehost trynum: (- trynum 1))) + #f)) + (else + (let* ((currhost (get-host-name)) + (bestadrs (server:get-best-guess-address currhost)) + ;; first look in config, then look in file .homehost, create it if not found + (homehost (or (configf:lookup *configdat* "server" "homehost" ) + (handle-exceptions + exn + (if (> trynum 0) + (let ((delay-time (* (- 5 trynum) 5))) + (mutex-unlock! *homehost-mutex*) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying " + delay-time " seconds and trying again, message: " ((condition-property-accessor 'exn 'message) exn) + ", exn=" exn) + (thread-sleep! delay-time) + (common:get-homehost trynum: (- trynum 1))) + (begin + (mutex-unlock! *homehost-mutex*) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time) + "] Failed to read .homehost file after trying five times. Giving up and exiting, message: " + ((condition-property-accessor 'exn 'message) exn)) + (exit 1))) + (let ((hhf (conc *toppath* "/.homehost"))) + (if (common:file-exists? hhf) + (with-input-from-file hhf read-line) + (if (file-writable? *toppath*) + (begin + (with-output-to-file hhf + (lambda () + (print bestadrs))) + (begin + (mutex-unlock! *homehost-mutex*) + (car (common:get-homehost)))) + #f)))))) + (at-home (or (equal? homehost currhost) + (equal? homehost bestadrs)))) + (set! *home-host* (cons homehost at-home)) + (mutex-unlock! *homehost-mutex*) + *home-host*)))) + +;;====================================================================== +;; am I on the homehost? +;; +(define (common:on-homehost?) + (let ((hh (common:get-homehost))) + (if hh + (cdr hh) + #f))) + +(define (common:wait-for-homehost-load maxnormload msg) + (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. + #f + (common:get-homehost))) + (hh (if hh-dat (car hh-dat) #f))) + (common:wait-for-normalized-load maxnormload msg hh))) + +(define (make-and-init-remote) + (make-remote hh-dat: (common:get-homehost) + server-info: (if *toppath* (server:check-if-running *toppath*) #f) + server-timeout: (server:expiration-timeout))) +;;====================================================================== +;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp +;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server) +;; +(define (common:readonly-watchdog dbstruct) + (let ((just-testing 0.0501)) + (thread-sleep! just-testing)) ;; (/ 1 20)) ;; 0.051) ;; delay for startup + (debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.") + ;; sync megatest.db to /tmp/.../megatst.db + (let* ((sync-cool-off-duration 3) + (golden-mtdb (dbr:dbstruct-mtdb dbstruct)) + (golden-mtpath (db:dbdat-get-path golden-mtdb)) + (tmp-mtdb (dbr:dbstruct-tmpdb dbstruct)) + (tmp-mtpath (db:dbdat-get-path tmp-mtdb))) + (debug:print-info 0 *default-log-port* "Read-only periodic sync thread started.") + (let loop ((last-sync-time 0)) + (debug:print-info 13 *default-log-port* "loop top tmp-mtpath="tmp-mtpath" golden-mtpath="golden-mtpath) + (let* ((duration-since-last-sync (- (current-seconds) last-sync-time))) + (debug:print-info 13 *default-log-port* "duration-since-last-sync="duration-since-last-sync) + (if (and (not *time-to-exit*) + (< duration-since-last-sync sync-cool-off-duration)) + (thread-sleep! (- sync-cool-off-duration duration-since-last-sync))) + (if (not *time-to-exit*) + (let ((golden-mtdb-mtime (file-modification-time golden-mtpath)) + (tmp-mtdb-mtime (file-modification-time tmp-mtpath))) + (if (> golden-mtdb-mtime tmp-mtdb-mtime) + (if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back + (let ((res (db:multi-db-sync dbstruct 'old2new))) + (debug:print-info 13 *default-log-port* "rosync called, " res " records transferred.")))) + (loop (current-seconds))) + #t))) + (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath))) + +;;====================================================================== +;; from metadat lookup MEGATEST_VERSION +;; +(define (common:get-last-run-version) ;; RADT => How does this work in send-receive function??; assume it is the value saved in some DB + (rmt:get-var "MEGATEST_VERSION")) + +(define (common:get-last-run-version-number) + (string->number + (substring (common:get-last-run-version) 0 6))) + +(define (common:set-last-run-version) + (rmt:set-var "MEGATEST_VERSION" (common:version-signature))) + +;;====================================================================== +;; postive number if megatest version > db version +;; negative number if megatest version < db version +(define (common:version-db-delta) + (- megatest-version (common:get-last-run-version-number))) + +(define (common:version-changed?) + (not (equal? (common:get-last-run-version) + (common:version-signature)))) + +(define (common:api-changed?) + (not (equal? (substring (->string megatest-version) 0 4) + (substring (conc (common:get-last-run-version)) 0 4)))) + +;;====================================================================== +;; Move me elsewhere ... +;; RADT => Why do we meed the version check here, this is called only if version misma +;; +(define (common:cleanup-db dbstruct #!key (full #f)) + (apply db:multi-db-sync + dbstruct + 'schema + ;; 'new2old + 'killservers + 'adj-target + ;; 'old2new + 'new2old + ;; (if full + '(dejunk) + ;; '()) + ) + (if (common:api-changed?) + (common:set-last-run-version))) + +;;====================================================================== +;; Force a megatest cleanup-db if version is changed and skip-version-check not specified +;; Do NOT check if not on homehost! +;; +(define (common:exit-on-version-changed) + (if (common:on-homehost?) + (if (common:api-changed?) + (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config")) + (dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db")) + (read-only (not (file-writable? dbfile))) + (dbstruct (db:setup #t))) + (debug:print 0 *default-log-port* + "WARNING: Version mismatch!\n" + " expected: " (common:version-signature) "\n" + " got: " (common:get-last-run-version)) + (cond + ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t) + ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only) + (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db + (debug:print 0 *default-log-port* " I see you are the owner of megatest.config, attempting to cleanup and reset to new version") + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "Failed to switch versions. exn=" exn) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (print-call-chain (current-error-port)) + (exit 1)) + (common:cleanup-db dbstruct))) + ((not (common:file-exists? mtconf)) + (debug:print 0 *default-log-port* " megatest.config does not exist in this area. Cannot proceed with megatest version migration.") + (exit 1)) + ((not (common:file-exists? dbfile)) + (debug:print 0 *default-log-port* " megatest.db does not exist in this area. Cannot proceed with megatest version migration.") + (exit 1)) + ((not (eq? (current-user-id)(file-owner mtconf))) + (debug:print 0 *default-log-port* " You do not own megatest.db in this area. Cannot proceed with megatest version migration.") + (exit 1)) + (read-only + (debug:print 0 *default-log-port* " You have read-only access to this area. Cannot proceed with megatest version migration.") + (exit 1)) + (else + (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"") + (exit 1))))))) +;;====================================================================== +;; (begin +;; (debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.") +;; (exit 1)))) + +(define (common:run-sync?) + (and (common:on-homehost?) + (args:get-arg "-server"))) Index: commonmod.scm ================================================================== --- commonmod.scm +++ commonmod.scm @@ -18,10 +18,16 @@ ;;====================================================================== (declare (unit commonmod)) (declare (uses mtver)) +(declare (uses debugprint)) +(declare (uses stml2)) +(declare (uses pkts)) +(declare (uses processmod)) +(declare (uses mtargs)) +(declare (uses configfmod)) (module commonmod * (import scheme @@ -28,26 +34,48 @@ chicken.base chicken.condition chicken.file chicken.time chicken.file.posix + chicken.pathname + chicken.port + chicken.process + chicken.process-context chicken.process-context.posix + chicken.process.signal + chicken.pretty-print + chicken.random chicken.io chicken.string - - (prefix sqlite3 sqlite3:) + chicken.sort + chicken.time.posix - system-information - typed-records + (prefix base64 base64:) + (prefix sqlite3 sqlite3:) + directory-utils + matchable md5 message-digest regex + regex-case + sparse-vectors srfi-1 + srfi-13 srfi-18 srfi-69 + system-information + typed-records + z3 mtver + debugprint + stml2 + pkts + processmod + (prefix mtargs args:) + configfmod + ) ;;====================================================================== ;; CONTENTS ;; @@ -56,10 +84,19 @@ ;; testsuite and area utilites ;; ;;====================================================================== (include "megatest-fossil-hash.scm") + +;; these come from processmod +;; +;; (define setenv set-environment-variable!) +;; (define unsetenv unset-environment-variable!) +;; (define getenv get-environment-variable) + +(define home (getenv "HOME")) +(define user (getenv "USER")) ;; Globals ;; (define *server-loop-heart-beat* (current-seconds)) @@ -106,7 +143,2968 @@ (handle-exceptions exn #f ;; I don't really care why this failed (at least for now) (delete-file* fname))) + +;;====================================================================== +;; PUlled below from common.scm +;;====================================================================== + +;; execute thunk, return value. If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* . +;; arguments - thunk, message +(define (common:fail-safe thunk warning-message-on-exception) + (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "notable but nonfatal condition - "warning-message-on-exception", exn=" exn) + (debug:print-info 0 *default-log-port* + (string-substitute "\n?Error:" "nonfatal condition:" + (with-output-to-string + (lambda () + (print-error-message exn) )))) + (debug:print-info 0 *default-log-port* " -- continuing after nonfatal condition...") + #f) + (thunk))) + +;; defined in configfmod +;; +#;(define (safe-setenv key val) + (if (or (substring-index "!" key) + (substring-index ":" key) ;; variables containing : are for internal use and cannot be environment variables. + (substring-index "." key)) ;; periods are not allowed in environment variables + (debug:print-error 4 *default-log-port* "skip setting internal use only variables containing \":\" or starting with \"!\"") + (if (and (string? val) + (string? key)) + (handle-exceptions + exn + (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val ", exn=" exn) + (setenv key val)) + (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val)))) + + +;; returns list of fd count, socket count +(define (get-file-descriptor-count #!key (pid (current-process-id ))) + (list + (length (glob (conc "/proc/" pid "/fd/*"))) + (length (filter identity (map socket? (glob (conc "/proc/" pid "/fd/*"))))) + ) +) + + + +;; GLOBALS + +;; CONTEXTS +(defstruct cxt + (taskdb #f) + (cmutex (make-mutex))) +;; (define *contexts* (make-hash-table)) +;; (define *context-mutex* (make-mutex)) + +;; ;; safe method for accessing a context given a toppath +;; ;; +;; (define (common:with-cxt toppath proc) +;; (mutex-lock! *context-mutex*) +;; (let ((cxt (hash-table-ref/default *contexts* toppath #f))) +;; (if (not cxt) +;; (set! cxt (let ((x (make-cxt)))(hash-table-set! *contexts* toppath x) x))) +;; (let ((cxt-mutex (cxt-mutex cxt))) +;; (mutex-unlock! *context-mutex*) +;; (mutex-lock! cxt-mutex) +;; (let ((res (proc cxt))) +;; (mutex-unlock! cxt-mutex) +;; res)))) + +;; A hash table that can be accessed by #{scheme ...} calls in +;; config files. Allows communicating between confgs +;; +(define *user-hash-data* (make-hash-table)) + +(define *db-keys* #f) + +(define *pkts-info* (make-hash-table)) ;; store stuff like the last parent here +(define *configinfo* #f) ;; raw results from setup, includes toppath and table from megatest.config +(define *runconfigdat* #f) ;; run configs data +(define *configdat* #f) ;; megatest.config data +(define *configstatus* #f) ;; status of data; 'fulldata : all processing done, #f : no data yet, 'partialdata : partial read done +(define *toppath* #f) +(define *already-seen-runconfig-info* #f) + +(define *test-meta-updated* (make-hash-table)) +(define *globalexitstatus* 0) ;; attempt to work around possible thread issues +(define *passnum* 0) ;; when running track calls to run-tests or similar +;; (define *alt-log-file* #f) ;; used by -log +(define *common:denoise* (make-hash-table)) ;; for low noise printing +;; (define *default-log-port* (current-error-port)) ;; comes from debugprint +(define *time-zero* (current-seconds)) ;; for the watchdog +(define *default-area-tag* "local") + +;; DATABASE +(define *dbstruct-db* #f) ;; used to cache the dbstruct in db:setup. Goal is to remove this. +;; db stats +(define *db-stats* (make-hash-table)) ;; hash of vectors < count duration-total > +(define *db-stats-mutex* (make-mutex)) +;; db access +(define *db-last-access* (current-seconds)) ;; last db access, used in server +(define *db-write-access* #t) +;; db sync +(define *db-last-sync* 0) ;; last time the sync to megatest.db happened +(define *db-sync-in-progress* #f) ;; if there is a sync in progress do not try to start another +(define *db-multi-sync-mutex* (make-mutex)) ;; protect access to *db-sync-in-progress*, *db-last-sync* +;; task db +(define *task-db* #f) ;; (vector db path-to-db) +(define *db-access-allowed* #t) ;; flag to allow access +(define *db-access-mutex* (make-mutex)) +(define *db-transaction-mutex* (make-mutex)) +(define *db-cache-path* #f) +(define *db-with-db-mutex* (make-mutex)) +(define *db-api-call-time* (make-hash-table)) ;; hash of command => (list of times) +;; no sync db +(define *no-sync-db* #f) + +;; SERVER +(define *my-client-signature* #f) +(define *transport-type* 'http) ;; override with [server] transport http|rpc|nmsg +(define *runremote* #f) ;; if set up for server communication this will hold +;; (define *max-cache-size* 0) +(define *logged-in-clients* (make-hash-table)) +(define *server-id* #f) +(define *server-info* #f) ;; good candidate for easily convert to non-global +(define *time-to-exit* #f) +(define *server-run* #t) +(define *run-id* #f) +(define *server-kind-run* (make-hash-table)) +(define *home-host* #f) +;; (define *total-non-write-delay* 0) +(define *heartbeat-mutex* (make-mutex)) +(define *api-process-request-count* 0) +(define *max-api-process-requests* 0) +(define *server-overloaded* #f) +(define *writes-total-delay* 0) + +;; client +(define *rmt-mutex* (make-mutex)) ;; remote access calls mutex + +;; RPC transport +(define *rpc:listener* #f) + +;; KEY info +(define *target* (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN +(define *keys* (make-hash-table)) ;; cache the keys here +(define *keyvals* (make-hash-table)) +(define *toptest-paths* (make-hash-table)) ;; cache toptest path settings here +(define *test-paths* (make-hash-table)) ;; cache test-id to test run paths here +(define *test-ids* (make-hash-table)) ;; cache run-id, testname, and item-path => test-id +(define *test-info* (make-hash-table)) ;; cache the test info records, update the state, status, run_duration etc. from testdat.db + +(define *run-info-cache* (make-hash-table)) ;; run info is stable, no need to reget +(define *launch-setup-mutex* (make-mutex)) ;; need to be able to call launch:setup often so mutex it and re-call the real deal only if *toppath* not set +(define *homehost-mutex* (make-mutex)) + +;; Miscellaneous +(define *triggers-mutex* (make-mutex)) ;; block overlapping processing of triggers +(define *numcpus-cache* (make-hash-table)) + +;; (use posix-extras pathname-expand files) + +;; this plugs a hole in posix-extras in recent chicken versions > 4.9) +;; (let-values (( (chicken-release-number chicken-major-version) +;; (apply values +;; (map string->number +;; (take +;; (string-split (chicken-version) ".") +;; 2))))) +;; (let ((resolve-pathname-broken? +;; (or (> chicken-release-number 4) +;; (and (eq? 4 chicken-release-number) (> chicken-major-version 9))))) +;; (if resolve-pathname-broken? +;; (define ##sys#expand-home-path pathname-expand)))) + +;; (define (realpath x) (resolve-pathname (pathname-expand (or x "/dev/null")) )) +;; (define (realpath x)(pathname-expand (or x "/dev/null")) ) +(define (realpath x) + (with-input-from-pipe + (string-append "readlink -f \""x"\"") + read-line)) + +(define (common:get-this-exe-fullpath #!key (argv (argv))) + (let* ((this-script + (cond + ((and (> (length argv) 2) + (string-match "^(.*/csi|csi)$" (car argv)) + (string-match "^-(s|ss|sx|script)$" (cadr argv))) + (caddr argv)) + (else (car argv)))) + (fullpath (realpath this-script))) + fullpath)) + +;;====================================================================== + +(define *common:this-exe-fullpath* (common:get-this-exe-fullpath)) +(define *common:this-exe-dir* (pathname-directory *common:this-exe-fullpath*)) +(define *common:this-exe-name* (pathname-strip-directory *common:this-exe-fullpath*)) + +(define (common:get-sync-lock-filepath) + (let* ((tmp-area (common:get-db-tmp-area)) + (lockfile (conc tmp-area "/megatest.db.sync-lock"))) + lockfile)) + +;;====================================================================== +;; when called from a wrapper I need sometimes to find the calling +;; wrapper, this is for dashboard to find the correct megatest. +;; +(define (common:find-local-megatest #!optional (progname "megatest")) + (let ((res (filter file-exists? + (map (lambda (updir) + (let* ((lm (car (argv))) + (dir (pathname-directory lm)) + (exe (pathname-strip-directory lm))) + (conc (if dir (conc dir "/") "") + (case (string->symbol exe) + ((dboard) (conc updir progname)) + ((mtest) (conc updir progname)) + ((dashboard) progname) + (else exe))))) + '("../../" "../"))))) + (if (null? res) + (begin + (debug:print 0 *default-log-port* "Failed to find this executable! Using what can be found on the path") + progname) + (car res)))) + +(define *common:logpro-exit-code->status-sym-alist* + '( ( 0 . pass ) + ( 1 . fail ) + ( 2 . warn ) + ( 3 . check ) + ( 4 . waived ) + ( 5 . abort ) + ( 6 . skip ))) + +(define (common:logpro-exit-code->status-sym exit-code) + (or (alist-ref exit-code *common:logpro-exit-code->status-sym-alist*) 'fail)) + +(define (common:worse-status-sym ss1 ss2) + (let loop ((status-syms-remaining '(abort fail check skip warn waived pass))) + (cond + ((null? status-syms-remaining) + 'fail) + ((eq? (car status-syms-remaining) ss1) + ss1) + ((eq? (car status-syms-remaining) ss2) + ss2) + (else + (loop (cdr status-syms-remaining)))))) + +(define (common:steps-can-proceed-given-status-sym status-sym) + (if (member status-sym '(warn waived pass)) + #t + #f)) + +(define (status-sym->string status-sym) + (case status-sym + ((pass) "PASS") + ((fail) "FAIL") + ((warn) "WARN") + ((check) "CHECK") + ((waived) "WAIVED") + ((abort) "ABORT") + ((skip) "SKIP") + (else "FAIL"))) + +(define (common:logpro-exit-code->test-status exit-code) + (status-sym->string (common:logpro-exit-code->status-sym exit-code))) + +(defstruct remote + (hh-dat #f) ;; (common:get-homehost)) ;; homehost record ( addr . hhflag ) + (server-url #f) ;; (server:check-if-running *toppath*) #f)) + (server-id #f) + (server-info #f) ;; (if *toppath* (server:check-if-running *toppath*) #f)) + (last-server-check 0) ;; last time we checked to see if the server was alive + (conndat #f) + ;; (transport *transport-type*) + (server-timeout #f) ;; (server:expiration-timeout)) + (force-server #f) + (ro-mode #f) + (ro-mode-checked #f)) ;; flag that indicates we have checked for ro-mode + +;; launching and hosts +(defstruct host + (reachable #f) + (last-update 0) + (last-used 0) + (last-cpuload 1)) + +(define *host-loads* (make-hash-table)) + +;; cache environment vars for each run here +(define *env-vars-by-run-id* (make-hash-table)) + +;; Testconfig and runconfig caches. +(define *testconfigs* (make-hash-table)) ;; test-name => testconfig +(define *runconfigs* (make-hash-table)) ;; target => runconfig + +;; This is a cache of pre-reqs met, don't re-calc in cases where called with same params less than +;; five seconds ago +(define *pre-reqs-met-cache* (make-hash-table)) + +;; cache of verbosity given string +;; +(define *verbosity-cache* (make-hash-table)) + +(define (common:clear-caches) + (set! *target* (make-hash-table)) + (set! *keys* (make-hash-table)) + (set! *keyvals* (make-hash-table)) + (set! *toptest-paths* (make-hash-table)) + (set! *test-paths* (make-hash-table)) + (set! *test-ids* (make-hash-table)) + (set! *test-info* (make-hash-table)) + (set! *run-info-cache* (make-hash-table)) + (set! *env-vars-by-run-id* (make-hash-table)) + (set! *test-id-cache* (make-hash-table))) + +;; Generic string database +(define sdb:qry #f) ;; (make-sdb:qry)) ;; 'init #f) +;; Generic path database +(define *fdb* #f) + +(define *last-launch* (current-seconds)) ;; use for throttling the launch rate. Would be better to use the db and last time of a test in LAUNCHED state. + +;;====================================================================== +;; V E R S I O N +;;====================================================================== + +(define (common:get-full-version) + (conc megatest-version "-" megatest-fossil-hash)) + +(define (common:version-signature) + (conc megatest-version "-" (substring megatest-fossil-hash 0 4))) + +;; common:get-last-run-version +;; common:get-last-run-version-number +;; common:set-last-run-version + +(define (common:snapshot-file filepath #!key (subdir ".") ) + (if (file-exists? filepath) + (let* ((age-sec (lambda (file) + (if (file-exists? file) + (- (current-seconds) (file-modification-time file)) + 1000000000))) ;; return really old value if file doesn't exist. we want to clobber it if old or not exist. + (ok-flag #t) + (age-mins (lambda (file) (/ (age-sec file) 60))) + (age-hrs (lambda (file) (/ (age-mins file) 60))) + (age-days (lambda (file) (/ (age-hrs file) 24))) + (age-wks (lambda (file) (/ (age-days file) 7))) + (docmd (lambda (cmd) + (cond + (ok-flag + (let ((res (system cmd))) + (cond + ((eq? 0 res) + #t) + (else + (set! ok-flag #f) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Command failed with exit code " + (if (< res 0) + res + (/ res 8)) " ["cmd"]" ) + #f)))) + (else + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Not runnining command due to prior error. ["cmd"]") + #f)))) + (copy (lambda (src dest) (docmd (conc "/bin/cp '"src"' '"dest"'")))) + (copy+zip (lambda (src dest) (docmd (conc "gzip -c - < '"src"' > '"dest"'")))) + (fullpath (realpath filepath)) + (basedir (pathname-directory fullpath)) + (basefile (pathname-strip-directory fullpath)) + ;;(prevfile (conc filepath ".prev.gz")) + (minsfile (conc basedir "/" subdir "/" basefile ".mins.gz")) + (hrsfile (conc basedir "/" subdir "/" basefile ".hrs.gz")) + (daysfile (conc basedir "/" subdir "/" basefile ".days.gz")) + (wksfile (conc basedir "/" subdir "/" basefile ".weeks.gz"))) + + ;; create subdir it not exists + (if (not (directory-exists? (conc basedir "/" subdir))) + (docmd (conc "/bin/mkdir -p '"(conc basedir "/" subdir)"'"))) + + ;; copy&zip to .mins if not exists + (if (not (file-exists? minsfile)) + (copy+zip filepath minsfile)) + ;; copy .mins to .hrs if not exists + (if (not (file-exists? hrsfile)) + (copy minsfile hrsfile)) + ;; copy .hrs to .days if not exists + (if (not (file-exists? daysfile)) + (copy hrsfile daysfile)) + ;; copy .days to .weeks if not exists + (if (not (file-exists? wksfile)) + (copy daysfile wksfile)) + + + ;; if age(.mins.gz) >= 1h: + ;; copy .mins.gz .hrs.gz + ;; copy .mins.gz + (when (>= (age-mins minsfile) 1) + (copy minsfile hrsfile) + (copy+zip filepath minsfile)) + + ;; if age(.hrs.gz) >= 1d: + ;; copy .hrs.gz .days.gz + ;; copy .mins.gz .hrs.gz + (when (>= (age-days hrsfile) 1) + (copy hrsfile daysfile) + (copy minsfile hrsfile)) + + ;; if age(.days.gz) >= 1w: + ;; copy .days.gz .weeks.gz + ;; copy .hrs.gz .days.gz + (when (>= (age-wks daysfile) 1) + (copy daysfile wksfile) + (copy hrsfile daysfile)) + #t) + #f)) + +;;====================================================================== +;; Rotate logs, logic: +;; if > 500k and older than 1 week: +;; remove previous compressed log and compress this log +;; WARNING: This proc operates assuming that it is in the directory above the +;; logs directory you wish to log-rotate. +;; +(define (common:rotate-logs) + (let* ((all-files (make-hash-table)) + (stats (make-hash-table)) + (inc-stat (lambda (key) + (hash-table-set! stats key (+ (hash-table-ref/default stats key 0) 1)))) + (max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "300")))) ;; name -> age + (if (not (directory-exists? "logs"))(create-directory "logs")) + (directory-fold + (lambda (file rem) + (handle-exceptions + exn + (begin + (debug:print-info 2 *default-log-port* "unable to rotate log " file ", probably handled by another process, this is safe to ignore. exn=" exn) + (debug:print 2 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + ;; (print-call-chain (current-error-port)) ;; + ) + (let* ((fullname (conc "logs/" file)) + (mod-time (file-modification-time fullname)) + (file-age (- (current-seconds) mod-time)) + (file-old (> file-age (* 48 60 60))) + (file-big (> (file-size fullname) 200000))) + (hash-table-set! all-files file mod-time) + (if (or (and (string-match "^.*.log" file) + file-old + file-big) + (and (string-match "^server-.*.log" file) + file-old)) + (let ((gzfile (conc fullname ".gz"))) + (if (common:file-exists? gzfile) + (begin + (debug:print-info 0 *default-log-port* "removing " gzfile) + (delete-file* gzfile) + (hash-table-delete! all-files gzfile) ;; needed? + )) + (debug:print-info 0 *default-log-port* "compressing " file) + (system (conc "gzip " fullname)) + (inc-stat "gzipped") + (hash-table-set! all-files (conc file ".gz") file-age) ;; add the .gz file and remove the base file + (hash-table-delete! all-files file) + ) + (if (and (> file-age (* (string->number (or (configf:lookup *configdat* "setup" "log-expire-days") "30")) 24 3600)) + (file-exists? fullname)) ;; just in case it was gzipped - will get it next time + (handle-exceptions + exn + #f + (if (directory? fullname) + (begin + (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") + (inc-stat "directories")) + (begin + (delete-file* fullname) + (inc-stat "deleted"))) + (hash-table-delete! all-files file))))))) + '() + "logs") + (for-each + (lambda (category) + (let ((quant (hash-table-ref/default stats category 0))) + (if (> quant 0) + (debug:print-info 0 *default-log-port* category " log files: " quant)))) + `("deleted" "gzipped" "directories")) + (let ((num-logs (hash-table-size all-files))) + (if (> num-logs max-allowed) ;; because NFS => don't let number of logs exceed 300 + (let ((files (take (sort (hash-table-keys all-files) + (lambda (a b) + (< (hash-table-ref all-files a)(hash-table-ref all-files b)))) + (- num-logs max-allowed)))) + (for-each + (lambda (file) + (let* ((fullname (conc "logs/" file))) + (if (directory? fullname) + (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") + (handle-exceptions + exn + (debug:print-error 0 *default-log-port* "failed to remove " fullname ", exn=" exn) + (delete-file* fullname))))) + files) + (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files.")))))) + + +;;====================================================================== +;; S P A R S E A R R A Y S +;;====================================================================== + +(define (make-sparse-array) + (let ((a (make-sparse-vector))) + (sparse-vector-set! a 0 (make-sparse-vector)) + a)) + +(define (sparse-array? a) + (and (sparse-vector? a) + (sparse-vector? (sparse-vector-ref a 0)))) + +(define (sparse-array-ref a x y) + (let ((row (sparse-vector-ref a x))) + (if row + (sparse-vector-ref row y) + #f))) + +(define (sparse-array-set! a x y val) + (let ((row (sparse-vector-ref a x))) + (if row + (sparse-vector-set! row y val) + (let ((new-row (make-sparse-vector))) + (sparse-vector-set! a x new-row) + (sparse-vector-set! new-row y val))))) + +;;====================================================================== +;; L O C K E R S A N D B L O C K E R S +;;====================================================================== + +;; block further accesses to databases. Call this before shutting db down +(define (common:db-block-further-queries) + (mutex-lock! *db-access-mutex*) + (set! *db-access-allowed* #f) + (mutex-unlock! *db-access-mutex*)) + +(define (common:db-access-allowed?) + (let ((val (begin + (mutex-lock! *db-access-mutex*) + *db-access-allowed* + (mutex-unlock! *db-access-mutex*)))) + val)) + +;;====================================================================== +;; U S E F U L S T U F F +;;====================================================================== + +;; convert things to an alist or assoc list, #f gets converted to "" +;; +(define (common:to-alist dat) + (cond + ((list? dat) (map common:to-alist dat)) + ((vector? dat) + (map common:to-alist (vector->list dat))) + ((pair? dat) + (cons (common:to-alist (car dat)) + (common:to-alist (cdr dat)))) + ((hash-table? dat) + (map common:to-alist (hash-table->alist dat))) + (else + (if dat + dat + "")))) + +(define (common:alist-ref/default key alist default) + (or (alist-ref key alist) default)) + +(define (common:low-noise-print waitval . keys) + (let* ((key (string-intersperse (map conc keys) "-" )) + (lasttime (hash-table-ref/default *common:denoise* key 0)) + (currtime (current-seconds))) + (if (> (- currtime lasttime) waitval) + (begin + (hash-table-set! *common:denoise* key currtime) + #t) + #f))) + +(define (common:get-megatest-exe) + (or (getenv "MT_MEGATEST") "megatest")) + +(define (common:read-encoded-string instr) + (handle-exceptions + exn + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* "received bad encoded string \"" instr "\", message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) + (print-call-chain (current-error-port)) + #f) + (read (open-input-string (base64:base64-decode instr)))) + (read (open-input-string (z3:decode-buffer (base64:base64-decode instr)))))) + +;;====================================================================== +;; S T A T E S A N D S T A T U S E S +;;====================================================================== + +;; BBnote: *common:std-states* - dashboard filter control and test control state buttons defined here; used in set-fields-panel and dboard:make-controls +(define *common:std-states* ;; for toggle buttons in dashboard + '( + (0 "ARCHIVED") + (1 "STUCK") + (2 "KILLREQ") + (3 "KILLED") + (4 "NOT_STARTED") + (5 "COMPLETED") + (6 "LAUNCHED") + (7 "REMOTEHOSTSTART") + (8 "RUNNING") + )) + +(define *common:dont-roll-up-states* + '("DELETED" + "REMOVING" + "CLEANING" + "ARCHIVE_REMOVING" + )) + +;;====================================================================== +;; BBnote: *common:std-statuses* dashboard filter control and test control status buttons defined here; used in set-fields-panel and dboard:make-controls +;; note these statuses are sorted from better to worse. +;; This sort order is important to dcommon:status-compare3 and db:set-state-status-and-roll-up-items +(define *common:std-statuses* + '(;; (0 "DELETED") + (1 "n/a") + (2 "PASS") + (3 "SKIP") + (4 "WARN") + (5 "WAIVED") + (6 "CHECK") + (7 "STUCK/DEAD") + (8 "DEAD") + (9 "FAIL") + (10 "PREQ_FAIL") + (11 "PREQ_DISCARDED") + (12 "ABORT"))) + +(define *common:ended-states* ;; states which indicate the test is stopped and will not proceed + '("COMPLETED" "ARCHIVED" "KILLED" "KILLREQ" "STUCK" "INCOMPLETE" )) + +(define *common:badly-ended-states* ;; these roll up as CHECK, i.e. results need to be checked + '("KILLED" "KILLREQ" "STUCK" "INCOMPLETE" "DEAD")) + +(define *common:well-ended-states* ;; an item's prereq in this state allows item to proceed + '("PASS" "WARN" "CHECK" "WAIVED" "SKIP")) + +;; BBnote: *common:running-states* used from db:set-state-status-and-roll-up-items +(define *common:running-states* ;; test is either running or can be run + '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED" "STARTED")) + +(define *common:cant-run-states* ;; These are stopping conditions that prevent a test from being run + '("COMPLETED" "KILLED" "UNKNOWN" "INCOMPLETE" "ARCHIVED")) + +(define *common:not-started-ok-statuses* ;; if not one of these statuses when in not_started state treat as dead + '("n/a" "na" "PASS" "FAIL" "WARN" "CHECK" "WAIVED" "DEAD" "SKIP")) + +;;====================================================================== +;; group tests into buckets corresponding to rollup +;;; Running, completed-pass, completed-non-pass + worst status, not started. +;; filter out +;(define (common:categorize-items-for-rollup in-tests) +; ( + +(define (common:special-sort items order comp) + (let ((items-order (map reverse order)) + (acomp (or comp >))) + (sort items + (lambda (a b) + (let ((a-num (cadr (or (assoc a items-order) '(0 0)))) + (b-num (cadr (or (assoc b items-order) '(0 0))))) + (acomp a-num b-num)))))) + +;;====================================================================== +;; ;; given a toplevel with currstate, currstatus apply state and status +;; ;; => (newstate . newstatus) +;; (define (common:apply-state-status currstate currstatus state status) +;; (let* ((cstate (string->symbol (string-downcase currstate))) +;; (cstatus (string->symbol (string-downcase currstatus))) +;; (sstate (string->symbol (string-downcase state))) +;; (sstatus (string->symbol (string-downcase status))) +;; (nstate #f) +;; (nstatus #f)) +;; (set! nstate +;; (case cstate +;; ((completed not_started killed killreq stuck archived) +;; (case sstate ;; completed -> sstate +;; ((completed killed killreq stuck archived) completed) +;; ((running remotehoststart launched) running) +;; (else unknown-error-1))) +;; ((running remotehoststart launched) +;; (case sstate +;; ((completed killed killreq stuck archived) #f) ;; need to look at all items +;; ((running remotehoststart launched) running) +;; (else unknown-error-2))) +;; (else unknown-error-3))) +;; (set! nstatus +;; (case sstatus +;; ((pass) +;; (case nstate +;; ((pass n/a deleted) pass) +;; ((warn) warn) +;; ((fail) fail) +;; ((check) check) +;; ((waived) waived) +;; ((skip) skip) +;; ((stuck/dead) stuck) +;; ((abort) abort) +;; (else unknown-error-4))) +;; ((warn) +;; (case nstate +;; ((pass warn n/a skip deleted) warn) +;; ((fail) fail) +;; ((check) check) +;; ((waived) waived) +;; ((stuck/dead) stuck) +;; (else unknown-error-5))) +;; ((fail) +;; (case nstate +;; ((pass warn fail check n/a waived skip deleted stuck/dead stuck) fail) +;; ((abort) abort) +;; (else unknown-error-6))) +;; (else unknown-error-7))) +;; (cons +;; (if nstate (symbol->string nstate) nstate) +;; (if nstatus (symbol->string nstatus) nstatus)))) + +;;====================================================================== +;; D E B U G G I N G S T U F F +;;====================================================================== + +(define *verbosity* 1) +(define *logging* #f) + +(define (get-with-default val default) + (let ((val (args:get-arg val))) + (if val val default))) + +(define (assoc/default key lst . default) + (let ((res (assoc key lst))) + (if res (cadr res)(if (null? default) #f (car default))))) + +(define (common:get-testsuite-name) + (or (configf:lookup *configdat* "setup" "area-name") ;; megatest is a flexible tool, testsuite is too limiting a description. + (configf:lookup *configdat* "setup" "testsuite" ) + (getenv "MT_TESTSUITE_NAME") + (pathname-file (or (if (string? *toppath* ) + (pathname-file *toppath*) + #f) + (common:get-toppath #f))) + "please-set-setup-area-name")) ;; (pathname-file (current-directory))))) + +;;====================================================================== +;; safe getting of toppath +(define (common:get-toppath areapath) + (or *toppath* + (if areapath + (begin + (set! *toppath* areapath) + (setenv "MT_RUN_AREA_HOME" areapath) + areapath) + #f) + (if (getenv "MT_RUN_AREA_HOME") + (begin + (set! *toppath* (getenv "MT_RUN_AREA_HOME")) + *toppath*) + #f) + ;; last resort, look for megatest.config + (let loop ((thepath (realpath "."))) + (if (file-exists? (conc thepath "/megatest.config")) + thepath + (if (equal? thepath "/") + (begin + (debug:print-error 0 *default-log-port* "Unable to find megatest home directory.") + #f) + (loop (pathname-directory thepath))))) + )) + +;;====================================================================== +;; redefine for future cleanup (converge on area-name, the more generic +;; +(define common:get-area-name common:get-testsuite-name) + +(define (common:get-db-tmp-area . junk) + (if *db-cache-path* + *db-cache-path* + (if *toppath* ;; common:get-create-writeable-dir + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* "Couldn't create path to " *db-cache-path* ", exn=" exn) + (exit 1)) + (let* ((tsname (common:get-testsuite-name)) + (dbpath (common:get-create-writeable-dir + (list (conc "/tmp/" (current-user-name) + "/megatest_localdb/" + tsname "/" + (string-translate *toppath* "/" ".")) + (conc "/tmp/" (current-process-id) ;; just in case we have an issue with the dir by own user name + "/megatest_localdb/" + tsname + (string-translate *toppath* "/" ".")) + )))) + (set! *db-cache-path* dbpath) + dbpath)) + #f))) + +(define (common:get-area-path-signature) + (message-digest-string (md5-primitive) *toppath*)) + +(define (common:get-signature str) + (message-digest-string (md5-primitive) str)) + +;;====================================================================== +;; E X I T H A N D L I N G +;;====================================================================== + +(define (common:human-time) + (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S")) + +;; common:readonly-watchdo +;; common:watchdog +;; std-exit-procedure was here + +(define (std-signal-handler signum) + ;; (signal-mask! signum) + (set! *time-to-exit* #t) + ;;(debug:print-info 13 *default-log-port* "got signal "signum) + (debug:print-error 0 *default-log-port* "Received signal " signum " aaa exiting promptly") + ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway + (exit)) + +(define (special-signal-handler signum) + ;; (signal-mask! signum) + (set! *time-to-exit* #t) + ;;(debug:print-info 13 *default-log-port* "got signal "signum) + (debug:print-error 0 *default-log-port* "Received signal " signum " sending email befor exiting!!") + ;;TODO send email to notify admin contact listed in the config that the lisner got killed + ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway + (exit)) + +(set-signal-handler! signal/int std-signal-handler) ;; ^C +(set-signal-handler! signal/term std-signal-handler) + +;; (set-signal-handler! signal/stop std-signal-handler) ;; ^Z NO, do NOT handle ^Z! + +;;====================================================================== +;; M I S C U T I L S +;;====================================================================== + +;;====================================================================== +;; convert stuff to a number if possible +(define (any->number val) + (cond + ((number? val) val) + ((string? val) (string->number val)) + ((symbol? val) (any->number (symbol->string val))) + (else #f))) + +(define (any->number-if-possible val) + (let ((num (any->number val))) + (if num num val))) + +(define (patt-list-match item patts) + (debug:print-info 8 *default-log-port* "patt-list-match item=" item " patts=" patts) + (if (and item patts) ;; here we are filtering for matches with item patterns + (let ((res #f)) ;; look through all the item-patts if defined, format is patt1,patt2,patt3 ... wildcard is % + (for-each + (lambda (patt) + (let ((modpatt (string-substitute "%" ".*" patt #t))) + (debug:print-info 10 *default-log-port* "patt " patt " modpatt " modpatt) + (if (string-match (regexp modpatt) item) + (set! res #t)))) + (string-split patts ",")) + res) + #t)) + +;;====================================================================== +;; '(print (string-intersperse (map cadr (hash-table-ref/default (read-config "megatest.config" \#f \#t) "disks" '"'"'("none" ""))) "\n"))' +(define (common:get-disks #!key (configf #f)) + (hash-table-ref/default + (or configf (read-config "megatest.config" #f #t)) + "disks" '("none" ""))) + +;;====================================================================== +;; return first command that exists, else #f +;; +(define (common:which cmds) + (if (null? cmds) + #f + (let loop ((hed (car cmds)) + (tal (cdr cmds))) + (let ((res (with-input-from-pipe (conc "which " hed) read-line))) + (if (and (string? res) + (common:file-exists? res)) + res + (if (null? tal) + #f + (loop (car tal)(cdr tal)))))))) + +(define (common:get-install-area) + (let ((exe-path (car (argv)))) + (if (common:file-exists? exe-path) + (handle-exceptions + exn + #f + (pathname-directory + (pathname-directory + (pathname-directory exe-path)))) + #f))) + +;;====================================================================== +;; return first path that can be created or already exists and is writable +;; +(define (common:get-create-writeable-dir dirs) + (if (null? dirs) + #f + (let loop ((hed (car dirs)) + (tal (cdr dirs))) + (let ((res (or (and (directory? hed) + (file-writable? hed) + hed) + (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "could not create " hed + ", this might cause problems down the road. exn=" exn) + #f) + (create-directory hed #t))))) + (if (and (string? res) + (directory? res)) + res + (if (null? tal) + #f + (loop (car tal)(cdr tal)))))))) + +;;====================================================================== +;; return the youngest timestamp . filename +;; +(define (common:get-youngest glob-list) + (let ((all-files (apply append + (map (lambda (patt) + (handle-exceptions + exn + '() + (glob patt))) + glob-list)))) + (fold (lambda (fname res) + (let ((last-mod (car res)) + (curmod (handle-exceptions + exn + 0 + (file-modification-time fname)))) + (if (> curmod last-mod) + (list curmod fname) + res))) + '(0 "n/a") + all-files))) + +;;====================================================================== +;; use bash to expand a glob. Does NOT handle paths with spaces! +;; +(define (common:bash-glob instr) + (string-split + (with-input-from-pipe + (conc "/bin/bash -c \"echo " instr "\"") + read-line))) + +;;====================================================================== +;; Some safety net stuff +;;====================================================================== + +;;====================================================================== +;; return input if it is a list or return null +(define (common:list-or-null inlst #!key (ovrd #f)(message #f)) + (if (list? inlst) + inlst + (begin + (if message (debug:print-error 0 *default-log-port* message)) + (or ovrd '())))) + +;;====================================================================== +;; T A R G E T S , S T A T E , S T A T U S , +;; R U N N A M E A N D T E S T P A T T +;;====================================================================== + +;;====================================================================== +;; (map print (map car (hash-table->alist (read-config "runconfigs.config" #f #t)))) +;; +(define (common:get-runconfig-targets #!key (configf #f)) + (let ((targs (sort (map car (hash-table->alist + (or configf ;; NOTE: There is no value in using runconfig:read here. + (read-config (conc *toppath* "/runconfigs.config") + #f #t) + (make-hash-table)))) + stringsymbol force-setting) #f)) + (force-result (case force-type + ((#f) #f) + ((always) #t) + ((test) (if (args:get-arg "-execute") ;; we are in a test + #t + #f)) + (else + (debug:print 0 *default-log-port* "ERROR: Bad server force setting " force-setting ", forcing server.") + #t)))) ;; default to requiring server + (if force-result + (begin + (debug:print-info 0 *default-log-port* "forcing use of server, force setting is \"" force-setting "\".") + #t) + #f))) + +;;====================================================================== +;; M I S C L I S T S +;;====================================================================== + +;;====================================================================== +;; items in lista are matched value and position in listb +;; return the remaining items in listb or #f +;; +(define (common:list-is-sublist lista listb) + (if (null? lista) + listb ;; all items in listb are "remaining" + (if (> (length lista)(length listb)) + #f + (let loop ((heda (car lista)) + (tala (cdr lista)) + (hedb (car listb)) + (talb (cdr listb))) + (if (equal? heda hedb) + (if (null? tala) ;; we are done + talb + (loop (car tala) + (cdr tala) + (car talb) + + (cdr talb))) + #f))))) + +;;====================================================================== +;; Needed for long lists to be sorted where (apply max ... ) dies +;; +(define (common:max inlst) + (let loop ((max-val (car inlst)) + (hed (car inlst)) + (tal (cdr inlst))) + (if (not (null? tal)) + (loop (max hed max-val) + (car tal) + (cdr tal)) + (max hed max-val)))) + +;;====================================================================== +;; get min or max, use > for max and < for min, this works around the limits on apply +;; +(define (common:min-max comp lst) + (if (null? lst) + #f ;; better than an exception for my needs + (fold (lambda (a b) + (if (comp a b) a b)) + (car lst) + lst))) + +;;====================================================================== +;; get min or max, use > for max and < for min, this works around the limits on apply +;; +(define (common:sum lst) + (if (null? lst) + 0 + (fold (lambda (a b) + (+ a b)) + (car lst) + lst))) + +;;====================================================================== +;; path list to hash-table tree +;; ((a b c)(a b d)(e b c)) => ((a (b (d) (c))) (e (b (c)))) +;; +(define (common:list->htree lst) + (let ((resh (make-hash-table))) + (for-each + (lambda (inlst) + (let loop ((ht resh) + (hed (car inlst)) + (tal (cdr inlst))) + (if (hash-table-ref/default ht hed #f) + (if (not (null? tal)) + (loop (hash-table-ref ht hed) + (car tal) + (cdr tal))) + (begin + (hash-table-set! ht hed (make-hash-table)) + (loop ht hed tal))))) + lst) + resh)) + +;;====================================================================== +;; hash-table tree to html list tree +;; +;; tipfunc takes two parameters: y the tip value and path the path to that point +;; +(define (common:htree->html ht path tipfunc) + (let ((datlist (sort (hash-table->alist ht) + (lambda (a b) + (string< (car a)(car b)))))) + (if (null? datlist) + (tipfunc #f path) ;; really shouldn't get here + (s:ul + (map (lambda (x) + (let* ((levelname (car x)) + (y (cdr x)) + (newpath (append path (list levelname))) + (leaf (or (not (hash-table? y)) + (null? (hash-table-keys y))))) + (if leaf + (s:li (tipfunc y newpath)) + (s:li + (list + levelname + (common:htree->html y newpath tipfunc)))))) + datlist))))) + +;;====================================================================== +;; hash-table tree to alist tree +;; +(define (common:htree->atree ht) + (map (lambda (x) + (cons (car x) + (let ((y (cdr x))) + (if (hash-table? y) + (common:htree->atree y) + y)))) + (hash-table->alist ht))) + +;;====================================================================== +;; M U N G E D A T A I N T O N I C E F O R M S +;;====================================================================== + +;;====================================================================== +;; Generate an index for a sparse list of key values +;; ( (rowname1 colname1 val1)(rowname2 colname2 val2) ) +;; +;; => +;; +;; ( (rowname1 0)(rowname2 1)) ;; rownames -> num +;; (colname1 0)(colname2 1)) ) ;; colnames -> num +;; +;; optional apply proc to rownum colnum value +(define (common:sparse-list-generate-index data #!key (proc #f)) + (if (null? data) + (list '() '()) + (let loop ((hed (car data)) + (tal (cdr data)) + (rownames '()) + (colnames '()) + (rownum 0) + (colnum 0)) + (let* ((rowkey (car hed)) + (colkey (cadr hed)) + (value (caddr hed)) + (existing-rowdat (assoc rowkey rownames)) + (existing-coldat (assoc colkey colnames)) + (curr-rownum (if existing-rowdat rownum (+ rownum 1))) + (curr-colnum (if existing-coldat colnum (+ colnum 1))) + (new-rownames (if existing-rowdat rownames (cons (list rowkey curr-rownum) rownames))) + (new-colnames (if existing-coldat colnames (cons (list colkey curr-colnum) colnames)))) + ;; (debug:print-info 0 *default-log-port* "Processing record: " hed ) + (if proc (proc curr-rownum curr-colnum rowkey colkey value)) + (if (null? tal) + (list new-rownames new-colnames) + (loop (car tal) + (cdr tal) + new-rownames + new-colnames + (if (> curr-rownum rownum) curr-rownum rownum) + (if (> curr-colnum colnum) curr-colnum colnum) + )))))) + +;;====================================================================== +;; if it looks like a number -> convert it to a number, else return it +;; +(define (common:lazy-convert inval) + (let* ((as-num (if (string? inval)(string->number inval) #f))) + (or as-num inval))) + +;;====================================================================== +;; convert string a=1; b=2; c=a silly thing; d= +;; to '((a . 1)(b . 2)(c . "a silly thing")(d . "")) +;; +(define (common:val->alist val #!key (convert #f)) + (let ((val-list (string-split-fields ";\\s*" val #:infix))) + (if val-list + (map (lambda (x) + (let ((f (string-split-fields "\\s*=\\s*" x #:infix))) + (case (length f) + ((0) `(,#f)) ;; null string case + ((1) `(,(string->symbol (car f)))) + ((2) `(,(string->symbol (car f)) . ,(let ((inval (cadr f))) + (if convert (common:lazy-convert inval) inval)))) + (else f)))) + val-list) + '()))) + +;;====================================================================== +;; S Y S T E M S T U F F +;;====================================================================== + +;;====================================================================== +;; lazy-safe get file mod time. on any error (file not existing etc.) return 0 +;; +(define (common:lazy-modification-time fpath) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "Failed to get modifcation time for " fpath ", treating it as zero. exn=" exn) + 0) + (if (file-exists? fpath) + (file-modification-time fpath) + 0))) + +;;====================================================================== +;; find timestamp of newest file associated with a sqlite db file +(define (common:lazy-sqlite-db-modification-time fpath) + (let* ((glob-list (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "Failed to glob " fpath "*, exn=" exn) + `(,(conc "/no/such/file, message: " ((condition-property-accessor 'exn 'message) exn)))) + (glob (conc fpath "*")))) + (file-list (if (eq? 0 (length glob-list)) + '("/no/such/file") + glob-list))) + (apply max + (map + common:lazy-modification-time + file-list)))) + +;;====================================================================== +;; return a nice clean pathname made absolute +;; +;; defined in configfmod +;; +#;(define (common:nice-path dir) + (let ((match (string-match "^(~[^\\/]*)(\\/.*|)$" dir))) + (if match ;; using ~ for home? + (common:nice-path (conc (common:read-link-f (cadr match)) "/" (caddr match))) + (normalize-pathname (if (absolute-pathname? dir) + dir + (conc (current-directory) "/" dir)))))) + +;; defined in configfmod + +#;(define (common:read-link-f path) + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* "command \"/bin/readlink -f " path "\" failed. exn=" exn) + path) ;; just give up + (with-input-from-pipe + (conc "/bin/readlink -f " path) + (lambda () + (read-line))))) + +;;====================================================================== +;; returns *effective load* (not normalized) +;; +(define (common:get-intercept onemin fivemin) + (if (< onemin fivemin) ;; load is decreasing, just use the onemin load + onemin + (let* ((load-change (- onemin fivemin)) + (tchange (- 300 60))) + (max (+ onemin (* 60 (/ load-change tchange))) 0)))) + +;;====================================================================== +;; calculate a delay number based on a droop curve +;; inputs are: +;; - load-in, load as from uptime, NOT normalized +;; - numcpus, number of cpus, ideally use the real cpus, not threads +;; +(define (common:get-delay load-in numcpus) + (let* ((ratio (/ load-in numcpus)) + (new-option (configf:lookup *configdat* "load" "new-load-method")) + (paramstr (or (configf:lookup *configdat* "load" "exp-params") + "15 12 1281453987.9543 0.75")) ;; 5 4 10 1")) + (paramlst (map string->number (string-split paramstr)))) + (if new-option + (begin + (cond ((and (>= ratio 0) (< ratio .5)) + 0) + ((and (>= ratio 0.5) (<= ratio .9)) + (* ratio (/ 5 .9))) + ((and (> ratio .9) (<= ratio 1.1)) + (+ 5 (* (- ratio .9) (/ 55 .2)))) + ((> ratio 1.1) + 60))) + (match paramlst + ((r1 r2 s1 s2) + (debug:print 3 *default-log-port* "Using params r1=" r1 " r2=" r2 " s1=" s1 " s2=" s2) + (min (max (/ (expt r1 (* r2 s2 ratio)) s1) 0) 30)) + (else + (debug:print 0 *default-log-port* "BAD exp-params, should be \"r1 r2 s1 s2\" but got " paramstr) + 30))))) + +(define (common:print-delay-table) + (let loop ((x 0)) + (print x "," (common:get-delay x 1)) + (if (< x 2) + (loop (+ x 0.1))))) + +(define (get-cpu-load #!key (remote-host #f)) + (car (common:get-cpu-load remote-host))) + +;;====================================================================== +;; (let* ((load-res (process:cmd-run->list "uptime")) +;; (load-rx (regexp "load average:\\s+(\\d+)")) +;; (cpu-load #f)) +;; (for-each (lambda (l) +;; (let ((match (string-search load-rx l))) +;; (if match +;; (let ((newval (string->number (cadr match)))) +;; (if (number? newval) +;; (set! cpu-load newval)))))) +;; (car load-res)) +;; cpu-load)) + +;;====================================================================== +;; get values from cached info from dropping file in logs dir +;; e.g. key is host and dtype is normalized-load +;; +(define (common:get-cached-info key dtype #!key (age 10)) + (if *toppath* + (let* ((fullpath (conc *toppath* "/.sysdata/" key "-" dtype ".log")) + (delfile (lambda (exn) + (debug:print-info 1 *default-log-port* " removing bad file " fullpath ", exn=" exn) + (delete-file* fullpath) + #f))) + (if (and (file-exists? fullpath) + (file-readable? fullpath)) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "failed to get cached info from " fullpath ", exn=" exn) + #f) + (debug:print 2 *default-log-port* "reading file " fullpath) + (let ((real-age (- (current-seconds) + (handle-exceptions + exn + (begin + (debug:print 1 *default-log-port* "Failed to read mod time on file " + fullpath ", using 0, exn=" exn) + 0) + (file-change-time fullpath))))) + (if (< real-age age) + (handle-exceptions + exn + (delfile exn) + (let* ((res (with-input-from-file fullpath read))) + (if (eof-object? res) + (begin + (delfile "n/a") + #f) + res))) + (begin + (debug:print-info 2 *default-log-port* "file " fullpath + " is too old (" real-age" seconds) to trust, skipping reading it") + #f)))) + (begin + (debug:print 2 *default-log-port* "not reading file " fullpath) + #f))) + #f)) + +(define (common:write-cached-info key dtype dat) + (if *toppath* + (let* ((fulldir (conc *toppath* "/.sysdata")) + (fullpath (conc fulldir "/" key "-" dtype ".log"))) + (if (not (file-exists? fulldir))(create-directory fulldir #t)) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "failed to write file " fullpath ", exn=" exn) + #f) + (with-output-to-file fullpath (lambda ()(pp dat))))) + #f)) + +(define (common:raw-get-remote-host-load remote-host) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "failed to ssh to " remote-host " and get loadavg. exn=" exn) + #f) ;; more specific handling of errors needed + (with-input-from-pipe + (conc "ssh " remote-host " cat /proc/loadavg") + (lambda ()(list (read)(read)(read)))))) + +;;====================================================================== +;; get cpu load by reading from /proc/loadavg, return all three values +;; +(define (common:get-cpu-load remote-host) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "failed to ssh or read loadavg from host " remote-host ", exn=" exn) + '(-99 -99 -99)) + (let* ((actual-hostname (or remote-host (get-host-name) "localhost"))) + (or (common:get-cached-info actual-hostname "cpu-load") + (let ((result (if remote-host + (map (lambda (res) + (if (eof-object? res) 9e99 res)) + (with-input-from-pipe + (conc "ssh " remote-host " cat /proc/loadavg") + (lambda ()(list (read)(read)(read))))) + (with-input-from-file "/proc/loadavg" + (lambda ()(list (read)(read)(read))))))) + (match + result + ((l1 l2 l3) + (if (and (number? l1) + (number? l2) + (number? l3)) + (begin + (common:write-cached-info actual-hostname "cpu-load" result) + result) + '(-1 -1 -1))) ;; -1 is bad result + (else '(-2 -2 -2)))))))) + +;;====================================================================== +;; get normalized cpu load by reading from /proc/loadavg and /proc/cpuinfo return all three values and the number of real cpus and the number of threads +;; returns alist '((adj-cpu-load . normalized-proc-load) ... etc. +;; keys: adj-proc-load, adj-core-load, 1m-load, 5m-load, 15m-load +;; +(define (common:get-normalized-cpu-load remote-host) + (let ((res (common:get-normalized-cpu-load-raw remote-host)) + (default `((adj-proc-load . 2) ;; there is no right answer + (adj-core-load . 2) + (1m-load . 2) + (5m-load . 0) ;; causes a large delta - thus causing default of throttling if stuff goes wrong + (15m-load . 0) + (proc . 1) + (core . 1) + (phys . 1) + (error . #t)))) + (cond + ((and (list? res) + (> (length res) 2)) + res) + ((eq? res #f) default) ;; add messages? + ((eq? res #f) default) ;; this would be the #eof + (else default)))) + +(define (common:get-normalized-cpu-load-raw remote-host) + (let* ((actual-host (or remote-host (get-host-name)))) ;; #f is localhost + (or (common:get-cached-info actual-host "normalized-load") + (let ((data (if remote-host + (with-input-from-pipe + (conc "ssh " remote-host " \"cat /proc/loadavg;cat /proc/cpuinfo;echo end\"") + read-lines) + (append + (with-input-from-file "/proc/loadavg" + read-lines) + (with-input-from-file "/proc/cpuinfo" + read-lines) + (list "end")))) + (load-rx (regexp "^([\\d\\.]+)\\s+([\\d\\.]+)\\s+([\\d\\.]+)\\s+.*$")) + (proc-rx (regexp "^processor\\s+:\\s+(\\d+)\\s*$")) + (core-rx (regexp "^core id\\s+:\\s+(\\d+)\\s*$")) + (phys-rx (regexp "^physical id\\s+:\\s+(\\d+)\\s*$")) + (max-num (lambda (p n)(max (string->number p) n)))) + ;; (print "data=" data) + (if (null? data) ;; something went wrong + #f + (let loop ((hed (car data)) + (tal (cdr data)) + (loads #f) + (proc-num 0) ;; processor includes threads + (phys-num 0) ;; physical chip on motherboard + (core-num 0)) ;; core + ;;; (print hed ", " loads ", " proc-num ", " phys-num ", " core-num) + (if (null? tal) ;; have all our data, calculate normalized load and return result + (let* ((act-proc (+ proc-num 1)) + (act-phys (+ phys-num 1)) + (act-core (+ core-num 1)) + (adj-proc-load (/ (car loads) act-proc)) + (adj-core-load (/ (car loads) act-core)) + (result + (append (list (cons 'adj-proc-load adj-proc-load) + (cons 'adj-core-load adj-core-load)) + (list (cons '1m-load (car loads)) + (cons '5m-load (cadr loads)) + (cons '15m-load (caddr loads))) + (list (cons 'proc act-proc) + (cons 'core act-core) + (cons 'phys act-phys))))) + (common:write-cached-info actual-host "normalized-load" result) + result) + (regex-case + hed + (load-rx ( x l1 l5 l15 ) (loop (car tal)(cdr tal)(map string->number (list l1 l5 l15)) proc-num phys-num core-num)) + (proc-rx ( x p ) (loop (car tal)(cdr tal) loads (max-num p proc-num) phys-num core-num)) + (phys-rx ( x p ) (loop (car tal)(cdr tal) loads proc-num (max-num p phys-num) core-num)) + (core-rx ( x c ) (loop (car tal)(cdr tal) loads proc-num phys-num (max-num c core-num))) + (else + (begin + ;; (print "NO MATCH: " hed) + (loop (car tal)(cdr tal) loads proc-num phys-num core-num))))))))))) + +(define (common:unix-ping hostname) + (let ((res (system (conc "ping -c 1 " hostname " > /dev/null")))) + (eq? res 0))) + +;; common:get-host-info was here +;; common:update-host-loads-table +;; common:get-least-loaded-host +;; common:wait-for-homehost-load + +(define (common:get-num-cpus remote-host) + (let* ((actual-host (or remote-host (get-host-name)))) + ;; hosts had better not be changing the number of cpus too often! + (or (hash-table-ref/default *numcpus-cache* actual-host #f) + (let* ((numcpus (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (pseudo-random-integer 3600))) + (let* ((proc (lambda () + (let loop ((numcpu 0) + (inl (read-line))) + (if (eof-object? inl) + (if (> numcpu 0) + numcpu + #f) ;; if zero return #f so caller knows that things are not working + (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl) + (+ numcpu 1) + numcpu) + (read-line)))))) + (result (if remote-host + (with-input-from-pipe + (conc "ssh " remote-host " cat /proc/cpuinfo") + proc) + (with-input-from-file "/proc/cpuinfo" proc)))) + (if (and (number? result) + (> result 0)) + (common:write-cached-info actual-host "num-cpus" result)) + result)))) + (hash-table-set! *numcpus-cache* actual-host numcpus) + numcpus)))) + +;;====================================================================== +;; wait for normalized cpu load to drop below maxload +;; +(define (common:wait-for-normalized-load maxnormload msg remote-host #!optional (rem-tries 5)) + (let ((num-cpus (common:get-num-cpus remote-host))) + (if num-cpus + (common:wait-for-cpuload maxnormload num-cpus 15 msg: msg remote-host: remote-host) + (begin + (thread-sleep! (pseudo-random-integer 60)) ;; we failed to get num cpus. wait a bit and try again + (if (> rem-tries 0) + (common:wait-for-normalized-load maxnormload msg remote-host (- rem-tries 1)) + #f))))) + +;;====================================================================== +;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load +;; count - count down to zero, at some point we'd give up if the load never drops +;; num-tries - count down to zero number tries to get numcpus +;; +(define (common:wait-for-cpuload maxnormload numcpus-in + #!key (count 1000) + (msg #f)(remote-host #f)(num-tries 5)) + (let* ((loadavg (common:get-cpu-load remote-host)) + ;; not possible to have zero. If we get 1, it's possible that we got the previous default, and we should check again + (numcpus (if (<= 1 numcpus-in) + (common:get-num-cpus remote-host) + numcpus-in)) + (first (car loadavg)) + (next (cadr loadavg)) + (adjmaxload (* maxnormload (max 1 numcpus))) ;; possible bug + ;; where numcpus + ;; (or could be + ;; maxload) is + ;; zero, crude + ;; fallback is to + ;; at least use 1 + ;; effective load accounts for load jumps, this should elminate all the first-next-avg, adjwait, load-jump-limit + ;; etc. + (effective-load (common:get-intercept first next)) + (recommended-delay (common:get-delay effective-load numcpus)) + (effective-host (or remote-host "localhost")) + (normalized-effective-load (/ effective-load numcpus)) + (will-wait (> normalized-effective-load maxnormload))) + (if (> recommended-delay 1) + (let* ((actual-delay (min recommended-delay 30))) + (if (common:low-noise-print 30 (conc (round actual-delay) "-safe-load")) + (debug:print-info 0 *default-log-port* "Load control, delaying " + actual-delay " seconds to maintain safe load. current normalized effective load is " + normalized-effective-load". maxnormload = " maxnormload " numcpus = " numcpus " loadavg = " loadavg " effective-load = " effective-load)) + (thread-sleep! actual-delay))) + + (cond + ;; bad data, try again to get the data + ((not will-wait) + (if (common:low-noise-print 3600 (conc (round normalized-effective-load) "-load-acceptable-" effective-host)) + (debug:print 0 *default-log-port* "Effective load on " effective-host " is acceptable at " effective-load " continuing."))) + + ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable + (> num-tries 0)) + (debug:print 0 *default-log-port* "WARNING: received bad data from get-cpu-load " + first ", we'll sleep 10s and try " num-tries " more times.") + (thread-sleep! 10) + (common:wait-for-cpuload maxnormload numcpus-in + count: count remote-host: remote-host num-tries: (- num-tries 1))) + + ;; need to wait for load to drop + ((and will-wait ;; (> first adjmaxload) + (> count 0)) + (debug:print-info 0 *default-log-port* + "Delaying 15" ;; adjwait + " seconds due to normalized effective load " normalized-effective-load ;; first + " exceeding max of " adjmaxload + " on server " (or remote-host (get-host-name)) + " (normalized load-limit: " maxnormload ") " (if msg msg "")) + (thread-sleep! 15) ;; adjwait) + (common:wait-for-cpuload maxnormload numcpus count: (- count 1) msg: msg remote-host: remote-host) + ;; put the message here to indicate came out of waiting + (debug:print-info 1 *default-log-port* + "On host: " effective-host + ", effective load: " effective-load + ", numcpus: " numcpus + ", normalized effective load: " normalized-effective-load + )) + ;; overloaded and count expired (i.e. went to zero) + (else + (if (> num-tries 0) ;; should be "num-tries-left". + (if (common:low-noise-print 30 (conc (round effective-load) "-load-acceptable-" effective-host)) + (debug:print 0 *default-log-port* "Load on " effective-host " is acceptable at effective normalized load of " + normalized-effective-load " continuing.")) + (debug:print 0 *default-log-port* "Load on " effective-host ", " + first" could not be retrieved. Giving up and continuing.")))))) + +;;====================================================================== +;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load +;; +;; (define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)(force-maxload #f)(num-tries 5)) +;; (let* ((loadavg (common:get-cpu-load remote-host)) +;; (numcpus (if (<= 1 numcpus-in) ;; not possible to have zero. If we get 1, it's possible that we got the previous default, and we should check again +;; (common:get-num-cpus remote-host) +;; numcpus-in)) +;; (maxload (if force-maxload +;; maxload-in +;; (if (number? maxload-in) +;; (max maxload-in 0.5) +;; 0.5))) ;; so maxload must be greater than 0.5 for now BUG - FIXME? +;; (first (car loadavg)) +;; (next (cadr loadavg)) +;; (adjmaxload (* maxload (max 1 numcpus))) ;; possible bug where +;; ;; numcpus (or could be +;; ;; maxload) is zero, +;; ;; crude fallback is to +;; ;; at least use 1 +;; (loadjmp (- first (if (> next (* numcpus 0.7)) ;; could do something with average of first and next? +;; 0 +;; next))) ;; we will force a conservative calculation any time next is large. +;; (first-next-avg (/ (+ first next) 2)) +;; ;; add some randomness to the time to break any alignment +;; ;; where netbatch dumps many jobs to machines simultaneously +;; (adjwait (min (+ 300 (pseudo-random-integer 10)) (abs (* (+ (pseudo-random-integer 10) +;; (/ (- 1000 count) 10) +;; waitdelay) +;; (- first adjmaxload) )))) +;; (load-jump-limit (configf:lookup-number *configdat* "setup" "load-jump-limit")) +;; ;; effective load accounts for load jumps, this should elminate all the first-next-avg, adjwait, load-jump-limit +;; ;; etc. +;; (effective-load (common:get-intercept first next)) +;; (effective-host (or remote-host "localhost")) +;; (normalized-effective-load (/ effective-load numcpus)) +;; (will-wait (> normalized-effective-load maxload))) +;; +;; ;; let's let the user know once in a long while that load checking +;; ;; is happening but not constantly report it +;; #;(if (common:low-noise-print 30 (conc "cpuload" (or remote-host "localhost"))) ;; (> (pseudo-random-integer 100) 75) ;; about 25% of the time +;; (debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload +;; ", load: " first ", adjmaxload: " adjmaxload ", loadjmp: " loadjmp)) +;; +;; (debug:print-info 1 *default-log-port* +;; "On host: " effective-host +;; ", effective load: " effective-load +;; ", numcpus: " numcpus +;; ", normalized effective load: " normalized-effective-load +;; ) +;; +;; (cond +;; ;; bad data, try again to get the data +;; ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable +;; (> num-tries 0)) +;; (debug:print 0 *default-log-port* "WARNING: received bad data from get-cpu-load " first ", we'll sleep 10s and try " num-tries " more times.") +;; (thread-sleep! 10) +;; (common:wait-for-cpuload maxload-in numcpus-in waitdelay +;; count: count remote-host: remote-host force-maxload: force-maxload num-tries: (- num-tries 1))) +;; ;; need to wait for load to drop +;; ((and will-wait ;; (> first adjmaxload) +;; (> count 0)) +;; (debug:print-info 0 *default-log-port* +;; "Delaying " 15 ;; adjwait +;; " seconds due to normalized effective load " normalized-effective-load ;; first +;; " exceeding max of " adjmaxload +;; " on server " (or remote-host (get-host-name)) +;; " (normalized load-limit: " maxload ") " (if msg msg "")) +;; (thread-sleep! 15) ;; adjwait) +;; (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host)) +;; ((and (> loadjmp (cond +;; (load-jump-limit load-jump-limit) +;; ((> numcpus 8)(/ numcpus 2)) +;; ((> numcpus 4)(/ numcpus 1.2)) +;; (else 0.5))) +;; (> count 0)) +;; (debug:print-info 0 *default-log-port* "waiting " adjwait " seconds due to possible load jump " loadjmp ". " +;; (if msg msg "")) +;; (thread-sleep! adjwait) +;; (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host)) +;; (else +;; (if (> num-tries 0) +;; (if (common:low-noise-print 30 (conc (round first) "-load-acceptable-" (or remote-host "localhost"))) +;; (debug:print 0 *default-log-port* "Load on " (or remote-host "localhost") " is acceptable at " first " continuing.")) +;; (debug:print 0 *default-log-port* "Load on " (or remote-host "localhost") ", "first" could not be retrieved. Giving up and continuing.")))))) +;; +(define (get-uname . params) + (let* ((uname-res (process:cmd-run->list (conc "uname " (if (null? params) "-a" (car params))))) + (uname #f)) + (if (null? (car uname-res)) + "unknown" + (caar uname-res)))) + +;; for reasons I don't understand multiple calls to real-path in parallel threads +;; must be protected by mutexes +;; +(define (common:real-path inpath) + ;; (process:cmd-run-with-stderr->list "readlink" "-f" inpath)) ;; cmd . params) + ;; (let-values + ;; (((inp oup pid) (process "readlink" (list "-f" inpath)))) + ;; (with-input-from-port inp + ;; (let loop ((inl (read-line)) + ;; (res #f)) + ;; (print "inl=" inl) + ;; (if (eof-object? inl) + ;; (begin + ;; (close-input-port inp) + ;; (close-output-port oup) + ;; ;; (process-wait pid) + ;; res) + ;; (loop (read-line) inl)))))) + (with-input-from-pipe (conc "readlink -f " inpath) read-line)) + +;;====================================================================== +;; D I S K S P A C E +;;====================================================================== + +(define (common:get-disk-space-used fpath) + (with-input-from-pipe (conc "/usr/bin/du -s " fpath) read)) + +;;====================================================================== +;; given path get free space, allows override in [setup] +;; with free-space-script /path/to/some/script.sh +;; +(define (get-df path) + (if (configf:lookup *configdat* "setup" "free-space-script") + (with-input-from-pipe + (conc (configf:lookup *configdat* "setup" "free-space-script") " " path) + (lambda () + (let ((res (read-line))) + (if (string? res) + (string->number res))))) + (get-unix-df path))) + +(define (get-free-inodes path) + (if (configf:lookup *configdat* "setup" "free-inodes-script") + (with-input-from-pipe + (conc (configf:lookup *configdat* "setup" "free-inodes-script") " " path) + (lambda () + (let ((res (read-line))) + (if (string? res) + (string->number res))))) + (get-unix-inodes path))) + +(define (get-unix-df path) + (let* ((df-results (process:cmd-run->list (conc "df " path))) + (space-rx (regexp "([0-9]+)\\s+([0-9]+)%")) + (freespc #f)) + ;; (write df-results) + (for-each (lambda (l) + (let ((match (string-search space-rx l))) + (if match + (let ((newval (string->number (cadr match)))) + (if (number? newval) + (set! freespc newval)))))) + (car df-results)) + freespc)) + +(define (get-unix-inodes path) + (let* ((df-results (process:cmd-run->list (conc "df -i " path))) + (space-rx (regexp "([0-9]+)\\s+([0-9]+)%")) + (freenodes 0)) ;; 0 is a better failsafe than #f here. + ;; (write df-results) + (for-each (lambda (l) + (let ((match (string-search space-rx l))) + (if match + (let ((newval (string->number (cadr match)))) + (if (number? newval) + (set! freenodes newval)))))) + (car df-results)) + freenodes)) + +(define (common:check-space-in-dir dirpath required) + (let* ((dbspace (if (directory? dirpath) + (get-df dirpath) + 0))) + (list (> dbspace required) + dbspace + required + dirpath))) + +;;====================================================================== +;; check space in dbdir and in megatest dir +;; returns: ok/not dbspace required-space +;; +(define (common:check-db-dir-space) + (let* ((required (string->number + ;; default is 1GB (or actually a billion bytes) This is the number of 1 kB blocks. + (or (configf:lookup *configdat* "setup" "dbdir-space-required") + "1000000"))) + (dbdir (common:get-db-tmp-area)) ;; (db:get-dbdir)) + (tdbspace (common:check-space-in-dir dbdir required)) + (mdbspace (common:check-space-in-dir *toppath* required))) + (sort (list tdbspace mdbspace) (lambda (a b) + (< (cadr a)(cadr b)))))) + +;;====================================================================== +;; check available space in dbdir, exit if insufficient +;; +(define (common:check-db-dir-and-exit-if-insufficient) + (let* ((spacedat (car (common:check-db-dir-space))) ;; look only at worst for now + (is-ok (car spacedat)) + (dbspace (cadr spacedat)) + (required (caddr spacedat)) + (dbdir (cadddr spacedat))) + (if (not is-ok) + (begin + (debug:print-error 0 *default-log-port* "Insufficient space in " dbdir ", require " required ", have " dbspace ", exiting now.") + (exit 1))))) + +;;====================================================================== +;; paths is list of lists ((name path) ... ) +;; +(define (common:get-disk-with-most-free-space disks minsize) + (let* ((best #f) + (bestsize 0) + (default-min-inodes-string "1000000") + (default-min-inodes (string->number default-min-inodes-string)) + (min-inodes (or (string->number (if (configf:lookup *configdat* "setup" "min_inodes") (configf:lookup *configdat* "setup" "min_inodes") default-min-inodes-string)) default-min-inodes))) + + (for-each + (lambda (disk-num) + (let* ((dirpath (cadr (assoc disk-num disks))) + (freespc (cond + ((not (directory? dirpath)) + (if (common:low-noise-print 300 "disks not a dir " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it.")) + -1) + ((not (file-writable? dirpath)) + (if (common:low-noise-print 300 "disks not writeable " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it.")) + -1) + ((not (eq? (string-ref dirpath 0) #\/)) + (if (common:low-noise-print 300 "disks not a proper path " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it.")) + -1) + (else + (get-df dirpath)))) + (free-inodes (cond + ((not (directory? dirpath)) + (if (common:low-noise-print 300 "disks not a dir " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it.")) + -1) + ((not (file-writable? dirpath)) + (if (common:low-noise-print 300 "disks not writeable " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it.")) + -1) + ((not (eq? (string-ref dirpath 0) #\/)) + (if (common:low-noise-print 300 "disks not a proper path " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it.")) + -1) + (else + (get-free-inodes dirpath)))) + ;;(free-inodes (get-free-inodes dirpath)) + ) + (debug:print 2 *default-log-port* "INFO: disk " disk-num " path " dirpath " free space " freespc " free inodes " free-inodes) + (if (and (> freespc bestsize)(> free-inodes min-inodes )) + (begin + (set! best (cons disk-num dirpath)) + (set! bestsize freespc))) + ;;(print "Processing: " disk-num " bestsize: " bestsize " best: " best " freespc: " freespc " min-inodes: " min-inodes " free-inodes: " free-inodes) + )) + (map car disks)) + (if (and best (> bestsize minsize)) + best + #f))) ;; #f means no disk candidate found + +;;====================================================================== +;; convert a spec string to a list of vectors #( rx action rx-string ) +(define (common:spec-string->list-of-specs spec-string actions) + (let ((spec-strings (string-split-fields "\\s*;\\s*" spec-string #:infix)) + (actions-regex (regexp (conc "^(.*)\\s+(" (string-intersperse (map conc actions) "|") ")")))) + (filter + (lambda (x) x) + (map (lambda (s) + (let ((m (string-match actions-regex s))) + (if m + (vector (regexp (cadr m))(string->symbol (caddr m))(cadr m)) + (begin + (debug:print 0 *default-log-port* "WARNING: Unrecognised rule \"" s "\" in clean-up specification.") + #f)))) + spec-strings)))) + +;;====================================================================== +;; given a list of specs rx . rule and a file return the first matching rule +;; +(define (common:file-find-rule fname rules) ;; rule is vector #( rx action rx-string) + (let loop ((rule (car rules)) + (tail (cdr rules))) + (let ((rx (vector-ref rule 0)) + (rn (vector-ref rule 1))) ;; rule name + (if (string-match rx fname) + rule ;; return the whole rule so regex can be printed etc. + (if (null? tail) + #f + (loop (car tail)(cdr tail))))))) + +;;====================================================================== +;; given a spec apply some rules to a directory +;; +;; WARNING: This function will REMOVE files - be sure your spec and path is correct! +;; +;; spec format: +;; file-regex1 action; file-regex2 action; ... +;; e.g. +;; .*\.log$ keep; .* remove +;; --> keep all .log files, remove everything else +;; limitations: +;; cannot have a rule with ; as part of the spec +;; not very flexible, would be nice to return binned file names? +;; supported rules: +;; keep - keep this file +;; remove - remove this file +;; compress - compress this file +;; +(define (common:dir-clean-up path spec-string #!key (compress "gzip")(actions '(keep remove compress))(remove-empty #f)) + (let* ((specs (common:spec-string->list-of-specs spec-string actions)) + (keepers (make-hash-table)) + (directories (make-hash-table))) + (find-files + path + action: (lambda (p res) + (let ((rule (common:file-find-rule p specs))) + (cond + ((directory? p)(hash-table-set! directories p #t)) + (else + (case (vector-ref rule 1) + ((keep)(hash-table-set! keepers p rule)) + ((remove) + (print "Removing file " p) + (delete-file p)) + ((compress) + (print "Compressing file " p) + (system (conc compress " " p))) + (else + (print "No match for file " p)))))))) + (if remove-empty + (for-each + (lambda (d) + (if (null? (glob (conc d "/.*")(conc d "/*"))) + (begin + (print "Removing empty directory " d) + (delete-directory d)))) + (sort (hash-table-keys directories) (lambda (a b)(> (string-length a)(string-length b)))))) + )) + +;;====================================================================== +;; E N V I R O N M E N T V A R S +;;====================================================================== + +(define (bb-check-path #!key (msg "check-path: ")) + (let ((path (or (get-environment-variable "PATH") "none"))) + (debug:print-info 0 *default-log-port* (conc msg" : $PATH="path)) + (if (string-match "^.*/isoenv-core/.*" path) + (debug:print-error 0 *default-log-port* (conc msg" : !!ISOENV PRESENT!!")) ;; remove for prod + (debug:print-info 1 *default-log-port* (conc msg" : **no isoenv present**"))))) + +(define (save-environment-as-files fname #!key (ignorevars (list "USER" "HOME" "DISPLAY" "LS_COLORS" "XKEYSYMDB" "EDITOR" "MAKEFLAGS" "MAKEF" "MAKEOVERRIDES"))) + ;;(bb-check-path msg: "save-environment-as-files entry") + (let ((envvars (get-environment-variables)) + (whitesp (regexp "[^a-zA-Z0-9_\\-:,\\.\\/%$]")) + (mungeval (lambda (val) + (cond + ((eq? val #t) "") ;; convert #t to empty string + ((eq? val #f) #f) ;; convert #f to itself (still thinking about this one + (else val))))) + (with-output-to-file (conc fname ".csh") + (lambda () + (for-each (lambda (keyval) + (let* ((key (car keyval)) + (val (cdr keyval)) + (delim (if (string-search whitesp val) + "\"" + ""))) + (print (if (or (member key ignorevars) + (string-search whitesp key)) + "# setenv " + "setenv ") + key " " delim (mungeval val) delim))) + envvars))) + (with-output-to-file (conc fname ".sh") + (lambda () + (for-each (lambda (keyval) + (let* ((key (car keyval)) + (val (cdr keyval)) + (delim (if (string-search whitesp val) + "\"" + ""))) + (print (if (or (member key ignorevars) + (string-search whitesp key) + (string-search ":" key)) ;; internal only values to be skipped. + "# export " + "export ") + key "=" delim (mungeval val) delim))) + envvars))))) + +(define (common:get-param-mapping #!key (flavor #f)) + "returns alist mapping string keys in testconfig/subrun to megatest command line switches; if flavor is switch-symbol, maps tcmt symbolic switches to megatest switches" + (let ((default '(("tag-expr" . "-tagexpr") + ("mode-patt" . "-modepatt") + ("run-name" . "-runname") + ("contour" . "-contour") + ("target" . "-target") + ("test-patt" . "-testpatt") + ("msg" . "-m") + ("log" . "-log") + ("start-dir" . "-start-dir") + ("new" . "-set-state-status")))) + (if (eq? flavor 'switch-symbol) + (map (lambda (x) + (cons (string->symbol (conc "-" (car x))) (cdr x))) + default) + default))) + +;;====================================================================== +;; set some env vars from an alist, return an alist with original values +;; (("VAR" "value") ...) +;; a value of #f means "unset this var" +;; +(define (alist->env-vars lst) + (if (list? lst) + (let ((res '())) + (for-each (lambda (p) + (let* ((var (car p)) + (val (cadr p)) + (prv (get-environment-variable var))) + (set! res (cons (list var prv) res)) + (if val + (safe-setenv var (->string val)) + (unsetenv var)))) + lst) + res) + '())) + +;;====================================================================== +;; clear vars matching pattern, run proc, set vars back +;; if proc is a string run that string as a command with +;; system. +;; +(define *common:orig-env* + (let ((envvars (get-environment-variables))) + (if (get-environment-variable "MT_ORIG_ENV") + (with-input-from-string + (z3:decode-buffer (base64:base64-decode (get-environment-variable "MT_ORIG_ENV"))) + read) + (filter-map (lambda (x) + (if (string-match "^MT_.*" (car x)) + #f + x)) + envvars)))) + +(define (common:with-orig-env proc) + (let ((current-env (get-environment-variables))) + (for-each (lambda (x) (unsetenv (car x))) current-env) + (for-each (lambda (x) (setenv (car x) (cdr x))) *common:orig-env*) + (let ((rv (cond + ((string? proc)(system proc)) + (proc (proc))))) + (for-each (lambda (x) (unsetenv (car x))) *common:orig-env*) + (for-each (lambda (x) (setenv (car x) (cdr x))) current-env) + rv))) + +(define (common:without-vars proc . var-patts) + (let ((vars (make-hash-table))) + (for-each + (lambda (vardat) ;; each env var + (for-each + (lambda (var-patt) + (if (string-match var-patt (car vardat)) + (let ((var (car vardat)) + (val (cdr vardat))) + (hash-table-set! vars var val) + (unsetenv var)))) + var-patts)) + (get-environment-variables)) + (cond + ((string? proc)(system proc)) + (proc (proc))) + (hash-table-for-each + vars + (lambda (var val) + (setenv var val))) + vars)) + +(define (common:run-a-command cmd #!key (with-vars #f) (with-orig-env #f)) + (let* ((pre-cmd (dtests:get-pre-command)) + (post-cmd (dtests:get-post-command)) + (fullcmd (if (or pre-cmd post-cmd) + (conc pre-cmd cmd post-cmd) + (conc "viewscreen " cmd)))) + (debug:print-info 02 *default-log-port* "Running command: " fullcmd) + (cond + (with-vars (common:without-vars fullcmd)) + (with-orig-env (common:with-orig-env fullcmd)) + (else (common:without-vars fullcmd "MT_.*"))))) + +;;====================================================================== +;; T I M E A N D D A T E +;;====================================================================== + +;;====================================================================== +;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5 +(define (common:hms-string->seconds tstr) + (let ((parts (string-split-fields "\\w+" tstr)) + (time-secs 0) + ;; s=seconds, m=minutes, h=hours, d=days, M=months, y=years, w=weeks + (trx (regexp "(\\d+)([smhdMyw])"))) + (for-each (lambda (part) + (let ((match (string-match trx part))) + (if match + (let ((val (string->number (cadr match))) + (unt (caddr match))) + (if val + (set! time-secs (+ time-secs (* val + (case (string->symbol unt) + ((s) 1) + ((m) 60) ;; minutes + ((h) 3600) + ((d) 86400) + ((w) 604800) + ((M) 2628000) ;; aproximately one month + ((y) 31536000) + (else #f)))))))))) + parts) + time-secs)) + +(define (seconds->hr-min-sec secs) + (let* ((hrs (quotient secs 3600)) + (min (quotient (- secs (* hrs 3600)) 60)) + (sec (- secs (* hrs 3600)(* min 60)))) + (conc (if (> hrs 0)(conc hrs "hr ") "") + (if (> min 0)(conc min "m ") "") + sec "s"))) + +(define (seconds->time-string sec) + (time->string + (seconds->local-time sec) "%H:%M:%S")) + +(define (seconds->work-week/day-time sec) + (time->string + (seconds->local-time sec) "ww%V.%u %H:%M")) + +(define (seconds->work-week/day sec) + (time->string + (seconds->local-time sec) "ww%V.%u")) + +(define (seconds->year-work-week/day sec) + (time->string + (seconds->local-time sec) "%yww%V.%w")) + +(define (seconds->year-work-week/day-time sec) + (time->string + (seconds->local-time sec) "%Yww%V.%w %H:%M")) + +(define (seconds->year-week/day-time sec) + (time->string + (seconds->local-time sec) "%Yw%V.%w %H:%M")) + +(define (seconds->quarter sec) + (case (string->number + (time->string + (seconds->local-time sec) + "%m")) + ((1 2 3) 1) + ((4 5 6) 2) + ((7 8 9) 3) + ((10 11 12) 4) + (else #f))) + +;;====================================================================== +;; basic ISO8601 format (e.g. "2017-02-28 06:02:54") date time => Unix epoch +;; +(define (common:date-time->seconds datetime) + (local-time->seconds (string->time datetime "%Y-%m-%d %H:%M:%S"))) + +;;====================================================================== +;; given span of seconds tstart to tend +;; find start time to mark and mark delta +;; +(define (common:find-start-mark-and-mark-delta tstart tend) + (let* ((deltat (- (max tend (+ tend 10)) tstart)) ;; can't handle runs of less than 4 seconds. Pad it to 10 seconds ... + (result #f) + (min 60) + (hr (* 60 60)) + (day (* 24 hr)) + (yr (* 365 day)) ;; year + (mo (/ yr 12)) + (wk (* day 7))) + (for-each + (lambda (max-blks) + (for-each + (lambda (span) ;; 5 2 1 + (if (not result) + (for-each + (lambda (timeunit timesym) ;; year month day hr min sec + (if (not result) + (let* ((time-blk (* span timeunit)) + (num-blks (quotient deltat time-blk))) + (if (and (> num-blks 4)(< num-blks max-blks)) + (let ((first (* (quotient tstart time-blk) time-blk))) + (set! result (list span timeunit time-blk first timesym)) + ))))) + (list yr mo wk day hr min 1) + '( y mo w d h m s)))) + (list 8 6 5 2 1))) + '(5 10 15 20 30 40 50 500)) + (if values + (apply values result) + (values 0 day 1 0 'd)))) + +;;====================================================================== +;; given x y lim return the cron expansion +;; +(define (common:expand-cron-slash x y lim) + (let loop ((curr x) + (res `())) + (if (< curr lim) + (loop (+ curr y) (cons curr res)) + (reverse res)))) + +;;====================================================================== +;; expand a complex cron string to a list of cron strings +;; +;; x/y => x, x+y, x+2y, x+3y while x+Ny a, b ,c +;; +;; NOTE: with flatten a lot of the crud below can be factored down. +;; +(define (common:cron-expand cron-str) + (if (list? cron-str) + (flatten + (fold (lambda (x res) + (if (list? x) + (let ((newres (map common:cron-expand x))) + (append x newres)) + (cons x res))) + '() + cron-str)) ;; (map common:cron-expand cron-str)) + (let ((cron-items (string-split cron-str)) + (slash-rx (regexp "(\\d+)/(\\d+)")) + (comma-rx (regexp ".*,.*")) + (max-vals '((min . 60) + (hour . 24) + (dayofmonth . 28) ;;; BUG!!!! This will be a bug for some combinations + (month . 12) + (dayofweek . 7)))) + (if (< (length cron-items) 5) ;; bad spec + cron-str ;; `(,cron-str) ;; just return the string, something downstream will fix it + (let loop ((hed (car cron-items)) + (tal (cdr cron-items)) + (type 'min) + (type-tal '(hour dayofmonth month dayofweek)) + (res '())) + (regex-case + hed + (slash-rx ( _ base incr ) (let* ((basen (string->number base)) + (incrn (string->number incr)) + (expanded-vals (common:expand-cron-slash basen incrn (alist-ref type max-vals))) + (new-list-crons (fold (lambda (x myres) + (cons (conc (if (null? res) + "" + (conc (string-intersperse res " ") " ")) + x " " (string-intersperse tal " ")) + myres)) + '() expanded-vals))) + ;; (print "new-list-crons: " new-list-crons) + ;; (fold (lambda (x res) + ;; (if (list? x) + ;; (let ((newres (map common:cron-expand x))) + ;; (append x newres)) + ;; (cons x res))) + ;; '() + (flatten (map common:cron-expand new-list-crons)))) + ;; (map common:cron-expand (map common:cron-expand new-list-crons)))) + (else (if (null? tal) + cron-str + (loop (car tal)(cdr tal)(car type-tal)(cdr type-tal)(append res (list hed))))))))))) + +;;====================================================================== +;; given a cron string and the last time event was processed return #t to run or #f to not run +;; +;; min hour dayofmonth month dayofweek +;; 0-59 0-23 1-31 1-12 0-6 ### NOTE: dayofweek does not include 7 +;; +;; #t => yes, run the job +;; #f => no, do not run the job +;; +(define (common:cron-event cron-str now-seconds-in last-done) ;; ref-seconds = #f is NOW. + (let* ((cron-items (map string->number (string-split cron-str))) + (now-seconds (or now-seconds-in (current-seconds))) + (now-time (seconds->local-time now-seconds)) + (last-done-time (seconds->local-time last-done)) + (all-times (make-hash-table))) + ;; (print "cron-items: " cron-items "(length cron-items): " (length cron-items)) + (if (not (eq? (length cron-items) 5)) ;; don't even try to figure out junk strings + #f + (match-let ((( cmin chour cdayofmonth cmonth cdayofweek) + cron-items) + ;; 0 1 2 3 4 5 6 + ((nsec nmin nhour ndayofmonth nmonth nyr ndayofweek n7 n8 n9) + (vector->list now-time)) + ((lsec lmin lhour ldayofmonth lmonth lyr ldayofweek l7 l8 l9) + (vector->list last-done-time))) + ;; create all possible time slots + ;; remove invalid slots due to (for example) day of week + ;; get the start and end entries for the ref-seconds (current) time + ;; if last-done > ref-seconds => this is an ERROR! + ;; does the last-done time fall in the legit region? + ;; yes => #f do not run again this command + ;; no => #t ok to run the command + (for-each ;; month + (lambda (month) + (for-each ;; dayofmonth + (lambda (dom) + (for-each + (lambda (hr) ;; hour + (for-each + (lambda (minute) ;; minute + (let ((copy-now (apply vector (vector->list now-time)))) + (vector-set! copy-now 0 0) ;; force seconds to zero + (vector-set! copy-now 1 minute) + (vector-set! copy-now 2 hr) + (vector-set! copy-now 3 dom) ;; dom is already corrected for zero referenced + (vector-set! copy-now 4 month) + (let* ((copy-now-secs (local-time->seconds copy-now)) + (new-copy (seconds->local-time copy-now-secs))) ;; remake the time vector + (if (or (not cdayofweek) + (equal? (vector-ref new-copy 6) + cdayofweek)) ;; if the day is specified and a match OR if the day is NOT specified + (if (or (not cdayofmonth) + (equal? (vector-ref new-copy 3) + (+ 1 cdayofmonth))) ;; if the month is specified and a match OR if the month is NOT specified + (hash-table-set! all-times copy-now-secs new-copy)))))) + (if cmin + `(,cmin) ;; if given cmin, have to use it + (list (- nmin 1) nmin (+ nmin 1))))) ;; minute + (if chour + `(,chour) + (list (- nhour 1) nhour (+ nhour 1))))) ;; hour + (if cdayofmonth + `(,cdayofmonth) + (list (- ndayofmonth 1) ndayofmonth (+ ndayofmonth 1))))) + (if cmonth + `(,cmonth) + (list (- nmonth 1) nmonth (+ nmonth 1)))) + (let ((before #f) + (is-in #f)) + (for-each + (lambda (moment) + (if (and before + (<= before now-seconds) + (>= moment now-seconds)) + (begin + ;; (print) + ;; (print "Before: " (time->string (seconds->local-time before))) + ;; (print "Now: " (time->string (seconds->local-time now-seconds))) + ;; (print "After: " (time->string (seconds->local-time moment))) + ;; (print "Last: " (time->string (seconds->local-time last-done))) + (if (< last-done before) + (set! is-in before)) + )) + (set! before moment)) + (sort (hash-table-keys all-times) <)) + is-in))))) + +(define (common:extended-cron cron-str now-seconds-in last-done) + (let ((expanded-cron (common:cron-expand cron-str))) + (if (string? expanded-cron) + (common:cron-event expanded-cron now-seconds-in last-done) + (let loop ((hed (car expanded-cron)) + (tal (cdr expanded-cron))) + (if (common:cron-event hed now-seconds-in last-done) + #t + (if (null? tal) + #f + (loop (car tal)(cdr tal)))))))) + +;;====================================================================== +;; C O L O R S +;;====================================================================== + +(define (common:name->iup-color name) + (case (string->symbol (string-downcase name)) + ((red) "223 33 49") + ((grey) "192 192 192") + ((orange) "255 172 13") + ((purple) "This is unfinished ..."))) + +;;====================================================================== +;; (define (common:get-color-for-state-status state status) +;; (case (string->symbol state) +;; ((COMPLETED) +;; (case (string->symbol status) +;; ((PASS) "70 249 73") +;; ((WARN WAIVED) "255 172 13") +;; ((SKIP) "230 230 0") +;; (else "223 33 49"))) +;; ((LAUNCHED) "101 123 142") +;; ((CHECK) "255 100 50") +;; ((REMOTEHOSTSTART) "50 130 195") +;; ((RUNNING) "9 131 232") +;; ((KILLREQ) "39 82 206") +;; ((KILLED) "234 101 17") +;; ((NOT_STARTED) "240 240 240") +;; (else "192 192 192"))) + +(define (common:iup-color->rgb-hex instr) + (string-intersperse + (map (lambda (x) + (number->string x 16)) + (map string->number + (string-split instr))) + "/")) + +;;====================================================================== +;; L O C K I N G M E C H A N I S M S +;;====================================================================== + +;; common:faux-lock was here + +;; common:simple-lock was here + +;;====================================================================== +;; +;;====================================================================== + +(define (common:in-running-test?) + (and (args:get-arg "-execute") (get-environment-variable "MT_CMDINFO"))) + +(define (common:get-color-from-status status) + (cond + ((equal? status "PASS") "green") + ((equal? status "FAIL") "red") + ((equal? status "WARN") "orange") + ((equal? status "KILLED") "orange") + ((equal? status "KILLREQ") "purple") + ((equal? status "RUNNING") "blue") + ((equal? status "ABORT") "brown") + (else "black"))) + +;; common:get-launcher was here + +;;====================================================================== +;; NMSG AND NEW API +;;====================================================================== +;; +;; ;;====================================================================== +;; ;; nm based server experiment, keep around for now. +;; ;; +;; (define (nm:start-server dbconn #!key (given-host-name #f)) +;; (let* ((srvdat (start-raw-server given-host-name: given-host-name)) +;; (host-name (srvdat-host srvdat)) +;; (soc (srvdat-soc srvdat))) +;; +;; ;; start the queue processor (save for second round of development) +;; ;; +;; (thread-start! (make-thread! (lambda ()(queue-processor dbconn) "Queue processor"))) +;; ;; msg is an alist +;; ;; 'r host:port <== where to return the data +;; ;; 'p params <== data to apply the command to +;; ;; 'e j|s|l <== encoding of the params. default is s (sexp), if not specified is assumed to be default +;; ;; 'c command <== look up the function to call using this key +;; ;; +;; (let loop ((msg-in (nn-recv soc))) +;; (if (not (equal? msg-in "quit")) +;; (let* ((dat (decode msg-in)) +;; (host-port (alist-ref 'r dat)) ;; this is for the reverse req rep where the server is a client of the original client +;; (params (alist-ref 'p dat)) +;; (command (let ((c (alist-ref 'c dat)))(if c (string->symbol c) #f))) +;; (all-good (and host-port params command (hash-table-exists? *commands* command)))) +;; (if all-good +;; (let ((cmddat (make-qitem +;; command: command +;; host-port: host-port +;; params: params))) +;; (queue-push cmddat) ;; put request into the queue +;; (nn-send soc "queued")) ;; reply with "queued" +;; (print "ERROR: ["(common:human-time)"] BAD request " dat)) +;; (loop (nn-recv soc))))) +;; (nn-close soc))) + +;;====================================================================== +;; D A S H B O A R D U S E R V I E W S +;;====================================================================== + +;;====================================================================== +;; first read ~/views.config if it exists, then read $MTRAH/views.config if it exists +;; +(define (common:load-views-config) + (let* ((view-cfgdat (make-hash-table)) + (home-cfgfile (conc (get-environment-variable "HOME") "/.mtviews.config")) + (mthome-cfgfile (conc *toppath* "/.mtviews.config"))) + (if (common:file-exists? mthome-cfgfile) + (read-config mthome-cfgfile view-cfgdat #t)) + ;; we load the home dir file AFTER the MTRAH file so the user can clobber settings when running the dashboard in read-only areas + (if (common:file-exists? home-cfgfile) + (read-config home-cfgfile view-cfgdat #t)) + view-cfgdat)) + +;;====================================================================== +;; H I E R A R C H I C A L H A S H T A B L E S +;;====================================================================== +;; +;; Every element including top element is a vector: +;; + +(define (hh:make-hh #!key (ht #f)(value #f)) + (vector (or ht (make-hash-table)) value)) + +;;====================================================================== +;; used internally +(define-inline (hh:set-ht! hh ht) (vector-set! hh 0 ht)) +(define-inline (hh:get-ht hh) (vector-ref hh 0)) +(define-inline (hh:set-value! hh value) (vector-set! hh 1 value)) +(define-inline (hh:get-value hh value) (vector-ref hh 1)) + +;;====================================================================== +;; given a hierarchial hash and some keys look up the value ... +;; +(define (hh:get hh . keys) + (if (null? keys) + (vector-ref hh 1) ;; we have reached the end of the line, return the value sought + (let ((sub-ht (hh:get-ht hh))) + (if sub-ht ;; yes, there is more hierarchy + (let ((sub-hh (hash-table-ref/default sub-ht (car keys) #f))) + (if sub-hh + (apply hh:get sub-hh (cdr keys)) + #f)) + #f)))) + +;;====================================================================== +;; given a hierarchial hash, a value and some keys, add needed hierarcy and insert the value +;; +(define (hh:set! hh value . keys) + (if (null? keys) + (hh:set-value! hh value) ;; we have reached the end of the line, store the value + (let ((sub-ht (hh:get-ht hh))) + (if sub-ht ;; yes, there is more hierarchy + (let ((sub-hh (hash-table-ref/default sub-ht (car keys) #f))) + (if (not sub-hh) ;; we'll need to add the next level of hierarchy + (let ((new-sub-hh (hh:make-hh))) + (hash-table-set! sub-ht (car keys) new-sub-hh) + (apply hh:set! new-sub-hh value (cdr keys))) + (apply hh:set! sub-hh value (cdr keys)))) ;; call the sub-hierhash with remaining keys + (begin + (hh:set-ht! hh (make-hash-table)) + (apply hh:set! hh value keys)))))) + +;;====================================================================== +;; Manage pkts, used in servers, tests and likely other contexts so put +;; in common +;;====================================================================== + +(define common:pkts-spec + '((default . ((parent . P) + (action . a) + (filename . f))) + (configf . ((parent . P) + (action . a) + (filename . f))) + (server . ((action . a) + (pid . d) + (ipaddr . i) + (port . p) + (parent . P))) + + (test . ((cpuuse . c) + (diskuse . d) + (item-path . i) + (runname . r) + (state . s) + (target . t) + (status . u) + (parent . P))))) + +(define (common:get-pkts-dirs mtconf use-lt) + (let* ((pktsdirs-str (or (configf:lookup mtconf "setup" "pktsdirs") + (and use-lt + (conc (or *toppath* + (current-directory)) + "/lt/.pkts")))) + (pktsdirs (if pktsdirs-str + (string-split pktsdirs-str " ") + #f))) + pktsdirs)) + +;;====================================================================== +;; use-lt is use linktree "lt" link to find pkts dir +(define (common:save-pkt pktalist-in mtconf use-lt #!key (add-only #f)) ;; add-only saves the pkt only if there is a parent already + (if (or add-only + (hash-table-exists? *pkts-info* 'last-parent)) + (let* ((parent (hash-table-ref/default *pkts-info* 'last-parent #f)) + (pktalist (if parent + (cons `(parent . ,parent) + pktalist-in) + pktalist-in))) + (let-values (((uuid pkt) + (alist->pkt pktalist common:pkts-spec))) + (hash-table-set! *pkts-info* 'last-parent uuid) + (let ((pktsdir (or (hash-table-ref/default *pkts-info* 'pkts-dir #f) + (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt)) + (pktsdir (car pktsdirs))) ;; assume it is there + (hash-table-set! *pkts-info* 'pkts-dir pktsdir) + pktsdir)))) + (handle-exceptions + exn + (debug:print-info 0 "failed to write out packet to " pktsdir ", exn=" exn) ;; don't care if this failed for now but MUST FIX - BUG!! + (if (not (file-exists? pktsdir)) + (create-directory pktsdir #t)) + (with-output-to-file + (conc pktsdir "/" uuid ".pkt") + (lambda () + (print pkt))))))))) + +;; common:with-queue-db was here +;; common:load-pkts-to-db was here + +(define (common:get-pkt-alists pkts) + (map (lambda (x) + (alist-ref 'apkt x)) ;; 'pkta pulls out the alist from the read pkt + pkts)) + +;;====================================================================== +;; given list of pkts (alist mode) return list of D cards as Unix epoch, sorted descending +;; also delete duplicates by target i.e. (car pkt) +;; +(define (common:get-pkt-times pkts) + (delete-duplicates + (sort + (map (lambda (x) + `(,(alist-ref 't x) . ,(string->number (alist-ref 'D x)))) + pkts) + (lambda (a b)(> (cdr a)(cdr b)))) ;; sort descending + (lambda (a b)(equal? (car a)(car b))))) ;; remove duplicates by target + +;; common:with-env-vars was here + +(define *common:thread-punchlist* (make-hash-table)) +(define (common:send-thunk-to-background-thread thunk #!key (name #f)) + ;;(BB> "launched thread " name) + ;; we need a unique name for the thread. + (let* ((realname (if name + (if (not (hash-table-ref/default *common:thread-punchlist* name #f)) + name + (conc name"-" (symbol->string (gensym)))) + (conc "anonymous-"(symbol->string (gensym))))) + (realthunk (lambda () + (let ((res (thunk))) + (hash-table-delete! *common:thread-punchlist* realname) + res))) + (thread (make-thread realthunk realname))) + (hash-table-set! *common:thread-punchlist* realname thread) + (thread-start! thread) + )) + +(define (common:join-backgrounded-threads) + ;; may need to trap and ignore exceptions -- dunno how atomic threads are... + (for-each + (lambda (thread-name) + (let* ((thread (hash-table-ref/default *common:thread-punchlist* thread-name #f))) + (if thread + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "joining threads failed. exn=" exn) + #t) ;; just ignore it, it might have died in the meantime so joining it will throw an exception + (thread-join! thread)) + ))) + (hash-table-keys *common:thread-punchlist*))) + +(define (dtests:get-pre-command #!key (default-override #f)) + (let* ((orig-pre-command "export CMD='") + (viewscreen-pre-command "viewscreen ") + (use-viewscreen (configf:lookup *configdat* "dashboard" "use-viewscreen")) + (default-pre-command (if use-viewscreen viewscreen-pre-command orig-pre-command)) + (cfg-ovrd (configf:lookup *configdat* "dashboard" "pre-command"))) + (or cfg-ovrd default-override default-pre-command))) ;; "xterm -geometry 180x20 -e \"")) + + +(define (dtests:get-post-command #!key (default-override #f)) + (let* ((orig-post-command (conc "';xterm -geometry 180x20 -e \"(echo; echo -n START:;date +ww%U.%w-$H:%M:%S;echo;echo $CMD;echo;$CMD)|&" + "tee -a runlog-`date +ww%U.%w-%H:%M`.log;echo Press any key to continue;bash -c 'read -n 1 -s'\" &")) + (viewscreen-post-command "") + (use-viewscreen (configf:lookup *configdat* "dashboard" "use-viewscreen")) + (default-post-command (if use-viewscreen viewscreen-post-command orig-post-command)) + (cfg-ovrd (configf:lookup *configdat* "dashboard" "post-command"))) + (or cfg-ovrd default-override default-post-command))) ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) + +;;====================================================================== +;; (define *common:telemetry-log-state* 'startup) +;; (define *common:telemetry-log-socket* #f) +;; +;; (define (common:telemetry-log-open) +;; (if (eq? *common:telemetry-log-state* 'startup) +;; (let* ((serverhost (configf:lookup *configdat* "telemetry" "host")) +;; (serverport (configf:lookup-number *configdat* "telemetry" "port")) +;; (user (or (get-environment-variable "USER") "unknown")) +;; (host (or (get-environment-variable "HOST") "unknown"))) +;; (set! *common:telemetry-log-state* +;; (handle-exceptions +;; exn +;; (begin +;; (debug:print-info 0 *default-log-port* "common-telemetry-log open udp port failure") +;; 'broken) +;; (if (and serverhost serverport user host) +;; (let* ((s (udp-open-socket))) +;; ;;(udp-bind! s #f 0) +;; (udp-connect! s serverhost serverport) +;; (set! *common:telemetry-log-socket* s) +;; 'open) +;; 'not-needed)))))) +;; +;; (define (common:telemetry-log event #!key (payload '())) +;; (if (eq? *common:telemetry-log-state* 'startup) +;; (common:telemetry-log-open)) +;; +;; (if (eq? 'open *common:telemetry-log-state*) +;; (handle-exceptions +;; exn +;; (begin +;; (debug:print-info 0 *default-log-port* "common-telemetry-log comms failure ; disabled (no server?)") +;; ;;(define *common:telemetry-log-state* 'broken-or-no-server-preclose) +;; ;;(common:telemetry-log-close) +;; (define *common:telemetry-log-state* 'broken-or-no-server) +;; (set! *common:telemetry-log-socket* #f) +;; ) +;; (if (and *common:telemetry-log-socket* event) ;; TODO - filter on event against telemetry.want-events +;; (let* ((user (or (get-environment-variable "USER") "unknown")) +;; (host (or (get-environment-variable "HOST") "unknown")) +;; (start (conc "[megatest "event"]")) +;; (toppath (or *toppath* "/dev/null")) +;; (payload-serialized +;; (base64:base64-encode +;; (z3:encode-buffer +;; (with-output-to-string (lambda () (pp payload)))))) +;; (msg (conc user":"host":"start":"(current-process-id)":"(car (argv))":" +;; toppath":"payload-serialized))) +;; (udp-send *common:telemetry-log-socket* msg)))))) +;; +;; (define (common:telemetry-log-close) +;; (when (or (member *common:telemetry-log-state* '(broken-or-no-server-preclose open)) *common:telemetry-log-socket*) +;; (handle-exceptions +;; exn +;; (begin +;; (define *common:telemetry-log-state* 'closed-fail) +;; (debug:print-info 0 *default-log-port* "common-telemetry-log closure failure") +;; ) +;; (begin +;; (define *common:telemetry-log-state* 'closed) +;; (udp-close-socket *common:telemetry-log-socket*) +;; (set! *common:telemetry-log-socket* #f))))) + ) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -15,4777 +15,10 @@ ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;; ;;====================================================================== - -;;====================================================================== -;; SQLITE3 HELPERS -;;====================================================================== - -(define (db:general-sqlite-error-dump exn stmt . params) - (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) ;; RADT ... how does this work? - ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn) - ;; (print "err-status: " err-status) - (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn)) - (print-call-chain (current-error-port)))) - -;; convert to -inline -;; -(define (db:first-result-default db stmt default . params) - (handle-exceptions - exn - (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) - ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn) - (if (eq? err-status 'done) - default - (begin - (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) - (print-call-chain (current-error-port)) - default))) - (apply sqlite3:first-result db stmt params))) - -;; Get/open a database -;; if run-id => get run specific db -;; if #f => get main db -;; if db already open - return inmem -;; if db not open, open inmem, rundb and sync then return inmem -;; inuse gets set automatically for rundb's -;; -(define (db:get-db dbstruct) ;; run-id) - (if (stack? (dbr:dbstruct-dbstack dbstruct)) - (if (stack-empty? (dbr:dbstruct-dbstack dbstruct)) - (let ((newdb (db:open-megatest-db path: (db:dbfile-path)))) - ;; (stack-push! (dbr:dbstruct-dbstack dbstruct) newdb) - newdb) - (stack-pop! (dbr:dbstruct-dbstack dbstruct))) - (db:open-db dbstruct))) - -;; ;; legacy handling of structure for managing db's. Refactor this into dbr:? -(define (db:dbdat-get-db dbdat) - (if (pair? dbdat) - (car dbdat) - dbdat)) - -(define (db:dbdat-get-path dbdat) - (if (pair? dbdat) - (cdr dbdat) - #f)) - -(define-inline (db:generic-error-printout exn . message) - (print-call-chain (current-error-port)) - (apply debug:print-error 0 *default-log-port* message) - (debug:print-error 0 *default-log-port* " params: " params - ", error: " ((condition-property-accessor 'exn 'message) exn) - ", arguments: " ((condition-property-accessor 'exn 'arguments) exn) - ", location: " ((condition-property-accessor 'exn 'location) exn) - )) - -;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;") -;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no -;; -(define (db:with-db dbstruct run-id r/w proc . params) - (let* ((have-struct (dbr:dbstruct? dbstruct)) - (dbdat (if have-struct - (db:get-db dbstruct) - #f)) - (db (if have-struct - (db:dbdat-get-db dbdat) - dbstruct)) - (fname (db:dbdat-get-path dbdat)) - (use-mutex (> *api-process-request-count* 25))) ;; was 25 - (if (and use-mutex - (common:low-noise-print 120 "over-50-parallel-api-requests")) - (debug:print-info 0 *default-log-port* *api-process-request-count* " parallel api requests being processed in process " (current-process-id) ", throttling access")) - (if (common:low-noise-print 600 (conc "parallel-api-requests" *max-api-process-requests*)) - (debug:print-info 2 *default-log-port* "Parallel api request count: " *api-process-request-count* " max parallel requests: " *max-api-process-requests*)) - (condition-case - (begin - (if use-mutex (mutex-lock! *db-with-db-mutex*)) - (let ((res (apply proc db params))) - (if use-mutex (mutex-unlock! *db-with-db-mutex*)) - ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w)) - (if dbdat (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat)) - res)) - (exn (io-error) - (db:generic-error-printout exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")) - (exn (corrupt) - (db:generic-error-printout exn "ERROR: database " fname " is corrupt. Repair it to proceed.")) - (exn (busy) - (db:generic-error-printout exn "ERROR: database " fname - " is locked. Try copying to another location, remove original and copy back.")) - (exn (permission)(db:generic-error-printout exn "ERROR: database " fname " has some permissions problem.")) - (exn () - (db:generic-error-printout exn "ERROR: Unknown error with database " fname " message: " - ((condition-property-accessor 'exn 'message) exn)))))) - -;;====================================================================== -;; K E E P F I L E D B I N dbstruct -;;====================================================================== - -;; (define (db:get-filedb dbstruct run-id) -;; (let ((db (vector-ref dbstruct 2))) -;; (if db -;; db -;; (let ((fdb (filedb:open-db (conc *toplevel* "/db/files.db")))) -;; (vector-set! dbstruct 2 fdb) -;; fdb)))) -;; -;; ;; Can also be used to save arbitrary strings -;; ;; -;; (define (db:save-path dbstruct path) -;; (let ((fdb (db:get-filedb dbstruct)))b -;; (filedb:register-path fdb path))) -;; -;; ;; Use to get a path. To get an arbitrary string see next define -;; ;; -;; (define (db:get-path dbstruct id) -;; (let ((fdb (db:get-filedb dbstruct))) -;; (filedb:get-path db id))) - -;; NB// #f => return dbdir only -;; (was planned to be; zeroth db with name=main.db) -;; -;; If run-id is #f return to create and retrieve the path where the db will live. -;; -(define db:dbfile-path common:get-db-tmp-area) - -(define (db:set-sync db) - (let ((syncprag (configf:lookup *configdat* "setup" "sychronous"))) - (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 0) ";")))) - -;; open an sql database inside a file lock -;; returns: db existed-prior-to-opening -;; RA => Returns a db handler; sets the lock if opened in writable mode -;; -;; (define *db-open-mutex* (make-mutex)) - -(define (db:lock-create-open fname initproc) - (let* ((parent-dir (or (pathname-directory fname)(current-directory))) ;; no parent? go local - (raw-fname (pathname-file fname)) - (dir-writable (file-writable? parent-dir)) - (file-exists (common:file-exists? fname)) - (file-write (if file-exists - (file-writable? fname) - dir-writable ))) - ;; (mutex-lock! *db-open-mutex*) ;; tried this mutex, not clear it helped. - (if file-write ;; dir-writable - (condition-case - (let* ((lockfname (conc fname ".lock")) - (readyfname (conc parent-dir "/.ready-" raw-fname)) - (readyexists (common:file-exists? readyfname))) - (if (not readyexists) - (common:simple-file-lock-and-wait lockfname)) - (let ((db (sqlite3:open-database fname))) - (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) - (sqlite3:execute db "PRAGMA synchronous = 0;") - (if (and (configf:lookup *configdat* "setup" "tmp_mode") (string-match "^/tmp/.*" fname)) - (begin - ;;(print "DEBUG: Setting tmp_mode for " fname) - (sqlite3:execute db (configf:lookup *configdat* "setup" "tmp_mode")) - ) - ) - (if (and (configf:lookup *configdat* "setup" "nfs_mode") (not (string-match "^/tmp/.*" fname))) - (begin - ;;(print "DEBUG: Setting nfs_mode for " fname) - (sqlite3:execute db (configf:lookup *configdat* "setup" "nfs_mode")) - ) - ) - (if (and (not (or (configf:lookup *configdat* "setup" "tmp_mode") (configf:lookup *configdat* "setup" "nfs_mode"))) - (configf:lookup *configdat* "setup" "use-wal") - (string-match "^/tmp/.*" fname)) ;; this is a file in /tmp - (sqlite3:execute db "PRAGMA journal_mode=WAL;") - (debug:print 2 *default-log-port* "Creating " fname " in NON-WAL mode.")) - (if (not file-exists) - (initproc db)) - (if (not readyexists) - (begin - (common:simple-file-release-lock lockfname) - (with-output-to-file - readyfname - (lambda () - (print "Ready at " - (seconds->year-work-week/day-time - (current-seconds))))))) - db)) - (exn (io-error) (debug:print 0 *default-log-port* "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")) - (exn (corrupt) (debug:print 0 *default-log-port* "ERROR: database " fname " is corrupt. Repair it to proceed.")) - (exn (busy) (debug:print 0 *default-log-port* "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back.")) - (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem.")) - (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn)))) - - (condition-case - (begin - (debug:print 2 *default-log-port* "WARNING: opening db in non-writable dir " fname) - (let ((db (sqlite3:open-database fname))) - (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) - (sqlite3:execute db "PRAGMA synchronous = 0;") - ;; (mutex-unlock! *db-open-mutex*) - db)) - (exn (io-error) (debug:print 0 *default-log-port* "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")) - (exn (corrupt) (debug:print 0 *default-log-port* "ERROR: database " fname " is corrupt. Repair it to proceed.")) - (exn (busy) (debug:print 0 *default-log-port* "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back.")) - (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem.")) - (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn)))) - ))) - - -;; This routine creates the db if not already present. It is only called if the db is not already opened -;; -(define (db:open-db dbstruct #!key (areapath #f)(do-sync #t)) ;; TODO: actually use areapath - (let ((tmpdb-stack (dbr:dbstruct-dbstack dbstruct))) ;; RA => Returns the first reference in dbstruct - (if (stack? tmpdb-stack) - (db:get-db tmpdb-stack) ;; get previously opened db (will create new db handle if all in the stack are already used - (let* ((max-stale-tmp (configf:lookup-number *configdat* "server" "filling-db-max-stale-seconds" default: 10)) - (dbpath (db:dbfile-path )) ;; path to tmp db area - (dbexists (common:file-exists? dbpath)) - (tmpdbfname (conc dbpath "/megatest.db")) - (dbfexists (common:file-exists? tmpdbfname)) ;; (conc dbpath "/megatest.db"))) - (mtdbexists (common:file-exists? (conc *toppath* "/megatest.db"))) - - (mtdbmodtime (if mtdbexists (common:lazy-sqlite-db-modification-time (conc *toppath* "/megatest.db")) #f)) - (tmpdbmodtime (if dbfexists (common:lazy-sqlite-db-modification-time tmpdbfname) #f)) - (mtdb (db:open-megatest-db)) - (mtdbpath (db:dbdat-get-path mtdb)) - (tmpdb (db:open-megatest-db path: dbpath)) ;; lock-create-open dbpath db:initialize-main-db)) - (refndb (db:open-megatest-db path: dbpath name: "megatest_ref.db")) - (write-access (file-writable? mtdbpath)) - ;(mtdbmodtime (if mtdbexists (common:lazy-sqlite-db-modification-time mtdbpath) #f)) ; moving this before db:open-megatest-db is called. if wal mode is on -WAL and -shm file get created with causing the tmpdbmodtime timestamp always greater than mtdbmodtime - ;(tmpdbmodtime (if dbfexists (common:lazy-sqlite-db-modification-time tmpdbfname) #f)) - ;if wal mode is on -WAL and -shm file get created when db:open-megatest-db is called. modtimedelta will always be < 10 so db in tmp not get synced - ;(tmpdbmodtime (if dbfexists (db:get-last-update-time (car tmpdb)) #f)) - ;(fmt (file-modification-time tmpdbfname)) - (modtimedelta (and mtdbmodtime tmpdbmodtime (- mtdbmodtime tmpdbmodtime)))) - - (when write-access - (sqlite3:execute (car mtdb) "drop trigger if exists update_tests_trigger") - (sqlite3:execute (car mtdb) "drop trigger if exists update_runs_trigger")) - - ;(print "mtdbmodtime " mtdbmodtime " tmpdbmodtime " tmpdbmodtime " mtdbpath " mtdbpath " " (conc *toppath* "/megatest.db")) - ;;(debug:print-info 13 *default-log-port* "db:open-db>> mtdbpath="mtdbpath" mtdbexists="mtdbexists" and write-access="write-access) - (if (and dbexists (not write-access)) - (begin - (set! *db-write-access* #f) - (dbr:dbstruct-read-only-set! dbstruct #t))) - (dbr:dbstruct-mtdb-set! dbstruct mtdb) - (dbr:dbstruct-tmpdb-set! dbstruct tmpdb) - (dbr:dbstruct-dbstack-set! dbstruct (make-stack)) ;; BB: why a stack? Why would the number of db's be indeterminate? Is this a legacy of 1.db 2.db .. ? - (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb) ;; olddb is already a (cons db path) - (dbr:dbstruct-refndb-set! dbstruct refndb) - (if (and (or (not dbfexists) - (and modtimedelta - (> modtimedelta max-stale-tmp))) ;; if db in tmp is over ten seconds older than the file in MTRA then do a sync back - do-sync) - (begin - (debug:print 1 *default-log-port* "filling db " (db:dbdat-get-path tmpdb) " with data \n from " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) - (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb refndb tmpdb) - ;touch tmp db to avoid wal mode wierdness - (set! (file-modification-time tmpdbfname) (current-seconds)) - (debug:print-info 13 *default-log-port* "db:sync-all-tables-list done.") - ) - (debug:print 4 *default-log-port* " db, " (db:dbdat-get-path tmpdb) " already exists or fresh enough, not propogating data from\n " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) ) - ;; (db:multi-db-sync dbstruct 'old2new)) ;; migrate data from megatest.db automatically - tmpdb)))) - - -(define (db:get-last-update-time db) -; (db:with-db -; dbstruct #f #f -; (lambda (db) - (let ((last-update-time #f)) - (sqlite3:for-each-row - (lambda (lup) - (set! last-update-time lup)) - db - "select max(lup) from ( select max(last_update) as lup from tests union select max(last_update) as lup from runs);") - last-update-time)) -;)) - -;; Make the dbstruct, setup up auxillary db's and call for main db at least once -;; -;; called in http-transport and replicated in rmt.scm for *local* access. -;; -(define (db:setup do-sync #!key (areapath #f)) - ;; - (cond - (*dbstruct-db* *dbstruct-db*);; TODO: when multiple areas are supported, this optimization will be a hazard - (else ;;(common:on-homehost?) - (debug:print-info 13 *default-log-port* "db:setup entered (first time, not cached.)") - (let* ((dbstruct (make-dbr:dbstruct))) - (when (not *toppath*) - (debug:print-info 13 *default-log-port* "in db:setup, *toppath* not set; calling launch:setup") - (launch:setup areapath: areapath)) - (debug:print-info 13 *default-log-port* "Begin db:open-db") - (db:open-db dbstruct areapath: areapath do-sync: do-sync) - (debug:print-info 13 *default-log-port* "Done db:open-db") - (set! *dbstruct-db* dbstruct) - ;;(debug:print-info 13 *default-log-port* "new dbstruct = "(dbr:dbstruct->alist dbstruct)) - dbstruct)))) - ;; (else - ;; (debug:print 0 *default-log-port* "ERROR: attempt to open database when not on homehost. Exiting. Homehost: " (common:get-homehost)) - ;; (exit 1)))) - -;; Open the classic megatest.db file (defaults to open in toppath) -;; -;; NOTE: returns a dbdat not a dbstruct! -;; - -;;(define (db:reopen-megatest-db - -(define (db:open-megatest-db #!key (path #f)(name #f)) - (let* ((dbdir (or path *toppath*)) - (dbpath (conc dbdir "/" (or name "megatest.db"))) - (dbexists (common:file-exists? dbpath)) - (db (db:lock-create-open dbpath - (lambda (db) - (db:initialize-main-db db) - ;;(db:initialize-run-id-db db) - ))) - (write-access (file-writable? dbpath))) - (debug:print-info 13 *default-log-port* "db:open-megatest-db "dbpath) - (if (and dbexists (not write-access)) - (set! *db-write-access* #f)) - (cons db dbpath))) - -;; sync run to disk if touched -;; -(define (db:sync-touched dbstruct run-id #!key (force-sync #f)) - (let ((tmpdb (db:get-db dbstruct)) - (mtdb (dbr:dbstruct-mtdb dbstruct)) - (refndb (dbr:dbstruct-refndb dbstruct)) - (start-t (current-seconds))) - (debug:print-info 4 *default-log-port* "Syncing for run-id: " run-id) - (mutex-lock! *db-multi-sync-mutex*) - (let ((update_info (cons (if force-sync 0 *db-last-sync*) "last_update"))) - (mutex-unlock! *db-multi-sync-mutex*) - (db:sync-tables (db:sync-all-tables-list dbstruct) update_info tmpdb refndb mtdb)) - (mutex-lock! *db-multi-sync-mutex*) - (set! *db-last-sync* start-t) - (set! *db-last-access* start-t) - (mutex-unlock! *db-multi-sync-mutex*) - (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb))) - -(define (db:safely-close-sqlite3-db db stmt-cache #!key (try-num 3)) - (if (<= try-num 0) - #f - (handle-exceptions - exn - (begin - (print "Attempt to safely close sqlite3 db failed. Trying again. exn=" exn) - (thread-sleep! 3) - (sqlite3:interrupt! db) - (db:safely-close-sqlite3-db db stmt-cache try-num: (- try-num 1))) - (if (sqlite3:database? db) - (let* ((stmts (and stmt-cache (hash-table-ref/default stmt-cache db #f)))) - (if stmts (map sqlite3:finalize! (hash-table-values stmts))) - (sqlite3:finalize! db) - #t) - #f)))) - -;; close all opened run-id dbs -(define (db:close-all dbstruct) - (if (dbr:dbstruct? dbstruct) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: Finalizing failed, " ((condition-property-accessor 'exn 'message) exn) ", note - exn=" exn) - (print-call-chain *default-log-port*)) - ;; (db:sync-touched dbstruct 0 force-sync: #t) ;; NO. Do not do this here. Instead we rely on a server to be started when there are writes, even if the server itself is not going to be used as a server. - (let ((tdbs (map db:dbdat-get-db - (stack->list (dbr:dbstruct-dbstack dbstruct)))) - (mdb (db:dbdat-get-db (dbr:dbstruct-mtdb dbstruct))) - (rdb (db:dbdat-get-db (dbr:dbstruct-refndb dbstruct))) - (stmt-cache (dbr:dbstruct-stmt-cache dbstruct))) - (map (lambda (db) - (db:safely-close-sqlite3-db db stmt-cache)) - tdbs) - (db:safely-close-sqlite3-db mdb stmt-cache) ;; (if (sqlite3:database? mdb) (sqlite3:finalize! mdb)) - (db:safely-close-sqlite3-db rdb stmt-cache))))) ;; (if (sqlite3:database? rdb) (sqlite3:finalize! rdb)))))) - -;; (let ((locdbs (dbr:dbstruct-locdbs dbstruct))) -;; (if (hash-table? locdbs) -;; (for-each (lambda (run-id) -;; (db:close-run-db dbstruct run-id)) -;; (hash-table-keys locdbs))))) - -;; (define (db:open-inmem-db) -;; (let* ((db (sqlite3:open-database ":memory:")) -;; (handler (make-busy-timeout 3600))) -;; (sqlite3:set-busy-handler! db handler) -;; (db:initialize-run-id-db db) -;; (cons db #f))) - -;; just tests, test_steps and test_data tables -(define db:sync-tests-only - (list - ;; (list "strs" - ;; '("id" #f) - ;; '("str" #f)) - (list "tests" - '("id" #f) - '("run_id" #f) - '("testname" #f) - '("host" #f) - '("cpuload" #f) - '("diskfree" #f) - '("uname" #f) - '("rundir" #f) - '("shortdir" #f) - '("item_path" #f) - '("state" #f) - '("status" #f) - '("attemptnum" #f) - '("final_logf" #f) - '("logdat" #f) - '("run_duration" #f) - '("comment" #f) - '("event_time" #f) - '("fail_count" #f) - '("pass_count" #f) - '("archived" #f) - '("last_update" #f)) - (list "test_steps" - '("id" #f) - '("test_id" #f) - '("stepname" #f) - '("state" #f) - '("status" #f) - '("event_time" #f) - '("comment" #f) - '("logfile" #f) - '("last_update" #f)) - (list "test_data" - '("id" #f) - '("test_id" #f) - '("category" #f) - '("variable" #f) - '("value" #f) - '("expected" #f) - '("tol" #f) - '("units" #f) - '("comment" #f) - '("status" #f) - '("type" #f) - '("last_update" #f)))) - -;; needs db to get keys, this is for syncing all tables -;; -(define (db:sync-main-list dbstruct) - (let ((keys (db:get-keys dbstruct))) - (list - (list "keys" - '("id" #f) - '("fieldname" #f) - '("fieldtype" #f)) - (list "metadat" '("var" #f) '("val" #f)) - (append (list "runs" - '("id" #f)) - (map (lambda (k)(list k #f)) - (append keys - (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update")))) - (list "archive_disks" - '("id" #f) - '("archive_area_name" #f) - '("disk_path" #f) - '("last_df" #f) - '("last_df_time" #f) - '("creation_time" #f)) - - (list "archive_blocks" - '("id" #f) - '("archive_disk_id" #f) - '("disk_path" #f) - '("last_du" #f) - '("last_du_time" #f) - '("creation_time" #f)) - - (list "test_meta" - '("id" #f) - '("testname" #f) - '("owner" #f) - '("description" #f) - '("reviewed" #f) - '("iterated" #f) - '("avg_runtime" #f) - '("avg_disk" #f) - '("tags" #f) - '("jobgroup" #f))))) - -(define (db:sync-all-tables-list dbstruct) - (append (db:sync-main-list dbstruct) - db:sync-tests-only)) - -;; use bunch of Unix commands to try to break the lock and recreate the db -;; -(define (db:move-and-recreate-db dbdat) - (let* ((dbpath (db:dbdat-get-path dbdat)) - (dbdir (pathname-directory dbpath)) - (fname (pathname-strip-directory dbpath)) - (fnamejnl (conc fname "-journal")) - (tmpname (conc fname "." (current-process-id))) - (tmpjnl (conc fnamejnl "." (current-process-id)))) - (debug:print-error 0 *default-log-port* "" fname " appears corrupted. Making backup \"old/" fname "\"") - (system (conc "cd " dbdir ";mkdir -p old;cat " fname " > old/" tmpname)) - (system (conc "rm -f " dbpath)) - (if (common:file-exists? fnamejnl) - (begin - (debug:print-error 0 *default-log-port* "" fnamejnl " found, moving it to old dir as " tmpjnl) - (system (conc "cd " dbdir ";mkdir -p old;cat " fnamejnl " > old/" tmpjnl)) - (system (conc "rm -f " dbdir "/" fnamejnl)))) - ;; attempt to recreate database - (system (conc "cd " dbdir ";sqlite3 old/" tmpname " .dump | sqlite3 " fname)))) - -;; return #f to indicate the dbdat should be closed/reopened -;; else return dbdat -;; -(define (db:repair-db dbdat #!key (numtries 1)) - (let* ((dbpath (db:dbdat-get-path dbdat)) - (dbdir (pathname-directory dbpath)) - (fname (pathname-strip-directory dbpath))) - (debug:print-info 0 *default-log-port* "Checking db " dbpath " for errors.") - (cond - ((not (file-writable? dbdir)) - (debug:print 0 *default-log-port* "WARNING: can't write to " dbdir ", can't fix " fname) - #f) - - ;; handle special cases, megatest.db and monitor.db - ;; - ;; NOPE: apply this same approach to all db files - ;; - (else ;; ((equal? fname "megatest.db") ;; this file can be regenerated if needed - (handle-exceptions - exn - (begin - (print "Problems trying to repair the db, exn=" exn) - ;; (db:move-and-recreate-db dbdat) - (if (> numtries 0) - (db:repair-db dbdat numtries: (- numtries 1)) - #f) - (debug:print 0 *default-log-port* "FATAL: file " dbpath " was found corrupted, an attempt to fix has been made but you must start over.") - (debug:print 0 *default-log-port* - " check the following:\n" - " 1. full directories, look in ~/ /tmp and " dbdir "\n" - " 2. write access to " dbdir "\n\n" - " if the automatic recovery failed you may be able to recover data by doing \"" - (if (member fname '("megatest.db" "monitor.db")) - "megatest -cleanup-db" - "megatest -import-megatest.db;megatest -cleanup-db") - "\"\n") - (exit) ;; we can not safely continue when a db was corrupted - even if fixed. - ) - ;; test read/write access to the database - (let ((db (sqlite3:open-database dbpath))) - (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) - (sqlite3:execute db "PRAGMA synchronous = 0;") - (cond - ((equal? fname "megatest.db") - (sqlite3:execute db "DELETE FROM tests WHERE state='DELETED';")) - ((equal? fname "main.db") - (sqlite3:execute db "DELETE FROM runs WHERE state='deleted';")) - ((string-match "\\d.db" fname) - (sqlite3:execute db "UPDATE tests SET state='DELETED' WHERE state='DELETED';")) - ((equal? fname "monitor.db") - (sqlite3:execute "DELETE FROM servers WHERE state LIKE 'defunct%';")) - (else - (sqlite3:execute db "vacuum;"))) - - (sqlite3:finalize! db) - #t)))))) - -;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) ) -;; db's are dbdat's -;; -;; if last-update specified ("field-name" . time-in-seconds) -;; then sync only records where field-name >= time-in-seconds -;; IFF field-name exists -;; -(define (db:sync-tables tbls last-update fromdb todb . slave-dbs) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable in db:sync-tables.") - (print-call-chain (current-error-port)) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) - (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status) exn)) - (debug:print 0 *default-log-port* " src db: " (db:dbdat-get-path fromdb)) - (for-each (lambda (dbdat) - (let ((dbpath (db:dbdat-get-path dbdat))) - (debug:print 0 *default-log-port* " dbpath: " dbpath) - (if (not (db:repair-db dbdat)) - (begin - (debug:print-error 0 *default-log-port* "Failed to rebuild " dbpath ", exiting now.") - (exit))))) - (cons todb slave-dbs)) - - 0) - ;; this is the work to be done - (cond - ((not fromdb) (debug:print 3 *default-log-port* "WARNING: db:sync-tables called with fromdb missing") - -1) - ((not todb) (debug:print 3 *default-log-port* "WARNING: db:sync-tables called with todb missing") - -2) - ((not (sqlite3:database? (db:dbdat-get-db fromdb))) - (debug:print-error 0 *default-log-port* "db:sync-tables called with fromdb not a database " fromdb) - -3) - ((not (sqlite3:database? (db:dbdat-get-db todb))) - (debug:print-error 0 *default-log-port* "db:sync-tables called with todb not a database " todb) - -4) - - ((not (file-writable? (db:dbdat-get-path todb))) - (debug:print-error 0 *default-log-port* "db:sync-tables called with todb not a read-only database " todb) - -5) - ((not (null? (let ((readonly-slave-dbs - (filter - (lambda (dbdat) - (not (file-writable? (db:dbdat-get-path todb)))) - slave-dbs))) - (for-each - (lambda (bad-dbdat) - (debug:print-error - 0 *default-log-port* "db:sync-tables called with todb not a read-only database " bad-dbdat)) - readonly-slave-dbs) - readonly-slave-dbs))) -6) - (else - (let ((stmts (make-hash-table)) ;; table-field => stmt - (all-stmts '()) ;; ( ( stmt1 value1 ) ( stml2 value2 )) - (numrecs (make-hash-table)) - (start-time (current-milliseconds)) - (tot-count 0)) - (for-each ;; table - (lambda (tabledat) - (let* ((tablename (car tabledat)) - (fields (cdr tabledat)) - (has-last-update (member "last_update" fields)) - (use-last-update (cond - ((and has-last-update - (member "last_update" fields)) - #t) ;; if given a number, just use it for all fields - ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table - ((and (pair? last-update) - (member (car last-update) ;; last-update field name - (map car fields))) - #t) - (last-update - (debug:print 0 *default-log-port* "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update);; found in fields - #f) - (else - #f))) - (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for - (if (number? last-update) - last-update - (cdr last-update)) - #f)) - (last-update-field (if use-last-update - (if (number? last-update) - "last_update" - (car last-update)) - #f)) - (num-fields (length fields)) - (field->num (make-hash-table)) - (num->field (apply vector (map car fields))) ;; BBHERE - (full-sel (conc "SELECT " (string-intersperse (map car fields) ",") - " FROM " tablename (if use-last-update ;; apply last-update criteria - (conc " WHERE " last-update-field " >= " last-update-value) - "") - ";")) - (full-ins (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) " - " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );")) - (fromdat '()) - (fromdats '()) - (totrecords 0) - (batch-len (string->number (or (configf:lookup *configdat* "sync" "batchsize") "100"))) - (todat (make-hash-table)) - (count 0) - (field-names (map car fields)) - (delay-handicap (string->number (or (configf:lookup *configdat* "sync" "delay-handicap") "0"))) - ) - - ;; set up the field->num table - (for-each - (lambda (field) - (hash-table-set! field->num field count) - (set! count (+ count 1))) - fields) - - ;; read the source table - (sqlite3:for-each-row - (lambda (a . b) - (set! fromdat (cons (apply vector a b) fromdat)) - (if (> (length fromdat) batch-len) - (begin - (set! fromdats (cons fromdat fromdats)) - (set! fromdat '()) - (set! totrecords (+ totrecords 1))))) - (db:dbdat-get-db fromdb) - full-sel) - - ;; tack on remaining records in fromdat - (if (not (null? fromdat)) - (set! fromdats (cons fromdat fromdats))) - - (if (common:low-noise-print 120 "sync-records") - (debug:print-info 4 *default-log-port* "found " totrecords " records to sync")) - - ;; read the target table; BBHERE - (sqlite3:for-each-row - (lambda (a . b) - (hash-table-set! todat a (apply vector a b))) - (db:dbdat-get-db todb) - full-sel) - - (when (and delay-handicap (> delay-handicap 0)) - (debug:print-info 0 *default-log-port* "imposing synthetic sync delay of "delay-handicap" seconds since sync/delay-handicap is configured") - (thread-sleep! delay-handicap) - (debug:print-info 0 *default-log-port* "synthetic sync delay of "delay-handicap" seconds completed") - ) - - ;; first pass implementation, just insert all changed rows - (for-each - (lambda (targdb) - (let* ((db (db:dbdat-get-db targdb)) - (drp-trigger (if (member "last_update" field-names) - (db:drop-trigger db tablename) - #f)) - (is-trigger-dropped (if (member "last_update" field-names) - (db:is-trigger-dropped db tablename) - #f)) - (stmth (sqlite3:prepare db full-ins))) - ;; (db:delay-if-busy targdb) ;; NO WAITING - (if (member "last_update" field-names) - (debug:print-info 3 *default-log-port* "is-trigger-dropped: " is-trigger-dropped)) - (for-each - (lambda (fromdat-lst) - (sqlite3:with-transaction - db - (lambda () - (for-each ;; - (lambda (fromrow) - (let* ((a (vector-ref fromrow 0)) - (curr (hash-table-ref/default todat a #f)) - (same #t)) - (let loop ((i 0)) - (if (or (not curr) - (not (equal? (vector-ref fromrow i)(vector-ref curr i)))) - (set! same #f)) - (if (and same - (< i (- num-fields 1))) - (loop (+ i 1)))) - (if (not same) - (begin - (apply sqlite3:execute stmth (vector->list fromrow)) - (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0))))))) - fromdat-lst)))) - fromdats) - (sqlite3:finalize! stmth) - (if (member "last_update" field-names) - (db:create-trigger db tablename)))) - (append (list todb) slave-dbs)))) - tbls) - (let* ((runtime (- (current-milliseconds) start-time)) - (should-print (or (debug:debug-mode 12) - (common:low-noise-print 120 "db sync" (> runtime 500))))) ;; low and high sync times treated as separate. - (if should-print (debug:print 3 *default-log-port* "INFO: db sync, total run time " runtime " ms")) - (for-each - (lambda (dat) - (let ((tblname (car dat)) - (count (cdr dat))) - (set! tot-count (+ tot-count count)) - (if (> count 0) - (if should-print (debug:print 0 *default-log-port* (format #f " ~10a ~5a" tblname count)))))) - (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b)))))) - tot-count))))) - -(define (db:patch-schema-rundb frundb) - ;; - ;; remove this some time after September 2016 (added in version v1.6031 - ;; - (for-each - (lambda (table-name) - (handle-exceptions - exn - (if (string-match ".*duplicate.*" ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 *default-log-port* "Column last_update already added to " table-name " table") - (db:general-sqlite-error-dump exn "alter table " table-name " ..." #f "none")) - (sqlite3:execute - frundb - (conc "ALTER TABLE " table-name " ADD COLUMN last_update INTEGER DEFAULT 0"))) - (sqlite3:execute - frundb - (conc "DROP TRIGGER IF EXISTS update_" table-name "_trigger;")) - (sqlite3:execute - frundb - (conc "CREATE TRIGGER IF NOT EXISTS update_" table-name "_trigger AFTER UPDATE ON " table-name " - FOR EACH ROW - BEGIN - UPDATE " table-name " SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;")) - ) - '("tests" "test_steps" "test_data"))) - -(define (db:patch-schema-maindb maindb) - ;; - ;; remove all these some time after september 2016 (added in v1.6031 - ;; - (for-each - (lambda (column type default) - (handle-exceptions - exn - (if (string-match ".*duplicate.*" ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 *default-log-port* "Column " column " already added to runs table") - (db:general-sqlite-error-dump exn "alter table runs ..." #f "none")) - (sqlite3:execute - maindb - (conc "ALTER TABLE runs ADD COLUMN " column " " type " DEFAULT " default)))) - (list "last_update" "contour") - (list "INTEGER" "TEXT" ) - (list "0" "''" )) - ;; these schema changes don't need exception handling - (sqlite3:execute - maindb - "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs - FOR EACH ROW - BEGIN - UPDATE runs SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;") - (sqlite3:execute maindb "CREATE TABLE IF NOT EXISTS run_stats ( - id INTEGER PRIMARY KEY, - run_id INTEGER, - state TEXT, - status TEXT, - count INTEGER, - last_update INTEGER DEFAULT (strftime('%s','now')))") - (sqlite3:execute maindb "CREATE TRIGGER IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats - FOR EACH ROW - BEGIN - UPDATE run_stats SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;") - (sqlite3:execute maindb "CREATE TABLE IF NOT EXISTS test_rundat ( - id INTEGER PRIMARY KEY, - test_id INTEGER, - update_time TIMESTAMP, - cpuload INTEGER DEFAULT -1, - diskfree INTEGER DEFAULT -1, - diskusage INTGER DEFAULT -1, - run_duration INTEGER DEFAULT 0);")) - -(define (db:adj-target db) - (let ((fields (configf:get-section *configdat* "fields")) - (field-num 0)) - ;; because we will be refreshing the keys table it is best to clear it here - (sqlite3:execute db "DELETE FROM keys;") - (for-each - (lambda (field) - (let ((column (car field)) - (spec (cadr field))) - (handle-exceptions - exn - (if (string-match ".*duplicate.*" ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 *default-log-port* "Target field " column " already exists in the runs table") - (db:general-sqlite-error-dump exn "alter table runs ..." #f "none")) - ;; Add the column if needed - (sqlite3:execute - db - (conc "ALTER TABLE runs ADD COLUMN " column " " spec))) - ;; correct the entry in the keys column - (sqlite3:execute - db - "INSERT INTO keys (id,fieldname,fieldtype) VALUES (?,?,?);" - field-num column spec) - ;; fill in blanks (not allowed as it would be part of the path - (sqlite3:execute - db - (conc "UPDATE runs SET " column "='x' WHERE " column "='';")) - (set! field-num (+ field-num 1)))) - fields))) - -(define *global-db-store* (make-hash-table)) - -(define (db:get-access-mode) - (if (args:get-arg "-use-db-cache") 'cached 'rmt)) - -;; Add db direct -;; -(define (db:dispatch-query access-mode rmt-cmd db-cmd . params) - (if (eq? access-mode 'cached) - (debug:print 2 *default-log-port* "not doing cached calls right now")) -;; (apply db:call-with-cached-db db-cmd params) - (apply rmt-cmd params)) -;;) - -;; return the target db handle so it can be used -;; -(define (db:cache-for-read-only source target #!key (use-last-update #f)) - (if (and (hash-table-ref/default *global-db-store* target #f) - (>= (file-modification-time target)(file-modification-time source))) - (hash-table-ref *global-db-store* target) - (let* ((toppath (launch:setup)) - (targ-db-last-mod (if (common:file-exists? target) - (file-modification-time target) - 0)) - (cache-db (or (hash-table-ref/default *global-db-store* target #f) - (db:open-megatest-db path: target))) - (source-db (db:open-megatest-db path: source)) - (curr-time (current-seconds)) - (res '()) - (last-update (if use-last-update (cons "last_update" targ-db-last-mod) #f))) - (db:sync-tables (db:sync-main-list source-db) last-update source-db cache-db) - (db:sync-tables db:sync-tests-only last-update source-db cache-db) - (hash-table-set! *global-db-store* target cache-db) - cache-db))) - -;; ;; call a proc with a cached db -;; ;; -;; (define (db:call-with-cached-db proc . params) -;; ;; first cache the db in /tmp -;; (let* ((cname-part (conc "megatest_cache/" (common:get-testsuite-name))) -;; (fname (conc (common:get-area-path-signature) ".db")) -;; (cache-dir (common:get-create-writeable-dir -;; (list (conc "/tmp/" (current-user-name) "/" cname-part) -;; (conc "/tmp/" (current-user-name) "-" cname-part) -;; (conc "/tmp/" (current-user-name) "_" cname-part)))) -;; (megatest-db (conc *toppath* "/megatest.db"))) -;; ;; (debug:print-info 0 *default-log-port* "Using cache dir " cache-dir) -;; (if (not cache-dir) -;; (begin -;; (debug:print 0 *default-log-port* "ERROR: Failed to find an area to write the cache db") -;; (exit 1)) -;; (let* ((th1 (make-thread -;; (lambda () -;; (if (and (common:file-exists? megatest-db) -;; (file-writable? megatest-db)) -;; (begin -;; (db:sync-to-megatest.db dbstruct 'timestamps) ;; internally mutexes on *db-local-sync* -;; (debug:print-info 2 *default-log-port* "Done syncing to megatest.db")))) -;; "call-with-cached-db sync-to-megatest.db")) -;; (cache-db (db:cache-for-read-only -;; megatest-db -;; (conc cache-dir "/" fname) -;; use-last-update: #t))) -;; (thread-start! th1) -;; (apply proc cache-db params) -;; )))) - -;; options: -;; -;; 'killservers - kills all servers -;; 'dejunk - removes junk records -;; 'adj-testids - move test-ids into correct ranges -;; 'old2new - sync megatest.db to /tmp/.../megatest.db and /tmp/.../megatest_ref.db -;; 'new2old - sync /tmp/.../megatest.db to megatest.db and /tmp/.../megatest_ref.db (and update data_synced) -;; 'closeall - close all opened dbs -;; 'schema - attempt to apply schema changes -;; run-ids: '(1 2 3 ...) or #f (for all) -;; -(define (db:multi-db-sync dbstruct . options) - ;; (if (not (launch:setup)) - ;; (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.") - (let* ((mtdb (dbr:dbstruct-mtdb dbstruct)) - (tmpdb (db:get-db dbstruct)) - (refndb (dbr:dbstruct-refndb dbstruct)) - (allow-cleanup #t) ;; (if run-ids #f #t)) - (servers (server:get-list *toppath*)) ;; (tasks:get-all-servers (db:delay-if-busy tdbdat))) - (data-synced 0)) ;; count of changed records (I hope) - - (for-each - (lambda (option) - - (case option - ;; kill servers - ((killservers) - (for-each - (lambda (server) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " server ", exn=" exn) - #f) - (match-let (((mod-time host port start-time server-id pid) server)) - (if (and host pid) - (tasks:kill-server host pid))))) - servers) - - ;; /tmp/bjbarcla/megatest_localdb/fullrun/.nfs.pdx.disks.icf_env_disk001.bjbarcla.gwa.issues.mtdev.matt-bisect.megatest.ext-tests.runs.all.v1.65.1.6524.dev.bb-24-justrollup-f8.rollup.fullrun/megatest.db.lock - (delete-file* (common:get-sync-lock-filepath)) - ) - - ;; clear out junk records - ;; - ((dejunk) - ;; (db:delay-if-busy mtdb) ;; ok to delay on mtdb - (when (file-writable? (db:dbdat-get-path mtdb)) (db:clean-up mtdb)) - (db:clean-up tmpdb) - (db:clean-up refndb)) - - ;; sync runs, test_meta etc. - ;; - ((old2new) - (set! data-synced - (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb tmpdb refndb) - data-synced))) - - ;; now ensure all newdb data are synced to megatest.db - ;; do not use the run-ids list passed in to the function - ;; - ((new2old) - (set! data-synced - (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f tmpdb refndb mtdb) - data-synced))) - - ((adj-target) - (db:adj-target (db:dbdat-get-db mtdb)) - (db:adj-target (db:dbdat-get-db tmpdb)) - (db:adj-target (db:dbdat-get-db refndb))) - - ((schema) - (db:patch-schema-maindb (db:dbdat-get-db mtdb)) - (db:patch-schema-maindb (db:dbdat-get-db tmpdb)) - (db:patch-schema-maindb (db:dbdat-get-db refndb)) - (db:patch-schema-rundb (db:dbdat-get-db mtdb)) - (db:patch-schema-rundb (db:dbdat-get-db tmpdb)) - (db:patch-schema-rundb (db:dbdat-get-db refndb)))) - - (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb)) - options) - data-synced)) - -(define (db:tmp->megatest.db-sync dbstruct last-update) - (let* ((mtdb (dbr:dbstruct-mtdb dbstruct)) - (tmpdb (db:get-db dbstruct)) - (refndb (dbr:dbstruct-refndb dbstruct)) - (res (db:sync-tables (db:sync-all-tables-list dbstruct) last-update tmpdb refndb mtdb))) - (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb) - res)) - -;;;; run-ids -;; if #f use *db-local-sync* : or 'local-sync-flags -;; if #t use timestamps : or 'timestamps -;; -;; NB// no-sync-db is the db handle, not a flag! -;; -(define (db:sync-to-megatest.db dbstruct #!key (no-sync-db #f)) - (let* ((start-time (current-seconds)) - (last-full-update (if no-sync-db - (db:no-sync-get/default no-sync-db "LAST_FULL_UPDATE" 0) - 0)) - (full-sync-needed (> (- start-time last-full-update) 3600)) ;; every hour do a full sync - (last-update (if full-sync-needed - 0 - (if no-sync-db - (db:no-sync-get/default no-sync-db "LAST_UPDATE" 0) - 0))) ;; (or (db:get-var dbstruct "LAST_UPDATE") 0)) - (sync-needed (> (- start-time last-update) 6)) - (res (if (or sync-needed ;; don't sync if a sync already occurred in the past 6 seconds - full-sync-needed) - (begin - (if no-sync-db - (begin - (if full-sync-needed (db:no-sync-set no-sync-db "LAST_FULL_UPDATE" start-time)) - (db:no-sync-set no-sync-db "LAST_UPDATE" start-time))) - (db:tmp->megatest.db-sync dbstruct last-update)) - 0)) - (sync-time (- (current-seconds) start-time))) - (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id)) - (if (common:low-noise-print 30 "sync new to old") - (if sync-needed - (debug:print-info 0 *default-log-port* "Sync of " res " records from newdb to olddb completed in " sync-time " seconds pid="(current-process-id)) - (debug:print-info 0 *default-log-port* "No sync needed, last updated " (- start-time last-update) " seconds ago"))) - res)) - -;; keeping it around for debugging purposes only -#;(define (open-run-close-no-exception-handling proc idb . params) - (debug:print-info 11 *default-log-port* "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params) - (print "I don't work anymore. open-run-close-no-exception-handling needs fixing or removing...") - (exit) - (if (or *db-write-access* - (not #t)) ;; was: (member proc * db:all-write-procs *))) - (let* ((db (cond - ((pair? idb) (db:dbdat-get-db idb)) - ((sqlite3:database? idb) idb) - ((not idb) (debug:print-error 0 *default-log-port* "cannot open-run-close with #f anymore")) - ((procedure? idb) (idb)) - (else (debug:print-error 0 *default-log-port* "cannot open-run-close with #f anymore")))) - (res #f)) - (set! res (apply proc db params)) - (if (not idb)(sqlite3:finalize! dbstruct)) - (debug:print-info 11 *default-log-port* "open-run-close-no-exception-handling END" ) - res) - #f)) - -#;(define (open-run-close-exception-handling proc idb . params) - (handle-exceptions - exn - (let ((sleep-time (pseudo-random-integer 30)) - (err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) - (case err-status - ((busy) - (thread-sleep! sleep-time)) - (else - (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) - (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status) exn)) - (print-call-chain (current-error-port)) - (thread-sleep! sleep-time) - (debug:print-info 0 *default-log-port* "trying db call one more time....this may never recover, if necessary kill process " (current-process-id) " on host " (get-host-name) " to clean up"))) - (apply open-run-close-exception-handling proc idb params)) - (apply open-run-close-no-exception-handling proc idb params))) - -;; (define open-run-close -#;(define open-run-close open-run-close-exception-handling) - ;; open-run-close-no-exception-handling -;; open-run-close-exception-handling) -;;) - -(define db:trigger-list - (list (list "update_runs_trigger" "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs - FOR EACH ROW - BEGIN - UPDATE runs SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;" ) - (list "update_run_stats_trigger" "CREATE TRIGGER IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats - FOR EACH ROW - BEGIN - UPDATE run_stats SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;" ) - (list "update_tests_trigger" "CREATE TRIGGER IF NOT EXISTS update_tests_trigger AFTER UPDATE ON tests - FOR EACH ROW - BEGIN - UPDATE tests SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;" ) - (list "update_teststeps_trigger" "CREATE TRIGGER IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps - FOR EACH ROW - BEGIN - UPDATE test_steps SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;" ) - (list "update_test_data_trigger" "CREATE TRIGGER IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data - FOR EACH ROW - BEGIN - UPDATE test_data SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;" ))) - -(define (db:create-all-triggers dbstruct) -(db:with-db - dbstruct #f #f - (lambda (db) -(db:create-triggers db)))) - -(define (db:create-triggers db) - (for-each (lambda (key) - (sqlite3:execute db (cadr key))) - db:trigger-list)) - -(define (db:drop-all-triggers dbstruct) - (db:with-db - dbstruct #f #f - (lambda (db) - (db:drop-triggers db)))) - -(define (db:is-trigger-dropped db tbl-name) - (let* ((trigger-name (if (equal? tbl-name "test_steps") - "update_teststeps_trigger" - (conc "update_" tbl-name "_trigger"))) - (res #f)) - (sqlite3:for-each-row - (lambda (name) - (if (equal? name trigger-name) - (set! res #t))) - db - "SELECT name FROM sqlite_master WHERE type = 'trigger' ;" - ))) - -(define (db:drop-triggers db) - (for-each - (lambda (key) - (sqlite3:execute db (conc "drop trigger if exists " (car key)))) - db:trigger-list)) - -(define (db:drop-trigger db tbl-name) - (let* ((trigger-name (if (equal? tbl-name "test_steps") - "update_teststeps_trigger" - (conc "update_" tbl-name "_trigger")))) - (for-each - (lambda (key) - (if (equal? (car key) trigger-name) - (sqlite3:execute db (conc "drop trigger if exists " trigger-name)))) - db:trigger-list))) - -(define (db:create-trigger db tbl-name) - (let* ((trigger-name (if (equal? tbl-name "test_steps") - "update_teststeps_trigger" - (conc "update_" tbl-name "_trigger")))) - (for-each (lambda (key) - (if (equal? (car key) trigger-name) - (sqlite3:execute db (cadr key)))) - db:trigger-list))) - - -(define (db:initialize-main-db dbdat) - (when (not *configinfo*) - (launch:setup)) ;; added because Elena was getting stack dump because *configinfo* below was #f. - (let* ((configdat (car *configinfo*)) ;; tut tut, global warning... - (keys (keys:config-get-fields configdat)) - (havekeys (> (length keys) 0)) - (keystr (keys->keystr keys)) - (fieldstr (keys:make-key/field-string configdat)) - (db (db:dbdat-get-db dbdat))) - (for-each (lambda (key) - (let ((keyn key)) - (if (member (string-downcase keyn) - (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" - "pass_count" "contour")) - (begin - (print "ERROR: your key cannot be named " keyn " as this conflicts with the same named field in the runs table, you must remove your megatest.db and /.db before trying again.") - (exit 1))))) - keys) - (sqlite3:with-transaction - db - (lambda () - ;; handle-exceptions - ;; exn - ;; (begin - ;; (debug:print 0 "ERROR: Failed to create tables. Look at your [fields] section, should be: fieldname TEXT DEFAULT 'yourdefault'") - ;; (exit)) - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS keys (id INTEGER PRIMARY KEY, fieldname TEXT, fieldtype TEXT, CONSTRAINT keyconstraint UNIQUE (fieldname));") - (for-each (lambda (key) - (sqlite3:execute db "INSERT OR REPLACE INTO keys (fieldname,fieldtype) VALUES (?,?);" key "TEXT")) - keys) - (sqlite3:execute db (conc - "CREATE TABLE IF NOT EXISTS runs (id INTEGER PRIMARY KEY, \n " - fieldstr (if havekeys "," "") " - runname TEXT DEFAULT 'norun', - contour TEXT DEFAULT '', - state TEXT DEFAULT '', - status TEXT DEFAULT '', - owner TEXT DEFAULT '', - event_time TIMESTAMP DEFAULT (strftime('%s','now')), - comment TEXT DEFAULT '', - fail_count INTEGER DEFAULT 0, - pass_count INTEGER DEFAULT 0, - last_update INTEGER DEFAULT (strftime('%s','now')), - CONSTRAINT runsconstraint UNIQUE (runname" (if havekeys "," "") keystr "));")) - ;; All triggers created at once in end - ;;(sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs - ;; FOR EACH ROW - ;; BEGIN - ;; UPDATE runs SET last_update=(strftime('%s','now')) - ;; WHERE id=old.id; - ;; END;") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS run_stats ( - id INTEGER PRIMARY KEY, - run_id INTEGER, - state TEXT, - status TEXT, - count INTEGER, - last_update INTEGER DEFAULT (strftime('%s','now')))") - ;; All triggers created at once in end - ;; (sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats - ;; FOR EACH ROW - ;; BEGIN - ;; UPDATE run_stats SET last_update=(strftime('%s','now')) - ;; WHERE id=old.id; - ;; END;") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_meta ( - id INTEGER PRIMARY KEY, - testname TEXT DEFAULT '', - author TEXT DEFAULT '', - owner TEXT DEFAULT '', - description TEXT DEFAULT '', - reviewed TIMESTAMP, - iterated TEXT DEFAULT '', - avg_runtime REAL, - avg_disk REAL, - tags TEXT DEFAULT '', - jobgroup TEXT DEFAULT 'default', - CONSTRAINT test_meta_constraint UNIQUE (testname));") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, - action TEXT DEFAULT '', - owner TEXT, - state TEXT DEFAULT 'new', - target TEXT DEFAULT '', - name TEXT DEFAULT '', - testpatt TEXT DEFAULT '', - keylock TEXT, - params TEXT, - creation_time TIMESTAMP DEFAULT (strftime('%s','now')), - execution_time TIMESTAMP);") - ;; archive disk areas, cached info from [archive-disks] - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_disks ( - id INTEGER PRIMARY KEY, - archive_area_name TEXT, - disk_path TEXT, - last_df INTEGER DEFAULT -1, - last_df_time TIMESTAMP DEFAULT (strftime('%s','now')), - creation_time TIMESTAMP DEFAULT (strftime('%s','now')));") - ;; individual bup (or tar) data chunks - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_blocks ( - id INTEGER PRIMARY KEY, - archive_disk_id INTEGER, - disk_path TEXT, - last_du INTEGER DEFAULT -1, - last_du_time TIMESTAMP DEFAULT (strftime('%s','now')), - creation_time TIMESTAMP DEFAULT (strftime('%s','now')));") - ;; tests allocated to what chunks. reusing a chunk for a test/item_path is very efficient - ;; NB// the per run/test recording of where the archive is stored is done in the test - ;; record. - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_allocations ( - id INTEGER PRIMARY KEY, - archive_block_id INTEGER, - testname TEXT, - item_path TEXT, - creation_time TIMESTAMP DEFAULT (strftime('%s','now')));") - ;; move this clean up call somewhere else - (sqlite3:execute db "DELETE FROM tasks_queue WHERE state='done' AND creation_time < ?;" (- (current-seconds)(* 24 60 60))) ;; remove older than 24 hrs - (sqlite3:execute db (conc "CREATE INDEX IF NOT EXISTS runs_index ON runs (runname" (if havekeys "," "") keystr ");")) - ;; (sqlite3:execute db "CREATE VIEW runs_tests AS SELECT * FROM runs INNER JOIN tests ON runs.id=tests.run_id;") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS extradat (id INTEGER PRIMARY KEY, run_id INTEGER, key TEXT, val TEXT);") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS metadat (id INTEGER PRIMARY KEY, var TEXT, val TEXT, - CONSTRAINT metadat_constraint UNIQUE (var));") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS access_log (id INTEGER PRIMARY KEY, user TEXT, accessed TIMESTAMP, args TEXT);") - ;; Must do this *after* running patch db !! No more. - ;; cannot use db:set-var since it will deadlock, hardwire the code here - (sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" "MEGATEST_VERSION" (common:version-signature)) - (debug:print-info 11 *default-log-port* "db:initialize END") ;; )))) - - ;;====================================================================== - ;; R U N S P E C I F I C D B - ;;====================================================================== - - ;; (define (db:initialize-run-id-db db) - ;; (sqlite3:with-transaction - ;; db - ;; (lambda () - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS tests - (id INTEGER PRIMARY KEY, - run_id INTEGER DEFAULT -1, - testname TEXT DEFAULT 'noname', - host TEXT DEFAULT 'n/a', - cpuload REAL DEFAULT -1, - diskfree INTEGER DEFAULT -1, - uname TEXT DEFAULT 'n/a', - rundir TEXT DEFAULT '/tmp/badname', - shortdir TEXT DEFAULT '/tmp/badname', - item_path TEXT DEFAULT '', - state TEXT DEFAULT 'NOT_STARTED', - status TEXT DEFAULT 'FAIL', - attemptnum INTEGER DEFAULT 0, - final_logf TEXT DEFAULT 'logs/final.log', - logdat TEXT DEFAULT '', - run_duration INTEGER DEFAULT 0, - comment TEXT DEFAULT '', - event_time TIMESTAMP DEFAULT (strftime('%s','now')), - fail_count INTEGER DEFAULT 0, - pass_count INTEGER DEFAULT 0, - archived INTEGER DEFAULT 0, -- 0=no, > 1=archive block id where test data can be found - last_update INTEGER DEFAULT (strftime('%s','now')), - CONSTRAINT testsconstraint UNIQUE (run_id, testname, item_path));") - ;; deprecated -- (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_index ON tests (run_id, testname, item_path, uname);") - - (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_run_id_index ON tests (run_id);") ;; new - (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_testname_index ON tests (testname,item_path);") ;; new - (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_state_status_index ON tests (state, status); ") ;; new - - ;; All triggers created at once in end - ;;(sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_tests_trigger AFTER UPDATE ON tests - ;; FOR EACH ROW - ;; BEGIN - ;; UPDATE tests SET last_update=(strftime('%s','now')) - ;; WHERE id=old.id; - ;; END;") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_steps - (id INTEGER PRIMARY KEY, - test_id INTEGER, - stepname TEXT, - state TEXT DEFAULT 'NOT_STARTED', - status TEXT DEFAULT 'n/a', - event_time TIMESTAMP, - comment TEXT DEFAULT '', - logfile TEXT DEFAULT '', - last_update INTEGER DEFAULT (strftime('%s','now')), - CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state));") - (sqlite3:execute db "CREATE INDEX IF NOT EXISTS teststeps_index ON tests (run_id, testname, item_path);") - ;; All triggers created at once in end - ;;(sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps - ;; FOR EACH ROW - ;; BEGIN - ;; UPDATE test_steps SET last_update=(strftime('%s','now')) - ;; WHERE id=old.id; - ;; END;") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_data (id INTEGER PRIMARY KEY, - test_id INTEGER, - category TEXT DEFAULT '', - variable TEXT, - value REAL, - expected REAL, - tol REAL, - units TEXT, - comment TEXT DEFAULT '', - status TEXT DEFAULT 'n/a', - type TEXT DEFAULT '', - last_update INTEGER DEFAULT (strftime('%s','now')), - CONSTRAINT test_data_constraint UNIQUE (test_id,category,variable));") - (sqlite3:execute db "CREATE INDEX IF NOT EXISTS test_data_index ON test_data (test_id);") - ;; All triggers created at once in end - ;;(sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data - ;; FOR EACH ROW - ;; BEGIN - ;; UPDATE test_data SET last_update=(strftime('%s','now')) - ;; WHERE id=old.id; - ;; END;") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_rundat ( - id INTEGER PRIMARY KEY, - test_id INTEGER, - update_time TIMESTAMP, - cpuload INTEGER DEFAULT -1, - diskfree INTEGER DEFAULT -1, - diskusage INTGER DEFAULT -1, - run_duration INTEGER DEFAULT 0);") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archives ( - id INTEGER PRIMARY KEY, - test_id INTEGER, - state TEXT DEFAULT 'new', - status TEXT DEFAULT 'n/a', - archive_type TEXT DEFAULT 'bup', - du INTEGER, - archive_path TEXT);"))) - (print "creating triggers from init") - (db:create-triggers db) - db)) ;; ) - -;;====================================================================== -;; A R C H I V E S -;;====================================================================== - -;; dneeded is minimum space needed, scan for existing archives that -;; are on disks with adequate space and already have this test/itempath -;; archived -;; -(define (db:archive-get-allocations dbstruct testname itempath dneeded) - (let* ((dbdat (db:get-db dbstruct)) ;; archive tables are in main.db - (db (db:dbdat-get-db dbdat)) - (res '()) - (blocks '())) ;; a block is an archive chunck that can be added too if there is space - (sqlite3:for-each-row - (lambda (id archive-disk-id disk-path last-du last-du-time) - (set! res (cons (vector id archive-disk-id disk-path last-du last-du-time) res))) - db - "SELECT b.id,b.archive_disk_id,b.disk_path,b.last_du,b.last_du_time FROM archive_blocks AS b - INNER JOIN archive_allocations AS a ON a.archive_block_id=b.id - WHERE a.testname=? AND a.item_path=?;" - testname itempath) - ;; Now res has list of candidate paths, look in archive_disks for candidate with potential free space - (if (null? res) - '() - (sqlite3:for-each-row - (lambda (id archive-area-name disk-path last-df last-df-time) - (set! blocks (cons (vector id archive-area-name disk-path last-df last-df-time) blocks))) - db - (conc - "SELECT d.id,d.archive_area_name,disk_path,last_df,last_df_time FROM archive_disks AS d - INNER JOIN archive_blocks AS b ON d.id=b.archive_disk_id - WHERE b.id IN (" (string-intersperse (map conc res) ",") ") AND - last_df > ?;") - dneeded)) - (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) - blocks)) - -;; returns id of the record, register a disk allocated to archiving and record it's last known -;; available space -;; -(define (db:archive-register-disk dbstruct bdisk-name bdisk-path df) - (let* ((dbdat (db:get-db dbstruct)) ;; archive tables are in main.db - (db (db:dbdat-get-db dbdat)) - (res #f)) - (sqlite3:for-each-row - (lambda (id) - (set! res id)) - db - "SELECT id FROM archive_disks WHERE archive_area_name=? AND disk_path=?;" - bdisk-name bdisk-path) - (if res ;; record exists, update df and return id - (begin - (sqlite3:execute db "UPDATE archive_disks SET last_df=?,last_df_time=(strftime('%s','now')) - WHERE archive_area_name=? AND disk_path=?;" - df bdisk-name bdisk-path) - (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) - res) - (begin - (sqlite3:execute - db - "INSERT OR REPLACE INTO archive_disks (archive_area_name,disk_path,last_df) - VALUES (?,?,?);" - bdisk-name bdisk-path df) - (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) - (db:archive-register-disk dbstruct bdisk-name bdisk-path df))))) - -;; record an archive path created on a given archive disk (identified by it's bdisk-id) -;; if path starts with / then it is full, otherwise it is relative to the archive disk -;; preference is to store the relative path. -;; -(define (db:archive-register-block-name dbstruct bdisk-id archive-path #!key (du #f)) - (let* ((dbdat (db:get-db dbstruct)) ;; archive tables are in main.db - (db (db:dbdat-get-db dbdat)) - (res #f)) - ;; first look to see if this path is already registered - (sqlite3:for-each-row - (lambda (id) - (set! res id)) - db - "SELECT id FROM archive_blocks WHERE archive_disk_id=? AND disk_path=?;" - bdisk-id archive-path) - (if res ;; record exists, update du if applicable and return res - (if du (sqlite3:execute db "UPDATE archive_blocks SET last_du=?,last_du_time=(strftime('%s','now')) - WHERE archive_disk_id=? AND disk_path=?;" - bdisk-id archive-path du)) - (begin - (sqlite3:execute db "INSERT OR REPLACE INTO archive_blocks (archive_disk_id,disk_path,last_du) - VALUES (?,?,?);" - bdisk-id archive-path (or du 0)) - (set! res (db:archive-register-block-name dbstruct bdisk-id archive-path du: du)))) - (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) - res)) - - -;; The "archived" field in tests is overloaded; 0 = not archived, > 0 archived in block with given id -;; -(define (db:test-set-archive-block-id dbstruct run-id test-id archive-block-id) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (sqlite3:execute db "UPDATE tests SET archived=? WHERE id=?;" - archive-block-id test-id)))) - -;; Look up the archive block info given a block-id -;; -(define (db:test-get-archive-block-info dbstruct archive-block-id) - (db:with-db - dbstruct - #f - #f - (lambda (db) - (let ((res #f)) - (sqlite3:for-each-row - ;; 0 1 2 3 4 5 - (lambda (id archive-disk-id disk-path last-du last-du-time creation-time) - (set! res (vector id archive-disk-id disk-path last-du last-du-time creation-time))) - db - "SELECT id,archive_disk_id,disk_path,last_du,last_du_time,creation_time FROM archive_blocks WHERE id=?;" - archive-block-id) - res)))) - -;; (define (db:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey) -;; (let* ((dbdat (db:get-db dbstruct #f)) ;; archive tables are in main.db -;; (db (db:dbdat-get-db dbdat)) -;; (res '()) -;; (blocks '())) ;; a block is an archive chunck that can be added too if there is space -;; (sqlite3:for-each-row #f) - -;;====================================================================== -;; L O G G I N G D B -;;====================================================================== - -(define (open-logging-db) - (let* ((dbpath (conc (if *toppath* (conc *toppath* "/") "") "logging.db")) ;; fname) - (dbexists (common:file-exists? dbpath)) - (db (sqlite3:open-database dbpath)) - (handler (sqlite3:make-busy-timeout (if (args:get-arg "-override-timeout") - (string->number (args:get-arg "-override-timeout")) - 136000)))) ;; 136000))) - (sqlite3:set-busy-handler! db handler) - (if (not dbexists) - (begin - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS log (id INTEGER PRIMARY KEY,event_time TIMESTAMP DEFAULT (strftime('%s','now')),logline TEXT,pwd TEXT,cmdline TEXT,pid INTEGER);") - (db:set-sync db) ;; (sqlite3:execute db (conc "PRAGMA synchronous = 0;")) - )) - db)) - -(define (db:log-local-event . loglst) - (let ((logline (apply conc loglst))) - (db:log-event logline))) - -(define (db:log-event logline) - (let ((db (open-logging-db))) - (sqlite3:execute db "INSERT INTO log (logline,pwd,cmdline,pid) VALUES (?,?,?,?);" - logline - (current-directory) - (string-intersperse (argv) " ") - (current-process-id)) - (sqlite3:finalize! db) - logline)) - -;;====================================================================== -;; D B U T I L S -;;====================================================================== - -;;====================================================================== -;; M A I N T E N A N C E -;;====================================================================== - -(define (db:have-incompletes? dbstruct run-id ovr-deadtime) - (let* ((incompleted '()) - (oldlaunched '()) - (toplevels '()) - (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) - (deadtime (if (and deadtime-str - (string->number deadtime-str)) - (string->number deadtime-str) - 72000))) ;; twenty hours - (db:with-db - dbstruct #f #f - (lambda (db) - (if (number? ovr-deadtime)(set! deadtime ovr-deadtime)) - - ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes - ;; - ;; HOWEVER: this code in run:test seems to work fine - ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) - ;; (db:test-get-run_duration testdat))) - ;; 600) - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (begin - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) - (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) - db - "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');" - run-id deadtime) - - ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config - ;; - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) - db - "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');" - run-id) - - (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") - (if (and (null? incompleted) - (null? oldlaunched) - (null? toplevels)) - #f - #t))))) - -(define (db:get-status-from-final-status-file run-dir) - (let ((infile (conc run-dir "/.final-status"))) - ;; first verify we are able to write the output file - (if (not (file-readable? infile)) - (begin - (debug:print 0 *default-log-port* "ERROR: cannot read " infile) - (debug:print 0 *default-log-port* "ERROR: run-dir is " run-dir) - #f - ) - (with-input-from-file infile read-lines) - ))) - -;; select end_time-now from -;; (select testname,item_path,event_time+run_duration as -;; end_time,strftime('%s','now') as now from tests where state in -;; ('RUNNING','REMOTEHOSTSTART','LAUNCHED')); - -(define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime) - (let* ((incompleted '()) - (oldlaunched '()) - (toplevels '()) - ;; The default running-deadtime is 720 seconds = 12 minutes. - ;; "(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))" = 200 + (2 * (200 + 30 + 30)) - (deadtime-trim (or ovr-deadtime (configf:lookup-number *configdat* "setup" "deadtime"))) - (server-start-allowance 200) - (server-overloaded-budget 200) - (launch-monitor-off-time (or (configf:lookup-number *configdat* "setup" "test-stats-update-period") 30)) - (launch-monitor-on-time-budget 30) - (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget)) - (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30)) - (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default)) - (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period))) - (running-deadtime (or deadtime-trim running-deadtime-default)) ;; two minutes (30 seconds between updates, this leaves 3x grace period) - ) - (debug:print-info 4 *default-log-port* "running-deadtime = " running-deadtime) - (debug:print-info 4 *default-log-port* "deadtime-trim = " deadtime-trim) - - (db:with-db - dbstruct #f #f - (lambda (db) - (let* ((stmth1 (db:get-cache-stmth - dbstruct db - "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests - WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) - AND state IN ('RUNNING');")) - (stmth2 (db:get-cache-stmth - dbstruct db - "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests - WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) - AND state IN ('REMOTEHOSTSTART');")) - (stmth3 (db:get-cache-stmth - dbstruct db - "SELECT id,rundir,uname,testname,item_path FROM tests - WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 - AND state IN ('LAUNCHED');"))) - ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes - ;; - ;; HOWEVER: this code in run:test seems to work fine - ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) - ;; (db:test-get-run_duration testdat))) - ;; 600) - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path event-time run-duration) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (begin - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) - (begin - (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)) - (debug:print-info 0 *default-log-port* "Found old test in RUNNING state, test-id=" - test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds) - " event-time="event-time" run-duration="run-duration)))) - stmth1 - run-id running-deadtime) ;; default time 720 seconds - - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path event-time run-duration) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (begin - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) - (begin - (debug:print-info 0 *default-log-port* "Found old test in REMOTEHOSTSTART state, test-id=" test-id - " exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time - " run-duration="run-duration) - (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))) - stmth2 - run-id remotehoststart-deadtime) ;; default time 230 seconds - - ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config - ;; - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (begin - (debug:print-info 0 *default-log-port* "Found old test in LAUNCHED state, test-id=" test-id - " 1 day since event_time marked") - (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))) - stmth3 - run-id) - - (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " - (length toplevels) " old LAUNCHED toplevel tests and " - (length incompleted) " tests marked RUNNING but apparently dead.")) - - ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE. - ;; - ;; (db:delay-if-busy dbdat) - (let* ((min-incompleted-ids (map car incompleted)) ;; do 'em all - (all-ids (append min-incompleted-ids (map car oldlaunched)))) - (if (> (length all-ids) 0) - (begin - ;; (launch:is-test-alive "localhost" 435) - (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") - " as DEAD") - (for-each - (lambda (test-id) - (let* (;; (run-dir (db:test-get-rundir-from-test-id dbstruct run-id test-id)) - (tinfo (db:get-test-info-by-id dbstruct run-id test-id)) - (run-dir (db:test-get-rundir tinfo)) - (host (db:test-get-host tinfo)) - (pid (db:test-get-process_id tinfo)) - (result (db:get-status-from-final-status-file run-dir))) - (if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result))) - (begin - (debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD") - (db:set-state-status-and-roll-up-items - dbstruct run-id test-id 'foo "COMPLETED" "PASS" - "Test stopped responding but it has PASSED; marking it PASS in the DB.")) - (let ((is-alive (and (not (eq? pid 0)) ;; 0 is default in re-used field "attemptnum" where pid stored. - (launch:is-test-alive host pid)))) - (if is-alive - (debug:print 0 *default-log-port* "INFO: test " test-id " on host " host - " has a process on pid " pid ", NOT setting to DEAD.") - (begin - (debug:print 0 *default-log-port* "INFO: test " test-id - " final state/status is not COMPLETED/PASS. It is " result) - (db:set-state-status-and-roll-up-items - dbstruct run-id test-id 'foo "COMPLETED" "DEAD" - "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead."))))))) - ;; call end of eud of run detection for posthook - from merge, is it needed? - ;; (launch:end-of-run-check run-id) - all-ids) - ;;call end of eud of run detection for posthook - (launch:end-of-run-check run-id) - ))))))) - -;; BUG: Probably broken - does not explicitly use run-id in the query -;; -(define (db:top-test-set-per-pf-counts dbstruct run-id test-name) - (db:general-call dbstruct 'top-test-set-per-pf-counts (list test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name))) - -;; Clean out old junk and vacuum the database -;; -;; Ultimately do something like this: -;; -;; 1. Look at test records either deleted or part of deleted run: -;; a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown' -;; b. If test dir gone, delete the test record -;; 2. Look at run records -;; a. If have tests that are not deleted, set state='unknown' -;; b. .... -;; -(define (db:clean-up dbdat) - ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db") - (let* ((keep-record-age ( - (current-seconds) (common:hms-string->seconds (or (configf:lookup *configdat* "setup" "delete-record-age") "30d")))) - (db (db:dbdat-get-db dbdat)) - (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests)+(SELECT count(id) FROM runs);")) - (statements - (map (lambda (stmt) - (sqlite3:prepare db stmt)) - (list - ;; delete all tests that belong to runs that are 'deleted' - (conc "DELETE FROM tests WHERE run_id in (SELECT id FROM runs WHERE state='deleted') and last_update < " keep-record-age ";") - ;; delete all tests that are 'DELETED' - (conc "DELETE FROM tests WHERE state='DELETED' and last_update < " keep-record-age " ;") - ;; delete all tests that have no run - (conc "DELETE FROM tests WHERE run_id NOT IN (SELECT DISTINCT id FROM runs) and last_update < " keep-record-age "; ") - ;; delete all runs that are state='deleted' - (conc "DELETE FROM runs WHERE state='deleted' and last_update < " keep-record-age ";") - ;; delete empty runs - (conc "DELETE FROM runs WHERE id NOT IN (SELECT DISTINCT r.id FROM runs AS r INNER JOIN tests AS t ON t.run_id=r.id) and last_update < " keep-record-age ";") - ;; remove orphaned test_rundat entries - (conc "DELETE FROM test_rundat where test_id NOT IN (SELECT id FROM tests);") - ;; remove orphaned test_steps entries - (conc "DELETE FROM test_steps WHERE test_id NOT IN (SELECT id FROM tests);") - ;; remove orphaned test_dat entries - (conc "DELETE FROM test_data WHERE test_id NOT IN (SELECT id FROM tests);") - - )))) - ;; (db:delay-if-busy dbdat) - ;(debug:print-info 0 *default-log-port* statements) - (sqlite3:with-transaction - db - (lambda () - (sqlite3:for-each-row (lambda (tot) - (debug:print-info 0 *default-log-port* "Records count before clean: " tot)) - count-stmt) - (map sqlite3:execute statements) - (sqlite3:for-each-row (lambda (tot) - (debug:print-info 0 *default-log-port* "Records count after clean: " tot)) - count-stmt))) - (map sqlite3:finalize! statements) - (sqlite3:finalize! count-stmt) - ;; (db:find-and-mark-incomplete db) - ;; (db:delay-if-busy dbdat) - (sqlite3:execute db "VACUUM;"))) - -;; Clean out old junk and vacuum the database -;; -;; Ultimately do something like this: -;; -;; 1. Look at test records either deleted or part of deleted run: -;; a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown' -;; b. If test dir gone, delete the test record -;; 2. Look at run records -;; a. If have tests that are not deleted, set state='unknown' -;; b. .... -;; -(define (db:clean-up-rundb dbdat) - ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db") - (let* ((db (db:dbdat-get-db dbdat)) - (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests);")) - (statements - (map (lambda (stmt) - (sqlite3:prepare db stmt)) - (list - ;; delete all tests that belong to runs that are 'deleted' - ;; (conc "DELETE FROM tests WHERE run_id NOT IN (" (string-intersperse (map conc valid-runs) ",") ");") - ;; delete all tests that are 'DELETED' - "DELETE FROM tests WHERE state='DELETED';" - )))) - ;; (db:delay-if-busy dbdat) - (sqlite3:with-transaction - db - (lambda () - (sqlite3:for-each-row (lambda (tot) - (debug:print-info 0 *default-log-port* "Records count before clean: " tot)) - count-stmt) - (map sqlite3:execute statements) - (sqlite3:for-each-row (lambda (tot) - (debug:print-info 0 *default-log-port* "Records count after clean: " tot)) - count-stmt))) - (map sqlite3:finalize! statements) - (sqlite3:finalize! count-stmt) - ;; (db:find-and-mark-incomplete db) - ;; (db:delay-if-busy dbdat) - (sqlite3:execute db "VACUUM;"))) - -;; Clean out old junk and vacuum the database -;; -;; Ultimately do something like this: -;; -;; 1. Look at test records either deleted or part of deleted run: -;; a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown' -;; b. If test dir gone, delete the test record -;; 2. Look at run records -;; a. If have tests that are not deleted, set state='unknown' -;; b. .... -;; -(define (db:clean-up-maindb dbdat) - ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db") - (let* ((db (db:dbdat-get-db dbdat)) - (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM runs);")) - (statements - (map (lambda (stmt) - (sqlite3:prepare db stmt)) - (list - ;; delete all tests that belong to runs that are 'deleted' - ;; (conc "DELETE FROM tests WHERE run_id NOT IN (" (string-intersperse (map conc valid-runs) ",") ");") - ;; delete all tests that are 'DELETED' - "DELETE FROM runs WHERE state='deleted';" - ))) - (dead-runs '())) - (sqlite3:for-each-row - (lambda (run-id) - (set! dead-runs (cons run-id dead-runs))) - db - "SELECT id FROM runs WHERE state='deleted';") - ;; (db:delay-if-busy dbdat) - (sqlite3:with-transaction - db - (lambda () - (sqlite3:for-each-row (lambda (tot) - (debug:print-info 0 *default-log-port* "Records count before clean: " tot)) - count-stmt) - (map sqlite3:execute statements) - (sqlite3:for-each-row (lambda (tot) - (debug:print-info 0 *default-log-port* "Records count after clean: " tot)) - count-stmt))) - (map sqlite3:finalize! statements) - (sqlite3:finalize! count-stmt) - ;; (db:find-and-mark-incomplete db) - ;; (db:delay-if-busy dbdat) - (sqlite3:execute db "VACUUM;") - dead-runs)) - -;;====================================================================== -;; M E T A G E T A N D S E T V A R S -;;====================================================================== - -;; returns number if string->number is successful, string otherwise -;; also updates *global-delta* -;; -(define (db:get-var dbstruct var) - (let* ((res #f)) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (val) - (set! res val)) - db - "SELECT val FROM metadat WHERE var=?;" var) - ;; convert to number if can - (if (string? res) - (let ((valnum (string->number res))) - (if valnum (set! res valnum)))) - res)))) - -(define (db:inc-var dbstruct var) - (db:with-db dbstruct #f #t - (lambda (db) - (sqlite3:execute db "UPDATE metadat SET val=val+1 WHERE var=?;" var)))) - -(define (db:dec-var dbstruct var) - (db:with-db dbstruct #f #t - (lambda (db) - (sqlite3:execute db "UPDATE metadat SET val=val-1 WHERE var=?;" var)))) - -;; This was part of db:get-var. It was used to estimate the load on -;; the database files. -;; -;; scale by 10, average with current value. -;; (set! *global-delta* (/ (+ *global-delta* (* (- (current-milliseconds) start-ms) -;; (if throttle throttle 0.01))) -;; 2)) -;; (if (> (abs (- *last-global-delta-printed* *global-delta*)) 0.08) ;; don't print all the time, only if it changes a bit -;; (begin -;; (debug:print-info 4 *default-log-port* "launch throttle factor=" *global-delta*) -;; (set! *last-global-delta-printed* *global-delta*))) - -(define (db:set-var dbstruct var val) - (db:with-db dbstruct #f #t - (lambda (db) - (sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" var val)))) - -(define (db:add-var dbstruct var val) - (db:with-db dbstruct #f #t - (lambda (db) - (sqlite3:execute db "UPDATE metadat SET val=val+? WHERE var=?;" val var)))) - -(define (db:del-var dbstruct var) - (db:with-db dbstruct #f #t - (lambda (db) - (sqlite3:execute db "DELETE FROM metadat WHERE var=?;" var)))) - -;;====================================================================== -;; no-sync.db - small bits of data to be shared between servers -;;====================================================================== - -(define (db:open-no-sync-db) - (let* ((dbpath (db:dbfile-path)) - (dbname (conc dbpath "/no-sync.db")) - (db-exists (common:file-exists? dbname)) - (db (sqlite3:open-database dbname))) - (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) - (if (not db-exists) - (begin - (sqlite3:execute db "PRAGMA synchronous = 0;") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));") - (sqlite3:execute db "PRAGMA journal_mode=WAL;"))) - db)) - -;; if we are not a server create a db handle. this is not finalized -;; so watch for problems. I'm still not clear if it is needed to manually -;; finalize sqlite3 dbs with the sqlite3 egg. -;; -(define (db:no-sync-db db-in) - (mutex-lock! *db-access-mutex*) - (let ((res (if db-in - db-in - (let ((db (db:open-no-sync-db))) - (set! *no-sync-db* db) - db)))) - (mutex-unlock! *db-access-mutex*) - res)) - -(define (db:no-sync-set db var val) - (sqlite3:execute (db:no-sync-db db) "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val)) - -(define (db:no-sync-del! db var) - (sqlite3:execute (db:no-sync-db db) "DELETE FROM no_sync_metadat WHERE var=?;" var)) - -(define (db:no-sync-get/default db var default) - (let ((res default)) - (sqlite3:for-each-row - (lambda (val) - (set! res val)) - (db:no-sync-db db) - "SELECT val FROM no_sync_metadat WHERE var=?;" - var) - (if res - (let ((newres (if (string? res) - (string->number res) - #f))) - (if newres - newres - res)) - res))) - -(define (db:no-sync-close-db db stmt-cache) - (db:safely-close-sqlite3-db db stmt-cache)) - -;; transaction protected lock aquisition -;; either: -;; fails returns (#f . lock-creation-time) -;; succeeds (returns (#t . lock-creation-time) -;; use (db:no-sync-del! db keyname) to release the lock -;; -(define (db:no-sync-get-lock db-in keyname) - (let ((db (db:no-sync-db db-in))) - (sqlite3:with-transaction - db - (lambda () - (handle-exceptions - exn - (let ((lock-time (current-seconds))) - (debug:print-info 2 *default-log-port* "db:no-sync-get-lock keyname=" keyname ", lock-time=" lock-time ", exn=" exn) - (sqlite3:execute db "INSERT INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time) - `(#t . ,lock-time)) - `(#f . ,(sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname))))))) - - - -;; use a global for some primitive caching, it is just silly to -;; re-read the db over and over again for the keys since they never -;; change - -;; why get the keys from the db? why not get from the *configdat* -;; using keys:config-get-fields? - -(define (db:get-keys dbstruct) - (if *db-keys* *db-keys* - (let ((res '())) - (db:with-db dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (key) - (set! res (cons key res))) - db - "SELECT fieldname FROM keys ORDER BY id DESC;"))) - (set! *db-keys* res) - res))) - -;; extract index number given a header/data structure -(define (db:get-index-by-header header field) - (list-index (lambda (x)(equal? x field)) header)) - -;; look up values in a header/data structure -(define (db:get-value-by-header row header field) - (if (or (null? header) (not row)) - #f - (let loop ((hed (car header)) - (tal (cdr header)) - (n 0)) - (if (equal? hed field) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: attempt to read non-existant field, row=" - row " header=" header " field=" field ", exn=" exn) - #f) - (vector-ref row n)) - (if (null? tal) #f (loop (car tal)(cdr tal)(+ n 1))))))) - -;; Accessors for the header/data structure -;; get rows and header from -(define (db:get-header vec)(vector-ref vec 0)) -(define (db:get-rows vec)(vector-ref vec 1)) - -;;====================================================================== -;; R U N S -;;====================================================================== - - - - - -(define (db:get-run-times dbstruct run-patt target-patt) -(let ((res `()) - (qry (conc "select runname, (max(end_time)-min(event_time))/60 as runtime, target from (select runname, run_id,tests.event_time,tests.event_time+run_duration AS end_time, " (string-join (db:get-keys dbstruct) " || '/' || ") " as target from tests inner join runs on tests.run_id = runs.id where runs.runname like ? and target like ?) group by run_id ;"))) -;(print qry) -(db:with-db - dbstruct - #f ;; this is for the main runs db - #f ;; does not modify db - (lambda (db) - (sqlite3:for-each-row - (lambda (runname runtime target ) - (set! res (cons (vector runname runtime target) res))) - db - qry - run-patt target-patt) - - res)))) - - - -(define (db:get-run-name-from-id dbstruct run-id) - (db:with-db - dbstruct - #f ;; this is for the main runs db - #f ;; does not modify db - (lambda (db) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (runname) - (set! res runname)) - db - "SELECT runname FROM runs WHERE id=?;" - run-id) - res)))) - -(define (db:get-run-key-val dbstruct run-id key) - (db:with-db - dbstruct - #f - #f - (lambda (db) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (val) - (set! res val)) - db - (conc "SELECT " key " FROM runs WHERE id=?;") - run-id) - res)))) - -;; keys list to key1,key2,key3 ... -(define (runs:get-std-run-fields keys remfields) - (let* ((header (append keys remfields)) - (keystr (conc (keys->keystr keys) "," - (string-intersperse remfields ",")))) - (list keystr header))) - -;; make a query (fieldname like 'patt1' OR fieldname -(define (db:patt->like fieldname pattstr #!key (comparator " OR ")) - (let ((patts (if (string? pattstr) - (string-split pattstr ",") - '("%")))) - (string-intersperse (map (lambda (patt) - (let ((wildtype (if (substring-index "%" patt) "LIKE" "GLOB"))) - (conc fieldname " " wildtype " '" patt "'"))) - (if (null? patts) - '("") - patts)) - comparator))) - - -;; register a test run with the db, this accesses the main.db and does NOT -;; use server api -;; -(define (db:register-run dbstruct keyvals runname state status user contour-in) - (let* ((keys (map car keyvals)) - (keystr (keys->keystr keys)) - (contour (or contour-in "")) ;; empty string to force no hierarcy and be backwards compatible. - (comma (if (> (length keys) 0) "," "")) - (andstr (if (> (length keys) 0) " AND " "")) - (valslots (keys->valslots keys)) ;; ?,?,? ... - (allvals (append (list runname state status user contour) (map cadr keyvals))) - (qryvals (append (list runname) (map cadr keyvals))) - (key=?str (string-intersperse (map (lambda (k)(conc k "=?")) keys) " AND "))) - (debug:print 3 *default-log-port* "keys: " keys " allvals: " allvals " keyvals: " keyvals " key=?str is " key=?str) - (debug:print 2 *default-log-port* "NOTE: using target " (string-intersperse (map cadr keyvals) "/") " for this run") - (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and" - (db:with-db - dbstruct #f #f - (lambda (db) - (let ((res #f)) - (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time,contour" comma keystr ") VALUES (?,?,?,?,strftime('%s','now'),?" comma valslots ");") - allvals) - (apply sqlite3:for-each-row - (lambda (id) - (set! res id)) - db - (let ((qry (conc "SELECT id FROM runs WHERE (runname=? " andstr key=?str ");"))) - qry) - qryvals) - (sqlite3:execute db "UPDATE runs SET state=?,status=?,event_time=strftime('%s','now') WHERE id=? AND state='deleted';" state status res) - res))) - (begin - (debug:print-error 0 *default-log-port* "Called without all necessary keys") - #f)))) - -;; replace header and keystr with a call to runs:get-std-run-fields -;; -;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") ) -;; runpatts: patt1,patt2 ... -;; -(define (db:get-runs dbstruct runpatt count offset keypatts) - (let* ((res '()) - (keys (db:get-keys dbstruct)) - (runpattstr (db:patt->like "runname" runpatt)) - (remfields (list "id" "runname" "state" "status" "owner" "event_time")) - (header (append keys remfields)) - (keystr (conc (keys->keystr keys) "," - (string-intersperse remfields ","))) - (qrystr (conc "SELECT " keystr " FROM runs WHERE (" runpattstr ") " ;; runname LIKE ? " - ;; Generate: " AND x LIKE 'keypatt' ..." - (if (null? keypatts) "" - (conc " AND " - (string-join - (map (lambda (keypatt) - (let ((key (car keypatt)) - (patt (cadr keypatt))) - (db:patt->like key patt))) - keypatts) - " AND "))) - " AND state != 'deleted' ORDER BY event_time DESC " - (if (number? count) - (conc " LIMIT " count) - "") - (if (number? offset) - (conc " OFFSET " offset) - "")))) - (debug:print-info 11 *default-log-port* "db:get-runs START qrystr: " qrystr " keypatts: " keypatts " offset: " offset " limit: " count) - (db:with-db dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (a . x) - (set! res (cons (apply vector a x) res))) - db - qrystr - ))) - (debug:print-info 11 *default-log-port* "db:get-runs END qrystr: " qrystr " keypatts: " keypatts " offset: " offset " limit: " count) - (vector header res))) - - -(define-record simple-run target id runname state status owner event_time) -(define-record-printer (simple-run x out) - (fprintf out "#,(simple-run ~S ~S ~S ~S)" - (simple-run-target x) (simple-run-id x) (simple-run-runname x) (time->string (seconds->local-time (simple-run-event_time x) )))) - -;; simple get-runs -;; -(define (db:simple-get-runs dbstruct runpatt count offset target last-update) - (let* ((res '()) - (keys (db:get-keys dbstruct)) - (runpattstr (db:patt->like "runname" runpatt)) - (remfields (list "id" "runname" "state" "status" "owner" "event_time")) - (targstr (string-intersperse keys "||'/'||")) - (keystr (conc targstr " AS target," - (string-intersperse remfields ","))) - (qrystr (conc "SELECT " keystr " FROM runs WHERE (" runpattstr ") " ;; runname LIKE ? " - ;; Generate: " AND x LIKE 'keypatt' ..." - " AND target LIKE '" target "'" - " AND state != 'deleted' " - (if (number? last-update) - (conc " AND last_update >= " last-update) - "") - " ORDER BY event_time DESC " - (if (number? count) - (conc " LIMIT " count) - "") - (if (number? offset) - (conc " OFFSET " offset) - ""))) - ) - (debug:print-info 11 *default-log-port* "db:get-runs START qrystr: " qrystr " target: " target " offset: " offset " limit: " count) - (db:with-db dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (target id runname state status owner event_time) - (set! res (cons (make-simple-run target id runname state status owner event_time) res))) - db - qrystr - ))) - (debug:print-info 11 *default-log-port* "db:get-runs END qrystr: " qrystr " target: " target " offset: " offset " limit: " count) - res)) - -;; TODO: Switch this to use max(update_time) from each run db? Then if using a server there is no disk traffic (using inmem db) -;; -(define (db:get-changed-run-ids since-time) - (let* ((dbdir (db:dbfile-path)) ;; (configf:lookup *configdat* "setup" "dbdir")) - (alldbs (glob (conc dbdir "/[0-9]*.db"))) - (changed (filter (lambda (dbfile) - (> (file-modification-time dbfile) since-time)) - alldbs))) - (delete-duplicates - (map (lambda (dbfile) - (let* ((res (string-match ".*\\/(\\d)*\\.db" dbfile))) - (if res - (string->number (cadr res)) - (begin - (debug:print 2 *default-log-port* "WARNING: Failed to process " dbfile " for run-id") - 0)))) - changed)))) - -;; Get all targets from the db -;; -(define (db:get-targets dbstruct) - (let* ((res '()) - (keys (db:get-keys dbstruct)) - (header keys) ;; (map key:get-fieldname keys)) - (keystr (keys->keystr keys)) - (qrystr (conc "SELECT " keystr " FROM runs WHERE state != 'deleted';")) - (seen (make-hash-table))) - (db:with-db - dbstruct - #f - #f - (lambda (db) - (sqlite3:for-each-row - (lambda (a . x) - (let ((targ (cons a x))) - (if (not (hash-table-ref/default seen targ #f)) - (begin - (hash-table-set! seen targ #t) - (set! res (cons (apply vector targ) res)))))) - db - qrystr) - (debug:print-info 11 *default-log-port* "db:get-targets END qrystr: " qrystr ) - (vector header res))))) - -;; just get count of runs -(define (db:get-num-runs dbstruct runpatt) - (db:with-db - dbstruct - #f - #f - (lambda (db) - (let ((numruns 0)) - (debug:print-info 11 *default-log-port* "db:get-num-runs START " runpatt) - (sqlite3:for-each-row - (lambda (count) - (set! numruns count)) - db - "SELECT COUNT(id) FROM runs WHERE runname LIKE ? AND state != 'deleted';" runpatt) - (debug:print-info 11 *default-log-port* "db:get-num-runs END " runpatt) - numruns)))) - -;; just get count of runs -(define (db:get-runs-cnt-by-patt dbstruct runpatt targetpatt keys) - (db:with-db - dbstruct - #f - #f - (lambda (db) - (let ((numruns 0) - (qry-str #f) - (key-patt "") - (keyvals (if targetpatt (keys:target->keyval keys targetpatt) '()))) - - (for-each (lambda (keyval) - (let* ((key (car keyval)) - (patt (cadr keyval)) - (fulkey (conc ":" key)) - (wildtype (if (substring-index "%" patt) "like" "glob"))) - - (if patt - (set! key-patt (conc key-patt " AND " key " " wildtype " '" patt "'")) - (begin - (debug:print-error 0 *default-log-port* "searching for runs with no pattern set for " fulkey) - (exit 6))))) - keyvals) - ;(print runpatt " -- " key-patt) - (set! qry-str (conc "SELECT COUNT(id) FROM runs WHERE state != 'deleted' AND runname like '" runpatt "'" key-patt)) - ;(print qry-str ) - - (sqlite3:for-each-row - (lambda (count) - (set! numruns count)) - db - qry-str) - (debug:print-info 11 *default-log-port* "db:get-num-runs END " runpatt) - numruns)))) - - -;; (sqlite3#fold-row proc3670 init3671 db-or-stmt3672 . params3673)> -;; -(define (db:get-raw-run-stats dbstruct run-id) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (sqlite3:fold-row - (lambda (res state status count) - (cons (list state status count) res)) - '() - db - "SELECT state,status,count(id) AS count FROM tests WHERE run_id=? AND NOT(uname='n/a' AND item_path='') GROUP BY state,status;" - run-id)))) - -;; Update run_stats for given run_id -;; input data is a list (state status count) -;; -(define (db:update-run-stats dbstruct run-id stats) - ;; (mutex-lock! *db-transaction-mutex*) - (db:with-db - dbstruct - #f - #f - - (lambda (db) - ;; remove previous data - - (let* ((stmt1 (sqlite3:prepare db "DELETE FROM run_stats WHERE run_id=? AND state=? AND status=?;")) - (stmt2 (sqlite3:prepare db "INSERT INTO run_stats (run_id,state,status,count) VALUES (?,?,?,?);")) - (res - (sqlite3:with-transaction - db - (lambda () - (for-each - (lambda (dat) - (sqlite3:execute stmt1 run-id (car dat)(cadr dat)) - (apply sqlite3:execute stmt2 run-id dat)) - stats))))) - (sqlite3:finalize! stmt1) - (sqlite3:finalize! stmt2) - ;; (mutex-unlock! *db-transaction-mutex*) - res)))) - -(define (db:get-main-run-stats dbstruct run-id) - (db:with-db - dbstruct - #f ;; this data comes from main - #f - (lambda (db) - (sqlite3:fold-row - (lambda (res state status count) - (cons (list state status count) res)) - '() - db - "SELECT state,status,count FROM run_stats WHERE run_id=? AND run_id IN (SELECT id FROM runs WHERE state NOT IN ('DELETED','deleted'));" - run-id)))) - -(define (db:print-current-query-stats) - ;; generate stats from *db-api-call-time* - (let ((ordered-keys (sort (hash-table-keys *db-api-call-time*) - (lambda (a b) - (let ((sum-a (common:sum (hash-table-ref *db-api-call-time* a))) - (sum-b (common:sum (hash-table-ref *db-api-call-time* b)))) - (> sum-a sum-b))))) - (total 0)) - (for-each - (lambda (cmd-key) - (let* ((dat (hash-table-ref *db-api-call-time* cmd-key)) - (num (length dat)) - (avg (if (> num 0) - (/ (common:sum dat)(length dat))))) - (set! total (+ total num)) - (debug:print-info 0 *default-log-port* cmd-key "\tavg: " avg " max: " (common:max dat) " min: " (common:min-max < dat) " num: " (length dat)))) - ordered-keys) - (debug:print-info 0 *default-log-port* "TOTAL: " total " api calls since start."))) - -(define (db:get-all-run-ids dbstruct) - (db:with-db - dbstruct - #f - #f - (lambda (db) - (let ((run-ids '())) - (sqlite3:for-each-row - (lambda (run-id) - (set! run-ids (cons run-id run-ids))) - db - "SELECT id FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;") - (reverse run-ids))))) - -;; get some basic run stats -;; -;; data structure: -;; -;; ( (runname (( state count ) ... )) -;; ( ... -;; -(define (db:get-run-stats dbstruct) - (let* ((totals (make-hash-table)) - (curr (make-hash-table)) - (res '()) - (runs-info '())) - ;; First get all the runname/run-ids - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (run-id runname) - (set! runs-info (cons (list run-id runname) runs-info))) - db - "SELECT id,runname FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;"))) ;; If you change this to the more logical ASC please adjust calls to db:get-run-stats - ;; for each run get stats data - (for-each - (lambda (run-info) - ;; get the net state/status counts for this run - (let* ((run-id (car run-info)) - (run-name (cadr run-info))) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (sqlite3:for-each-row - (lambda (state status count) - (let ((netstate (if (equal? state "COMPLETED") status state))) - (if (string? netstate) - (begin - (hash-table-set! totals netstate (+ (hash-table-ref/default totals netstate 0) count)) - (hash-table-set! curr netstate (+ (hash-table-ref/default curr netstate 0) count)))))) - db - "SELECT state,status,count(id) FROM tests AS t WHERE run_id=? GROUP BY state,status ORDER BY state,status DESC;" - run-id) - ;; add the per run counts to res - (for-each (lambda (state) - (set! res (cons (list run-name state (hash-table-ref curr state)) res))) - (sort (hash-table-keys curr) string>=)) - (set! curr (make-hash-table)))))) - runs-info) - (for-each (lambda (state) - (set! res (cons (list "Totals" state (hash-table-ref totals state)) res))) - (sort (hash-table-keys totals) string>=)) - res)) - -;; db:get-runs-by-patt -;; get runs by list of criteria -;; register a test run with the db -;; -;; Use: (db:get-value-by-header (db:get-header runinfo)(db:get-rows runinfo)) -;; to extract info from the structure returned -;; -(define (db:get-runs-by-patt dbstruct keys runnamepatt targpatt offset limit fields last-update sort-order ) ;; test-name) - (let* ((tmp (runs:get-std-run-fields keys (or fields '("id" "runname" "state" "status" "owner" "event_time")))) - (keystr (car tmp)) - (header (cadr tmp)) - (key-patt "") - (runwildtype (if (substring-index "%" runnamepatt) "like" "glob")) - (qry-str #f) - (keyvals (if targpatt (keys:target->keyval keys targpatt) '()))) - (for-each (lambda (keyval) - (let* ((key (car keyval)) - (patt (cadr keyval)) - (fulkey (conc ":" key)) - (wildtype (if (substring-index "%" patt) "like" "glob"))) - (if patt - (set! key-patt (conc key-patt " AND " key " " wildtype " '" patt "'")) - (begin - (debug:print-error 0 *default-log-port* "searching for runs with no pattern set for " fulkey) - (exit 6))))) - keyvals) - (set! qry-str (conc "SELECT " keystr " FROM runs WHERE state != 'deleted' AND runname " runwildtype " ? " key-patt - (if last-update - (conc " AND last_update >= " last-update " ") - " ") - " ORDER BY event_time " sort-order " " - (if limit (conc " LIMIT " limit) "") - (if offset (conc " OFFSET " offset) "") - ";")) - (debug:print-info 4 *default-log-port* "runs:get-runs-by-patt qry=" qry-str " " runnamepatt) - ;(print "runs:get-runs-by-patt qry=" qry-str " " runnamepatt) - - (vector header - (reverse - (db:with-db dbstruct #f #f ;; reads db, does not write to it. - (lambda (db) - (sqlite3:fold-row - (lambda (res . r) - (cons (list->vector r) res)) - '() - db - qry-str - runnamepatt))))))) - -;; use (get-value-by-header (db:get-header runinfo)(db:get-rows runinfo)) -;; NOTE: Does NOT return a list of rows (or one row) for the first slot of the vector -;; this is inconsistent with get-runs but it makes some sense. -;; -(define (db:get-run-info dbstruct run-id) - ;;(if (hash-table-ref/default *run-info-cache* run-id #f) - ;; (hash-table-ref *run-info-cache* run-id) - (let* ((res (vector #f #f #f #f)) - (keys (db:get-keys dbstruct)) - (remfields (list "id" "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update")) ;; "area_id")) - (header (append keys remfields)) - (keystr (conc (keys->keystr keys) "," - (string-intersperse remfields ",")))) - (debug:print-info 11 *default-log-port* "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr) - - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (a . x) - (set! res (apply vector a x))) - db - (conc "SELECT " keystr " FROM runs WHERE id=?;") - run-id))) - (debug:print-info 11 *default-log-port* "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr) - (let ((finalres (vector header res))) - ;; (hash-table-set! *run-info-cache* run-id finalres) - finalres))) - -(define (db:set-comment-for-run dbstruct run-id comment) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:execute db "UPDATE runs SET comment=? WHERE id=?;" comment ;; (sdb:qry 'getid comment) - run-id)))) - -;; does not (obviously!) removed dependent data. But why not!!? -(define (db:delete-run dbstruct run-id) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:with-transaction - db - (lambda () - (sqlite3:execute db "DELETE FROM test_steps WHERE test_id IN (SELECT id FROM tests WHERE run_id=?);" run-id) - (sqlite3:execute db "DELETE FROM test_data WHERE test_id IN (SELECT id FROM tests WHERE run_id=?);" run-id) - (sqlite3:execute db "UPDATE tests SET state='DELETED',comment='' WHERE run_id=?;" run-id) - ;; (db:delay-if-busy dbdat) - (sqlite3:execute db "UPDATE runs SET state='deleted',comment='' WHERE id=?;" run-id)))))) - -(define (db:update-run-event_time dbstruct run-id) - (db:with-db - dbstruct #f #t - (lambda (db) - (sqlite3:execute db "UPDATE runs SET event_time=strftime('%s','now') WHERE id=?;" run-id)))) - -(define (db:lock/unlock-run dbstruct run-id lock unlock user) - (db:with-db - dbstruct #f #t - (lambda (db) - (let ((newlockval (if lock "locked" - (if unlock - "unlocked" - "locked")))) ;; semi-failsafe - (sqlite3:execute db "UPDATE runs SET state=? WHERE id=?;" newlockval run-id) - (sqlite3:execute db "INSERT INTO access_log (user,accessed,args) VALUES(?,strftime('%s','now'),?);" - user (conc newlockval " " run-id)) - (debug:print-info 1 *default-log-port* "" newlockval " run number " run-id))))) - -(define (db:set-run-status dbstruct run-id status msg) - (db:with-db - dbstruct #f #f - (lambda (db) - (if msg - (sqlite3:execute db "UPDATE runs SET status=?,comment=? WHERE id=?;" status msg run-id) - (sqlite3:execute db "UPDATE runs SET status=? WHERE id=?;" status run-id))))) - -(define (db:set-run-state-status dbstruct run-id state status ) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:execute db "UPDATE runs SET status=?,state=? WHERE id=?;" status state run-id)))) - - - -(define (db:get-run-status dbstruct run-id) - (let ((res "n/a")) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (status) - (set! res status)) - db - "SELECT status FROM runs WHERE id=?;" - run-id) - res)))) - -(define (db:get-run-state dbstruct run-id) - (let ((res "n/a")) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (status) - (set! res status)) - db - "SELECT state FROM runs WHERE id=?;" - run-id) - res)))) - - -;;====================================================================== -;; K E Y S -;;====================================================================== - -;; get key val pairs for a given run-id -;; ( (FIELDNAME1 keyval1) (FIELDNAME2 keyval2) ... ) -(define (db:get-key-val-pairs dbstruct run-id) - (let* ((keys (db:get-keys dbstruct)) - (res '())) - (db:with-db - dbstruct #f #f - (lambda (db) - (for-each - (lambda (key) - (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) - (sqlite3:for-each-row - (lambda (key-val) - (set! res (cons (list key (if (string? key-val) key-val "")) res))) ;; replace non-string bad values with empty string to prevent crashes. This scenario can happen when Megatest is killed on updating the db - db qry run-id))) - keys))) - (reverse res))) - -;; get key vals for a given run-id -(define (db:get-key-vals dbstruct run-id) - (let* ((keys (db:get-keys dbstruct)) - (res '())) - (db:with-db - dbstruct #f #f - (lambda (db) - (for-each - (lambda (key) - (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (key-val) - (set! res (cons (if (string? key-val) key-val "") res))) ;; check that the key-val is a string for cases where a crash injected bad data in the megatest.db - db qry run-id))) - keys))) - (let ((final-res (reverse res))) - (hash-table-set! *keyvals* run-id final-res) - final-res))) - -;; The target is keyval1/keyval2..., cached in *target* as it is used often -(define (db:get-target dbstruct run-id) - (let* ((keyvals (db:get-key-vals dbstruct run-id)) - (thekey (string-intersperse (map (lambda (x)(if x x "-na-")) keyvals) "/"))) - thekey)) - -;; Get run-ids for runs with same target but different runnames and NOT run-id -;; -(define (db:get-prev-run-ids dbstruct run-id) - (let* ((keyvals (db:get-key-val-pairs dbstruct run-id)) - (kvalues (map cadr keyvals)) - (keys (db:get-keys dbstruct)) - (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND "))) - (let ((prev-run-ids '())) - (if (null? keyvals) - '() - (begin - (db:with-db dbstruct #f #f ;; #f means work with the zeroth db - i.e. the runs db - (lambda (db) - (apply sqlite3:for-each-row - (lambda (id) - (set! prev-run-ids (cons id prev-run-ids))) - db - (conc "SELECT id FROM runs WHERE " qrystr " AND state != 'deleted' AND id != ?;") - (append kvalues (list run-id))))) - prev-run-ids))))) - -;;====================================================================== -;; T E S T S -;;====================================================================== - -;; states and statuses are lists, turn them into ("PASS","FAIL"...) and use NOT IN -;; i.e. these lists define what to NOT show. -;; states and statuses are required to be lists, empty is ok -;; not-in #t = above behaviour, #f = must match -;; mode: -;; 'dashboard - use state = 'COMPLETED' AND status in ( statuses ) OR state in ( states ) -;; -(define (db:get-tests-for-run dbstruct run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode) - (let* ((qryvalstr (case qryvals - ((shortlist) "id,run_id,testname,item_path,state,status") - ((#f) db:test-record-qry-selector) ;; "id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment") - (else qryvals))) - (res '()) - ;; if states or statuses are null then assume match all when not-in is false - (states-qry (if (null? states) - #f - (conc " state " - (if (eq? mode 'dashboard) - " IN ('" - (if not-in - " NOT IN ('" - " IN ('")) - (string-intersperse states "','") - "')"))) - (statuses-qry (if (null? statuses) - #f - (conc " status " - (if (eq? mode 'dashboard) - " IN ('" - (if not-in - " NOT IN ('" - " IN ('") ) - (string-intersperse statuses "','") - "')"))) - (interim-qry (conc " AND " (if not-in "NOT " "") "( state='COMPLETED' " (if statuses-qry (conc " AND " statuses-qry " ) ") " ) ") - (if states-qry - (conc (if not-in " AND " " OR ") states-qry ) ;; " ) ") - ""))) - (states-statuses-qry - (cond - ((and states-qry statuses-qry) - (case mode - ((dashboard) - (if not-in - (conc " AND (state='COMPLETED' AND status NOT IN ('" (string-intersperse statuses "','") "')) " - " OR (state != 'COMPLETED' AND state NOT IN ('" (string-intersperse states "','") "')) ") - (conc " AND (state='COMPLETED' AND status IN ('" (string-intersperse statuses "','") "')) " - " OR (state NOT IN ('COMPLETED','DELETED') AND state IN ('" (string-intersperse states "','") "')) "))) - (else (conc " AND ( " states-qry " AND " statuses-qry " ) ")))) - (states-qry - (case mode - ((dashboard) (conc " AND " (if not-in "NOT " "") " state IN ('" (string-intersperse states "','") "') ")) ;; interim-qry) - (else (conc " AND " states-qry)))) - (statuses-qry - (case mode - ((dashboard) (conc " AND " (if not-in "NOT " "") " status IN ('" (string-intersperse statuses "','") "') ")) ;; interim-qry) - (else (conc " AND " statuses-qry)))) - (else ""))) - (tests-match-qry (tests:match->sqlqry testpatt)) - (qry (conc "SELECT " qryvalstr - (if run-id - " FROM tests WHERE run_id=? " - " FROM tests WHERE ? > 0 ") ;; should work? - (if last-update " " " AND state != 'DELETED' ") ;; if using last-update we want deleted tests? - states-statuses-qry - (if tests-match-qry (conc " AND (" tests-match-qry ") ") "") - (if last-update (conc " AND last_update >= " last-update " ") "") - (case sort-by - ((rundir) " ORDER BY length(rundir) ") - ((testname) (conc " ORDER BY testname " (if sort-order (conc sort-order ",") "") " item_path ")) - ((statestatus) (conc " ORDER BY state " (if sort-order (conc sort-order ",") "") " status ")) - ((event_time) " ORDER BY event_time ") - (else (if (string? sort-by) - (conc " ORDER BY " sort-by " ") - " "))) - (if sort-order sort-order " ") - (if limit (conc " LIMIT " limit) " ") - (if offset (conc " OFFSET " offset) " ") - ";" - ))) - (debug:print-info 8 *default-log-port* "db:get-tests-for-run run-id=" run-id ", qry=" qry) - (let* ((res (db:with-db dbstruct run-id #f - (lambda (db) - ;; (let* ((stmth (db:get-cache-stmth dbstruct db qry))) ;; due to use of last-update we can't efficiently cache this query - (reverse - (sqlite3:fold-row - (lambda (res . row) - ;; id run-id testname state status event-time host cpuload - ;; diskfree uname rundir item-path run-duration final-logf comment) - (cons (list->vector row) res)) - '() - db qry ;; stmth - (or run-id 1) ;; 1 > 0 , for the case where we are seeking tests matching criteral for all runs - )))))) - (case qryvals - ((shortlist)(map db:test-short-record->norm res)) - ((#f) res) - (else res))))) - -(define (db:test-short-record->norm inrec) - ;; "id,run_id,testname,item_path,state,status" - ;; "id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment - (vector (vector-ref inrec 0) ;; id - (vector-ref inrec 1) ;; run_id - (vector-ref inrec 2) ;; testname - (vector-ref inrec 4) ;; state - (vector-ref inrec 5) ;; status - -1 "" -1 -1 "" "-" - (vector-ref inrec 3) ;; item-path - -1 "-" "-")) - -;; -;; 1. cache tests-match-qry -;; 2. compile qry and store in hash -;; 3. convert for-each-row to fold -;; -;; (define (db:get-tests-for-run-state-status dbstruct run-id testpatt) -;; (db:with-db -;; dbstruct run-id #f -;; (lambda (db) -;; (let* ((res '()) -;; (stmt-cache (dbr:dbstruct-stmt-cache dbstruct)) -;; (stmth (let* ((sh (db:hoh-get stmt-cache db testpatt))) -;; (or sh -;; (let* ((tests-match-qry (tests:match->sqlqry testpatt)) -;; (qry (conc "SELECT id,testname,item_path,state,status FROM tests WHERE run_id=? " -;; (if tests-match-qry (conc " AND (" tests-match-qry ") ") ""))) -;; (newsh (sqlite3:prepare db qry))) -;; (debug:print-info 8 *default-log-port* "db:get-tests-for-run qry=" qry) -;; (db:hoh-set! stmt-cache db testpatt newsh) -;; newsh))))) -;; (reverse -;; (sqlite3:fold-row -;; (lambda (res id testname item-path state status) -;; ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment -;; (cons (vector id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-") res)) -;; '() -;; stmth -;; run-id)))))) - -(define (db:get-tests-for-run-state-status dbstruct run-id testpatt #!optional (last-update 0)) - (let* ((res '()) - (tests-match-qry (tests:match->sqlqry testpatt)) - (qry (conc "SELECT id,testname,item_path,state,status,event_time,run_duration FROM tests WHERE run_id=? " - " AND last_update > ? " - (if tests-match-qry (conc " AND (" tests-match-qry ") ") "") - ))) - (debug:print-info 8 *default-log-port* "db:get-tests-for-run qry=" qry) - (db:with-db dbstruct run-id #f - (lambda (db) - (sqlite3:fold-row - (lambda (res id testname item-path state status event-time run-duration) - ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment - (cons (vector id run-id testname state status event-time "" -1 -1 "" "-" item-path run-duration "-" "-") res)) - '() - db - qry - run-id - (or last-update 0)))))) - -(define (db:get-testinfo-state-status dbstruct run-id test-id) - (let ((res #f)) - (db:with-db dbstruct run-id #f - (lambda (db) - (sqlite3:for-each-row - (lambda (run-id testname item-path state status) - ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment - (set! res (vector test-id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-"))) - db - "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=?;" - test-id))) - res)) - -;; get a useful subset of the tests data (used in dashboard -;; use db:mintest-get-{id ,run_id,testname ...} -;; -(define (db:get-tests-for-run-mindata dbstruct run-id testpatt states statuses not-in) - (db:get-tests-for-run dbstruct run-id testpatt states statuses #f #f not-in #f #f "id,run_id,testname,state,status,event_time,item_path" 0 #f)) - -;; do not use. -;; -(define (db:get-tests-for-runs dbstruct run-ids testpatt states statuses #!key (not-in #f)(qryvals #f)) - ;; (db:delay-if-busy) - (let ((res '())) - (for-each - (lambda (run-id) - (set! res (append - res - (db:get-tests-for-run dbstruct run-id testpatt states statuses #f #f not-in #f #f qryvals #f 'normal)))) - (if run-ids - run-ids - (db:get-all-run-ids dbstruct))) - res)) - -;; Convert calling routines to get list of run-ids and loop, do not use the get-tests-for-runs -;; - -(define (db:delete-test-records dbstruct run-id test-id) - (db:general-call dbstruct 'delete-test-step-records (list test-id)) - (db:general-call dbstruct 'delete-test-data-records (list test-id)) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:execute db "UPDATE tests SET state='DELETED',status='n/a',comment='' WHERE id=?;" test-id)))) - -;; -(define (db:delete-old-deleted-test-records dbstruct) - (let ((targtime (- (current-seconds) - (or (configf:lookup-number *configdat* "setup" "keep-deleted-records") - (* 30 24 60 60))))) ;; one month in the past - (db:with-db - dbstruct - 0 - #t - (lambda (db) - (sqlite3:with-transaction - db - (lambda () - (sqlite3:execute db "DELETE FROM test_steps WHERE test_id IN (SELECT id FROM tests WHERE state='DELETED' AND event_timenumber fieldname fields) - (if (null? fields) - #f - (let loop ((hed (car fields)) - (tal (cdr fields)) - (indx 0)) - (if (equal? fieldname hed) - indx - (if (null? tal) - #f - (loop (car tal)(cdr tal)(+ indx 1))))))) - -(define db:test-record-qry-selector (string-intersperse db:test-record-fields ",")) - -(define (db:update-tesdata-on-repilcate-db dbstruct old-lt new-lt) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:execute db "UPDATE tests SET rundir= replace(rundir,?,?), shortdir=replace(shortdir,?,?);" - old-lt new-lt old-lt new-lt)))) - -;; NOTE: Use db:test-get* to access records -;; NOTE: This needs rundir decoding? Decide, decode here or where used? For the moment decode where used. -(define (db:get-all-tests-info-by-run-id dbstruct run-id) - (let* ((res '())) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir attemptnum archived) - ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 - (set! res (cons (vector id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir attemptnum archived) - res))) - db - (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE state != 'DELETED' AND run_id=?;") - run-id))) - res)) - -(define (db:replace-test-records dbstruct run-id testrecs) - (db:with-db dbstruct run-id #t - (lambda (db) - (let* ((qmarks (string-intersperse (make-list (length db:test-record-fields) "?") ",")) - (qrystr (conc "INSERT OR REPLACE INTO tests (" db:test-record-qry-selector ") VALUES (" qmarks ") WHERE run_id=?;")) - (qry (sqlite3:prepare db qrystr))) - (debug:print 0 *default-log-port* "INFO: migrating test records for run with id " run-id) - (sqlite3:with-transaction - db - (lambda () - (for-each - (lambda (rec) - ;; (debug:print 0 *default-log-port* "INFO: Inserting values: " (string-intersperse (map conc (vector->list rec)) ",") "\n") - (apply sqlite3:execute qry (append (vector->list rec)(list run-id)))) - testrecs))) - (sqlite3:finalize! qry))))) - -;; map a test-id into the proper range -;; -(define (db:adj-test-id mtdb min-test-id test-id) - (if (>= test-id min-test-id) - test-id - (let loop ((new-id min-test-id)) - (let ((test-id-found #f)) - (sqlite3:for-each-row - (lambda (id) - (set! test-id-found id)) - (db:dbdat-get-db mtdb) - "SELECT id FROM tests WHERE id=?;" - new-id) - ;; if test-id-found then need to try again - (if test-id-found - (loop (+ new-id 1)) - (begin - (debug:print-info 0 *default-log-port* "New test id " new-id " selected for test with id " test-id) - (sqlite3:execute mtdb "UPDATE tests SET id=? WHERE id=?;" new-id test-id))))))) - -;; move test ids into the 30k * run_id range -;; -(define (db:prep-megatest.db-adj-test-ids mtdb run-id testrecs) - (debug:print-info 0 *default-log-port* "Adjusting test ids in megatest.db for run " run-id) - (let ((min-test-id (* run-id 30000))) - (for-each - (lambda (testrec) - (let* ((test-id (vector-ref testrec (db:field->number "id" db:test-record-fields)))) - (db:adj-test-id (db:dbdat-get-db mtdb) min-test-id test-id))) - testrecs))) - -;; 1. move test ids into the 30k * run_id range -;; 2. move step ids into the 30k * run_id range -;; -(define (db:prep-megatest.db-for-migration mtdb) - (let* ((run-ids (db:get-all-run-ids mtdb))) - (for-each - (lambda (run-id) - (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id))) - (db:prep-megatest.db-adj-test-ids (db:dbdat-get-db mtdb) run-id testrecs))) - run-ids))) - -;; Get test data using test_id, run-id is not used -;; -(define (db:get-test-info-by-id dbstruct run-id test-id) - (db:with-db - dbstruct - #f ;; run-id - #f - (lambda (db) - (let ((res #f)) - (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test - (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update) - ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 - (set! res (vector id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update))) - db - (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id=?;") - test-id) - res)))) - -;; Use db:test-get* to access -;; Get test data using test_ids. NB// Only works within a single run!! -;; -(define (db:get-test-info-by-ids dbstruct run-id test-ids) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (let ((res '())) - (sqlite3:for-each-row - (lambda (a . b) - ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 - (set! res (cons (apply vector a b) res))) - db - (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id in (" - (string-intersperse (map conc test-ids) ",") ");")) - res)))) - -(define (db:get-test-info dbstruct run-id test-name item-path) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (a . b) - (set! res (apply vector a b))) - db - (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE testname=? AND item_path=? AND run_id=?;") - test-name item-path run-id) - res)))) - -(define (db:test-get-rundir-from-test-id dbstruct run-id test-id) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (db:first-result-default - db - "SELECT rundir FROM tests WHERE id=?;" - #f ;; default result - test-id)))) - -(define (db:get-test-times dbstruct run-name target) - (let ((res `()) - (qry (conc "select testname, item_path, run_duration, " - (string-join (db:get-keys dbstruct) " || '/' || ") - " as target from tests inner join runs on tests.run_id = runs.id where runs.runname = ? and target = ? ;"))) - (db:with-db - dbstruct - #f ;; this is for the main runs db - #f ;; does not modify db - (lambda (db) - (sqlite3:for-each-row - (lambda (test-name item-path test-time target ) - (set! res (cons (vector test-name item-path test-time) res))) - db - qry - run-name target) - res)))) - -;;====================================================================== -;; S T E P S -;;====================================================================== - -(define (db:teststep-set-status! dbstruct run-id test-id teststep-name state-in status-in comment logfile) - (db:with-db - dbstruct - run-id - #t - (lambda (db) - (sqlite3:execute - db - "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);" - test-id teststep-name state-in status-in (current-seconds) - (if comment comment "") - (if logfile logfile ""))))) - - - -(define (db:delete-steps-for-test! dbstruct run-id test-id) - ;; TODO: figure out why status is the key field rather than state (note: CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state) ) - (db:with-db - dbstruct - run-id - #t - (lambda (db) - (sqlite3:execute - db - "UPDATE test_steps set status='DELETED' where test_id=?" ;; and run_id=? !! - run_id not in table (bummer) TODO: get run_id into schema for test_steps - test-id)))) - - -;; db-get-test-steps-for-run -(define (db:get-steps-for-test dbstruct run-id test-id) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (let* ((res '())) - (sqlite3:for-each-row - (lambda (id test-id stepname state status event-time logfile comment) - (set! res (cons (vector id test-id stepname state status event-time (if (string? logfile) logfile "") comment) res))) - db - "SELECT id,test_id,stepname,state,status,event_time,logfile,comment FROM test_steps WHERE status != 'DELETED' AND test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; - test-id) - (reverse res))))) - - (define (db:get-steps-info-by-id dbstruct test-step-id) - (db:with-db - dbstruct - #f - #f - (lambda (db) - (let* ((res (vector #f #f #f #f #f #f #f #f #f))) - (sqlite3:for-each-row - (lambda (id test-id stepname state status event-time logfile comment last-update) - (set! res (vector id test-id stepname state status event-time (if (string? logfile) logfile "") comment last-update))) - db - "SELECT id,test_id,stepname,state,status,event_time,logfile,comment,last_update FROM test_steps WHERE id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; - test-step-id) - res)))) - -(define (db:get-steps-data dbstruct run-id test-id) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (let ((res '())) - (sqlite3:for-each-row - (lambda (id test-id stepname state status event-time logfile) - (set! res (cons (vector id test-id stepname state status event-time (if (string? logfile) logfile "")) res))) - db - "SELECT id,test_id,stepname,state,status,event_time,logfile FROM test_steps WHERE status != 'DELETED' AND test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; - test-id) - (reverse res))))) - -;;====================================================================== -;; T E S T D A T A -;;====================================================================== - -(define (db:get-data-info-by-id dbstruct test-data-id) - (let* ((stmt "SELECT id,test_id, category, variable, value, expected, tol, units, comment, status, type, last_update FROM test_data WHERE id=? ORDER BY id ASC;")) ;; event_time DESC,id ASC; - (db:with-db - dbstruct - #f - #f - (lambda (db) - (let* ((stmth (db:get-cache-stmth dbstruct db stmt)) - (res (sqlite3:fold-row - (lambda (res id test-id category variable value expected tol units comment status type last-update) - (vector id test-id category variable value expected tol units comment status type last-update)) - (vector #f #f #f #f #f #f #f #f #f #f #f #f) - stmth - test-data-id))) - res))))) - -;; WARNING: Do NOT call this for the parent test on an iterated test -;; Roll up test_data pass/fail results -;; look at the test_data status field, -;; if all are pass (any case) and the test status is PASS or NULL or '' then set test status to PASS. -;; if one or more are fail (any case) then set test status to PASS, non "pass" or "fail" are ignored -(define (db:test-data-rollup dbstruct run-id test-id status) - (let* ((fail-count 0) - (pass-count 0)) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (fcount pcount) - (set! fail-count fcount) - (set! pass-count pcount)) - db - "SELECT (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'fail') AS fail_count, - (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'pass') AS pass_count;" - test-id test-id) - ;; Now rollup the counts to the central megatest.db - (db:general-call dbstruct 'pass-fail-counts (list pass-count fail-count test-id)) - ;; if the test is not FAIL then set status based on the fail and pass counts. - (db:general-call dbstruct 'test_data-pf-rollup (list test-id test-id test-id test-id)))))) - -;; each section is a rule except "final" which is the final result -;; -;; [rule-5] -;; operator in -;; section LogFileBody -;; desc Output voltage -;; status OK -;; expected 1.9 -;; measured 1.8 -;; type +/- -;; tolerance 0.1 -;; pass 1 -;; fail 0 -;; -;; [final] -;; exit-code 6 -;; exit-status SKIP -;; message If flagged we are asking for this to exit with code 6 -;; -;; recorded in steps table: -;; category: stepname -;; variable: rule-N -;; value: measured -;; expected: expected -;; tol: tolerance -;; units: - -;; comment: desc or message -;; status: status -;; type: type -;; -(define (db:logpro-dat->csv dat stepname) - (let ((res '())) - (for-each - (lambda (entry-name) - (if (equal? entry-name "final") - (set! res (append - res - (list - (list stepname - entry-name - (configf:lookup dat entry-name "exit-code") ;; 0 ;; Value - 0 ;; 1 ;; Expected - 0 ;; 2 ;; Tolerance - "n/a" ;; 3 ;; Units - (configf:lookup dat entry-name "message") ;; 4 ;; Comment - (configf:lookup dat entry-name "exit-status") ;; 5 ;; Status - "logpro" ;; 6 ;; Type - )))) - (let* ((value (or (configf:lookup dat entry-name "measured") "n/a")) - (expected (or (configf:lookup dat entry-name "expected") 0.0)) - (tolerance (or (configf:lookup dat entry-name "tolerance") 0.0)) - (comment (or (configf:lookup dat entry-name "comment") - (configf:lookup dat entry-name "desc") "n/a")) - (status (or (configf:lookup dat entry-name "status") "n/a")) - (type (or (configf:lookup dat entry-name "expected") "n/a"))) - (set! res (append - res - (list (list stepname - entry-name - value ;; 0 - expected ;; 1 - tolerance ;; 2 - "n/a" ;; 3 Units - comment ;; 4 - status ;; 5 - type ;; 6 - ))))))) - (hash-table-keys dat)) - res)) - -;; $MT_MEGATEST -load-test-data << EOF -;; foo,bar, 1.2, 1.9, > -;; foo,rab, 1.0e9, 10e9, 1e9 -;; foo,bla, 1.2, 1.9, < -;; foo,bal, 1.2, 1.2, < , ,Check for overload -;; foo,alb, 1.2, 1.2, <= , Amps,This is the high power circuit test -;; foo,abl, 1.2, 1.3, 0.1 -;; foo,bra, 1.2, pass, silly stuff -;; faz,bar, 10, 8mA, , ,"this is a comment" -;; EOF - -(define (db:csv->test-data dbstruct run-id test-id csvdata) - (debug:print 4 *default-log-port* "test-id " test-id ", csvdata: " csvdata) - (db:with-db - dbstruct #f #f - (lambda (db) - (let* ((csvlist (csv->list (make-csv-reader - (open-input-string csvdata) - '((strip-leading-whitespace? #t) - (strip-trailing-whitespace? #t)))))) ;; (csv->list csvdata))) - (for-each - (lambda (csvrow) - (let* ((padded-row (take (append csvrow (list #f #f #f #f #f #f #f #f #f)) 9)) - (category (list-ref padded-row 0)) - (variable (list-ref padded-row 1)) - (value (any->number-if-possible (list-ref padded-row 2))) - (expected (any->number-if-possible (list-ref padded-row 3))) - (tol (any->number-if-possible (list-ref padded-row 4))) ;; >, <, >=, <=, or a number - (units (list-ref padded-row 5)) - (comment (list-ref padded-row 6)) - (status (let ((s (list-ref padded-row 7))) - (if (and (string? s)(or (string-match (regexp "^\\s*$") s) - (string-match (regexp "^n/a$") s))) - #f - s))) ;; if specified on the input then use, else calculate - (type (list-ref padded-row 8))) - ;; look up expected,tol,units from previous best fit test if they are all either #f or '' - (debug:print 4 *default-log-port* "BEFORE: category: " category " variable: " variable " value: " value - ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment " type: " type) - - (if (and (or (not expected)(equal? expected "")) - (or (not tol) (equal? expected "")) - (or (not units) (equal? expected ""))) - (let-values (((new-expected new-tol new-units)(tdb:get-prev-tol-for-test #f test-id category variable))) - (set! expected new-expected) - (set! tol new-tol) - (set! units new-units))) - - (debug:print 4 *default-log-port* "AFTER: category: " category " variable: " variable " value: " value - ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment) - ;; calculate status if NOT specified - (if (and (not status)(number? expected)(number? value)) ;; need expected and value to be numbers - (if (number? tol) ;; if tol is a number then we do the standard comparison - (let* ((max-val (+ expected tol)) - (min-val (- expected tol)) - (result (and (>= value min-val)(<= value max-val)))) - (debug:print 4 *default-log-port* "max-val: " max-val " min-val: " min-val " result: " result) - (set! status (if result "pass" "fail"))) - (set! status ;; NB// need to assess each one (i.e. not return operator since need to act if not valid op. - (case (string->symbol tol) ;; tol should be >, <, >=, <= - ((>) (if (> value expected) "pass" "fail")) - ((<) (if (< value expected) "pass" "fail")) - ((>=) (if (>= value expected) "pass" "fail")) - ((<=) (if (<= value expected) "pass" "fail")) - (else (conc "ERROR: bad tol comparator " tol)))))) - (debug:print 4 *default-log-port* "AFTER2: category: " category " variable: " variable " value: " value - ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment) - ;; (db:delay-if-busy dbdat) - (sqlite3:execute db "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment,status,type) VALUES (?,?,?,?,?,?,?,?,?,?);" - test-id category variable value expected tol units (if comment comment "") status type))) - csvlist))))) - -;; This routine moved from tdb.scm, tdb:read-test-data -;; -(define (db:read-test-data dbstruct run-id test-id categorypatt) - (let* ((res '())) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (id test_id category variable value expected tol units comment status type) - (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) - db - "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt) - (reverse res))))) - -;; This routine moved from tdb.scm, :read-test-data -;; -(define (db:read-test-data-varpatt dbstruct run-id test-id categorypatt varpatt) - (let* ((res '())) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (id test_id category variable value expected tol units comment status type) - (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) - db - "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? AND variable LIKE ? ORDER BY category,variable;" test-id categorypatt varpatt) - (reverse res))))) - - -;;====================================================================== -;; Misc. test related queries -;;====================================================================== - -(define (db:get-run-ids-matching-target dbstruct keynames target res runname testpatt statepatt statuspatt) - (db:with-db - dbstruct #f #f - (lambda (db) - (let* ((row-ids '()) - (keystr (string-intersperse - (map (lambda (key val) - (conc key " like '" val "'")) - keynames - (string-split target "/")) - " AND ")) - ;; (testqry (tests:match->sqlqry testpatt)) - (runsqry (sqlite3:prepare db (conc "SELECT id FROM runs WHERE " keystr " AND runname LIKE '" runname "';")))) - ;; (debug:print 8 *default-log-port* "db:test-get-paths-matching-keynames-target-new\n runsqry=" runsqry "\n tstsqry=" testqry) - (sqlite3:for-each-row - (lambda (rid) - (set! row-ids (cons rid row-ids))) - runsqry) - (sqlite3:finalize! runsqry) - row-ids)))) - -;; finds latest matching all patts for given run-id -;; -(define (db:test-get-paths-matching-keynames-target-new dbstruct run-id keynames target res testpatt statepatt statuspatt runname) - (let* ((testqry (tests:match->sqlqry testpatt)) - (tstsqry (conc "SELECT rundir FROM tests WHERE run_id=? AND " testqry " AND state LIKE '" statepatt "' AND status LIKE '" statuspatt "' ORDER BY event_time ASC;"))) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (sqlite3:for-each-row - (lambda (p) - (set! res (cons p res))) - db - tstsqry - run-id) - res)))) - -(define (db:test-toplevel-num-items dbstruct run-id testname) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (let ((res 0)) - (sqlite3:for-each-row - (lambda (num-items) - (set! res num-items)) - db - "SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND state NOT IN ('DELETED');" - run-id - testname) - res)))) - -;;====================================================================== -;; QUEUE UP META, TEST STATUS AND STEPS REMOTE ACCESS -;;====================================================================== - -;; NOTE: Can remove the regex and base64 encoding for zmq -(define (db:obj->string obj #!key (transport 'http)) - (case transport - ;; ((fs) obj) - ((http fs) - (string-substitute - (regexp "=") "_" - (base64:base64-encode - (z3:encode-buffer - (with-output-to-string - (lambda ()(serialize obj))))) ;; BB: serialize - this is what causes problems between different builds of megatest communicating. serialize is sensitive to binary image of mtest. - #t)) - ((zmq nmsg)(with-output-to-string (lambda ()(serialize obj)))) - (else obj))) ;; rpc - -(define (db:string->obj msg #!key (transport 'http)) - (case transport - ;; ((fs) msg) - ((http fs) - (if (string? msg) - (with-input-from-string - (z3:decode-buffer - (base64:base64-decode - (string-substitute - (regexp "_") "=" msg #t))) - (lambda ()(deserialize))) - (begin - (debug:print-error 0 *default-log-port* "reception failed. Received \"" msg "\" but cannot translate it.") - (print-call-chain (current-error-port)) - msg))) ;; crude reply for when things go awry - ((zmq nmsg)(with-input-from-string msg (lambda ()(deserialize)))) - (else msg))) ;; rpc - -;; ; This is to be the big daddy call NOPE: Replaced by db:set-state-status-and-roll-up-items -;; ; -;; define (db:test-set-state-status dbstruct run-id test-id state status msg) -;; (let ((dbdat (db:get-db dbstruct run-id))) -;; (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) -;; (db:general-call dbdat 'set-test-start-time (list test-id))) -;; ;; (if msg -;; ;; (db:general-call dbdat 'state-status-msg (list state status msg test-id)) -;; ;; (db:general-call dbdat 'state-status (list state status test-id))) -;; (db:set-state-status-and-roll-up-items dbstruct run-id test-id #f state status msg) -;; ;; process the test_data table -;; (if (and test-id state status (equal? status "AUTO")) -;; (db:test-data-rollup dbstruct run-id test-id status)) -;; (mt:process-triggers dbstruct run-id test-id state status))) - -;; state is the priority rollup of all states -;; status is the priority rollup of all completed statesfu -;; -;; if test-name is an integer work off that instead of test-name test-path -;; -(define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) - ;; establish info on incoming test followed by info on top level test - ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met - (let* ((testdat (if (number? test-name) - (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id - (db:get-test-info dbstruct run-id test-name item-path))) - (test-id (db:test-get-id testdat)) - (test-name (if (number? test-name) - (db:test-get-testname testdat) - test-name)) - (item-path (db:test-get-item-path testdat)) - (tl-testdat (db:get-test-info dbstruct run-id test-name "")) - (tl-test-id (if tl-testdat - (db:test-get-id tl-testdat) - #f))) - (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) - (db:general-call dbstruct 'set-test-start-time (list test-id))) - (mutex-lock! *db-transaction-mutex*) - (db:with-db - dbstruct #f #f - (lambda (db) - (let ((tr-res - (sqlite3:with-transaction - db - (lambda () - ;; NB// Pass the db so it is part fo the transaction - (db:test-set-state-status db run-id test-id state status comment) ;; this call sets the item state/status - (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item - (let* ((state-status-counts (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test - (state-stauses (db:roll-up-rules state-status-counts state status)) - (newstate (car state-stauses)) - (newstatus (cadr state-stauses))) - (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path" newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts) " state-status-counts: " - (apply conc - (map (lambda (x) - (conc - (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | ")) - state-status-counts))); end debug:print - - (if tl-test-id - (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct - )))))) - (mutex-unlock! *db-transaction-mutex*) - (if (and test-id state status (equal? status "AUTO")) - (db:test-data-rollup dbstruct run-id test-id status)) - tr-res))))) - -(define (db:roll-up-rules state-status-counts state status) - (let* ((running (length (filter (lambda (x) - (member (dbr:counts-state x) *common:running-states*)) - state-status-counts))) - (bad-not-started (length (filter (lambda (x) - (and (equal? (dbr:counts-state x) "NOT_STARTED") - (not (member (dbr:counts-status x) *common:not-started-ok-statuses*)))) - state-status-counts))) - (all-curr-states (common:special-sort ;; worst -> best (sort of) - (delete-duplicates - (if (and state (not (member state *common:dont-roll-up-states*))) - (cons state (map dbr:counts-state state-status-counts)) - (map dbr:counts-state state-status-counts))) - *common:std-states* >)) - (all-curr-statuses (common:special-sort ;; worst -> best - (delete-duplicates - (if (and state status (not (member state *common:dont-roll-up-states*))) - (cons status (map dbr:counts-status state-status-counts)) - (map dbr:counts-status state-status-counts))) - *common:std-statuses* >)) - (non-completes (filter (lambda (x) - (not (member x (cons "COMPLETED" *common:dont-roll-up-states*)))) - all-curr-states)) - (preq-fails (filter (lambda (x) - (equal? x "PREQ_FAIL")) - all-curr-statuses)) - (num-non-completes (length non-completes)) - (newstate (cond - ((> running 0) "RUNNING") ;; anything running, call the situation running - ((> (length preq-fails) 0) "NOT_STARTED") - ((> bad-not-started 0) "COMPLETED") ;; we have an ugly situation, it is completed in the sense we cannot do more. - ((> num-non-completes 0) (car non-completes)) ;; (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED - (else (car all-curr-states)))) - (newstatus (cond - ((> (length preq-fails) 0) "PREQ_FAIL") - ((or (> bad-not-started 0) - (and (equal? newstate "NOT_STARTED") - (> num-non-completes 0))) - "STARTED") - (else (car all-curr-statuses))))) - (debug:print-info 2 *default-log-port* - "\n--> probe db:set-state-status-and-roll-up-items: " - "\n--> state-status-counts: "(map dbr:counts->alist state-status-counts) - "\n--> running: "running - "\n--> bad-not-started: "bad-not-started - "\n--> non-non-completes: "num-non-completes - "\n--> non-completes: "non-completes - "\n--> all-curr-states: "all-curr-states - "\n--> all-curr-statuses: "all-curr-statuses - "\n--> newstate "newstate - "\n--> newstatus "newstatus - "\n\n") - - ;; NB// Pass the db so it is part of the transaction - (list newstate newstatus))) - -(define (db:set-state-status-and-roll-up-run dbstruct run-id curr-state curr-status) - (mutex-lock! *db-transaction-mutex*) - (db:with-db - dbstruct #f #f - (lambda (db) - (let ((tr-res - (sqlite3:with-transaction - db - (lambda () - (let* ((state-status-counts (db:get-all-state-status-counts-for-run dbstruct run-id)) - (state-stauses (db:roll-up-rules state-status-counts #f #f )) - (newstate (car state-stauses)) - (newstatus (cadr state-stauses))) - (if (or (not (eq? newstate curr-state)) (not (eq? newstatus curr-status))) - (db:set-run-state-status dbstruct run-id newstate newstatus ))))))) - (mutex-unlock! *db-transaction-mutex*) - tr-res)))) - - -(define (db:get-all-state-status-counts-for-run dbstruct run-id) - (let* ((test-count-recs (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:map-row - (lambda (state status count) - (make-dbr:counts state: state status: status count: count)) - db - "SELECT state,status,count(id) FROM tests WHERE run_id=? GROUP BY state,status;" - run-id ))))) - test-count-recs)) - - -;; BBnote: db:get-all-state-status-counts-for-test returns dbr:counts object aggregating state and status of items of a given test, *not including rollup state/status* -;; -;; NOTE: This is called within a transaction -;; -(define (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path item-state-in item-status-in) - (let* ((test-info (db:get-test-info dbstruct run-id test-name item-path)) - (item-state (or item-state-in (db:test-get-state test-info))) - (item-status (or item-status-in (db:test-get-status test-info))) - (other-items-count-recs (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:map-row - (lambda (state status count) - (make-dbr:counts state: state status: status count: count)) - db - ;; ignore current item because we have changed its value in the current transation so this select will see the old value. - "SELECT state,status,count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND item_path !=? GROUP BY state,status;" - run-id test-name item-path)))) - - ;; add current item to tally outside of sql query - (match-countrec-lambda (lambda (countrec) - (and (equal? (dbr:counts-state countrec) item-state) - (equal? (dbr:counts-status countrec) item-status)))) - - (already-have-count-rec-list - (filter match-countrec-lambda other-items-count-recs)) ;; will have either 0 or 1 count recs depending if another item shares this item's state/status - - (updated-count-rec (if (null? already-have-count-rec-list) - (make-dbr:counts state: item-state status: item-status count: 1) - (let* ((our-count-rec (car already-have-count-rec-list)) - (new-count (add1 (dbr:counts-count our-count-rec)))) - (make-dbr:counts state: item-state status: item-status count: new-count)))) - - (nonmatch-countrec-lambda (lambda (countrec) (not (match-countrec-lambda countrec)))) - - (unrelated-rec-list - (filter nonmatch-countrec-lambda other-items-count-recs))) - - (cons updated-count-rec unrelated-rec-list))) - -;; (define (db:get-all-item-states db run-id test-name) -;; (sqlite3:map-row -;; (lambda (a) a) -;; db -;; "SELECT DISTINCT state FROM tests WHERE item_path != '' AND state != 'DELETED' AND run_id=? AND testname=?" -;; run-id test-name)) -;; -;; (define (db:get-all-item-statuses db run-id test-name) -;; (sqlite3:map-row -;; (lambda (a) a) -;; db -;; "SELECT DISTINCT status FROM tests WHERE item_path != '' AND state != 'DELETED' AND state='COMPLETED' AND run_id=? AND testname=?" -;; run-id test-name)) - -(define (db:test-get-logfile-info dbstruct run-id test-name) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (path final_logf) - ;; (let ((path (sdb:qry 'getstr path-id)) - ;; (final_logf (sdb:qry 'getstr final_logf-id))) - (set! logf final_logf) - (set! res (list path final_logf)) - (if (directory? path) - (debug:print 2 *default-log-port* "Found path: " path) - (debug:print 2 *default-log-port* "No such path: " path))) ;; ) - db - "SELECT rundir,final_logf FROM tests WHERE testname=? AND item_path='' AND run_id=?;" - test-name run-id) - res)))) - -;;====================================================================== -;; A G R E G A T E D T R A N S A C T I O N D B W R I T E S -;;====================================================================== - -(define db:queries - (list '(update-run-duration "UPDATE tests SET run_duration=? WHERE id=?;") - - ;; TESTS - '(register-test "INSERT OR IGNORE INTO tests (run_id,testname,event_time,item_path,state,status) VALUES (?,?,strftime('%s','now'),?,'NOT_STARTED','n/a');") - ;; Test state and status - '(set-test-state "UPDATE tests SET state=? WHERE id=?;") - '(set-test-status "UPDATE tests SET state=? WHERE id=?;") - '(state-status "UPDATE tests SET state=?,status=? WHERE id=?;") ;; D/ONE - '(state-status-msg "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;") ;; DONE - ;; Test comment - '(set-test-comment "UPDATE tests SET comment=? WHERE id=?;") - '(set-test-start-time "UPDATE tests SET event_time=strftime('%s','now') WHERE id=?;") ;; DONE - '(pass-fail-counts "UPDATE tests SET pass_count=?,fail_count=? WHERE id=?;") - ;; test_data-pf-rollup is used to set a tests PASS/FAIL based on the pass/fail info from the steps - '(test_data-pf-rollup "UPDATE tests - SET status=CASE WHEN (SELECT fail_count FROM tests WHERE id=?) > 0 - THEN 'FAIL' - WHEN (SELECT pass_count FROM tests WHERE id=?) > 0 AND - (SELECT status FROM tests WHERE id=?) NOT IN ('WARN','FAIL') - THEN 'PASS' - ELSE status - END WHERE id=?;") ;; DONE - '(test-set-log "UPDATE tests SET final_logf=? WHERE id=?;") ;; DONE - ;; '(test-set-rundir-by-test-id "UPDATE tests SET rundir=? WHERE id=?") ;; DONE - ;; '(test-set-rundir "UPDATE tests SET rundir=? AND testname=? AND item_path=?;") ;; DONE - '(test-set-rundir-shortdir "UPDATE tests SET rundir=?,shortdir=? WHERE testname=? AND item_path=? AND run_id=?;") ;; BROKEN!!! NEEDS run-id - '(delete-tests-in-state ;; "DELETE FROM tests WHERE state=?;") ;; DONE - "UPDATE tests SET state='DELETED' WHERE state=?") - '(tests:test-set-toplog "UPDATE tests SET final_logf=? WHERE run_id=? AND testname=? AND item_path='';") - '(update-cpuload-diskfree "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;") ;; DONE - '(update-uname-host "UPDATE tests SET uname=?,host=? WHERE id=?;") ;; DONE - '(update-test-rundat "INSERT INTO test_rundat (test_id,update_time,cpuload,diskfree,diskusage,run_duration) VALUES (?,?,?,?,?,?);") - '(update-test-state "UPDATE tests SET state=? WHERE state=? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") - '(update-test-status "UPDATE tests SET status=? WHERE status like ? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") - ;; stuff for set-state-status-and-roll-up-items - '(update-pass-fail-counts "UPDATE tests - SET fail_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('FAIL','CHECK','INCOMPLETE','ABORT')), - pass_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('PASS','WARN','WAIVED')) - WHERE testname=? AND item_path='' AND run_id=?;") ;; DONE ;; BROKEN!!! NEEDS run-id - '(top-test-set-running "UPDATE tests SET state='RUNNING' WHERE testname=? AND item_path='' AND run_id=?;") ;; DONE ;; BROKEN!!! NEEDS run-id - - ;; NOT USED - ;; - ;; Might be the following top-test-set-per-pf-counts query could be better based off of something like this: - ;; - ;; select state,status,count(state) from tests where run_id=59 AND testname='runfirst' group by state,status; - ;; - '(top-test-set-per-pf-counts "UPDATE tests - SET state=CASE - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND status NOT IN ('n/a') - AND state in ('NOT_STARTED')) > 0 THEN 'UNKNOWN' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND (status NOT IN ('TEN_STRIKES','BLOCKED') OR status IN ('INCOMPLETE')) - AND state in ('RUNNING','NOT_STARTED','LAUNCHED','REMOTEHOSTSTART')) > 0 THEN 'RUNNING' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('COMPLETED','DELETED')) = 0 THEN 'COMPLETED' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state = 'NOT_STARTED') > 0 THEN 'NOT_STARTED' - ELSE 'UNKNOWN' END, - status=CASE - WHEN fail_count > 0 THEN 'FAIL' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state IN ('BLOCKED','INCOMPLETE')) > 0 THEN 'FAIL' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('DELETED') - AND status IN ('INCOMPLETE','ABORT')) > 0 THEN 'ABORT' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('DELETED') - AND status = 'AUTO') > 0 THEN 'AUTO' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('DELETED') - AND status IN ('STUCK/INCOMPLETE', 'INCOMPLETE')) > 0 THEN 'INCOMPLETE' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state IN ('COMPLETED','STUCK/INCOMPLETE','INCOMPLETE') - AND status = 'FAIL') > 0 THEN 'FAIL' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('DELETED') - AND status = 'CHECK') > 0 THEN 'CHECK' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('DELETED') - AND status = 'SKIP') > 0 THEN 'SKIP' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('DELETED') - AND status = 'WARN') > 0 THEN 'WARN' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('DELETED') - AND status = 'WAIVED') > 0 THEN 'WAIVED' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('DELETED') - AND status NOT IN ('PASS','FAIL','WARN','WAIVED')) > 0 THEN 'ABORT' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state='NOT_STARTED') > 0 THEN 'n/a' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state = 'COMPLETED' - AND status = 'PASS') > 0 THEN 'PASS' - WHEN pass_count > 0 AND fail_count=0 THEN 'PASS' - ELSE 'UNKNOWN' END - WHERE testname=? AND item_path='';") ;; DONE ;; BROKEN!!! NEEDS run-id - - ;; STEPS - '(delete-test-step-records "UPDATE test_steps SET status='DELETED' WHERE test_id=?;") - '(delete-test-data-records "UPDATE test_data SET status='DELETED' WHERE test_id=?;") ;; using status since no state field - )) - -(define (db:lookup-query qry-name) - (let ((q (alist-ref qry-name db:queries))) - (if q (car q) #f))) - -;; do not run these as part of the transaction -(define db:special-queries '(rollup-tests-pass-fail - ;; db:set-state-status-and-roll-up-items ;; WHY NOT!? - login - immediate - flush - sync - set-verbosity - killserver - )) - -(define (db:login dbstruct calling-path calling-version client-signature) - (cond - ((not (equal? calling-path *toppath*)) - (list #f "Login failed due to mismatch paths: " calling-path ", " *toppath*)) - ;; ((not (equal? *run-id* run-id)) - ;; (list #f "Login failed due to mismatch run-id: " run-id ", " *run-id*)) - ((not (equal? megatest-version calling-version)) - (list #t (conc "Login warning due to mismatch megatest version: " calling-version ", " megatest-version))) - - (else - (hash-table-set! *logged-in-clients* client-signature (current-seconds)) - '(#t "successful login")))) - -(define (db:general-call dbstruct stmtname params) - (let ((query (let ((q (alist-ref (if (string? stmtname) - (string->symbol stmtname) - stmtname) - db:queries))) - (if q (car q) #f)))) - (db:with-db - dbstruct #f #f - (lambda (db) - (apply sqlite3:execute db query params) - #t)))) - -;; get a summary of state and status counts to calculate a rollup -;; -(define (db:get-state-status-summary dbstruct run-id testname) - (let ((res '())) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (state status count) - (set! res (cons (vector state status count) res))) - db - "SELECT state,status,count(state) FROM tests WHERE run_id=? AND testname=? AND item_path='' GROUP BY state,status;" - run-id testname) - res)))) - -(define (db:get-latest-host-load dbstruct raw-hostname) - (let* ((hostname (string-substitute "\\..*$" "" raw-hostname)) - (res (cons -1 0))) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (cpuload update-time) (set! res (cons cpuload update-time))) - db - "SELECT tr.cpuload, tr.update_time FROM test_rundat tr, tests t WHERE t.host=? AND tr.cpuload != -1 AND tr.test_id=t.id ORDER BY tr.update_time DESC LIMIT 1;" - hostname))) res )) - -(define (db:set-top-level-from-items dbstruct run-id testname) - (let* ((summ (db:get-state-status-summary dbstruct run-id testname)) - (find (lambda (state status) - (if (null? summ) - #f - (let loop ((hed (car summ)) - (tal (cdr summ))) - (if (and (string-match state (vector-ref hed 0)) - (string-match status (vector-ref hed 1))) - hed - (if (null? tal) - #f - (loop (car tal)(cdr tal))))))))) - - - ;;; E D I T M E ! ! - - - (cond - ((> (find "COMPLETED" ".*") 0) #f)))) - - - -;; get the previous records for when these tests were run where all keys match but runname -;; NB// Merge this with test:get-previous-test-run-records? This one looks for all matching tests -;; can use wildcards. Also can likely be factored in with get test paths? -;; -;; Run this remotely!! -;; -(define (db:get-matching-previous-test-run-records dbstruct run-id test-name item-path) - (let* ((keys (db:get-keys dbstruct)) - (selstr (string-intersperse keys ",")) - (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND ")) - (keyvals #f) - (tests-hash (make-hash-table))) - ;; first look up the key values from the run selected by run-id - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (a . b) - (set! keyvals (cons a b))) - db - (conc "SELECT " selstr " FROM runs WHERE id=? ORDER BY event_time DESC;") run-id))) - (if (not keyvals) - '() - (let ((prev-run-ids '())) - (db:with-db - dbstruct #f #f - (lambda (db) - (apply sqlite3:for-each-row - (lambda (id) - (set! prev-run-ids (cons id prev-run-ids))) - db - (conc "SELECT id FROM runs WHERE " qrystr " AND id != ?;") (append keyvals (list run-id))))) - ;; collect all matching tests for the runs then - ;; extract the most recent test and return that. - (debug:print 4 *default-log-port* "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals - ", previous run ids found: " prev-run-ids) - (if (null? prev-run-ids) '() ;; no previous runs? return null - (let loop ((hed (car prev-run-ids)) - (tal (cdr prev-run-ids))) - (let ((results (db:get-tests-for-run dbstruct hed (conc test-name "/" item-path) '() '() #f #f #f #f #f #f #f 'normal))) - (debug:print 4 *default-log-port* "Got tests for run-id " run-id ", test-name " test-name - ", item-path " item-path " results: " (intersperse results "\n")) - ;; Keep only the youngest of any test/item combination - (for-each - (lambda (testdat) - (let* ((full-testname (conc (db:test-get-testname testdat) "/" (db:test-get-item-path testdat))) - (stored-test (hash-table-ref/default tests-hash full-testname #f))) - (if (or (not stored-test) - (and stored-test - (> (db:test-get-event_time testdat)(db:test-get-event_time stored-test)))) - ;; this test is younger, store it in the hash - (hash-table-set! tests-hash full-testname testdat)))) - results) - (if (null? tal) - (map cdr (hash-table->alist tests-hash)) ;; return a list of the most recent tests - (loop (car tal)(cdr tal)))))))))) - -;; Function recursively checks if .journal exists; if yes means db busy; call itself after delayed interval -;; return the sqlite3 db handle if possible -;; -(define (db:delay-if-busy dbdat #!key (count 6)) - (if (not (configf:lookup *configdat* "server" "delay-on-busy")) - (and dbdat (db:dbdat-get-db dbdat)) - (if dbdat - (let* ((dbpath (db:dbdat-get-path dbdat)) - (db (db:dbdat-get-db dbdat)) ;; we'll return this so (db:delay--if-busy can be called inline - (dbfj (conc dbpath "-journal"))) - (if (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "WARNING: failed to test for existance of " dbfj ", exn=" exn) - (thread-sleep! 1) - (db:delay-if-busy count (- count 1))) - (common:file-exists? dbfj)) - (case count - ((6) - (thread-sleep! 0.2) - (db:delay-if-busy count: 5)) - ((5) - (thread-sleep! 0.4) - (db:delay-if-busy count: 4)) - ((4) - (thread-sleep! 0.8) - (db:delay-if-busy count: 3)) - ((3) - (thread-sleep! 1.6) - (db:delay-if-busy count: 2)) - ((2) - (thread-sleep! 3.2) - (db:delay-if-busy count: 1)) - ((1) - (thread-sleep! 6.4) - (db:delay-if-busy count: 0)) - (else - (debug:print-info 0 *default-log-port* "delaying db access due to high database load.") - (thread-sleep! 12.8)))) - db) - "bogus result from db:delay-if-busy"))) - -(define (db:test-get-records-for-index-file dbstruct run-id test-name) - (let ((res '())) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (sqlite3:for-each-row - (lambda (id itempath state status run_duration logf comment) - (set! res (cons (vector id itempath state status run_duration logf comment) res))) - db - "SELECT id,item_path,state,status,run_duration,final_logf,comment FROM tests WHERE testname=? AND item_path != '' AND run_id=?;" ;; BUG! WHY NO run_id? - test-name - run-id) - res)))) - -;;====================================================================== -;; Tests meta data -;;====================================================================== - -;; returns a hash table of tags to tests -;; -(define (db:get-tests-tags dbstruct) - (db:with-db - dbstruct #f #f - (lambda (db) - (let* ((res (make-hash-table))) - (sqlite3:for-each-row - (lambda (testname tags-in) - (let ((tags (string-split tags-in ","))) - (for-each - (lambda (tag) - (hash-table-set! res tag - (delete-duplicates - (cons testname (hash-table-ref/default res tag '()))))) - tags))) - db - "SELECT testname,tags FROM test_meta") - (hash-table->alist res))))) - -;; read the record given a testname -(define (db:testmeta-get-record dbstruct testname) - (let ((res #f)) - (db:with-db - dbstruct - #f - #f - (lambda (db) - (sqlite3:for-each-row - (lambda (id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup) - (set! res (vector id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup))) - db - "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta WHERE testname=?;" - testname) - res)))) - -;; create a new record for a given testname -(define (db:testmeta-add-record dbstruct testname) - (db:with-db dbstruct #f #f - (lambda (db) - (sqlite3:execute - db - "INSERT OR IGNORE INTO test_meta (testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags) VALUES (?,'','','','','','','','');" testname)))) - -;; update one of the testmeta fields -(define (db:testmeta-update-field dbstruct testname field value) - (db:with-db dbstruct #f #f - (lambda (db) - (sqlite3:execute - db - (conc "UPDATE test_meta SET " field "=? WHERE testname=?;") value testname)))) - -(define (db:testmeta-get-all dbstruct) - (db:with-db dbstruct #f #f - (lambda (db) - (let ((res '())) - (sqlite3:for-each-row - (lambda (a . b) - (set! res (cons (apply vector a b) res))) - db - "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta;") - res)))) - -;;====================================================================== -;; M I S C M A N A G E M E N T I T E M S -;;====================================================================== - -;; A routine to map itempaths using a itemmap -;; patha and pathb must be strings or this will fail -;; -;; path-b is waiting on path-a -;; -(define (db:compare-itempaths test-b-name path-a path-b itemmaps ) - (debug:print-info 6 *default-log-port* "ITEMMAPS: " itemmaps) - (let* ((itemmap (tests:lookup-itemmap itemmaps test-b-name))) - (if itemmap - (let ((path-b-mapped (db:multi-pattern-apply path-b itemmap))) - (debug:print-info 6 *default-log-port* "ITEMMAP is " itemmap ", path: " path-b ", mapped path: " path-b-mapped) - (equal? path-a path-b-mapped)) - (equal? path-b path-a)))) - -;; A routine to convert test/itempath using a itemmap -;; NOTE: to process only an itempath (i.e. no prepended testname) -;; just call db:multi-pattern-apply -;; -(define (db:convert-test-itempath path-in itemmap) - (debug:print-info 6 *default-log-port* "ITEMMAP is " itemmap) - (let* ((path-parts (string-split path-in "/")) - (test-name (if (null? path-parts) "" (car path-parts))) - (item-path (string-intersperse (if (null? path-parts) '() (cdr path-parts)) "/"))) - (conc test-name "/" - (db:multi-pattern-apply item-path itemmap)))) - -;; patterns are: -;; "rx1" "replacement1"\n -;; "rx2" "replacement2" -;; etc. -;; -(define (db:multi-pattern-apply item-path itemmap) - (let ((all-patts (string-split itemmap "\n"))) - (if (null? all-patts) - item-path - (let loop ((hed (car all-patts)) - (tal (cdr all-patts)) - (res item-path)) - (let* ((parts (string-split hed)) - (patt (car parts)) - - (repl (if (> (length parts) 1)(cadr parts) "")) - - (newr (if (and patt repl) - (begin - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* - "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl ", exn=" exn) - res) - (string-substitute patt repl res)) - - - ) - (begin - (debug:print 0 *default-log-port* - "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl) - res)))) - (if (null? tal) - newr - (loop (car tal)(cdr tal) newr))))))) - - - - -;; the new prereqs calculation, looks also at itempath if specified -;; all prereqs must be met -;; if prereq test with itempath='' is COMPLETED and PASS, WARN, CHECK, or WAIVED then prereq is met -;; if prereq test with itempath=ref-item-path and COMPLETED with PASS, WARN, CHECK, or WAIVED then prereq is met -;; -;; Note: mode 'normal means that tests must be COMPLETED and ok (i.e. PASS, WARN, CHECK, SKIP or WAIVED) -;; mode 'toplevel means that tests must be COMPLETED only -;; mode 'itemmatch or 'itemwait means that tests items must be COMPLETED and (PASS|WARN|WAIVED|CHECK) [[ NB// NOT IMPLEMENTED YET ]] -;; mode 'exclusive means this test/item cannot run if the same test/item is LAUNCHED,REMOTEHOSTSTART or RUNNING -;; -;; IDEA for consideration: -;; 1. collect all tests "upstream" -;; 2. any NOT completed and good? if yes => return those as prereqs not met, if no => return null list -;; -;; (define (db:get-prereqs-not-met dbstruct run-id waitons ref-item-path mode) -(define (db:get-prereqs-not-met dbstruct run-id waitons ref-test-name ref-item-path mode itemmaps) ;; #!key (mode '(normal))(itemmap #f)) - ;; BBnote - rollup of an itemized test's overall state/status done in db:set-state-status-and-roll-up-items - (append - (if (member 'exclusive mode) - (let ((running-tests (db:get-tests-for-run dbstruct - #f ;; run-id of #f means for all runs. - (if (string=? ref-item-path "") ;; testpatt - ref-test-name - (conc ref-test-name "/" ref-item-path)) - '("LAUNCHED" "REMOTEHOSTSTART" "RUNNING") ;; states - '() ;; statuses - #f ;; offset - #f ;; limit - #f ;; not-in - #f ;; sort by - #f ;; sort order - 'shortlist ;; query type - 0 ;; last update, beginning of time .... - #f ;; mode - ))) - ;;(map (lambda (testdat) - ;; (if (equal? (db:test-get-item-path testdat) "") - ;; (db:test-get-testname testdat) - ;; (conc (db:test-get-testname testdat) - ;; "/" - ;; (db:test-get-item-path testdat)))) - running-tests) ;; calling functions want the entire data - '()) - - ;; collection of: for each waiton - - ;; if this ref-test-name is an item in an itemized test and mode is itemwait/itemmatch: - ;; if waiton is not itemized - if waiton is not both completed and in ok status, add as unmet prerequisite - ;; if waiton is itemized: - ;; and waiton's items are not expanded, add as unmet prerequisite - ;; else if matching waiton item is not both completed and in an ok status, add as unmet prerequisite - ;; else - ;; if waiton toplevel is not in both completed and ok status, add as unmet prerequisite - - (if (or (not waitons) - (null? waitons)) - '() - (let* ((ref-test-itemized-mode (not (null? (lset-intersection eq? mode '(itemmatch itemwait))))) ;; how is this different from using member? - (ref-test-toplevel-mode (not (null? (lset-intersection eq? mode '(toplevel))))) - (ref-test-is-toplevel (equal? ref-item-path "")) - (ref-test-is-item (not ref-test-is-toplevel)) - (unmet-pre-reqs '()) - (result '()) - (unmet-prereq-items '()) - ) - (for-each ; waitons - (lambda (waitontest-name) - ;; by getting the tests with matching name we are looking only at the matching test - ;; and related sub items - ;; next should be using mt:get-tests-for-run? - - (let (;(waiton-is-itemized ...) - ;(waiton-items-are-expanded ...) - (waiton-tests (db:get-tests-for-run-state-status dbstruct run-id waitontest-name)) - (ever-seen #f) - (parent-waiton-met #f) - (item-waiton-met #f) - - ) - (for-each ; test expanded from waiton - (lambda (waiton-test) - (let* ((waiton-state (db:test-get-state waiton-test)) - (waiton-status (db:test-get-status waiton-test)) - (waiton-item-path (db:test-get-item-path waiton-test)) ;; BB- this is the upstream itempath - (waiton-test-name (db:test-get-testname waiton-test)) - (waiton-is-toplevel (equal? waiton-item-path "")) - (waiton-is-item (not waiton-is-toplevel)) - (waiton-is-completed (member waiton-state *common:ended-states*)) - (waiton-is-running (member waiton-state *common:running-states*)) - (waiton-is-killed (member waiton-state *common:badly-ended-states*)) - (waiton-is-ok (member waiton-status *common:well-ended-states*)) - ;; testname-b path-a path-b - (same-itempath (db:compare-itempaths ref-test-name waiton-item-path ref-item-path itemmaps)) ;; (equal? ref-item-path waiton-item-path))) - (real-ref-test-name (car (string-split ref-test-name "/"))) ;; I THINK ref-test-name SHOULD NEVER HAVE THE ITEM_PATH! - (test-and-ref-are-same (equal? real-ref-test-name waiton-test-name))) - (debug:print 4 *default-log-port* "waiton-test-name " waiton-test-name " ref-test-name: " ref-test-name " test-and-ref-are-same: " test-and-ref-are-same) - (set! ever-seen #t) - ;;(BB> "***consider waiton "waiton-test"/"waiton-item-path"***") - (cond - ;; case 0 - toplevel of an itemized test, at least one item in prereq has completed - ((and waiton-is-item ref-test-is-toplevel ref-test-itemized-mode waiton-is-completed) - (set! parent-waiton-met #t)) - - ;; case 1, non-item (parent test) is - ((and waiton-is-toplevel ;; this is the parent test of the waiton being examined - waiton-is-completed - ;;(BB> "cond1") - (or waiton-is-ok ref-test-toplevel-mode)) ;; itemmatch itemwait)))))) - (set! parent-waiton-met #t)) - ;; Special case for toplevel and KILLED - ((and waiton-is-toplevel ;; this is the parent test - waiton-is-killed - (member 'toplevel mode)) - ;;(BB> "cond2") - (set! parent-waiton-met #t)) - ;; For itemwait mode IFF the previous matching item is good the set parent-waiton-met - ((and ref-test-itemized-mode ref-test-is-item same-itempath) - ;;(BB> "cond3") - (if (and waiton-is-completed (or waiton-is-ok ref-test-toplevel-mode)) - (set! item-waiton-met #t) - (set! unmet-prereq-items (cons waiton-test unmet-prereq-items))) - (if (and waiton-is-toplevel ;; if upstream rollup test is completed, parent-waiton-met is set - (or waiton-is-completed waiton-is-running)) - (set! parent-waiton-met #t))) - ;; normal checking of parent items, any parent or parent item not ok blocks running - ((and waiton-is-completed - (or waiton-is-ok - (member 'toplevel mode)) ;; toplevel does not block on FAIL - (and waiton-is-ok (member 'itemmatch mode) ;; itemmatch blocks on not ok ;; TODO: THIS IS PROBABLY A BUG. ITEMMATCH AND ITEMWAIT ARE SYNONYMS!! WHAT HAPPENED OT ITEMWAIT??? - )) - ;;(BB> "cond4") - (set! item-waiton-met #t)) - ((and waiton-is-completed waiton-is-ok same-itempath) - ;;(BB> "cond5") - (set! item-waiton-met #t)) - ((and waiton-is-completed waiton-is-ok test-and-ref-are-same) ;; probably from [waitons] table - (set! item-waiton-met #t)) - (else - #t - ;;(BB> "condelse") - )))) - waiton-tests) - ;; both requirements, parent and item-waiton must be met to NOT add item to - ;; prereq's not met list - ;; (BB> - ;; "\n* waiton-tests "waiton-tests - ;; "\n* parent-waiton-met "parent-waiton-met - ;; "\n* item-waiton-met "item-waiton-met - ;; "\n* ever-seen "ever-seen - ;; "\n* ref-test-itemized-mode "ref-test-itemized-mode - ;; "\n* unmet-prereq-items "unmet-prereq-items - ;; "\n* result (pre) "result - ;; "\n* ever-seen "ever-seen - ;; "\n") - - (cond - ((and ref-test-itemized-mode ref-test-is-item (not (null? unmet-prereq-items))) - (set! result (append unmet-prereq-items result))) - ((not (or parent-waiton-met item-waiton-met)) - (set! result (append (if (null? waiton-tests) (list waitontest-name) waiton-tests) result))) ;; appends the string if the full record is not available - ;; if the test is not found then clearly the waiton is not met... - ;; (if (not ever-seen)(set! result (cons waitontest-name result))))) - ((not ever-seen) - (set! result (append (if (null? waiton-tests)(list waitontest-name) waiton-tests) result)))))) - waitons) - (delete-duplicates result))))) - -;;====================================================================== -;; To sync individual run -;;====================================================================== -(define (db:get-run-record-ids dbstruct target run keynames test-patt) -(let ((backcons (lambda (lst item)(cons item lst)))) - (db:with-db - dbstruct #f #f - (lambda (db) - (let* ((keystr (string-intersperse - (map (lambda (key val) - (conc key " like '" val "'")) - keynames - (string-split target "/")) - " AND ")) - (run-qry (conc "SELECT id FROM runs WHERE " keystr " and runname='" run"'")) - (test-qry (conc "SELECT id FROM tests WHERE run_id in (" run-qry ") and testname like '" test-patt "'"))) - (print run-qry) - (print test-qry) - `((runs . ,(sqlite3:fold-row backcons '() db run-qry)) - (tests . ,(sqlite3:fold-row backcons '() db test-qry)) - (test_steps . ,(sqlite3:fold-row backcons '() db (conc "SELECT id FROM test_steps WHERE test_id in (" test-qry ")"))) - (test_data . ,(sqlite3:fold-row backcons '() db (conc "SELECT id FROM test_data WHERE test_id in (" test-qry ")" ))) - )))))) - -;;====================================================================== -;; Just for sync, procedures to make sync easy -;;====================================================================== - -;; get an alist of record ids changed since time since-time -;; '((runs . (1 2 3 ...))(steps . (5 6 7 ...) ...)) -;; -(define (db:get-changed-record-ids dbstruct since-time) - ;; no transaction, allow the db to be accessed between the big queries - (let ((backcons (lambda (lst item)(cons item lst)))) - (db:with-db - dbstruct #f #f - (lambda (db) - `((runs . ,(sqlite3:fold-row backcons '() db "SELECT id FROM runs WHERE last_update>=?" since-time)) - (tests . ,(sqlite3:fold-row backcons '() db "SELECT id FROM tests WHERE last_update>=?" since-time)) - (test_steps . ,(sqlite3:fold-row backcons '() db "SELECT id FROM test_steps WHERE last_update>=?" since-time)) - (test_data . ,(sqlite3:fold-row backcons '() db "SELECT id FROM test_data WHERE last_update>=?" since-time)) - ;; (test_meta . ,(fold-row backcons '() db "SELECT id FROM test_meta WHERE last_update>?" since-time)) - (run_stats . ,(sqlite3:fold-row backcons '() db "SELECT id FROM run_stats WHERE last_update>=?" since-time)) - ))))) - ;;====================================================================== ;; Extract ods file from the db ;;====================================================================== ;; NOT REWRITTEN YET!!!!! Index: dbmod.scm ================================================================== --- dbmod.scm +++ dbmod.scm @@ -17,21 +17,43 @@ ;; along with Megatest. If not, see . ;;====================================================================== (declare (unit dbmod)) +(declare (uses debugprint)) +(declare (uses commonmod)) +(declare (uses configfmod)) +(declare (uses mtargs)) (module dbmod * (import scheme chicken.base (prefix sqlite3 sqlite3:) - + chicken.condition + chicken.process-context.posix + chicken.string + chicken.pathname + chicken.process + chicken.process-context + chicken.file + chicken.time + chicken.file.posix + chicken.format + chicken.sort + typed-records + srfi-1 srfi-18 srfi-69 + stack + + commonmod + configfmod + debugprint + (prefix mtargs args:) ) ;;====================================================================== ;; Database access @@ -117,8 +139,4780 @@ (stmth (db:hoh-get stmt-cache db stmt))) (or stmth (let* ((newstmth (sqlite3:prepare db stmt))) (db:hoh-set! stmt-cache db stmt newstmth) newstmth)))) + +;;====================================================================== +;; Pulled from db.scm +;;====================================================================== + + +;;====================================================================== +;; SQLITE3 HELPERS +;;====================================================================== + +(define (db:general-sqlite-error-dump exn stmt . params) + (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) ;; RADT ... how does this work? + ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn) + ;; (print "err-status: " err-status) + (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn)) + (print-call-chain (current-error-port)))) + +;; convert to -inline +;; +(define (db:first-result-default db stmt default . params) + (handle-exceptions + exn + (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) + ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn) + (if (eq? err-status 'done) + default + (begin + (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) + (print-call-chain (current-error-port)) + default))) + (apply sqlite3:first-result db stmt params))) + +;; Get/open a database +;; if run-id => get run specific db +;; if #f => get main db +;; if db already open - return inmem +;; if db not open, open inmem, rundb and sync then return inmem +;; inuse gets set automatically for rundb's +;; +(define (db:get-db dbstruct) ;; run-id) + (if (stack? (dbr:dbstruct-dbstack dbstruct)) + (if (stack-empty? (dbr:dbstruct-dbstack dbstruct)) + (let ((newdb (db:open-megatest-db path: (db:dbfile-path)))) + ;; (stack-push! (dbr:dbstruct-dbstack dbstruct) newdb) + newdb) + (stack-pop! (dbr:dbstruct-dbstack dbstruct))) + (db:open-db dbstruct))) + +;; ;; legacy handling of structure for managing db's. Refactor this into dbr:? +(define (db:dbdat-get-db dbdat) + (if (pair? dbdat) + (car dbdat) + dbdat)) + +(define (db:dbdat-get-path dbdat) + (if (pair? dbdat) + (cdr dbdat) + #f)) + +(define-inline (db:generic-error-printout exn . message) + (print-call-chain (current-error-port)) + (apply debug:print-error 0 *default-log-port* message) + (debug:print-error 0 *default-log-port* " params: " params + ", error: " ((condition-property-accessor 'exn 'message) exn) + ", arguments: " ((condition-property-accessor 'exn 'arguments) exn) + ", location: " ((condition-property-accessor 'exn 'location) exn) + )) + +;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;") +;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no +;; +(define (db:with-db dbstruct run-id r/w proc . params) + (let* ((have-struct (dbr:dbstruct? dbstruct)) + (dbdat (if have-struct + (db:get-db dbstruct) + #f)) + (db (if have-struct + (db:dbdat-get-db dbdat) + dbstruct)) + (fname (db:dbdat-get-path dbdat)) + (use-mutex (> *api-process-request-count* 25))) ;; was 25 + (if (and use-mutex + (common:low-noise-print 120 "over-50-parallel-api-requests")) + (debug:print-info 0 *default-log-port* *api-process-request-count* " parallel api requests being processed in process " (current-process-id) ", throttling access")) + (if (common:low-noise-print 600 (conc "parallel-api-requests" *max-api-process-requests*)) + (debug:print-info 2 *default-log-port* "Parallel api request count: " *api-process-request-count* " max parallel requests: " *max-api-process-requests*)) + (condition-case + (begin + (if use-mutex (mutex-lock! *db-with-db-mutex*)) + (let ((res (apply proc db params))) + (if use-mutex (mutex-unlock! *db-with-db-mutex*)) + ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w)) + (if dbdat (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat)) + res)) + (exn (io-error) + (db:generic-error-printout exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")) + (exn (corrupt) + (db:generic-error-printout exn "ERROR: database " fname " is corrupt. Repair it to proceed.")) + (exn (busy) + (db:generic-error-printout exn "ERROR: database " fname + " is locked. Try copying to another location, remove original and copy back.")) + (exn (permission)(db:generic-error-printout exn "ERROR: database " fname " has some permissions problem.")) + (exn () + (db:generic-error-printout exn "ERROR: Unknown error with database " fname " message: " + ((condition-property-accessor 'exn 'message) exn)))))) + +;;====================================================================== +;; K E E P F I L E D B I N dbstruct +;;====================================================================== + +;; (define (db:get-filedb dbstruct run-id) +;; (let ((db (vector-ref dbstruct 2))) +;; (if db +;; db +;; (let ((fdb (filedb:open-db (conc *toplevel* "/db/files.db")))) +;; (vector-set! dbstruct 2 fdb) +;; fdb)))) +;; +;; ;; Can also be used to save arbitrary strings +;; ;; +;; (define (db:save-path dbstruct path) +;; (let ((fdb (db:get-filedb dbstruct)))b +;; (filedb:register-path fdb path))) +;; +;; ;; Use to get a path. To get an arbitrary string see next define +;; ;; +;; (define (db:get-path dbstruct id) +;; (let ((fdb (db:get-filedb dbstruct))) +;; (filedb:get-path db id))) + +;; NB// #f => return dbdir only +;; (was planned to be; zeroth db with name=main.db) +;; +;; If run-id is #f return to create and retrieve the path where the db will live. +;; +(define db:dbfile-path common:get-db-tmp-area) + +(define (db:set-sync db) + (let ((syncprag (configf:lookup *configdat* "setup" "sychronous"))) + (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 0) ";")))) + +;; open an sql database inside a file lock +;; returns: db existed-prior-to-opening +;; RA => Returns a db handler; sets the lock if opened in writable mode +;; +;; (define *db-open-mutex* (make-mutex)) + +(define (db:lock-create-open fname initproc) + (let* ((parent-dir (or (pathname-directory fname)(current-directory))) ;; no parent? go local + (raw-fname (pathname-file fname)) + (dir-writable (file-writable? parent-dir)) + (file-exists (common:file-exists? fname)) + (file-write (if file-exists + (file-writable? fname) + dir-writable ))) + ;; (mutex-lock! *db-open-mutex*) ;; tried this mutex, not clear it helped. + (if file-write ;; dir-writable + (condition-case + (let* ((lockfname (conc fname ".lock")) + (readyfname (conc parent-dir "/.ready-" raw-fname)) + (readyexists (common:file-exists? readyfname))) + (if (not readyexists) + (common:simple-file-lock-and-wait lockfname)) + (let ((db (sqlite3:open-database fname))) + (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) + (sqlite3:execute db "PRAGMA synchronous = 0;") + (if (and (configf:lookup *configdat* "setup" "tmp_mode") (string-match "^/tmp/.*" fname)) + (begin + ;;(print "DEBUG: Setting tmp_mode for " fname) + (sqlite3:execute db (configf:lookup *configdat* "setup" "tmp_mode")) + ) + ) + (if (and (configf:lookup *configdat* "setup" "nfs_mode") (not (string-match "^/tmp/.*" fname))) + (begin + ;;(print "DEBUG: Setting nfs_mode for " fname) + (sqlite3:execute db (configf:lookup *configdat* "setup" "nfs_mode")) + ) + ) + (if (and (not (or (configf:lookup *configdat* "setup" "tmp_mode") (configf:lookup *configdat* "setup" "nfs_mode"))) + (configf:lookup *configdat* "setup" "use-wal") + (string-match "^/tmp/.*" fname)) ;; this is a file in /tmp + (sqlite3:execute db "PRAGMA journal_mode=WAL;") + (debug:print 2 *default-log-port* "Creating " fname " in NON-WAL mode.")) + (if (not file-exists) + (initproc db)) + (if (not readyexists) + (begin + (common:simple-file-release-lock lockfname) + (with-output-to-file + readyfname + (lambda () + (print "Ready at " + (seconds->year-work-week/day-time + (current-seconds))))))) + db)) + (exn (io-error) (debug:print 0 *default-log-port* "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")) + (exn (corrupt) (debug:print 0 *default-log-port* "ERROR: database " fname " is corrupt. Repair it to proceed.")) + (exn (busy) (debug:print 0 *default-log-port* "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back.")) + (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem.")) + (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn)))) + + (condition-case + (begin + (debug:print 2 *default-log-port* "WARNING: opening db in non-writable dir " fname) + (let ((db (sqlite3:open-database fname))) + (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) + (sqlite3:execute db "PRAGMA synchronous = 0;") + ;; (mutex-unlock! *db-open-mutex*) + db)) + (exn (io-error) (debug:print 0 *default-log-port* "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")) + (exn (corrupt) (debug:print 0 *default-log-port* "ERROR: database " fname " is corrupt. Repair it to proceed.")) + (exn (busy) (debug:print 0 *default-log-port* "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back.")) + (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem.")) + (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn)))) + ))) + + +;; This routine creates the db if not already present. It is only called if the db is not already opened +;; +(define (db:open-db dbstruct #!key (areapath #f)(do-sync #t)) ;; TODO: actually use areapath + (let ((tmpdb-stack (dbr:dbstruct-dbstack dbstruct))) ;; RA => Returns the first reference in dbstruct + (if (stack? tmpdb-stack) + (db:get-db tmpdb-stack) ;; get previously opened db (will create new db handle if all in the stack are already used + (let* ((max-stale-tmp (configf:lookup-number *configdat* "server" "filling-db-max-stale-seconds" default: 10)) + (dbpath (db:dbfile-path )) ;; path to tmp db area + (dbexists (common:file-exists? dbpath)) + (tmpdbfname (conc dbpath "/megatest.db")) + (dbfexists (common:file-exists? tmpdbfname)) ;; (conc dbpath "/megatest.db"))) + (mtdbexists (common:file-exists? (conc *toppath* "/megatest.db"))) + + (mtdbmodtime (if mtdbexists (common:lazy-sqlite-db-modification-time (conc *toppath* "/megatest.db")) #f)) + (tmpdbmodtime (if dbfexists (common:lazy-sqlite-db-modification-time tmpdbfname) #f)) + (mtdb (db:open-megatest-db)) + (mtdbpath (db:dbdat-get-path mtdb)) + (tmpdb (db:open-megatest-db path: dbpath)) ;; lock-create-open dbpath db:initialize-main-db)) + (refndb (db:open-megatest-db path: dbpath name: "megatest_ref.db")) + (write-access (file-writable? mtdbpath)) + ;(mtdbmodtime (if mtdbexists (common:lazy-sqlite-db-modification-time mtdbpath) #f)) ; moving this before db:open-megatest-db is called. if wal mode is on -WAL and -shm file get created with causing the tmpdbmodtime timestamp always greater than mtdbmodtime + ;(tmpdbmodtime (if dbfexists (common:lazy-sqlite-db-modification-time tmpdbfname) #f)) + ;if wal mode is on -WAL and -shm file get created when db:open-megatest-db is called. modtimedelta will always be < 10 so db in tmp not get synced + ;(tmpdbmodtime (if dbfexists (db:get-last-update-time (car tmpdb)) #f)) + ;(fmt (file-modification-time tmpdbfname)) + (modtimedelta (and mtdbmodtime tmpdbmodtime (- mtdbmodtime tmpdbmodtime)))) + + (when write-access + (sqlite3:execute (car mtdb) "drop trigger if exists update_tests_trigger") + (sqlite3:execute (car mtdb) "drop trigger if exists update_runs_trigger")) + + ;(print "mtdbmodtime " mtdbmodtime " tmpdbmodtime " tmpdbmodtime " mtdbpath " mtdbpath " " (conc *toppath* "/megatest.db")) + ;;(debug:print-info 13 *default-log-port* "db:open-db>> mtdbpath="mtdbpath" mtdbexists="mtdbexists" and write-access="write-access) + (if (and dbexists (not write-access)) + (begin + (set! *db-write-access* #f) + (dbr:dbstruct-read-only-set! dbstruct #t))) + (dbr:dbstruct-mtdb-set! dbstruct mtdb) + (dbr:dbstruct-tmpdb-set! dbstruct tmpdb) + (dbr:dbstruct-dbstack-set! dbstruct (make-stack)) ;; BB: why a stack? Why would the number of db's be indeterminate? Is this a legacy of 1.db 2.db .. ? + (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb) ;; olddb is already a (cons db path) + (dbr:dbstruct-refndb-set! dbstruct refndb) + (if (and (or (not dbfexists) + (and modtimedelta + (> modtimedelta max-stale-tmp))) ;; if db in tmp is over ten seconds older than the file in MTRA then do a sync back + do-sync) + (begin + (debug:print 1 *default-log-port* "filling db " (db:dbdat-get-path tmpdb) " with data \n from " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) + (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb refndb tmpdb) + ;touch tmp db to avoid wal mode wierdness + (set! (file-modification-time tmpdbfname) (current-seconds)) + (debug:print-info 13 *default-log-port* "db:sync-all-tables-list done.") + ) + (debug:print 4 *default-log-port* " db, " (db:dbdat-get-path tmpdb) " already exists or fresh enough, not propogating data from\n " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) ) + ;; (db:multi-db-sync dbstruct 'old2new)) ;; migrate data from megatest.db automatically + tmpdb)))) + + +(define (db:get-last-update-time db) +; (db:with-db +; dbstruct #f #f +; (lambda (db) + (let ((last-update-time #f)) + (sqlite3:for-each-row + (lambda (lup) + (set! last-update-time lup)) + db + "select max(lup) from ( select max(last_update) as lup from tests union select max(last_update) as lup from runs);") + last-update-time)) +;)) + +;; Make the dbstruct, setup up auxillary db's and call for main db at least once +;; +;; called in http-transport and replicated in rmt.scm for *local* access. +;; +(define (db:setup do-sync #!key (areapath #f)) + ;; + (cond + (*dbstruct-db* *dbstruct-db*);; TODO: when multiple areas are supported, this optimization will be a hazard + (else ;;(common:on-homehost?) + (debug:print-info 13 *default-log-port* "db:setup entered (first time, not cached.)") + (let* ((dbstruct (make-dbr:dbstruct))) + (when (not *toppath*) + (debug:print-info 13 *default-log-port* "in db:setup, *toppath* not set; calling launch:setup") + (launch:setup areapath: areapath)) + (debug:print-info 13 *default-log-port* "Begin db:open-db") + (db:open-db dbstruct areapath: areapath do-sync: do-sync) + (debug:print-info 13 *default-log-port* "Done db:open-db") + (set! *dbstruct-db* dbstruct) + ;;(debug:print-info 13 *default-log-port* "new dbstruct = "(dbr:dbstruct->alist dbstruct)) + dbstruct)))) + ;; (else + ;; (debug:print 0 *default-log-port* "ERROR: attempt to open database when not on homehost. Exiting. Homehost: " (common:get-homehost)) + ;; (exit 1)))) + +;; Open the classic megatest.db file (defaults to open in toppath) +;; +;; NOTE: returns a dbdat not a dbstruct! +;; + +;;(define (db:reopen-megatest-db + +(define (db:open-megatest-db #!key (path #f)(name #f)) + (let* ((dbdir (or path *toppath*)) + (dbpath (conc dbdir "/" (or name "megatest.db"))) + (dbexists (common:file-exists? dbpath)) + (db (db:lock-create-open dbpath + (lambda (db) + (db:initialize-main-db db) + ;;(db:initialize-run-id-db db) + ))) + (write-access (file-writable? dbpath))) + (debug:print-info 13 *default-log-port* "db:open-megatest-db "dbpath) + (if (and dbexists (not write-access)) + (set! *db-write-access* #f)) + (cons db dbpath))) + +;; sync run to disk if touched +;; +(define (db:sync-touched dbstruct run-id #!key (force-sync #f)) + (let ((tmpdb (db:get-db dbstruct)) + (mtdb (dbr:dbstruct-mtdb dbstruct)) + (refndb (dbr:dbstruct-refndb dbstruct)) + (start-t (current-seconds))) + (debug:print-info 4 *default-log-port* "Syncing for run-id: " run-id) + (mutex-lock! *db-multi-sync-mutex*) + (let ((update_info (cons (if force-sync 0 *db-last-sync*) "last_update"))) + (mutex-unlock! *db-multi-sync-mutex*) + (db:sync-tables (db:sync-all-tables-list dbstruct) update_info tmpdb refndb mtdb)) + (mutex-lock! *db-multi-sync-mutex*) + (set! *db-last-sync* start-t) + (set! *db-last-access* start-t) + (mutex-unlock! *db-multi-sync-mutex*) + (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb))) + +(define (db:safely-close-sqlite3-db db stmt-cache #!key (try-num 3)) + (if (<= try-num 0) + #f + (handle-exceptions + exn + (begin + (print "Attempt to safely close sqlite3 db failed. Trying again. exn=" exn) + (thread-sleep! 3) + (sqlite3:interrupt! db) + (db:safely-close-sqlite3-db db stmt-cache try-num: (- try-num 1))) + (if (sqlite3:database? db) + (let* ((stmts (and stmt-cache (hash-table-ref/default stmt-cache db #f)))) + (if stmts (map sqlite3:finalize! (hash-table-values stmts))) + (sqlite3:finalize! db) + #t) + #f)))) + +;; close all opened run-id dbs +(define (db:close-all dbstruct) + (if (dbr:dbstruct? dbstruct) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "WARNING: Finalizing failed, " ((condition-property-accessor 'exn 'message) exn) ", note - exn=" exn) + (print-call-chain *default-log-port*)) + ;; (db:sync-touched dbstruct 0 force-sync: #t) ;; NO. Do not do this here. Instead we rely on a server to be started when there are writes, even if the server itself is not going to be used as a server. + (let ((tdbs (map db:dbdat-get-db + (stack->list (dbr:dbstruct-dbstack dbstruct)))) + (mdb (db:dbdat-get-db (dbr:dbstruct-mtdb dbstruct))) + (rdb (db:dbdat-get-db (dbr:dbstruct-refndb dbstruct))) + (stmt-cache (dbr:dbstruct-stmt-cache dbstruct))) + (map (lambda (db) + (db:safely-close-sqlite3-db db stmt-cache)) + tdbs) + (db:safely-close-sqlite3-db mdb stmt-cache) ;; (if (sqlite3:database? mdb) (sqlite3:finalize! mdb)) + (db:safely-close-sqlite3-db rdb stmt-cache))))) ;; (if (sqlite3:database? rdb) (sqlite3:finalize! rdb)))))) + +;; (let ((locdbs (dbr:dbstruct-locdbs dbstruct))) +;; (if (hash-table? locdbs) +;; (for-each (lambda (run-id) +;; (db:close-run-db dbstruct run-id)) +;; (hash-table-keys locdbs))))) + +;; (define (db:open-inmem-db) +;; (let* ((db (sqlite3:open-database ":memory:")) +;; (handler (make-busy-timeout 3600))) +;; (sqlite3:set-busy-handler! db handler) +;; (db:initialize-run-id-db db) +;; (cons db #f))) + +;; just tests, test_steps and test_data tables +(define db:sync-tests-only + (list + ;; (list "strs" + ;; '("id" #f) + ;; '("str" #f)) + (list "tests" + '("id" #f) + '("run_id" #f) + '("testname" #f) + '("host" #f) + '("cpuload" #f) + '("diskfree" #f) + '("uname" #f) + '("rundir" #f) + '("shortdir" #f) + '("item_path" #f) + '("state" #f) + '("status" #f) + '("attemptnum" #f) + '("final_logf" #f) + '("logdat" #f) + '("run_duration" #f) + '("comment" #f) + '("event_time" #f) + '("fail_count" #f) + '("pass_count" #f) + '("archived" #f) + '("last_update" #f)) + (list "test_steps" + '("id" #f) + '("test_id" #f) + '("stepname" #f) + '("state" #f) + '("status" #f) + '("event_time" #f) + '("comment" #f) + '("logfile" #f) + '("last_update" #f)) + (list "test_data" + '("id" #f) + '("test_id" #f) + '("category" #f) + '("variable" #f) + '("value" #f) + '("expected" #f) + '("tol" #f) + '("units" #f) + '("comment" #f) + '("status" #f) + '("type" #f) + '("last_update" #f)))) + +;; needs db to get keys, this is for syncing all tables +;; +(define (db:sync-main-list dbstruct) + (let ((keys (db:get-keys dbstruct))) + (list + (list "keys" + '("id" #f) + '("fieldname" #f) + '("fieldtype" #f)) + (list "metadat" '("var" #f) '("val" #f)) + (append (list "runs" + '("id" #f)) + (map (lambda (k)(list k #f)) + (append keys + (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update")))) + (list "archive_disks" + '("id" #f) + '("archive_area_name" #f) + '("disk_path" #f) + '("last_df" #f) + '("last_df_time" #f) + '("creation_time" #f)) + + (list "archive_blocks" + '("id" #f) + '("archive_disk_id" #f) + '("disk_path" #f) + '("last_du" #f) + '("last_du_time" #f) + '("creation_time" #f)) + + (list "test_meta" + '("id" #f) + '("testname" #f) + '("owner" #f) + '("description" #f) + '("reviewed" #f) + '("iterated" #f) + '("avg_runtime" #f) + '("avg_disk" #f) + '("tags" #f) + '("jobgroup" #f))))) + +(define (db:sync-all-tables-list dbstruct) + (append (db:sync-main-list dbstruct) + db:sync-tests-only)) + +;; use bunch of Unix commands to try to break the lock and recreate the db +;; +(define (db:move-and-recreate-db dbdat) + (let* ((dbpath (db:dbdat-get-path dbdat)) + (dbdir (pathname-directory dbpath)) + (fname (pathname-strip-directory dbpath)) + (fnamejnl (conc fname "-journal")) + (tmpname (conc fname "." (current-process-id))) + (tmpjnl (conc fnamejnl "." (current-process-id)))) + (debug:print-error 0 *default-log-port* "" fname " appears corrupted. Making backup \"old/" fname "\"") + (system (conc "cd " dbdir ";mkdir -p old;cat " fname " > old/" tmpname)) + (system (conc "rm -f " dbpath)) + (if (common:file-exists? fnamejnl) + (begin + (debug:print-error 0 *default-log-port* "" fnamejnl " found, moving it to old dir as " tmpjnl) + (system (conc "cd " dbdir ";mkdir -p old;cat " fnamejnl " > old/" tmpjnl)) + (system (conc "rm -f " dbdir "/" fnamejnl)))) + ;; attempt to recreate database + (system (conc "cd " dbdir ";sqlite3 old/" tmpname " .dump | sqlite3 " fname)))) + +;; return #f to indicate the dbdat should be closed/reopened +;; else return dbdat +;; +(define (db:repair-db dbdat #!key (numtries 1)) + (let* ((dbpath (db:dbdat-get-path dbdat)) + (dbdir (pathname-directory dbpath)) + (fname (pathname-strip-directory dbpath))) + (debug:print-info 0 *default-log-port* "Checking db " dbpath " for errors.") + (cond + ((not (file-writable? dbdir)) + (debug:print 0 *default-log-port* "WARNING: can't write to " dbdir ", can't fix " fname) + #f) + + ;; handle special cases, megatest.db and monitor.db + ;; + ;; NOPE: apply this same approach to all db files + ;; + (else ;; ((equal? fname "megatest.db") ;; this file can be regenerated if needed + (handle-exceptions + exn + (begin + (print "Problems trying to repair the db, exn=" exn) + ;; (db:move-and-recreate-db dbdat) + (if (> numtries 0) + (db:repair-db dbdat numtries: (- numtries 1)) + #f) + (debug:print 0 *default-log-port* "FATAL: file " dbpath " was found corrupted, an attempt to fix has been made but you must start over.") + (debug:print 0 *default-log-port* + " check the following:\n" + " 1. full directories, look in ~/ /tmp and " dbdir "\n" + " 2. write access to " dbdir "\n\n" + " if the automatic recovery failed you may be able to recover data by doing \"" + (if (member fname '("megatest.db" "monitor.db")) + "megatest -cleanup-db" + "megatest -import-megatest.db;megatest -cleanup-db") + "\"\n") + (exit) ;; we can not safely continue when a db was corrupted - even if fixed. + ) + ;; test read/write access to the database + (let ((db (sqlite3:open-database dbpath))) + (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) + (sqlite3:execute db "PRAGMA synchronous = 0;") + (cond + ((equal? fname "megatest.db") + (sqlite3:execute db "DELETE FROM tests WHERE state='DELETED';")) + ((equal? fname "main.db") + (sqlite3:execute db "DELETE FROM runs WHERE state='deleted';")) + ((string-match "\\d.db" fname) + (sqlite3:execute db "UPDATE tests SET state='DELETED' WHERE state='DELETED';")) + ((equal? fname "monitor.db") + (sqlite3:execute "DELETE FROM servers WHERE state LIKE 'defunct%';")) + (else + (sqlite3:execute db "vacuum;"))) + + (sqlite3:finalize! db) + #t)))))) + +;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) ) +;; db's are dbdat's +;; +;; if last-update specified ("field-name" . time-in-seconds) +;; then sync only records where field-name >= time-in-seconds +;; IFF field-name exists +;; +(define (db:sync-tables tbls last-update fromdb todb . slave-dbs) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable in db:sync-tables.") + (print-call-chain (current-error-port)) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) + (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status) exn)) + (debug:print 0 *default-log-port* " src db: " (db:dbdat-get-path fromdb)) + (for-each (lambda (dbdat) + (let ((dbpath (db:dbdat-get-path dbdat))) + (debug:print 0 *default-log-port* " dbpath: " dbpath) + (if (not (db:repair-db dbdat)) + (begin + (debug:print-error 0 *default-log-port* "Failed to rebuild " dbpath ", exiting now.") + (exit))))) + (cons todb slave-dbs)) + + 0) + ;; this is the work to be done + (cond + ((not fromdb) (debug:print 3 *default-log-port* "WARNING: db:sync-tables called with fromdb missing") + -1) + ((not todb) (debug:print 3 *default-log-port* "WARNING: db:sync-tables called with todb missing") + -2) + ((not (sqlite3:database? (db:dbdat-get-db fromdb))) + (debug:print-error 0 *default-log-port* "db:sync-tables called with fromdb not a database " fromdb) + -3) + ((not (sqlite3:database? (db:dbdat-get-db todb))) + (debug:print-error 0 *default-log-port* "db:sync-tables called with todb not a database " todb) + -4) + + ((not (file-writable? (db:dbdat-get-path todb))) + (debug:print-error 0 *default-log-port* "db:sync-tables called with todb not a read-only database " todb) + -5) + ((not (null? (let ((readonly-slave-dbs + (filter + (lambda (dbdat) + (not (file-writable? (db:dbdat-get-path todb)))) + slave-dbs))) + (for-each + (lambda (bad-dbdat) + (debug:print-error + 0 *default-log-port* "db:sync-tables called with todb not a read-only database " bad-dbdat)) + readonly-slave-dbs) + readonly-slave-dbs))) -6) + (else + (let ((stmts (make-hash-table)) ;; table-field => stmt + (all-stmts '()) ;; ( ( stmt1 value1 ) ( stml2 value2 )) + (numrecs (make-hash-table)) + (start-time (current-milliseconds)) + (tot-count 0)) + (for-each ;; table + (lambda (tabledat) + (let* ((tablename (car tabledat)) + (fields (cdr tabledat)) + (has-last-update (member "last_update" fields)) + (use-last-update (cond + ((and has-last-update + (member "last_update" fields)) + #t) ;; if given a number, just use it for all fields + ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table + ((and (pair? last-update) + (member (car last-update) ;; last-update field name + (map car fields))) + #t) + (last-update + (debug:print 0 *default-log-port* "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update);; found in fields + #f) + (else + #f))) + (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for + (if (number? last-update) + last-update + (cdr last-update)) + #f)) + (last-update-field (if use-last-update + (if (number? last-update) + "last_update" + (car last-update)) + #f)) + (num-fields (length fields)) + (field->num (make-hash-table)) + (num->field (apply vector (map car fields))) ;; BBHERE + (full-sel (conc "SELECT " (string-intersperse (map car fields) ",") + " FROM " tablename (if use-last-update ;; apply last-update criteria + (conc " WHERE " last-update-field " >= " last-update-value) + "") + ";")) + (full-ins (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) " + " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );")) + (fromdat '()) + (fromdats '()) + (totrecords 0) + (batch-len (string->number (or (configf:lookup *configdat* "sync" "batchsize") "100"))) + (todat (make-hash-table)) + (count 0) + (field-names (map car fields)) + (delay-handicap (string->number (or (configf:lookup *configdat* "sync" "delay-handicap") "0"))) + ) + + ;; set up the field->num table + (for-each + (lambda (field) + (hash-table-set! field->num field count) + (set! count (+ count 1))) + fields) + + ;; read the source table + (sqlite3:for-each-row + (lambda (a . b) + (set! fromdat (cons (apply vector a b) fromdat)) + (if (> (length fromdat) batch-len) + (begin + (set! fromdats (cons fromdat fromdats)) + (set! fromdat '()) + (set! totrecords (+ totrecords 1))))) + (db:dbdat-get-db fromdb) + full-sel) + + ;; tack on remaining records in fromdat + (if (not (null? fromdat)) + (set! fromdats (cons fromdat fromdats))) + + (if (common:low-noise-print 120 "sync-records") + (debug:print-info 4 *default-log-port* "found " totrecords " records to sync")) + + ;; read the target table; BBHERE + (sqlite3:for-each-row + (lambda (a . b) + (hash-table-set! todat a (apply vector a b))) + (db:dbdat-get-db todb) + full-sel) + + (when (and delay-handicap (> delay-handicap 0)) + (debug:print-info 0 *default-log-port* "imposing synthetic sync delay of "delay-handicap" seconds since sync/delay-handicap is configured") + (thread-sleep! delay-handicap) + (debug:print-info 0 *default-log-port* "synthetic sync delay of "delay-handicap" seconds completed") + ) + + ;; first pass implementation, just insert all changed rows + (for-each + (lambda (targdb) + (let* ((db (db:dbdat-get-db targdb)) + (drp-trigger (if (member "last_update" field-names) + (db:drop-trigger db tablename) + #f)) + (is-trigger-dropped (if (member "last_update" field-names) + (db:is-trigger-dropped db tablename) + #f)) + (stmth (sqlite3:prepare db full-ins))) + ;; (db:delay-if-busy targdb) ;; NO WAITING + (if (member "last_update" field-names) + (debug:print-info 3 *default-log-port* "is-trigger-dropped: " is-trigger-dropped)) + (for-each + (lambda (fromdat-lst) + (sqlite3:with-transaction + db + (lambda () + (for-each ;; + (lambda (fromrow) + (let* ((a (vector-ref fromrow 0)) + (curr (hash-table-ref/default todat a #f)) + (same #t)) + (let loop ((i 0)) + (if (or (not curr) + (not (equal? (vector-ref fromrow i)(vector-ref curr i)))) + (set! same #f)) + (if (and same + (< i (- num-fields 1))) + (loop (+ i 1)))) + (if (not same) + (begin + (apply sqlite3:execute stmth (vector->list fromrow)) + (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0))))))) + fromdat-lst)))) + fromdats) + (sqlite3:finalize! stmth) + (if (member "last_update" field-names) + (db:create-trigger db tablename)))) + (append (list todb) slave-dbs)))) + tbls) + (let* ((runtime (- (current-milliseconds) start-time)) + (should-print (or (debug:debug-mode 12) + (common:low-noise-print 120 "db sync" (> runtime 500))))) ;; low and high sync times treated as separate. + (if should-print (debug:print 3 *default-log-port* "INFO: db sync, total run time " runtime " ms")) + (for-each + (lambda (dat) + (let ((tblname (car dat)) + (count (cdr dat))) + (set! tot-count (+ tot-count count)) + (if (> count 0) + (if should-print (debug:print 0 *default-log-port* (format #f " ~10a ~5a" tblname count)))))) + (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b)))))) + tot-count))))) + +(define (db:patch-schema-rundb frundb) + ;; + ;; remove this some time after September 2016 (added in version v1.6031 + ;; + (for-each + (lambda (table-name) + (handle-exceptions + exn + (if (string-match ".*duplicate.*" ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* "Column last_update already added to " table-name " table") + (db:general-sqlite-error-dump exn "alter table " table-name " ..." #f "none")) + (sqlite3:execute + frundb + (conc "ALTER TABLE " table-name " ADD COLUMN last_update INTEGER DEFAULT 0"))) + (sqlite3:execute + frundb + (conc "DROP TRIGGER IF EXISTS update_" table-name "_trigger;")) + (sqlite3:execute + frundb + (conc "CREATE TRIGGER IF NOT EXISTS update_" table-name "_trigger AFTER UPDATE ON " table-name " + FOR EACH ROW + BEGIN + UPDATE " table-name " SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;")) + ) + '("tests" "test_steps" "test_data"))) + +(define (db:patch-schema-maindb maindb) + ;; + ;; remove all these some time after september 2016 (added in v1.6031 + ;; + (for-each + (lambda (column type default) + (handle-exceptions + exn + (if (string-match ".*duplicate.*" ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* "Column " column " already added to runs table") + (db:general-sqlite-error-dump exn "alter table runs ..." #f "none")) + (sqlite3:execute + maindb + (conc "ALTER TABLE runs ADD COLUMN " column " " type " DEFAULT " default)))) + (list "last_update" "contour") + (list "INTEGER" "TEXT" ) + (list "0" "''" )) + ;; these schema changes don't need exception handling + (sqlite3:execute + maindb + "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs + FOR EACH ROW + BEGIN + UPDATE runs SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;") + (sqlite3:execute maindb "CREATE TABLE IF NOT EXISTS run_stats ( + id INTEGER PRIMARY KEY, + run_id INTEGER, + state TEXT, + status TEXT, + count INTEGER, + last_update INTEGER DEFAULT (strftime('%s','now')))") + (sqlite3:execute maindb "CREATE TRIGGER IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats + FOR EACH ROW + BEGIN + UPDATE run_stats SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;") + (sqlite3:execute maindb "CREATE TABLE IF NOT EXISTS test_rundat ( + id INTEGER PRIMARY KEY, + test_id INTEGER, + update_time TIMESTAMP, + cpuload INTEGER DEFAULT -1, + diskfree INTEGER DEFAULT -1, + diskusage INTGER DEFAULT -1, + run_duration INTEGER DEFAULT 0);")) + +(define (db:adj-target db) + (let ((fields (configf:get-section *configdat* "fields")) + (field-num 0)) + ;; because we will be refreshing the keys table it is best to clear it here + (sqlite3:execute db "DELETE FROM keys;") + (for-each + (lambda (field) + (let ((column (car field)) + (spec (cadr field))) + (handle-exceptions + exn + (if (string-match ".*duplicate.*" ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* "Target field " column " already exists in the runs table") + (db:general-sqlite-error-dump exn "alter table runs ..." #f "none")) + ;; Add the column if needed + (sqlite3:execute + db + (conc "ALTER TABLE runs ADD COLUMN " column " " spec))) + ;; correct the entry in the keys column + (sqlite3:execute + db + "INSERT INTO keys (id,fieldname,fieldtype) VALUES (?,?,?);" + field-num column spec) + ;; fill in blanks (not allowed as it would be part of the path + (sqlite3:execute + db + (conc "UPDATE runs SET " column "='x' WHERE " column "='';")) + (set! field-num (+ field-num 1)))) + fields))) + +(define *global-db-store* (make-hash-table)) + +(define (db:get-access-mode) + (if (args:get-arg "-use-db-cache") 'cached 'rmt)) + +;; Add db direct +;; +(define (db:dispatch-query access-mode rmt-cmd db-cmd . params) + (if (eq? access-mode 'cached) + (debug:print 2 *default-log-port* "not doing cached calls right now")) +;; (apply db:call-with-cached-db db-cmd params) + (apply rmt-cmd params)) +;;) + +;; return the target db handle so it can be used +;; +(define (db:cache-for-read-only source target #!key (use-last-update #f)) + (if (and (hash-table-ref/default *global-db-store* target #f) + (>= (file-modification-time target)(file-modification-time source))) + (hash-table-ref *global-db-store* target) + (let* ((toppath (launch:setup)) + (targ-db-last-mod (if (common:file-exists? target) + (file-modification-time target) + 0)) + (cache-db (or (hash-table-ref/default *global-db-store* target #f) + (db:open-megatest-db path: target))) + (source-db (db:open-megatest-db path: source)) + (curr-time (current-seconds)) + (res '()) + (last-update (if use-last-update (cons "last_update" targ-db-last-mod) #f))) + (db:sync-tables (db:sync-main-list source-db) last-update source-db cache-db) + (db:sync-tables db:sync-tests-only last-update source-db cache-db) + (hash-table-set! *global-db-store* target cache-db) + cache-db))) + +;; ;; call a proc with a cached db +;; ;; +;; (define (db:call-with-cached-db proc . params) +;; ;; first cache the db in /tmp +;; (let* ((cname-part (conc "megatest_cache/" (common:get-testsuite-name))) +;; (fname (conc (common:get-area-path-signature) ".db")) +;; (cache-dir (common:get-create-writeable-dir +;; (list (conc "/tmp/" (current-user-name) "/" cname-part) +;; (conc "/tmp/" (current-user-name) "-" cname-part) +;; (conc "/tmp/" (current-user-name) "_" cname-part)))) +;; (megatest-db (conc *toppath* "/megatest.db"))) +;; ;; (debug:print-info 0 *default-log-port* "Using cache dir " cache-dir) +;; (if (not cache-dir) +;; (begin +;; (debug:print 0 *default-log-port* "ERROR: Failed to find an area to write the cache db") +;; (exit 1)) +;; (let* ((th1 (make-thread +;; (lambda () +;; (if (and (common:file-exists? megatest-db) +;; (file-writable? megatest-db)) +;; (begin +;; (db:sync-to-megatest.db dbstruct 'timestamps) ;; internally mutexes on *db-local-sync* +;; (debug:print-info 2 *default-log-port* "Done syncing to megatest.db")))) +;; "call-with-cached-db sync-to-megatest.db")) +;; (cache-db (db:cache-for-read-only +;; megatest-db +;; (conc cache-dir "/" fname) +;; use-last-update: #t))) +;; (thread-start! th1) +;; (apply proc cache-db params) +;; )))) + +;; options: +;; +;; 'killservers - kills all servers +;; 'dejunk - removes junk records +;; 'adj-testids - move test-ids into correct ranges +;; 'old2new - sync megatest.db to /tmp/.../megatest.db and /tmp/.../megatest_ref.db +;; 'new2old - sync /tmp/.../megatest.db to megatest.db and /tmp/.../megatest_ref.db (and update data_synced) +;; 'closeall - close all opened dbs +;; 'schema - attempt to apply schema changes +;; run-ids: '(1 2 3 ...) or #f (for all) +;; +(define (db:multi-db-sync dbstruct . options) + ;; (if (not (launch:setup)) + ;; (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.") + (let* ((mtdb (dbr:dbstruct-mtdb dbstruct)) + (tmpdb (db:get-db dbstruct)) + (refndb (dbr:dbstruct-refndb dbstruct)) + (allow-cleanup #t) ;; (if run-ids #f #t)) + (servers (server:get-list *toppath*)) ;; (tasks:get-all-servers (db:delay-if-busy tdbdat))) + (data-synced 0)) ;; count of changed records (I hope) + + (for-each + (lambda (option) + + (case option + ;; kill servers + ((killservers) + (for-each + (lambda (server) + (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " server ", exn=" exn) + #f) + (match-let (((mod-time host port start-time server-id pid) server)) + (if (and host pid) + (tasks:kill-server host pid))))) + servers) + + ;; /tmp/bjbarcla/megatest_localdb/fullrun/.nfs.pdx.disks.icf_env_disk001.bjbarcla.gwa.issues.mtdev.matt-bisect.megatest.ext-tests.runs.all.v1.65.1.6524.dev.bb-24-justrollup-f8.rollup.fullrun/megatest.db.lock + (delete-file* (common:get-sync-lock-filepath)) + ) + + ;; clear out junk records + ;; + ((dejunk) + ;; (db:delay-if-busy mtdb) ;; ok to delay on mtdb + (when (file-writable? (db:dbdat-get-path mtdb)) (db:clean-up mtdb)) + (db:clean-up tmpdb) + (db:clean-up refndb)) + + ;; sync runs, test_meta etc. + ;; + ((old2new) + (set! data-synced + (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb tmpdb refndb) + data-synced))) + + ;; now ensure all newdb data are synced to megatest.db + ;; do not use the run-ids list passed in to the function + ;; + ((new2old) + (set! data-synced + (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f tmpdb refndb mtdb) + data-synced))) + + ((adj-target) + (db:adj-target (db:dbdat-get-db mtdb)) + (db:adj-target (db:dbdat-get-db tmpdb)) + (db:adj-target (db:dbdat-get-db refndb))) + + ((schema) + (db:patch-schema-maindb (db:dbdat-get-db mtdb)) + (db:patch-schema-maindb (db:dbdat-get-db tmpdb)) + (db:patch-schema-maindb (db:dbdat-get-db refndb)) + (db:patch-schema-rundb (db:dbdat-get-db mtdb)) + (db:patch-schema-rundb (db:dbdat-get-db tmpdb)) + (db:patch-schema-rundb (db:dbdat-get-db refndb)))) + + (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb)) + options) + data-synced)) + +(define (db:tmp->megatest.db-sync dbstruct last-update) + (let* ((mtdb (dbr:dbstruct-mtdb dbstruct)) + (tmpdb (db:get-db dbstruct)) + (refndb (dbr:dbstruct-refndb dbstruct)) + (res (db:sync-tables (db:sync-all-tables-list dbstruct) last-update tmpdb refndb mtdb))) + (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb) + res)) + +;;;; run-ids +;; if #f use *db-local-sync* : or 'local-sync-flags +;; if #t use timestamps : or 'timestamps +;; +;; NB// no-sync-db is the db handle, not a flag! +;; +(define (db:sync-to-megatest.db dbstruct #!key (no-sync-db #f)) + (let* ((start-time (current-seconds)) + (last-full-update (if no-sync-db + (db:no-sync-get/default no-sync-db "LAST_FULL_UPDATE" 0) + 0)) + (full-sync-needed (> (- start-time last-full-update) 3600)) ;; every hour do a full sync + (last-update (if full-sync-needed + 0 + (if no-sync-db + (db:no-sync-get/default no-sync-db "LAST_UPDATE" 0) + 0))) ;; (or (db:get-var dbstruct "LAST_UPDATE") 0)) + (sync-needed (> (- start-time last-update) 6)) + (res (if (or sync-needed ;; don't sync if a sync already occurred in the past 6 seconds + full-sync-needed) + (begin + (if no-sync-db + (begin + (if full-sync-needed (db:no-sync-set no-sync-db "LAST_FULL_UPDATE" start-time)) + (db:no-sync-set no-sync-db "LAST_UPDATE" start-time))) + (db:tmp->megatest.db-sync dbstruct last-update)) + 0)) + (sync-time (- (current-seconds) start-time))) + (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id)) + (if (common:low-noise-print 30 "sync new to old") + (if sync-needed + (debug:print-info 0 *default-log-port* "Sync of " res " records from newdb to olddb completed in " sync-time " seconds pid="(current-process-id)) + (debug:print-info 0 *default-log-port* "No sync needed, last updated " (- start-time last-update) " seconds ago"))) + res)) + +;; keeping it around for debugging purposes only +#;(define (open-run-close-no-exception-handling proc idb . params) + (debug:print-info 11 *default-log-port* "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params) + (print "I don't work anymore. open-run-close-no-exception-handling needs fixing or removing...") + (exit) + (if (or *db-write-access* + (not #t)) ;; was: (member proc * db:all-write-procs *))) + (let* ((db (cond + ((pair? idb) (db:dbdat-get-db idb)) + ((sqlite3:database? idb) idb) + ((not idb) (debug:print-error 0 *default-log-port* "cannot open-run-close with #f anymore")) + ((procedure? idb) (idb)) + (else (debug:print-error 0 *default-log-port* "cannot open-run-close with #f anymore")))) + (res #f)) + (set! res (apply proc db params)) + (if (not idb)(sqlite3:finalize! dbstruct)) + (debug:print-info 11 *default-log-port* "open-run-close-no-exception-handling END" ) + res) + #f)) + +#;(define (open-run-close-exception-handling proc idb . params) + (handle-exceptions + exn + (let ((sleep-time (pseudo-random-integer 30)) + (err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) + (case err-status + ((busy) + (thread-sleep! sleep-time)) + (else + (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) + (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status) exn)) + (print-call-chain (current-error-port)) + (thread-sleep! sleep-time) + (debug:print-info 0 *default-log-port* "trying db call one more time....this may never recover, if necessary kill process " (current-process-id) " on host " (get-host-name) " to clean up"))) + (apply open-run-close-exception-handling proc idb params)) + (apply open-run-close-no-exception-handling proc idb params))) + +;; (define open-run-close +#;(define open-run-close open-run-close-exception-handling) + ;; open-run-close-no-exception-handling +;; open-run-close-exception-handling) +;;) + +(define db:trigger-list + (list (list "update_runs_trigger" "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs + FOR EACH ROW + BEGIN + UPDATE runs SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;" ) + (list "update_run_stats_trigger" "CREATE TRIGGER IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats + FOR EACH ROW + BEGIN + UPDATE run_stats SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;" ) + (list "update_tests_trigger" "CREATE TRIGGER IF NOT EXISTS update_tests_trigger AFTER UPDATE ON tests + FOR EACH ROW + BEGIN + UPDATE tests SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;" ) + (list "update_teststeps_trigger" "CREATE TRIGGER IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps + FOR EACH ROW + BEGIN + UPDATE test_steps SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;" ) + (list "update_test_data_trigger" "CREATE TRIGGER IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data + FOR EACH ROW + BEGIN + UPDATE test_data SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;" ))) + +(define (db:create-all-triggers dbstruct) +(db:with-db + dbstruct #f #f + (lambda (db) +(db:create-triggers db)))) + +(define (db:create-triggers db) + (for-each (lambda (key) + (sqlite3:execute db (cadr key))) + db:trigger-list)) + +(define (db:drop-all-triggers dbstruct) + (db:with-db + dbstruct #f #f + (lambda (db) + (db:drop-triggers db)))) + +(define (db:is-trigger-dropped db tbl-name) + (let* ((trigger-name (if (equal? tbl-name "test_steps") + "update_teststeps_trigger" + (conc "update_" tbl-name "_trigger"))) + (res #f)) + (sqlite3:for-each-row + (lambda (name) + (if (equal? name trigger-name) + (set! res #t))) + db + "SELECT name FROM sqlite_master WHERE type = 'trigger' ;" + ))) + +(define (db:drop-triggers db) + (for-each + (lambda (key) + (sqlite3:execute db (conc "drop trigger if exists " (car key)))) + db:trigger-list)) + +(define (db:drop-trigger db tbl-name) + (let* ((trigger-name (if (equal? tbl-name "test_steps") + "update_teststeps_trigger" + (conc "update_" tbl-name "_trigger")))) + (for-each + (lambda (key) + (if (equal? (car key) trigger-name) + (sqlite3:execute db (conc "drop trigger if exists " trigger-name)))) + db:trigger-list))) + +(define (db:create-trigger db tbl-name) + (let* ((trigger-name (if (equal? tbl-name "test_steps") + "update_teststeps_trigger" + (conc "update_" tbl-name "_trigger")))) + (for-each (lambda (key) + (if (equal? (car key) trigger-name) + (sqlite3:execute db (cadr key)))) + db:trigger-list))) + + +(define (db:initialize-main-db dbdat) + (when (not *configinfo*) + (launch:setup)) ;; added because Elena was getting stack dump because *configinfo* below was #f. + (let* ((configdat (car *configinfo*)) ;; tut tut, global warning... + (keys (keys:config-get-fields configdat)) + (havekeys (> (length keys) 0)) + (keystr (keys->keystr keys)) + (fieldstr (keys:make-key/field-string configdat)) + (db (db:dbdat-get-db dbdat))) + (for-each (lambda (key) + (let ((keyn key)) + (if (member (string-downcase keyn) + (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" + "pass_count" "contour")) + (begin + (print "ERROR: your key cannot be named " keyn " as this conflicts with the same named field in the runs table, you must remove your megatest.db and /.db before trying again.") + (exit 1))))) + keys) + (sqlite3:with-transaction + db + (lambda () + ;; handle-exceptions + ;; exn + ;; (begin + ;; (debug:print 0 "ERROR: Failed to create tables. Look at your [fields] section, should be: fieldname TEXT DEFAULT 'yourdefault'") + ;; (exit)) + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS keys (id INTEGER PRIMARY KEY, fieldname TEXT, fieldtype TEXT, CONSTRAINT keyconstraint UNIQUE (fieldname));") + (for-each (lambda (key) + (sqlite3:execute db "INSERT OR REPLACE INTO keys (fieldname,fieldtype) VALUES (?,?);" key "TEXT")) + keys) + (sqlite3:execute db (conc + "CREATE TABLE IF NOT EXISTS runs (id INTEGER PRIMARY KEY, \n " + fieldstr (if havekeys "," "") " + runname TEXT DEFAULT 'norun', + contour TEXT DEFAULT '', + state TEXT DEFAULT '', + status TEXT DEFAULT '', + owner TEXT DEFAULT '', + event_time TIMESTAMP DEFAULT (strftime('%s','now')), + comment TEXT DEFAULT '', + fail_count INTEGER DEFAULT 0, + pass_count INTEGER DEFAULT 0, + last_update INTEGER DEFAULT (strftime('%s','now')), + CONSTRAINT runsconstraint UNIQUE (runname" (if havekeys "," "") keystr "));")) + ;; All triggers created at once in end + ;;(sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs + ;; FOR EACH ROW + ;; BEGIN + ;; UPDATE runs SET last_update=(strftime('%s','now')) + ;; WHERE id=old.id; + ;; END;") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS run_stats ( + id INTEGER PRIMARY KEY, + run_id INTEGER, + state TEXT, + status TEXT, + count INTEGER, + last_update INTEGER DEFAULT (strftime('%s','now')))") + ;; All triggers created at once in end + ;; (sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats + ;; FOR EACH ROW + ;; BEGIN + ;; UPDATE run_stats SET last_update=(strftime('%s','now')) + ;; WHERE id=old.id; + ;; END;") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_meta ( + id INTEGER PRIMARY KEY, + testname TEXT DEFAULT '', + author TEXT DEFAULT '', + owner TEXT DEFAULT '', + description TEXT DEFAULT '', + reviewed TIMESTAMP, + iterated TEXT DEFAULT '', + avg_runtime REAL, + avg_disk REAL, + tags TEXT DEFAULT '', + jobgroup TEXT DEFAULT 'default', + CONSTRAINT test_meta_constraint UNIQUE (testname));") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, + action TEXT DEFAULT '', + owner TEXT, + state TEXT DEFAULT 'new', + target TEXT DEFAULT '', + name TEXT DEFAULT '', + testpatt TEXT DEFAULT '', + keylock TEXT, + params TEXT, + creation_time TIMESTAMP DEFAULT (strftime('%s','now')), + execution_time TIMESTAMP);") + ;; archive disk areas, cached info from [archive-disks] + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_disks ( + id INTEGER PRIMARY KEY, + archive_area_name TEXT, + disk_path TEXT, + last_df INTEGER DEFAULT -1, + last_df_time TIMESTAMP DEFAULT (strftime('%s','now')), + creation_time TIMESTAMP DEFAULT (strftime('%s','now')));") + ;; individual bup (or tar) data chunks + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_blocks ( + id INTEGER PRIMARY KEY, + archive_disk_id INTEGER, + disk_path TEXT, + last_du INTEGER DEFAULT -1, + last_du_time TIMESTAMP DEFAULT (strftime('%s','now')), + creation_time TIMESTAMP DEFAULT (strftime('%s','now')));") + ;; tests allocated to what chunks. reusing a chunk for a test/item_path is very efficient + ;; NB// the per run/test recording of where the archive is stored is done in the test + ;; record. + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_allocations ( + id INTEGER PRIMARY KEY, + archive_block_id INTEGER, + testname TEXT, + item_path TEXT, + creation_time TIMESTAMP DEFAULT (strftime('%s','now')));") + ;; move this clean up call somewhere else + (sqlite3:execute db "DELETE FROM tasks_queue WHERE state='done' AND creation_time < ?;" (- (current-seconds)(* 24 60 60))) ;; remove older than 24 hrs + (sqlite3:execute db (conc "CREATE INDEX IF NOT EXISTS runs_index ON runs (runname" (if havekeys "," "") keystr ");")) + ;; (sqlite3:execute db "CREATE VIEW runs_tests AS SELECT * FROM runs INNER JOIN tests ON runs.id=tests.run_id;") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS extradat (id INTEGER PRIMARY KEY, run_id INTEGER, key TEXT, val TEXT);") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS metadat (id INTEGER PRIMARY KEY, var TEXT, val TEXT, + CONSTRAINT metadat_constraint UNIQUE (var));") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS access_log (id INTEGER PRIMARY KEY, user TEXT, accessed TIMESTAMP, args TEXT);") + ;; Must do this *after* running patch db !! No more. + ;; cannot use db:set-var since it will deadlock, hardwire the code here + (sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" "MEGATEST_VERSION" (common:version-signature)) + (debug:print-info 11 *default-log-port* "db:initialize END") ;; )))) + + ;;====================================================================== + ;; R U N S P E C I F I C D B + ;;====================================================================== + + ;; (define (db:initialize-run-id-db db) + ;; (sqlite3:with-transaction + ;; db + ;; (lambda () + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS tests + (id INTEGER PRIMARY KEY, + run_id INTEGER DEFAULT -1, + testname TEXT DEFAULT 'noname', + host TEXT DEFAULT 'n/a', + cpuload REAL DEFAULT -1, + diskfree INTEGER DEFAULT -1, + uname TEXT DEFAULT 'n/a', + rundir TEXT DEFAULT '/tmp/badname', + shortdir TEXT DEFAULT '/tmp/badname', + item_path TEXT DEFAULT '', + state TEXT DEFAULT 'NOT_STARTED', + status TEXT DEFAULT 'FAIL', + attemptnum INTEGER DEFAULT 0, + final_logf TEXT DEFAULT 'logs/final.log', + logdat TEXT DEFAULT '', + run_duration INTEGER DEFAULT 0, + comment TEXT DEFAULT '', + event_time TIMESTAMP DEFAULT (strftime('%s','now')), + fail_count INTEGER DEFAULT 0, + pass_count INTEGER DEFAULT 0, + archived INTEGER DEFAULT 0, -- 0=no, > 1=archive block id where test data can be found + last_update INTEGER DEFAULT (strftime('%s','now')), + CONSTRAINT testsconstraint UNIQUE (run_id, testname, item_path));") + ;; deprecated -- (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_index ON tests (run_id, testname, item_path, uname);") + + (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_run_id_index ON tests (run_id);") ;; new + (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_testname_index ON tests (testname,item_path);") ;; new + (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_state_status_index ON tests (state, status); ") ;; new + + ;; All triggers created at once in end + ;;(sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_tests_trigger AFTER UPDATE ON tests + ;; FOR EACH ROW + ;; BEGIN + ;; UPDATE tests SET last_update=(strftime('%s','now')) + ;; WHERE id=old.id; + ;; END;") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_steps + (id INTEGER PRIMARY KEY, + test_id INTEGER, + stepname TEXT, + state TEXT DEFAULT 'NOT_STARTED', + status TEXT DEFAULT 'n/a', + event_time TIMESTAMP, + comment TEXT DEFAULT '', + logfile TEXT DEFAULT '', + last_update INTEGER DEFAULT (strftime('%s','now')), + CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state));") + (sqlite3:execute db "CREATE INDEX IF NOT EXISTS teststeps_index ON tests (run_id, testname, item_path);") + ;; All triggers created at once in end + ;;(sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps + ;; FOR EACH ROW + ;; BEGIN + ;; UPDATE test_steps SET last_update=(strftime('%s','now')) + ;; WHERE id=old.id; + ;; END;") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_data (id INTEGER PRIMARY KEY, + test_id INTEGER, + category TEXT DEFAULT '', + variable TEXT, + value REAL, + expected REAL, + tol REAL, + units TEXT, + comment TEXT DEFAULT '', + status TEXT DEFAULT 'n/a', + type TEXT DEFAULT '', + last_update INTEGER DEFAULT (strftime('%s','now')), + CONSTRAINT test_data_constraint UNIQUE (test_id,category,variable));") + (sqlite3:execute db "CREATE INDEX IF NOT EXISTS test_data_index ON test_data (test_id);") + ;; All triggers created at once in end + ;;(sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data + ;; FOR EACH ROW + ;; BEGIN + ;; UPDATE test_data SET last_update=(strftime('%s','now')) + ;; WHERE id=old.id; + ;; END;") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_rundat ( + id INTEGER PRIMARY KEY, + test_id INTEGER, + update_time TIMESTAMP, + cpuload INTEGER DEFAULT -1, + diskfree INTEGER DEFAULT -1, + diskusage INTGER DEFAULT -1, + run_duration INTEGER DEFAULT 0);") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archives ( + id INTEGER PRIMARY KEY, + test_id INTEGER, + state TEXT DEFAULT 'new', + status TEXT DEFAULT 'n/a', + archive_type TEXT DEFAULT 'bup', + du INTEGER, + archive_path TEXT);"))) + (print "creating triggers from init") + (db:create-triggers db) + db)) ;; ) + +;;====================================================================== +;; A R C H I V E S +;;====================================================================== + +;; dneeded is minimum space needed, scan for existing archives that +;; are on disks with adequate space and already have this test/itempath +;; archived +;; +(define (db:archive-get-allocations dbstruct testname itempath dneeded) + (let* ((dbdat (db:get-db dbstruct)) ;; archive tables are in main.db + (db (db:dbdat-get-db dbdat)) + (res '()) + (blocks '())) ;; a block is an archive chunck that can be added too if there is space + (sqlite3:for-each-row + (lambda (id archive-disk-id disk-path last-du last-du-time) + (set! res (cons (vector id archive-disk-id disk-path last-du last-du-time) res))) + db + "SELECT b.id,b.archive_disk_id,b.disk_path,b.last_du,b.last_du_time FROM archive_blocks AS b + INNER JOIN archive_allocations AS a ON a.archive_block_id=b.id + WHERE a.testname=? AND a.item_path=?;" + testname itempath) + ;; Now res has list of candidate paths, look in archive_disks for candidate with potential free space + (if (null? res) + '() + (sqlite3:for-each-row + (lambda (id archive-area-name disk-path last-df last-df-time) + (set! blocks (cons (vector id archive-area-name disk-path last-df last-df-time) blocks))) + db + (conc + "SELECT d.id,d.archive_area_name,disk_path,last_df,last_df_time FROM archive_disks AS d + INNER JOIN archive_blocks AS b ON d.id=b.archive_disk_id + WHERE b.id IN (" (string-intersperse (map conc res) ",") ") AND + last_df > ?;") + dneeded)) + (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) + blocks)) + +;; returns id of the record, register a disk allocated to archiving and record it's last known +;; available space +;; +(define (db:archive-register-disk dbstruct bdisk-name bdisk-path df) + (let* ((dbdat (db:get-db dbstruct)) ;; archive tables are in main.db + (db (db:dbdat-get-db dbdat)) + (res #f)) + (sqlite3:for-each-row + (lambda (id) + (set! res id)) + db + "SELECT id FROM archive_disks WHERE archive_area_name=? AND disk_path=?;" + bdisk-name bdisk-path) + (if res ;; record exists, update df and return id + (begin + (sqlite3:execute db "UPDATE archive_disks SET last_df=?,last_df_time=(strftime('%s','now')) + WHERE archive_area_name=? AND disk_path=?;" + df bdisk-name bdisk-path) + (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) + res) + (begin + (sqlite3:execute + db + "INSERT OR REPLACE INTO archive_disks (archive_area_name,disk_path,last_df) + VALUES (?,?,?);" + bdisk-name bdisk-path df) + (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) + (db:archive-register-disk dbstruct bdisk-name bdisk-path df))))) + +;; record an archive path created on a given archive disk (identified by it's bdisk-id) +;; if path starts with / then it is full, otherwise it is relative to the archive disk +;; preference is to store the relative path. +;; +(define (db:archive-register-block-name dbstruct bdisk-id archive-path #!key (du #f)) + (let* ((dbdat (db:get-db dbstruct)) ;; archive tables are in main.db + (db (db:dbdat-get-db dbdat)) + (res #f)) + ;; first look to see if this path is already registered + (sqlite3:for-each-row + (lambda (id) + (set! res id)) + db + "SELECT id FROM archive_blocks WHERE archive_disk_id=? AND disk_path=?;" + bdisk-id archive-path) + (if res ;; record exists, update du if applicable and return res + (if du (sqlite3:execute db "UPDATE archive_blocks SET last_du=?,last_du_time=(strftime('%s','now')) + WHERE archive_disk_id=? AND disk_path=?;" + bdisk-id archive-path du)) + (begin + (sqlite3:execute db "INSERT OR REPLACE INTO archive_blocks (archive_disk_id,disk_path,last_du) + VALUES (?,?,?);" + bdisk-id archive-path (or du 0)) + (set! res (db:archive-register-block-name dbstruct bdisk-id archive-path du: du)))) + (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) + res)) + + +;; The "archived" field in tests is overloaded; 0 = not archived, > 0 archived in block with given id +;; +(define (db:test-set-archive-block-id dbstruct run-id test-id archive-block-id) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (sqlite3:execute db "UPDATE tests SET archived=? WHERE id=?;" + archive-block-id test-id)))) + +;; Look up the archive block info given a block-id +;; +(define (db:test-get-archive-block-info dbstruct archive-block-id) + (db:with-db + dbstruct + #f + #f + (lambda (db) + (let ((res #f)) + (sqlite3:for-each-row + ;; 0 1 2 3 4 5 + (lambda (id archive-disk-id disk-path last-du last-du-time creation-time) + (set! res (vector id archive-disk-id disk-path last-du last-du-time creation-time))) + db + "SELECT id,archive_disk_id,disk_path,last_du,last_du_time,creation_time FROM archive_blocks WHERE id=?;" + archive-block-id) + res)))) + +;; (define (db:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey) +;; (let* ((dbdat (db:get-db dbstruct #f)) ;; archive tables are in main.db +;; (db (db:dbdat-get-db dbdat)) +;; (res '()) +;; (blocks '())) ;; a block is an archive chunck that can be added too if there is space +;; (sqlite3:for-each-row #f) + +;;====================================================================== +;; L O G G I N G D B +;;====================================================================== + +(define (open-logging-db) + (let* ((dbpath (conc (if *toppath* (conc *toppath* "/") "") "logging.db")) ;; fname) + (dbexists (common:file-exists? dbpath)) + (db (sqlite3:open-database dbpath)) + (handler (sqlite3:make-busy-timeout (if (args:get-arg "-override-timeout") + (string->number (args:get-arg "-override-timeout")) + 136000)))) ;; 136000))) + (sqlite3:set-busy-handler! db handler) + (if (not dbexists) + (begin + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS log (id INTEGER PRIMARY KEY,event_time TIMESTAMP DEFAULT (strftime('%s','now')),logline TEXT,pwd TEXT,cmdline TEXT,pid INTEGER);") + (db:set-sync db) ;; (sqlite3:execute db (conc "PRAGMA synchronous = 0;")) + )) + db)) + +(define (db:log-local-event . loglst) + (let ((logline (apply conc loglst))) + (db:log-event logline))) + +(define (db:log-event logline) + (let ((db (open-logging-db))) + (sqlite3:execute db "INSERT INTO log (logline,pwd,cmdline,pid) VALUES (?,?,?,?);" + logline + (current-directory) + (string-intersperse (argv) " ") + (current-process-id)) + (sqlite3:finalize! db) + logline)) + +;;====================================================================== +;; D B U T I L S +;;====================================================================== + +;;====================================================================== +;; M A I N T E N A N C E +;;====================================================================== + +(define (db:have-incompletes? dbstruct run-id ovr-deadtime) + (let* ((incompleted '()) + (oldlaunched '()) + (toplevels '()) + (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) + (deadtime (if (and deadtime-str + (string->number deadtime-str)) + (string->number deadtime-str) + 72000))) ;; twenty hours + (db:with-db + dbstruct #f #f + (lambda (db) + (if (number? ovr-deadtime)(set! deadtime ovr-deadtime)) + + ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes + ;; + ;; HOWEVER: this code in run:test seems to work fine + ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) + ;; (db:test-get-run_duration testdat))) + ;; 600) + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (begin + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) + (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) + db + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');" + run-id deadtime) + + ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config + ;; + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) + db + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');" + run-id) + + (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") + (if (and (null? incompleted) + (null? oldlaunched) + (null? toplevels)) + #f + #t))))) + +(define (db:get-status-from-final-status-file run-dir) + (let ((infile (conc run-dir "/.final-status"))) + ;; first verify we are able to write the output file + (if (not (file-readable? infile)) + (begin + (debug:print 0 *default-log-port* "ERROR: cannot read " infile) + (debug:print 0 *default-log-port* "ERROR: run-dir is " run-dir) + #f + ) + (with-input-from-file infile read-lines) + ))) + +;; select end_time-now from +;; (select testname,item_path,event_time+run_duration as +;; end_time,strftime('%s','now') as now from tests where state in +;; ('RUNNING','REMOTEHOSTSTART','LAUNCHED')); + +(define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime) + (let* ((incompleted '()) + (oldlaunched '()) + (toplevels '()) + ;; The default running-deadtime is 720 seconds = 12 minutes. + ;; "(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))" = 200 + (2 * (200 + 30 + 30)) + (deadtime-trim (or ovr-deadtime (configf:lookup-number *configdat* "setup" "deadtime"))) + (server-start-allowance 200) + (server-overloaded-budget 200) + (launch-monitor-off-time (or (configf:lookup-number *configdat* "setup" "test-stats-update-period") 30)) + (launch-monitor-on-time-budget 30) + (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget)) + (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30)) + (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default)) + (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period))) + (running-deadtime (or deadtime-trim running-deadtime-default)) ;; two minutes (30 seconds between updates, this leaves 3x grace period) + ) + (debug:print-info 4 *default-log-port* "running-deadtime = " running-deadtime) + (debug:print-info 4 *default-log-port* "deadtime-trim = " deadtime-trim) + + (db:with-db + dbstruct #f #f + (lambda (db) + (let* ((stmth1 (db:get-cache-stmth + dbstruct db + "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests + WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) + AND state IN ('RUNNING');")) + (stmth2 (db:get-cache-stmth + dbstruct db + "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests + WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) + AND state IN ('REMOTEHOSTSTART');")) + (stmth3 (db:get-cache-stmth + dbstruct db + "SELECT id,rundir,uname,testname,item_path FROM tests + WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 + AND state IN ('LAUNCHED');"))) + ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes + ;; + ;; HOWEVER: this code in run:test seems to work fine + ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) + ;; (db:test-get-run_duration testdat))) + ;; 600) + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path event-time run-duration) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (begin + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) + (begin + (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)) + (debug:print-info 0 *default-log-port* "Found old test in RUNNING state, test-id=" + test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds) + " event-time="event-time" run-duration="run-duration)))) + stmth1 + run-id running-deadtime) ;; default time 720 seconds + + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path event-time run-duration) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (begin + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) + (begin + (debug:print-info 0 *default-log-port* "Found old test in REMOTEHOSTSTART state, test-id=" test-id + " exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time + " run-duration="run-duration) + (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))) + stmth2 + run-id remotehoststart-deadtime) ;; default time 230 seconds + + ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config + ;; + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (begin + (debug:print-info 0 *default-log-port* "Found old test in LAUNCHED state, test-id=" test-id + " 1 day since event_time marked") + (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))) + stmth3 + run-id) + + (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " + (length toplevels) " old LAUNCHED toplevel tests and " + (length incompleted) " tests marked RUNNING but apparently dead.")) + + ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE. + ;; + ;; (db:delay-if-busy dbdat) + (let* ((min-incompleted-ids (map car incompleted)) ;; do 'em all + (all-ids (append min-incompleted-ids (map car oldlaunched)))) + (if (> (length all-ids) 0) + (begin + ;; (launch:is-test-alive "localhost" 435) + (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") + " as DEAD") + (for-each + (lambda (test-id) + (let* (;; (run-dir (db:test-get-rundir-from-test-id dbstruct run-id test-id)) + (tinfo (db:get-test-info-by-id dbstruct run-id test-id)) + (run-dir (db:test-get-rundir tinfo)) + (host (db:test-get-host tinfo)) + (pid (db:test-get-process_id tinfo)) + (result (db:get-status-from-final-status-file run-dir))) + (if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result))) + (begin + (debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD") + (db:set-state-status-and-roll-up-items + dbstruct run-id test-id 'foo "COMPLETED" "PASS" + "Test stopped responding but it has PASSED; marking it PASS in the DB.")) + (let ((is-alive (and (not (eq? pid 0)) ;; 0 is default in re-used field "attemptnum" where pid stored. + (launch:is-test-alive host pid)))) + (if is-alive + (debug:print 0 *default-log-port* "INFO: test " test-id " on host " host + " has a process on pid " pid ", NOT setting to DEAD.") + (begin + (debug:print 0 *default-log-port* "INFO: test " test-id + " final state/status is not COMPLETED/PASS. It is " result) + (db:set-state-status-and-roll-up-items + dbstruct run-id test-id 'foo "COMPLETED" "DEAD" + "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead."))))))) + ;; call end of eud of run detection for posthook - from merge, is it needed? + ;; (launch:end-of-run-check run-id) + all-ids) + ;;call end of eud of run detection for posthook + (launch:end-of-run-check run-id) + ))))))) + +;; BUG: Probably broken - does not explicitly use run-id in the query +;; +(define (db:top-test-set-per-pf-counts dbstruct run-id test-name) + (db:general-call dbstruct 'top-test-set-per-pf-counts (list test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name))) + +;; Clean out old junk and vacuum the database +;; +;; Ultimately do something like this: +;; +;; 1. Look at test records either deleted or part of deleted run: +;; a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown' +;; b. If test dir gone, delete the test record +;; 2. Look at run records +;; a. If have tests that are not deleted, set state='unknown' +;; b. .... +;; +(define (db:clean-up dbdat) + ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db") + (let* ((keep-record-age ( - (current-seconds) (common:hms-string->seconds (or (configf:lookup *configdat* "setup" "delete-record-age") "30d")))) + (db (db:dbdat-get-db dbdat)) + (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests)+(SELECT count(id) FROM runs);")) + (statements + (map (lambda (stmt) + (sqlite3:prepare db stmt)) + (list + ;; delete all tests that belong to runs that are 'deleted' + (conc "DELETE FROM tests WHERE run_id in (SELECT id FROM runs WHERE state='deleted') and last_update < " keep-record-age ";") + ;; delete all tests that are 'DELETED' + (conc "DELETE FROM tests WHERE state='DELETED' and last_update < " keep-record-age " ;") + ;; delete all tests that have no run + (conc "DELETE FROM tests WHERE run_id NOT IN (SELECT DISTINCT id FROM runs) and last_update < " keep-record-age "; ") + ;; delete all runs that are state='deleted' + (conc "DELETE FROM runs WHERE state='deleted' and last_update < " keep-record-age ";") + ;; delete empty runs + (conc "DELETE FROM runs WHERE id NOT IN (SELECT DISTINCT r.id FROM runs AS r INNER JOIN tests AS t ON t.run_id=r.id) and last_update < " keep-record-age ";") + ;; remove orphaned test_rundat entries + (conc "DELETE FROM test_rundat where test_id NOT IN (SELECT id FROM tests);") + ;; remove orphaned test_steps entries + (conc "DELETE FROM test_steps WHERE test_id NOT IN (SELECT id FROM tests);") + ;; remove orphaned test_dat entries + (conc "DELETE FROM test_data WHERE test_id NOT IN (SELECT id FROM tests);") + + )))) + ;; (db:delay-if-busy dbdat) + ;(debug:print-info 0 *default-log-port* statements) + (sqlite3:with-transaction + db + (lambda () + (sqlite3:for-each-row (lambda (tot) + (debug:print-info 0 *default-log-port* "Records count before clean: " tot)) + count-stmt) + (map sqlite3:execute statements) + (sqlite3:for-each-row (lambda (tot) + (debug:print-info 0 *default-log-port* "Records count after clean: " tot)) + count-stmt))) + (map sqlite3:finalize! statements) + (sqlite3:finalize! count-stmt) + ;; (db:find-and-mark-incomplete db) + ;; (db:delay-if-busy dbdat) + (sqlite3:execute db "VACUUM;"))) + +;; Clean out old junk and vacuum the database +;; +;; Ultimately do something like this: +;; +;; 1. Look at test records either deleted or part of deleted run: +;; a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown' +;; b. If test dir gone, delete the test record +;; 2. Look at run records +;; a. If have tests that are not deleted, set state='unknown' +;; b. .... +;; +(define (db:clean-up-rundb dbdat) + ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db") + (let* ((db (db:dbdat-get-db dbdat)) + (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests);")) + (statements + (map (lambda (stmt) + (sqlite3:prepare db stmt)) + (list + ;; delete all tests that belong to runs that are 'deleted' + ;; (conc "DELETE FROM tests WHERE run_id NOT IN (" (string-intersperse (map conc valid-runs) ",") ");") + ;; delete all tests that are 'DELETED' + "DELETE FROM tests WHERE state='DELETED';" + )))) + ;; (db:delay-if-busy dbdat) + (sqlite3:with-transaction + db + (lambda () + (sqlite3:for-each-row (lambda (tot) + (debug:print-info 0 *default-log-port* "Records count before clean: " tot)) + count-stmt) + (map sqlite3:execute statements) + (sqlite3:for-each-row (lambda (tot) + (debug:print-info 0 *default-log-port* "Records count after clean: " tot)) + count-stmt))) + (map sqlite3:finalize! statements) + (sqlite3:finalize! count-stmt) + ;; (db:find-and-mark-incomplete db) + ;; (db:delay-if-busy dbdat) + (sqlite3:execute db "VACUUM;"))) + +;; Clean out old junk and vacuum the database +;; +;; Ultimately do something like this: +;; +;; 1. Look at test records either deleted or part of deleted run: +;; a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown' +;; b. If test dir gone, delete the test record +;; 2. Look at run records +;; a. If have tests that are not deleted, set state='unknown' +;; b. .... +;; +(define (db:clean-up-maindb dbdat) + ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db") + (let* ((db (db:dbdat-get-db dbdat)) + (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM runs);")) + (statements + (map (lambda (stmt) + (sqlite3:prepare db stmt)) + (list + ;; delete all tests that belong to runs that are 'deleted' + ;; (conc "DELETE FROM tests WHERE run_id NOT IN (" (string-intersperse (map conc valid-runs) ",") ");") + ;; delete all tests that are 'DELETED' + "DELETE FROM runs WHERE state='deleted';" + ))) + (dead-runs '())) + (sqlite3:for-each-row + (lambda (run-id) + (set! dead-runs (cons run-id dead-runs))) + db + "SELECT id FROM runs WHERE state='deleted';") + ;; (db:delay-if-busy dbdat) + (sqlite3:with-transaction + db + (lambda () + (sqlite3:for-each-row (lambda (tot) + (debug:print-info 0 *default-log-port* "Records count before clean: " tot)) + count-stmt) + (map sqlite3:execute statements) + (sqlite3:for-each-row (lambda (tot) + (debug:print-info 0 *default-log-port* "Records count after clean: " tot)) + count-stmt))) + (map sqlite3:finalize! statements) + (sqlite3:finalize! count-stmt) + ;; (db:find-and-mark-incomplete db) + ;; (db:delay-if-busy dbdat) + (sqlite3:execute db "VACUUM;") + dead-runs)) + +;;====================================================================== +;; M E T A G E T A N D S E T V A R S +;;====================================================================== + +;; returns number if string->number is successful, string otherwise +;; also updates *global-delta* +;; +(define (db:get-var dbstruct var) + (let* ((res #f)) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (val) + (set! res val)) + db + "SELECT val FROM metadat WHERE var=?;" var) + ;; convert to number if can + (if (string? res) + (let ((valnum (string->number res))) + (if valnum (set! res valnum)))) + res)))) + +(define (db:inc-var dbstruct var) + (db:with-db dbstruct #f #t + (lambda (db) + (sqlite3:execute db "UPDATE metadat SET val=val+1 WHERE var=?;" var)))) + +(define (db:dec-var dbstruct var) + (db:with-db dbstruct #f #t + (lambda (db) + (sqlite3:execute db "UPDATE metadat SET val=val-1 WHERE var=?;" var)))) + +;; This was part of db:get-var. It was used to estimate the load on +;; the database files. +;; +;; scale by 10, average with current value. +;; (set! *global-delta* (/ (+ *global-delta* (* (- (current-milliseconds) start-ms) +;; (if throttle throttle 0.01))) +;; 2)) +;; (if (> (abs (- *last-global-delta-printed* *global-delta*)) 0.08) ;; don't print all the time, only if it changes a bit +;; (begin +;; (debug:print-info 4 *default-log-port* "launch throttle factor=" *global-delta*) +;; (set! *last-global-delta-printed* *global-delta*))) + +(define (db:set-var dbstruct var val) + (db:with-db dbstruct #f #t + (lambda (db) + (sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" var val)))) + +(define (db:add-var dbstruct var val) + (db:with-db dbstruct #f #t + (lambda (db) + (sqlite3:execute db "UPDATE metadat SET val=val+? WHERE var=?;" val var)))) + +(define (db:del-var dbstruct var) + (db:with-db dbstruct #f #t + (lambda (db) + (sqlite3:execute db "DELETE FROM metadat WHERE var=?;" var)))) + +;;====================================================================== +;; no-sync.db - small bits of data to be shared between servers +;;====================================================================== + +(define (db:open-no-sync-db) + (let* ((dbpath (db:dbfile-path)) + (dbname (conc dbpath "/no-sync.db")) + (db-exists (common:file-exists? dbname)) + (db (sqlite3:open-database dbname))) + (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) + (if (not db-exists) + (begin + (sqlite3:execute db "PRAGMA synchronous = 0;") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));") + (sqlite3:execute db "PRAGMA journal_mode=WAL;"))) + db)) + +;; if we are not a server create a db handle. this is not finalized +;; so watch for problems. I'm still not clear if it is needed to manually +;; finalize sqlite3 dbs with the sqlite3 egg. +;; +(define (db:no-sync-db db-in) + (mutex-lock! *db-access-mutex*) + (let ((res (if db-in + db-in + (let ((db (db:open-no-sync-db))) + (set! *no-sync-db* db) + db)))) + (mutex-unlock! *db-access-mutex*) + res)) + +(define (db:no-sync-set db var val) + (sqlite3:execute (db:no-sync-db db) "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val)) + +(define (db:no-sync-del! db var) + (sqlite3:execute (db:no-sync-db db) "DELETE FROM no_sync_metadat WHERE var=?;" var)) + +(define (db:no-sync-get/default db var default) + (let ((res default)) + (sqlite3:for-each-row + (lambda (val) + (set! res val)) + (db:no-sync-db db) + "SELECT val FROM no_sync_metadat WHERE var=?;" + var) + (if res + (let ((newres (if (string? res) + (string->number res) + #f))) + (if newres + newres + res)) + res))) + +(define (db:no-sync-close-db db stmt-cache) + (db:safely-close-sqlite3-db db stmt-cache)) + +;; transaction protected lock aquisition +;; either: +;; fails returns (#f . lock-creation-time) +;; succeeds (returns (#t . lock-creation-time) +;; use (db:no-sync-del! db keyname) to release the lock +;; +(define (db:no-sync-get-lock db-in keyname) + (let ((db (db:no-sync-db db-in))) + (sqlite3:with-transaction + db + (lambda () + (handle-exceptions + exn + (let ((lock-time (current-seconds))) + (debug:print-info 2 *default-log-port* "db:no-sync-get-lock keyname=" keyname ", lock-time=" lock-time ", exn=" exn) + (sqlite3:execute db "INSERT INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time) + `(#t . ,lock-time)) + `(#f . ,(sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname))))))) + + + +;; use a global for some primitive caching, it is just silly to +;; re-read the db over and over again for the keys since they never +;; change + +;; why get the keys from the db? why not get from the *configdat* +;; using keys:config-get-fields? + +(define (db:get-keys dbstruct) + (if *db-keys* *db-keys* + (let ((res '())) + (db:with-db dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (key) + (set! res (cons key res))) + db + "SELECT fieldname FROM keys ORDER BY id DESC;"))) + (set! *db-keys* res) + res))) + +;; extract index number given a header/data structure +(define (db:get-index-by-header header field) + (list-index (lambda (x)(equal? x field)) header)) + +;; look up values in a header/data structure +(define (db:get-value-by-header row header field) + (if (or (null? header) (not row)) + #f + (let loop ((hed (car header)) + (tal (cdr header)) + (n 0)) + (if (equal? hed field) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "WARNING: attempt to read non-existant field, row=" + row " header=" header " field=" field ", exn=" exn) + #f) + (vector-ref row n)) + (if (null? tal) #f (loop (car tal)(cdr tal)(+ n 1))))))) + +;; Accessors for the header/data structure +;; get rows and header from +(define (db:get-header vec)(vector-ref vec 0)) +(define (db:get-rows vec)(vector-ref vec 1)) + +;;====================================================================== +;; R U N S +;;====================================================================== + + + + + +(define (db:get-run-times dbstruct run-patt target-patt) +(let ((res `()) + (qry (conc "select runname, (max(end_time)-min(event_time))/60 as runtime, target from (select runname, run_id,tests.event_time,tests.event_time+run_duration AS end_time, " (string-join (db:get-keys dbstruct) " || '/' || ") " as target from tests inner join runs on tests.run_id = runs.id where runs.runname like ? and target like ?) group by run_id ;"))) +;(print qry) +(db:with-db + dbstruct + #f ;; this is for the main runs db + #f ;; does not modify db + (lambda (db) + (sqlite3:for-each-row + (lambda (runname runtime target ) + (set! res (cons (vector runname runtime target) res))) + db + qry + run-patt target-patt) + + res)))) + + + +(define (db:get-run-name-from-id dbstruct run-id) + (db:with-db + dbstruct + #f ;; this is for the main runs db + #f ;; does not modify db + (lambda (db) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (runname) + (set! res runname)) + db + "SELECT runname FROM runs WHERE id=?;" + run-id) + res)))) + +(define (db:get-run-key-val dbstruct run-id key) + (db:with-db + dbstruct + #f + #f + (lambda (db) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (val) + (set! res val)) + db + (conc "SELECT " key " FROM runs WHERE id=?;") + run-id) + res)))) + +;; keys list to key1,key2,key3 ... +(define (runs:get-std-run-fields keys remfields) + (let* ((header (append keys remfields)) + (keystr (conc (keys->keystr keys) "," + (string-intersperse remfields ",")))) + (list keystr header))) + +;; make a query (fieldname like 'patt1' OR fieldname +(define (db:patt->like fieldname pattstr #!key (comparator " OR ")) + (let ((patts (if (string? pattstr) + (string-split pattstr ",") + '("%")))) + (string-intersperse (map (lambda (patt) + (let ((wildtype (if (substring-index "%" patt) "LIKE" "GLOB"))) + (conc fieldname " " wildtype " '" patt "'"))) + (if (null? patts) + '("") + patts)) + comparator))) + + +;; register a test run with the db, this accesses the main.db and does NOT +;; use server api +;; +(define (db:register-run dbstruct keyvals runname state status user contour-in) + (let* ((keys (map car keyvals)) + (keystr (keys->keystr keys)) + (contour (or contour-in "")) ;; empty string to force no hierarcy and be backwards compatible. + (comma (if (> (length keys) 0) "," "")) + (andstr (if (> (length keys) 0) " AND " "")) + (valslots (keys->valslots keys)) ;; ?,?,? ... + (allvals (append (list runname state status user contour) (map cadr keyvals))) + (qryvals (append (list runname) (map cadr keyvals))) + (key=?str (string-intersperse (map (lambda (k)(conc k "=?")) keys) " AND "))) + (debug:print 3 *default-log-port* "keys: " keys " allvals: " allvals " keyvals: " keyvals " key=?str is " key=?str) + (debug:print 2 *default-log-port* "NOTE: using target " (string-intersperse (map cadr keyvals) "/") " for this run") + (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and" + (db:with-db + dbstruct #f #f + (lambda (db) + (let ((res #f)) + (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time,contour" comma keystr ") VALUES (?,?,?,?,strftime('%s','now'),?" comma valslots ");") + allvals) + (apply sqlite3:for-each-row + (lambda (id) + (set! res id)) + db + (let ((qry (conc "SELECT id FROM runs WHERE (runname=? " andstr key=?str ");"))) + qry) + qryvals) + (sqlite3:execute db "UPDATE runs SET state=?,status=?,event_time=strftime('%s','now') WHERE id=? AND state='deleted';" state status res) + res))) + (begin + (debug:print-error 0 *default-log-port* "Called without all necessary keys") + #f)))) + +;; replace header and keystr with a call to runs:get-std-run-fields +;; +;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") ) +;; runpatts: patt1,patt2 ... +;; +(define (db:get-runs dbstruct runpatt count offset keypatts) + (let* ((res '()) + (keys (db:get-keys dbstruct)) + (runpattstr (db:patt->like "runname" runpatt)) + (remfields (list "id" "runname" "state" "status" "owner" "event_time")) + (header (append keys remfields)) + (keystr (conc (keys->keystr keys) "," + (string-intersperse remfields ","))) + (qrystr (conc "SELECT " keystr " FROM runs WHERE (" runpattstr ") " ;; runname LIKE ? " + ;; Generate: " AND x LIKE 'keypatt' ..." + (if (null? keypatts) "" + (conc " AND " + (string-join + (map (lambda (keypatt) + (let ((key (car keypatt)) + (patt (cadr keypatt))) + (db:patt->like key patt))) + keypatts) + " AND "))) + " AND state != 'deleted' ORDER BY event_time DESC " + (if (number? count) + (conc " LIMIT " count) + "") + (if (number? offset) + (conc " OFFSET " offset) + "")))) + (debug:print-info 11 *default-log-port* "db:get-runs START qrystr: " qrystr " keypatts: " keypatts " offset: " offset " limit: " count) + (db:with-db dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (a . x) + (set! res (cons (apply vector a x) res))) + db + qrystr + ))) + (debug:print-info 11 *default-log-port* "db:get-runs END qrystr: " qrystr " keypatts: " keypatts " offset: " offset " limit: " count) + (vector header res))) + + +(define-record simple-run target id runname state status owner event_time) +(define-record-printer (simple-run x out) + (fprintf out "#,(simple-run ~S ~S ~S ~S)" + (simple-run-target x) (simple-run-id x) (simple-run-runname x) (time->string (seconds->local-time (simple-run-event_time x) )))) + +;; simple get-runs +;; +(define (db:simple-get-runs dbstruct runpatt count offset target last-update) + (let* ((res '()) + (keys (db:get-keys dbstruct)) + (runpattstr (db:patt->like "runname" runpatt)) + (remfields (list "id" "runname" "state" "status" "owner" "event_time")) + (targstr (string-intersperse keys "||'/'||")) + (keystr (conc targstr " AS target," + (string-intersperse remfields ","))) + (qrystr (conc "SELECT " keystr " FROM runs WHERE (" runpattstr ") " ;; runname LIKE ? " + ;; Generate: " AND x LIKE 'keypatt' ..." + " AND target LIKE '" target "'" + " AND state != 'deleted' " + (if (number? last-update) + (conc " AND last_update >= " last-update) + "") + " ORDER BY event_time DESC " + (if (number? count) + (conc " LIMIT " count) + "") + (if (number? offset) + (conc " OFFSET " offset) + ""))) + ) + (debug:print-info 11 *default-log-port* "db:get-runs START qrystr: " qrystr " target: " target " offset: " offset " limit: " count) + (db:with-db dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (target id runname state status owner event_time) + (set! res (cons (make-simple-run target id runname state status owner event_time) res))) + db + qrystr + ))) + (debug:print-info 11 *default-log-port* "db:get-runs END qrystr: " qrystr " target: " target " offset: " offset " limit: " count) + res)) + +;; TODO: Switch this to use max(update_time) from each run db? Then if using a server there is no disk traffic (using inmem db) +;; +(define (db:get-changed-run-ids since-time) + (let* ((dbdir (db:dbfile-path)) ;; (configf:lookup *configdat* "setup" "dbdir")) + (alldbs (glob (conc dbdir "/[0-9]*.db"))) + (changed (filter (lambda (dbfile) + (> (file-modification-time dbfile) since-time)) + alldbs))) + (delete-duplicates + (map (lambda (dbfile) + (let* ((res (string-match ".*\\/(\\d)*\\.db" dbfile))) + (if res + (string->number (cadr res)) + (begin + (debug:print 2 *default-log-port* "WARNING: Failed to process " dbfile " for run-id") + 0)))) + changed)))) + +;; Get all targets from the db +;; +(define (db:get-targets dbstruct) + (let* ((res '()) + (keys (db:get-keys dbstruct)) + (header keys) ;; (map key:get-fieldname keys)) + (keystr (keys->keystr keys)) + (qrystr (conc "SELECT " keystr " FROM runs WHERE state != 'deleted';")) + (seen (make-hash-table))) + (db:with-db + dbstruct + #f + #f + (lambda (db) + (sqlite3:for-each-row + (lambda (a . x) + (let ((targ (cons a x))) + (if (not (hash-table-ref/default seen targ #f)) + (begin + (hash-table-set! seen targ #t) + (set! res (cons (apply vector targ) res)))))) + db + qrystr) + (debug:print-info 11 *default-log-port* "db:get-targets END qrystr: " qrystr ) + (vector header res))))) + +;; just get count of runs +(define (db:get-num-runs dbstruct runpatt) + (db:with-db + dbstruct + #f + #f + (lambda (db) + (let ((numruns 0)) + (debug:print-info 11 *default-log-port* "db:get-num-runs START " runpatt) + (sqlite3:for-each-row + (lambda (count) + (set! numruns count)) + db + "SELECT COUNT(id) FROM runs WHERE runname LIKE ? AND state != 'deleted';" runpatt) + (debug:print-info 11 *default-log-port* "db:get-num-runs END " runpatt) + numruns)))) + +;; just get count of runs +(define (db:get-runs-cnt-by-patt dbstruct runpatt targetpatt keys) + (db:with-db + dbstruct + #f + #f + (lambda (db) + (let ((numruns 0) + (qry-str #f) + (key-patt "") + (keyvals (if targetpatt (keys:target->keyval keys targetpatt) '()))) + + (for-each (lambda (keyval) + (let* ((key (car keyval)) + (patt (cadr keyval)) + (fulkey (conc ":" key)) + (wildtype (if (substring-index "%" patt) "like" "glob"))) + + (if patt + (set! key-patt (conc key-patt " AND " key " " wildtype " '" patt "'")) + (begin + (debug:print-error 0 *default-log-port* "searching for runs with no pattern set for " fulkey) + (exit 6))))) + keyvals) + ;(print runpatt " -- " key-patt) + (set! qry-str (conc "SELECT COUNT(id) FROM runs WHERE state != 'deleted' AND runname like '" runpatt "'" key-patt)) + ;(print qry-str ) + + (sqlite3:for-each-row + (lambda (count) + (set! numruns count)) + db + qry-str) + (debug:print-info 11 *default-log-port* "db:get-num-runs END " runpatt) + numruns)))) + + +;; (sqlite3#fold-row proc3670 init3671 db-or-stmt3672 . params3673)> +;; +(define (db:get-raw-run-stats dbstruct run-id) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (sqlite3:fold-row + (lambda (res state status count) + (cons (list state status count) res)) + '() + db + "SELECT state,status,count(id) AS count FROM tests WHERE run_id=? AND NOT(uname='n/a' AND item_path='') GROUP BY state,status;" + run-id)))) + +;; Update run_stats for given run_id +;; input data is a list (state status count) +;; +(define (db:update-run-stats dbstruct run-id stats) + ;; (mutex-lock! *db-transaction-mutex*) + (db:with-db + dbstruct + #f + #f + + (lambda (db) + ;; remove previous data + + (let* ((stmt1 (sqlite3:prepare db "DELETE FROM run_stats WHERE run_id=? AND state=? AND status=?;")) + (stmt2 (sqlite3:prepare db "INSERT INTO run_stats (run_id,state,status,count) VALUES (?,?,?,?);")) + (res + (sqlite3:with-transaction + db + (lambda () + (for-each + (lambda (dat) + (sqlite3:execute stmt1 run-id (car dat)(cadr dat)) + (apply sqlite3:execute stmt2 run-id dat)) + stats))))) + (sqlite3:finalize! stmt1) + (sqlite3:finalize! stmt2) + ;; (mutex-unlock! *db-transaction-mutex*) + res)))) + +(define (db:get-main-run-stats dbstruct run-id) + (db:with-db + dbstruct + #f ;; this data comes from main + #f + (lambda (db) + (sqlite3:fold-row + (lambda (res state status count) + (cons (list state status count) res)) + '() + db + "SELECT state,status,count FROM run_stats WHERE run_id=? AND run_id IN (SELECT id FROM runs WHERE state NOT IN ('DELETED','deleted'));" + run-id)))) + +(define (db:print-current-query-stats) + ;; generate stats from *db-api-call-time* + (let ((ordered-keys (sort (hash-table-keys *db-api-call-time*) + (lambda (a b) + (let ((sum-a (common:sum (hash-table-ref *db-api-call-time* a))) + (sum-b (common:sum (hash-table-ref *db-api-call-time* b)))) + (> sum-a sum-b))))) + (total 0)) + (for-each + (lambda (cmd-key) + (let* ((dat (hash-table-ref *db-api-call-time* cmd-key)) + (num (length dat)) + (avg (if (> num 0) + (/ (common:sum dat)(length dat))))) + (set! total (+ total num)) + (debug:print-info 0 *default-log-port* cmd-key "\tavg: " avg " max: " (common:max dat) " min: " (common:min-max < dat) " num: " (length dat)))) + ordered-keys) + (debug:print-info 0 *default-log-port* "TOTAL: " total " api calls since start."))) + +(define (db:get-all-run-ids dbstruct) + (db:with-db + dbstruct + #f + #f + (lambda (db) + (let ((run-ids '())) + (sqlite3:for-each-row + (lambda (run-id) + (set! run-ids (cons run-id run-ids))) + db + "SELECT id FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;") + (reverse run-ids))))) + +;; get some basic run stats +;; +;; data structure: +;; +;; ( (runname (( state count ) ... )) +;; ( ... +;; +(define (db:get-run-stats dbstruct) + (let* ((totals (make-hash-table)) + (curr (make-hash-table)) + (res '()) + (runs-info '())) + ;; First get all the runname/run-ids + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (run-id runname) + (set! runs-info (cons (list run-id runname) runs-info))) + db + "SELECT id,runname FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;"))) ;; If you change this to the more logical ASC please adjust calls to db:get-run-stats + ;; for each run get stats data + (for-each + (lambda (run-info) + ;; get the net state/status counts for this run + (let* ((run-id (car run-info)) + (run-name (cadr run-info))) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (sqlite3:for-each-row + (lambda (state status count) + (let ((netstate (if (equal? state "COMPLETED") status state))) + (if (string? netstate) + (begin + (hash-table-set! totals netstate (+ (hash-table-ref/default totals netstate 0) count)) + (hash-table-set! curr netstate (+ (hash-table-ref/default curr netstate 0) count)))))) + db + "SELECT state,status,count(id) FROM tests AS t WHERE run_id=? GROUP BY state,status ORDER BY state,status DESC;" + run-id) + ;; add the per run counts to res + (for-each (lambda (state) + (set! res (cons (list run-name state (hash-table-ref curr state)) res))) + (sort (hash-table-keys curr) string>=)) + (set! curr (make-hash-table)))))) + runs-info) + (for-each (lambda (state) + (set! res (cons (list "Totals" state (hash-table-ref totals state)) res))) + (sort (hash-table-keys totals) string>=)) + res)) + +;; db:get-runs-by-patt +;; get runs by list of criteria +;; register a test run with the db +;; +;; Use: (db:get-value-by-header (db:get-header runinfo)(db:get-rows runinfo)) +;; to extract info from the structure returned +;; +(define (db:get-runs-by-patt dbstruct keys runnamepatt targpatt offset limit fields last-update sort-order ) ;; test-name) + (let* ((tmp (runs:get-std-run-fields keys (or fields '("id" "runname" "state" "status" "owner" "event_time")))) + (keystr (car tmp)) + (header (cadr tmp)) + (key-patt "") + (runwildtype (if (substring-index "%" runnamepatt) "like" "glob")) + (qry-str #f) + (keyvals (if targpatt (keys:target->keyval keys targpatt) '()))) + (for-each (lambda (keyval) + (let* ((key (car keyval)) + (patt (cadr keyval)) + (fulkey (conc ":" key)) + (wildtype (if (substring-index "%" patt) "like" "glob"))) + (if patt + (set! key-patt (conc key-patt " AND " key " " wildtype " '" patt "'")) + (begin + (debug:print-error 0 *default-log-port* "searching for runs with no pattern set for " fulkey) + (exit 6))))) + keyvals) + (set! qry-str (conc "SELECT " keystr " FROM runs WHERE state != 'deleted' AND runname " runwildtype " ? " key-patt + (if last-update + (conc " AND last_update >= " last-update " ") + " ") + " ORDER BY event_time " sort-order " " + (if limit (conc " LIMIT " limit) "") + (if offset (conc " OFFSET " offset) "") + ";")) + (debug:print-info 4 *default-log-port* "runs:get-runs-by-patt qry=" qry-str " " runnamepatt) + ;(print "runs:get-runs-by-patt qry=" qry-str " " runnamepatt) + + (vector header + (reverse + (db:with-db dbstruct #f #f ;; reads db, does not write to it. + (lambda (db) + (sqlite3:fold-row + (lambda (res . r) + (cons (list->vector r) res)) + '() + db + qry-str + runnamepatt))))))) + +;; use (get-value-by-header (db:get-header runinfo)(db:get-rows runinfo)) +;; NOTE: Does NOT return a list of rows (or one row) for the first slot of the vector +;; this is inconsistent with get-runs but it makes some sense. +;; +(define (db:get-run-info dbstruct run-id) + ;;(if (hash-table-ref/default *run-info-cache* run-id #f) + ;; (hash-table-ref *run-info-cache* run-id) + (let* ((res (vector #f #f #f #f)) + (keys (db:get-keys dbstruct)) + (remfields (list "id" "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update")) ;; "area_id")) + (header (append keys remfields)) + (keystr (conc (keys->keystr keys) "," + (string-intersperse remfields ",")))) + (debug:print-info 11 *default-log-port* "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr) + + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (a . x) + (set! res (apply vector a x))) + db + (conc "SELECT " keystr " FROM runs WHERE id=?;") + run-id))) + (debug:print-info 11 *default-log-port* "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr) + (let ((finalres (vector header res))) + ;; (hash-table-set! *run-info-cache* run-id finalres) + finalres))) + +(define (db:set-comment-for-run dbstruct run-id comment) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:execute db "UPDATE runs SET comment=? WHERE id=?;" comment ;; (sdb:qry 'getid comment) + run-id)))) + +;; does not (obviously!) removed dependent data. But why not!!? +(define (db:delete-run dbstruct run-id) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:with-transaction + db + (lambda () + (sqlite3:execute db "DELETE FROM test_steps WHERE test_id IN (SELECT id FROM tests WHERE run_id=?);" run-id) + (sqlite3:execute db "DELETE FROM test_data WHERE test_id IN (SELECT id FROM tests WHERE run_id=?);" run-id) + (sqlite3:execute db "UPDATE tests SET state='DELETED',comment='' WHERE run_id=?;" run-id) + ;; (db:delay-if-busy dbdat) + (sqlite3:execute db "UPDATE runs SET state='deleted',comment='' WHERE id=?;" run-id)))))) + +(define (db:update-run-event_time dbstruct run-id) + (db:with-db + dbstruct #f #t + (lambda (db) + (sqlite3:execute db "UPDATE runs SET event_time=strftime('%s','now') WHERE id=?;" run-id)))) + +(define (db:lock/unlock-run dbstruct run-id lock unlock user) + (db:with-db + dbstruct #f #t + (lambda (db) + (let ((newlockval (if lock "locked" + (if unlock + "unlocked" + "locked")))) ;; semi-failsafe + (sqlite3:execute db "UPDATE runs SET state=? WHERE id=?;" newlockval run-id) + (sqlite3:execute db "INSERT INTO access_log (user,accessed,args) VALUES(?,strftime('%s','now'),?);" + user (conc newlockval " " run-id)) + (debug:print-info 1 *default-log-port* "" newlockval " run number " run-id))))) + +(define (db:set-run-status dbstruct run-id status msg) + (db:with-db + dbstruct #f #f + (lambda (db) + (if msg + (sqlite3:execute db "UPDATE runs SET status=?,comment=? WHERE id=?;" status msg run-id) + (sqlite3:execute db "UPDATE runs SET status=? WHERE id=?;" status run-id))))) + +(define (db:set-run-state-status dbstruct run-id state status ) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:execute db "UPDATE runs SET status=?,state=? WHERE id=?;" status state run-id)))) + + + +(define (db:get-run-status dbstruct run-id) + (let ((res "n/a")) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (status) + (set! res status)) + db + "SELECT status FROM runs WHERE id=?;" + run-id) + res)))) + +(define (db:get-run-state dbstruct run-id) + (let ((res "n/a")) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (status) + (set! res status)) + db + "SELECT state FROM runs WHERE id=?;" + run-id) + res)))) + + +;;====================================================================== +;; K E Y S +;;====================================================================== + +;; get key val pairs for a given run-id +;; ( (FIELDNAME1 keyval1) (FIELDNAME2 keyval2) ... ) +(define (db:get-key-val-pairs dbstruct run-id) + (let* ((keys (db:get-keys dbstruct)) + (res '())) + (db:with-db + dbstruct #f #f + (lambda (db) + (for-each + (lambda (key) + (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) + (sqlite3:for-each-row + (lambda (key-val) + (set! res (cons (list key (if (string? key-val) key-val "")) res))) ;; replace non-string bad values with empty string to prevent crashes. This scenario can happen when Megatest is killed on updating the db + db qry run-id))) + keys))) + (reverse res))) + +;; get key vals for a given run-id +(define (db:get-key-vals dbstruct run-id) + (let* ((keys (db:get-keys dbstruct)) + (res '())) + (db:with-db + dbstruct #f #f + (lambda (db) + (for-each + (lambda (key) + (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (key-val) + (set! res (cons (if (string? key-val) key-val "") res))) ;; check that the key-val is a string for cases where a crash injected bad data in the megatest.db + db qry run-id))) + keys))) + (let ((final-res (reverse res))) + (hash-table-set! *keyvals* run-id final-res) + final-res))) + +;; The target is keyval1/keyval2..., cached in *target* as it is used often +(define (db:get-target dbstruct run-id) + (let* ((keyvals (db:get-key-vals dbstruct run-id)) + (thekey (string-intersperse (map (lambda (x)(if x x "-na-")) keyvals) "/"))) + thekey)) + +;; Get run-ids for runs with same target but different runnames and NOT run-id +;; +(define (db:get-prev-run-ids dbstruct run-id) + (let* ((keyvals (db:get-key-val-pairs dbstruct run-id)) + (kvalues (map cadr keyvals)) + (keys (db:get-keys dbstruct)) + (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND "))) + (let ((prev-run-ids '())) + (if (null? keyvals) + '() + (begin + (db:with-db dbstruct #f #f ;; #f means work with the zeroth db - i.e. the runs db + (lambda (db) + (apply sqlite3:for-each-row + (lambda (id) + (set! prev-run-ids (cons id prev-run-ids))) + db + (conc "SELECT id FROM runs WHERE " qrystr " AND state != 'deleted' AND id != ?;") + (append kvalues (list run-id))))) + prev-run-ids))))) + +;;====================================================================== +;; T E S T S +;;====================================================================== + +;; states and statuses are lists, turn them into ("PASS","FAIL"...) and use NOT IN +;; i.e. these lists define what to NOT show. +;; states and statuses are required to be lists, empty is ok +;; not-in #t = above behaviour, #f = must match +;; mode: +;; 'dashboard - use state = 'COMPLETED' AND status in ( statuses ) OR state in ( states ) +;; +(define (db:get-tests-for-run dbstruct run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode) + (let* ((qryvalstr (case qryvals + ((shortlist) "id,run_id,testname,item_path,state,status") + ((#f) db:test-record-qry-selector) ;; "id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment") + (else qryvals))) + (res '()) + ;; if states or statuses are null then assume match all when not-in is false + (states-qry (if (null? states) + #f + (conc " state " + (if (eq? mode 'dashboard) + " IN ('" + (if not-in + " NOT IN ('" + " IN ('")) + (string-intersperse states "','") + "')"))) + (statuses-qry (if (null? statuses) + #f + (conc " status " + (if (eq? mode 'dashboard) + " IN ('" + (if not-in + " NOT IN ('" + " IN ('") ) + (string-intersperse statuses "','") + "')"))) + (interim-qry (conc " AND " (if not-in "NOT " "") "( state='COMPLETED' " (if statuses-qry (conc " AND " statuses-qry " ) ") " ) ") + (if states-qry + (conc (if not-in " AND " " OR ") states-qry ) ;; " ) ") + ""))) + (states-statuses-qry + (cond + ((and states-qry statuses-qry) + (case mode + ((dashboard) + (if not-in + (conc " AND (state='COMPLETED' AND status NOT IN ('" (string-intersperse statuses "','") "')) " + " OR (state != 'COMPLETED' AND state NOT IN ('" (string-intersperse states "','") "')) ") + (conc " AND (state='COMPLETED' AND status IN ('" (string-intersperse statuses "','") "')) " + " OR (state NOT IN ('COMPLETED','DELETED') AND state IN ('" (string-intersperse states "','") "')) "))) + (else (conc " AND ( " states-qry " AND " statuses-qry " ) ")))) + (states-qry + (case mode + ((dashboard) (conc " AND " (if not-in "NOT " "") " state IN ('" (string-intersperse states "','") "') ")) ;; interim-qry) + (else (conc " AND " states-qry)))) + (statuses-qry + (case mode + ((dashboard) (conc " AND " (if not-in "NOT " "") " status IN ('" (string-intersperse statuses "','") "') ")) ;; interim-qry) + (else (conc " AND " statuses-qry)))) + (else ""))) + (tests-match-qry (tests:match->sqlqry testpatt)) + (qry (conc "SELECT " qryvalstr + (if run-id + " FROM tests WHERE run_id=? " + " FROM tests WHERE ? > 0 ") ;; should work? + (if last-update " " " AND state != 'DELETED' ") ;; if using last-update we want deleted tests? + states-statuses-qry + (if tests-match-qry (conc " AND (" tests-match-qry ") ") "") + (if last-update (conc " AND last_update >= " last-update " ") "") + (case sort-by + ((rundir) " ORDER BY length(rundir) ") + ((testname) (conc " ORDER BY testname " (if sort-order (conc sort-order ",") "") " item_path ")) + ((statestatus) (conc " ORDER BY state " (if sort-order (conc sort-order ",") "") " status ")) + ((event_time) " ORDER BY event_time ") + (else (if (string? sort-by) + (conc " ORDER BY " sort-by " ") + " "))) + (if sort-order sort-order " ") + (if limit (conc " LIMIT " limit) " ") + (if offset (conc " OFFSET " offset) " ") + ";" + ))) + (debug:print-info 8 *default-log-port* "db:get-tests-for-run run-id=" run-id ", qry=" qry) + (let* ((res (db:with-db dbstruct run-id #f + (lambda (db) + ;; (let* ((stmth (db:get-cache-stmth dbstruct db qry))) ;; due to use of last-update we can't efficiently cache this query + (reverse + (sqlite3:fold-row + (lambda (res . row) + ;; id run-id testname state status event-time host cpuload + ;; diskfree uname rundir item-path run-duration final-logf comment) + (cons (list->vector row) res)) + '() + db qry ;; stmth + (or run-id 1) ;; 1 > 0 , for the case where we are seeking tests matching criteral for all runs + )))))) + (case qryvals + ((shortlist)(map db:test-short-record->norm res)) + ((#f) res) + (else res))))) + +(define (db:test-short-record->norm inrec) + ;; "id,run_id,testname,item_path,state,status" + ;; "id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment + (vector (vector-ref inrec 0) ;; id + (vector-ref inrec 1) ;; run_id + (vector-ref inrec 2) ;; testname + (vector-ref inrec 4) ;; state + (vector-ref inrec 5) ;; status + -1 "" -1 -1 "" "-" + (vector-ref inrec 3) ;; item-path + -1 "-" "-")) + +;; +;; 1. cache tests-match-qry +;; 2. compile qry and store in hash +;; 3. convert for-each-row to fold +;; +;; (define (db:get-tests-for-run-state-status dbstruct run-id testpatt) +;; (db:with-db +;; dbstruct run-id #f +;; (lambda (db) +;; (let* ((res '()) +;; (stmt-cache (dbr:dbstruct-stmt-cache dbstruct)) +;; (stmth (let* ((sh (db:hoh-get stmt-cache db testpatt))) +;; (or sh +;; (let* ((tests-match-qry (tests:match->sqlqry testpatt)) +;; (qry (conc "SELECT id,testname,item_path,state,status FROM tests WHERE run_id=? " +;; (if tests-match-qry (conc " AND (" tests-match-qry ") ") ""))) +;; (newsh (sqlite3:prepare db qry))) +;; (debug:print-info 8 *default-log-port* "db:get-tests-for-run qry=" qry) +;; (db:hoh-set! stmt-cache db testpatt newsh) +;; newsh))))) +;; (reverse +;; (sqlite3:fold-row +;; (lambda (res id testname item-path state status) +;; ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment +;; (cons (vector id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-") res)) +;; '() +;; stmth +;; run-id)))))) + +(define (db:get-tests-for-run-state-status dbstruct run-id testpatt #!optional (last-update 0)) + (let* ((res '()) + (tests-match-qry (tests:match->sqlqry testpatt)) + (qry (conc "SELECT id,testname,item_path,state,status,event_time,run_duration FROM tests WHERE run_id=? " + " AND last_update > ? " + (if tests-match-qry (conc " AND (" tests-match-qry ") ") "") + ))) + (debug:print-info 8 *default-log-port* "db:get-tests-for-run qry=" qry) + (db:with-db dbstruct run-id #f + (lambda (db) + (sqlite3:fold-row + (lambda (res id testname item-path state status event-time run-duration) + ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment + (cons (vector id run-id testname state status event-time "" -1 -1 "" "-" item-path run-duration "-" "-") res)) + '() + db + qry + run-id + (or last-update 0)))))) + +(define (db:get-testinfo-state-status dbstruct run-id test-id) + (let ((res #f)) + (db:with-db dbstruct run-id #f + (lambda (db) + (sqlite3:for-each-row + (lambda (run-id testname item-path state status) + ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment + (set! res (vector test-id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-"))) + db + "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=?;" + test-id))) + res)) + +;; get a useful subset of the tests data (used in dashboard +;; use db:mintest-get-{id ,run_id,testname ...} +;; +(define (db:get-tests-for-run-mindata dbstruct run-id testpatt states statuses not-in) + (db:get-tests-for-run dbstruct run-id testpatt states statuses #f #f not-in #f #f "id,run_id,testname,state,status,event_time,item_path" 0 #f)) + +;; do not use. +;; +(define (db:get-tests-for-runs dbstruct run-ids testpatt states statuses #!key (not-in #f)(qryvals #f)) + ;; (db:delay-if-busy) + (let ((res '())) + (for-each + (lambda (run-id) + (set! res (append + res + (db:get-tests-for-run dbstruct run-id testpatt states statuses #f #f not-in #f #f qryvals #f 'normal)))) + (if run-ids + run-ids + (db:get-all-run-ids dbstruct))) + res)) + +;; Convert calling routines to get list of run-ids and loop, do not use the get-tests-for-runs +;; + +(define (db:delete-test-records dbstruct run-id test-id) + (db:general-call dbstruct 'delete-test-step-records (list test-id)) + (db:general-call dbstruct 'delete-test-data-records (list test-id)) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:execute db "UPDATE tests SET state='DELETED',status='n/a',comment='' WHERE id=?;" test-id)))) + +;; +(define (db:delete-old-deleted-test-records dbstruct) + (let ((targtime (- (current-seconds) + (or (configf:lookup-number *configdat* "setup" "keep-deleted-records") + (* 30 24 60 60))))) ;; one month in the past + (db:with-db + dbstruct + 0 + #t + (lambda (db) + (sqlite3:with-transaction + db + (lambda () + (sqlite3:execute db "DELETE FROM test_steps WHERE test_id IN (SELECT id FROM tests WHERE state='DELETED' AND event_timenumber fieldname fields) + (if (null? fields) + #f + (let loop ((hed (car fields)) + (tal (cdr fields)) + (indx 0)) + (if (equal? fieldname hed) + indx + (if (null? tal) + #f + (loop (car tal)(cdr tal)(+ indx 1))))))) + +(define db:test-record-qry-selector (string-intersperse db:test-record-fields ",")) + +(define (db:update-tesdata-on-repilcate-db dbstruct old-lt new-lt) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:execute db "UPDATE tests SET rundir= replace(rundir,?,?), shortdir=replace(shortdir,?,?);" + old-lt new-lt old-lt new-lt)))) + +;; NOTE: Use db:test-get* to access records +;; NOTE: This needs rundir decoding? Decide, decode here or where used? For the moment decode where used. +(define (db:get-all-tests-info-by-run-id dbstruct run-id) + (let* ((res '())) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir attemptnum archived) + ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 + (set! res (cons (vector id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir attemptnum archived) + res))) + db + (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE state != 'DELETED' AND run_id=?;") + run-id))) + res)) + +(define (db:replace-test-records dbstruct run-id testrecs) + (db:with-db dbstruct run-id #t + (lambda (db) + (let* ((qmarks (string-intersperse (make-list (length db:test-record-fields) "?") ",")) + (qrystr (conc "INSERT OR REPLACE INTO tests (" db:test-record-qry-selector ") VALUES (" qmarks ") WHERE run_id=?;")) + (qry (sqlite3:prepare db qrystr))) + (debug:print 0 *default-log-port* "INFO: migrating test records for run with id " run-id) + (sqlite3:with-transaction + db + (lambda () + (for-each + (lambda (rec) + ;; (debug:print 0 *default-log-port* "INFO: Inserting values: " (string-intersperse (map conc (vector->list rec)) ",") "\n") + (apply sqlite3:execute qry (append (vector->list rec)(list run-id)))) + testrecs))) + (sqlite3:finalize! qry))))) + +;; map a test-id into the proper range +;; +(define (db:adj-test-id mtdb min-test-id test-id) + (if (>= test-id min-test-id) + test-id + (let loop ((new-id min-test-id)) + (let ((test-id-found #f)) + (sqlite3:for-each-row + (lambda (id) + (set! test-id-found id)) + (db:dbdat-get-db mtdb) + "SELECT id FROM tests WHERE id=?;" + new-id) + ;; if test-id-found then need to try again + (if test-id-found + (loop (+ new-id 1)) + (begin + (debug:print-info 0 *default-log-port* "New test id " new-id " selected for test with id " test-id) + (sqlite3:execute mtdb "UPDATE tests SET id=? WHERE id=?;" new-id test-id))))))) + +;; move test ids into the 30k * run_id range +;; +(define (db:prep-megatest.db-adj-test-ids mtdb run-id testrecs) + (debug:print-info 0 *default-log-port* "Adjusting test ids in megatest.db for run " run-id) + (let ((min-test-id (* run-id 30000))) + (for-each + (lambda (testrec) + (let* ((test-id (vector-ref testrec (db:field->number "id" db:test-record-fields)))) + (db:adj-test-id (db:dbdat-get-db mtdb) min-test-id test-id))) + testrecs))) + +;; 1. move test ids into the 30k * run_id range +;; 2. move step ids into the 30k * run_id range +;; +(define (db:prep-megatest.db-for-migration mtdb) + (let* ((run-ids (db:get-all-run-ids mtdb))) + (for-each + (lambda (run-id) + (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id))) + (db:prep-megatest.db-adj-test-ids (db:dbdat-get-db mtdb) run-id testrecs))) + run-ids))) + +;; Get test data using test_id, run-id is not used +;; +(define (db:get-test-info-by-id dbstruct run-id test-id) + (db:with-db + dbstruct + #f ;; run-id + #f + (lambda (db) + (let ((res #f)) + (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test + (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update) + ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 + (set! res (vector id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update))) + db + (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id=?;") + test-id) + res)))) + +;; Use db:test-get* to access +;; Get test data using test_ids. NB// Only works within a single run!! +;; +(define (db:get-test-info-by-ids dbstruct run-id test-ids) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (let ((res '())) + (sqlite3:for-each-row + (lambda (a . b) + ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 + (set! res (cons (apply vector a b) res))) + db + (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id in (" + (string-intersperse (map conc test-ids) ",") ");")) + res)))) + +(define (db:get-test-info dbstruct run-id test-name item-path) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (a . b) + (set! res (apply vector a b))) + db + (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE testname=? AND item_path=? AND run_id=?;") + test-name item-path run-id) + res)))) + +(define (db:test-get-rundir-from-test-id dbstruct run-id test-id) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (db:first-result-default + db + "SELECT rundir FROM tests WHERE id=?;" + #f ;; default result + test-id)))) + +(define (db:get-test-times dbstruct run-name target) + (let ((res `()) + (qry (conc "select testname, item_path, run_duration, " + (string-join (db:get-keys dbstruct) " || '/' || ") + " as target from tests inner join runs on tests.run_id = runs.id where runs.runname = ? and target = ? ;"))) + (db:with-db + dbstruct + #f ;; this is for the main runs db + #f ;; does not modify db + (lambda (db) + (sqlite3:for-each-row + (lambda (test-name item-path test-time target ) + (set! res (cons (vector test-name item-path test-time) res))) + db + qry + run-name target) + res)))) + +;;====================================================================== +;; S T E P S +;;====================================================================== + +(define (db:teststep-set-status! dbstruct run-id test-id teststep-name state-in status-in comment logfile) + (db:with-db + dbstruct + run-id + #t + (lambda (db) + (sqlite3:execute + db + "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);" + test-id teststep-name state-in status-in (current-seconds) + (if comment comment "") + (if logfile logfile ""))))) + + + +(define (db:delete-steps-for-test! dbstruct run-id test-id) + ;; TODO: figure out why status is the key field rather than state (note: CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state) ) + (db:with-db + dbstruct + run-id + #t + (lambda (db) + (sqlite3:execute + db + "UPDATE test_steps set status='DELETED' where test_id=?" ;; and run_id=? !! - run_id not in table (bummer) TODO: get run_id into schema for test_steps + test-id)))) + + +;; db-get-test-steps-for-run +(define (db:get-steps-for-test dbstruct run-id test-id) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (let* ((res '())) + (sqlite3:for-each-row + (lambda (id test-id stepname state status event-time logfile comment) + (set! res (cons (vector id test-id stepname state status event-time (if (string? logfile) logfile "") comment) res))) + db + "SELECT id,test_id,stepname,state,status,event_time,logfile,comment FROM test_steps WHERE status != 'DELETED' AND test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; + test-id) + (reverse res))))) + + (define (db:get-steps-info-by-id dbstruct test-step-id) + (db:with-db + dbstruct + #f + #f + (lambda (db) + (let* ((res (vector #f #f #f #f #f #f #f #f #f))) + (sqlite3:for-each-row + (lambda (id test-id stepname state status event-time logfile comment last-update) + (set! res (vector id test-id stepname state status event-time (if (string? logfile) logfile "") comment last-update))) + db + "SELECT id,test_id,stepname,state,status,event_time,logfile,comment,last_update FROM test_steps WHERE id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; + test-step-id) + res)))) + +(define (db:get-steps-data dbstruct run-id test-id) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (let ((res '())) + (sqlite3:for-each-row + (lambda (id test-id stepname state status event-time logfile) + (set! res (cons (vector id test-id stepname state status event-time (if (string? logfile) logfile "")) res))) + db + "SELECT id,test_id,stepname,state,status,event_time,logfile FROM test_steps WHERE status != 'DELETED' AND test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; + test-id) + (reverse res))))) + +;;====================================================================== +;; T E S T D A T A +;;====================================================================== + +(define (db:get-data-info-by-id dbstruct test-data-id) + (let* ((stmt "SELECT id,test_id, category, variable, value, expected, tol, units, comment, status, type, last_update FROM test_data WHERE id=? ORDER BY id ASC;")) ;; event_time DESC,id ASC; + (db:with-db + dbstruct + #f + #f + (lambda (db) + (let* ((stmth (db:get-cache-stmth dbstruct db stmt)) + (res (sqlite3:fold-row + (lambda (res id test-id category variable value expected tol units comment status type last-update) + (vector id test-id category variable value expected tol units comment status type last-update)) + (vector #f #f #f #f #f #f #f #f #f #f #f #f) + stmth + test-data-id))) + res))))) + +;; WARNING: Do NOT call this for the parent test on an iterated test +;; Roll up test_data pass/fail results +;; look at the test_data status field, +;; if all are pass (any case) and the test status is PASS or NULL or '' then set test status to PASS. +;; if one or more are fail (any case) then set test status to PASS, non "pass" or "fail" are ignored +(define (db:test-data-rollup dbstruct run-id test-id status) + (let* ((fail-count 0) + (pass-count 0)) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (fcount pcount) + (set! fail-count fcount) + (set! pass-count pcount)) + db + "SELECT (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'fail') AS fail_count, + (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'pass') AS pass_count;" + test-id test-id) + ;; Now rollup the counts to the central megatest.db + (db:general-call dbstruct 'pass-fail-counts (list pass-count fail-count test-id)) + ;; if the test is not FAIL then set status based on the fail and pass counts. + (db:general-call dbstruct 'test_data-pf-rollup (list test-id test-id test-id test-id)))))) + +;; each section is a rule except "final" which is the final result +;; +;; [rule-5] +;; operator in +;; section LogFileBody +;; desc Output voltage +;; status OK +;; expected 1.9 +;; measured 1.8 +;; type +/- +;; tolerance 0.1 +;; pass 1 +;; fail 0 +;; +;; [final] +;; exit-code 6 +;; exit-status SKIP +;; message If flagged we are asking for this to exit with code 6 +;; +;; recorded in steps table: +;; category: stepname +;; variable: rule-N +;; value: measured +;; expected: expected +;; tol: tolerance +;; units: - +;; comment: desc or message +;; status: status +;; type: type +;; +(define (db:logpro-dat->csv dat stepname) + (let ((res '())) + (for-each + (lambda (entry-name) + (if (equal? entry-name "final") + (set! res (append + res + (list + (list stepname + entry-name + (configf:lookup dat entry-name "exit-code") ;; 0 ;; Value + 0 ;; 1 ;; Expected + 0 ;; 2 ;; Tolerance + "n/a" ;; 3 ;; Units + (configf:lookup dat entry-name "message") ;; 4 ;; Comment + (configf:lookup dat entry-name "exit-status") ;; 5 ;; Status + "logpro" ;; 6 ;; Type + )))) + (let* ((value (or (configf:lookup dat entry-name "measured") "n/a")) + (expected (or (configf:lookup dat entry-name "expected") 0.0)) + (tolerance (or (configf:lookup dat entry-name "tolerance") 0.0)) + (comment (or (configf:lookup dat entry-name "comment") + (configf:lookup dat entry-name "desc") "n/a")) + (status (or (configf:lookup dat entry-name "status") "n/a")) + (type (or (configf:lookup dat entry-name "expected") "n/a"))) + (set! res (append + res + (list (list stepname + entry-name + value ;; 0 + expected ;; 1 + tolerance ;; 2 + "n/a" ;; 3 Units + comment ;; 4 + status ;; 5 + type ;; 6 + ))))))) + (hash-table-keys dat)) + res)) + +;; $MT_MEGATEST -load-test-data << EOF +;; foo,bar, 1.2, 1.9, > +;; foo,rab, 1.0e9, 10e9, 1e9 +;; foo,bla, 1.2, 1.9, < +;; foo,bal, 1.2, 1.2, < , ,Check for overload +;; foo,alb, 1.2, 1.2, <= , Amps,This is the high power circuit test +;; foo,abl, 1.2, 1.3, 0.1 +;; foo,bra, 1.2, pass, silly stuff +;; faz,bar, 10, 8mA, , ,"this is a comment" +;; EOF + +(define (db:csv->test-data dbstruct run-id test-id csvdata) + (debug:print 4 *default-log-port* "test-id " test-id ", csvdata: " csvdata) + (db:with-db + dbstruct #f #f + (lambda (db) + (let* ((csvlist (csv->list (make-csv-reader + (open-input-string csvdata) + '((strip-leading-whitespace? #t) + (strip-trailing-whitespace? #t)))))) ;; (csv->list csvdata))) + (for-each + (lambda (csvrow) + (let* ((padded-row (take (append csvrow (list #f #f #f #f #f #f #f #f #f)) 9)) + (category (list-ref padded-row 0)) + (variable (list-ref padded-row 1)) + (value (any->number-if-possible (list-ref padded-row 2))) + (expected (any->number-if-possible (list-ref padded-row 3))) + (tol (any->number-if-possible (list-ref padded-row 4))) ;; >, <, >=, <=, or a number + (units (list-ref padded-row 5)) + (comment (list-ref padded-row 6)) + (status (let ((s (list-ref padded-row 7))) + (if (and (string? s)(or (string-match (regexp "^\\s*$") s) + (string-match (regexp "^n/a$") s))) + #f + s))) ;; if specified on the input then use, else calculate + (type (list-ref padded-row 8))) + ;; look up expected,tol,units from previous best fit test if they are all either #f or '' + (debug:print 4 *default-log-port* "BEFORE: category: " category " variable: " variable " value: " value + ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment " type: " type) + + (if (and (or (not expected)(equal? expected "")) + (or (not tol) (equal? expected "")) + (or (not units) (equal? expected ""))) + (let-values (((new-expected new-tol new-units)(tdb:get-prev-tol-for-test #f test-id category variable))) + (set! expected new-expected) + (set! tol new-tol) + (set! units new-units))) + + (debug:print 4 *default-log-port* "AFTER: category: " category " variable: " variable " value: " value + ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment) + ;; calculate status if NOT specified + (if (and (not status)(number? expected)(number? value)) ;; need expected and value to be numbers + (if (number? tol) ;; if tol is a number then we do the standard comparison + (let* ((max-val (+ expected tol)) + (min-val (- expected tol)) + (result (and (>= value min-val)(<= value max-val)))) + (debug:print 4 *default-log-port* "max-val: " max-val " min-val: " min-val " result: " result) + (set! status (if result "pass" "fail"))) + (set! status ;; NB// need to assess each one (i.e. not return operator since need to act if not valid op. + (case (string->symbol tol) ;; tol should be >, <, >=, <= + ((>) (if (> value expected) "pass" "fail")) + ((<) (if (< value expected) "pass" "fail")) + ((>=) (if (>= value expected) "pass" "fail")) + ((<=) (if (<= value expected) "pass" "fail")) + (else (conc "ERROR: bad tol comparator " tol)))))) + (debug:print 4 *default-log-port* "AFTER2: category: " category " variable: " variable " value: " value + ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment) + ;; (db:delay-if-busy dbdat) + (sqlite3:execute db "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment,status,type) VALUES (?,?,?,?,?,?,?,?,?,?);" + test-id category variable value expected tol units (if comment comment "") status type))) + csvlist))))) + +;; This routine moved from tdb.scm, tdb:read-test-data +;; +(define (db:read-test-data dbstruct run-id test-id categorypatt) + (let* ((res '())) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (id test_id category variable value expected tol units comment status type) + (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) + db + "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt) + (reverse res))))) + +;; This routine moved from tdb.scm, :read-test-data +;; +(define (db:read-test-data-varpatt dbstruct run-id test-id categorypatt varpatt) + (let* ((res '())) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (id test_id category variable value expected tol units comment status type) + (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) + db + "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? AND variable LIKE ? ORDER BY category,variable;" test-id categorypatt varpatt) + (reverse res))))) + + +;;====================================================================== +;; Misc. test related queries +;;====================================================================== + +(define (db:get-run-ids-matching-target dbstruct keynames target res runname testpatt statepatt statuspatt) + (db:with-db + dbstruct #f #f + (lambda (db) + (let* ((row-ids '()) + (keystr (string-intersperse + (map (lambda (key val) + (conc key " like '" val "'")) + keynames + (string-split target "/")) + " AND ")) + ;; (testqry (tests:match->sqlqry testpatt)) + (runsqry (sqlite3:prepare db (conc "SELECT id FROM runs WHERE " keystr " AND runname LIKE '" runname "';")))) + ;; (debug:print 8 *default-log-port* "db:test-get-paths-matching-keynames-target-new\n runsqry=" runsqry "\n tstsqry=" testqry) + (sqlite3:for-each-row + (lambda (rid) + (set! row-ids (cons rid row-ids))) + runsqry) + (sqlite3:finalize! runsqry) + row-ids)))) + +;; finds latest matching all patts for given run-id +;; +(define (db:test-get-paths-matching-keynames-target-new dbstruct run-id keynames target res testpatt statepatt statuspatt runname) + (let* ((testqry (tests:match->sqlqry testpatt)) + (tstsqry (conc "SELECT rundir FROM tests WHERE run_id=? AND " testqry " AND state LIKE '" statepatt "' AND status LIKE '" statuspatt "' ORDER BY event_time ASC;"))) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (sqlite3:for-each-row + (lambda (p) + (set! res (cons p res))) + db + tstsqry + run-id) + res)))) + +(define (db:test-toplevel-num-items dbstruct run-id testname) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (let ((res 0)) + (sqlite3:for-each-row + (lambda (num-items) + (set! res num-items)) + db + "SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND state NOT IN ('DELETED');" + run-id + testname) + res)))) + +;;====================================================================== +;; QUEUE UP META, TEST STATUS AND STEPS REMOTE ACCESS +;;====================================================================== + +;; NOTE: Can remove the regex and base64 encoding for zmq +(define (db:obj->string obj #!key (transport 'http)) + (case transport + ;; ((fs) obj) + ((http fs) + (string-substitute + (regexp "=") "_" + (base64:base64-encode + (z3:encode-buffer + (with-output-to-string + (lambda ()(serialize obj))))) ;; BB: serialize - this is what causes problems between different builds of megatest communicating. serialize is sensitive to binary image of mtest. + #t)) + ((zmq nmsg)(with-output-to-string (lambda ()(serialize obj)))) + (else obj))) ;; rpc + +(define (db:string->obj msg #!key (transport 'http)) + (case transport + ;; ((fs) msg) + ((http fs) + (if (string? msg) + (with-input-from-string + (z3:decode-buffer + (base64:base64-decode + (string-substitute + (regexp "_") "=" msg #t))) + (lambda ()(deserialize))) + (begin + (debug:print-error 0 *default-log-port* "reception failed. Received \"" msg "\" but cannot translate it.") + (print-call-chain (current-error-port)) + msg))) ;; crude reply for when things go awry + ((zmq nmsg)(with-input-from-string msg (lambda ()(deserialize)))) + (else msg))) ;; rpc + +;; ; This is to be the big daddy call NOPE: Replaced by db:set-state-status-and-roll-up-items +;; ; +;; define (db:test-set-state-status dbstruct run-id test-id state status msg) +;; (let ((dbdat (db:get-db dbstruct run-id))) +;; (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) +;; (db:general-call dbdat 'set-test-start-time (list test-id))) +;; ;; (if msg +;; ;; (db:general-call dbdat 'state-status-msg (list state status msg test-id)) +;; ;; (db:general-call dbdat 'state-status (list state status test-id))) +;; (db:set-state-status-and-roll-up-items dbstruct run-id test-id #f state status msg) +;; ;; process the test_data table +;; (if (and test-id state status (equal? status "AUTO")) +;; (db:test-data-rollup dbstruct run-id test-id status)) +;; (mt:process-triggers dbstruct run-id test-id state status))) + +;; state is the priority rollup of all states +;; status is the priority rollup of all completed statesfu +;; +;; if test-name is an integer work off that instead of test-name test-path +;; +(define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) + ;; establish info on incoming test followed by info on top level test + ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met + (let* ((testdat (if (number? test-name) + (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id + (db:get-test-info dbstruct run-id test-name item-path))) + (test-id (db:test-get-id testdat)) + (test-name (if (number? test-name) + (db:test-get-testname testdat) + test-name)) + (item-path (db:test-get-item-path testdat)) + (tl-testdat (db:get-test-info dbstruct run-id test-name "")) + (tl-test-id (if tl-testdat + (db:test-get-id tl-testdat) + #f))) + (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) + (db:general-call dbstruct 'set-test-start-time (list test-id))) + (mutex-lock! *db-transaction-mutex*) + (db:with-db + dbstruct #f #f + (lambda (db) + (let ((tr-res + (sqlite3:with-transaction + db + (lambda () + ;; NB// Pass the db so it is part fo the transaction + (db:test-set-state-status db run-id test-id state status comment) ;; this call sets the item state/status + (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item + (let* ((state-status-counts (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test + (state-stauses (db:roll-up-rules state-status-counts state status)) + (newstate (car state-stauses)) + (newstatus (cadr state-stauses))) + (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path" newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts) " state-status-counts: " + (apply conc + (map (lambda (x) + (conc + (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | ")) + state-status-counts))); end debug:print + + (if tl-test-id + (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct + )))))) + (mutex-unlock! *db-transaction-mutex*) + (if (and test-id state status (equal? status "AUTO")) + (db:test-data-rollup dbstruct run-id test-id status)) + tr-res))))) + +(define (db:roll-up-rules state-status-counts state status) + (let* ((running (length (filter (lambda (x) + (member (dbr:counts-state x) *common:running-states*)) + state-status-counts))) + (bad-not-started (length (filter (lambda (x) + (and (equal? (dbr:counts-state x) "NOT_STARTED") + (not (member (dbr:counts-status x) *common:not-started-ok-statuses*)))) + state-status-counts))) + (all-curr-states (common:special-sort ;; worst -> best (sort of) + (delete-duplicates + (if (and state (not (member state *common:dont-roll-up-states*))) + (cons state (map dbr:counts-state state-status-counts)) + (map dbr:counts-state state-status-counts))) + *common:std-states* >)) + (all-curr-statuses (common:special-sort ;; worst -> best + (delete-duplicates + (if (and state status (not (member state *common:dont-roll-up-states*))) + (cons status (map dbr:counts-status state-status-counts)) + (map dbr:counts-status state-status-counts))) + *common:std-statuses* >)) + (non-completes (filter (lambda (x) + (not (member x (cons "COMPLETED" *common:dont-roll-up-states*)))) + all-curr-states)) + (preq-fails (filter (lambda (x) + (equal? x "PREQ_FAIL")) + all-curr-statuses)) + (num-non-completes (length non-completes)) + (newstate (cond + ((> running 0) "RUNNING") ;; anything running, call the situation running + ((> (length preq-fails) 0) "NOT_STARTED") + ((> bad-not-started 0) "COMPLETED") ;; we have an ugly situation, it is completed in the sense we cannot do more. + ((> num-non-completes 0) (car non-completes)) ;; (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED + (else (car all-curr-states)))) + (newstatus (cond + ((> (length preq-fails) 0) "PREQ_FAIL") + ((or (> bad-not-started 0) + (and (equal? newstate "NOT_STARTED") + (> num-non-completes 0))) + "STARTED") + (else (car all-curr-statuses))))) + (debug:print-info 2 *default-log-port* + "\n--> probe db:set-state-status-and-roll-up-items: " + "\n--> state-status-counts: "(map dbr:counts->alist state-status-counts) + "\n--> running: "running + "\n--> bad-not-started: "bad-not-started + "\n--> non-non-completes: "num-non-completes + "\n--> non-completes: "non-completes + "\n--> all-curr-states: "all-curr-states + "\n--> all-curr-statuses: "all-curr-statuses + "\n--> newstate "newstate + "\n--> newstatus "newstatus + "\n\n") + + ;; NB// Pass the db so it is part of the transaction + (list newstate newstatus))) + +(define (db:set-state-status-and-roll-up-run dbstruct run-id curr-state curr-status) + (mutex-lock! *db-transaction-mutex*) + (db:with-db + dbstruct #f #f + (lambda (db) + (let ((tr-res + (sqlite3:with-transaction + db + (lambda () + (let* ((state-status-counts (db:get-all-state-status-counts-for-run dbstruct run-id)) + (state-stauses (db:roll-up-rules state-status-counts #f #f )) + (newstate (car state-stauses)) + (newstatus (cadr state-stauses))) + (if (or (not (eq? newstate curr-state)) (not (eq? newstatus curr-status))) + (db:set-run-state-status dbstruct run-id newstate newstatus ))))))) + (mutex-unlock! *db-transaction-mutex*) + tr-res)))) + + +(define (db:get-all-state-status-counts-for-run dbstruct run-id) + (let* ((test-count-recs (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:map-row + (lambda (state status count) + (make-dbr:counts state: state status: status count: count)) + db + "SELECT state,status,count(id) FROM tests WHERE run_id=? GROUP BY state,status;" + run-id ))))) + test-count-recs)) + + +;; BBnote: db:get-all-state-status-counts-for-test returns dbr:counts object aggregating state and status of items of a given test, *not including rollup state/status* +;; +;; NOTE: This is called within a transaction +;; +(define (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path item-state-in item-status-in) + (let* ((test-info (db:get-test-info dbstruct run-id test-name item-path)) + (item-state (or item-state-in (db:test-get-state test-info))) + (item-status (or item-status-in (db:test-get-status test-info))) + (other-items-count-recs (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:map-row + (lambda (state status count) + (make-dbr:counts state: state status: status count: count)) + db + ;; ignore current item because we have changed its value in the current transation so this select will see the old value. + "SELECT state,status,count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND item_path !=? GROUP BY state,status;" + run-id test-name item-path)))) + + ;; add current item to tally outside of sql query + (match-countrec-lambda (lambda (countrec) + (and (equal? (dbr:counts-state countrec) item-state) + (equal? (dbr:counts-status countrec) item-status)))) + + (already-have-count-rec-list + (filter match-countrec-lambda other-items-count-recs)) ;; will have either 0 or 1 count recs depending if another item shares this item's state/status + + (updated-count-rec (if (null? already-have-count-rec-list) + (make-dbr:counts state: item-state status: item-status count: 1) + (let* ((our-count-rec (car already-have-count-rec-list)) + (new-count (add1 (dbr:counts-count our-count-rec)))) + (make-dbr:counts state: item-state status: item-status count: new-count)))) + + (nonmatch-countrec-lambda (lambda (countrec) (not (match-countrec-lambda countrec)))) + + (unrelated-rec-list + (filter nonmatch-countrec-lambda other-items-count-recs))) + + (cons updated-count-rec unrelated-rec-list))) + +;; (define (db:get-all-item-states db run-id test-name) +;; (sqlite3:map-row +;; (lambda (a) a) +;; db +;; "SELECT DISTINCT state FROM tests WHERE item_path != '' AND state != 'DELETED' AND run_id=? AND testname=?" +;; run-id test-name)) +;; +;; (define (db:get-all-item-statuses db run-id test-name) +;; (sqlite3:map-row +;; (lambda (a) a) +;; db +;; "SELECT DISTINCT status FROM tests WHERE item_path != '' AND state != 'DELETED' AND state='COMPLETED' AND run_id=? AND testname=?" +;; run-id test-name)) + +(define (db:test-get-logfile-info dbstruct run-id test-name) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (path final_logf) + ;; (let ((path (sdb:qry 'getstr path-id)) + ;; (final_logf (sdb:qry 'getstr final_logf-id))) + (set! logf final_logf) + (set! res (list path final_logf)) + (if (directory? path) + (debug:print 2 *default-log-port* "Found path: " path) + (debug:print 2 *default-log-port* "No such path: " path))) ;; ) + db + "SELECT rundir,final_logf FROM tests WHERE testname=? AND item_path='' AND run_id=?;" + test-name run-id) + res)))) + +;;====================================================================== +;; A G R E G A T E D T R A N S A C T I O N D B W R I T E S +;;====================================================================== + +(define db:queries + (list '(update-run-duration "UPDATE tests SET run_duration=? WHERE id=?;") + + ;; TESTS + '(register-test "INSERT OR IGNORE INTO tests (run_id,testname,event_time,item_path,state,status) VALUES (?,?,strftime('%s','now'),?,'NOT_STARTED','n/a');") + ;; Test state and status + '(set-test-state "UPDATE tests SET state=? WHERE id=?;") + '(set-test-status "UPDATE tests SET state=? WHERE id=?;") + '(state-status "UPDATE tests SET state=?,status=? WHERE id=?;") ;; D/ONE + '(state-status-msg "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;") ;; DONE + ;; Test comment + '(set-test-comment "UPDATE tests SET comment=? WHERE id=?;") + '(set-test-start-time "UPDATE tests SET event_time=strftime('%s','now') WHERE id=?;") ;; DONE + '(pass-fail-counts "UPDATE tests SET pass_count=?,fail_count=? WHERE id=?;") + ;; test_data-pf-rollup is used to set a tests PASS/FAIL based on the pass/fail info from the steps + '(test_data-pf-rollup "UPDATE tests + SET status=CASE WHEN (SELECT fail_count FROM tests WHERE id=?) > 0 + THEN 'FAIL' + WHEN (SELECT pass_count FROM tests WHERE id=?) > 0 AND + (SELECT status FROM tests WHERE id=?) NOT IN ('WARN','FAIL') + THEN 'PASS' + ELSE status + END WHERE id=?;") ;; DONE + '(test-set-log "UPDATE tests SET final_logf=? WHERE id=?;") ;; DONE + ;; '(test-set-rundir-by-test-id "UPDATE tests SET rundir=? WHERE id=?") ;; DONE + ;; '(test-set-rundir "UPDATE tests SET rundir=? AND testname=? AND item_path=?;") ;; DONE + '(test-set-rundir-shortdir "UPDATE tests SET rundir=?,shortdir=? WHERE testname=? AND item_path=? AND run_id=?;") ;; BROKEN!!! NEEDS run-id + '(delete-tests-in-state ;; "DELETE FROM tests WHERE state=?;") ;; DONE + "UPDATE tests SET state='DELETED' WHERE state=?") + '(tests:test-set-toplog "UPDATE tests SET final_logf=? WHERE run_id=? AND testname=? AND item_path='';") + '(update-cpuload-diskfree "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;") ;; DONE + '(update-uname-host "UPDATE tests SET uname=?,host=? WHERE id=?;") ;; DONE + '(update-test-rundat "INSERT INTO test_rundat (test_id,update_time,cpuload,diskfree,diskusage,run_duration) VALUES (?,?,?,?,?,?);") + '(update-test-state "UPDATE tests SET state=? WHERE state=? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") + '(update-test-status "UPDATE tests SET status=? WHERE status like ? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") + ;; stuff for set-state-status-and-roll-up-items + '(update-pass-fail-counts "UPDATE tests + SET fail_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('FAIL','CHECK','INCOMPLETE','ABORT')), + pass_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('PASS','WARN','WAIVED')) + WHERE testname=? AND item_path='' AND run_id=?;") ;; DONE ;; BROKEN!!! NEEDS run-id + '(top-test-set-running "UPDATE tests SET state='RUNNING' WHERE testname=? AND item_path='' AND run_id=?;") ;; DONE ;; BROKEN!!! NEEDS run-id + + ;; NOT USED + ;; + ;; Might be the following top-test-set-per-pf-counts query could be better based off of something like this: + ;; + ;; select state,status,count(state) from tests where run_id=59 AND testname='runfirst' group by state,status; + ;; + '(top-test-set-per-pf-counts "UPDATE tests + SET state=CASE + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND status NOT IN ('n/a') + AND state in ('NOT_STARTED')) > 0 THEN 'UNKNOWN' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND (status NOT IN ('TEN_STRIKES','BLOCKED') OR status IN ('INCOMPLETE')) + AND state in ('RUNNING','NOT_STARTED','LAUNCHED','REMOTEHOSTSTART')) > 0 THEN 'RUNNING' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('COMPLETED','DELETED')) = 0 THEN 'COMPLETED' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state = 'NOT_STARTED') > 0 THEN 'NOT_STARTED' + ELSE 'UNKNOWN' END, + status=CASE + WHEN fail_count > 0 THEN 'FAIL' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state IN ('BLOCKED','INCOMPLETE')) > 0 THEN 'FAIL' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('DELETED') + AND status IN ('INCOMPLETE','ABORT')) > 0 THEN 'ABORT' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('DELETED') + AND status = 'AUTO') > 0 THEN 'AUTO' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('DELETED') + AND status IN ('STUCK/INCOMPLETE', 'INCOMPLETE')) > 0 THEN 'INCOMPLETE' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state IN ('COMPLETED','STUCK/INCOMPLETE','INCOMPLETE') + AND status = 'FAIL') > 0 THEN 'FAIL' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('DELETED') + AND status = 'CHECK') > 0 THEN 'CHECK' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('DELETED') + AND status = 'SKIP') > 0 THEN 'SKIP' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('DELETED') + AND status = 'WARN') > 0 THEN 'WARN' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('DELETED') + AND status = 'WAIVED') > 0 THEN 'WAIVED' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('DELETED') + AND status NOT IN ('PASS','FAIL','WARN','WAIVED')) > 0 THEN 'ABORT' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state='NOT_STARTED') > 0 THEN 'n/a' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state = 'COMPLETED' + AND status = 'PASS') > 0 THEN 'PASS' + WHEN pass_count > 0 AND fail_count=0 THEN 'PASS' + ELSE 'UNKNOWN' END + WHERE testname=? AND item_path='';") ;; DONE ;; BROKEN!!! NEEDS run-id + + ;; STEPS + '(delete-test-step-records "UPDATE test_steps SET status='DELETED' WHERE test_id=?;") + '(delete-test-data-records "UPDATE test_data SET status='DELETED' WHERE test_id=?;") ;; using status since no state field + )) + +(define (db:lookup-query qry-name) + (let ((q (alist-ref qry-name db:queries))) + (if q (car q) #f))) + +;; do not run these as part of the transaction +(define db:special-queries '(rollup-tests-pass-fail + ;; db:set-state-status-and-roll-up-items ;; WHY NOT!? + login + immediate + flush + sync + set-verbosity + killserver + )) + +(define (db:login dbstruct calling-path calling-version client-signature) + (cond + ((not (equal? calling-path *toppath*)) + (list #f "Login failed due to mismatch paths: " calling-path ", " *toppath*)) + ;; ((not (equal? *run-id* run-id)) + ;; (list #f "Login failed due to mismatch run-id: " run-id ", " *run-id*)) + ((not (equal? megatest-version calling-version)) + (list #t (conc "Login warning due to mismatch megatest version: " calling-version ", " megatest-version))) + + (else + (hash-table-set! *logged-in-clients* client-signature (current-seconds)) + '(#t "successful login")))) + +(define (db:general-call dbstruct stmtname params) + (let ((query (let ((q (alist-ref (if (string? stmtname) + (string->symbol stmtname) + stmtname) + db:queries))) + (if q (car q) #f)))) + (db:with-db + dbstruct #f #f + (lambda (db) + (apply sqlite3:execute db query params) + #t)))) + +;; get a summary of state and status counts to calculate a rollup +;; +(define (db:get-state-status-summary dbstruct run-id testname) + (let ((res '())) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (state status count) + (set! res (cons (vector state status count) res))) + db + "SELECT state,status,count(state) FROM tests WHERE run_id=? AND testname=? AND item_path='' GROUP BY state,status;" + run-id testname) + res)))) + +(define (db:get-latest-host-load dbstruct raw-hostname) + (let* ((hostname (string-substitute "\\..*$" "" raw-hostname)) + (res (cons -1 0))) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (cpuload update-time) (set! res (cons cpuload update-time))) + db + "SELECT tr.cpuload, tr.update_time FROM test_rundat tr, tests t WHERE t.host=? AND tr.cpuload != -1 AND tr.test_id=t.id ORDER BY tr.update_time DESC LIMIT 1;" + hostname))) res )) + +(define (db:set-top-level-from-items dbstruct run-id testname) + (let* ((summ (db:get-state-status-summary dbstruct run-id testname)) + (find (lambda (state status) + (if (null? summ) + #f + (let loop ((hed (car summ)) + (tal (cdr summ))) + (if (and (string-match state (vector-ref hed 0)) + (string-match status (vector-ref hed 1))) + hed + (if (null? tal) + #f + (loop (car tal)(cdr tal))))))))) + + + ;;; E D I T M E ! ! + + + (cond + ((> (find "COMPLETED" ".*") 0) #f)))) + + + +;; get the previous records for when these tests were run where all keys match but runname +;; NB// Merge this with test:get-previous-test-run-records? This one looks for all matching tests +;; can use wildcards. Also can likely be factored in with get test paths? +;; +;; Run this remotely!! +;; +(define (db:get-matching-previous-test-run-records dbstruct run-id test-name item-path) + (let* ((keys (db:get-keys dbstruct)) + (selstr (string-intersperse keys ",")) + (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND ")) + (keyvals #f) + (tests-hash (make-hash-table))) + ;; first look up the key values from the run selected by run-id + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (a . b) + (set! keyvals (cons a b))) + db + (conc "SELECT " selstr " FROM runs WHERE id=? ORDER BY event_time DESC;") run-id))) + (if (not keyvals) + '() + (let ((prev-run-ids '())) + (db:with-db + dbstruct #f #f + (lambda (db) + (apply sqlite3:for-each-row + (lambda (id) + (set! prev-run-ids (cons id prev-run-ids))) + db + (conc "SELECT id FROM runs WHERE " qrystr " AND id != ?;") (append keyvals (list run-id))))) + ;; collect all matching tests for the runs then + ;; extract the most recent test and return that. + (debug:print 4 *default-log-port* "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals + ", previous run ids found: " prev-run-ids) + (if (null? prev-run-ids) '() ;; no previous runs? return null + (let loop ((hed (car prev-run-ids)) + (tal (cdr prev-run-ids))) + (let ((results (db:get-tests-for-run dbstruct hed (conc test-name "/" item-path) '() '() #f #f #f #f #f #f #f 'normal))) + (debug:print 4 *default-log-port* "Got tests for run-id " run-id ", test-name " test-name + ", item-path " item-path " results: " (intersperse results "\n")) + ;; Keep only the youngest of any test/item combination + (for-each + (lambda (testdat) + (let* ((full-testname (conc (db:test-get-testname testdat) "/" (db:test-get-item-path testdat))) + (stored-test (hash-table-ref/default tests-hash full-testname #f))) + (if (or (not stored-test) + (and stored-test + (> (db:test-get-event_time testdat)(db:test-get-event_time stored-test)))) + ;; this test is younger, store it in the hash + (hash-table-set! tests-hash full-testname testdat)))) + results) + (if (null? tal) + (map cdr (hash-table->alist tests-hash)) ;; return a list of the most recent tests + (loop (car tal)(cdr tal)))))))))) + +;; Function recursively checks if .journal exists; if yes means db busy; call itself after delayed interval +;; return the sqlite3 db handle if possible +;; +(define (db:delay-if-busy dbdat #!key (count 6)) + (if (not (configf:lookup *configdat* "server" "delay-on-busy")) + (and dbdat (db:dbdat-get-db dbdat)) + (if dbdat + (let* ((dbpath (db:dbdat-get-path dbdat)) + (db (db:dbdat-get-db dbdat)) ;; we'll return this so (db:delay--if-busy can be called inline + (dbfj (conc dbpath "-journal"))) + (if (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "WARNING: failed to test for existance of " dbfj ", exn=" exn) + (thread-sleep! 1) + (db:delay-if-busy count (- count 1))) + (common:file-exists? dbfj)) + (case count + ((6) + (thread-sleep! 0.2) + (db:delay-if-busy count: 5)) + ((5) + (thread-sleep! 0.4) + (db:delay-if-busy count: 4)) + ((4) + (thread-sleep! 0.8) + (db:delay-if-busy count: 3)) + ((3) + (thread-sleep! 1.6) + (db:delay-if-busy count: 2)) + ((2) + (thread-sleep! 3.2) + (db:delay-if-busy count: 1)) + ((1) + (thread-sleep! 6.4) + (db:delay-if-busy count: 0)) + (else + (debug:print-info 0 *default-log-port* "delaying db access due to high database load.") + (thread-sleep! 12.8)))) + db) + "bogus result from db:delay-if-busy"))) + +(define (db:test-get-records-for-index-file dbstruct run-id test-name) + (let ((res '())) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (sqlite3:for-each-row + (lambda (id itempath state status run_duration logf comment) + (set! res (cons (vector id itempath state status run_duration logf comment) res))) + db + "SELECT id,item_path,state,status,run_duration,final_logf,comment FROM tests WHERE testname=? AND item_path != '' AND run_id=?;" ;; BUG! WHY NO run_id? + test-name + run-id) + res)))) + +;;====================================================================== +;; Tests meta data +;;====================================================================== + +;; returns a hash table of tags to tests +;; +(define (db:get-tests-tags dbstruct) + (db:with-db + dbstruct #f #f + (lambda (db) + (let* ((res (make-hash-table))) + (sqlite3:for-each-row + (lambda (testname tags-in) + (let ((tags (string-split tags-in ","))) + (for-each + (lambda (tag) + (hash-table-set! res tag + (delete-duplicates + (cons testname (hash-table-ref/default res tag '()))))) + tags))) + db + "SELECT testname,tags FROM test_meta") + (hash-table->alist res))))) + +;; read the record given a testname +(define (db:testmeta-get-record dbstruct testname) + (let ((res #f)) + (db:with-db + dbstruct + #f + #f + (lambda (db) + (sqlite3:for-each-row + (lambda (id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup) + (set! res (vector id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup))) + db + "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta WHERE testname=?;" + testname) + res)))) + +;; create a new record for a given testname +(define (db:testmeta-add-record dbstruct testname) + (db:with-db dbstruct #f #f + (lambda (db) + (sqlite3:execute + db + "INSERT OR IGNORE INTO test_meta (testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags) VALUES (?,'','','','','','','','');" testname)))) + +;; update one of the testmeta fields +(define (db:testmeta-update-field dbstruct testname field value) + (db:with-db dbstruct #f #f + (lambda (db) + (sqlite3:execute + db + (conc "UPDATE test_meta SET " field "=? WHERE testname=?;") value testname)))) + +(define (db:testmeta-get-all dbstruct) + (db:with-db dbstruct #f #f + (lambda (db) + (let ((res '())) + (sqlite3:for-each-row + (lambda (a . b) + (set! res (cons (apply vector a b) res))) + db + "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta;") + res)))) + +;;====================================================================== +;; M I S C M A N A G E M E N T I T E M S +;;====================================================================== + +;; A routine to map itempaths using a itemmap +;; patha and pathb must be strings or this will fail +;; +;; path-b is waiting on path-a +;; +(define (db:compare-itempaths test-b-name path-a path-b itemmaps ) + (debug:print-info 6 *default-log-port* "ITEMMAPS: " itemmaps) + (let* ((itemmap (tests:lookup-itemmap itemmaps test-b-name))) + (if itemmap + (let ((path-b-mapped (db:multi-pattern-apply path-b itemmap))) + (debug:print-info 6 *default-log-port* "ITEMMAP is " itemmap ", path: " path-b ", mapped path: " path-b-mapped) + (equal? path-a path-b-mapped)) + (equal? path-b path-a)))) + +;; A routine to convert test/itempath using a itemmap +;; NOTE: to process only an itempath (i.e. no prepended testname) +;; just call db:multi-pattern-apply +;; +(define (db:convert-test-itempath path-in itemmap) + (debug:print-info 6 *default-log-port* "ITEMMAP is " itemmap) + (let* ((path-parts (string-split path-in "/")) + (test-name (if (null? path-parts) "" (car path-parts))) + (item-path (string-intersperse (if (null? path-parts) '() (cdr path-parts)) "/"))) + (conc test-name "/" + (db:multi-pattern-apply item-path itemmap)))) + +;; patterns are: +;; "rx1" "replacement1"\n +;; "rx2" "replacement2" +;; etc. +;; +(define (db:multi-pattern-apply item-path itemmap) + (let ((all-patts (string-split itemmap "\n"))) + (if (null? all-patts) + item-path + (let loop ((hed (car all-patts)) + (tal (cdr all-patts)) + (res item-path)) + (let* ((parts (string-split hed)) + (patt (car parts)) + + (repl (if (> (length parts) 1)(cadr parts) "")) + + (newr (if (and patt repl) + (begin + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* + "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl ", exn=" exn) + res) + (string-substitute patt repl res)) + + + ) + (begin + (debug:print 0 *default-log-port* + "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl) + res)))) + (if (null? tal) + newr + (loop (car tal)(cdr tal) newr))))))) + + + + +;; the new prereqs calculation, looks also at itempath if specified +;; all prereqs must be met +;; if prereq test with itempath='' is COMPLETED and PASS, WARN, CHECK, or WAIVED then prereq is met +;; if prereq test with itempath=ref-item-path and COMPLETED with PASS, WARN, CHECK, or WAIVED then prereq is met +;; +;; Note: mode 'normal means that tests must be COMPLETED and ok (i.e. PASS, WARN, CHECK, SKIP or WAIVED) +;; mode 'toplevel means that tests must be COMPLETED only +;; mode 'itemmatch or 'itemwait means that tests items must be COMPLETED and (PASS|WARN|WAIVED|CHECK) [[ NB// NOT IMPLEMENTED YET ]] +;; mode 'exclusive means this test/item cannot run if the same test/item is LAUNCHED,REMOTEHOSTSTART or RUNNING +;; +;; IDEA for consideration: +;; 1. collect all tests "upstream" +;; 2. any NOT completed and good? if yes => return those as prereqs not met, if no => return null list +;; +;; (define (db:get-prereqs-not-met dbstruct run-id waitons ref-item-path mode) +(define (db:get-prereqs-not-met dbstruct run-id waitons ref-test-name ref-item-path mode itemmaps) ;; #!key (mode '(normal))(itemmap #f)) + ;; BBnote - rollup of an itemized test's overall state/status done in db:set-state-status-and-roll-up-items + (append + (if (member 'exclusive mode) + (let ((running-tests (db:get-tests-for-run dbstruct + #f ;; run-id of #f means for all runs. + (if (string=? ref-item-path "") ;; testpatt + ref-test-name + (conc ref-test-name "/" ref-item-path)) + '("LAUNCHED" "REMOTEHOSTSTART" "RUNNING") ;; states + '() ;; statuses + #f ;; offset + #f ;; limit + #f ;; not-in + #f ;; sort by + #f ;; sort order + 'shortlist ;; query type + 0 ;; last update, beginning of time .... + #f ;; mode + ))) + ;;(map (lambda (testdat) + ;; (if (equal? (db:test-get-item-path testdat) "") + ;; (db:test-get-testname testdat) + ;; (conc (db:test-get-testname testdat) + ;; "/" + ;; (db:test-get-item-path testdat)))) + running-tests) ;; calling functions want the entire data + '()) + + ;; collection of: for each waiton - + ;; if this ref-test-name is an item in an itemized test and mode is itemwait/itemmatch: + ;; if waiton is not itemized - if waiton is not both completed and in ok status, add as unmet prerequisite + ;; if waiton is itemized: + ;; and waiton's items are not expanded, add as unmet prerequisite + ;; else if matching waiton item is not both completed and in an ok status, add as unmet prerequisite + ;; else + ;; if waiton toplevel is not in both completed and ok status, add as unmet prerequisite + + (if (or (not waitons) + (null? waitons)) + '() + (let* ((ref-test-itemized-mode (not (null? (lset-intersection eq? mode '(itemmatch itemwait))))) ;; how is this different from using member? + (ref-test-toplevel-mode (not (null? (lset-intersection eq? mode '(toplevel))))) + (ref-test-is-toplevel (equal? ref-item-path "")) + (ref-test-is-item (not ref-test-is-toplevel)) + (unmet-pre-reqs '()) + (result '()) + (unmet-prereq-items '()) + ) + (for-each ; waitons + (lambda (waitontest-name) + ;; by getting the tests with matching name we are looking only at the matching test + ;; and related sub items + ;; next should be using mt:get-tests-for-run? + + (let (;(waiton-is-itemized ...) + ;(waiton-items-are-expanded ...) + (waiton-tests (db:get-tests-for-run-state-status dbstruct run-id waitontest-name)) + (ever-seen #f) + (parent-waiton-met #f) + (item-waiton-met #f) + + ) + (for-each ; test expanded from waiton + (lambda (waiton-test) + (let* ((waiton-state (db:test-get-state waiton-test)) + (waiton-status (db:test-get-status waiton-test)) + (waiton-item-path (db:test-get-item-path waiton-test)) ;; BB- this is the upstream itempath + (waiton-test-name (db:test-get-testname waiton-test)) + (waiton-is-toplevel (equal? waiton-item-path "")) + (waiton-is-item (not waiton-is-toplevel)) + (waiton-is-completed (member waiton-state *common:ended-states*)) + (waiton-is-running (member waiton-state *common:running-states*)) + (waiton-is-killed (member waiton-state *common:badly-ended-states*)) + (waiton-is-ok (member waiton-status *common:well-ended-states*)) + ;; testname-b path-a path-b + (same-itempath (db:compare-itempaths ref-test-name waiton-item-path ref-item-path itemmaps)) ;; (equal? ref-item-path waiton-item-path))) + (real-ref-test-name (car (string-split ref-test-name "/"))) ;; I THINK ref-test-name SHOULD NEVER HAVE THE ITEM_PATH! + (test-and-ref-are-same (equal? real-ref-test-name waiton-test-name))) + (debug:print 4 *default-log-port* "waiton-test-name " waiton-test-name " ref-test-name: " ref-test-name " test-and-ref-are-same: " test-and-ref-are-same) + (set! ever-seen #t) + ;;(BB> "***consider waiton "waiton-test"/"waiton-item-path"***") + (cond + ;; case 0 - toplevel of an itemized test, at least one item in prereq has completed + ((and waiton-is-item ref-test-is-toplevel ref-test-itemized-mode waiton-is-completed) + (set! parent-waiton-met #t)) + + ;; case 1, non-item (parent test) is + ((and waiton-is-toplevel ;; this is the parent test of the waiton being examined + waiton-is-completed + ;;(BB> "cond1") + (or waiton-is-ok ref-test-toplevel-mode)) ;; itemmatch itemwait)))))) + (set! parent-waiton-met #t)) + ;; Special case for toplevel and KILLED + ((and waiton-is-toplevel ;; this is the parent test + waiton-is-killed + (member 'toplevel mode)) + ;;(BB> "cond2") + (set! parent-waiton-met #t)) + ;; For itemwait mode IFF the previous matching item is good the set parent-waiton-met + ((and ref-test-itemized-mode ref-test-is-item same-itempath) + ;;(BB> "cond3") + (if (and waiton-is-completed (or waiton-is-ok ref-test-toplevel-mode)) + (set! item-waiton-met #t) + (set! unmet-prereq-items (cons waiton-test unmet-prereq-items))) + (if (and waiton-is-toplevel ;; if upstream rollup test is completed, parent-waiton-met is set + (or waiton-is-completed waiton-is-running)) + (set! parent-waiton-met #t))) + ;; normal checking of parent items, any parent or parent item not ok blocks running + ((and waiton-is-completed + (or waiton-is-ok + (member 'toplevel mode)) ;; toplevel does not block on FAIL + (and waiton-is-ok (member 'itemmatch mode) ;; itemmatch blocks on not ok ;; TODO: THIS IS PROBABLY A BUG. ITEMMATCH AND ITEMWAIT ARE SYNONYMS!! WHAT HAPPENED OT ITEMWAIT??? + )) + ;;(BB> "cond4") + (set! item-waiton-met #t)) + ((and waiton-is-completed waiton-is-ok same-itempath) + ;;(BB> "cond5") + (set! item-waiton-met #t)) + ((and waiton-is-completed waiton-is-ok test-and-ref-are-same) ;; probably from [waitons] table + (set! item-waiton-met #t)) + (else + #t + ;;(BB> "condelse") + )))) + waiton-tests) + ;; both requirements, parent and item-waiton must be met to NOT add item to + ;; prereq's not met list + ;; (BB> + ;; "\n* waiton-tests "waiton-tests + ;; "\n* parent-waiton-met "parent-waiton-met + ;; "\n* item-waiton-met "item-waiton-met + ;; "\n* ever-seen "ever-seen + ;; "\n* ref-test-itemized-mode "ref-test-itemized-mode + ;; "\n* unmet-prereq-items "unmet-prereq-items + ;; "\n* result (pre) "result + ;; "\n* ever-seen "ever-seen + ;; "\n") + + (cond + ((and ref-test-itemized-mode ref-test-is-item (not (null? unmet-prereq-items))) + (set! result (append unmet-prereq-items result))) + ((not (or parent-waiton-met item-waiton-met)) + (set! result (append (if (null? waiton-tests) (list waitontest-name) waiton-tests) result))) ;; appends the string if the full record is not available + ;; if the test is not found then clearly the waiton is not met... + ;; (if (not ever-seen)(set! result (cons waitontest-name result))))) + ((not ever-seen) + (set! result (append (if (null? waiton-tests)(list waitontest-name) waiton-tests) result)))))) + waitons) + (delete-duplicates result))))) + +;;====================================================================== +;; To sync individual run +;;====================================================================== +(define (db:get-run-record-ids dbstruct target run keynames test-patt) +(let ((backcons (lambda (lst item)(cons item lst)))) + (db:with-db + dbstruct #f #f + (lambda (db) + (let* ((keystr (string-intersperse + (map (lambda (key val) + (conc key " like '" val "'")) + keynames + (string-split target "/")) + " AND ")) + (run-qry (conc "SELECT id FROM runs WHERE " keystr " and runname='" run"'")) + (test-qry (conc "SELECT id FROM tests WHERE run_id in (" run-qry ") and testname like '" test-patt "'"))) + (print run-qry) + (print test-qry) + `((runs . ,(sqlite3:fold-row backcons '() db run-qry)) + (tests . ,(sqlite3:fold-row backcons '() db test-qry)) + (test_steps . ,(sqlite3:fold-row backcons '() db (conc "SELECT id FROM test_steps WHERE test_id in (" test-qry ")"))) + (test_data . ,(sqlite3:fold-row backcons '() db (conc "SELECT id FROM test_data WHERE test_id in (" test-qry ")" ))) + )))))) + +;;====================================================================== +;; Just for sync, procedures to make sync easy +;;====================================================================== + +;; get an alist of record ids changed since time since-time +;; '((runs . (1 2 3 ...))(steps . (5 6 7 ...) ...)) +;; +(define (db:get-changed-record-ids dbstruct since-time) + ;; no transaction, allow the db to be accessed between the big queries + (let ((backcons (lambda (lst item)(cons item lst)))) + (db:with-db + dbstruct #f #f + (lambda (db) + `((runs . ,(sqlite3:fold-row backcons '() db "SELECT id FROM runs WHERE last_update>=?" since-time)) + (tests . ,(sqlite3:fold-row backcons '() db "SELECT id FROM tests WHERE last_update>=?" since-time)) + (test_steps . ,(sqlite3:fold-row backcons '() db "SELECT id FROM test_steps WHERE last_update>=?" since-time)) + (test_data . ,(sqlite3:fold-row backcons '() db "SELECT id FROM test_data WHERE last_update>=?" since-time)) + ;; (test_meta . ,(fold-row backcons '() db "SELECT id FROM test_meta WHERE last_update>?" since-time)) + (run_stats . ,(sqlite3:fold-row backcons '() db "SELECT id FROM run_stats WHERE last_update>=?" since-time)) + ))))) + ) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -47,10 +47,11 @@ (declare (uses dbmod)) (declare (uses rmtmod)) (declare (uses servermod)) (declare (uses mtver)) (declare (uses adjutant)) +(declare (uses processmod)) ;; (include "call-with-environment-variables/call-with-environment-variables.scm") (module megatest-main * @@ -138,10 +139,11 @@ commonmod dbmod rmtmod servermod debugprint + processmod ) ;; fake out readline usage of toplevel-command (define (toplevel-command . a) #f) Index: process.scm ================================================================== --- process.scm +++ process.scm @@ -16,212 +16,5 @@ ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;;====================================================================== -;;====================================================================== -;; Process convience utils -;;====================================================================== - -;; (use regex directory-utils) -;; (declare (unit process)) - -(define (process:conservative-read port) - (let loop ((res "")) - (if (not (eof-object? (peek-char port))) - (loop (conc res (read-char port))) - res))) - -(define (process:cmd-run-with-stderr->list cmd . params) - ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params) -;; (handle-exceptions -;; exn -;; (begin -;; (print "ERROR: Failed to run command: " cmd " " (string-intersperse params " ")) -;; (print " " ((condition-property-accessor 'exn 'message) exn)) -;; #f) - (let-values (((fh fho pid fhe) (if (null? params) - (process* cmd) - (process* cmd params)))) - (let loop ((curr (read-line fh)) - (result '())) - (let ((errstr (process:conservative-read fhe))) - (if (not (string=? errstr "")) - (set! result (append result (list errstr))))) - (if (not (eof-object? curr)) - (loop (read-line fh) - (append result (list curr))) - (begin - (close-input-port fh) - (close-input-port fhe) - (close-output-port fho) - result))))) ;; ) - -(define (process:cmd-run-with-stderr-and-exitcode->list cmd . params) - ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params) -;; (handle-exceptions -;; exn -;; (begin -;; (print "ERROR: Failed to run command: " cmd " " (string-intersperse params " ")) -;; (print " " ((condition-property-accessor 'exn 'message) exn)) -;; #f) - (let-values (((fh fho pid fhe) (if (null? params) - (process* cmd) - (process* cmd params)))) - (let loop ((curr (read-line fh)) - (result '())) - (let ((errstr (process:conservative-read fhe))) - (if (not (string=? errstr "")) - (set! result (append result (list errstr))))) - (if (not (eof-object? curr)) - (loop (read-line fh) - (append result (list curr))) - (begin - (let-values (((anotherpid normalexit? exitstatus) (process-wait pid))) - (close-input-port fh) - (close-input-port fhe) - (close-output-port fho) - (list result (if normalexit? exitstatus -1)))))))) - -(define (process:cmd-run-proc-each-line cmd proc . params) - ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params) - (handle-exceptions - exn - (begin - (print "ERROR: Failed to run command: " cmd " " (string-intersperse params " ")) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) - #f) - (let-values (((fh fho pid) (if (null? params) - (process cmd) - (process cmd params)))) - (let loop ((curr (read-line fh)) - (result '())) - (if (not (eof-object? curr)) - (loop (read-line fh) - (append result (list (proc curr)))) - (begin - (close-input-port fh) - ;;(close-input-port fhe) - (close-output-port fho) - result)))))) - -(define (process:cmd-run-proc-each-line-alt cmd proc) - (let* ((fh (open-input-pipe cmd)) - (res (port-proc->list fh proc)) - (status (close-input-pipe fh))) - (if (eq? status 0) res #f))) - -(define (process:cmd-run->list cmd #!key (delta-env-alist-or-hash-table '())) - (common:with-env-vars - delta-env-alist-or-hash-table - (lambda () - (let* ((fh (open-input-pipe cmd)) - (res (port->list fh)) - (status (close-input-pipe fh))) - (list res status))))) - -(define (port->list fh) - (if (eof-object? fh) #f - (let loop ((curr (read-line fh)) - (result '())) - (if (not (eof-object? curr)) - (loop (read-line fh) - (append result (list curr))) - result)))) - -(define (port-proc->list fh proc) - (if (eof-object? fh) #f - (let loop ((curr (proc (read-line fh))) - (result '())) - (if (not (eof-object? curr)) - (loop (let ((l (read-line fh))) - (if (eof-object? l) l (proc l))) - (append result (list curr))) - result)))) - -;; here is an example line where the shell is sh or bash -;; "find / -print 2&>1 > findall.log" -(define (run-n-wait cmdline #!key (params #f)(print-cmd #f)(run-dir #f)) - (if print-cmd - (debug:print 0 *default-log-port* - (if (string? print-cmd) - print-cmd - "") - (if run-dir (conc "Run in " run-dir ";") "") - cmdline - (if params - (conc " " (string-intersperse params " ")) - ""))) - (if (and run-dir - (directory-exists? run-dir)) - (push-directory run-dir)) - (let ((pid (if params - (process-run cmdline params) - (process-run cmdline)))) - (let loop ((i 0)) - (let-values (((pid-val exit-status exit-code) (process-wait pid #t))) - (if (eq? pid-val 0) - (begin - (thread-sleep! 2) - (loop (+ i 1))) - (begin - (if (and run-dir - (directory-exists? run-dir)) - (pop-directory)) - (values pid-val exit-status exit-code))))))) - -;;====================================================================== -;; MISC PROCESS RELATED STUFF -;;====================================================================== - -(define (process:children proc) - (with-input-from-pipe - (conc "ps h --ppid " (current-process-id) " -o pid") - (lambda () - (let loop ((inl (read-line)) - (res '())) - (if (eof-object? inl) - (reverse res) - (let ((pid (string->number inl))) - (if proc (proc pid)) - (loop (read-line) (cons pid res)))))))) - -(define (process:alive? pid) - (handle-exceptions - exn - ;; possibly pid is a process not a child, look in /proc to see if it is running still - (common:file-exists? (conc "/proc/" pid)) - (let-values (((rpid exit-type exit-signal)(process-wait pid #t))) - (and (number? rpid) - (equal? rpid pid))))) - -(define (process:alive-on-host? host pid) - (let ((cmd (conc "ssh " host " ps -o pid= -p " pid))) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "failed to identify if process " pid ", on host " host " is alive. exn=" exn) - #f) ;; anything goes wrong - assume the process in NOT running. - (with-input-from-pipe - cmd - (lambda () - (let loop ((inl (read-line))) - (if (eof-object? inl) - #f - (let* ((clean-str (string-substitute "^[^\\d]*([0-9]+)[^\\d]*$" "\\1" inl)) - (innum (string->number clean-str))) - (and innum - (eq? pid innum)))))))))) - -(define (process:get-sub-pids pid) - (with-input-from-pipe - (conc "pstree -A -p " pid) ;; | tr 'a-z\\-+`()\\.' ' ' " pid) - (lambda () - (let loop ((inl (read-line)) - (res '())) - (if (eof-object? inl) - (reverse res) - (let ((nums (map string->number - (string-split-fields "\\d+" inl)))) - (loop (read-line) - (append res nums)))))))) ADDED processmod.scm Index: processmod.scm ================================================================== --- /dev/null +++ processmod.scm @@ -0,0 +1,321 @@ +;;====================================================================== +;; Copyright 2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +(declare (unit processmod)) +;; (declare (uses mtver)) +(declare (uses debugprint)) +;; (declare (uses stml2)) +;; (declare (uses pkts)) + +(module processmod + * + +(import scheme + chicken.base + chicken.condition + chicken.file +;; chicken.time +;; chicken.file.posix + chicken.pathname +;; chicken.port + chicken.process + chicken.process-context + chicken.process-context.posix + chicken.process.signal +;; chicken.pretty-print +;; chicken.random + chicken.io + chicken.string +;; chicken.sort +;; chicken.time.posix +;; +;; (prefix base64 base64:) +;; (prefix sqlite3 sqlite3:) + directory-utils +;; matchable +;; md5 +;; message-digest + regex +;; regex-case +;; sparse-vectors + srfi-1 +;; srfi-13 + srfi-18 + srfi-69 +;; system-information +;; typed-records +;; z3 +;; +;; mtver + debugprint +;; stml2 +;; pkts + ) + +;;====================================================================== +;; Process convience utils +;;====================================================================== + +;;====================================================================== +;; accept an alist or hash table containing envvar/env value pairs (value of #f causes unset) +;; execute thunk in context of environment modified as per this list +;; restore env to prior state then return value of eval'd thunk. +;; ** this is not thread safe ** +(define (common:with-env-vars delta-env-alist-or-hash-table thunk) + (let* ((delta-env-alist (if (hash-table? delta-env-alist-or-hash-table) + (hash-table->alist delta-env-alist-or-hash-table) + delta-env-alist-or-hash-table)) + (restore-thunks + (filter + identity + (map (lambda (env-pair) + (let* ((env-var (car env-pair)) + (new-val (let ((tmp (cdr env-pair))) + (if (list? tmp) (car tmp) tmp))) + (current-val (get-environment-variable env-var)) + (restore-thunk + (cond + ((not current-val) (lambda () (unset-environment-variable! env-var))) + ((not (string? new-val)) #f) + ((eq? current-val new-val) #f) + (else + (lambda () (set-environment-variable! env-var current-val)))))) + ;;(when (not (string? new-val)) + ;; (debug:print 0 *default-log-port* " PROBLEM: not a string: "new-val"\n from env-alist:\n"delta-env-alist) + ;; (pp delta-env-alist) + ;; (exit 1)) + + + (cond + ((not new-val) ;; modify env here + (unset-environment-variable! env-var)) + ((string? new-val) + (set-environment-variable! env-var new-val))) + restore-thunk)) + delta-env-alist)))) + (let ((rv (thunk))) + (for-each (lambda (x) (x)) restore-thunks) ;; restore env to original state + rv))) + +;; (use regex directory-utils) +;; (declare (unit process)) + +(define (process:conservative-read port) + (let loop ((res "")) + (if (not (eof-object? (peek-char port))) + (loop (conc res (read-char port))) + res))) + +(define (process:cmd-run-with-stderr->list cmd . params) + ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params) +;; (handle-exceptions +;; exn +;; (begin +;; (print "ERROR: Failed to run command: " cmd " " (string-intersperse params " ")) +;; (print " " ((condition-property-accessor 'exn 'message) exn)) +;; #f) + (let-values (((fh fho pid fhe) (if (null? params) + (process* cmd) + (process* cmd params)))) + (let loop ((curr (read-line fh)) + (result '())) + (let ((errstr (process:conservative-read fhe))) + (if (not (string=? errstr "")) + (set! result (append result (list errstr))))) + (if (not (eof-object? curr)) + (loop (read-line fh) + (append result (list curr))) + (begin + (close-input-port fh) + (close-input-port fhe) + (close-output-port fho) + result))))) ;; ) + +(define (process:cmd-run-with-stderr-and-exitcode->list cmd . params) + ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params) +;; (handle-exceptions +;; exn +;; (begin +;; (print "ERROR: Failed to run command: " cmd " " (string-intersperse params " ")) +;; (print " " ((condition-property-accessor 'exn 'message) exn)) +;; #f) + (let-values (((fh fho pid fhe) (if (null? params) + (process* cmd) + (process* cmd params)))) + (let loop ((curr (read-line fh)) + (result '())) + (let ((errstr (process:conservative-read fhe))) + (if (not (string=? errstr "")) + (set! result (append result (list errstr))))) + (if (not (eof-object? curr)) + (loop (read-line fh) + (append result (list curr))) + (begin + (let-values (((anotherpid normalexit? exitstatus) (process-wait pid))) + (close-input-port fh) + (close-input-port fhe) + (close-output-port fho) + (list result (if normalexit? exitstatus -1)))))))) + +(define (process:cmd-run-proc-each-line cmd proc . params) + ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params) + (handle-exceptions + exn + (begin + (print "ERROR: Failed to run command: " cmd " " (string-intersperse params " ")) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) + #f) + (let-values (((fh fho pid) (if (null? params) + (process cmd) + (process cmd params)))) + (let loop ((curr (read-line fh)) + (result '())) + (if (not (eof-object? curr)) + (loop (read-line fh) + (append result (list (proc curr)))) + (begin + (close-input-port fh) + ;;(close-input-port fhe) + (close-output-port fho) + result)))))) + +(define (process:cmd-run-proc-each-line-alt cmd proc) + (let* ((fh (open-input-pipe cmd)) + (res (port-proc->list fh proc)) + (status (close-input-pipe fh))) + (if (eq? status 0) res #f))) + +(define (process:cmd-run->list cmd #!key (delta-env-alist-or-hash-table '())) + (common:with-env-vars + delta-env-alist-or-hash-table + (lambda () + (let* ((fh (open-input-pipe cmd)) + (res (port->list fh)) + (status (close-input-pipe fh))) + (list res status))))) + +(define (port->list fh) + (if (eof-object? fh) #f + (let loop ((curr (read-line fh)) + (result '())) + (if (not (eof-object? curr)) + (loop (read-line fh) + (append result (list curr))) + result)))) + +(define (port-proc->list fh proc) + (if (eof-object? fh) #f + (let loop ((curr (proc (read-line fh))) + (result '())) + (if (not (eof-object? curr)) + (loop (let ((l (read-line fh))) + (if (eof-object? l) l (proc l))) + (append result (list curr))) + result)))) + +;; here is an example line where the shell is sh or bash +;; "find / -print 2&>1 > findall.log" +(define (run-n-wait cmdline #!key (params #f)(print-cmd #f)(run-dir #f)) + (if print-cmd + (debug:print 0 *default-log-port* + (if (string? print-cmd) + print-cmd + "") + (if run-dir (conc "Run in " run-dir ";") "") + cmdline + (if params + (conc " " (string-intersperse params " ")) + ""))) + (if (and run-dir + (directory-exists? run-dir)) + (push-directory run-dir)) + (let ((pid (if params + (process-run cmdline params) + (process-run cmdline)))) + (let loop ((i 0)) + (let-values (((pid-val exit-status exit-code) (process-wait pid #t))) + (if (eq? pid-val 0) + (begin + (thread-sleep! 2) + (loop (+ i 1))) + (begin + (if (and run-dir + (directory-exists? run-dir)) + (pop-directory)) + (values pid-val exit-status exit-code))))))) + +;;====================================================================== +;; MISC PROCESS RELATED STUFF +;;====================================================================== + +(define (process:children proc) + (with-input-from-pipe + (conc "ps h --ppid " (current-process-id) " -o pid") + (lambda () + (let loop ((inl (read-line)) + (res '())) + (if (eof-object? inl) + (reverse res) + (let ((pid (string->number inl))) + (if proc (proc pid)) + (loop (read-line) (cons pid res)))))))) + +(define (process:alive? pid) + (handle-exceptions + exn + ;; possibly pid is a process not a child, look in /proc to see if it is running still + (file-exists? (conc "/proc/" pid)) + (let-values (((rpid exit-type exit-signal)(process-wait pid #t))) + (and (number? rpid) + (equal? rpid pid))))) + +(define (process:alive-on-host? host pid) + (let ((cmd (conc "ssh " host " ps -o pid= -p " pid))) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "failed to identify if process " pid ", on host " host " is alive. exn=" exn) + #f) ;; anything goes wrong - assume the process in NOT running. + (with-input-from-pipe + cmd + (lambda () + (let loop ((inl (read-line))) + (if (eof-object? inl) + #f + (let* ((clean-str (string-substitute "^[^\\d]*([0-9]+)[^\\d]*$" "\\1" inl)) + (innum (string->number clean-str))) + (and innum + (eq? pid innum)))))))))) + +(define (process:get-sub-pids pid) + (with-input-from-pipe + (conc "pstree -A -p " pid) ;; | tr 'a-z\\-+`()\\.' ' ' " pid) + (lambda () + (let loop ((inl (read-line)) + (res '())) + (if (eof-object? inl) + (reverse res) + (let ((nums (map string->number + (string-split-fields "\\d+" inl)))) + (loop (read-line) + (append res nums)))))))) + +) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -101,11 +101,11 @@ ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"]; ;; DOT INIT_RUNREMOTE -> MUTEXLOCK; ;; ensure we have a record for our connection for given area (if (not runremote) ;; can remove this one. should never get here. (begin - (set! *runremote* (make-remote)) + (set! *runremote* (make-init-remote)) (let* ((server-info (remote-server-info *runremote*))) (if server-info (begin (remote-server-url-set! *runremote* (server:record->url server-info)) (remote-server-id-set! *runremote* (server:record->id server-info))))) @@ -182,11 +182,11 @@ ;; on homehost and this is a write, we already have a server, but server has died ((and (cdr (remote-hh-dat runremote)) ;; on homehost (not (member cmd api:read-only-queries)) ;; this is a write (remote-server-url runremote) ;; have a server (not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. - (set! *runremote* (make-remote)) + (set! *runremote* (make-init-remote)) (let* ((server-info (remote-server-info *runremote*))) (if server-info (begin (remote-server-url-set! *runremote* (server:record->url server-info)) (remote-server-id-set! *runremote* (server:record->id server-info)))))