Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -32,16 +32,22 @@ server.scm configf.scm db.scm keys.scm \ process.scm runs.scm tasks.scm tests.scm genexample.scm \ tdb.scm mt.scm \ ezsteps.scm rmt.scm api.scm \ subrun.scm archive.scm env.scm \ - diff-report.scm cgisetup/models/pgdb.scm + diff-report.scm + +# cgisetup/models/pgdb.scm # module source files MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm \ tcp-transportmod.scm rmtmod.scm portlogger.scm apimod.scm \ - configfmod.scm processmod.scm + configfmod.scm processmod.scm servermod.scm megatestmod.scm \ + stml2.scm fsmod.scm cpumod.scm mtmod.scm odsmod.scm \ + pkts.scm testsmod.scm pgdb.scm cookie.scm launchmod.scm \ + subrunmod.scm runsmod.scm tasksmod.scm archivemod.scm \ + ezstepsmod.scm transport-mode.scm : transport-mode.scm.template cp transport-mode.scm.template transport-mode.scm dashboard-transport-mode.scm : dashboard-transport-mode.scm.template @@ -49,27 +55,39 @@ mtest : transport-mode.scm dboard : dashboard-transport-mode.scm # dbmod.import.o is just a hack here -mofiles/portlogger.o : mofiles/dbmod.o -process.o : mofiles/processmod.o + +process.o : mofiles/processmod.o mofiles/configfmod.o : mofiles/processmod.o mofiles/processmod.o : mofiles/commonmod.o +mofiles/servermod.o : mofiles/commonmod.o +mofiles/rmtmod.o : mofiles/mtmod.o mofiles/apimod.o +mofiles/dbmod.o : mofiles/mtmod.o +# mofiles/mtmod.o : mofiles/tcp-transportmod.o +mofiles/megatestmod.o : mofiles/pkts.o mofiles/servermod.o mofiles/fsmod.o +mofiles/mtmod.o : mofiles/testsmod.o +mofiles/subrunmod.o : mofiles/tasksmod.o +mofiles/launchmod.o : mofiles/subrunmod.o mofiles/runsmod.o +mofiles/launchmod.o : mofiles/ezstepsmod.o +mofiles/runsmod.o : mofiles/archivemod.o mofiles/dbfile.o : \ - mofiles/debugprint.o mofiles/commonmod.o -mofiles/apimod.o : mofiles/commonmod.o mofiles/tcp-transportmod.o mofiles/configfmod.o + mofiles/debugprint.o mofiles/commonmod.o mofiles/configfmod.o +mofiles/apimod.o : mofiles/commonmod.o mofiles/tcp-transportmod.o mofiles/configfmod.o mofiles/megatestmod.o mofiles/dbmod.o : mofiles/dbfile.o mofiles/api.o : mofiles/apimod.o -mofiles/commonmod.o : mofiles/debugprint.o +mofiles/commonmod.o : mofiles/debugprint.o mofiles/stml2.o configf.o : commonmod.import.o mofiles/dbfile.o : mofiles/debugprint.o mofiles/rmtmod.o mofiles/dbmod.o : mofiles/dbfile.o mofiles/commonmod.o mofiles/debugprint.o db.o : mofiles/dbmod.o mofiles/dbfile.o mofiles/debugprint.o : mofiles/mtargs.o mofiles/tcp-transportmod.o : mofiles/portlogger.o +mofiles/tasksmod.o : mofiles/rmtmod.o mofiles/pgdb.o +mofiles/fsmod.o : mofiles/debugprint.o # ftail.scm rmtmod.scm commonmod.scm removed # MSRCFILES = ducttape-lib.scm pkts.scm stml2.scm cookie.scm mutils.scm \ # mtargs.scm commonmod.scm dbmod.scm adjutant.scm ulex.scm \ # rmtmod.scm apimod.scm @@ -518,10 +536,12 @@ # portlogger-example : portlogger-example.scm api.o archive.o common.o configf.o dashboard-tests.o dashboard-context-menu.o db.o dcommon.o ezsteps.o genexample.o gutils.o items.o keys.o launch.o lock-queue.o margs.o mt.o ods.o portlogger.o process.o rmt.o runconfig.o runs.o server.o synchash.o tasks.o tdb.o tests.o tree.o # csc $(CSCOPTS) portlogger-example.scm api.o archive.o common.o configf.o dashboard-tests.o dashboard-context-menu.o db.o dcommon.o ezsteps.o genexample.o gutils.o items.o keys.o launch.o lock-queue.o margs.o mt.o ods.o portlogger.o process.o rmt.o runconfig.o runs.o server.o sync-hash.o tasks.o tdb.o tests.o tree.o unitdeps.dot : *scm ./utils/plot-uses Makefile ./utils/plot-uses todot commonmod.import,mtargs.import,mtargs,debugprint *.scm > unitdeps.dot + +# ./utils/plot-uses todot commonmod,portlogger,stml2,debugprint,mtargs apimod.scm commonmod.scm configfmod.scm dbmod.scm megatestmod.scm mtmod.scm processmod.scm rmtmod.scm servermod.scm tcp-transportmod.scm > uses.dot ; dot uses.dot -Tpdf -o uses.pdf unitdeps.pdf : unitdeps.dot dot unitdeps.dot -Tpdf -o unitdeps.pdf ./utils/plot-uses : utils/plot-uses.scm Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -107,12 +107,10 @@ (normal-proc cmd run-id params) ;; numthreads must be greater than 5 for busy (* 0.1 (- numthreads maxthreads)) ;; was 15 - return a number for the remote to delay )) ;; (- numthreads 29)) ;; call back in as many seconds ((loaded) - ;; (if (eq? (rmt:transport-mode) 'tcp) - ;; (thread-sleep! 0.5)) (normal-proc cmd run-id params)) (else (normal-proc cmd run-id params)))) (meta (case cmd ((ping) `((sstate . ,server-state))) @@ -129,218 +127,5 @@ (api:unregister-thread (current-thread)) result))) (define api:tcp-dispatch-request-make-handler api:tcp-dispatch-request-make-handler-old) ;; choose -old or -new -(define *api-halt-writes* #f) - -(define (api:dispatch-request dbstruct cmd run-id params) - (if (not *no-sync-db*) - (db:open-no-sync-db)) - (let* ((start-time (current-milliseconds))) - (if (member cmd api:write-queries) - (let loop () - (if *api-halt-writes* - (begin - (thread-sleep! 0.2) - (if (< (- (current-milliseconds) start-time) - 5000) ;; hope it don't take more than five seconds to sync - (loop-time) - #;(debug:print 0 *default-log-port* "ERROR: writes halted for more than 5 seconds, sync might be taking too long")))))) - (db:add-stats 'api-write-blocking-for-sync run-id params (- (current-milliseconds) start-time))) - (case cmd - ;;=============================================== - ;; READ/WRITE QUERIES - ;;=============================================== - - ((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl - - ;; SERVERS - ((start-server) (apply tt:server-process-run params)) - ((kill-server) (set! *server-run* #f)) - - ;; TESTS - - ;;((test-set-state-status-by-id) (apply mt:test-set-state-status-by-id dbstruct params)) - ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items. - ((test-set-state-status-by-id) - - ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) - (db:set-state-status-and-roll-up-items - dbstruct - (list-ref params 0) ; run-id - (list-ref params 1) ; test-name - #f ; item-path - (list-ref params 2) ; state - (list-ref params 3) ; status - (list-ref params 4) ; comment - )) - - ((delete-test-records) (apply db:delete-test-records dbstruct params)) - ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params)) - ((test-set-state-status) (apply db:test-set-state-status dbstruct params)) - ((test-set-top-process-pid) (apply db:test-set-top-process-pid dbstruct params)) - ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params)) - ((set-state-status-and-roll-up-run) (apply db:set-state-status-and-roll-up-run dbstruct params)) - ((top-test-set-per-pf-counts) (apply db:top-test-set-per-pf-counts dbstruct params)) - ((test-set-archive-block-id) (apply db:test-set-archive-block-id dbstruct params)) - - ((insert-test) (db:insert-test dbstruct run-id params)) - - ;; RUNS - ((register-run) (apply db:register-run dbstruct params)) - ((set-tests-state-status) (apply db:set-tests-state-status dbstruct params)) - ((delete-run) (apply db:delete-run dbstruct params)) - ((lock/unlock-run) (apply db:lock/unlock-run dbstruct params)) - ((update-run-event_time) (apply db:update-run-event_time dbstruct params)) - ((update-run-stats) (apply db:update-run-stats dbstruct params)) - ((set-var) (apply db:set-var dbstruct params)) - ((inc-var) (apply db:inc-var dbstruct params)) - ((dec-var) (apply db:dec-var dbstruct params)) - ((del-var) (apply db:del-var dbstruct params)) - ((add-var) (apply db:add-var dbstruct params)) - - ((insert-run) (apply db:insert-run dbstruct params)) - - ;; STEPS - ((teststep-set-status!) (apply db:teststep-set-status! dbstruct params)) - ((delete-steps-for-test!) (apply db:delete-steps-for-test! dbstruct params)) - - ;; TEST DATA - ((test-data-rollup) (apply db:test-data-rollup dbstruct params)) - ((csv->test-data) (apply db:csv->test-data dbstruct params)) - - ;; MISC - ((sync-cachedb->db) (let ((run-id (car params))) - (db:sync-touched dbstruct run-id db:initialize-main-db force-sync: #t))) - ((get-toplevels-and-incompletes) (apply db:get-toplevels-and-incompletes dbstruct params)) - ((mark-incomplete) #f);;(thread-start! (make-thread (lambda () ;; no need to block on this one - ;; (apply db:find-and-mark-incomplete dbstruct params) - ;; #t)))) - ((create-all-triggers) (db:create-all-triggers dbstruct)) - ((drop-all-triggers) (db:drop-all-triggers dbstruct)) - - ;; TESTMETA - ((testmeta-add-record) (apply db:testmeta-add-record dbstruct params)) - ((testmeta-update-field) (apply db:testmeta-update-field dbstruct params)) - ((get-tests-tags) (db:get-tests-tags dbstruct)) - - ;; TASKS - ((tasks-add) (apply tasks:add dbstruct params)) - ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params)) - ((tasks-get-last) (apply tasks:get-last dbstruct params)) - - ;; NO SYNC DB - ((no-sync-set) (apply db:no-sync-set *no-sync-db* params)) - ((no-sync-get/default) (apply db:no-sync-get/default *no-sync-db* params)) - ((no-sync-del!) (apply db:no-sync-del! *no-sync-db* params)) - ((no-sync-get-lock) (apply db:no-sync-get-lock *no-sync-db* params)) - - ;; NO SYNC DB PROCESSES - ((register-process) (apply dbfile:register-process *no-sync-db* params)) - ((set-process-done) (apply dbfile:set-process-done *no-sync-db* params)) - ((set-process-status) (apply dbfile:set-process-status *no-sync-db* params)) - ((get-process-options) (apply dbfile:get-process-options *no-sync-db* params)) - - ;; ARCHIVES - ;; ((archive-get-allocations) - ((archive-register-disk) (apply db:archive-register-disk dbstruct params)) - ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params)) - ;; ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey)) - - ;;====================================================================== - ;; READ ONLY QUERIES - ;;====================================================================== - - ;; KEYS - ((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params)) - ((get-keys) (db:get-keys dbstruct)) - ((get-key-vals) (apply db:get-key-vals dbstruct params)) - ((get-target) (apply db:get-target dbstruct params)) - ((get-targets) (db:get-targets dbstruct)) - - ;; ARCHIVES - ((test-get-archive-block-info) (apply db:test-get-archive-block-info dbstruct params)) - - ;; TESTS - ((test-toplevel-num-items) (apply db:test-toplevel-num-items dbstruct params)) - ((get-test-info-by-id) (apply db:get-test-info-by-id dbstruct params)) - ((get-test-state-status-by-id) (apply db:get-test-state-status-by-id dbstruct params)) - ((test-get-rundir-from-test-id) (apply db:test-get-rundir-from-test-id dbstruct params)) - ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params)) - ((get-count-tests-running) (apply db:get-count-tests-running dbstruct params)) - ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params)) - ;; ((delete-test-step-records) (apply db:delete-test-step-records dbstruct params)) - ;; ((get-previous-test-run-record) (apply db:get-previous-test-run-record dbstruct params)) - ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params)) - ((test-get-logfile-info) (apply db:test-get-logfile-info dbstruct params)) - ((test-get-records-for-index-file) (apply db:test-get-records-for-index-file dbstruct params)) - ((get-testinfo-state-status) (apply db:get-testinfo-state-status dbstruct params)) - ((test-get-top-process-pid) (apply db:test-get-top-process-pid dbstruct params)) - ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params)) - ((get-prereqs-not-met) (apply db:get-prereqs-not-met dbstruct params)) - ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params)) - ((get-not-completed-cnt) (apply db:get-not-completed-cnt dbstruct params)) - ((get-raw-run-stats) (apply db:get-raw-run-stats dbstruct params)) - ((get-test-times) (apply db:get-test-times dbstruct params)) - - ;; RUNS - ((get-run-info) (apply db:get-run-info dbstruct params)) - ((get-run-status) (apply db:get-run-status dbstruct params)) - ((get-run-state) (apply db:get-run-state dbstruct params)) - ((get-run-state-status) (apply db:get-run-state-status dbstruct params)) - ((set-run-status) (apply db:set-run-status dbstruct params)) - ((set-run-state-status) (apply db:set-run-state-status dbstruct params)) - ((update-tesdata-on-repilcate-db) (apply db:update-tesdata-on-repilcate-db dbstruct params)) - ((get-tests-for-run) (apply db:get-tests-for-run dbstruct params)) - ((get-tests-for-run-state-status) (apply db:get-tests-for-run-state-status dbstruct params)) - ((get-test-id) (apply db:get-test-id dbstruct params)) - ((get-tests-for-run-mindata) (apply db:get-tests-for-run-mindata dbstruct params)) - ;; ((get-tests-for-runs-mindata) (apply db:get-tests-for-runs-mindata dbstruct params)) - ((get-runs) (apply db:get-runs dbstruct params)) - ((simple-get-runs) (apply db:simple-get-runs dbstruct params)) - ((get-num-runs) (apply db:get-num-runs dbstruct params)) - ((get-runs-cnt-by-patt) (apply db:get-runs-cnt-by-patt dbstruct params)) - ((get-all-run-ids) (db:get-all-run-ids dbstruct)) - ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params)) - ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params)) - ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params)) - ((get-run-name-from-id) (apply db:get-run-name-from-id dbstruct params)) - ((get-main-run-stats) (apply db:get-main-run-stats dbstruct params)) - ((get-var) (apply db:get-var dbstruct params)) - ((get-run-stats) (apply db:get-run-stats dbstruct params)) - ((get-run-times) (apply db:get-run-times dbstruct params)) - - ;; STEPS - ((get-steps-data) (apply db:get-steps-data dbstruct params)) - ((get-steps-for-test) (apply db:get-steps-for-test dbstruct params)) - ((get-steps-info-by-id) (apply db:get-steps-info-by-id dbstruct params)) - - ;; TEST DATA - ((read-test-data) (apply db:read-test-data dbstruct params)) - ((read-test-data-varpatt) (apply db:read-test-data-varpatt dbstruct params)) - ((get-data-info-by-id) (apply db:get-data-info-by-id dbstruct params)) - - ;; MISC - ((get-latest-host-load) (apply db:get-latest-host-load dbstruct params)) - ((have-incompletes?) (apply db:have-incompletes? dbstruct params)) - ((login) (apply db:login dbstruct params)) - ((general-call) (let ((stmtname (car params)) - (run-id (cadr params)) - (realparams (cddr params))) - (db:general-call dbstruct run-id stmtname realparams))) - ((sdb-qry) (apply sdb:qry params)) - ((ping) (current-process-id)) - ((get-changed-record-ids) (apply db:get-changed-record-ids dbstruct params)) - ((get-changed-record-test-ids) (apply db:get-changed-record-test-ids dbstruct params)) - ((get-changed-record-run-ids) (apply db:get-changed-record-run-ids dbstruct params)) - ((get-run-record-ids) (apply db:get-run-record-ids dbstruct params)) - ((get-all-runids) (apply db:get-all-runids dbstruct)) - ;; TESTMETA - ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params)) - - ;; TASKS - ((find-task-queue-records) (apply tasks:find-task-queue-records dbstruct params)) - (else - (debug:print 0 *default-log-port* "ERROR: bad api call " cmd) - (conc "ERROR: BAD api call " cmd)))) - Index: apimod.scm ================================================================== --- apimod.scm +++ apimod.scm @@ -22,10 +22,11 @@ (declare (uses commonmod)) (declare (uses debugprint)) (declare (uses dbmod)) (declare (uses dbfile)) (declare (uses tcp-transportmod)) +(declare (uses megatestmod)) (module apimod * (import scheme chicken data-structures extras) @@ -33,10 +34,14 @@ (import commonmod) (import debugprint) (import dbmod) (import dbfile) (import tcp-transportmod) +(import megatestmod) + + +(define *api-halt-writes* #f) ;; allow these queries through without starting a server ;; (define api:read-only-queries '(get-key-val-pairs @@ -281,11 +286,12 @@ (loop))))) (let loop ((thnum 0)) (thread-start! (make-thread thproc (conc "queue-thread-" thnum))) (thread-sleep! 0.05) (if (< thnum 20) - (loop (+ thnum 1)))))) + (loop (+ thnum 1)) + (debug:print 0 *default-log-port* "Started "thnum" api threads"))))) (define (api:tcp-dispatch-request-make-handler-core dbstruct api:dispatch-request) (assert *toppath* "FATAL: api:tcp-dispatch-request-make-handler called but *toppath* not set.") (if (not *server-signature*) (set! *server-signature* (tt:mk-signature *toppath*))) @@ -327,6 +333,221 @@ ;; (cmd run-id params meta) (db:add-stats cmd run-id params (- (current-milliseconds) start-t)) payload)) (else (assert #f "FATAL: failed to deserialize indat "indat)))))) + + +(define (api:dispatch-request dbstruct cmd run-id params) + (if (not *no-sync-db*) + (db:open-no-sync-db)) + (let* ((start-time (current-milliseconds))) + (if (member cmd api:write-queries) + (let loop () + (if *api-halt-writes* + (begin + (thread-sleep! 0.2) + (if (< (- (current-milliseconds) start-time) + 5000) ;; hope it don't take more than five seconds to sync + (loop) + #;(debug:print 0 *default-log-port* "ERROR: writes halted for more than 5 seconds, sync might be taking too long")))))) + (db:add-stats 'api-write-blocking-for-sync run-id params (- (current-milliseconds) start-time))) + (case cmd + ;;=============================================== + ;; READ/WRITE QUERIES + ;;=============================================== + + ((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl + + ;; SERVERS + ((start-server) (apply tt:server-process-run params)) + ((kill-server) (set! *server-run* #f)) + + ;; TESTS + + ;;((test-set-state-status-by-id) (apply mt:test-set-state-status-by-id dbstruct params)) + ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items. + ((test-set-state-status-by-id) + + ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) + (db:set-state-status-and-roll-up-items + dbstruct + (list-ref params 0) ; run-id + (list-ref params 1) ; test-name + #f ; item-path + (list-ref params 2) ; state + (list-ref params 3) ; status + (list-ref params 4) ; comment + )) + + ((delete-test-records) (apply db:delete-test-records dbstruct params)) + ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params)) + ((test-set-state-status) (apply db:test-set-state-status dbstruct params)) + ((test-set-top-process-pid) (apply db:test-set-top-process-pid dbstruct params)) + ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params)) + ((set-state-status-and-roll-up-run) (apply db:set-state-status-and-roll-up-run dbstruct params)) + ((top-test-set-per-pf-counts) (apply db:top-test-set-per-pf-counts dbstruct params)) + ((test-set-archive-block-id) (apply db:test-set-archive-block-id dbstruct params)) + + ((insert-test) (db:insert-test dbstruct run-id params)) + + ;; RUNS + ((register-run) (apply db:register-run dbstruct params)) + ((set-tests-state-status) (apply db:set-tests-state-status dbstruct params)) + ((delete-run) (apply db:delete-run dbstruct params)) + ((lock/unlock-run) (apply db:lock/unlock-run dbstruct params)) + ((update-run-event_time) (apply db:update-run-event_time dbstruct params)) + ((update-run-stats) (apply db:update-run-stats dbstruct params)) + ((set-var) (apply db:set-var dbstruct params)) + ((inc-var) (apply db:inc-var dbstruct params)) + ((dec-var) (apply db:dec-var dbstruct params)) + ((del-var) (apply db:del-var dbstruct params)) + ((add-var) (apply db:add-var dbstruct params)) + + ((insert-run) (apply db:insert-run dbstruct params)) + + ;; STEPS + ((teststep-set-status!) (apply db:teststep-set-status! dbstruct params)) + ((delete-steps-for-test!) (apply db:delete-steps-for-test! dbstruct params)) + + ;; TEST DATA + ((test-data-rollup) (apply db:test-data-rollup dbstruct params)) + ((csv->test-data) (apply db:csv->test-data dbstruct params)) + + ;; MISC + ((sync-cachedb->db) (let ((run-id (car params))) + (db:sync-touched dbstruct run-id db:initialize-main-db force-sync: #t))) + ((get-toplevels-and-incompletes) (apply db:get-toplevels-and-incompletes dbstruct params)) + ((mark-incomplete) #f);;(thread-start! (make-thread (lambda () ;; no need to block on this one + ;; (apply db:find-and-mark-incomplete dbstruct params) + ;; #t)))) + ((create-all-triggers) (db:create-all-triggers dbstruct)) + ((drop-all-triggers) (db:drop-all-triggers dbstruct)) + + ;; TESTMETA + ((testmeta-add-record) (apply db:testmeta-add-record dbstruct params)) + ((testmeta-update-field) (apply db:testmeta-update-field dbstruct params)) + ((get-tests-tags) (db:get-tests-tags dbstruct)) + + ;; TASKS + ((tasks-add) (apply tasks:add dbstruct params)) + ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params)) + ((tasks-get-last) (apply tasks:get-last dbstruct params)) + + ;; NO SYNC DB + ((no-sync-set) (apply db:no-sync-set *no-sync-db* params)) + ((no-sync-get/default) (apply db:no-sync-get/default *no-sync-db* params)) + ((no-sync-del!) (apply db:no-sync-del! *no-sync-db* params)) + ((no-sync-get-lock) (apply db:no-sync-get-lock *no-sync-db* params)) + + ;; NO SYNC DB PROCESSES + ((register-process) (apply dbfile:register-process *no-sync-db* params)) + ((set-process-done) (apply dbfile:set-process-done *no-sync-db* params)) + ((set-process-status) (apply dbfile:set-process-status *no-sync-db* params)) + ((get-process-options) (apply dbfile:get-process-options *no-sync-db* params)) + + ;; ARCHIVES + ;; ((archive-get-allocations) + ((archive-register-disk) (apply db:archive-register-disk dbstruct params)) + ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params)) + ;; ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey)) + + ;;====================================================================== + ;; READ ONLY QUERIES + ;;====================================================================== + + ;; KEYS + ((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params)) + ((get-keys) (db:get-keys dbstruct)) + ((get-key-vals) (apply db:get-key-vals dbstruct params)) + ((get-target) (apply db:get-target dbstruct params)) + ((get-targets) (db:get-targets dbstruct)) + + ;; ARCHIVES + ((test-get-archive-block-info) (apply db:test-get-archive-block-info dbstruct params)) + + ;; TESTS + ((test-toplevel-num-items) (apply db:test-toplevel-num-items dbstruct params)) + ((get-test-info-by-id) (apply db:get-test-info-by-id dbstruct params)) + ((get-test-state-status-by-id) (apply db:get-test-state-status-by-id dbstruct params)) + ((test-get-rundir-from-test-id) (apply db:test-get-rundir-from-test-id dbstruct params)) + ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params)) + ((get-count-tests-running) (apply db:get-count-tests-running dbstruct params)) + ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params)) + ;; ((delete-test-step-records) (apply db:delete-test-step-records dbstruct params)) + ;; ((get-previous-test-run-record) (apply db:get-previous-test-run-record dbstruct params)) + ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params)) + ((test-get-logfile-info) (apply db:test-get-logfile-info dbstruct params)) + ((test-get-records-for-index-file) (apply db:test-get-records-for-index-file dbstruct params)) + ((get-testinfo-state-status) (apply db:get-testinfo-state-status dbstruct params)) + ((test-get-top-process-pid) (apply db:test-get-top-process-pid dbstruct params)) + ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params)) + ((get-prereqs-not-met) (apply db:get-prereqs-not-met dbstruct params)) + ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params)) + ((get-not-completed-cnt) (apply db:get-not-completed-cnt dbstruct params)) + ((get-raw-run-stats) (apply db:get-raw-run-stats dbstruct params)) + ((get-test-times) (apply db:get-test-times dbstruct params)) + + ;; RUNS + ((get-run-info) (apply db:get-run-info dbstruct params)) + ((get-run-status) (apply db:get-run-status dbstruct params)) + ((get-run-state) (apply db:get-run-state dbstruct params)) + ((get-run-state-status) (apply db:get-run-state-status dbstruct params)) + ((set-run-status) (apply db:set-run-status dbstruct params)) + ((set-run-state-status) (apply db:set-run-state-status dbstruct params)) + ((update-tesdata-on-repilcate-db) (apply db:update-tesdata-on-repilcate-db dbstruct params)) + ((get-tests-for-run) (apply db:get-tests-for-run dbstruct params)) + ((get-tests-for-run-state-status) (apply db:get-tests-for-run-state-status dbstruct params)) + ((get-test-id) (apply db:get-test-id dbstruct params)) + ((get-tests-for-run-mindata) (apply db:get-tests-for-run-mindata dbstruct params)) + ;; ((get-tests-for-runs-mindata) (apply db:get-tests-for-runs-mindata dbstruct params)) + ((get-runs) (apply db:get-runs dbstruct params)) + ((simple-get-runs) (apply db:simple-get-runs dbstruct params)) + ((get-num-runs) (apply db:get-num-runs dbstruct params)) + ((get-runs-cnt-by-patt) (apply db:get-runs-cnt-by-patt dbstruct params)) + ((get-all-run-ids) (db:get-all-run-ids dbstruct)) + ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params)) + ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params)) + ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params)) + ((get-run-name-from-id) (apply db:get-run-name-from-id dbstruct params)) + ((get-main-run-stats) (apply db:get-main-run-stats dbstruct params)) + ((get-var) (apply db:get-var dbstruct params)) + ((get-run-stats) (apply db:get-run-stats dbstruct params)) + ((get-run-times) (apply db:get-run-times dbstruct params)) + + ;; STEPS + ((get-steps-data) (apply db:get-steps-data dbstruct params)) + ((get-steps-for-test) (apply db:get-steps-for-test dbstruct params)) + ((get-steps-info-by-id) (apply db:get-steps-info-by-id dbstruct params)) + + ;; TEST DATA + ((read-test-data) (apply db:read-test-data dbstruct params)) + ((read-test-data-varpatt) (apply db:read-test-data-varpatt dbstruct params)) + ((get-data-info-by-id) (apply db:get-data-info-by-id dbstruct params)) + + ;; MISC + ((get-latest-host-load) (apply db:get-latest-host-load dbstruct params)) + ((have-incompletes?) (apply db:have-incompletes? dbstruct params)) + ((login) (apply db:login dbstruct params)) + ((general-call) (let ((stmtname (car params)) + (run-id (cadr params)) + (realparams (cddr params))) + (db:general-call dbstruct run-id stmtname realparams))) + ((sdb-qry) (apply sdb:qry params)) + ((ping) (current-process-id)) + ((get-changed-record-ids) (apply db:get-changed-record-ids dbstruct params)) + ((get-changed-record-test-ids) (apply db:get-changed-record-test-ids dbstruct params)) + ((get-changed-record-run-ids) (apply db:get-changed-record-run-ids dbstruct params)) + ((get-run-record-ids) (apply db:get-run-record-ids dbstruct params)) + ((get-all-runids) (apply db:get-all-runids dbstruct)) + ;; TESTMETA + ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params)) + + ;; TASKS + ((find-task-queue-records) (apply tasks:find-task-queue-records dbstruct params)) + (else + (debug:print 0 *default-log-port* "ERROR: bad api call " cmd) + (conc "ERROR: BAD api call " cmd)))) + + + ) Index: archive.scm ================================================================== --- archive.scm +++ archive.scm @@ -34,614 +34,5 @@ configfmod debugprint rmtmod (prefix mtargs args:)) -(include "common_records.scm") -(include "db_records.scm") - -;;====================================================================== -;; -;;====================================================================== - -;; NOT CURRENTLY USED -;; -(define (archive:main linktree target runname testname itempath options) - (let ((testdir (conc linktree "/" target "/" runname "/" testname "/" itempatt)) - (flavor 'plain) ;; type of machine to run jobs on - (maxload 1.5) ;; max allowed load for this work - (adisks (archive:get-archive-disks))) - ;; get testdir size - ;; - hand off du to job mgr - (if (and (common:file-exists? testdir) - (file-is-writable? testdir)) - (let* ((dused (jobrunner:run-job - flavor ;; machine type - maxload ;; max allowed load - '() ;; prevars - environment vars to set for the job - common:get-disk-space-used ;; if a proc call it, if a string it is a unix command - (list testdir))) - (apath (archive:get-archive testname itempath dused))) - (jobrunner:run-job - flavor - maxload - '() - archive:run-bup - (list testdir apath)))))) - -;; Get archive disks from megatest.config -;; -(define (archive:get-archive-disks) - (let ((section (configf:get-section *configdat* "archive-disks"))) - (if section - section - '()))) - -;; look for the best candidate archive area, else create new -;; area -;; -(define (archive:get-archive testname itempath dused) - ;; look up in archive_allocations if there is a pre-used archive - ;; with adequate diskspace - ;; - (let* ((existing-blocks (rmt:archive-get-allocations testname itempath dused)) - (candidate-disks (map (lambda (block) - (list - (vector-ref block 1) ;; archive-area-name - (vector-ref block 2))) ;; disk-path - existing-blocks))) - (or (common:get-disk-with-most-free-space candidate-disks dused) - (archive:allocate-new-archive-block #f #f #f)))) ;; BROKEN. testname itempath)))) - -;; allocate a new archive area -;; -(define (archive:allocate-new-archive-block blockid-cache run-area-home testsuite-name dneeded target run-name test-name) - (let ((key (conc testsuite-name "/" target "/" run-name "/" test-name))) - (if (hash-table-exists? blockid-cache key) - (hash-table-ref blockid-cache key) - (let* ((pscript (configf:lookup *configdat* "archive" "pathscript")) - (pscript-cmd (conc pscript " " testsuite-name " " target " " run-name " " test-name)) - (apath (if pscript - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "ERROR: script \"" pscript-cmd "\" failed to run properly. exn=" exn) - (exit 1)) - (with-input-from-pipe - pscript-cmd - read-line)) - #f)) ;; this is the user-calculated archive path - (adisks (archive:get-archive-disks)) - (best-disk (common:get-disk-with-most-free-space adisks dneeded))) - (if best-disk - (let* ((bdisk-name (car best-disk)) - (bdisk-path (cdr best-disk)) - (area-key (substring (message-digest-string (md5-primitive) run-area-home) 0 5)) - (bdisk-id (rmt:archive-register-disk bdisk-name bdisk-path (get-df bdisk-path))) - (archive-name (if apath - apath - (let ((sec (current-seconds))) - (conc (time->string (seconds->local-time sec) "%Y") - "_q" (seconds->quarter sec) "/" - testsuite-name "_" area-key)))) - (archive-path (conc bdisk-path "/" archive-name)) - (block-id (rmt:archive-register-block-name bdisk-id archive-path))) - ;; (allocation-id (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name area-key))) - (if block-id ;; (and block-id allocation-id) - (let ((res (cons block-id archive-path))) - (hash-table-set! blockid-cache key res) - res) - (begin - (debug:print 0 *default-log-port* "WARNING: no disk found for " target ", " run-name ", " test-name ", archive-path=" archive-path) - #f))) - (begin - (debug:print 0 *default-log-port* "WARNING: no disk found for " target ", " run-name ", " test-name ) - #f)))))) ;; no best disk found - -;; archive - run bup -;; -;; 1. create the bup dir if not exists -;; 2. start the du of each directory -;; 3. gen index -;; 4. save -;; -(define (archive:run-bup archive-command run-id run-name tests rp-mutex bup-mutex) - ;; move the getting of archive space down into the below block so that a single run can - ;; allocate as needed should a disk fill up - ;; - (let* ((blockid-cache (make-hash-table)) - (tsname (common:get-testsuite-name)) - (target (string-intersperse (map cadr (rmt:get-key-val-pairs run-id)) "/")) - (min-space (string->number (or (configf:lookup *configdat* "archive" "minspace") "1000"))) - (arch-groups (make-hash-table)) ;; archive groups, each corrosponds to a bup area - (disk-groups (make-hash-table)) ;; - (test-groups (make-hash-table)) ;; these two (disk and test groups) could be combined nicely - (test-dirs (make-hash-table)) - (bup-exe (or (configf:lookup *configdat* "archive" "bup") "bup")) - (compress (or (configf:lookup *configdat* "archive" "compress") "9")) - (linktree (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))) - (archiver (let ((s (configf:lookup *configdat* "archive" "archiver"))) - (if s (string->symbol s) 'bup))) - (archiver-cmd (case archiver - ((tar) "tar cfj ARCHIVE_NAME.tar.bz2 ") - ((7z) " 7z u -t7z -m0=lzma -mx=9 -mfb=64 -md=32m -ms=on ARCHIVE_NAME.7z ") - (else #f))) - (src-archive-linktree (rmt:get-var "src-archive-linktree")) - (print-prefix "Running: ") ;; change to #f to turn off printing - (preclean-spec (configf:get-section *configdat* "archive-preclean"))) - - (if (or (not src-archive-linktree) (not (equal? src-archive-linktree linktree))) - (rmt:set-var "src-archive-linktree" linktree)) - ;; (tests:match patt testname itempath) - - ;; from the test info bin the path to the test by stem - ;; - (for-each - (lambda (test-dat) - (let* ((item-path (db:test-get-item-path test-dat)) - (test-name (db:test-get-testname test-dat)) - (test-id (db:test-get-id test-dat)) - (run-id (db:test-get-run_id test-dat)) - - (toplevel/children (and (db:test-get-is-toplevel test-dat) - (> (rmt:test-toplevel-num-items run-id test-name) 0))) - (test-partial-path (conc target "/" run-name "/" (db:test-make-full-name test-name item-path))) - ;; note the trailing slash to get the dir inspite of it being a link - (test-path (conc linktree "/" test-partial-path)) - (mutex-lock! rp-mutex) - (test-physical-path (if (common:file-exists? test-path) - (common:real-path test-path) - #f)) - (mutex-unlock! rp-mutex) - (partial-path-index (if test-physical-path (substring-index test-partial-path test-physical-path) #f)) - (test-base (if (and partial-path-index - test-physical-path ) - (substring test-physical-path - 0 - partial-path-index) - #f)) - ;; we need our archive dir checked for every test to enable folks who want to store other ways. - (archive-info (archive:allocate-new-archive-block blockid-cache *toppath* tsname min-space target run-name test-name)) - (archive-dir (if archive-info (cdr archive-info) #f)) - (archive-id (if archive-info (car archive-info) -1))) - - (if (not archive-dir) ;; no archive disk found, this is fatal - (begin - (debug:print 0 *default-log-port* "FATAL: No archive disks found. Please add disks with at least " - min-space " MB space to the [archive-disks] section of megatest.config") - (debug:print 0 *default-log-port* " use [archive] minspace to specify minimum available space") - (debug:print 0 *default-log-port* " disks: " - (string-intersperse (map cadr (archive:get-archive-disks)) "\n ")) - (exit 1)) - (debug:print-info 0 *default-log-port* "Using path " archive-dir " for archiving test " test-path)) - - ;; preclean the test directory per the spec if provided - (if (not (null? preclean-spec)) ;; we've been asked to preclean before archiving - (let loop ((spec (car preclean-spec)) - (tail (cdr preclean-spec))) - (if (> (length spec) 1) - (let ((testspec (car spec)) - (rules (cadr spec))) - (if (tests:match testspec test-name item-path) - (begin - (debug:print 0 *default-log-port* "INFO: cleanup requested for " test-physical-path) - (common:dir-clean-up test-physical-path rules remove-empty: #t)) - (if (not (null? tail)) - (loop (car tail)(cdr tail))))) - (begin - (debug:print 0 *default-log-port* "ERROR: bad spec line in [archive-preclean] section. \"" spec "\"") - (if (not (null? tail))(loop (car tail)(cdr tail))))))) - (cond - (toplevel/children - (debug:print 0 *default-log-port* "WARNING: cannot archive " test-name " with id " test-id - " as it is a toplevel test with children")) - ((not (common:file-exists? test-path)) - (debug:print 0 *default-log-port* "WARNING: Cannot archive " test-name "/" item-path - " as path " test-path " does not exist")) - (else - (debug:print 2 *default-log-port* - "From test-dat=" test-dat " derived the following:\n" - "test-partial-path = " test-partial-path "\n" - "test-path = " test-path "\n" - "test-physical-path = " test-physical-path "\n" - "partial-path-index = " partial-path-index "\n" - "test-base = " test-base) - (hash-table-set! disk-groups test-base - (cons test-physical-path (hash-table-ref/default disk-groups test-base '()))) - (hash-table-set! test-groups test-base - (cons test-dat (hash-table-ref/default test-groups test-base '()))) - (hash-table-set! arch-groups test-base - (cons archive-info (hash-table-ref/default arch-groups test-base '()))) - (hash-table-set! test-dirs test-id test-path))))) - ;; test-path)))) - tests) - (debug:print 0 *default-log-port* "INFO: DISK GROUPS=" (hash-table->alist disk-groups)) - ;; for each disk-group, initialize the bup area if needed - (for-each - (lambda (test-base) - (let* ((disk-group (hash-table-ref disk-groups test-base)) - (arch-group (hash-table-ref arch-groups test-base)) - (arch-info (car arch-group)) ;; don't know yet how this will work, can I get more than one possibility? - (archive-id (car arch-info)) - (archive-dir (cdr arch-info))) - (debug:print 0 *default-log-port* "Processing disk-group " test-base) - (let* ((test-paths-in (hash-table-ref disk-groups test-base)) - (test-paths (if (args:get-arg "-include") - (let ((subpaths (string-split (args:get-arg "-include") ","))) - (apply append - (map (lambda (p) - (map (lambda (subp) - (conc p "/" subp)) - subpaths)) - test-paths-in))) - test-paths-in))) - (if (not (common:file-exists? archive-dir)) - (create-directory archive-dir #t)) - (case archiver - ((bup) ;; Archive using bup - (let* ((bup-init-params (list "-d" archive-dir "init")) - (bup-index-params (append (list "-d" archive-dir "index") test-paths)) - (bup-save-params (append (list "-d" archive-dir "save" ;; (conc "--strip-path=" linktree) - (conc "-" compress) ;; or (conc "--compress=" compress) - "-n" (conc (common:get-testsuite-name) "-"(string-substitute "/" "-" target " ")) - (conc "--strip-path=" (conc test-base target "/" )) ;; if we push to the directory do we need this? - ) - test-paths))) - (if (not (common:file-exists? (conc archive-dir "/HEAD"))) - (begin - ;; replace this with jobrunner stuff enventually - (debug:print-info 2 *default-log-port* "Init bup in " archive-dir) - ;; (mutex-lock! bup-mutex) - (let-values (((pid-val exit-status exit-code) (run-n-wait bup-exe params: bup-init-params print-cmd: print-prefix))) - (if (not (eq? exit-code 0)) - (begin - (debug:print-error 0 *default-log-port* "There was an error initializing bup. Archive failed.") - (exit 1)))) - ;; (mutex-unlock! bup-mutex) - )) - (debug:print-info 2 *default-log-port* "Indexing data to be archived") - ;; (mutex-lock! bup-mutex) - (let-values (((pid-val exit-status exit-code) (run-n-wait bup-exe params: bup-index-params print-cmd: print-prefix))) - (if (not (eq? exit-code 0)) - (begin - (debug:print-error 0 *default-log-port* "There was an error Indexing bup. Archive failed.") - (exit 1)))) - (debug:print-info 2 *default-log-port* "Archiving data with bup") - (let-values (((pid-val exit-status exit-code) (run-n-wait bup-exe params: bup-save-params print-cmd: print-prefix))) - (if (not (eq? exit-code 0)) - (begin - (debug:print-error 0 *default-log-port* "There was an archiving data with bup. Archive failed.") - (exit 1)))))) - ((7z tar) - (for-each - (lambda (test-dat) - (let* ((test-id (db:test-get-id test-dat)) - (test-name (db:test-get-testname test-dat)) - (item-path (db:test-get-item-path test-dat)) - (test-full-name (db:test-make-full-name test-name item-path)) - (run-id (db:test-get-run_id test-dat)) - (target (string-intersperse (map cadr (rmt:get-key-val-pairs run-id)) "/")) - (run-name (rmt:get-run-name-from-id run-id)) - (source-dir (hash-table-ref test-dirs test-id)) ;; (conc test-base "/" test-name "/" item-path)) - (target-dir (string-substitute "/$" "" (conc archive-dir "/" target "/" run-name "/" test-full-name)))) - ;; create the test and item-path levels under archive-dir - (create-directory (pathname-directory target-dir) #t) - (run-n-wait - (conc - (string-substitute "ARCHIVE_NAME" target-dir archiver-cmd) " " - "." - ) - print-cmd: print-prefix - run-dir: source-dir))) - (hash-table-ref test-groups test-base)))) - ;; (mutex-unlock! bup-mutex) - (for-each - (lambda (test-dat) - (let ((test-id (db:test-get-id test-dat)) - (run-id (db:test-get-run_id test-dat))) - (rmt:test-set-archive-block-id run-id test-id archive-id) - (if (member (symbol->string archive-command) '("save-remove")) - (begin - (debug:print-info 0 *default-log-port* "remove testdat") - (runs:remove-test-directory test-dat 'archive-remove))))) - (hash-table-ref test-groups test-base))))) - (hash-table-keys disk-groups)) - #t)) - -(define (archive:megatest-db target-patt run-patt) - (let* ((blockid-cache (make-hash-table)) - (tsname (common:get-testsuite-name)) - (min-space (string->number (or (configf:lookup *configdat* "archive" "minspace") "1000"))) - (bup-exe (or (configf:lookup *configdat* "archive" "bup") "bup")) - (compress (or (configf:lookup *configdat* "archive" "compress") "9")) - (archiver (let ((s (configf:lookup *configdat* "archive" "archiver"))) - (if s (string->symbol s) 'bup))) - (rsync-exe (or (configf:lookup *configdat* "archive" "rsync") "rsync")) - (print-prefix "Running: ") - (archive-info (archive:allocate-new-archive-block blockid-cache *toppath* tsname min-space target-patt run-patt "megatest-db")) - (archive-dir (if archive-info (cdr archive-info) #f)) - (archive-id (if archive-info (car archive-info) -1)) - (home-host (server:choose-server *toppath* 'homehost)) - (archive-time (seconds->std-time-str (current-seconds))) - (archive-staging-db (conc *toppath* "/.db-snapshot/archive_" archive-time)) - (tmp-db-path (conc (dbfile:make-tmpdir-name *toppath* "") "/megatest.db")) - (dbfile (conc archive-staging-db "/megatest.db"))) - (create-directory archive-staging-db #t) - (let-values (((pid-val exit-status exit-code) (run-n-wait rsync-exe params: (list "-v" (conc (car home-host) ":"tmp-db-path) archive-staging-db) print-cmd: print-prefix))) - (if (eq? exit-code 0) - (case archiver - ((bup) ;; Archive using bup - (let* ((bup-init-params (list "-d" archive-dir "init")) - (bup-index-params (list "-d" archive-dir "index" archive-staging-db)) - (bup-save-params (list "-d" archive-dir "save" ;; (conc "--strip-path=" linktree) - (conc "-" compress) ;; or (conc "--compress=" compress) - "-n" (conc tsname "-megatest-db" ) - (conc "--strip-path=" archive-staging-db ) ;; if we push to the directory do we need this? - dbfile))) - (if (not (common:file-exists? (conc archive-dir "/HEAD"))) - (begin - ;; replace this with jobrunner stuff enventually - (debug:print-info 2 *default-log-port* "Init bup in " archive-dir) - (let-values (((pid-val exit-status exit-code)(run-n-wait bup-exe params: bup-init-params print-cmd: print-prefix))) - (if (not (eq? exit-code 0)) - (begin - (debug:print-error 0 *default-log-port* "There was an error initializing bup. Archive failed.") - (exit 1)))))) - (debug:print-info 2 *default-log-port* "Indexing data to be archived") - (let-values (((pid-val exit-status exit-code) (run-n-wait bup-exe params: bup-index-params print-cmd: print-prefix))) - (if (not (eq? exit-code 0)) - (begin - (debug:print-error 0 *default-log-port* "There was an error Indexing bup. Archive failed.") - (exit 1)))) - (debug:print-info 2 *default-log-port* "Archiving data with bup") - (let-values (((pid-val exit-status exit-code) (run-n-wait bup-exe params: bup-save-params print-cmd: print-prefix))) - (if (not (eq? exit-code 0)) - (begin - (debug:print-error 0 *default-log-port* "There was an error archiving data with bup. Archive failed.") - (exit 1)) - (debug:print-info 0 *default-log-port* "To restore megatest.db run megatest -archive replicacte-db -source archive-dir -time-stamp . Current timestamp: " (seconds->std-time-str (current-seconds))))))) - (else - (debug:print-info 0 *default-log-port* "No support for databse archiving with " archiver))) - (debug:print-error 0 *default-log-port* "There was an error rsyncing tmp database"))))) - -(define (archive:restore-db archive-path ts) - (let* ((bup-exe (or (configf:lookup *configdat* "archive" "bup") "bup")) - (archive-internal-path (conc (common:get-testsuite-name) "-megatest-db/" ts "/megatest.db" )) - (bup-restore-params (list "-d" archive-path "restore" "-C" *toppath* archive-internal-path))) - (debug:print-info 2 *default-log-port* "Restoring archived data to " *toppath* " from archive in " archive-path " ... " archive-internal-path) - (run-n-wait bup-exe params: bup-restore-params print-cmd: "Running:")) - (sleep 2) - (db:multi-db-sync - (db:setup) ;; (db:setup-db *dbstruct-dbs* *toppath* #f) - 'killservers - ;'dejunk - ;'adj-testids - 'old2new - ) - (debug:print-info 1 *default-log-port* "dropping triggers to update linktree") - (rmt:drop-all-triggers) - (let* ((linktree (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))) - (src-archive-linktree (rmt:get-var "src-archive-linktree"))) - (if (not (equal? src-archive-linktree linktree)) - (rmt:update-tesdata-on-repilcate-db src-archive-linktree linktree)) - (debug:print-info 1 *default-log-port* "creating triggers after updating linktree") - (rmt:create-all-triggers) -)) - -(define (archive:ls->list bup-exe archive-dir internal-path) - (let ((cmd (conc bup-exe " -d " archive-dir " ls -l " internal-path "| awk '{print $6}' | sort")) - (res '())) - (debug:print-info 0 *default-log-port* cmd) - (handle-exceptions - exn - #f ;; anything goes wrong - assume the process in NOT running. - (with-input-from-pipe - cmd - (lambda () - (let* ((inl (read-lines))) - (reverse inl))))))) - -(define (time-string->seconds tstr ds-flag) - (let* ((atime (string->time tstr "%Y-%m-%d-%H%M%S"))) - (vector-set! atime 8 ds-flag) - (local-time->seconds atime))) - -(define (seconds->std-time-str sec) - (time->string - (seconds->local-time sec) - "%Y-%m-%d-%H%M%S")) - - -(define (archive:get-timestamp-dir bup-exe archive-dir testsuite-name target test-partial-path test-last-update) - (debug:print-info 0 *default-log-port* "Test last update time:" (seconds->std-time-str test-last-update)) - (let* ((internal-path (conc testsuite-name "-" target)) - (archive-update-delay (string->number (or (configf:lookup *configdat* "archive" "test-update-delay") "900" ))) - (ts-list (archive:ls->list bup-exe archive-dir internal-path)) - (ds-flag (vector-ref (seconds->local-time) 8))) - (let loop ((hed (car ts-list)) - (tail (cdr ts-list))) - (if (and (null? tail) (equal? hed "latest")) - #f - (if (and (not (null? tail)) (equal? hed "latest")) - (loop (car tail) (cdr tail)) - (let* ((archive-seconds (time-string->seconds hed ds-flag))) - (if (< (abs (- archive-seconds test-last-update)) archive-update-delay) - (let* ((test-list (archive:ls->list bup-exe archive-dir (conc internal-path "/" hed "/" test-partial-path)))) - (if (> (length test-list) 0) - hed - (if (not (null? tail)) - (loop (car tail) (cdr tail)) - #f))) - (if (null? tail) - #f - (loop (car tail) (cdr tail)))))))))) - -(define (archive:bup-restore archive-command run-id run-name tests rp-mutex bup-mutex) ;; move the getting of archive space down into the below block so that a single run can - ;; allocate as needed should a disk fill up - ;; - (let* ((bup-exe (or (configf:lookup *configdat* "archive" "bup") "bup")) - (linktree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree"))) - - ;; from the test info bin the path to the test by stem - ;; - (for-each - (lambda (test-dat) - ;; When restoring test-dat will initially contain an old and invalid path to the test - (let* ((best-disk (get-best-disk *configdat* #f)) ;; BUG: get the testconfig and use it here. Otherwise data pulled out of archive could end up on the wrong kind of disk. - (item-path (db:test-get-item-path test-dat)) - (test-name (db:test-get-testname test-dat)) - (test-id (db:test-get-id test-dat)) - (run-id (db:test-get-run_id test-dat)) - (keyvals (rmt:get-key-val-pairs run-id)) - (target (string-intersperse (map cadr keyvals) "/")) - - (toplevel/children (and (db:test-get-is-toplevel test-dat) - (> (rmt:test-toplevel-num-items run-id test-name) 0))) - (test-partial-path (conc run-name "/" (db:test-make-full-name test-name item-path))) - ;; note the trailing slash to get the dir inspite of it being a link - (test-path (conc linktree "/" test-partial-path)) - ;; if the old path was not deleted then prev-test-physical-path will end up pointing to a real directory - (mutex-lock! rp-mutex) - (prev-test-physical-path (if (common:file-exists? test-path) - ;; (read-symbolic-link test-path #t) - (common:real-path test-path) - #f)) - (mutex-unlock! rp-mutex) - (new-test-physical-path (conc best-disk "/" test-partial-path)) - (archive-block-id (db:test-get-archived test-dat)) - (test-last-update (db:test-get-last_update test-dat)) - (archive-block-info (rmt:test-get-archive-block-info archive-block-id)) - (archive-path (if (vector? archive-block-info) - (vector-ref archive-block-info 2) ;; look in db.scm for test-get-archive-block-info for the vector record info - #f)) ;; no archive found? - (archive-timestamp-dir (if archive-path (archive:get-timestamp-dir bup-exe archive-path (common:get-testsuite-name) (string-substitute "/" "-" target " ") test-partial-path test-last-update) #f)) - (archive-internal-path (conc (common:get-testsuite-name) "-" (string-substitute "/" "-" target " ") "/" archive-timestamp-dir "/" test-partial-path)) - (include-paths (args:get-arg "-include")) - (exclude-pattern (args:get-arg "-exclude-rx")) - (exclude-file (args:get-arg "-exclude-rx-from"))) - (if (not archive-timestamp-dir) - (debug:print-error 0 *default-log-port* "Archive not found for testsuite" (common:get-testsuite-name) " run/test/itempath" test-partial-path) - (begin - ;; some sanity checks, move an existing path out of the way - iif it is not a toplevel with children - (debug:print-info 0 *default-log-port* "Archive time: " archive-timestamp-dir) - (if (and (not toplevel/children) ;; special handling needed for toplevel with children - prev-test-physical-path - (common:file-exists? prev-test-physical-path)) ;; what to do? abort or clean up or link it in? - (let* ((base (pathname-directory prev-test-physical-path)) - (dirn (pathname-file prev-test-physical-path)) - (newn (conc base "/." dirn))) - (debug:print-error 0 *default-log-port* "the old directory " prev-test-physical-path ", still exists! Moving it to " newn) - (rename-file prev-test-physical-path newn))) - - (if (and archive-path ;; no point in proceeding if there is no actual archive - (not toplevel/children)) - (begin - ;; CREATE WORK AREA - ;; test-src-path == #f ==> don't copy in data from tests directory - ;; itemdat == string ==> use directly - (create-work-area run-id run-name keyvals test-id #f best-disk test-name item-path) ;; #!key (remtries 2)) - ;; 1. Get the block id from the test info - ;; 2. Get the block data given the block id - ;; 3. Construct the paths etc. for the following command: - ;; bup -d /tmp/matt/adisk1/2015_q1/fullrun_e1a40/ restore -C /tmp/seeme fullrun-30/latest/ubuntu/nfs/none/w02.1.20.54_b/ - ;; DO BUP RESTORE - (let* ((new-test-dat (rmt:get-test-info-by-id run-id test-id)) - (new-test-path (if (vector? new-test-dat ) - (db:test-get-rundir new-test-dat) - (begin - (debug:print-error 0 *default-log-port* "unable to get data for run-id=" run-id ", test-id=" test-id) - (exit 1)))) - ;; new-test-path won't work - must use best-disk instead? Nope, new-test-path but tack on /.. - (bup-restore-params (list "-d" archive-path "restore" "-C" (conc new-test-path "/..") archive-internal-path))) - (debug:print-info 0 *default-log-port* "Restoring archived data to " new-test-physical-path " from archive in " archive-path " ... " archive-internal-path) - (debug:print-info 0 *default-log-port* bup-exe " " (string-join bup-restore-params " ")) - ;; (mutex-lock! bup-mutex) - (run-n-wait bup-exe params: bup-restore-params print-cmd: #f) - ;; (mutex-unlock! bup-mutex) - (mt:test-set-state-status-by-id run-id test-id "COMPLETED" #f #f))) - (debug:print-error 0 *default-log-port* "No archive path in the record for run-id=" run-id " test-id=" test-id)))))) - (filter vector? tests)))) - -(define (common:get-youngest-test tests) - (if (null? tests) - #f - (let ((res #f)) - (for-each - (lambda (test-dat) - (let ((event-time (db:test-get-event_time test-dat))) - (if (or (not res) - (> event-time (db:test-get-event_time res))) - (set! res test-dat)))) - tests) - res))) - -;; from an archive get a specific path - works ONLY with bup for now -;; -(define (archive:bup-get-data archive-command run-id-in run-name-in tests rp-mutex bup-mutex) - (if (null? tests) - (debug:print-info 0 *default-log-port* "get-data called with no matching tests to operate on.") - - (let* ((bup-exe (or (configf:lookup *configdat* "archive" "bup") "bup")) - (linktree (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))) - ;; (test-dat (common:get-youngest-test tests)) - (destpath (args:get-arg "-dest"))) - (cond - ((null? tests) - (debug:print-error 0 *default-log-port* - "No test matching provided target, runname pattern and test pattern found.")) - ((file-exists? destpath) - (debug:print-error 0 *default-log-port* - "Destination path alread exists! Please remove it before running get.")) - (else - (let loop ((rem-tests tests)) - (let* ((test-dat (common:get-youngest-test rem-tests)) - (item-path (db:test-get-item-path test-dat)) - (test-name (db:test-get-testname test-dat)) - (test-id (db:test-get-id test-dat)) - (run-id (db:test-get-run_id test-dat)) - (run-name (rmt:get-run-name-from-id run-id)) - (keyvals (rmt:get-key-val-pairs run-id)) - (target (string-intersperse (map cadr keyvals) "/")) - - (toplevel/children (and (db:test-get-is-toplevel test-dat) - (> (rmt:test-toplevel-num-items run-id test-name) 0))) - (test-partial-path (conc target "/" run-name "/" - (db:test-make-full-name test-name item-path))) - ;; note the trailing slash to get the dir inspite of it being a link - (test-path (conc linktree "/" test-partial-path)) - (archive-block-id (db:test-get-archived test-dat)) - (archive-block-info (rmt:test-get-archive-block-info archive-block-id)) - (archive-path (if (vector? archive-block-info) - (vector-ref archive-block-info 2) - #f)) - (archive-internal-path (conc (common:get-testsuite-name) "-" run-id - "/latest/" test-partial-path)) - (include-paths (args:get-arg "-include")) - (exclude-pattern (args:get-arg "-exclude-rx")) - (exclude-file (args:get-arg "-exclude-rx-from"))) - - (if (and archive-path ;; no point in proceeding if there is no actual archive - (not toplevel/children)) - (begin - (let* ((bup-restore-params (append (list "-d" archive-path "restore" "-C" (or destpath "data")) - ;; " " ;; What is the empty string for? - (if include-paths - (map (lambda (p) - (conc archive-internal-path "/" p)) - (string-split include-paths ",")) - (list archive-internal-path))))) - (debug:print-info 0 *default-log-port* "Restoring archived data to " (or destpath "data") - " from archive in " archive-path " ... " archive-internal-path) - (run-n-wait bup-exe params: bup-restore-params print-cmd: #t))) - (let ((new-rem-tests (filter (lambda (tdat) - (or (not (eq? (db:test-get-id tdat) test-id)) - (not (eq? (db:test-get-run_id tdat) run-id)))) - rem-tests) )) - (debug:print-info 0 *default-log-port* - "No archive path in the record for run-id=" run-id - " test-id=" test-id ", skipping.") - (if (null? new-rem-tests) - (begin - (debug:print-info 0 *default-log-port* "No archives found for " target "/" run-name "...") - #f) - (loop new-rem-tests))))))))))) - ADDED archivemod.scm Index: archivemod.scm ================================================================== --- /dev/null +++ archivemod.scm @@ -0,0 +1,441 @@ +;;====================================================================== +;; Copyright 2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +;;====================================================================== +;; Cpumod: +;; +;; Put things here don't fit anywhere else +;;====================================================================== + +(declare (unit archivemod)) + +(declare (uses debugprint)) +(declare (uses mtargs)) +(declare (uses commonmod)) +(declare (uses configfmod)) +(declare (uses fsmod)) +(declare (uses processmod)) +(declare (uses mtmod)) +(declare (uses dbmod)) +(declare (uses dbfile)) + +(use srfi-69) + +(module archivemod + * + +(import scheme) +(cond-expand + (chicken-4 + + (import chicken + ports + (prefix base64 base64:) + + (prefix sqlite3 sqlite3:) + data-structures + extras + files + matchable + md5 + message-digest + pathname-expand + posix + posix-extras + + debugprint + (prefix mtargs args:) + ) + (use srfi-69)) + (chicken-5 + (import (prefix sqlite3 sqlite3:) + ;; data-structures + ;; extras + ;; files + ;; posix + ;; posix-extras + chicken.base + chicken.condition + chicken.file + chicken.file.posix + chicken.io + chicken.pathname + chicken.port + chicken.process + chicken.process-context + chicken.process-context.posix + chicken.sort + chicken.string + chicken.time + chicken.time.posix + + matchable + md5 + message-digest + pathname-expand + regex + regex-case + srfi-1 + srfi-18 + srfi-69 + typed-records + system-information + + debugprint + + ))) +(import debugprint + commonmod + configfmod + fsmod + rmtmod + processmod + mtmod + dbmod + dbfile + (prefix mtargs args:) + + regex + regex-case + sparse-vectors + srfi-1 + srfi-13 + srfi-18 + srfi-69 + typed-records + z3 + ) + +(include "common_records.scm") +(include "db_records.scm") + +;;====================================================================== +;; +;;====================================================================== + +;; ;; NOT CURRENTLY USED +;; ;; +;; (define (archive:main linktree target runname testname itempath options) +;; (let ((testdir (conc linktree "/" target "/" runname "/" testname "/" itempatt)) +;; (flavor 'plain) ;; type of machine to run jobs on +;; (maxload 1.5) ;; max allowed load for this work +;; (adisks (archive:get-archive-disks))) +;; ;; get testdir size +;; ;; - hand off du to job mgr +;; (if (and (common:file-exists? testdir) +;; (file-is-writable? testdir)) +;; (let* ((dused (jobrunner:run-job +;; flavor ;; machine type +;; maxload ;; max allowed load +;; '() ;; prevars - environment vars to set for the job +;; common:get-disk-space-used ;; if a proc call it, if a string it is a unix command +;; (list testdir))) +;; (apath (archive:get-archive testname itempath dused))) +;; (jobrunner:run-job +;; flavor +;; maxload +;; '() +;; archive:run-bup +;; (list testdir apath)))))) + +;; Get archive disks from megatest.config +;; +(define (archive:get-archive-disks) + (let ((section (configf:get-section *configdat* "archive-disks"))) + (if section + section + '()))) + +;; look for the best candidate archive area, else create new +;; area +;; +(define (archive:get-archive testname itempath dused) + ;; look up in archive_allocations if there is a pre-used archive + ;; with adequate diskspace + ;; + (let* ((existing-blocks (rmt:archive-get-allocations testname itempath dused)) + (candidate-disks (map (lambda (block) + (list + (vector-ref block 1) ;; archive-area-name + (vector-ref block 2))) ;; disk-path + existing-blocks))) + (or (common:get-disk-with-most-free-space candidate-disks dused) + (archive:allocate-new-archive-block #f #f #f)))) ;; BROKEN. testname itempath)))) + +;; allocate a new archive area +;; +(define (archive:allocate-new-archive-block blockid-cache run-area-home testsuite-name dneeded target run-name test-name) + (let ((key (conc testsuite-name "/" target "/" run-name "/" test-name))) + (if (hash-table-exists? blockid-cache key) + (hash-table-ref blockid-cache key) + (let* ((pscript (configf:lookup *configdat* "archive" "pathscript")) + (pscript-cmd (conc pscript " " testsuite-name " " target " " run-name " " test-name)) + (apath (if pscript + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "ERROR: script \"" pscript-cmd "\" failed to run properly. exn=" exn) + (exit 1)) + (with-input-from-pipe + pscript-cmd + read-line)) + #f)) ;; this is the user-calculated archive path + (adisks (archive:get-archive-disks)) + (best-disk (common:get-disk-with-most-free-space adisks dneeded))) + (if best-disk + (let* ((bdisk-name (car best-disk)) + (bdisk-path (cdr best-disk)) + (area-key (substring (message-digest-string (md5-primitive) run-area-home) 0 5)) + (bdisk-id (rmt:archive-register-disk bdisk-name bdisk-path (get-df bdisk-path))) + (archive-name (if apath + apath + (let ((sec (current-seconds))) + (conc (time->string (seconds->local-time sec) "%Y") + "_q" (seconds->quarter sec) "/" + testsuite-name "_" area-key)))) + (archive-path (conc bdisk-path "/" archive-name)) + (block-id (rmt:archive-register-block-name bdisk-id archive-path))) + ;; (allocation-id (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name area-key))) + (if block-id ;; (and block-id allocation-id) + (let ((res (cons block-id archive-path))) + (hash-table-set! blockid-cache key res) + res) + (begin + (debug:print 0 *default-log-port* "WARNING: no disk found for " target ", " run-name ", " test-name ", archive-path=" archive-path) + #f))) + (begin + (debug:print 0 *default-log-port* "WARNING: no disk found for " target ", " run-name ", " test-name ) + #f)))))) ;; no best disk found + + +(define (archive:megatest-db target-patt run-patt) + (let* ((blockid-cache (make-hash-table)) + (tsname (common:get-testsuite-name)) + (min-space (string->number (or (configf:lookup *configdat* "archive" "minspace") "1000"))) + (bup-exe (or (configf:lookup *configdat* "archive" "bup") "bup")) + (compress (or (configf:lookup *configdat* "archive" "compress") "9")) + (archiver (let ((s (configf:lookup *configdat* "archive" "archiver"))) + (if s (string->symbol s) 'bup))) + (rsync-exe (or (configf:lookup *configdat* "archive" "rsync") "rsync")) + (print-prefix "Running: ") + (archive-info (archive:allocate-new-archive-block blockid-cache *toppath* tsname min-space target-patt run-patt "megatest-db")) + (archive-dir (if archive-info (cdr archive-info) #f)) + (archive-id (if archive-info (car archive-info) -1)) + (home-host #f) ;; FIXME! (server:choose-server *toppath* 'homehost)) + (archive-time (seconds->std-time-str (current-seconds))) + (archive-staging-db (conc *toppath* "/.db-snapshot/archive_" archive-time)) + (tmp-db-path (conc (dbfile:make-tmpdir-name *toppath* "") "/megatest.db")) + (dbfile (conc archive-staging-db "/megatest.db"))) + (create-directory archive-staging-db #t) + (let-values (((pid-val exit-status exit-code) (run-n-wait rsync-exe params: (list "-v" (conc (car home-host) ":"tmp-db-path) archive-staging-db) print-cmd: print-prefix))) + (if (eq? exit-code 0) + (case archiver + ((bup) ;; Archive using bup + (let* ((bup-init-params (list "-d" archive-dir "init")) + (bup-index-params (list "-d" archive-dir "index" archive-staging-db)) + (bup-save-params (list "-d" archive-dir "save" ;; (conc "--strip-path=" linktree) + (conc "-" compress) ;; or (conc "--compress=" compress) + "-n" (conc tsname "-megatest-db" ) + (conc "--strip-path=" archive-staging-db ) ;; if we push to the directory do we need this? + dbfile))) + (if (not (common:file-exists? (conc archive-dir "/HEAD"))) + (begin + ;; replace this with jobrunner stuff enventually + (debug:print-info 2 *default-log-port* "Init bup in " archive-dir) + (let-values (((pid-val exit-status exit-code)(run-n-wait bup-exe params: bup-init-params print-cmd: print-prefix))) + (if (not (eq? exit-code 0)) + (begin + (debug:print-error 0 *default-log-port* "There was an error initializing bup. Archive failed.") + (exit 1)))))) + (debug:print-info 2 *default-log-port* "Indexing data to be archived") + (let-values (((pid-val exit-status exit-code) (run-n-wait bup-exe params: bup-index-params print-cmd: print-prefix))) + (if (not (eq? exit-code 0)) + (begin + (debug:print-error 0 *default-log-port* "There was an error Indexing bup. Archive failed.") + (exit 1)))) + (debug:print-info 2 *default-log-port* "Archiving data with bup") + (let-values (((pid-val exit-status exit-code) (run-n-wait bup-exe params: bup-save-params print-cmd: print-prefix))) + (if (not (eq? exit-code 0)) + (begin + (debug:print-error 0 *default-log-port* "There was an error archiving data with bup. Archive failed.") + (exit 1)) + (debug:print-info 0 *default-log-port* "To restore megatest.db run megatest -archive replicacte-db -source archive-dir -time-stamp . Current timestamp: " (seconds->std-time-str (current-seconds))))))) + (else + (debug:print-info 0 *default-log-port* "No support for databse archiving with " archiver))) + (debug:print-error 0 *default-log-port* "There was an error rsyncing tmp database"))))) + +(define (archive:restore-db archive-path ts) + (let* ((bup-exe (or (configf:lookup *configdat* "archive" "bup") "bup")) + (archive-internal-path (conc (common:get-testsuite-name) "-megatest-db/" ts "/megatest.db" )) + (bup-restore-params (list "-d" archive-path "restore" "-C" *toppath* archive-internal-path))) + (debug:print-info 2 *default-log-port* "Restoring archived data to " *toppath* " from archive in " archive-path " ... " archive-internal-path) + (run-n-wait bup-exe params: bup-restore-params print-cmd: "Running:")) + (sleep 2) + (db:multi-db-sync + (db:setup) ;; (db:setup-db *dbstruct-dbs* *toppath* #f) + 'killservers + ;'dejunk + ;'adj-testids + 'old2new + ) + (debug:print-info 1 *default-log-port* "dropping triggers to update linktree") + (rmt:drop-all-triggers) + (let* ((linktree (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))) + (src-archive-linktree (rmt:get-var "src-archive-linktree"))) + (if (not (equal? src-archive-linktree linktree)) + (rmt:update-tesdata-on-repilcate-db src-archive-linktree linktree)) + (debug:print-info 1 *default-log-port* "creating triggers after updating linktree") + (rmt:create-all-triggers) +)) + +(define (archive:ls->list bup-exe archive-dir internal-path) + (let ((cmd (conc bup-exe " -d " archive-dir " ls -l " internal-path "| awk '{print $6}' | sort")) + (res '())) + (debug:print-info 0 *default-log-port* cmd) + (handle-exceptions + exn + #f ;; anything goes wrong - assume the process in NOT running. + (with-input-from-pipe + cmd + (lambda () + (let* ((inl (read-lines))) + (reverse inl))))))) + +(define (time-string->seconds tstr ds-flag) + (let* ((atime (string->time tstr "%Y-%m-%d-%H%M%S"))) + (vector-set! atime 8 ds-flag) + (local-time->seconds atime))) + +(define (seconds->std-time-str sec) + (time->string + (seconds->local-time sec) + "%Y-%m-%d-%H%M%S")) + + +(define (archive:get-timestamp-dir bup-exe archive-dir testsuite-name target test-partial-path test-last-update) + (debug:print-info 0 *default-log-port* "Test last update time:" (seconds->std-time-str test-last-update)) + (let* ((internal-path (conc testsuite-name "-" target)) + (archive-update-delay (string->number (or (configf:lookup *configdat* "archive" "test-update-delay") "900" ))) + (ts-list (archive:ls->list bup-exe archive-dir internal-path)) + (ds-flag (vector-ref (seconds->local-time) 8))) + (let loop ((hed (car ts-list)) + (tail (cdr ts-list))) + (if (and (null? tail) (equal? hed "latest")) + #f + (if (and (not (null? tail)) (equal? hed "latest")) + (loop (car tail) (cdr tail)) + (let* ((archive-seconds (time-string->seconds hed ds-flag))) + (if (< (abs (- archive-seconds test-last-update)) archive-update-delay) + (let* ((test-list (archive:ls->list bup-exe archive-dir (conc internal-path "/" hed "/" test-partial-path)))) + (if (> (length test-list) 0) + hed + (if (not (null? tail)) + (loop (car tail) (cdr tail)) + #f))) + (if (null? tail) + #f + (loop (car tail) (cdr tail)))))))))) + + +(define (common:get-youngest-test tests) + (if (null? tests) + #f + (let ((res #f)) + (for-each + (lambda (test-dat) + (let ((event-time (db:test-get-event_time test-dat))) + (if (or (not res) + (> event-time (db:test-get-event_time res))) + (set! res test-dat)))) + tests) + res))) + +;; from an archive get a specific path - works ONLY with bup for now +;; +(define (archive:bup-get-data archive-command run-id-in run-name-in tests rp-mutex bup-mutex) + (if (null? tests) + (debug:print-info 0 *default-log-port* "get-data called with no matching tests to operate on.") + + (let* ((bup-exe (or (configf:lookup *configdat* "archive" "bup") "bup")) + (linktree (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))) + ;; (test-dat (common:get-youngest-test tests)) + (destpath (args:get-arg "-dest"))) + (cond + ((null? tests) + (debug:print-error 0 *default-log-port* + "No test matching provided target, runname pattern and test pattern found.")) + ((file-exists? destpath) + (debug:print-error 0 *default-log-port* + "Destination path alread exists! Please remove it before running get.")) + (else + (let loop ((rem-tests tests)) + (let* ((test-dat (common:get-youngest-test rem-tests)) + (item-path (db:test-get-item-path test-dat)) + (test-name (db:test-get-testname test-dat)) + (test-id (db:test-get-id test-dat)) + (run-id (db:test-get-run_id test-dat)) + (run-name (rmt:get-run-name-from-id run-id)) + (keyvals (rmt:get-key-val-pairs run-id)) + (target (string-intersperse (map cadr keyvals) "/")) + + (toplevel/children (and (db:test-get-is-toplevel test-dat) + (> (rmt:test-toplevel-num-items run-id test-name) 0))) + (test-partial-path (conc target "/" run-name "/" + (db:test-make-full-name test-name item-path))) + ;; note the trailing slash to get the dir inspite of it being a link + (test-path (conc linktree "/" test-partial-path)) + (archive-block-id (db:test-get-archived test-dat)) + (archive-block-info (rmt:test-get-archive-block-info archive-block-id)) + (archive-path (if (vector? archive-block-info) + (vector-ref archive-block-info 2) + #f)) + (archive-internal-path (conc (common:get-testsuite-name) "-" run-id + "/latest/" test-partial-path)) + (include-paths (args:get-arg "-include")) + (exclude-pattern (args:get-arg "-exclude-rx")) + (exclude-file (args:get-arg "-exclude-rx-from"))) + + (if (and archive-path ;; no point in proceeding if there is no actual archive + (not toplevel/children)) + (begin + (let* ((bup-restore-params (append (list "-d" archive-path "restore" "-C" (or destpath "data")) + ;; " " ;; What is the empty string for? + (if include-paths + (map (lambda (p) + (conc archive-internal-path "/" p)) + (string-split include-paths ",")) + (list archive-internal-path))))) + (debug:print-info 0 *default-log-port* "Restoring archived data to " (or destpath "data") + " from archive in " archive-path " ... " archive-internal-path) + (run-n-wait bup-exe params: bup-restore-params print-cmd: #t))) + (let ((new-rem-tests (filter (lambda (tdat) + (or (not (eq? (db:test-get-id tdat) test-id)) + (not (eq? (db:test-get-run_id tdat) run-id)))) + rem-tests) )) + (debug:print-info 0 *default-log-port* + "No archive path in the record for run-id=" run-id + " test-id=" test-id ", skipping.") + (if (null? new-rem-tests) + (begin + (debug:print-info 0 *default-log-port* "No archives found for " target "/" run-name "...") + #f) + (loop new-rem-tests))))))))))) + +) Index: cgisetup/models/pgdb.scm ================================================================== --- cgisetup/models/pgdb.scm +++ cgisetup/models/pgdb.scm @@ -17,26 +17,32 @@ ;; along with Megatest. If not, see . ;;====================================================================== (declare (unit pgdb)) -(declare (uses configf)) +(declare (uses configfmod)) (declare (uses mtargs)) +(declare (uses debugprint)) ;; I don't know how to mix compilation units and modules, so no module here. ;; -;; (module pgdb -;; ( -;; open-pgdb -;; ) -;; -;; (import scheme) -;; (import data-structures) -;; (import chicken) - -(use typed-records (prefix dbi dbi:)) -(import (prefix mtargs args:)) +(module pgdb + * + +(import scheme) +(import data-structures) +(import chicken) + +(use typed-records + (prefix dbi dbi:) + srfi-69 + srfi-1 + ) +(import (prefix mtargs args:) + debugprint + configfmod + ) ;; given a configdat lookup the connection info and open the db ;; (define (pgdb:open configdat #!key (dbname #f)(dbispec #f)) (let ((pgconf (or dbispec @@ -654,5 +660,6 @@ ((> i tab2-pages ) lst) (else (loop (+ i 1) (append lst (list i))))))) +) Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -45,2938 +45,13 @@ rmtmod (prefix mtargs args:)) (include "common_records.scm") -(define (remove-files filespec) - (let ((files (glob filespec))) - (for-each delete-file files))) - -(define (stop-the-train) - (thread-start! (make-thread (lambda () - (let loop () - (if (and *toppath* - (file-exists? (conc *toppath*"/stop-the-train"))) - (let* ((msg (conc "ERROR: found file "*toppath*"/stop-the-train, exiting immediately"))) - ;; yes, print to current-output-port AND *default-log-port*, annoying but necessary I think - (print msg) - (debug:print 0 *default-log-port* msg) - (remove-files (conc *toppath* "/logs/server*")) - (remove-files (conc *toppath* "/.servinfo/*")) - (remove-files (conc *toppath* "/.mtdb/*lock")) - (exit 1))) - (thread-sleep! 5) - (loop)))))) - -;; execute thunk, return value. If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* . -;; arguments - thunk, message -(define (common:fail-safe thunk warning-message-on-exception) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "notable but nonfatal condition - "warning-message-on-exception", exn=" exn) - (debug:print-info 0 *default-log-port* - (string-substitute "\n?Error:" "nonfatal condition:" - (with-output-to-string - (lambda () - (print-error-message exn) )))) - (debug:print-info 0 *default-log-port* " -- continuing after nonfatal condition...") - #f) - (thunk))) - - -;; returns list of fd count, socket count -(define (get-file-descriptor-count #!key (pid (current-process-id ))) - (list - (length (glob (conc "/proc/" pid "/fd/*"))) - (length (filter identity (map socket? (glob (conc "/proc/" pid "/fd/*"))))) - ) -) - - - -;; GLOBALS - -;; CONTEXTS -(defstruct cxt - (taskdb #f) - (cmutex (make-mutex))) -;; (define *contexts* (make-hash-table)) -;; (define *context-mutex* (make-mutex)) - -;; ;; safe method for accessing a context given a toppath -;; ;; -;; (define (common:with-cxt toppath proc) -;; (mutex-lock! *context-mutex*) -;; (let ((cxt (hash-table-ref/default *contexts* toppath #f))) -;; (if (not cxt) -;; (set! cxt (let ((x (make-cxt)))(hash-table-set! *contexts* toppath x) x))) -;; (let ((cxt-mutex (cxt-mutex cxt))) -;; (mutex-unlock! *context-mutex*) -;; (mutex-lock! cxt-mutex) -;; (let ((res (proc cxt))) -;; (mutex-unlock! cxt-mutex) -;; res)))) - -;; A hash table that can be accessed by #{scheme ...} calls in -;; config files. Allows communicating between confgs -;; -(define *user-hash-data* (make-hash-table)) - -(define *db-keys* #f) - -(define *pkts-info* (make-hash-table)) ;; store stuff like the last parent here -(define *configinfo* #f) ;; raw results from setup, includes toppath and table from megatest.config -(define *runconfigdat* #f) ;; run configs data -(define *configdat* #f) ;; megatest.config data -(define *configstatus* #f) ;; status of data; 'fulldata : all processing done, #f : no data yet, 'partialdata : partial read done -;; (define *toppath* #f) ;; moved to commonmod -(define *already-seen-runconfig-info* #f) - -(define *test-meta-updated* (make-hash-table)) -(define *globalexitstatus* 0) ;; attempt to work around possible thread issues -(define *passnum* 0) ;; when running track calls to run-tests or similar -;; (define *alt-log-file* #f) ;; used by -log -;; (define *common:denoise* (make-hash-table)) ;; for low noise printing -(define *default-log-port* (current-error-port)) -(define *time-zero* (current-seconds)) ;; for the watchdog -(define *on-exit-procs* '()) ;; add procs to this list to be executed on exit -(define *default-area-tag* "local") - -;; DATABASE -;; db access -(define *db-last-access* (current-seconds)) ;; last db access, used in server -;; (define *db-write-access* #t) -;; db sync -;; (define *db-last-sync* 0) ;; last time the sync to megatest.db happened -(define *db-sync-in-progress* #f) ;; if there is a sync in progress do not try to start another -;; (define *db-multi-sync-mutex* (make-mutex)) ;; protect access to *db-sync-in-progress*, *db-last-sync* -;; task db -(define *task-db* #f) ;; (vector db path-to-db) -(define *db-access-allowed* #t) ;; flag to allow access -;; (define *db-access-mutex* (make-mutex)) ;; moved to dbfile -;; (define *db-transaction-mutex* (make-mutex)) -(define *db-cache-path* #f) -;; (define *db-with-db-mutex* (make-mutex)) -(define *db-api-call-time* (make-hash-table)) ;; hash of command => (list of times) - -;; SERVER -(define *transport-type* 'http) ;; override with [server] transport http|rpc|nmsg -(define *runremote* #f) ;; if set up for server communication this will hold -;; (define *max-cache-size* 0) -(define *logged-in-clients* (make-hash-table)) -(define *server-id* #f) -;; (define *server-info* #f) ;; good candidate for easily convert to non-global -(define *time-to-exit* #f) -(define *run-id* #f) -(define *server-kind-run* (make-hash-table)) -(define *home-host* #f) -;; (define *total-non-write-delay* 0) -(define *heartbeat-mutex* (make-mutex)) -;; (define *api-process-request-count* 0) -;; (define *max-api-process-requests* 0) -(define *server-overloaded* #f) - -;; client -(define *rmt-mutex* (make-mutex)) ;; remote access calls mutex - -;; RPC transport -(define *rpc:listener* #f) - -;; KEY info -(define *target* (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN -(define *keys* (make-hash-table)) ;; cache the keys here -(define *keyvals* (make-hash-table)) -(define *toptest-paths* (make-hash-table)) ;; cache toptest path settings here -(define *test-paths* (make-hash-table)) ;; cache test-id to test run paths here -(define *test-ids* (make-hash-table)) ;; cache run-id, testname, and item-path => test-id -(define *test-info* (make-hash-table)) ;; cache the test info records, update the state, status, run_duration etc. from testdat.db - -(define *run-info-cache* (make-hash-table)) ;; run info is stable, no need to reget -(define *launch-setup-mutex* (make-mutex)) ;; need to be able to call launch:setup often so mutex it and re-call the real deal only if *toppath* not set -(define *homehost-mutex* (make-mutex)) - -;; Miscellaneous -(define *triggers-mutex* (make-mutex)) ;; block overlapping processing of triggers -(define *numcpus-cache* (make-hash-table)) - -;; this plugs a hole in posix-extras in recent chicken versions > 4.9) -(let-values (( (chicken-release-number chicken-major-version) - (apply values - (map string->number - (take - (string-split (chicken-version) ".") - 2))))) - (let ((resolve-pathname-broken? - (or (> chicken-release-number 4) - (and (eq? 4 chicken-release-number) (> chicken-major-version 9))))) - (if resolve-pathname-broken? - (define ##sys#expand-home-path pathname-expand)))) - -(define (realpath x) (resolve-pathname (pathname-expand (or x "/dev/null")) )) - -(define (common:get-this-exe-fullpath #!key (argv (argv))) - (let* ((this-script - (cond - ((and (> (length argv) 2) - (string-match "^(.*/csi|csi)$" (car argv)) - (string-match "^-(s|ss|sx|script)$" (cadr argv))) - (caddr argv)) - (else (car argv)))) - (fullpath (realpath this-script))) - fullpath)) - -;;====================================================================== - -(define *common:this-exe-fullpath* (common:get-this-exe-fullpath)) -(define *common:this-exe-dir* (pathname-directory *common:this-exe-fullpath*)) -(define *common:this-exe-name* (pathname-strip-directory *common:this-exe-fullpath*)) - -(define (common:get-sync-lock-filepath) - (let* ((tmp-area (common:make-tmpdir-name *toppath* "")) - (lockfile (conc tmp-area "/megatest.db.lock"))) - lockfile)) - -(define *common:logpro-exit-code->status-sym-alist* - '( ( 0 . pass ) - ( 1 . fail ) - ( 2 . warn ) - ( 3 . check ) - ( 4 . waived ) - ( 5 . abort ) - ( 6 . skip ))) - -(define (common:logpro-exit-code->status-sym exit-code) - (or (alist-ref exit-code *common:logpro-exit-code->status-sym-alist*) 'fail)) - -(define (common:worse-status-sym ss1 ss2) - (let loop ((status-syms-remaining '(abort fail check skip warn waived pass))) - (cond - ((null? status-syms-remaining) - 'fail) - ((eq? (car status-syms-remaining) ss1) - ss1) - ((eq? (car status-syms-remaining) ss2) - ss2) - (else - (loop (cdr status-syms-remaining)))))) - -(define (common:steps-can-proceed-given-status-sym status-sym) - (if (member status-sym '(warn waived pass)) - #t - #f)) - -(define (status-sym->string status-sym) - (case status-sym - ((pass) "PASS") - ((fail) "FAIL") - ((warn) "WARN") - ((check) "CHECK") - ((waived) "WAIVED") - ((abort) "ABORT") - ((skip) "SKIP") - (else "FAIL"))) - -(define (common:logpro-exit-code->test-status exit-code) - (status-sym->string (common:logpro-exit-code->status-sym exit-code))) - -;; -(defstruct remote - - ;; transport to be used - ;; http - use http-transport - ;; http-read-cached - use http-transport for writes but in-mem cached for reads - (rmode 'http) - (hh-dat (let ((res (or (server:choose-server *toppath* 'homehost) - (cons #f #f)))) - (assert (pair? res)(conc "FATAL: hh-dat should be a pair, got "res)) - res)) - (server-url #f) ;; (server:check-if-running *toppath*) #f)) - (server-id #f) - (server-info #f) ;; (if *toppath* (server:check-if-running *toppath*) #f)) - (last-server-check 0) ;; last time we checked to see if the server was alive - (connect-time (current-seconds)) ;; when we first connected - (last-access (current-seconds)) ;; last time we talked to server - ;; (conndat #f) ;; iface port api-uri api-url api-req seconds server-id - (server-timeout (server:expiration-timeout)) - (force-server #f) - (ro-mode #f) - (ro-mode-checked #f) ;; flag that indicates we have checked for ro-mode - - ;; conndat stuff - (iface #f) ;; TODO: Consolidate this data with server-url and server-info above - (port #f) - (api-url #f) - (api-uri #f) - (api-req #f)) - -;; launching and hosts -(defstruct host - (reachable #f) - (last-update 0) - (last-used 0) - (last-cpuload 1)) - -(define *host-loads* (make-hash-table)) - -;; cache environment vars for each run here -(define *env-vars-by-run-id* (make-hash-table)) - -;; Testconfig and runconfig caches. -(define *testconfigs* (make-hash-table)) ;; test-name => testconfig -(define *runconfigs* (make-hash-table)) ;; target => runconfig - -;; This is a cache of pre-reqs met, don't re-calc in cases where called with same params less than -;; five seconds ago -(define *pre-reqs-met-cache* (make-hash-table)) - -;; cache of verbosity given string -;; -(define *verbosity-cache* (make-hash-table)) - -(define (common:clear-caches) - (set! *target* (make-hash-table)) - (set! *keys* (make-hash-table)) - (set! *keyvals* (make-hash-table)) - (set! *toptest-paths* (make-hash-table)) - (set! *test-paths* (make-hash-table)) - (set! *test-ids* (make-hash-table)) - (set! *test-info* (make-hash-table)) - (set! *run-info-cache* (make-hash-table)) - (set! *env-vars-by-run-id* (make-hash-table)) - (set! *test-id-cache* (make-hash-table))) - -;; Generic string database -(define sdb:qry #f) ;; (make-sdb:qry)) ;; 'init #f) -;; Generic path database -(define *fdb* #f) - -(define *last-launch* (current-seconds)) ;; use for throttling the launch rate. Would be better to use the db and last time of a test in LAUNCHED state. - -;;====================================================================== -;; V E R S I O N -;;====================================================================== - -(define (common:get-full-version) - (conc megatest-version "-" megatest-fossil-hash)) - -(define (common:version-signature) - (conc megatest-version "-" (substring megatest-fossil-hash 0 4))) - -;;====================================================================== -;; from metadat lookup MEGATEST_VERSION -;; -(define (common:get-last-run-version) ;; RADT => How does this work in send-receive function??; assume it is the value saved in some DB - (rmt:get-var "MEGATEST_VERSION")) - -(define (common:get-last-run-version-number) - (string->number - (substring (common:get-last-run-version) 0 6))) - -(define (common:set-last-run-version) - (rmt:set-var "MEGATEST_VERSION" (common:version-signature))) - -;;====================================================================== -;; postive number if megatest version > db version -;; negative number if megatest version < db version -(define (common:version-db-delta) - (- megatest-version (common:get-last-run-version-number))) - -(define (common:version-changed?) - (not (equal? (common:get-last-run-version) - (common:version-signature)))) - - -;; From 1.70 to 1.80, db's are compatible. - -(define (common:api-changed?) - (let* ( - (megatest-major-version (substring (->string megatest-version) 0 4)) - (run-major-version (substring (conc (common:get-last-run-version)) 0 4)) - ) - (and (not (equal? megatest-major-version "1.80")) - (not (equal? megatest-major-version megatest-run-version))) - ) -) - -;;====================================================================== -;; Move me elsewhere ... -;; RADT => Why do we meed the version check here, this is called only if version misma -;; -(define (common:cleanup-db dbstruct #!key (full #f)) - (case (rmt:transport-mode) - ((http) - (apply db:multi-db-sync - dbstruct - 'schema - 'killservers - 'adj-target - 'new2old - '(dejunk) - )) - ((tcp nfs) - (apply db:multi-db-sync - dbstruct - 'schema - 'killservers - 'adj-target - 'new2old - '(dejunk) - ))) - (if (common:api-changed?) - (common:set-last-run-version))) - -(define (common:snapshot-file filepath #!key (subdir ".") ) - (if (file-exists? filepath) - (let* ((age-sec (lambda (file) - (if (file-exists? file) - (- (current-seconds) (file-modification-time file)) - 1000000000))) ;; return really old value if file doesn't exist. we want to clobber it if old or not exist. - (ok-flag #t) - (age-mins (lambda (file) (/ (age-sec file) 60))) - (age-hrs (lambda (file) (/ (age-mins file) 60))) - (age-days (lambda (file) (/ (age-hrs file) 24))) - (age-wks (lambda (file) (/ (age-days file) 7))) - (docmd (lambda (cmd) - (cond - (ok-flag - (let ((res (system cmd))) - (cond - ((eq? 0 res) - #t) - (else - (set! ok-flag #f) - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Command failed with exit code " - (if (< res 0) - res - (/ res 8)) " ["cmd"]" ) - #f)))) - (else - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Not runnining command due to prior error. ["cmd"]") - #f)))) - (copy (lambda (src dest) (docmd (conc "/bin/cp '"src"' '"dest"'")))) - (copy+zip (lambda (src dest) (docmd (conc "gzip -c - < '"src"' > '"dest"'")))) - (fullpath (realpath filepath)) - (basedir (pathname-directory fullpath)) - (basefile (pathname-strip-directory fullpath)) - ;;(prevfile (conc filepath ".prev.gz")) - (minsfile (conc basedir "/" subdir "/" basefile ".mins.gz")) - (hrsfile (conc basedir "/" subdir "/" basefile ".hrs.gz")) - (daysfile (conc basedir "/" subdir "/" basefile ".days.gz")) - (wksfile (conc basedir "/" subdir "/" basefile ".weeks.gz"))) - - ;; create subdir it not exists - (if (not (directory-exists? (conc basedir "/" subdir))) - (docmd (conc "/bin/mkdir -p '"(conc basedir "/" subdir)"'"))) - - ;; copy&zip to .mins if not exists - (if (not (file-exists? minsfile)) - (copy+zip filepath minsfile)) - ;; copy .mins to .hrs if not exists - (if (not (file-exists? hrsfile)) - (copy minsfile hrsfile)) - ;; copy .hrs to .days if not exists - (if (not (file-exists? daysfile)) - (copy hrsfile daysfile)) - ;; copy .days to .weeks if not exists - (if (not (file-exists? wksfile)) - (copy daysfile wksfile)) - - - ;; if age(.mins.gz) >= 1h: - ;; copy .mins.gz .hrs.gz - ;; copy .mins.gz - (when (>= (age-mins minsfile) 1) - (copy minsfile hrsfile) - (copy+zip filepath minsfile)) - - ;; if age(.hrs.gz) >= 1d: - ;; copy .hrs.gz .days.gz - ;; copy .mins.gz .hrs.gz - (when (>= (age-days hrsfile) 1) - (copy hrsfile daysfile) - (copy minsfile hrsfile)) - - ;; if age(.days.gz) >= 1w: - ;; copy .days.gz .weeks.gz - ;; copy .hrs.gz .days.gz - (when (>= (age-wks daysfile) 1) - (copy daysfile wksfile) - (copy hrsfile daysfile)) - #t) - #f)) - -;;====================================================================== -;; Rotate logs, logic: -;; if > 500k and older than 1 week: -;; remove previous compressed log and compress this log -;; WARNING: This proc operates assuming that it is in the directory above the -;; logs directory you wish to log-rotate. -;; -(define (common:rotate-logs) - (let* ((all-files (make-hash-table)) - (stats (make-hash-table)) - (inc-stat (lambda (key) - (hash-table-set! stats key (+ (hash-table-ref/default stats key 0) 1)))) - (max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "600")))) ;; name -> age - (if (not (directory-exists? "logs"))(create-directory "logs")) - (directory-fold - (lambda (file rem) - (handle-exceptions - exn - (begin - (debug:print-info 2 *default-log-port* "unable to rotate log " file ", probably handled by another process, this is safe to ignore. exn=" exn) - (debug:print 2 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - ;; (print-call-chain (current-error-port)) ;; - ) - (let* ((fullname (conc "logs/" file)) - (mod-time (file-modification-time fullname)) - (file-age (- (current-seconds) mod-time)) - (file-old (> file-age (* 48 60 60))) - (file-big (> (file-size fullname) 200000))) - (hash-table-set! all-files file mod-time) - (if (or (and (string-match "^.*.log" file) - file-old - file-big) - (and (string-match "^server-.*.log" file) - file-old)) - (let ((gzfile (conc fullname ".gz"))) - (if (common:file-exists? gzfile) - (begin - (debug:print-info 0 *default-log-port* "removing " gzfile) - (delete-file* gzfile) - (hash-table-delete! all-files gzfile) ;; needed? - )) - (debug:print-info 0 *default-log-port* "compressing " file) - (system (conc "gzip " fullname)) - (inc-stat "gzipped") - (hash-table-set! all-files (conc file ".gz") file-age) ;; add the .gz file and remove the base file - (hash-table-delete! all-files file) - ) - (if (and (> file-age (* (string->number (or (configf:lookup *configdat* "setup" "log-expire-days") "30")) 24 3600)) - (file-exists? fullname)) ;; just in case it was gzipped - will get it next time - (handle-exceptions - exn - #f - (if (directory? fullname) - (begin - (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") - (inc-stat "directories")) - (begin - (delete-file* fullname) - (inc-stat "deleted"))) - (hash-table-delete! all-files file))))))) - '() - "logs") - (for-each - (lambda (category) - (let ((quant (hash-table-ref/default stats category 0))) - (if (> quant 0) - (debug:print-info 0 *default-log-port* category " log files: " quant)))) - `("deleted" "gzipped" "directories")) - (let ((num-logs (hash-table-size all-files))) - (if (> num-logs max-allowed) ;; because NFS => don't let number of logs exceed 300 - (let ((files (take (sort (hash-table-keys all-files) - (lambda (a b) - (< (hash-table-ref all-files a)(hash-table-ref all-files b)))) - (- num-logs max-allowed)))) - (for-each - (lambda (file) - (let* ((fullname (conc "logs/" file))) - (if (directory? fullname) - (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") - (handle-exceptions - exn - (debug:print-error 0 *default-log-port* "failed to remove " fullname ", exn=" exn) - (delete-file* fullname))))) - files) - (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files.")))))) - -;;====================================================================== -;; Force a megatest cleanup-db if version is changed and skip-version-check not specified -;; Do NOT check if not on homehost! -;; -(define (common:exit-on-version-changed) - (if (and *toppath* ;; do nothing if *toppath* not yet provided - (common:on-homehost?)) - (if (common:api-changed?) - (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config")) - (dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") ".mtdb/main.db")) - (read-only (not (file-write-access? dbfile))) - (dbstruct (db:setup))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;; #t))) - (debug:print 0 *default-log-port* - "WARNING: Version mismatch!\n" - " expected: " (common:version-signature) "\n" - " got: " (common:get-last-run-version)) - (cond - ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t) - ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only) - (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db - (debug:print 0 *default-log-port* " I see you are the owner of megatest.config, attempting to cleanup and reset to new version") - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "Failed to switch versions. exn=" exn) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (print-call-chain (current-error-port)) - (exit 1)) - (common:cleanup-db dbstruct))) - ((not (common:file-exists? mtconf)) - (debug:print 0 *default-log-port* " megatest.config does not exist in this area. Cannot proceed with megatest version migration.") - (exit 1)) - ((not (common:file-exists? dbfile)) - (debug:print 0 *default-log-port* " .mtdb/main.db does not exist in this area. Cannot proceed with megatest version migration.") - (exit 1)) - ((not (eq? (current-user-id)(file-owner mtconf))) - (debug:print 0 *default-log-port* " You do not own .mtdb/main.db in this area. Cannot proceed with megatest version migration.") - (exit 1)) - (read-only - (debug:print 0 *default-log-port* " You have read-only access to this area. Cannot proceed with megatest version migration.") - (exit 1)) - (else - (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"") - (exit 1))))))) -;;====================================================================== -;; (begin -;; (debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.") -;; (exit 1)))) - -;;====================================================================== -;; S P A R S E A R R A Y S -;;====================================================================== - -(define (make-sparse-array) - (let ((a (make-sparse-vector))) - (sparse-vector-set! a 0 (make-sparse-vector)) - a)) - -(define (sparse-array? a) - (and (sparse-vector? a) - (sparse-vector? (sparse-vector-ref a 0)))) - -(define (sparse-array-ref a x y) - (let ((row (sparse-vector-ref a x))) - (if row - (sparse-vector-ref row y) - #f))) - -(define (sparse-array-set! a x y val) - (let ((row (sparse-vector-ref a x))) - (if row - (sparse-vector-set! row y val) - (let ((new-row (make-sparse-vector))) - (sparse-vector-set! a x new-row) - (sparse-vector-set! new-row y val))))) - -;;====================================================================== -;; L O C K E R S A N D B L O C K E R S -;;====================================================================== - -;; block further accesses to databases. Call this before shutting db down -(define (common:db-block-further-queries) - (mutex-lock! *db-access-mutex*) - (set! *db-access-allowed* #f) - (mutex-unlock! *db-access-mutex*)) - -(define (common:db-access-allowed?) - (let ((val (begin - (mutex-lock! *db-access-mutex*) - *db-access-allowed* - (mutex-unlock! *db-access-mutex*)))) - val)) - -;;====================================================================== -;; U S E F U L S T U F F -;;====================================================================== - -;; convert things to an alist or assoc list, #f gets converted to "" -;; -(define (common:to-alist dat) - (cond - ((list? dat) (map common:to-alist dat)) - ((vector? dat) - (map common:to-alist (vector->list dat))) - ((pair? dat) - (cons (common:to-alist (car dat)) - (common:to-alist (cdr dat)))) - ((hash-table? dat) - (map common:to-alist (hash-table->alist dat))) - (else - (if dat - dat - "")))) - -(define (common:alist-ref/default key alist default) - (or (alist-ref key alist) default)) - -;; moved into commonmod -;; -;; (define (common:low-noise-print waitval . keys) -;; (let* ((key (string-intersperse (map conc keys) "-" )) -;; (lasttime (hash-table-ref/default *common:denoise* key 0)) -;; (currtime (current-seconds))) -;; (if (> (- currtime lasttime) waitval) -;; (begin -;; (hash-table-set! *common:denoise* key currtime) -;; #t) -;; #f))) - -(define (common:read-encoded-string instr) - (handle-exceptions - exn - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "received bad encoded string \"" instr "\", message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) - (print-call-chain (current-error-port)) - #f) - (read (open-input-string (base64:base64-decode instr)))) - (read (open-input-string (z3:decode-buffer (base64:base64-decode instr)))))) - - -;;====================================================================== -;; S T A T E S A N D S T A T U S E S -;;====================================================================== - -;; BBnote: *common:std-states* - dashboard filter control and test control state buttons defined here; used in set-fields-panel and dboard:make-controls -(define *common:std-states* ;; for toggle buttons in dashboard - '( - (0 "ARCHIVED") - (1 "STUCK") - (2 "KILLREQ") - (3 "KILLED") - (4 "NOT_STARTED") - (5 "COMPLETED") - (6 "LAUNCHED") - (7 "REMOTEHOSTSTART") - (8 "RUNNING") - )) - -(define *common:dont-roll-up-states* - '("DELETED" - "REMOVING" - "CLEANING" - "ARCHIVE_REMOVING" - )) - -;;====================================================================== -;; BBnote: *common:std-statuses* dashboard filter control and test control status buttons defined here; used in set-fields-panel and dboard:make-controls -;; note these statuses are sorted from better to worse. -;; This sort order is important to dcommon:status-compare3 and db:set-state-status-and-roll-up-items -(define *common:std-statuses* - '(;; (0 "DELETED") - (1 "n/a") - (2 "PASS") - (3 "SKIP") - (4 "WARN") - (5 "WAIVED") - (6 "CHECK") - (7 "STUCK/DEAD") - (8 "DEAD") - (9 "FAIL") - (10 "PREQ_FAIL") - (11 "PREQ_DISCARDED") - (12 "ABORT"))) - -(define *common:ended-states* ;; states which indicate the test is stopped and will not proceed - '("COMPLETED" "ARCHIVED" "KILLED" "KILLREQ" "STUCK" "INCOMPLETE" )) - -(define *common:badly-ended-states* ;; these roll up as CHECK, i.e. results need to be checked - '("KILLED" "KILLREQ" "STUCK" "INCOMPLETE" "DEAD" "CHECK")) - -(define *common:well-ended-states* ;; an item's prereq in this state allows item to proceed - '("PASS" "WARN" "WAIVED" "SKIP")) - -;; BBnote: *common:running-states* used from db:set-state-status-and-roll-up-items -(define *common:running-states* ;; test is either running or can be run - '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED" "STARTED")) - -(define *common:cant-run-states* ;; These are stopping conditions that prevent a test from being run - '("COMPLETED" "KILLED" "UNKNOWN" "INCOMPLETE" "ARCHIVED")) - -(define *common:not-started-ok-statuses* ;; if not one of these statuses when in not_started state treat as dead - '("n/a" "na" "PASS" "FAIL" "WARN" "CHECK" "WAIVED" "DEAD" "SKIP")) - -;;====================================================================== -;; group tests into buckets corresponding to rollup -;;; Running, completed-pass, completed-non-pass + worst status, not started. -;; filter out -;(define (common:categorize-items-for-rollup in-tests) -; ( - -(define (common:special-sort items order comp) - (let ((items-order (map reverse order)) - (acomp (or comp >))) - (sort items - (lambda (a b) - (let ((a-num (cadr (or (assoc a items-order) '(0 0)))) - (b-num (cadr (or (assoc b items-order) '(0 0))))) - (acomp a-num b-num)))))) - -;;====================================================================== -;; ;; given a toplevel with currstate, currstatus apply state and status -;; ;; => (newstate . newstatus) -;; (define (common:apply-state-status currstate currstatus state status) -;; (let* ((cstate (string->symbol (string-downcase currstate))) -;; (cstatus (string->symbol (string-downcase currstatus))) -;; (sstate (string->symbol (string-downcase state))) -;; (sstatus (string->symbol (string-downcase status))) -;; (nstate #f) -;; (nstatus #f)) -;; (set! nstate -;; (case cstate -;; ((completed not_started killed killreq stuck archived) -;; (case sstate ;; completed -> sstate -;; ((completed killed killreq stuck archived) completed) -;; ((running remotehoststart launched) running) -;; (else unknown-error-1))) -;; ((running remotehoststart launched) -;; (case sstate -;; ((completed killed killreq stuck archived) #f) ;; need to look at all items -;; ((running remotehoststart launched) running) -;; (else unknown-error-2))) -;; (else unknown-error-3))) -;; (set! nstatus -;; (case sstatus -;; ((pass) -;; (case nstate -;; ((pass n/a deleted) pass) -;; ((warn) warn) -;; ((fail) fail) -;; ((check) check) -;; ((waived) waived) -;; ((skip) skip) -;; ((stuck/dead) stuck) -;; ((abort) abort) -;; (else unknown-error-4))) -;; ((warn) -;; (case nstate -;; ((pass warn n/a skip deleted) warn) -;; ((fail) fail) -;; ((check) check) -;; ((waived) waived) -;; ((stuck/dead) stuck) -;; (else unknown-error-5))) -;; ((fail) -;; (case nstate -;; ((pass warn fail check n/a waived skip deleted stuck/dead stuck) fail) -;; ((abort) abort) -;; (else unknown-error-6))) -;; (else unknown-error-7))) -;; (cons -;; (if nstate (symbol->string nstate) nstate) -;; (if nstatus (symbol->string nstatus) nstatus)))) - -;;====================================================================== -;; D E B U G G I N G S T U F F -;;====================================================================== - -(define *verbosity* 1) -(define *logging* #f) - -(define (get-with-default val default) - (let ((val (args:get-arg val))) - (if val val default))) - -(define (assoc/default key lst . default) - (let ((res (assoc key lst))) - (if res (cadr res)(if (null? default) #f (car default))))) - -(define (common:get-testsuite-name) - (or (configf:lookup *configdat* "setup" "area-name") ;; megatest is a flexible tool, testsuite is too limiting a description. - (configf:lookup *configdat* "setup" "testsuite" ) - (getenv "MT_TESTSUITE_NAME") - (pathname-file (or (if (string? *toppath* ) - (pathname-file *toppath*) - #f) - (common:get-toppath #f))) - "please-set-setup-area-name")) ;; (pathname-file (current-directory))))) - -;;====================================================================== -;; safe getting of toppath -(define (common:get-toppath areapath) - (or *toppath* - (if areapath - (begin - (set! *toppath* areapath) - (setenv "MT_RUN_AREA_HOME" areapath) - areapath) - #f) - (if (getenv "MT_RUN_AREA_HOME") - (begin - (set! *toppath* (getenv "MT_RUN_AREA_HOME")) - *toppath*) - #f) - ;; last resort, look for megatest.config - (let loop ((thepath (realpath "."))) - (if (file-exists? (conc thepath "/megatest.config")) - thepath - (if (equal? thepath "/") - (begin - (debug:print-error 0 *default-log-port* "Unable to find megatest home directory.") - #f) - (loop (pathname-directory thepath))))) - )) - - -;;====================================================================== -;; redefine for future cleanup (converge on area-name, the more generic -;; -(define common:get-area-name common:get-testsuite-name) - -(define (common:get-db-tmp-area . junk) - (if *db-cache-path* - *db-cache-path* - (if *toppath* ;; common:get-create-writeable-dir - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "Couldn't create path to " *db-cache-path* ", exn=" exn) - (exit 1)) - (let* ((toppath (common:real-path *toppath*)) - (tsname (common:get-testsuite-name)) - (dbpath (common:get-create-writeable-dir - (list (conc "/tmp/" (current-user-name) - "/megatest_localdb/" - tsname "/" - (string-translate toppath "/" ".")) - (conc "/tmp/" (current-process-id) ;; just in case we have an issue with the dir by own user name - "/"(current-user-name) "/megatest_localdb/" - tsname - (string-translate toppath "/" ".")) - )))) - (set! *db-cache-path* dbpath) - ;; ensure megatest area has .mtdb - (let ((dbarea (conc *toppath* "/.mtdb"))) - (if (not (file-exists? dbarea)) - (create-directory dbarea))) - ;; ensure tmp area has .mtdb - (let ((dbarea (conc dbpath "/.mtdb"))) - (if (not (file-exists? dbarea)) - (create-directory dbarea))) - dbpath)) - #f))) - -(define (common:get-area-path-signature) - (message-digest-string (md5-primitive) *toppath*)) - -;;====================================================================== -;; E X I T H A N D L I N G -;;====================================================================== - -(define (common:run-sync?) - (and *toppath* ;; gate if called before *toppath* is set - (common:on-homehost?) - (args:get-arg "-server"))) - - -(define (std-signal-handler signum) - ;; (signal-mask! signum) - (set! *time-to-exit* #t) - ;;(debug:print-info 13 *default-log-port* "got signal "signum) - (debug:print-error 0 *default-log-port* "Received signal " signum " aaa exiting promptly") - ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway - (exit)) - -(define (special-signal-handler signum) - ;; (signal-mask! signum) - (set! *time-to-exit* #t) - ;;(debug:print-info 13 *default-log-port* "got signal "signum) - (debug:print-error 0 *default-log-port* "Received signal " signum " sending email befor exiting!!") - ;;TODO send email to notify admin contact listed in the config that the lisner got killed - ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway - (exit)) - - -(set-signal-handler! signal/int std-signal-handler) ;; ^C -(set-signal-handler! signal/term std-signal-handler) - -;; (set-signal-handler! signal/stop std-signal-handler) ;; ^Z NO, do NOT handle ^Z! - -;;====================================================================== -;; M I S C U T I L S -;;====================================================================== - -;;====================================================================== -;; convert stuff to a number if possible -(define (any->number val) - (cond - ((number? val) val) - ((string? val) (string->number val)) - ((symbol? val) (any->number (symbol->string val))) - (else #f))) - -(define (any->number-if-possible val) - (let ((num (any->number val))) - (if num num val))) - -(define (patt-list-match item patts) - (debug:print-info 8 *default-log-port* "patt-list-match item=" item " patts=" patts) - (if (and item patts) ;; here we are filtering for matches with item patterns - (let ((res #f)) ;; look through all the item-patts if defined, format is patt1,patt2,patt3 ... wildcard is % - (for-each - (lambda (patt) - (let ((modpatt (string-substitute "%" ".*" patt #t))) - (debug:print-info 10 *default-log-port* "patt " patt " modpatt " modpatt) - (if (string-match (regexp modpatt) item) - (set! res #t)))) - (string-split patts ",")) - res) - #t)) - -;;====================================================================== -;; '(print (string-intersperse (map cadr (hash-table-ref/default (read-config "megatest.config" \#f \#t) "disks" '"'"'("none" ""))) "\n"))' -(define (common:get-disks #!key (configf #f)) - (hash-table-ref/default - (or configf (read-config "megatest.config" #f #t)) - "disks" '("none" ""))) - -(define (common:get-install-area) - (let ((exe-path (car (argv)))) - (if (common:file-exists? exe-path) - (handle-exceptions - exn - #f - (pathname-directory - (pathname-directory - (pathname-directory exe-path)))) - #f))) - -;;====================================================================== -;; return first path that can be created or already exists and is writable -;; -(define (common:get-create-writeable-dir dirs) - (if (null? dirs) - #f - (let loop ((hed (car dirs)) - (tal (cdr dirs))) - (let ((res (or (and (directory? hed) - (file-write-access? hed) - hed) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "could not create " hed - ", this might cause problems down the road. exn=" exn) - #f) - (create-directory hed #t))))) - (if (and (string? res) - (directory? res)) - res - (if (null? tal) - #f - (loop (car tal)(cdr tal)))))))) - -;;====================================================================== -;; return the youngest timestamp . filename -;; -(define (common:get-youngest glob-list) - (let ((all-files (apply append - (map (lambda (patt) - (handle-exceptions - exn - '() - (glob patt))) - glob-list)))) - (fold (lambda (fname res) - (let ((last-mod (car res)) - (curmod (handle-exceptions - exn - 0 - (file-modification-time fname)))) - (if (> curmod last-mod) - (list curmod fname) - res))) - '(0 "n/a") - all-files))) - -;;====================================================================== -;; use bash to expand a glob. Does NOT handle paths with spaces! -;; -(define (common:bash-glob instr) - (string-split - (with-input-from-pipe - (conc "/bin/bash -c \"echo " instr "\"") - read-line))) - -;;====================================================================== -;; Some safety net stuff -;;====================================================================== - -;;====================================================================== -;; return input if it is a list or return null -(define (common:list-or-null inlst #!key (ovrd #f)(message #f)) - (if (list? inlst) - inlst - (begin - (if message (debug:print-error 0 *default-log-port* message)) - (or ovrd '())))) - -;;====================================================================== -;; T A R G E T S , S T A T E , S T A T U S , -;; R U N N A M E A N D T E S T P A T T -;;====================================================================== - -;;====================================================================== -;; (map print (map car (hash-table->alist (read-config "runconfigs.config" #f #t)))) -;; -(define (common:get-runconfig-targets #!key (configf #f)) - (let ((targs (sort (map car (hash-table->alist - (or configf ;; NOTE: There is no value in using runconfig:read here. - (read-config (conc *toppath* "/runconfigs.config") - #f #t) - (make-hash-table)))) - stringsymbol force-setting) #f)) - (force-result (case force-type - ((#f) #f) - ((always) #t) - ((test) (if (args:get-arg "-execute") ;; we are in a test - #t - #f)) - (else - (debug:print 0 *default-log-port* "ERROR: Bad server force setting " force-setting ", forcing server.") - #t)))) ;; default to requiring server - (if force-result - (begin - (debug:print-info 0 *default-log-port* "ATTENTION! Forcing use of server, force setting is \"" force-setting "\".") - #t) - #f))) - -;;====================================================================== -;; M I S C L I S T S -;;====================================================================== - -;;====================================================================== -;; items in lista are matched value and position in listb -;; return the remaining items in listb or #f -;; -(define (common:list-is-sublist lista listb) - (if (null? lista) - listb ;; all items in listb are "remaining" - (if (> (length lista)(length listb)) - #f - (let loop ((heda (car lista)) - (tala (cdr lista)) - (hedb (car listb)) - (talb (cdr listb))) - (if (equal? heda hedb) - (if (null? tala) ;; we are done - talb - (loop (car tala) - (cdr tala) - (car talb) - - (cdr talb))) - #f))))) - -;;====================================================================== -;; Needed for long lists to be sorted where (apply max ... ) dies -;; -(define (common:max inlst) - (let loop ((max-val (car inlst)) - (hed (car inlst)) - (tal (cdr inlst))) - (if (not (null? tal)) - (loop (max hed max-val) - (car tal) - (cdr tal)) - (max hed max-val)))) - -;;====================================================================== -;; get min or max, use > for max and < for min, this works around the limits on apply -;; -(define (common:min-max comp lst) - (if (null? lst) - #f ;; better than an exception for my needs - (fold (lambda (a b) - (if (comp a b) a b)) - (car lst) - lst))) - -;;====================================================================== -;; get min or max, use > for max and < for min, this works around the limits on apply -;; -(define (common:sum lst) - (if (null? lst) - 0 - (fold (lambda (a b) - (+ a b)) - (car lst) - lst))) - -;;====================================================================== -;; path list to hash-table tree -;; ((a b c)(a b d)(e b c)) => ((a (b (d) (c))) (e (b (c)))) -;; -(define (common:list->htree lst) - (let ((resh (make-hash-table))) - (for-each - (lambda (inlst) - (let loop ((ht resh) - (hed (car inlst)) - (tal (cdr inlst))) - (if (hash-table-ref/default ht hed #f) - (if (not (null? tal)) - (loop (hash-table-ref ht hed) - (car tal) - (cdr tal))) - (begin - (hash-table-set! ht hed (make-hash-table)) - (loop ht hed tal))))) - lst) - resh)) - -;;====================================================================== -;; hash-table tree to html list tree -;; -;; tipfunc takes two parameters: y the tip value and path the path to that point -;; -(define (common:htree->html ht path tipfunc) - (let ((datlist (sort (hash-table->alist ht) - (lambda (a b) - (string< (car a)(car b)))))) - (if (null? datlist) - (tipfunc #f path) ;; really shouldn't get here - (s:ul - (map (lambda (x) - (let* ((levelname (car x)) - (y (cdr x)) - (newpath (append path (list levelname))) - (leaf (or (not (hash-table? y)) - (null? (hash-table-keys y))))) - (if leaf - (s:li (tipfunc y newpath)) - (s:li - (list - levelname - (common:htree->html y newpath tipfunc)))))) - datlist))))) - -;;====================================================================== -;; hash-table tree to alist tree -;; -(define (common:htree->atree ht) - (map (lambda (x) - (cons (car x) - (let ((y (cdr x))) - (if (hash-table? y) - (common:htree->atree y) - y)))) - (hash-table->alist ht))) - -;;====================================================================== -;; M U N G E D A T A I N T O N I C E F O R M S -;;====================================================================== - -;;====================================================================== -;; Generate an index for a sparse list of key values -;; ( (rowname1 colname1 val1)(rowname2 colname2 val2) ) -;; -;; => -;; -;; ( (rowname1 0)(rowname2 1)) ;; rownames -> num -;; (colname1 0)(colname2 1)) ) ;; colnames -> num -;; -;; optional apply proc to rownum colnum value -(define (common:sparse-list-generate-index data #!key (proc #f)) - (if (null? data) - (list '() '()) - (let loop ((hed (car data)) - (tal (cdr data)) - (rownames '()) - (colnames '()) - (rownum 0) - (colnum 0)) - (let* ((rowkey (car hed)) - (colkey (cadr hed)) - (value (caddr hed)) - (existing-rowdat (assoc rowkey rownames)) - (existing-coldat (assoc colkey colnames)) - (curr-rownum (if existing-rowdat rownum (+ rownum 1))) - (curr-colnum (if existing-coldat colnum (+ colnum 1))) - (new-rownames (if existing-rowdat rownames (cons (list rowkey curr-rownum) rownames))) - (new-colnames (if existing-coldat colnames (cons (list colkey curr-colnum) colnames)))) - ;; (debug:print-info 0 *default-log-port* "Processing record: " hed ) - (if proc (proc curr-rownum curr-colnum rowkey colkey value)) - (if (null? tal) - (list new-rownames new-colnames) - (loop (car tal) - (cdr tal) - new-rownames - new-colnames - (if (> curr-rownum rownum) curr-rownum rownum) - (if (> curr-colnum colnum) curr-colnum colnum) - )))))) - -;;====================================================================== -;; if it looks like a number -> convert it to a number, else return it -;; -(define (common:lazy-convert inval) - (let* ((as-num (if (string? inval)(string->number inval) #f))) - (or as-num inval))) - -;;====================================================================== -;; convert string a=1; b=2; c=a silly thing; d= -;; to '((a . 1)(b . 2)(c . "a silly thing")(d . "")) -;; -(define (common:val->alist val #!key (convert #f)) - (let ((val-list (string-split-fields ";\\s*" val #:infix))) - (if val-list - (map (lambda (x) - (let ((f (string-split-fields "\\s*=\\s*" x #:infix))) - (case (length f) - ((0) `(,#f)) ;; null string case - ((1) `(,(string->symbol (car f)))) - ((2) `(,(string->symbol (car f)) . ,(let ((inval (cadr f))) - (if convert (common:lazy-convert inval) inval)))) - (else f)))) - val-list) - '()))) - -;;====================================================================== -;; S Y S T E M S T U F F -;;====================================================================== - -;;====================================================================== -;; lazy-safe get file mod time. on any error (file not existing etc.) return 0 -;; -(define (common:lazy-modification-time fpath) - (handle-exceptions - exn - (begin - (debug:print 2 *default-log-port* "Failed to get modification time for " fpath ", treating it as zero. exn=" exn) - 0) - (if (file-exists? fpath) - (file-modification-time fpath) - 0))) - -;;====================================================================== -;; find timestamp of newest file associated with a sqlite db file -(define (common:lazy-sqlite-db-modification-time fpath) - (let* ((glob-list (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "Failed to glob " fpath "*, exn=" exn) - `(,(conc "/no/such/file, message: " ((condition-property-accessor 'exn 'message) exn)))) - (glob (conc fpath "*")))) - (file-list (if (eq? 0 (length glob-list)) - '("/no/such/file") - glob-list))) - (apply max - (map - common:lazy-modification-time - file-list)))) - -;;====================================================================== -;; make "nice-path" available in config files and the repl -(define nice-path common:nice-path) - -;;====================================================================== -;; returns *effective load* (not normalized) -;; -(define (common:get-intercept onemin fivemin) - (if (< onemin fivemin) ;; load is decreasing, just use the onemin load - onemin - (let* ((load-change (- onemin fivemin)) - (tchange (- 300 60))) - (max (+ onemin (* 60 (/ load-change tchange))) 0)))) - -;;====================================================================== -;; calculate a delay number based on a droop curve -;; inputs are: -;; - load-in, load as from uptime, NOT normalized -;; - numcpus, number of cpus, ideally use the real cpus, not threads -;; -(define (common:get-delay load-in numcpus) - (let* ((ratio (/ load-in numcpus)) - (new-option (configf:lookup *configdat* "load" "new-load-method")) - (paramstr (or (configf:lookup *configdat* "load" "exp-params") - "15 12 1281453987.9543 0.75")) ;; 5 4 10 1")) - (paramlst (map string->number (string-split paramstr)))) - (if new-option - (begin - (cond ((and (>= ratio 0) (< ratio .5)) - 0) - ((and (>= ratio 0.5) (<= ratio .9)) - (* ratio (/ 5 .9))) - ((and (> ratio .9) (<= ratio 1.1)) - (+ 5 (* (- ratio .9) (/ 55 .2)))) - ((> ratio 1.1) - 60))) - (match paramlst - ((r1 r2 s1 s2) - (debug:print 3 *default-log-port* "Using params r1=" r1 " r2=" r2 " s1=" s1 " s2=" s2) - (min (max (/ (expt r1 (* r2 s2 ratio)) s1) 0) 30)) - (else - (debug:print 0 *default-log-port* "BAD exp-params, should be \"r1 r2 s1 s2\" but got " paramstr) - 30))))) - -;; -mrw- this appears to not be used -;; -;; (define (common:print-delay-table) -;; (let loop ((x 0)) -;; (print x "," (common:get-delay x 1)) -;; (if (< x 2) -;; (loop (+ x 0.1))))) - -;; (define (get-cpu-load #!key (remote-host #f)) -;; (car (common:get-cpu-load remote-host))) - -;;====================================================================== -;; (let* ((load-res (process:cmd-run->list "uptime")) -;; (load-rx (regexp "load average:\\s+(\\d+)")) -;; (cpu-load #f)) -;; (for-each (lambda (l) -;; (let ((match (string-search load-rx l))) -;; (if match -;; (let ((newval (string->number (cadr match)))) -;; (if (number? newval) -;; (set! cpu-load newval)))))) -;; (car load-res)) -;; cpu-load)) - -;;====================================================================== -;; get values from cached info from dropping file in .sysdata dir -;; e.g. key is host and dtype is normalized-load -;; -(define (common:get-cached-info key dtype #!key (age 10)) - (if *toppath* - (let* ((fullpath (conc *toppath* "/.sysdata/" key "-" dtype ".log")) - (delfile (lambda (exn) - (debug:print-info 2 *default-log-port* " removing bad file " fullpath ", exn=" exn) - (delete-file* fullpath) - #f))) - (if (and (file-exists? fullpath) - (file-read-access? fullpath)) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "failed to get cached info from " fullpath ", exn=" exn) - #f) - (debug:print 2 *default-log-port* "reading file " fullpath) - (let ((real-age (- (current-seconds) - (handle-exceptions - exn - (begin - (debug:print 1 *default-log-port* "Failed to read mod time on file " - fullpath ", using 0, exn=" exn) - 0) - (file-change-time fullpath))))) - (if (< real-age age) - (handle-exceptions - exn - (delfile exn) - (let* ((res (with-input-from-file fullpath read))) - (if (eof-object? res) - (begin - (delfile "n/a") - #f) - res))) - (begin - (debug:print-info 2 *default-log-port* "file " fullpath - " is too old (" real-age" seconds) to trust, skipping reading it") - #f)))) - (begin - (debug:print 2 *default-log-port* "not reading file " fullpath) - #f))) - #f)) - -(define (common:write-cached-info key dtype dat) - (if *toppath* - (let* ((fulldir (conc *toppath* "/.sysdata")) - (fullpath (conc fulldir "/" key "-" dtype ".log"))) - (if (not (file-exists? fulldir))(create-directory fulldir #t)) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "failed to write file " fullpath ", exn=" exn) - #f) - (with-output-to-file fullpath (lambda ()(pp dat))))) - #f)) - -(define (common:raw-get-remote-host-load remote-host) - (let* ((inp #f)) - (handle-exceptions - exn - (begin - (close-input-pipe inp) - (debug:print 0 *default-log-port* "failed to ssh to " remote-host " and get loadavg. exn=" exn) - #f) ;; more specific handling of errors needed - (set! inp (open-input-pipe (conc "ssh " remote-host " cat /proc/loadavg"))) - (let ((res (list (read inp)(read inp)(read inp)))) - (close-input-pipe inp) - res)))) - -;;====================================================================== -;; get cpu load by reading from /proc/loadavg, return all three values -;; -(define (common:get-cpu-load remote-host) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "failed to ssh or read loadavg from host " remote-host ", exn=" exn) - '(-99 -99 -99)) - (let* ((actual-hostname (or remote-host (get-host-name) "localhost"))) - (or (common:get-cached-info actual-hostname "cpu-load") - (let ((result (if (and remote-host - (not (equal? remote-host (get-host-name)))) - (map (lambda (res) - (if (eof-object? res) 9e99 res)) - (common:raw-get-remote-host-load remote-host)) - (with-input-from-file "/proc/loadavg" - (lambda ()(list (read)(read)(read))))))) - (match - result - ((l1 l2 l3) - (if (and (number? l1) - (number? l2) - (number? l3)) - (begin - (common:write-cached-info actual-hostname "cpu-load" result) - result) - '(-1 -1 -1))) ;; -1 is bad result - (else '(-2 -2 -2)))))))) - -;;====================================================================== -;; get normalized cpu load by reading from /proc/loadavg and /proc/cpuinfo return all three values and the number of real cpus and the number of threads -;; returns alist '((adj-cpu-load . normalized-proc-load) ... etc. -;; keys: adj-proc-load, adj-core-load, 1m-load, 5m-load, 15m-load -;; -(define (common:get-normalized-cpu-load remote-host) - (let ((res (common:get-normalized-cpu-load-raw remote-host)) - (default `((adj-proc-load . 2) ;; there is no right answer - (adj-core-load . 2) - (1m-load . 2) - (5m-load . 0) ;; causes a large delta - thus causing default of throttling if stuff goes wrong - (15m-load . 0) - (proc . 1) - (core . 1) - (phys . 1) - (error . #t)))) - (cond - ((and (list? res) - (> (length res) 2)) - res) - ((eq? res #f) default) ;; add messages? - ((eq? res #f) default) ;; this would be the #eof - (else default)))) - -(define (common:ssh-get-loadavg remote-host) - (let ((inp (open-input-pipe (conc "ssh " remote-host " \"cat /proc/loadavg;cat /proc/cpuinfo;echo end\"")))) - (let* ((res (read-lines inp))) - (close-input-pipe inp) - res))) - -(define (common:get-normalized-cpu-load-raw remote-host) - (let* ((actual-host (or remote-host (get-host-name)))) ;; #f is localhost - (or (common:get-cached-info actual-host "normalized-load") - (let ((data (if remote-host - (common:ssh-get-loadavg remote-host) - (append - (with-input-from-file "/proc/loadavg" - read-lines) - (with-input-from-file "/proc/cpuinfo" - read-lines) - (list "end")))) - (load-rx (regexp "^([\\d\\.]+)\\s+([\\d\\.]+)\\s+([\\d\\.]+)\\s+.*$")) - (proc-rx (regexp "^processor\\s+:\\s+(\\d+)\\s*$")) - (core-rx (regexp "^core id\\s+:\\s+(\\d+)\\s*$")) - (phys-rx (regexp "^physical id\\s+:\\s+(\\d+)\\s*$")) - (max-num (lambda (p n)(max (string->number p) n)))) - ;; (print "data=" data) - (if (null? data) ;; something went wrong - #f - (let loop ((hed (car data)) - (tal (cdr data)) - (loads #f) - (proc-num 0) ;; processor includes threads - (phys-num 0) ;; physical chip on motherboard - (core-num 0)) ;; core - ;;; (print hed ", " loads ", " proc-num ", " phys-num ", " core-num) - (if (null? tal) ;; have all our data, calculate normalized load and return result - (let* ((act-proc (+ proc-num 1)) - (act-phys (+ phys-num 1)) - (act-core (+ core-num 1)) - (adj-proc-load (/ (car loads) act-proc)) - (adj-core-load (/ (car loads) act-core)) - (result - (append (list (cons 'adj-proc-load adj-proc-load) - (cons 'adj-core-load adj-core-load)) - (list (cons '1m-load (car loads)) - (cons '5m-load (cadr loads)) - (cons '15m-load (caddr loads))) - (list (cons 'proc act-proc) - (cons 'core act-core) - (cons 'phys act-phys))))) - (common:write-cached-info actual-host "normalized-load" result) - result) - (regex-case - hed - (load-rx ( x l1 l5 l15 ) (loop (car tal)(cdr tal)(map string->number (list l1 l5 l15)) proc-num phys-num core-num)) - (proc-rx ( x p ) (loop (car tal)(cdr tal) loads (max-num p proc-num) phys-num core-num)) - (phys-rx ( x p ) (loop (car tal)(cdr tal) loads proc-num (max-num p phys-num) core-num)) - (core-rx ( x c ) (loop (car tal)(cdr tal) loads proc-num phys-num (max-num c core-num))) - (else - (begin - ;; (print "NO MATCH: " hed) - (loop (car tal)(cdr tal) loads proc-num phys-num core-num))))))))))) - -(define (common:unix-ping hostname) - (let ((res (system (conc "ping -c 1 " hostname " > /dev/null")))) - (eq? res 0))) - -;;====================================================================== -;; ideally put all this info into the db, no need to preserve it across moving homehost -;; -;; return list of -;; ( reachable? cpuload update-time ) -(define (common:get-host-info hostname) - (let* ((loadinfo (rmt:get-latest-host-load hostname)) ;; if this host happens to have been recently used by a test reuse the load data - (load (car loadinfo)) - (load-sample-time (cdr loadinfo)) - (load-sample-age (- (current-seconds) load-sample-time)) - (loadinfo-timeout-seconds 6) ;; this was 20 seconds, seems way too lax. Switch to 6 seconds - (host-last-update-timeout-seconds 4) - (host-rec (hash-table-ref/default *host-loads* hostname #f)) - ) - (cond - ((< load-sample-age loadinfo-timeout-seconds) - (list #t - load-sample-time - load)) - ((and host-rec - (< (current-seconds) (+ (host-last-update host-rec) host-last-update-timeout-seconds))) - (list #t - (host-last-update host-rec) - (host-last-cpuload host-rec ))) - ((common:unix-ping hostname) - (list #t - (current-seconds) - (alist-ref 'adj-core-load (common:get-normalized-cpu-load hostname)))) ;; this is cheaper than you might think. get-normalized-cpu-load is cached for up to 5 seconds - (else - (list #f 0 -1) ;; bad host, don't use! - )))) - -;;====================================================================== -;; see defstruct host at top of file. -;; host: reachable last-update last-used last-cpuload -;; -(define (common:update-host-loads-table hosts-raw) - (let* ((hosts (filter (lambda (x) - (string-match (regexp "^\\S+$") x)) - hosts-raw))) - (for-each - (lambda (hostname) - (let* ((rec (let ((h (hash-table-ref/default *host-loads* hostname #f))) - (if h - h - (let ((h (make-host))) - (hash-table-set! *host-loads* hostname h) - h)))) - (host-info (common:get-host-info hostname)) - (is-reachable (car host-info)) - (last-reached-time (cadr host-info)) - (load (caddr host-info))) - (host-reachable-set! rec is-reachable) - (host-last-update-set! rec last-reached-time) - (host-last-cpuload-set! rec load))) - hosts))) - -;;====================================================================== -;; go through the hosts from least recently used to most recently used, pick the first that meets the load criteral from the -;; [host-rules] section. -;; -(define (common:get-least-loaded-host hosts-raw host-type configdat) - (let* ((rdat (configf:lookup configdat "host-rules" host-type)) - (rules (common:val->alist (or rdat "") convert: #t)) ;; maxnload, maxnjobs, maxjobrate - (maxnload (common:alist-ref/default 'maxnload rules 1.5)) ;; max normalized load - (maxnjobs (common:alist-ref/default 'maxnjobs rules 1.5)) ;; max normalized number of jobs - (maxjobrate (common:alist-ref/default 'maxjobrate rules (/ 1 6))) ;; max rate of submitting jobs to a given host in jobs/second - (hosts (filter (lambda (x) - (string-match (regexp "^\\S+$") x)) - hosts-raw)) - ;; (best-host #f) - (get-rec (lambda (hostname) - ;; (print "get-rec hostname=" hostname) - (let ((h (hash-table-ref/default *host-loads* hostname #f))) - (if h - h - (let ((h (make-host))) - (hash-table-set! *host-loads* hostname h) - h))))) - (best-load 99999) - (curr-time (current-seconds)) - (get-hosts-sorted (lambda (hosts) - (sort hosts (lambda (a b) - (let ((a-rec (get-rec a)) - (b-rec (get-rec b))) - ;; (print "a=" a " a-rec=" a-rec " host-last-used=" (host-last-used a-rec)) - ;; (print "b=" b " b-rec=" b-rec " host-last-used=" (host-last-used b-rec)) - (< (host-last-used a-rec) - (host-last-used b-rec)))))))) - (debug:print 0 *default-log-port* "INFO: hosts-sorted=" (get-hosts-sorted hosts)) - (if (null? hosts) - #f ;; no hosts to select from. All done and giving up now. - (let ((hosts-sorted (get-hosts-sorted hosts))) - (common:update-host-loads-table hosts) - (let loop ((hostname (car hosts-sorted)) - (tal (cdr hosts-sorted)) - (best-host #f)) - (let* ((rec (get-rec hostname)) - (reachable (host-reachable rec)) - (load (host-last-cpuload rec)) - (last-used (host-last-used rec)) - (delta (- curr-time last-used)) - (job-rate (if (> delta 0) - (/ 1 delta) - 999)) ;; jobs per second - (new-best - (cond - ((not reachable) - (debug:print 0 *default-log-port* "Skipping host " hostname " as it cannot be reached.") - best-host) - ((and (< load maxnload) ;; load is acceptable - (< job-rate maxjobrate)) ;; job rate is acceptable - (set! best-load load) - hostname) - (else best-host)))) - (debug:print 0 *default-log-port* "INFO: Trying host " hostname " with load " load ", last used " delta " seconds ago, with job-rate " job-rate " for running a test." ) - (if new-best - (begin ;; found a host, return it - (debug:print 0 *default-log-port* "INFO: Found host: " new-best " load: " load " last-used: " delta " seconds ago, with job-rate: " job-rate) - (host-last-used-set! rec curr-time) - new-best) - (if (null? tal) #f (loop (car tal)(cdr tal) best-host))))))))) - -(define (common:wait-for-homehost-load maxnormload msg) - (let loop ((start-time (current-seconds))) ;; we saw some instances of this being called before *toppath* was set. This might be an early setup race. This delay should help but it is impossible to test... - (if (not *toppath*) - (begin - (debug:print 0 *default-log-port* "ERROR: common:wait-for-homehost-load called before *toppath* set.") - (thread-sleep! 30) - (if (< (- (current-seconds) start-time) 300) - (loop start-time))))) - (case (rmt:transport-mode) - ((http) - (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. - #f - (server:choose-server *toppath* 'homehost))) - (hh (if hh-dat (car hh-dat) #f))) - (common:wait-for-normalized-load maxnormload msg hh))) - (else - (common:wait-for-normalized-load maxnormload msg (get-host-name))))) - -(define (common:get-num-cpus remote-host) - (let* ((actual-host (or remote-host (get-host-name)))) - ;; hosts had better not be changing the number of cpus too often! - (or (hash-table-ref/default *numcpus-cache* actual-host #f) - (let* ((numcpus (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (random 3600))) - (let* ((proc (lambda () - (let loop ((numcpu 0) - (inl (read-line))) - (if (eof-object? inl) - (if (> numcpu 0) - numcpu - #f) ;; if zero return #f so caller knows that things are not working - (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl) - (+ numcpu 1) - numcpu) - (read-line)))))) - (result (if (and remote-host - (not (equal? remote-host (get-host-name)))) - (common:generic-ssh - (conc "ssh " remote-host " cat /proc/cpuinfo") - proc -1) - (with-input-from-file "/proc/cpuinfo" proc)))) - (if (and (number? result) - (> result 0)) - (common:write-cached-info actual-host "num-cpus" result)) - result)))) - (hash-table-set! *numcpus-cache* actual-host numcpus) - numcpus)))) - -;;====================================================================== -;; wait for normalized cpu load to drop below maxload -;; -(define (common:wait-for-normalized-load maxnormload msg remote-host #!optional (rem-tries 5)) - (let ((num-cpus (common:get-num-cpus remote-host))) - (if num-cpus - (common:wait-for-cpuload maxnormload num-cpus 15 msg: msg remote-host: remote-host) - (begin - (thread-sleep! (random 60)) ;; we failed to get num cpus. wait a bit and try again - (if (> rem-tries 0) - (common:wait-for-normalized-load maxnormload msg remote-host (- rem-tries 1)) - #f))))) - -;;====================================================================== -;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load -;; count - count down to zero, at some point we'd give up if the load never drops -;; num-tries - count down to zero number tries to get numcpus -;; -(define (common:wait-for-cpuload maxnormload numcpus-in - #!key (count 1000) - (msg #f)(remote-host #f)(num-tries 5)) - (let* ((loadavg (common:get-cpu-load remote-host)) - ;; not possible to have zero. If we get 1, it's possible that we got the previous default, and we should check again - (numcpus (if (<= 1 numcpus-in) - (common:get-num-cpus remote-host) numcpus-in)) - (first (car loadavg)) - (next (cadr loadavg)) - (adjmaxload (* maxnormload (max 1 numcpus))) ;; possible bug where numcpus (or could be maxload) is zero, crude - ;; fallback is to at least use 1 - ;; effective load accounts for load jumps, this should elminate all the first-next-avg, adjwait, load-jump-limit - ;; etc. - (effective-load (common:get-intercept first next)) - (recommended-delay (common:get-delay effective-load numcpus)) - (effective-host (or remote-host "localhost")) - (normalized-effective-load (/ effective-load numcpus)) - (will-wait (> normalized-effective-load maxnormload))) - (if (and will-wait (> recommended-delay 1)) - (let* ((actual-delay (min recommended-delay 30))) - (if (common:low-noise-print 30 (conc (round actual-delay) "-safe-load")) - (debug:print-info 0 *default-log-port* "Load control, delaying " - actual-delay " seconds to maintain safe load. current normalized effective load is " - normalized-effective-load". maxnormload = " maxnormload " numcpus = " numcpus " loadavg = " loadavg " effective-load = " effective-load)) - (thread-sleep! actual-delay))) - - (cond - ;; bad data, try again to get the data - ((not will-wait) - (if (common:low-noise-print 3600 (conc (round normalized-effective-load) "-load-acceptable-" effective-host)) - (debug:print 0 *default-log-port* "Effective load on " effective-host " is acceptable at " effective-load " continuing."))) - - ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable - (> num-tries 0)) - (debug:print 0 *default-log-port* "WARNING: received bad data from get-cpu-load " - first ", we'll sleep 10s and try " num-tries " more times.") - (thread-sleep! 10) - (common:wait-for-cpuload maxnormload numcpus-in - count: count remote-host: remote-host num-tries: (- num-tries 1))) - - ;; need to wait for load to drop - ((and will-wait ;; (> first adjmaxload) - (> count 0)) - (debug:print-info 0 *default-log-port* - "Delaying 15" ;; adjwait - " seconds due to normalized effective load " normalized-effective-load ;; first - " exceeding max of " adjmaxload - " on server " (or remote-host (get-host-name)) - " (normalized load-limit: " maxnormload ") " (if msg msg "")) - (thread-sleep! 15) ;; adjwait) - (common:wait-for-cpuload maxnormload numcpus count: (- count 1) msg: msg remote-host: remote-host) - ;; put the message here to indicate came out of waiting - (debug:print-info 1 *default-log-port* - "On host: " effective-host - ", effective load: " effective-load - ", numcpus: " numcpus - ", normalized effective load: " normalized-effective-load - )) - ;; overloaded and count expired (i.e. went to zero) - (else - (if (> num-tries 0) ;; should be "num-tries-left". - (if (common:low-noise-print 30 (conc (round effective-load) "-load-acceptable-" effective-host)) - (debug:print 0 *default-log-port* "Load on " effective-host " is acceptable at effective normalized load of " - effective-normalized-load " continuing.")) - (debug:print 0 *default-log-port* "Load on " effective-host ", " - first" could not be retrieved. Giving up and continuing.")))))) - -;;====================================================================== -;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load -;; -;; (define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)(force-maxload #f)(num-tries 5)) -;; (let* ((loadavg (common:get-cpu-load remote-host)) -;; (numcpus (if (<= 1 numcpus-in) ;; not possible to have zero. If we get 1, it's possible that we got the previous default, and we should check again -;; (common:get-num-cpus remote-host) -;; numcpus-in)) -;; (maxload (if force-maxload -;; maxload-in -;; (if (number? maxload-in) -;; (max maxload-in 0.5) -;; 0.5))) ;; so maxload must be greater than 0.5 for now BUG - FIXME? -;; (first (car loadavg)) -;; (next (cadr loadavg)) -;; (adjmaxload (* maxload (max 1 numcpus))) ;; possible bug where -;; ;; numcpus (or could be -;; ;; maxload) is zero, -;; ;; crude fallback is to -;; ;; at least use 1 -;; (loadjmp (- first (if (> next (* numcpus 0.7)) ;; could do something with average of first and next? -;; 0 -;; next))) ;; we will force a conservative calculation any time next is large. -;; (first-next-avg (/ (+ first next) 2)) -;; ;; add some randomness to the time to break any alignment -;; ;; where netbatch dumps many jobs to machines simultaneously -;; (adjwait (min (+ 300 (random 10)) (abs (* (+ (random 10) -;; (/ (- 1000 count) 10) -;; waitdelay) -;; (- first adjmaxload) )))) -;; (load-jump-limit (configf:lookup-number *configdat* "setup" "load-jump-limit")) -;; ;; effective load accounts for load jumps, this should elminate all the first-next-avg, adjwait, load-jump-limit -;; ;; etc. -;; (effective-load (common:get-intercept first next)) -;; (effective-host (or remote-host "localhost")) -;; (normalized-effective-load (/ effective-load numcpus)) -;; (will-wait (> normalized-effective-load maxload))) -;; -;; ;; let's let the user know once in a long while that load checking -;; ;; is happening but not constantly report it -;; #;(if (common:low-noise-print 30 (conc "cpuload" (or remote-host "localhost"))) ;; (> (random 100) 75) ;; about 25% of the time -;; (debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload -;; ", load: " first ", adjmaxload: " adjmaxload ", loadjmp: " loadjmp)) -;; -;; (debug:print-info 1 *default-log-port* -;; "On host: " effective-host -;; ", effective load: " effective-load -;; ", numcpus: " numcpus -;; ", normalized effective load: " normalized-effective-load -;; ) -;; -;; (cond -;; ;; bad data, try again to get the data -;; ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable -;; (> num-tries 0)) -;; (debug:print 0 *default-log-port* "WARNING: received bad data from get-cpu-load " first ", we'll sleep 10s and try " num-tries " more times.") -;; (thread-sleep! 10) -;; (common:wait-for-cpuload maxload-in numcpus-in waitdelay -;; count: count remote-host: remote-host force-maxload: force-maxload num-tries: (- num-tries 1))) -;; ;; need to wait for load to drop -;; ((and will-wait ;; (> first adjmaxload) -;; (> count 0)) -;; (debug:print-info 0 *default-log-port* -;; "Delaying " 15 ;; adjwait -;; " seconds due to normalized effective load " normalized-effective-load ;; first -;; " exceeding max of " adjmaxload -;; " on server " (or remote-host (get-host-name)) -;; " (normalized load-limit: " maxload ") " (if msg msg "")) -;; (thread-sleep! 15) ;; adjwait) -;; (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host)) -;; ((and (> loadjmp (cond -;; (load-jump-limit load-jump-limit) -;; ((> numcpus 8)(/ numcpus 2)) -;; ((> numcpus 4)(/ numcpus 1.2)) -;; (else 0.5))) -;; (> count 0)) -;; (debug:print-info 0 *default-log-port* "waiting " adjwait " seconds due to possible load jump " loadjmp ". " -;; (if msg msg "")) -;; (thread-sleep! adjwait) -;; (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host)) -;; (else -;; (if (> num-tries 0) -;; (if (common:low-noise-print 30 (conc (round first) "-load-acceptable-" (or remote-host "localhost"))) -;; (debug:print 0 *default-log-port* "Load on " (or remote-host "localhost") " is acceptable at " first " continuing.")) -;; (debug:print 0 *default-log-port* "Load on " (or remote-host "localhost") ", "first" could not be retrieved. Giving up and continuing.")))))) -;; -(define (get-uname . params) - (let* ((uname-res (process:cmd-run->list (conc "uname " (if (null? params) "-a" (car params))))) - (uname #f)) - (if (null? (car uname-res)) - "unknown" - (caar uname-res)))) - -;;====================================================================== -;; D I S K S P A C E -;;====================================================================== - -(define (common:get-disk-space-used fpath) - (with-input-from-pipe (conc "/usr/bin/du -s " fpath) read)) - -;;====================================================================== -;; given path get free space, allows override in [setup] -;; with free-space-script /path/to/some/script.sh -;; -(define (get-df path) - (if (configf:lookup *configdat* "setup" "free-space-script") - (with-input-from-pipe - (conc (configf:lookup *configdat* "setup" "free-space-script") " " path) - (lambda () - (let ((res (read-line))) - (if (string? res) - (string->number res))))) - (get-unix-df path))) - -(define (get-free-inodes path) - (if (configf:lookup *configdat* "setup" "free-inodes-script") - (with-input-from-pipe - (conc (configf:lookup *configdat* "setup" "free-inodes-script") " " path) - (lambda () - (let ((res (read-line))) - (if (string? res) - (string->number res))))) - (get-unix-inodes path))) - -(define (get-unix-df path) - (let* ((df-results (process:cmd-run->list (conc "df " path))) - (space-rx (regexp "([0-9]+)\\s+([0-9]+)%")) - (freespc #f)) - ;; (write df-results) - (for-each (lambda (l) - (let ((match (string-search space-rx l))) - (if match - (let ((newval (string->number (cadr match)))) - (if (number? newval) - (set! freespc newval)))))) - (car df-results)) - freespc)) - -(define (get-unix-inodes path) - (let* ((df-results (process:cmd-run->list (conc "df -i " path))) - (space-rx (regexp "([0-9]+)\\s+([0-9]+)%")) - (freenodes 0)) ;; 0 is a better failsafe than #f here. - ;; (write df-results) - (for-each (lambda (l) - (let ((match (string-search space-rx l))) - (if match - (let ((newval (string->number (cadr match)))) - (if (number? newval) - (set! freenodes newval)))))) - (car df-results)) - freenodes)) - -(define (common:check-space-in-dir dirpath required) - (let* ((dbspace (if (directory? dirpath) - (get-df dirpath) - 0))) - (list (> dbspace required) - dbspace - required - dirpath))) - -;;====================================================================== -;; check space in dbdir and in megatest dir -;; returns: ok/not dbspace required-space -;; -(define (common:check-db-dir-space) - (let* ((required (string->number - ;; default is 1GB (or actually a billion bytes) This is the number of 1 kB blocks. - (or (configf:lookup *configdat* "setup" "dbdir-space-required") - "1000000"))) - (dbdir (common:make-tmpdir-name *toppath* "")) ;; (db:get-dbdir)) - (tdbspace (common:check-space-in-dir dbdir required)) - (mdbspace (common:check-space-in-dir *toppath* required))) - (sort (list tdbspace mdbspace) (lambda (a b) - (< (cadr a)(cadr b)))))) - -;;====================================================================== -;; check available space in dbdir, exit if insufficient -;; -(define (common:check-db-dir-and-exit-if-insufficient) - (let* ((spacedat (car (common:check-db-dir-space))) ;; look only at worst for now - (is-ok (car spacedat)) - (dbspace (cadr spacedat)) - (required (caddr spacedat)) - (dbdir (cadddr spacedat))) - (if (not is-ok) - (begin - (debug:print-error 0 *default-log-port* "Insufficient space in " dbdir ", require " required ", have " dbspace ", exiting now.") - (exit 1))))) - -;;====================================================================== -;; paths is list of lists ((name path) ... ) -;; -(define (common:get-disk-with-most-free-space disks minsize) - (let* ((best #f) - (bestsize 0) - (default-min-inodes-string "1000000") - (default-min-inodes (string->number default-min-inodes-string)) - (min-inodes (or (string->number (if (configf:lookup *configdat* "setup" "min_inodes") (configf:lookup *configdat* "setup" "min_inodes") default-min-inodes-string)) default-min-inodes))) - - (for-each - (lambda (disk-num) - (let* ((dirpath (cadr (assoc disk-num disks))) - (freespc (cond - ((not (directory? dirpath)) - (if (common:low-noise-print 300 "disks not a dir " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it.")) - -1) - ((not (file-write-access? dirpath)) - (if (common:low-noise-print 300 "disks not writeable " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it.")) - -1) - ((not (eq? (string-ref dirpath 0) #\/)) - (if (common:low-noise-print 300 "disks not a proper path " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it.")) - -1) - (else - (get-df dirpath)))) - (free-inodes (cond - ((not (directory? dirpath)) - (if (common:low-noise-print 300 "disks not a dir " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it.")) - -1) - ((not (file-write-access? dirpath)) - (if (common:low-noise-print 300 "disks not writeable " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it.")) - -1) - ((not (eq? (string-ref dirpath 0) #\/)) - (if (common:low-noise-print 300 "disks not a proper path " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it.")) - -1) - (else - (get-free-inodes dirpath)))) - ;;(free-inodes (get-free-inodes dirpath)) - ) - (debug:print 2 *default-log-port* "INFO: disk " disk-num " path " dirpath " free space " freespc " free inodes " free-inodes) - (if (and (> freespc bestsize)(> free-inodes min-inodes )) - (begin - (set! best (cons disk-num dirpath)) - (set! bestsize freespc))) - ;;(print "Processing: " disk-num " bestsize: " bestsize " best: " best " freespc: " freespc " min-inodes: " min-inodes " free-inodes: " free-inodes) - )) - (map car disks)) - (if (and best (> bestsize minsize)) - best - #f))) ;; #f means no disk candidate found - -;;====================================================================== -;; convert a spec string to a list of vectors #( rx action rx-string ) -(define (common:spec-string->list-of-specs spec-string actions) - (let ((spec-strings (string-split-fields "\\s*;\\s*" spec-string #:infix)) - (actions-regex (regexp (conc "^(.*)\\s+(" (string-intersperse (map conc actions) "|") ")")))) - (filter - (lambda (x) x) - (map (lambda (s) - (let ((m (string-match actions-regex s))) - (if m - (vector (regexp (cadr m))(string->symbol (caddr m))(cadr m)) - (begin - (debug:print 0 *default-log-port* "WARNING: Unrecognised rule \"" s "\" in clean-up specification.") - #f)))) - spec-strings)))) - -;;====================================================================== -;; given a list of specs rx . rule and a file return the first matching rule -;; -(define (common:file-find-rule fname rules) ;; rule is vector #( rx action rx-string) - (let loop ((rule (car rules)) - (tail (cdr rules))) - (let ((rx (vector-ref rule 0)) - (rn (vector-ref rule 1))) ;; rule name - (if (string-match rx fname) - rule ;; return the whole rule so regex can be printed etc. - (if (null? tail) - #f - (loop (car tail)(cdr tail))))))) - -;;====================================================================== -;; given a spec apply some rules to a directory -;; -;; WARNING: This function will REMOVE files - be sure your spec and path is correct! -;; -;; spec format: -;; file-regex1 action; file-regex2 action; ... -;; e.g. -;; .*\.log$ keep; .* remove -;; --> keep all .log files, remove everything else -;; limitations: -;; cannot have a rule with ; as part of the spec -;; not very flexible, would be nice to return binned file names? -;; supported rules: -;; keep - keep this file -;; remove - remove this file -;; compress - compress this file -;; -(define (common:dir-clean-up path spec-string #!key (compress "gzip")(actions '(keep remove compress))(remove-empty #f)) - (let* ((specs (common:spec-string->list-of-specs spec-string actions)) - (keepers (make-hash-table)) - (directories (make-hash-table))) - (find-files - path - action: (lambda (p res) - (let ((rule (common:file-find-rule p specs))) - (cond - ((directory? p)(hash-table-set! directories p #t)) - (else - (case (vector-ref rule 1) - ((keep)(hash-table-set! keepers p rule)) - ((remove) - (debug:print 0 *default-log-port* "Removing file " p) - (delete-file p)) - ((compress) - (debug:print 0 *default-log-port* "Compressing file " p) - (system (conc compress " " p))) - (else - (debug:print 0 *default-log-port* "No match for file " p)))))))) - (if remove-empty - (for-each - (lambda (d) - (if (null? (glob (conc d "/.*")(conc d "/*"))) - (begin - (debug:print 0 *default-log-port* "Removing empty directory " d) - (delete-directory d)))) - (sort (hash-table-keys directories) (lambda (a b)(> (string-length a)(string-length b)))))) - )) - -;;====================================================================== -;; E N V I R O N M E N T V A R S -;;====================================================================== - -(define (save-environment-as-files fname #!key (ignorevars (list "USER" "HOME" "DISPLAY" "LS_COLORS" "XKEYSYMDB" "EDITOR" "MAKEFLAGS" "MAKEF" "MAKEOVERRIDES" "HOSTNAME"))) - ;;(bb-check-path msg: "save-environment-as-files entry") - (let ((envvars (get-environment-variables)) - (whitesp (regexp "[^a-zA-Z0-9_\\-:,\\.\\/%$]")) - (mungeval (lambda (val) - (cond - ((eq? val #t) "") ;; convert #t to empty string - ((eq? val #f) #f) ;; convert #f to itself (still thinking about this one - (else val))))) - (with-output-to-file (conc fname ".csh") - (lambda () - (for-each (lambda (keyval) - (let* ((key (car keyval)) - (val (cdr keyval)) - (delim (if (and (string-search whitesp val) - (not (string-search "^\".*\"$" val)) - (not (string-search "^'.*'$" val))) - "\"" - ""))) - - (print (if (or (member key ignorevars) - (string-search whitesp key)) - "# setenv " - "setenv ") - key " " delim (mungeval val) delim))) - envvars))) - (with-output-to-file (conc fname ".sh") - (lambda () - (for-each (lambda (keyval) - (let* ((key (car keyval)) - (val (cdr keyval)) - (delim (if (and (string-search whitesp val) - (not (string-search "^\".*\"$" val)) - (not (string-search "^'.*'$" val))) - "\"" - ""))) - (print (if (or (member key ignorevars) - (string-search whitesp key) - (string-search ":" key)) ;; internal only values to be skipped. - "# export " - "export ") - key "=" delim (mungeval val) delim))) - envvars))))) - -(define (common:get-param-mapping #!key (flavor #f)) - "returns alist mapping string keys in testconfig/subrun to megatest command line switches; if flavor is switch-symbol, maps tcmt symbolic switches to megatest switches" - (let ((default '(("tag-expr" . "-tagexpr") - ("mode-patt" . "-modepatt") - ("run-name" . "-runname") - ("contour" . "-contour") - ("target" . "-target") - ("test-patt" . "-testpatt") - ("msg" . "-m") - ("log" . "-log") - ("start-dir" . "-start-dir") - ("new" . "-set-state-status")))) - (if (eq? flavor 'switch-symbol) - (map (lambda (x) - (cons (string->symbol (conc "-" (car x))) (cdr x))) - default) - default))) - -;;====================================================================== -;; set some env vars from an alist, return an alist with original values -;; (("VAR" "value") ...) -;; a value of #f means "unset this var" -;; -(define (alist->env-vars lst) - (if (list? lst) - (let ((res '())) - (for-each (lambda (p) - (let* ((var (car p)) - (val (cadr p)) - (prv (get-environment-variable var))) - (set! res (cons (list var prv) res)) - (if val - (safe-setenv var (->string val)) - (unsetenv var)))) - lst) - res) - '())) - -;;====================================================================== -;; clear vars matching pattern, run proc, set vars back -;; if proc is a string run that string as a command with -;; system. -;; -(define *common:orig-env* - (let ((envvars (get-environment-variables))) - (if (get-environment-variable "MT_ORIG_ENV") - (with-input-from-string - (z3:decode-buffer (base64:base64-decode (get-environment-variable "MT_ORIG_ENV"))) - read) - (filter-map (lambda (x) - (if (string-match "^MT_.*" (car x)) - #f - x)) - envvars)))) - -(define (common:with-orig-env proc) - (let ((current-env (get-environment-variables))) - (for-each (lambda (x) (unsetenv (car x))) current-env) - (for-each (lambda (x) (setenv (car x) (cdr x))) *common:orig-env*) - (let ((rv (cond - ((string? proc)(system proc)) - (proc (proc))))) - (for-each (lambda (x) (unsetenv (car x))) *common:orig-env*) - (for-each (lambda (x) (setenv (car x) (cdr x))) current-env) - rv))) - -(define (common:without-vars proc . var-patts) - (let ((vars (make-hash-table))) - (for-each - (lambda (vardat) ;; each env var - (for-each - (lambda (var-patt) - (if (string-match var-patt (car vardat)) - (let ((var (car vardat)) - (val (cdr vardat))) - (hash-table-set! vars var val) - (unsetenv var)))) - var-patts)) - (get-environment-variables)) - (cond - ((string? proc)(system proc)) - (proc (proc))) - (hash-table-for-each - vars - (lambda (var val) - (setenv var val))) - vars)) - -(define (common:run-a-command cmd #!key (with-vars #f) (with-orig-env #f)) - (let* ((pre-cmd (dtests:get-pre-command)) - (post-cmd (dtests:get-post-command)) - (fullcmd (if (or pre-cmd post-cmd) - (conc pre-cmd cmd post-cmd) - (conc "viewscreen " cmd)))) - (debug:print-info 02 *default-log-port* "Running command: " fullcmd) - (cond - (with-vars (common:without-vars fullcmd)) - (with-orig-env (common:with-orig-env fullcmd)) - (else (common:without-vars fullcmd "MT_.*"))))) - -;;====================================================================== -;; C O L O R S -;;====================================================================== - -(define (common:name->iup-color name) - (case (string->symbol (string-downcase name)) - ((red) "223 33 49") - ((grey) "192 192 192") - ((orange) "255 172 13") - ((purple) "This is unfinished ..."))) - -;;====================================================================== -;; (define (common:get-color-for-state-status state status) -;; (case (string->symbol state) -;; ((COMPLETED) -;; (case (string->symbol status) -;; ((PASS) "70 249 73") -;; ((WARN WAIVED) "255 172 13") -;; ((SKIP) "230 230 0") -;; (else "223 33 49"))) -;; ((LAUNCHED) "101 123 142") -;; ((CHECK) "255 100 50") -;; ((REMOTEHOSTSTART) "50 130 195") -;; ((RUNNING) "9 131 232") -;; ((KILLREQ) "39 82 206") -;; ((KILLED) "234 101 17") -;; ((NOT_STARTED) "240 240 240") -;; (else "192 192 192"))) - -(define (common:iup-color->rgb-hex instr) - (string-intersperse - (map (lambda (x) - (number->string x 16)) - (map string->number - (string-split instr))) - "/")) - -;;====================================================================== -;; L O C K I N G M E C H A N I S M S -;;====================================================================== - -;;====================================================================== -;; faux-lock is deprecated. Please use simple-lock below -;; -(define (common:faux-lock keyname #!key (wait-time 8)(allow-lock-steal #t)) - (if (rmt:no-sync-get/default keyname #f) ;; do not be tempted to compare to pid. locking is a one-shot action, if already locked for this pid it doesn't actually count - (if (> wait-time 0) - (begin - (thread-sleep! 1) - (if (eq? wait-time 1) ;; only one second left, steal the lock - (begin - (debug:print-info 0 *default-log-port* "stealing lock for " keyname) - (common:faux-unlock keyname force: #t))) - (common:faux-lock keyname wait-time: (- wait-time 1))) - #f) - (begin - (rmt:no-sync-set keyname (conc (current-process-id))) - (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f)))))) - -(define (common:faux-unlock keyname #!key (force #f)) - (if (or force (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f)))) - (begin - (if (rmt:no-sync-get/default keyname #f) (rmt:no-sync-del! keyname)) - #t) - #f)) - -;;====================================================================== -;; simple lock. improve and converge on this one. -;; -(define (common:simple-lock keyname) - (rmt:no-sync-get-lock keyname)) - -(define (common:simple-unlock keyname #!key (force #f)) - (rmt:no-sync-del! keyname)) - -;;====================================================================== -;; -;;====================================================================== - -(define (common:in-running-test?) - (and (args:get-arg "-execute") (get-environment-variable "MT_CMDINFO"))) - -(define (common:get-color-from-status status) - (cond - ((equal? status "PASS") "green") - ((equal? status "FAIL") "red") - ((equal? status "WARN") "orange") - ((equal? status "KILLED") "orange") - ((equal? status "KILLREQ") "purple") - ((equal? status "RUNNING") "blue") - ((equal? status "ABORT") "brown") - (else "black"))) - -;;====================================================================== -;; N A N O M S G C L I E N T -;;====================================================================== -;; -;; -;; -;; (define (common:send-dboard-main-changed) -;; (let* ((dashboard-ips (mddb:get-dashboards))) -;; (for-each -;; (lambda (ipadr) -;; (let* ((soc (common:open-nm-req (conc "tcp://" ipadr))) -;; (msg (conc "main " *toppath*)) -;; (res (common:nm-send-receive-timeout soc msg))) -;; (if (not res) ;; couldn't reach that dashboard - remove it from db -;; (print "ERROR: couldn't reach dashboard " ipadr)) -;; res)) -;; dashboard-ips))) -;; -;; -;; ;;====================================================================== -;; ;; D A S H B O A R D D B -;; ;;====================================================================== -;; -;; (define (mddb:open-db) -;; (let* ((db (open-database (conc (get-environment-variable "HOME") "/.dashboard.db")))) -;; (set-busy-handler! db (busy-timeout 10000)) -;; (for-each -;; (lambda (qry) -;; (exec (sql db qry))) -;; (list -;; "CREATE TABLE IF NOT EXISTS vars (id INTEGER PRIMARY KEY,key TEXT, val TEXT, CONSTRAINT varsconstraint UNIQUE (key));" -;; "CREATE TABLE IF NOT EXISTS dashboards ( -;; id INTEGER PRIMARY KEY, -;; pid INTEGER, -;; username TEXT, -;; hostname TEXT, -;; ipaddr TEXT, -;; portnum INTEGER, -;; start_time TIMESTAMP DEFAULT (strftime('%s','now')), -;; CONSTRAINT hostport UNIQUE (hostname,portnum) -;; );" -;; )) -;; db)) -;; -;; ;; register a dashboard -;; ;; -;; (define (mddb:register-dashboard port) -;; (let* ((pid (current-process-id)) -;; (hostname (get-host-name)) -;; (ipaddr (server:get-best-guess-address hostname)) -;; (username (current-user-name)) ;; (car userinfo))) -;; (db (mddb:open-db))) -;; (print "Register monitor, pid: " pid ", hostname: " hostname ", port: " port ", username: " username) -;; (exec (sql db "INSERT OR REPLACE INTO dashboards (pid,username,hostname,ipaddr,portnum) VALUES (?,?,?,?,?);") -;; pid username hostname ipaddr port) -;; (close-database db))) -;; -;; ;; unregister a monitor -;; ;; -;; (define (mddb:unregister-dashboard host port) -;; (let* ((db (mddb:open-db))) -;; (print "Register unregister monitor, host:port=" host ":" port) -;; (exec (sql db "DELETE FROM dashboards WHERE hostname=? AND portnum=?;") host port) -;; (close-database db))) -;; -;; ;; get registered dashboards -;; ;; -;; (define (mddb:get-dashboards) -;; (let ((db (mddb:open-db))) -;; (query fetch-column -;; (sql db "SELECT ipaddr || ':' || portnum FROM dashboards;")))) - -;;====================================================================== -;; T E S T L A U N C H I N G P E R I T E M W I T H H O S T T Y P E S -;;====================================================================== -;; -;; [hosts] -;; arm cubie01 cubie02 -;; x86_64 zeus xena myth01 -;; allhosts #{g hosts arm} #{g hosts x86_64} -;; -;; [host-types] -;; general #MTLOWESTLOAD #{g hosts allhosts} -;; arm #MTLOWESTLOAD #{g hosts arm} -;; nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo -;; -;; [host-rules] -;; # maxnload => max normalized load -;; # maxnjobs => max jobs per cpu -;; # maxjobrate => max jobs per second -;; general maxnload=1.1; maxnjobs=1.2; maxjobrate=0.1 -;; -;; [launchers] -;; envsetup general -;; xor/%/n 4C16G -;; % nbgeneral -;; -;; [jobtools] -;; # if defined and not "no" flexi-launcher will bypass "launcher" unless no match. -;; flexi-launcher yes -;; launcher nbfake -;; -(define (common:get-launcher configdat testname itempath) - (let ((fallback-launcher (configf:lookup configdat "jobtools" "launcher"))) - (if (and (configf:lookup configdat "jobtools" "flexi-launcher") ;; overrides launcher - (not (equal? (configf:lookup configdat "jobtools" "flexi-launcher") "no"))) - (let* ((launchers (hash-table-ref/default configdat "launchers" '()))) - (if (null? launchers) - fallback-launcher - (let loop ((hed (car launchers)) - (tal (cdr launchers))) - (let ((patt (car hed)) - (host-type (cadr hed))) - (if (tests:match patt testname itempath) - (begin - (debug:print-info 2 *default-log-port* "Have flexi-launcher match for " testname "/" itempath " = " host-type) - (let ((launcher (configf:lookup configdat "host-types" host-type))) - (if launcher - (let* ((launcher-parts (string-split launcher)) - (launcher-exe (car launcher-parts))) - (if (equal? launcher-exe "#MTLOWESTLOAD") ;; this is our special case, we will find the lowest load and craft a nbfake commandline - (let host-loop ((targ-host (common:get-least-loaded-host (cdr launcher-parts) host-type configdat)) - (count 100)) - (if targ-host - (conc "remrun " targ-host) - (if (> count 0) - (begin - (debug:print 0 *default-log-port* "INFO: Waiting for a host for host-type " host-type) - (thread-sleep! (- 101 count)) - (host-loop (common:get-least-loaded-host (cdr launcher-parts) host-type configdat) - (- count 1))) - (begin - (debug:print 0 *default-log-port* "FATAL: Failed to find a host from #MTLOWESTLOAD for host-type " host-type) - (exit))))) - launcher)) - (begin - (debug:print-info 0 *default-log-port* "WARNING: no launcher found for host-type " host-type) - (if (null? tal) - fallback-launcher - (loop (car tal)(cdr tal))))))) - ;; no match, try again - (if (null? tal) - fallback-launcher - (loop (car tal)(cdr tal)))))))) - fallback-launcher))) - -;;====================================================================== -;; D A S H B O A R D U S E R V I E W S -;;====================================================================== - -;;====================================================================== -;; first read ~/views.config if it exists, then read $MTRAH/views.config if it exists -;; -(define (common:load-views-config) - (let* ((view-cfgdat (make-hash-table)) - (home-cfgfile (conc (get-environment-variable "HOME") "/.mtviews.config")) - (mthome-cfgfile (conc *toppath* "/.mtviews.config"))) - (if (common:file-exists? mthome-cfgfile) - (read-config mthome-cfgfile view-cfgdat #t)) - ;; we load the home dir file AFTER the MTRAH file so the user can clobber settings when running the dashboard in read-only areas - (if (common:file-exists? home-cfgfile) - (read-config home-cfgfile view-cfgdat #t)) - view-cfgdat)) - -;;====================================================================== -;; H I E R A R C H I C A L H A S H T A B L E S -;;====================================================================== -;; -;; Every element including top element is a vector: -;; - -(define (hh:make-hh #!key (ht #f)(value #f)) - (vector (or ht (make-hash-table)) value)) - -;;====================================================================== -;; used internally -(define-inline (hh:set-ht! hh ht) (vector-set! hh 0 ht)) -(define-inline (hh:get-ht hh) (vector-ref hh 0)) -(define-inline (hh:set-value! hh value) (vector-set! hh 1 value)) -(define-inline (hh:get-value hh value) (vector-ref hh 1)) - -;;====================================================================== -;; given a hierarchial hash and some keys look up the value ... -;; -(define (hh:get hh . keys) - (if (null? keys) - (vector-ref hh 1) ;; we have reached the end of the line, return the value sought - (let ((sub-ht (hh:get-ht hh))) - (if sub-ht ;; yes, there is more hierarchy - (let ((sub-hh (hash-table-ref/default sub-ht (car keys) #f))) - (if sub-hh - (apply hh:get sub-hh (cdr keys)) - #f)) - #f)))) - -;;====================================================================== -;; given a hierarchial hash, a value and some keys, add needed hierarcy and insert the value -;; -(define (hh:set! hh value . keys) - (if (null? keys) - (hh:set-value! hh value) ;; we have reached the end of the line, store the value - (let ((sub-ht (hh:get-ht hh))) - (if sub-ht ;; yes, there is more hierarchy - (let ((sub-hh (hash-table-ref/default sub-ht (car keys) #f))) - (if (not sub-hh) ;; we'll need to add the next level of hierarchy - (let ((new-sub-hh (hh:make-hh))) - (hash-table-set! sub-ht (car keys) new-sub-hh) - (apply hh:set! new-sub-hh value (cdr keys))) - (apply hh:set! sub-hh value (cdr keys)))) ;; call the sub-hierhash with remaining keys - (begin - (hh:set-ht! hh (make-hash-table)) - (apply hh:set! hh value keys)))))) - -;;====================================================================== -;; Manage pkts, used in servers, tests and likely other contexts so put -;; in common -;;====================================================================== - -(define common:pkts-spec - '((default . ((parent . P) - (action . a) - (filename . f))) - (configf . ((parent . P) - (action . a) - (filename . f))) - (server . ((action . a) - (pid . d) - (ipaddr . i) - (port . p) - (parent . P))) - - (test . ((cpuuse . c) - (diskuse . d) - (item-path . i) - (runname . r) - (state . s) - (target . t) - (status . u) - (parent . P))))) - -(define (common:get-pkts-dirs mtconf use-lt) - (let* ((pktsdirs-str (or (configf:lookup mtconf "setup" "pktsdirs") - (and use-lt - (conc (or *toppath* - (current-directory)) - "/lt/.pkts")))) - (pktsdirs (if pktsdirs-str - (string-split pktsdirs-str " ") - #f))) - pktsdirs)) - -;;====================================================================== -;; use-lt is use linktree "lt" link to find pkts dir -(define (common:save-pkt pktalist-in mtconf use-lt #!key (add-only #f)) ;; add-only saves the pkt only if there is a parent already - (if (or (not add-only) - (hash-table-exists? *pkts-info* 'last-parent)) - (let* ((parent (hash-table-ref/default *pkts-info* 'last-parent #f)) - (pktalist (if parent - (cons `(parent . ,parent) - pktalist-in) - pktalist-in))) - (let-values (((uuid pkt) - (alist->pkt pktalist common:pkts-spec))) - (hash-table-set! *pkts-info* 'last-parent uuid) - (let ((pktsdir (or (hash-table-ref/default *pkts-info* 'pkts-dir #f) - (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt)) - (pktsdir (car pktsdirs))) ;; assume it is there - (hash-table-set! *pkts-info* 'pkts-dir pktsdir) - pktsdir)))) - (debug:print 0 *default-log-port* "pktsdir: "pktsdir) - (handle-exceptions - exn - (debug:print-info 0 "failed to write out packet to " pktsdir ", exn=" exn) ;; don't care if this failed for now but MUST FIX - BUG!! - (if (not (file-exists? pktsdir)) - (create-directory pktsdir #t)) - (with-output-to-file - (conc pktsdir "/" uuid ".pkt") - (lambda () - (print pkt))))))))) - -(define (common:with-queue-db mtconf proc #!key (use-lt #f)(toppath-in #f)) - (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt)) - (pktsdir (if pktsdirs (car pktsdirs) #f)) - (toppath (or (configf:lookup mtconf "scratchdat" "toppath") - toppath-in)) - (pdbpath (or (configf:lookup mtconf "setup" "pdbpath") pktsdir))) - (cond - ((not (and pktsdir toppath pdbpath)) - (debug:print 0 *default-log-port* "ERROR: settings are missing in your megatest.config for area management.") - (debug:print 0 *default-log-port* " you need to have pktsdirs in the [setup] section.")) - ((not (common:file-exists? pktsdir)) - (debug:print 0 *default-log-port* "ERROR: pkts directory not found " pktsdir)) - ((not (equal? (file-owner pktsdir)(current-effective-user-id))) - (debug:print 0 *default-log-port* "ERROR: directory " pktsdir " is not owned by " (current-effective-user-name))) - (else - (let* ((pdb (open-queue-db pdbpath "pkts.db" - schema: '("CREATE TABLE groups (id INTEGER PRIMARY KEY,groupname TEXT, CONSTRAINT group_constraint UNIQUE (groupname));")))) - (proc pktsdirs pktsdir pdb) - (dbi:close pdb)))))) - -(define (common:load-pkts-to-db mtconf #!key (use-lt #f)) - (common:with-queue-db - mtconf - (lambda (pktsdirs pktsdir pdb) - (for-each - (lambda (pktsdir) ;; look at all - (cond - ((not (common:file-exists? pktsdir)) - (debug:print 0 *default-log-port* "ERROR: packets directory " pktsdir " does not exist.")) - ((not (directory? pktsdir)) - (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not a directory.")) - ((not (file-read-access? pktsdir)) - (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not readable.")) - (else - (debug:print-info 0 *default-log-port* "Loading packets found in " pktsdir) - (let ((pkts (glob (conc pktsdir "/*.pkt"))) - (sqdb (dbi:db-conn pdb)) - ) - ;; Put this in a transaction to avoid issues overloading the db - (sqlite3:with-transaction - sqdb - (lambda () - (for-each - (lambda (pkt) - (let* ((uuid (cadr (string-match ".*/([0-9a-f]+).pkt" pkt))) - (exists (lookup-by-uuid pdb uuid #f))) - (if (not exists) - (let* ((pktdat (string-intersperse - (with-input-from-file pkt read-lines) - "\n")) - (apkt (pkt->alist pktdat)) - (ptype (alist-ref 'T apkt))) - (add-to-queue pdb pktdat uuid (or ptype 'cmd) #f 0) - (debug:print 4 *default-log-port* "Added " uuid " of type " ptype " to queue")) - (debug:print 4 *default-log-port* "pkt: " uuid " exists, skipping...") - ))) - pkts))))))) - pktsdirs)) - use-lt: use-lt)) - -(define (common:get-pkt-alists pkts) - (map (lambda (x) - (alist-ref 'apkt x)) ;; 'pkta pulls out the alist from the read pkt - pkts)) - -;;====================================================================== -;; given list of pkts (alist mode) return list of D cards as Unix epoch, sorted descending -;; also delete duplicates by target i.e. (car pkt) -;; -(define (common:get-pkt-times pkts) - (delete-duplicates - (sort - (map (lambda (x) - `(,(alist-ref 't x) . ,(string->number (alist-ref 'D x)))) - pkts) - (lambda (a b)(> (cdr a)(cdr b)))) ;; sort descending - (lambda (a b)(equal? (car a)(car b))))) ;; remove duplicates by target - - -(define *common:thread-punchlist* (make-hash-table)) -(define (common:send-thunk-to-background-thread thunk #!key (name #f)) - ;;(BB> "launched thread " name) - ;; we need a unique name for the thread. - (let* ((realname (if name - (if (not (hash-table-ref/default *common:thread-punchlist* name #f)) - name - (conc name"-" (symbol->string (gensym)))) - (conc "anonymous-"(symbol->string (gensym))))) - (realthunk (lambda () - (let ((res (thunk))) - (hash-table-delete! *common:thread-punchlist* realname) - res))) - (thread (make-thread realthunk realname))) - (hash-table-set! *common:thread-punchlist* realname thread) - (thread-start! thread) - )) - -(define (common:join-backgrounded-threads) - ;; may need to trap and ignore exceptions -- dunno how atomic threads are... - (for-each - (lambda (thread-name) - (let* ((thread (hash-table-ref/default *common:thread-punchlist* thread-name #f))) - (if thread - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "joining threads failed. exn=" exn) - #t) ;; just ignore it, it might have died in the meantime so joining it will throw an exception - (thread-join! thread)) - ))) - (hash-table-keys *common:thread-punchlist*))) + + + ;;====================================================================== ;; (define *common:telemetry-log-state* 'startup) ;; (define *common:telemetry-log-socket* #f) ;; Index: common_records.scm ================================================================== --- common_records.scm +++ common_records.scm @@ -18,11 +18,11 @@ ;; ;;====================================================================== ;; (use trace) -(include "altdb.scm") +;; (include "altdb.scm") ;; Some of these routines use: ;; ;; http://www.cs.toronto.edu/~gfb/scheme/simple-macros.html ;; Index: commonmod.scm ================================================================== --- commonmod.scm +++ commonmod.scm @@ -18,12 +18,40 @@ ;;====================================================================== (declare (unit commonmod)) (declare (uses debugprint)) +(declare (uses mtargs)) +(declare (uses stml2)) -(use srfi-69) +(use srfi-69 + srfi-18 + srfi-1 + srfi-13 + (prefix base64 base64:) + (prefix sqlite3 sqlite3:) + data-structures + extras + files + matchable + md5 + message-digest + pathname-expand + posix + posix-extras + regex + regex-case + sparse-vectors + srfi-1 + srfi-13 + srfi-18 + srfi-69 + typed-records + z3) + +(import stml2 + ) (module commonmod * (import scheme) @@ -30,11 +58,12 @@ (cond-expand (chicken-4 (import chicken ports - + (prefix base64 base64:) + (prefix sqlite3 sqlite3:) data-structures extras files matchable @@ -43,16 +72,21 @@ pathname-expand posix posix-extras regex regex-case + sparse-vectors srfi-1 + srfi-13 srfi-18 srfi-69 typed-records - + z3 + debugprint + stml2 + (prefix mtargs args:) ) (use srfi-69)) (chicken-5 (import (prefix sqlite3 sqlite3:) ;; data-structures @@ -124,10 +158,230 @@ (begin (hash-table-set! *common:denoise* key currtime) #t) #f))) +;; GLOBALS + +;; CONTEXTS +(defstruct cxt + (taskdb #f) + (cmutex (make-mutex))) +;; (define *contexts* (make-hash-table)) +;; (define *context-mutex* (make-mutex)) + +;; ;; safe method for accessing a context given a toppath +;; ;; +;; (define (common:with-cxt toppath proc) +;; (mutex-lock! *context-mutex*) +;; (let ((cxt (hash-table-ref/default *contexts* toppath #f))) +;; (if (not cxt) +;; (set! cxt (let ((x (make-cxt)))(hash-table-set! *contexts* toppath x) x))) +;; (let ((cxt-mutex (cxt-mutex cxt))) +;; (mutex-unlock! *context-mutex*) +;; (mutex-lock! cxt-mutex) +;; (let ((res (proc cxt))) +;; (mutex-unlock! cxt-mutex) +;; res)))) + +;; A hash table that can be accessed by #{scheme ...} calls in +;; config files. Allows communicating between confgs +;; +(define *user-hash-data* (make-hash-table)) + +(define *db-keys* #f) + +(define *pkts-info* (make-hash-table)) ;; store stuff like the last parent here +(define *configinfo* #f) ;; raw results from setup, includes toppath and table from megatest.config +(define *runconfigdat* #f) ;; run configs data +(define *configdat* #f) ;; megatest.config data +(define *configstatus* #f) ;; status of data; 'fulldata : all processing done, #f : no data yet, 'partialdata : partial read done +;; (define *toppath* #f) ;; moved to commonmod +(define *already-seen-runconfig-info* #f) + +(define *test-meta-updated* (make-hash-table)) +(define *globalexitstatus* 0) ;; attempt to work around possible thread issues +(define *passnum* 0) ;; when running track calls to run-tests or similar +;; (define *alt-log-file* #f) ;; used by -log +;; (define *common:denoise* (make-hash-table)) ;; for low noise printing +(define *time-zero* (current-seconds)) ;; for the watchdog +(define *on-exit-procs* '()) ;; add procs to this list to be executed on exit +(define *default-area-tag* "local") + +;; DATABASE +;; task db +(define *task-db* #f) ;; (vector db path-to-db) +(define *db-access-allowed* #t) ;; flag to allow access +;; (define *db-access-mutex* (make-mutex)) ;; moved to dbfile +;; (define *db-transaction-mutex* (make-mutex)) +(define *db-cache-path* #f) +;; (define *db-with-db-mutex* (make-mutex)) +(define *db-api-call-time* (make-hash-table)) ;; hash of command => (list of times) + +;; SERVER +(define *transport-type* 'http) ;; override with [server] transport http|rpc|nmsg +(define *runremote* #f) ;; if set up for server communication this will hold +;; (define *max-cache-size* 0) +(define *logged-in-clients* (make-hash-table)) +(define *server-id* #f) +;; (define *server-info* #f) ;; good candidate for easily convert to non-global +(define *time-to-exit* #f) +(define *run-id* #f) +(define *server-kind-run* (make-hash-table)) +(define *home-host* #f) +;; (define *total-non-write-delay* 0) +(define *heartbeat-mutex* (make-mutex)) +;; (define *api-process-request-count* 0) +;; (define *max-api-process-requests* 0) +(define *server-overloaded* #f) + +;; client +(define *rmt-mutex* (make-mutex)) ;; remote access calls mutex + +;; RPC transport +(define *rpc:listener* #f) + +;; KEY info +(define *target* (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN +(define *keys* (make-hash-table)) ;; cache the keys here +(define *keyvals* (make-hash-table)) +(define *toptest-paths* (make-hash-table)) ;; cache toptest path settings here +(define *test-paths* (make-hash-table)) ;; cache test-id to test run paths here +(define *test-ids* (make-hash-table)) ;; cache run-id, testname, and item-path => test-id +(define *test-info* (make-hash-table)) ;; cache the test info records, update the state, status, run_duration etc. from testdat.db + +(define *run-info-cache* (make-hash-table)) ;; run info is stable, no need to reget +(define *launch-setup-mutex* (make-mutex)) ;; need to be able to call launch:setup often so mutex it and re-call the real deal only if *toppath* not set +(define *homehost-mutex* (make-mutex)) + +;; Miscellaneous +(define *triggers-mutex* (make-mutex)) ;; block overlapping processing of triggers +(define *numcpus-cache* (make-hash-table)) + +;; this plugs a hole in posix-extras in recent chicken versions > 4.9) +(let-values (( (chicken-release-number chicken-major-version) + (apply values + (map string->number + (take + (string-split (chicken-version) ".") + 2))))) + (let ((resolve-pathname-broken? + (or (> chicken-release-number 4) + (and (eq? 4 chicken-release-number) (> chicken-major-version 9))))) + (if resolve-pathname-broken? + (define ##sys#expand-home-path pathname-expand)))) + +(define (realpath x) (resolve-pathname (pathname-expand (or x "/dev/null")) )) + +(define (common:get-this-exe-fullpath #!key (argv (argv))) + (let* ((this-script + (cond + ((and (> (length argv) 2) + (string-match "^(.*/csi|csi)$" (car argv)) + (string-match "^-(s|ss|sx|script)$" (cadr argv))) + (caddr argv)) + (else (car argv)))) + (fullpath (realpath this-script))) + fullpath)) + +;;====================================================================== + +(define *common:this-exe-fullpath* (common:get-this-exe-fullpath)) +(define *common:this-exe-dir* (pathname-directory *common:this-exe-fullpath*)) +(define *common:this-exe-name* (pathname-strip-directory *common:this-exe-fullpath*)) + +(define (common:get-sync-lock-filepath) + (let* ((tmp-area (common:make-tmpdir-name *toppath* "")) + (lockfile (conc tmp-area "/megatest.db.lock"))) + lockfile)) + +(define *common:logpro-exit-code->status-sym-alist* + '( ( 0 . pass ) + ( 1 . fail ) + ( 2 . warn ) + ( 3 . check ) + ( 4 . waived ) + ( 5 . abort ) + ( 6 . skip ))) + +(define (common:logpro-exit-code->status-sym exit-code) + (or (alist-ref exit-code *common:logpro-exit-code->status-sym-alist*) 'fail)) + +(define (common:worse-status-sym ss1 ss2) + (let loop ((status-syms-remaining '(abort fail check skip warn waived pass))) + (cond + ((null? status-syms-remaining) + 'fail) + ((eq? (car status-syms-remaining) ss1) + ss1) + ((eq? (car status-syms-remaining) ss2) + ss2) + (else + (loop (cdr status-syms-remaining)))))) + +(define (common:steps-can-proceed-given-status-sym status-sym) + (if (member status-sym '(warn waived pass)) + #t + #f)) + +(define (status-sym->string status-sym) + (case status-sym + ((pass) "PASS") + ((fail) "FAIL") + ((warn) "WARN") + ((check) "CHECK") + ((waived) "WAIVED") + ((abort) "ABORT") + ((skip) "SKIP") + (else "FAIL"))) + +(define (common:logpro-exit-code->test-status exit-code) + (status-sym->string (common:logpro-exit-code->status-sym exit-code))) + +;; launching and hosts +(defstruct host + (reachable #f) + (last-update 0) + (last-used 0) + (last-cpuload 1)) + +(define *host-loads* (make-hash-table)) + +;; cache environment vars for each run here +(define *env-vars-by-run-id* (make-hash-table)) + +;; Testconfig and runconfig caches. +(define *testconfigs* (make-hash-table)) ;; test-name => testconfig +(define *runconfigs* (make-hash-table)) ;; target => runconfig + +;; This is a cache of pre-reqs met, don't re-calc in cases where called with same params less than +;; five seconds ago +(define *pre-reqs-met-cache* (make-hash-table)) + +;; cache of verbosity given string +;; +(define *verbosity-cache* (make-hash-table)) + +(define (common:clear-caches) + (set! *target* (make-hash-table)) + (set! *keys* (make-hash-table)) + (set! *keyvals* (make-hash-table)) + (set! *toptest-paths* (make-hash-table)) + (set! *test-paths* (make-hash-table)) + (set! *test-ids* (make-hash-table)) + (set! *test-info* (make-hash-table)) + (set! *run-info-cache* (make-hash-table)) + (set! *env-vars-by-run-id* (make-hash-table)) + (set! *test-id-cache* (make-hash-table))) + +;; Generic string database +(define sdb:qry #f) ;; (make-sdb:qry)) ;; 'init #f) +;; Generic path database +(define *fdb* #f) + +(define *last-launch* (current-seconds)) ;; use for throttling the launch rate. Would be better to use the db and last time of a test in LAUNCHED state. + ;; environment vars handy stuff from common.scm ;; (define getenv get-environment-variable) (define (safe-setenv key val) (if (or (substring-index "!" key) @@ -186,10 +440,12 @@ (define home (getenv "HOME")) (define user (getenv "USER")) ;;====================================================================== ;; return a nice clean pathname made absolute +;;====================================================================== + (define (common:nice-path dir) (let ((match (string-match "^(~[^\\/]*)(\\/.*|)$" dir))) (if match ;; using ~ for home? (common:nice-path (conc (common:read-link-f (cadr match)) "/" (caddr match))) (normalize-pathname (if (absolute-pathname? dir) @@ -265,11 +521,12 @@ ;;====================================================================== ;; does the directory exist and do we have write access? ;; ;; returns the directory or #f -;; +;;====================================================================== + (define (common:directory-writable? path-string) (handle-exceptions exn (begin (debug:print 0 *default-log-port* "Failed to identify access to " path-string ", exn=" exn) @@ -364,10 +621,88 @@ (delete-file* fname))) ;;====================================================================== ;; misc conversion, data manipulation functions ;;====================================================================== + +;;====================================================================== +;; old stuff from keys.scm +;;====================================================================== + +(include "key_records.scm") +(include "common_records.scm") + +(define (keys->keystr keys) ;; => key1,key2,key3,additiona1, ... + (string-intersperse keys ",")) + +;; (define (args:usage . a) #f) + +(define (tasks:wait-on-journal path n #!key (remove #f)(waiting-msg #f)) + (if (not (string? path)) + (debug:print-error 0 *default-log-port* "Called tasks:wait-on-journal with path=" path " (not a string)") + (let ((fullpath (conc path "-journal"))) + (handle-exceptions + exn + (begin + (print-call-chain (current-error-port)) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 5 *default-log-port* " exn=" (condition->list exn)) + (debug:print 0 *default-log-port* "tasks:wait-on-journal failed. Continuing on, you can ignore this call-chain") + #t) ;; if stuff goes wrong just allow it to move on + (let loop ((journal-exists (file-exists? fullpath)) + (count n)) ;; wait ten times ... + (if journal-exists + (begin + (if (and waiting-msg + (eq? (modulo n 30) 0)) + (debug:print 0 *default-log-port* waiting-msg)) + (if (> count 0) + (begin + (thread-sleep! 1) + (loop (file-exists? fullpath) + (- count 1))) + (begin + (debug:print 0 *default-log-port* "ERROR: removing the journal file " fullpath ", this is not good. Look for disk full, write access and other issues.") + (if remove (system (conc "rm -rf " fullpath))) + #f))) + #t)))))) + +;;====================================================================== +;; key <=> target routines +;;====================================================================== + +;; This invalidates using "/" in item names. Every key will be +;; available via args:get-arg as :keyfield. Since this only needs to +;; be called once let's use it to set the environment vars +;; +;; The setting of :keyfield in args should be turned off ASAP +;; +(define (keys:target-set-args keys target ht) + (if target + (let ((vals (string-split target "/"))) + (if (eq? (length vals)(length keys)) + (for-each (lambda (key val) + (setenv key val) + (if ht (hash-table-set! ht (conc ":" key) val))) + keys + vals) + (debug:print-error 0 *default-log-port* "wrong number of values in " target ", should match " keys)) + vals) + (debug:print 4 *default-log-port* "ERROR: keys:target-set-args called with no target."))) + +;; given the keys (a list of vectors or a list of keys) and a target return a keyval list +;; keyval list ( (key1 val1) (key2 val2) ...) +(define (keys:target->keyval keys target) + (let* ((targlist (string-split target "/")) + (numkeys (length keys)) + (numtarg (length targlist)) + (targtweaked (if (> numkeys numtarg) + (append targlist (make-list (- numkeys numtarg) "")) + targlist))) + (map (lambda (key targ) + (list key targ)) + keys targtweaked))) ;;====================================================================== ;; return first command that exists, else #f ;; (define (common:which cmds) @@ -471,58 +806,10 @@ ;; get the normalized (i.e. load / numcpus) for *this* host ;; (define (get-normalized-cpu-load) (/ (commonmod:get-cpu-load)(get-current-host-cores))) -;;====================================================================== -;; testsuite and area utilites -;;====================================================================== - -(define (get-testsuite-name toppath configdat) - (or (lookup configdat "setup" "area-name") - (lookup configdat "setup" "testsuite") - (get-environment-variable "MT_TESTSUITE_NAME") - (if (string? toppath) - (pathname-file toppath) - #f))) - -(define (get-area-path-signature toppath #!optional (short #f)) - (let ((res (message-digest-string (md5-primitive) toppath))) - (if short - (substring res 0 4) - res))) - -(define (get-area-name configdat toppath #!optional (short #f)) - ;; look up my area name in areas table (future) - ;; generate auto name - (conc (get-area-path-signature toppath short) - "-" - (get-testsuite-name toppath configdat))) - -;; need generic find-record-with-var-nmatching-val -;; -(define (path->area-record cfgdat path) - (let* ((areadat (get-cfg-areas cfgdat)) - (all (filter (lambda (x) - (let* ((keyvals (cdr x)) - (pth (alist-ref 'path keyvals))) - (equal? path pth))) - areadat))) - (if (null? all) - #f - (car all)))) ;; return first match - -;; given a config return an alist of alists -;; area-name => data -;; -(define (get-cfg-areas cfgdat) - (let ((adat (get-section cfgdat "areas"))) - (map (lambda (entry) - `(,(car entry) . - ,(val->alist (cadr entry)))) - adat))) - ;;====================================================================== ;; time utils ;;====================================================================== (define (common:human-time) @@ -897,7 +1184,1557 @@ (if (eq? (length output) 0) #f #t)) #t))) ;; assuming bad query is about a live test is likely not the right thing to do? +(define (remove-files filespec) + (let ((files (glob filespec))) + (for-each delete-file files))) + +(define (stop-the-train) + (thread-start! (make-thread (lambda () + (let loop () + (if (and *toppath* + (file-exists? (conc *toppath*"/stop-the-train"))) + (let* ((msg (conc "ERROR: found file "*toppath*"/stop-the-train, exiting immediately"))) + ;; yes, print to current-output-port AND *default-log-port*, annoying but necessary I think + (print msg) + (debug:print 0 *default-log-port* msg) + (remove-files (conc *toppath* "/logs/server*")) + (remove-files (conc *toppath* "/.servinfo/*")) + (remove-files (conc *toppath* "/.mtdb/*lock")) + (exit 1))) + (thread-sleep! 5) + (loop)))))) + +;; execute thunk, return value. If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* . +;; arguments - thunk, message +(define (common:fail-safe thunk warning-message-on-exception) + (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "notable but nonfatal condition - "warning-message-on-exception", exn=" exn) + (debug:print-info 0 *default-log-port* + (string-substitute "\n?Error:" "nonfatal condition:" + (with-output-to-string + (lambda () + (print-error-message exn) )))) + (debug:print-info 0 *default-log-port* " -- continuing after nonfatal condition...") + #f) + (thunk))) + + +;; returns list of fd count, socket count +(define (get-file-descriptor-count #!key (pid (current-process-id ))) + (list + (length (glob (conc "/proc/" pid "/fd/*"))) + (length (filter identity (map socket? (glob (conc "/proc/" pid "/fd/*"))))) + ) +) + + + +(define (common:snapshot-file filepath #!key (subdir ".") ) + (if (file-exists? filepath) + (let* ((age-sec (lambda (file) + (if (file-exists? file) + (- (current-seconds) (file-modification-time file)) + 1000000000))) ;; return really old value if file doesn't exist. we want to clobber it if old or not exist. + (ok-flag #t) + (age-mins (lambda (file) (/ (age-sec file) 60))) + (age-hrs (lambda (file) (/ (age-mins file) 60))) + (age-days (lambda (file) (/ (age-hrs file) 24))) + (age-wks (lambda (file) (/ (age-days file) 7))) + (docmd (lambda (cmd) + (cond + (ok-flag + (let ((res (system cmd))) + (cond + ((eq? 0 res) + #t) + (else + (set! ok-flag #f) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Command failed with exit code " + (if (< res 0) + res + (/ res 8)) " ["cmd"]" ) + #f)))) + (else + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Not runnining command due to prior error. ["cmd"]") + #f)))) + (copy (lambda (src dest) (docmd (conc "/bin/cp '"src"' '"dest"'")))) + (copy+zip (lambda (src dest) (docmd (conc "gzip -c - < '"src"' > '"dest"'")))) + (fullpath (realpath filepath)) + (basedir (pathname-directory fullpath)) + (basefile (pathname-strip-directory fullpath)) + ;;(prevfile (conc filepath ".prev.gz")) + (minsfile (conc basedir "/" subdir "/" basefile ".mins.gz")) + (hrsfile (conc basedir "/" subdir "/" basefile ".hrs.gz")) + (daysfile (conc basedir "/" subdir "/" basefile ".days.gz")) + (wksfile (conc basedir "/" subdir "/" basefile ".weeks.gz"))) + + ;; create subdir it not exists + (if (not (directory-exists? (conc basedir "/" subdir))) + (docmd (conc "/bin/mkdir -p '"(conc basedir "/" subdir)"'"))) + + ;; copy&zip to .mins if not exists + (if (not (file-exists? minsfile)) + (copy+zip filepath minsfile)) + ;; copy .mins to .hrs if not exists + (if (not (file-exists? hrsfile)) + (copy minsfile hrsfile)) + ;; copy .hrs to .days if not exists + (if (not (file-exists? daysfile)) + (copy hrsfile daysfile)) + ;; copy .days to .weeks if not exists + (if (not (file-exists? wksfile)) + (copy daysfile wksfile)) + + + ;; if age(.mins.gz) >= 1h: + ;; copy .mins.gz .hrs.gz + ;; copy .mins.gz + (when (>= (age-mins minsfile) 1) + (copy minsfile hrsfile) + (copy+zip filepath minsfile)) + + ;; if age(.hrs.gz) >= 1d: + ;; copy .hrs.gz .days.gz + ;; copy .mins.gz .hrs.gz + (when (>= (age-days hrsfile) 1) + (copy hrsfile daysfile) + (copy minsfile hrsfile)) + + ;; if age(.days.gz) >= 1w: + ;; copy .days.gz .weeks.gz + ;; copy .hrs.gz .days.gz + (when (>= (age-wks daysfile) 1) + (copy daysfile wksfile) + (copy hrsfile daysfile)) + #t) + #f)) + + +;;====================================================================== +;; S P A R S E A R R A Y S +;;====================================================================== + +(define (make-sparse-array) + (let ((a (make-sparse-vector))) + (sparse-vector-set! a 0 (make-sparse-vector)) + a)) + +(define (sparse-array? a) + (and (sparse-vector? a) + (sparse-vector? (sparse-vector-ref a 0)))) + +(define (sparse-array-ref a x y) + (let ((row (sparse-vector-ref a x))) + (if row + (sparse-vector-ref row y) + #f))) + +(define (sparse-array-set! a x y val) + (let ((row (sparse-vector-ref a x))) + (if row + (sparse-vector-set! row y val) + (let ((new-row (make-sparse-vector))) + (sparse-vector-set! a x new-row) + (sparse-vector-set! new-row y val))))) + +;;====================================================================== +;; U S E F U L S T U F F +;;====================================================================== + +;; convert things to an alist or assoc list, #f gets converted to "" +;; +(define (common:to-alist dat) + (cond + ((list? dat) (map common:to-alist dat)) + ((vector? dat) + (map common:to-alist (vector->list dat))) + ((pair? dat) + (cons (common:to-alist (car dat)) + (common:to-alist (cdr dat)))) + ((hash-table? dat) + (map common:to-alist (hash-table->alist dat))) + (else + (if dat + dat + "")))) + +(define (common:alist-ref/default key alist default) + (or (alist-ref key alist) default)) + +;; moved into commonmod +;; +;; (define (common:low-noise-print waitval . keys) +;; (let* ((key (string-intersperse (map conc keys) "-" )) +;; (lasttime (hash-table-ref/default *common:denoise* key 0)) +;; (currtime (current-seconds))) +;; (if (> (- currtime lasttime) waitval) +;; (begin +;; (hash-table-set! *common:denoise* key currtime) +;; #t) +;; #f))) + +(define (common:read-encoded-string instr) + (handle-exceptions + exn + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* "received bad encoded string \"" instr "\", message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) + (print-call-chain (current-error-port)) + #f) + (read (open-input-string (base64:base64-decode instr)))) + (read (open-input-string (z3:decode-buffer (base64:base64-decode instr)))))) + + +;;====================================================================== +;; S T A T E S A N D S T A T U S E S +;;====================================================================== + +;; BBnote: *common:std-states* - dashboard filter control and test control state buttons defined here; used in set-fields-panel and dboard:make-controls +(define *common:std-states* ;; for toggle buttons in dashboard + '( + (0 "ARCHIVED") + (1 "STUCK") + (2 "KILLREQ") + (3 "KILLED") + (4 "NOT_STARTED") + (5 "COMPLETED") + (6 "LAUNCHED") + (7 "REMOTEHOSTSTART") + (8 "RUNNING") + )) + +(define *common:dont-roll-up-states* + '("DELETED" + "REMOVING" + "CLEANING" + "ARCHIVE_REMOVING" + )) + +;;====================================================================== +;; BBnote: *common:std-statuses* dashboard filter control and test control status buttons defined here; used in set-fields-panel and dboard:make-controls +;; note these statuses are sorted from better to worse. +;; This sort order is important to dcommon:status-compare3 and db:set-state-status-and-roll-up-items +(define *common:std-statuses* + '(;; (0 "DELETED") + (1 "n/a") + (2 "PASS") + (3 "SKIP") + (4 "WARN") + (5 "WAIVED") + (6 "CHECK") + (7 "STUCK/DEAD") + (8 "DEAD") + (9 "FAIL") + (10 "PREQ_FAIL") + (11 "PREQ_DISCARDED") + (12 "ABORT"))) + +(define *common:ended-states* ;; states which indicate the test is stopped and will not proceed + '("COMPLETED" "ARCHIVED" "KILLED" "KILLREQ" "STUCK" "INCOMPLETE" )) + +(define *common:badly-ended-states* ;; these roll up as CHECK, i.e. results need to be checked + '("KILLED" "KILLREQ" "STUCK" "INCOMPLETE" "DEAD" "CHECK")) + +(define *common:well-ended-states* ;; an item's prereq in this state allows item to proceed + '("PASS" "WARN" "WAIVED" "SKIP")) + +;; BBnote: *common:running-states* used from db:set-state-status-and-roll-up-items +(define *common:running-states* ;; test is either running or can be run + '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED" "STARTED")) + +(define *common:cant-run-states* ;; These are stopping conditions that prevent a test from being run + '("COMPLETED" "KILLED" "UNKNOWN" "INCOMPLETE" "ARCHIVED")) + +(define *common:not-started-ok-statuses* ;; if not one of these statuses when in not_started state treat as dead + '("n/a" "na" "PASS" "FAIL" "WARN" "CHECK" "WAIVED" "DEAD" "SKIP")) + +;;====================================================================== +;; group tests into buckets corresponding to rollup +;;; Running, completed-pass, completed-non-pass + worst status, not started. +;; filter out +;(define (common:categorize-items-for-rollup in-tests) +; ( + +(define (common:special-sort items order comp) + (let ((items-order (map reverse order)) + (acomp (or comp >))) + (sort items + (lambda (a b) + (let ((a-num (cadr (or (assoc a items-order) '(0 0)))) + (b-num (cadr (or (assoc b items-order) '(0 0))))) + (acomp a-num b-num)))))) + +;;====================================================================== +;; ;; given a toplevel with currstate, currstatus apply state and status +;; ;; => (newstate . newstatus) +;; (define (common:apply-state-status currstate currstatus state status) +;; (let* ((cstate (string->symbol (string-downcase currstate))) +;; (cstatus (string->symbol (string-downcase currstatus))) +;; (sstate (string->symbol (string-downcase state))) +;; (sstatus (string->symbol (string-downcase status))) +;; (nstate #f) +;; (nstatus #f)) +;; (set! nstate +;; (case cstate +;; ((completed not_started killed killreq stuck archived) +;; (case sstate ;; completed -> sstate +;; ((completed killed killreq stuck archived) completed) +;; ((running remotehoststart launched) running) +;; (else unknown-error-1))) +;; ((running remotehoststart launched) +;; (case sstate +;; ((completed killed killreq stuck archived) #f) ;; need to look at all items +;; ((running remotehoststart launched) running) +;; (else unknown-error-2))) +;; (else unknown-error-3))) +;; (set! nstatus +;; (case sstatus +;; ((pass) +;; (case nstate +;; ((pass n/a deleted) pass) +;; ((warn) warn) +;; ((fail) fail) +;; ((check) check) +;; ((waived) waived) +;; ((skip) skip) +;; ((stuck/dead) stuck) +;; ((abort) abort) +;; (else unknown-error-4))) +;; ((warn) +;; (case nstate +;; ((pass warn n/a skip deleted) warn) +;; ((fail) fail) +;; ((check) check) +;; ((waived) waived) +;; ((stuck/dead) stuck) +;; (else unknown-error-5))) +;; ((fail) +;; (case nstate +;; ((pass warn fail check n/a waived skip deleted stuck/dead stuck) fail) +;; ((abort) abort) +;; (else unknown-error-6))) +;; (else unknown-error-7))) +;; (cons +;; (if nstate (symbol->string nstate) nstate) +;; (if nstatus (symbol->string nstatus) nstatus)))) + +;;====================================================================== +;; D E B U G G I N G S T U F F +;;====================================================================== + +(define *verbosity* 1) +(define *logging* #f) + +(define (get-with-default val default) + (let ((val (args:get-arg val))) + (if val val default))) + +(define (assoc/default key lst . default) + (let ((res (assoc key lst))) + (if res (cadr res)(if (null? default) #f (car default))))) + +;;====================================================================== +;; safe getting of toppath +(define (common:get-toppath areapath) + (or *toppath* + (if areapath + (begin + (set! *toppath* areapath) + (setenv "MT_RUN_AREA_HOME" areapath) + areapath) + #f) + (if (getenv "MT_RUN_AREA_HOME") + (begin + (set! *toppath* (getenv "MT_RUN_AREA_HOME")) + *toppath*) + #f) + ;; last resort, look for megatest.config + (let loop ((thepath (realpath "."))) + (if (file-exists? (conc thepath "/megatest.config")) + thepath + (if (equal? thepath "/") + (begin + (debug:print-error 0 *default-log-port* "Unable to find megatest home directory.") + #f) + (loop (pathname-directory thepath))))) + )) + + +(define (get-area-path-signature toppath #!optional (short #f)) + (let ((res (message-digest-string (md5-primitive) toppath))) + (if short + (substring res 0 4) + res))) + + +(define (common:get-area-path-signature) + (message-digest-string (md5-primitive) *toppath*)) + +;; (set-signal-handler! signal/stop std-signal-handler) ;; ^Z NO, do NOT handle ^Z! + +;;====================================================================== +;; M I S C U T I L S +;;====================================================================== + +;;====================================================================== +;; convert stuff to a number if possible +(define (any->number val) + (cond + ((number? val) val) + ((string? val) (string->number val)) + ((symbol? val) (any->number (symbol->string val))) + (else #f))) + +(define (any->number-if-possible val) + (let ((num (any->number val))) + (if num num val))) + +(define (patt-list-match item patts) + (debug:print-info 8 *default-log-port* "patt-list-match item=" item " patts=" patts) + (if (and item patts) ;; here we are filtering for matches with item patterns + (let ((res #f)) ;; look through all the item-patts if defined, format is patt1,patt2,patt3 ... wildcard is % + (for-each + (lambda (patt) + (let ((modpatt (string-substitute "%" ".*" patt #t))) + (debug:print-info 10 *default-log-port* "patt " patt " modpatt " modpatt) + (if (string-match (regexp modpatt) item) + (set! res #t)))) + (string-split patts ",")) + res) + #t)) + +;;====================================================================== +;; return first path that can be created or already exists and is writable +;; +(define (common:get-create-writeable-dir dirs) + (if (null? dirs) + #f + (let loop ((hed (car dirs)) + (tal (cdr dirs))) + (let ((res (or (and (directory? hed) + (file-write-access? hed) + hed) + (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "could not create " hed + ", this might cause problems down the road. exn=" exn) + #f) + (create-directory hed #t))))) + (if (and (string? res) + (directory? res)) + res + (if (null? tal) + #f + (loop (car tal)(cdr tal)))))))) + +;;====================================================================== +;; return the youngest timestamp . filename +;; +(define (common:get-youngest glob-list) + (let ((all-files (apply append + (map (lambda (patt) + (handle-exceptions + exn + '() + (glob patt))) + glob-list)))) + (fold (lambda (fname res) + (let ((last-mod (car res)) + (curmod (handle-exceptions + exn + 0 + (file-modification-time fname)))) + (if (> curmod last-mod) + (list curmod fname) + res))) + '(0 "n/a") + all-files))) + +;;====================================================================== +;; use bash to expand a glob. Does NOT handle paths with spaces! +;; +(define (common:bash-glob instr) + (string-split + (with-input-from-pipe + (conc "/bin/bash -c \"echo " instr "\"") + read-line))) + +;;====================================================================== +;; Some safety net stuff +;;====================================================================== + +;;====================================================================== +;; return input if it is a list or return null +(define (common:list-or-null inlst #!key (ovrd #f)(message #f)) + (if (list? inlst) + inlst + (begin + (if message (debug:print-error 0 *default-log-port* message)) + (or ovrd '())))) + + +;;====================================================================== +;; M I S C L I S T S +;;====================================================================== + +;;====================================================================== +;; items in lista are matched value and position in listb +;; return the remaining items in listb or #f +;; +(define (common:list-is-sublist lista listb) + (if (null? lista) + listb ;; all items in listb are "remaining" + (if (> (length lista)(length listb)) + #f + (let loop ((heda (car lista)) + (tala (cdr lista)) + (hedb (car listb)) + (talb (cdr listb))) + (if (equal? heda hedb) + (if (null? tala) ;; we are done + talb + (loop (car tala) + (cdr tala) + (car talb) + + (cdr talb))) + #f))))) + +;;====================================================================== +;; Needed for long lists to be sorted where (apply max ... ) dies +;; +(define (common:max inlst) + (let loop ((max-val (car inlst)) + (hed (car inlst)) + (tal (cdr inlst))) + (if (not (null? tal)) + (loop (max hed max-val) + (car tal) + (cdr tal)) + (max hed max-val)))) + +;;====================================================================== +;; get min or max, use > for max and < for min, this works around the limits on apply +;; +(define (common:min-max comp lst) + (if (null? lst) + #f ;; better than an exception for my needs + (fold (lambda (a b) + (if (comp a b) a b)) + (car lst) + lst))) + +;;====================================================================== +;; get min or max, use > for max and < for min, this works around the limits on apply +;; +(define (common:sum lst) + (if (null? lst) + 0 + (fold (lambda (a b) + (+ a b)) + (car lst) + lst))) + +;;====================================================================== +;; path list to hash-table tree +;; ((a b c)(a b d)(e b c)) => ((a (b (d) (c))) (e (b (c)))) +;; +(define (common:list->htree lst) + (let ((resh (make-hash-table))) + (for-each + (lambda (inlst) + (let loop ((ht resh) + (hed (car inlst)) + (tal (cdr inlst))) + (if (hash-table-ref/default ht hed #f) + (if (not (null? tal)) + (loop (hash-table-ref ht hed) + (car tal) + (cdr tal))) + (begin + (hash-table-set! ht hed (make-hash-table)) + (loop ht hed tal))))) + lst) + resh)) + +;;====================================================================== +;; hash-table tree to html list tree +;; +;; tipfunc takes two parameters: y the tip value and path the path to that point +;; +(define (common:htree->html ht path tipfunc) + (let ((datlist (sort (hash-table->alist ht) + (lambda (a b) + (string< (car a)(car b)))))) + (if (null? datlist) + (tipfunc #f path) ;; really shouldn't get here + (s:ul + (map (lambda (x) + (let* ((levelname (car x)) + (y (cdr x)) + (newpath (append path (list levelname))) + (leaf (or (not (hash-table? y)) + (null? (hash-table-keys y))))) + (if leaf + (s:li (tipfunc y newpath)) + (s:li + (list + levelname + (common:htree->html y newpath tipfunc)))))) + datlist))))) + +;;====================================================================== +;; hash-table tree to alist tree +;; +(define (common:htree->atree ht) + (map (lambda (x) + (cons (car x) + (let ((y (cdr x))) + (if (hash-table? y) + (common:htree->atree y) + y)))) + (hash-table->alist ht))) + +;;====================================================================== +;; M U N G E D A T A I N T O N I C E F O R M S +;;====================================================================== + +;;====================================================================== +;; Generate an index for a sparse list of key values +;; ( (rowname1 colname1 val1)(rowname2 colname2 val2) ) +;; +;; => +;; +;; ( (rowname1 0)(rowname2 1)) ;; rownames -> num +;; (colname1 0)(colname2 1)) ) ;; colnames -> num +;; +;; optional apply proc to rownum colnum value +(define (common:sparse-list-generate-index data #!key (proc #f)) + (if (null? data) + (list '() '()) + (let loop ((hed (car data)) + (tal (cdr data)) + (rownames '()) + (colnames '()) + (rownum 0) + (colnum 0)) + (let* ((rowkey (car hed)) + (colkey (cadr hed)) + (value (caddr hed)) + (existing-rowdat (assoc rowkey rownames)) + (existing-coldat (assoc colkey colnames)) + (curr-rownum (if existing-rowdat rownum (+ rownum 1))) + (curr-colnum (if existing-coldat colnum (+ colnum 1))) + (new-rownames (if existing-rowdat rownames (cons (list rowkey curr-rownum) rownames))) + (new-colnames (if existing-coldat colnames (cons (list colkey curr-colnum) colnames)))) + ;; (debug:print-info 0 *default-log-port* "Processing record: " hed ) + (if proc (proc curr-rownum curr-colnum rowkey colkey value)) + (if (null? tal) + (list new-rownames new-colnames) + (loop (car tal) + (cdr tal) + new-rownames + new-colnames + (if (> curr-rownum rownum) curr-rownum rownum) + (if (> curr-colnum colnum) curr-colnum colnum) + )))))) + +;;====================================================================== +;; if it looks like a number -> convert it to a number, else return it +;; +(define (common:lazy-convert inval) + (let* ((as-num (if (string? inval)(string->number inval) #f))) + (or as-num inval))) + +;;====================================================================== +;; convert string a=1; b=2; c=a silly thing; d= +;; to '((a . 1)(b . 2)(c . "a silly thing")(d . "")) +;; +(define (common:val->alist val #!key (convert #f)) + (let ((val-list (string-split-fields ";\\s*" val #:infix))) + (if val-list + (map (lambda (x) + (let ((f (string-split-fields "\\s*=\\s*" x #:infix))) + (case (length f) + ((0) `(,#f)) ;; null string case + ((1) `(,(string->symbol (car f)))) + ((2) `(,(string->symbol (car f)) . ,(let ((inval (cadr f))) + (if convert (common:lazy-convert inval) inval)))) + (else f)))) + val-list) + '()))) + +;;====================================================================== +;; S Y S T E M S T U F F +;;====================================================================== + +;;====================================================================== +;; lazy-safe get file mod time. on any error (file not existing etc.) return 0 +;; +(define (common:lazy-modification-time fpath) + (handle-exceptions + exn + (begin + (debug:print 2 *default-log-port* "Failed to get modification time for " fpath ", treating it as zero. exn=" exn) + 0) + (if (file-exists? fpath) + (file-modification-time fpath) + 0))) + +;;====================================================================== +;; find timestamp of newest file associated with a sqlite db file +(define (common:lazy-sqlite-db-modification-time fpath) + (let* ((glob-list (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "Failed to glob " fpath "*, exn=" exn) + `(,(conc "/no/such/file, message: " ((condition-property-accessor 'exn 'message) exn)))) + (glob (conc fpath "*")))) + (file-list (if (eq? 0 (length glob-list)) + '("/no/such/file") + glob-list))) + (apply max + (map + common:lazy-modification-time + file-list)))) + +;;====================================================================== +;; make "nice-path" available in config files and the repl +(define nice-path common:nice-path) + +;;====================================================================== +;; returns *effective load* (not normalized) +;; +(define (common:get-intercept onemin fivemin) + (if (< onemin fivemin) ;; load is decreasing, just use the onemin load + onemin + (let* ((load-change (- onemin fivemin)) + (tchange (- 300 60))) + (max (+ onemin (* 60 (/ load-change tchange))) 0)))) + +;;====================================================================== +;; get values from cached info from dropping file in .sysdata dir +;; e.g. key is host and dtype is normalized-load +;; +(define (common:get-cached-info key dtype #!key (age 10)) + (if *toppath* + (let* ((fullpath (conc *toppath* "/.sysdata/" key "-" dtype ".log")) + (delfile (lambda (exn) + (debug:print-info 2 *default-log-port* " removing bad file " fullpath ", exn=" exn) + (delete-file* fullpath) + #f))) + (if (and (file-exists? fullpath) + (file-read-access? fullpath)) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "failed to get cached info from " fullpath ", exn=" exn) + #f) + (debug:print 2 *default-log-port* "reading file " fullpath) + (let ((real-age (- (current-seconds) + (handle-exceptions + exn + (begin + (debug:print 1 *default-log-port* "Failed to read mod time on file " + fullpath ", using 0, exn=" exn) + 0) + (file-change-time fullpath))))) + (if (< real-age age) + (handle-exceptions + exn + (delfile exn) + (let* ((res (with-input-from-file fullpath read))) + (if (eof-object? res) + (begin + (delfile "n/a") + #f) + res))) + (begin + (debug:print-info 2 *default-log-port* "file " fullpath + " is too old (" real-age" seconds) to trust, skipping reading it") + #f)))) + (begin + (debug:print 2 *default-log-port* "not reading file " fullpath) + #f))) + #f)) + +(define (common:write-cached-info key dtype dat) + (if *toppath* + (let* ((fulldir (conc *toppath* "/.sysdata")) + (fullpath (conc fulldir "/" key "-" dtype ".log"))) + (if (not (file-exists? fulldir))(create-directory fulldir #t)) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "failed to write file " fullpath ", exn=" exn) + #f) + (with-output-to-file fullpath (lambda ()(pp dat))))) + #f)) + +(define (common:raw-get-remote-host-load remote-host) + (let* ((inp #f)) + (handle-exceptions + exn + (begin + (close-input-pipe inp) + (debug:print 0 *default-log-port* "failed to ssh to " remote-host " and get loadavg. exn=" exn) + #f) ;; more specific handling of errors needed + (set! inp (open-input-pipe (conc "ssh " remote-host " cat /proc/loadavg"))) + (let ((res (list (read inp)(read inp)(read inp)))) + (close-input-pipe inp) + res)))) + +;;====================================================================== +;; get cpu load by reading from /proc/loadavg, return all three values +;; +(define (common:get-cpu-load remote-host) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "failed to ssh or read loadavg from host " remote-host ", exn=" exn) + '(-99 -99 -99)) + (let* ((actual-hostname (or remote-host (get-host-name) "localhost"))) + (or (common:get-cached-info actual-hostname "cpu-load") + (let ((result (if (and remote-host + (not (equal? remote-host (get-host-name)))) + (map (lambda (res) + (if (eof-object? res) 9e99 res)) + (common:raw-get-remote-host-load remote-host)) + (with-input-from-file "/proc/loadavg" + (lambda ()(list (read)(read)(read))))))) + (match + result + ((l1 l2 l3) + (if (and (number? l1) + (number? l2) + (number? l3)) + (begin + (common:write-cached-info actual-hostname "cpu-load" result) + result) + '(-1 -1 -1))) ;; -1 is bad result + (else '(-2 -2 -2)))))))) + +;;====================================================================== +;; get normalized cpu load by reading from /proc/loadavg and /proc/cpuinfo return all three values and the number of real cpus and the number of threads +;; returns alist '((adj-cpu-load . normalized-proc-load) ... etc. +;; keys: adj-proc-load, adj-core-load, 1m-load, 5m-load, 15m-load +;; +(define (common:get-normalized-cpu-load remote-host) + (let ((res (common:get-normalized-cpu-load-raw remote-host)) + (default `((adj-proc-load . 2) ;; there is no right answer + (adj-core-load . 2) + (1m-load . 2) + (5m-load . 0) ;; causes a large delta - thus causing default of throttling if stuff goes wrong + (15m-load . 0) + (proc . 1) + (core . 1) + (phys . 1) + (error . #t)))) + (cond + ((and (list? res) + (> (length res) 2)) + res) + ((eq? res #f) default) ;; add messages? + ((eq? res #f) default) ;; this would be the #eof + (else default)))) + +(define (common:ssh-get-loadavg remote-host) + (let ((inp (open-input-pipe (conc "ssh " remote-host " \"cat /proc/loadavg;cat /proc/cpuinfo;echo end\"")))) + (let* ((res (read-lines inp))) + (close-input-pipe inp) + res))) + +(define (common:get-normalized-cpu-load-raw remote-host) + (let* ((actual-host (or remote-host (get-host-name)))) ;; #f is localhost + (or (common:get-cached-info actual-host "normalized-load") + (let ((data (if remote-host + (common:ssh-get-loadavg remote-host) + (append + (with-input-from-file "/proc/loadavg" + read-lines) + (with-input-from-file "/proc/cpuinfo" + read-lines) + (list "end")))) + (load-rx (regexp "^([\\d\\.]+)\\s+([\\d\\.]+)\\s+([\\d\\.]+)\\s+.*$")) + (proc-rx (regexp "^processor\\s+:\\s+(\\d+)\\s*$")) + (core-rx (regexp "^core id\\s+:\\s+(\\d+)\\s*$")) + (phys-rx (regexp "^physical id\\s+:\\s+(\\d+)\\s*$")) + (max-num (lambda (p n)(max (string->number p) n)))) + ;; (print "data=" data) + (if (null? data) ;; something went wrong + #f + (let loop ((hed (car data)) + (tal (cdr data)) + (loads #f) + (proc-num 0) ;; processor includes threads + (phys-num 0) ;; physical chip on motherboard + (core-num 0)) ;; core + ;;; (print hed ", " loads ", " proc-num ", " phys-num ", " core-num) + (if (null? tal) ;; have all our data, calculate normalized load and return result + (let* ((act-proc (+ proc-num 1)) + (act-phys (+ phys-num 1)) + (act-core (+ core-num 1)) + (adj-proc-load (/ (car loads) act-proc)) + (adj-core-load (/ (car loads) act-core)) + (result + (append (list (cons 'adj-proc-load adj-proc-load) + (cons 'adj-core-load adj-core-load)) + (list (cons '1m-load (car loads)) + (cons '5m-load (cadr loads)) + (cons '15m-load (caddr loads))) + (list (cons 'proc act-proc) + (cons 'core act-core) + (cons 'phys act-phys))))) + (common:write-cached-info actual-host "normalized-load" result) + result) + (regex-case + hed + (load-rx ( x l1 l5 l15 ) (loop (car tal)(cdr tal)(map string->number (list l1 l5 l15)) proc-num phys-num core-num)) + (proc-rx ( x p ) (loop (car tal)(cdr tal) loads (max-num p proc-num) phys-num core-num)) + (phys-rx ( x p ) (loop (car tal)(cdr tal) loads proc-num (max-num p phys-num) core-num)) + (core-rx ( x c ) (loop (car tal)(cdr tal) loads proc-num phys-num (max-num c core-num))) + (else + (begin + ;; (print "NO MATCH: " hed) + (loop (car tal)(cdr tal) loads proc-num phys-num core-num))))))))))) + +(define (common:unix-ping hostname) + (let ((res (system (conc "ping -c 1 " hostname " > /dev/null")))) + (eq? res 0))) + +(define (common:get-num-cpus remote-host) + (let* ((actual-host (or remote-host (get-host-name)))) + ;; hosts had better not be changing the number of cpus too often! + (or (hash-table-ref/default *numcpus-cache* actual-host #f) + (let* ((numcpus (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (random 3600))) + (let* ((proc (lambda () + (let loop ((numcpu 0) + (inl (read-line))) + (if (eof-object? inl) + (if (> numcpu 0) + numcpu + #f) ;; if zero return #f so caller knows that things are not working + (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl) + (+ numcpu 1) + numcpu) + (read-line)))))) + (result (if (and remote-host + (not (equal? remote-host (get-host-name)))) + (common:generic-ssh + (conc "ssh " remote-host " cat /proc/cpuinfo") + proc -1) + (with-input-from-file "/proc/cpuinfo" proc)))) + (if (and (number? result) + (> result 0)) + (common:write-cached-info actual-host "num-cpus" result)) + result)))) + (hash-table-set! *numcpus-cache* actual-host numcpus) + numcpus)))) + +;;====================================================================== +;; convert a spec string to a list of vectors #( rx action rx-string ) +(define (common:spec-string->list-of-specs spec-string actions) + (let ((spec-strings (string-split-fields "\\s*;\\s*" spec-string #:infix)) + (actions-regex (regexp (conc "^(.*)\\s+(" (string-intersperse (map conc actions) "|") ")")))) + (filter + (lambda (x) x) + (map (lambda (s) + (let ((m (string-match actions-regex s))) + (if m + (vector (regexp (cadr m))(string->symbol (caddr m))(cadr m)) + (begin + (debug:print 0 *default-log-port* "WARNING: Unrecognised rule \"" s "\" in clean-up specification.") + #f)))) + spec-strings)))) + +;;====================================================================== +;; given a list of specs rx . rule and a file return the first matching rule +;; +(define (common:file-find-rule fname rules) ;; rule is vector #( rx action rx-string) + (let loop ((rule (car rules)) + (tail (cdr rules))) + (let ((rx (vector-ref rule 0)) + (rn (vector-ref rule 1))) ;; rule name + (if (string-match rx fname) + rule ;; return the whole rule so regex can be printed etc. + (if (null? tail) + #f + (loop (car tail)(cdr tail))))))) + +;;====================================================================== +;; given a spec apply some rules to a directory +;; +;; WARNING: This function will REMOVE files - be sure your spec and path is correct! +;; +;; spec format: +;; file-regex1 action; file-regex2 action; ... +;; e.g. +;; .*\.log$ keep; .* remove +;; --> keep all .log files, remove everything else +;; limitations: +;; cannot have a rule with ; as part of the spec +;; not very flexible, would be nice to return binned file names? +;; supported rules: +;; keep - keep this file +;; remove - remove this file +;; compress - compress this file +;; +(define (common:dir-clean-up path spec-string #!key (compress "gzip")(actions '(keep remove compress))(remove-empty #f)) + (let* ((specs (common:spec-string->list-of-specs spec-string actions)) + (keepers (make-hash-table)) + (directories (make-hash-table))) + (find-files + path + action: (lambda (p res) + (let ((rule (common:file-find-rule p specs))) + (cond + ((directory? p)(hash-table-set! directories p #t)) + (else + (case (vector-ref rule 1) + ((keep)(hash-table-set! keepers p rule)) + ((remove) + (debug:print 0 *default-log-port* "Removing file " p) + (delete-file p)) + ((compress) + (debug:print 0 *default-log-port* "Compressing file " p) + (system (conc compress " " p))) + (else + (debug:print 0 *default-log-port* "No match for file " p)))))))) + (if remove-empty + (for-each + (lambda (d) + (if (null? (glob (conc d "/.*")(conc d "/*"))) + (begin + (debug:print 0 *default-log-port* "Removing empty directory " d) + (delete-directory d)))) + (sort (hash-table-keys directories) (lambda (a b)(> (string-length a)(string-length b)))))) + )) + +;;====================================================================== +;; E N V I R O N M E N T V A R S +;;====================================================================== + +(define (save-environment-as-files fname #!key (ignorevars (list "USER" "HOME" "DISPLAY" "LS_COLORS" "XKEYSYMDB" "EDITOR" "MAKEFLAGS" "MAKEF" "MAKEOVERRIDES" "HOSTNAME"))) + ;;(bb-check-path msg: "save-environment-as-files entry") + (let ((envvars (get-environment-variables)) + (whitesp (regexp "[^a-zA-Z0-9_\\-:,\\.\\/%$]")) + (mungeval (lambda (val) + (cond + ((eq? val #t) "") ;; convert #t to empty string + ((eq? val #f) #f) ;; convert #f to itself (still thinking about this one + (else val))))) + (with-output-to-file (conc fname ".csh") + (lambda () + (for-each (lambda (keyval) + (let* ((key (car keyval)) + (val (cdr keyval)) + (delim (if (and (string-search whitesp val) + (not (string-search "^\".*\"$" val)) + (not (string-search "^'.*'$" val))) + "\"" + ""))) + + (print (if (or (member key ignorevars) + (string-search whitesp key)) + "# setenv " + "setenv ") + key " " delim (mungeval val) delim))) + envvars))) + (with-output-to-file (conc fname ".sh") + (lambda () + (for-each (lambda (keyval) + (let* ((key (car keyval)) + (val (cdr keyval)) + (delim (if (and (string-search whitesp val) + (not (string-search "^\".*\"$" val)) + (not (string-search "^'.*'$" val))) + "\"" + ""))) + (print (if (or (member key ignorevars) + (string-search whitesp key) + (string-search ":" key)) ;; internal only values to be skipped. + "# export " + "export ") + key "=" delim (mungeval val) delim))) + envvars))))) + +(define (common:get-param-mapping #!key (flavor #f)) + "returns alist mapping string keys in testconfig/subrun to megatest command line switches; if flavor is switch-symbol, maps tcmt symbolic switches to megatest switches" + (let ((default '(("tag-expr" . "-tagexpr") + ("mode-patt" . "-modepatt") + ("run-name" . "-runname") + ("contour" . "-contour") + ("target" . "-target") + ("test-patt" . "-testpatt") + ("msg" . "-m") + ("log" . "-log") + ("start-dir" . "-start-dir") + ("new" . "-set-state-status")))) + (if (eq? flavor 'switch-symbol) + (map (lambda (x) + (cons (string->symbol (conc "-" (car x))) (cdr x))) + default) + default))) + +;;====================================================================== +;; set some env vars from an alist, return an alist with original values +;; (("VAR" "value") ...) +;; a value of #f means "unset this var" +;; +(define (alist->env-vars lst) + (if (list? lst) + (let ((res '())) + (for-each (lambda (p) + (let* ((var (car p)) + (val (cadr p)) + (prv (get-environment-variable var))) + (set! res (cons (list var prv) res)) + (if val + (safe-setenv var (->string val)) + (unsetenv var)))) + lst) + res) + '())) + +;;====================================================================== +;; clear vars matching pattern, run proc, set vars back +;; if proc is a string run that string as a command with +;; system. +;; +(define *common:orig-env* + (let ((envvars (get-environment-variables))) + (if (get-environment-variable "MT_ORIG_ENV") + (with-input-from-string + (z3:decode-buffer (base64:base64-decode (get-environment-variable "MT_ORIG_ENV"))) + read) + (filter-map (lambda (x) + (if (string-match "^MT_.*" (car x)) + #f + x)) + envvars)))) + +(define (common:with-orig-env proc) + (let ((current-env (get-environment-variables))) + (for-each (lambda (x) (unsetenv (car x))) current-env) + (for-each (lambda (x) (setenv (car x) (cdr x))) *common:orig-env*) + (let ((rv (cond + ((string? proc)(system proc)) + (proc (proc))))) + (for-each (lambda (x) (unsetenv (car x))) *common:orig-env*) + (for-each (lambda (x) (setenv (car x) (cdr x))) current-env) + rv))) + +(define (common:without-vars proc . var-patts) + (let ((vars (make-hash-table))) + (for-each + (lambda (vardat) ;; each env var + (for-each + (lambda (var-patt) + (if (string-match var-patt (car vardat)) + (let ((var (car vardat)) + (val (cdr vardat))) + (hash-table-set! vars var val) + (unsetenv var)))) + var-patts)) + (get-environment-variables)) + (cond + ((string? proc)(system proc)) + (proc (proc))) + (hash-table-for-each + vars + (lambda (var val) + (setenv var val))) + vars)) + +;;====================================================================== +;; C O L O R S +;;====================================================================== + +(define (common:name->iup-color name) + (case (string->symbol (string-downcase name)) + ((red) "223 33 49") + ((grey) "192 192 192") + ((orange) "255 172 13") + ((purple) "This is unfinished ..."))) + +;;====================================================================== +;; (define (common:get-color-for-state-status state status) +;; (case (string->symbol state) +;; ((COMPLETED) +;; (case (string->symbol status) +;; ((PASS) "70 249 73") +;; ((WARN WAIVED) "255 172 13") +;; ((SKIP) "230 230 0") +;; (else "223 33 49"))) +;; ((LAUNCHED) "101 123 142") +;; ((CHECK) "255 100 50") +;; ((REMOTEHOSTSTART) "50 130 195") +;; ((RUNNING) "9 131 232") +;; ((KILLREQ) "39 82 206") +;; ((KILLED) "234 101 17") +;; ((NOT_STARTED) "240 240 240") +;; (else "192 192 192"))) + +(define (common:iup-color->rgb-hex instr) + (string-intersperse + (map (lambda (x) + (number->string x 16)) + (map string->number + (string-split instr))) + "/")) + +;;====================================================================== +;; L O C K I N G M E C H A N I S M S +;;====================================================================== +;;====================================================================== +;; +;;====================================================================== + +(define (common:in-running-test?) + (and (args:get-arg "-execute") (get-environment-variable "MT_CMDINFO"))) + +(define (common:get-color-from-status status) + (cond + ((equal? status "PASS") "green") + ((equal? status "FAIL") "red") + ((equal? status "WARN") "orange") + ((equal? status "KILLED") "orange") + ((equal? status "KILLREQ") "purple") + ((equal? status "RUNNING") "blue") + ((equal? status "ABORT") "brown") + (else "black"))) + +;;====================================================================== +;; N A N O M S G C L I E N T +;;====================================================================== +;; +;; +;; +;; (define (common:send-dboard-main-changed) +;; (let* ((dashboard-ips (mddb:get-dashboards))) +;; (for-each +;; (lambda (ipadr) +;; (let* ((soc (common:open-nm-req (conc "tcp://" ipadr))) +;; (msg (conc "main " *toppath*)) +;; (res (common:nm-send-receive-timeout soc msg))) +;; (if (not res) ;; couldn't reach that dashboard - remove it from db +;; (print "ERROR: couldn't reach dashboard " ipadr)) +;; res)) +;; dashboard-ips))) +;; +;; +;; ;;====================================================================== +;; ;; D A S H B O A R D D B +;; ;;====================================================================== +;; +;; (define (mddb:open-db) +;; (let* ((db (open-database (conc (get-environment-variable "HOME") "/.dashboard.db")))) +;; (set-busy-handler! db (busy-timeout 10000)) +;; (for-each +;; (lambda (qry) +;; (exec (sql db qry))) +;; (list +;; "CREATE TABLE IF NOT EXISTS vars (id INTEGER PRIMARY KEY,key TEXT, val TEXT, CONSTRAINT varsconstraint UNIQUE (key));" +;; "CREATE TABLE IF NOT EXISTS dashboards ( +;; id INTEGER PRIMARY KEY, +;; pid INTEGER, +;; username TEXT, +;; hostname TEXT, +;; ipaddr TEXT, +;; portnum INTEGER, +;; start_time TIMESTAMP DEFAULT (strftime('%s','now')), +;; CONSTRAINT hostport UNIQUE (hostname,portnum) +;; );" +;; )) +;; db)) +;; +;; ;; register a dashboard +;; ;; +;; (define (mddb:register-dashboard port) +;; (let* ((pid (current-process-id)) +;; (hostname (get-host-name)) +;; (ipaddr (server:get-best-guess-address hostname)) +;; (username (current-user-name)) ;; (car userinfo))) +;; (db (mddb:open-db))) +;; (print "Register monitor, pid: " pid ", hostname: " hostname ", port: " port ", username: " username) +;; (exec (sql db "INSERT OR REPLACE INTO dashboards (pid,username,hostname,ipaddr,portnum) VALUES (?,?,?,?,?);") +;; pid username hostname ipaddr port) +;; (close-database db))) +;; +;; ;; unregister a monitor +;; ;; +;; (define (mddb:unregister-dashboard host port) +;; (let* ((db (mddb:open-db))) +;; (print "Register unregister monitor, host:port=" host ":" port) +;; (exec (sql db "DELETE FROM dashboards WHERE hostname=? AND portnum=?;") host port) +;; (close-database db))) +;; +;; ;; get registered dashboards +;; ;; +;; (define (mddb:get-dashboards) +;; (let ((db (mddb:open-db))) +;; (query fetch-column +;; (sql db "SELECT ipaddr || ':' || portnum FROM dashboards;")))) + +;;====================================================================== +;; H I E R A R C H I C A L H A S H T A B L E S +;;====================================================================== +;; +;; Every element including top element is a vector: +;; + +(define (hh:make-hh #!key (ht #f)(value #f)) + (vector (or ht (make-hash-table)) value)) + +;;====================================================================== +;; used internally +(define-inline (hh:set-ht! hh ht) (vector-set! hh 0 ht)) +(define-inline (hh:get-ht hh) (vector-ref hh 0)) +(define-inline (hh:set-value! hh value) (vector-set! hh 1 value)) +(define-inline (hh:get-value hh value) (vector-ref hh 1)) + +;;====================================================================== +;; given a hierarchial hash and some keys look up the value ... +;; +(define (hh:get hh . keys) + (if (null? keys) + (vector-ref hh 1) ;; we have reached the end of the line, return the value sought + (let ((sub-ht (hh:get-ht hh))) + (if sub-ht ;; yes, there is more hierarchy + (let ((sub-hh (hash-table-ref/default sub-ht (car keys) #f))) + (if sub-hh + (apply hh:get sub-hh (cdr keys)) + #f)) + #f)))) + +;;====================================================================== +;; given a hierarchial hash, a value and some keys, add needed hierarcy and insert the value +;; +(define (hh:set! hh value . keys) + (if (null? keys) + (hh:set-value! hh value) ;; we have reached the end of the line, store the value + (let ((sub-ht (hh:get-ht hh))) + (if sub-ht ;; yes, there is more hierarchy + (let ((sub-hh (hash-table-ref/default sub-ht (car keys) #f))) + (if (not sub-hh) ;; we'll need to add the next level of hierarchy + (let ((new-sub-hh (hh:make-hh))) + (hash-table-set! sub-ht (car keys) new-sub-hh) + (apply hh:set! new-sub-hh value (cdr keys))) + (apply hh:set! sub-hh value (cdr keys)))) ;; call the sub-hierhash with remaining keys + (begin + (hh:set-ht! hh (make-hash-table)) + (apply hh:set! hh value keys)))))) + +;;====================================================================== +;; Manage pkts, used in servers, tests and likely other contexts so put +;; in common +;;====================================================================== + +(define common:pkts-spec + '((default . ((parent . P) + (action . a) + (filename . f))) + (configf . ((parent . P) + (action . a) + (filename . f))) + (server . ((action . a) + (pid . d) + (ipaddr . i) + (port . p) + (parent . P))) + + (test . ((cpuuse . c) + (diskuse . d) + (item-path . i) + (runname . r) + (state . s) + (target . t) + (status . u) + (parent . P))))) + +(define (common:get-pkt-alists pkts) + (map (lambda (x) + (alist-ref 'apkt x)) ;; 'pkta pulls out the alist from the read pkt + pkts)) + +;;====================================================================== +;; given list of pkts (alist mode) return list of D cards as Unix epoch, sorted descending +;; also delete duplicates by target i.e. (car pkt) +;; +(define (common:get-pkt-times pkts) + (delete-duplicates + (sort + (map (lambda (x) + `(,(alist-ref 't x) . ,(string->number (alist-ref 'D x)))) + pkts) + (lambda (a b)(> (cdr a)(cdr b)))) ;; sort descending + (lambda (a b)(equal? (car a)(car b))))) ;; remove duplicates by target + + +(define *common:thread-punchlist* (make-hash-table)) +(define (common:send-thunk-to-background-thread thunk #!key (name #f)) + ;;(BB> "launched thread " name) + ;; we need a unique name for the thread. + (let* ((realname (if name + (if (not (hash-table-ref/default *common:thread-punchlist* name #f)) + name + (conc name"-" (symbol->string (gensym)))) + (conc "anonymous-"(symbol->string (gensym))))) + (realthunk (lambda () + (let ((res (thunk))) + (hash-table-delete! *common:thread-punchlist* realname) + res))) + (thread (make-thread realthunk realname))) + (hash-table-set! *common:thread-punchlist* realname thread) + (thread-start! thread) + )) + +(define (common:join-backgrounded-threads) + ;; may need to trap and ignore exceptions -- dunno how atomic threads are... + (for-each + (lambda (thread-name) + (let* ((thread (hash-table-ref/default *common:thread-punchlist* thread-name #f))) + (if thread + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "joining threads failed. exn=" exn) + #t) ;; just ignore it, it might have died in the meantime so joining it will throw an exception + (thread-join! thread)) + ))) + (hash-table-keys *common:thread-punchlist*))) + +;; given a list of itemmaps (testname . map), return the first match +;; +(define (tests:lookup-itemmap itemmaps testname) + (let ((best-matches (filter (lambda (itemmap) + (tests:match (car itemmap) testname #f)) + itemmaps))) + (if (null? best-matches) + #f + (let ((res (car best-matches))) + ;; (debug:print 0 *default-log-port* "res=" res) + (cond + ((string? res) res) ;;; FIX THE ROOT CAUSE HERE .... + ((null? res) #f) + ((string? (cdr res)) (cdr res)) ;; it is a pair + ((string? (cadr res))(cadr res)) ;; it is a list + (else cadr res)))))) + +;; if itempath is #f then look only at the testname part +;; +(define (tests:match patterns testname itempath #!key (required '())) + (if (string? patterns) + (let ((patts (append (string-split patterns ",") required))) + (if (null? patts) ;;; no pattern(s) means no match + #f + (let loop ((patt (car patts)) + (tal (cdr patts))) + ;; (print "loop: patt: " patt ", tal " tal) + (if (string=? patt "") + #f ;; nothing ever matches empty string - policy + (let* ((patt-parts (string-match (regexp "^([^\\/]*)(\\/(.*)|)$") patt)) + (test-patt (cadr patt-parts)) + (item-patt (cadddr patt-parts))) + ;; special case: test vs. test/ + ;; test => "test" "%" + ;; test/ => "test" "" + (if (and (not (substring-index "/" patt)) ;; no slash in the original + (or (not item-patt) + (equal? item-patt ""))) ;; should always be true that item-patt is "" + (set! item-patt "%")) + ;; (print "tests:match => patt-parts: " patt-parts ", test-patt: " test-patt ", item-patt: " item-patt) + (if (and (tests:glob-like-match test-patt testname) + (or (not itempath) + (tests:glob-like-match (if item-patt item-patt "") itempath))) + #t + (if (null? tal) + #f + (loop (car tal)(cdr tal))))))))))) + +;; if itempath is #f then look only at the testname part +;; +(define (tests:match->sqlqry patterns) + (if (string? patterns) + (let ((patts (string-split patterns ","))) + (if (null? patts) ;;; no pattern(s) means no match, we will do no query + #f + (let loop ((patt (car patts)) + (tal (cdr patts)) + (res '())) + ;; (print "loop: patt: " patt ", tal " tal) + (let* ((patt-parts (string-match (regexp "^([^\\/]*)(\\/(.*)|)$") patt)) + (test-patt (cadr patt-parts)) + (item-patt (cadddr patt-parts)) + (test-qry (db:patt->like "testname" test-patt)) + (item-qry (db:patt->like "item_path" item-patt)) + (qry (conc "(" test-qry " AND " item-qry ")"))) + ;; (print "tests:match => patt-parts: " patt-parts ", test-patt: " test-patt ", item-patt: " item-patt) + (if (null? tal) + (string-intersperse (append (reverse res)(list qry)) " OR ") + (loop (car tal)(cdr tal)(cons qry res))))))) + #f)) + +(define *glob-like-match-cache* (make-hash-table)) +(define (tests:cache-regexp str-in flag) + (let* ((key (conc str-in flag))) + (or (hash-table-ref/default *glob-like-match-cache* key #f) + (let* ((newrx (regexp str-in flag))) + (hash-table-set! *glob-like-match-cache* key newrx) + newrx)))) + +;; tests:glob-like-match +(define (tests:glob-like-match patt str) + (let* ((like (substring-index "%" patt)) + (notpatt (equal? (substring-index "~" patt) 0)) + (newpatt (if notpatt (substring patt 1) patt)) + (finpatt (if like + (string-substitute (regexp "%") ".*" newpatt #f) + (string-substitute (regexp "\\*") ".*" newpatt #f))) + (rx (tests:cache-regexp finpatt (if like #t #f))) + (res (string-match rx str))) + (if notpatt (not res) res))) + +;; keys list to key1,key2,key3 ... +(define (runs:get-std-run-fields keys remfields) + (let* ((header (append keys remfields)) + (keystr (conc (keys->keystr keys) "," + (string-intersperse remfields ",")))) + (list keystr header))) + +;; make a query (fieldname like 'patt1' OR fieldname +(define (db:patt->like fieldname pattstr #!key (comparator " OR ")) + (let ((patts (if (string? pattstr) + (string-split pattstr ",") + '("%")))) + (string-intersperse (map (lambda (patt) + (let ((wildtype (if (substring-index "%" patt) "LIKE" "GLOB"))) + (conc fieldname " " wildtype " '" patt "'"))) + (if (null? patts) + '("") + patts)) + comparator))) + +;;====================================================================== +;; V E R S I O N +;;====================================================================== + +(define (common:get-full-version) + (conc megatest-version "-" megatest-fossil-hash)) + +(define (common:version-signature) + (conc megatest-version "-" (substring megatest-fossil-hash 0 4))) + +(define (common:get-fields cfgdat) + (let ((fields (hash-table-ref/default cfgdat "fields" '()))) + (map car fields))) + +(define keys:config-get-fields common:get-fields) ) Index: configf.scm ================================================================== --- configf.scm +++ configf.scm @@ -39,64 +39,25 @@ (declare (uses configfmod.import)) (declare (uses dbfile)) (declare (uses dbfile.import)) (declare (uses dbmod)) (declare (uses dbmod.import)) +(declare (uses mtmod)) +(declare (uses mtmod.import)) (import commonmod configfmod processmod (prefix mtargs args:) - debugprint) + debugprint + mtmod + ) (include "common_records.scm") (define configf:imports "(import commonmod configfmod processmod (prefix mtargs args:))") -(define (configf:write-alist cdat fname) - (if (not (common:faux-lock fname)) - (debug:print 0 *default-log-port* "INFO: Could not get lock on " fname)) - (let* ((dat (configf:config->alist cdat)) - (res - (begin - (with-output-to-file fname ;; first write out the file - (lambda () - (pp dat))) - - (if (common:file-exists? fname) ;; now verify it is readable - (if (configf:read-alist fname) - #t ;; data is good. - (begin - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "deleting " fname " failed, exn=" exn) - #f) - (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.") - (delete-file fname)) - #f)) - #f)))) - (common:faux-unlock fname) - res)) - -;; pathenvvar will set the named var to the path of the config -(define (find-and-read-config fname #!key (environ-patt #f)(given-toppath #f)(pathenvvar #f)) - (let* ((curr-dir (current-directory)) - (configinfo (find-config fname toppath: given-toppath)) - (toppath (car configinfo)) - (configfile (cadr configinfo)) - (set-fields (lambda (curr-section next-section ht path) - (let ((field-names (if ht (common:get-fields ht) '())) - (target (or (getenv "MT_TARGET")(args:get-arg "-reqtarg")(args:get-arg "-target")))) - (debug:print-info 9 *default-log-port* "set-fields with field-names=" field-names " target=" target " curr-section=" curr-section " next-section=" next-section " path=" path " ht=" ht) - (if (not (null? field-names))(keys:target-set-args field-names target #f)))))) - (if toppath (change-directory toppath)) - (if (and toppath pathenvvar)(setenv pathenvvar toppath)) - (let ((configdat (if configfile - (read-config configfile #f #t environ-patt: environ-patt post-section-procs: (list (cons "^fields$" set-fields)) #f)))) - (if toppath (change-directory curr-dir)) - (list configdat toppath configfile fname)))) (define (configf:process-line l ht allow-system #!key (linenum #f)) (let loop ((res l)) (if (string? res) (let ((matchdat (string-search configf:var-expand-regex res))) Index: configfmod.scm ================================================================== --- configfmod.scm +++ configfmod.scm @@ -20,10 +20,11 @@ (declare (unit configfmod)) (declare (uses debugprint)) (declare (uses commonmod)) (declare (uses processmod)) +(declare (uses mtargs)) (use regex regex-case) (module configfmod * @@ -46,11 +47,13 @@ ) (import debugprint commonmod - processmod) + processmod + (prefix mtargs args:) + ) ;; Run a shell command and return the output as a string (define (shell cmd) (let* ((output (process:cmd-run->list cmd)) (res (car output)) @@ -430,11 +433,15 @@ (debug:print 0 *default-log-port* "read of alist " fname " failed. exn=" exn) #f) (configf:alist->config (with-input-from-file fname read)))) - +(define read-config (lambda ()(assert #f "FATAL: read-config proc not set!"))) + +(define (read-config-set! proc) + (set! read-config proc)) + ;; convert hierarchial list to ini format ;; (define (configf:config->ini data) (map (lambda (section) @@ -447,9 +454,27 @@ (fname (if (> (length dat-pair) 2)(caddr dat-pair) #f))) (if fname (print "# " var "=>" fname)) (print var " " val))) section-dat))) ;; (print "section-dat: " section-dat)) (hash-table->alist data))) + +;; pathenvvar will set the named var to the path of the config +(define (find-and-read-config fname #!key (environ-patt #f)(given-toppath #f)(pathenvvar #f)) + (let* ((curr-dir (current-directory)) + (configinfo (find-config fname toppath: given-toppath)) + (toppath (car configinfo)) + (configfile (cadr configinfo)) + (set-fields (lambda (curr-section next-section ht path) + (let ((field-names (if ht (common:get-fields ht) '())) + (target (or (getenv "MT_TARGET")(args:get-arg "-reqtarg")(args:get-arg "-target")))) + (debug:print-info 9 *default-log-port* "set-fields with field-names=" field-names " target=" target " curr-section=" curr-section " next-section=" next-section " path=" path " ht=" ht) + (if (not (null? field-names))(keys:target-set-args field-names target #f)))))) + (if toppath (change-directory toppath)) + (if (and toppath pathenvvar)(setenv pathenvvar toppath)) + (let ((configdat (if configfile + (read-config configfile #f #t environ-patt: environ-patt post-section-procs: (list (cons "^fields$" set-fields)) #f)))) + (if toppath (change-directory curr-dir)) + (list configdat toppath configfile fname)))) ) ADDED cpumod.scm Index: cpumod.scm ================================================================== --- /dev/null +++ cpumod.scm @@ -0,0 +1,105 @@ +;;====================================================================== +;; Copyright 2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +;;====================================================================== +;; Cpumod: +;; +;; Put things here don't fit anywhere else +;;====================================================================== + +(declare (unit cpumod)) +(declare (uses debugprint)) +(declare (uses mtargs)) + +(use srfi-69) + +(module cpumod + * + +(import scheme) +(cond-expand + (chicken-4 + + (import chicken + ports + (prefix base64 base64:) + + (prefix sqlite3 sqlite3:) + data-structures + extras + files + matchable + md5 + message-digest + pathname-expand + posix + posix-extras + regex + regex-case + sparse-vectors + srfi-1 + srfi-18 + srfi-69 + typed-records + z3 + + debugprint + (prefix mtargs args:) + ) + (use srfi-69)) + (chicken-5 + (import (prefix sqlite3 sqlite3:) + ;; data-structures + ;; extras + ;; files + ;; posix + ;; posix-extras + chicken.base + chicken.condition + chicken.file + chicken.file.posix + chicken.io + chicken.pathname + chicken.port + chicken.process + chicken.process-context + chicken.process-context.posix + chicken.sort + chicken.string + chicken.time + chicken.time.posix + + matchable + md5 + message-digest + pathname-expand + regex + regex-case + srfi-1 + srfi-18 + srfi-69 + typed-records + system-information + + debugprint + ))) + + +) Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -24,18 +24,23 @@ (declare (unit dashboard-tests)) (declare (uses common)) (declare (uses commonmod)) (declare (uses configfmod)) +(declare (uses debugprint)) +(declare (uses rmtmod)) +(declare (uses megatestmod)) +(declare (uses dbmod)) +(declare (uses dbfile)) +(declare (uses tasksmod)) + (declare (uses dcommon)) -(declare (uses db)) (declare (uses gutils)) +(declare (uses db)) (declare (uses rmt)) (declare (uses ezsteps)) (declare (uses subrun)) -(declare (uses debugprint)) -(declare (uses rmtmod)) (use format fmt) (require-library iup) (import (prefix iup iup:)) @@ -45,11 +50,16 @@ (use (prefix sqlite3 sqlite3:)) (import commonmod configfmod rmtmod - debugprint) + debugprint + megatestmod + dbmod + dbfile + tasksmod + ) (include "common_records.scm") (include "db_records.scm") (include "run_records.scm") @@ -950,5 +960,17 @@ (curr-updaters (hash-table-ref/default (dboard:commondat-updaters commondat) tnum '()))) (hash-table-set! (dboard:commondat-updaters commondat) tnum (cons updater curr-updaters)))) +(define (common:run-a-command cmd #!key (with-vars #f) (with-orig-env #f)) + (let* ((pre-cmd (dtests:get-pre-command)) + (post-cmd (dtests:get-post-command)) + (fullcmd (if (or pre-cmd post-cmd) + (conc pre-cmd cmd post-cmd) + (conc "viewscreen " cmd)))) + (debug:print-info 02 *default-log-port* "Running command: " fullcmd) + (cond + (with-vars (common:without-vars fullcmd)) + (with-orig-env (common:with-orig-env fullcmd)) + (else (common:without-vars fullcmd "MT_.*"))))) + Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -16,23 +16,52 @@ ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;; ;;====================================================================== -(declare (uses common)) (declare (uses mtargs)) (declare (uses mtargs.import)) -(declare (uses keys)) -(declare (uses items)) (declare (uses debugprint)) (declare (uses debugprint.import)) -(declare (uses db)) -(declare (uses configf)) -(declare (uses configfmod)) -(declare (uses configfmod.import)) +(declare (uses cookie)) +(declare (uses cookie.import)) +(declare (uses stml2)) +(declare (uses stml2.import)) +(declare (uses commonmod)) +(declare (uses commonmod.import)) (declare (uses processmod)) (declare (uses processmod.import)) +(declare (uses configfmod)) +(declare (uses configfmod.import)) +(declare (uses mtmod)) +(declare (uses mtmod.import)) +(declare (uses dbfile)) +(declare (uses dbfile.import)) +(declare (uses dbmod)) +(declare (uses dbmod.import)) +(declare (uses portlogger)) +(declare (uses portlogger.import)) +(declare (uses tcp-transportmod)) +(declare (uses tcp-transportmod.import)) +(declare (uses servermod)) +(declare (uses servermod.import)) +(declare (uses megatestmod)) +(declare (uses megatestmod.import)) +(declare (uses apimod)) +(declare (uses apimod.import)) +(declare (uses rmtmod)) +(declare (uses rmtmod.import)) +(declare (uses pgdb)) +(declare (uses pgdb.import)) +(declare (uses tasksmod)) +(declare (uses tasksmod.import)) + +(declare (uses configf)) +(declare (uses common)) +(declare (uses keys)) +(declare (uses items)) +(declare (uses db)) (declare (uses process)) (declare (uses launch)) (declare (uses runs)) (declare (uses dashboard-tests)) (declare (uses tree)) @@ -39,17 +68,10 @@ (declare (uses dcommon)) (declare (uses dashboard-context-menu)) (declare (uses vg)) (declare (uses subrun)) (declare (uses mt)) -(declare (uses dbmod)) -(declare (uses dbfile)) -(declare (uses dbfile.import)) -(declare (uses rmtmod)) -(declare (uses rmtmod.import)) -(declare (uses commonmod)) -(declare (uses commonmod.import)) (use format) (require-library iup) (import (prefix iup iup:)) @@ -66,11 +88,15 @@ processmod (prefix mtargs args:) dbmod dbfile rmtmod - debugprint) + debugprint + stml2 + megatestmod + tasksmod + ) (include "common_records.scm") (include "db_records.scm") (include "run_records.scm") (include "task_records.scm") @@ -82,10 +108,13 @@ ;; executables such as dashboard and mtutil ;; (include "dashboard-transport-mode.scm") (dbfile:db-init-proc db:initialize-main-db) (set! rmtmod:send-receive rmt:send-receive) + +;; remove when configf fully modularized +(read-config-set! configf:read-file) (debug:print-info 0 *default-log-port* "transport-mode="(rmt:transport-mode)) (define help (conc "Megatest Dashboard, documentation at http://www.kiatoa.com/fossils/megatest version " megatest-version @@ -3855,11 +3884,11 @@ (debug:print 0 *default-log-port* "Failed to find megatest.config, exiting") (exit 1) ) ) - #;(if (not (common:on-homehost?)) + #;(if (not (rmt:on-homehost?)) (begin (debug:print 0 *default-log-port* "WARNING: You are starting the dashboard on a machine that is not the homehost:" (server:get-homehost)) (debug:print 0 *default-log-port* "It will be slower.") )) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -61,4627 +61,10 @@ z3 typed-records matchable files) -(include "common_records.scm") -(include "db_records.scm") -(include "key_records.scm") -(include "run_records.scm") - -(define *number-of-writes* 0) -(define *number-non-write-queries* 0) - (import debugprint) (import dbfile) (import dbmod) (import rmtmod) -;; record for keeping state,status and count for doing roll-ups in -;; iterated tests -;; -(defstruct dbr:counts - (state #f) - (status #f) - (count 0)) - -;; (define (db:with-db dbstruct run-id r/w proc . params) -;; (case (rmt:transport-mode) -;; ((http)(dbfile:with-db dbstruct run-id r/w proc params)) -;; ((tcp) (dbmod:with-db dbstruct run-id r/w proc params)) -;; ((nfs) (dbmod:with-db dbstruct run-id r/w proc params)) -;; (else (assert #f "FATAL: db:with-db called with non-existant transport mode")))) - -;;====================================================================== -;; hash of hashs -;;====================================================================== - - -(define (db:hoh-set! dat key1 key2 val) - (let* ((subhash (hash-table-ref/default dat key1 #f))) - (if subhash - (hash-table-set! subhash key2 val) - (begin - (hash-table-set! dat key1 (make-hash-table)) - (db:hoh-set! dat key1 key2 val))))) - -(define (db:hoh-get dat key1 key2) - (let* ((subhash (hash-table-ref/default dat key1 #f))) - (and subhash - (hash-table-ref/default subhash key2 #f)))) - -;;====================================================================== -;; SQLITE3 HELPERS -;;====================================================================== - -(define (db:general-sqlite-error-dump exn stmt . params) - (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) ;; RADT ... how does this work? - ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn) - ;; (print "err-status: " err-status) - (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn)) - (print-call-chain (current-error-port)))) - -;; convert to -inline -;; -(define (db:first-result-default db stmt default . params) - (handle-exceptions - exn - (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) - ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn) - (if (eq? err-status 'done) - default - (begin - (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) - (print-call-chain (current-error-port)) - default))) - (apply sqlite3:first-result db stmt params))) - -(define (db:setup) - (assert *toppath* "FATAL: db:setup called before launch:setup has been run.") - (let* ((tmpdir (common:make-tmpdir-name *toppath* ""))) - (if (not *dbstruct-dbs*) - (dbfile:setup (conc *toppath* "/.mtdb") tmpdir) - *dbstruct-dbs*))) - -;; moved from dbfile -;; -;; ADD run-id SUPPORT -;; -(define (db:create-all-triggers dbstruct) - (db:with-db - dbstruct #f #t - (lambda (dbdat db) - (db:create-triggers db)))) - -(define (db:create-triggers db) - (for-each (lambda (key) - (sqlite3:execute db (cadr key))) - db:trigger-list)) - -(define (db:drop-all-triggers dbstruct) - (db:with-db - dbstruct #f #t - (lambda (dbdat db) - (db:drop-triggers db)))) - -(define (db:have-incompletes? dbstruct run-id ovr-deadtime) - (let* ((incompleted '()) - (oldlaunched '()) - (toplevels '()) - ;; (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) - (deadtime (or ovr-deadtime 72000))) ;; twenty hours - (db:with-db - dbstruct run-id #f - (lambda (dbdat db) - - ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes - ;; - ;; HOWEVER: this code in run:test seems to work fine - ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) - ;; (db:test-get-run_duration testdat))) - ;; 600) - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (begin - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels))) - ;; (print-info "Found old toplevel test in RUNNING state, test-id=" test-id)) - (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) - (db:get-cache-stmth dbdat db - "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');") - run-id deadtime) - - ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config - ;; - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) - (db:get-cache-stmth dbdat db - "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');") - run-id) - - ;; (print-info "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") - (if (and (null? incompleted) - (null? oldlaunched) - (null? toplevels)) - #f - #t))))) - - -;; looks up subdb and returns it, if not found then set up -;; and then return it. -;; -#;(define (db:get-db dbstruct run-id) - (let* ((res (dbfile:get-subdb dbstruct run-id))) - (if res - res - (let* ((newsubdb (make-dbr:subdb))) - (dbfile:set-subdb dbstruct run-id newsubdb) - (db:open-db dbstruct run-id areapath: (dbr:dbstruct-areapath dbstruct) do-sync: #t) - newsubdb)))) - -;; Get/open a database -;; if run-id => get run specific db -;; if #f => get main db -;; if run-id is a string treat it as a filename -;; if db already open - return cachedb -;; if db not open, open cachedb, rundb and sync then return cachedb -;; inuse gets set automatically for rundb's -;; -;; (define db:get-db db:get-subdb) - -;; (define (db:get-db subdb #;dbstruct run-id) ;; RENAME TO db:get-dbh -;; ;; (let* ((subdb (dbfile:get-subdb dbstruct run-id))) -;; (if (stack? (dbr:subdb-dbstack subdb)) -;; (if (stack-empty? (dbr:subdb-dbstack subdb)) -;; (let* ((dbname (db:run-id->dbname run-id)) -;; (newdb (db:open-megatest-db path: (db:dbfile-path) -;; name: dbname))) -;; ;; NOTE: pushing on the stack only happens AFTER the handle has been used -;; ;; (stack-push! (dbr:dbstruct-dbstack dbstruct) newdb) -;; newdb) -;; (stack-pop! (dbr:subdb-dbstack subdb))) -;; (db:open-db subdb run-id))) ;; ) - - -#;(define (db:get-db dbstruct run-id) - (let* ((subdb (dbfile:get-subdb dbstruct run-id)) - (dbdat (dbfile:get-dbdat dbstruct run-id))) - (if (dbr:dbdat? dbdat) - dbdat - (dbfile:open-db *dbstruct-dbs* #f db:initialize-main-db) - ) - ) -) - -(define-inline (db:generic-error-printout exn . message) - (print-call-chain (current-error-port)) - (apply debug:print-error 0 *default-log-port* message) - (debug:print-error 0 *default-log-port* " params: " params - ", error: " ((condition-property-accessor 'exn 'message) exn) - ", arguments: " ((condition-property-accessor 'exn 'arguments) exn) - ", location: " ((condition-property-accessor 'exn 'location) exn) - )) - - -(define (db:set-sync db) - (let ((syncprag (configf:lookup *configdat* "setup" "sychronous"))) - (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 0) ";")))) - - -(define (db:get-last-update-time db) - (let ((last-update-time #f)) - (sqlite3:for-each-row - (lambda (lup) - (set! last-update-time lup)) - db - "select max(lup) from ( select max(last_update) as lup from tests union select max(last_update) as lup from runs);") - last-update-time)) - - -;; Open the classic megatest.db file (defaults to open in toppath) -;; -;; NOTE: returns a dbdat not a dbstruct! -;; -(define (db:open-megatest-db dbpath) - (let* ((dbexists (file-exists? dbpath)) - (db (db:lock-create-open dbpath - (lambda (db) - (db:initialize-main-db db)))) - (write-access (file-write-access? dbpath))) - (debug:print-info 13 *default-log-port* "db:open-megatest-db "dbpath) - (if (and dbexists (not write-access)) - (set! *db-write-access* #f)) - ;; (cons db dbpath))) - (make-dbr:dbdat dbfile: dbpath dbh: db read-only: (not write-access)))) - - -;; use bunch of Unix commands to try to break the lock and recreate the db -;; -(define (db:move-and-recreate-db dbdat) - (let* ((dbpath (dbr:dbdat-dbfile dbdat)) - (dbdir (pathname-directory dbpath)) - (fname (pathname-strip-directory dbpath)) - (fnamejnl (conc fname "-journal")) - (tmpname (conc fname "." (current-process-id))) - (tmpjnl (conc fnamejnl "." (current-process-id)))) - (debug:print-error 0 *default-log-port* "" fname " appears corrupted. Making backup \"old/" fname "\"") - (system (conc "cd " dbdir ";mkdir -p old;cat " fname " > old/" tmpname)) - (system (conc "rm -f " dbpath)) - (if (common:file-exists? fnamejnl) - (begin - (debug:print-error 0 *default-log-port* "" fnamejnl " found, moving it to old dir as " tmpjnl) - (system (conc "cd " dbdir ";mkdir -p old;cat " fnamejnl " > old/" tmpjnl)) - (system (conc "rm -f " dbdir "/" fnamejnl)))) - ;; attempt to recreate database - (system (conc "cd " dbdir ";sqlite3 old/" tmpname " .dump | sqlite3 " fname)))) - -;; return #f to indicate the dbdat should be closed/reopened -;; else return dbdat -;; -(define (db:repair-db dbdat #!key (numtries 1)) - (let* ((dbpath (dbr:dbdat-dbfile dbdat)) - (dbdir (pathname-directory dbpath)) - (fname (pathname-strip-directory dbpath))) - (debug:print-info 0 *default-log-port* "Checking db " dbpath " for errors.") - (cond - ((not (file-write-access? dbdir)) - (debug:print 0 *default-log-port* "WARNING: can't write to " dbdir ", can't fix " fname) - #f) - - ;; handle special cases, megatest.db and monitor.db - ;; - ;; NOPE: apply this same approach to all db files - ;; - (else ;; ((equal? fname "megatest.db") ;; this file can be regenerated if needed - (handle-exceptions - exn - (begin - (debug:print 0 *default-debug-port* "Problems trying to repair the db, exn=" exn) - ;; (db:move-and-recreate-db dbdat) - (if (> numtries 0) - (db:repair-db dbdat numtries: (- numtries 1)) - #f) - (debug:print 0 *default-log-port* "FATAL: file " dbpath " was found corrupted, an attempt to fix has been made but you must start over.") - (debug:print 0 *default-log-port* - " check the following:\n" - " 1. full directories, look in ~/ /tmp and " dbdir "\n" - " 2. write access to " dbdir "\n\n" - " if the automatic recovery failed you may be able to recover data by doing \"" - (if (member fname '("megatest.db" "monitor.db")) - "megatest -cleanup-db" - "megatest -import-megatest.db;megatest -cleanup-db") - "\"\n") - (exit) ;; we can not safely continue when a db was corrupted - even if fixed. - ) - ;; test read/write access to the database - (let ((db (sqlite3:open-database dbpath))) - (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) - (sqlite3:execute db "PRAGMA synchronous = 0;") - (cond - ((equal? fname "megatest.db") - (sqlite3:execute db "DELETE FROM tests WHERE state='DELETED';")) - ((equal? fname "main.db") - (sqlite3:execute db "DELETE FROM runs WHERE state='deleted';")) - ((string-match "\\d.db" fname) - (sqlite3:execute db "UPDATE tests SET state='DELETED' WHERE state='DELETED';")) - ((equal? fname "monitor.db") - (sqlite3:execute "DELETE FROM servers WHERE state LIKE 'defunct%';")) - (else - (sqlite3:execute db "vacuum;"))) - - (sqlite3:finalize! db) - #t)))))) - - - -(define (db:adj-target db) - (let ((fields (configf:get-section *configdat* "fields")) - (field-num 0)) - ;; because we will be refreshing the keys table it is best to clear it here - (sqlite3:execute db "DELETE FROM keys;") - (for-each - (lambda (field) - (let ((column (car field)) - (spec (cadr field))) - (handle-exceptions - exn - (if (string-match ".*duplicate.*" ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 *default-log-port* "Target field " column " already exists in the runs table") - (db:general-sqlite-error-dump exn "alter table runs ..." #f "none")) - ;; Add the column if needed - (sqlite3:execute - db - (conc "ALTER TABLE runs ADD COLUMN " column " " spec))) - ;; correct the entry in the keys column - (sqlite3:execute - db - "INSERT INTO keys (id,fieldname,fieldtype) VALUES (?,?,?);" - field-num column spec) - ;; fill in blanks (not allowed as it would be part of the path - (sqlite3:execute - db - (conc "UPDATE runs SET " column "='x' WHERE " column "='';")) - (set! field-num (+ field-num 1)))) - fields))) - -(define *global-db-store* (make-hash-table)) - -(define (db:get-access-mode) - (if (args:get-arg "-use-db-cache") 'cached 'rmt)) - -;; Add db direct -;; -(define (db:dispatch-query access-mode rmt-cmd db-cmd . params) - (if (eq? access-mode 'cached) - (debug:print 2 *default-log-port* "not doing cached calls right now")) -;; (apply db:call-with-cached-db db-cmd params) - (apply rmt-cmd params)) -;;) - -;; return the target db handle so it can be used -;; -(define (db:cache-for-read-only source target #!key (use-last-update #f)) - (if (and (hash-table-ref/default *global-db-store* target #f) - (>= (file-modification-time target)(file-modification-time source))) - (hash-table-ref *global-db-store* target) - (let* ((toppath (launch:setup)) - (targ-db-last-mod (db:get-sqlite3-mod-time target)) -;; (if (common:file-exists? target) -;; BUG: This needs to include wal mode stuff .shm etc. -;; (file-modification-time target) -;; 0)) - (cache-db (or (hash-table-ref/default *global-db-store* target #f) - (db:open-megatest-db path: target))) - (source-db (db:open-megatest-db path: source)) - (curr-time (current-seconds)) - (res '()) - (last-update (if use-last-update (cons "last_update" targ-db-last-mod) #f))) - (db:sync-tables (db:sync-main-list source-db) last-update source-db cache-db) - (db:sync-tables db:sync-tests-only last-update source-db cache-db) - (hash-table-set! *global-db-store* target cache-db) - cache-db))) - -(define (db:get-sqlite3-mod-time fname) - (let* ((wal-file (conc fname "-wal")) - (shm-file (conc fname "-shm")) - (get-mtime (lambda (f) - (if (and (file-exists? f) - (file-read-access? f)) - (file-modification-time f) - 0)))) - (max (get-mtime fname) - (get-mtime wal-file) - (get-mtime shm-file)))) - -;; (define (db:all-db-sync dbstruct) -;; (let* ((dbdat (db:open-db dbstruct #f db:initialize-main-db)) -;; (data-synced 0) ;; count of changed records -;; (tmp-area (common:make-tmpdir-name *toppath*)) -;; (dbfiles (glob (conc tmp-area"/.mtdb/*.db"))) -;; (sync-durations (make-hash-table)) -;; (no-sync-db (db:open-no-sync-db))) -;; (for-each -;; (lambda (file) ;; tmp db file -;; (debug:print-info 3 *default-log-port* "file: " file) -;; (let* ((fname (conc (pathname-file file) ".db")) ;; fname is tmp db file -;; (wal-file (conc fname "-wal")) -;; (shm-file (conc fname "-shm")) -;; (fulln (conc *toppath*"/,mtdb/"fname)) ;; fulln is nfs db name -;; (wal-time (if (file-exists? wal-file) -;; (file-modification-time wal-file) -;; 0)) -;; (shm-time (if (file-exists? shm-file) -;; (file-modification-time shm-file) -;; 0)) -;; (time1 (db:get-sqlite3-mod-time file)) -;; ;; (if (file-exists? file) ;; time1 is the max itime of the tmp db, -wal and -shm files. -;; ;; (max (file-modification-time file) wal-time shm-time) -;; ;; (begin -;; ;; (debug:print-info 2 *default-log-port* "Sync - I do not see file "file) -;; ;; 1))) -;; (time2 (db:get-sqlite3-mod-time fulln)) -;; ;; (if (file-exists? fulln) ;; time2 is nfs file time -;; ;; (file-modification-time fulln) -;; ;; (begin -;; ;; (debug:print-info 2 *default-log-port* "Sync - I do not see file "fulln) -;; ;; 0))) -;; (changed (> (- time1 time2) (+ (random 5) 1))) ;; it has been at some few seconds since last synced -;; (changed10 (> (- time1 time2) 10)) ;; it has been at least ten seconds since sync'd -;; (jfile-exists (file-exists? (conc file"-journal"))) ;; i.e. are we busy? -;; (do-cp (cond -;; ((not (file-exists? fulln)) ;; shouldn't happen, but this might recover -;; (cons #t (conc "File "fulln" not found! Copying "fname" to "fulln))) -;; ((and (not jfile-exists) changed) -;; (cons #t "not busy, changed")) ;; not busy and changed -;; ((and jfile-exists changed10) -;; (cons #t "busy but not synced in a while")) ;; busy but not sync'd in over 10 seconds -;; ((and changed *time-to-exit*) -;; (cons #t "Time to exit, forced final sync")) ;; last sync -;; (else -;; (cons #f "No sync needed"))))) -;; (if (car do-cp) -;; (let* ((start-time (current-milliseconds)) -;; (fname (pathname-file file)) -;; (runid (if (string= fname "main") #f (string->number fname)))) -;; (debug:print-info 3 *default-log-port* "db:all-db-sync: fname: " -;; fname", delta: " (- time1 time2) " seconds, reason: "(cdr do-cp)) -;; (db:lock-and-delta-sync no-sync-db dbstruct fname runid (db:get-keys dbstruct) db:initialize-main-db) -;; (hash-table-set! sync-durations (conc fname".db") -;; (- (current-milliseconds) start-time))) -;; (debug:print-info 3 *default-log-port* "skipping sync. " file " is up to date") -;; ))) -;; dbfiles) -;; ;; WHY does the dbdat need to be added back? -;; (if dbdat (dbfile:add-dbdat dbstruct #f dbdat)) -;; ) -;; #t) - -(define (db:kill-servers) - (let* ((tl (launch:setup)) ;; need this to initialize *toppath* - (servdir (conc *toppath* "/.servinfo")) - (servfiles (glob (conc servdir "/*:*.db"))) - (fmtstr "~10a~22a~10a~25a~25a~8a\n") - (dbfiles (append (glob (conc *toppath* "/.mtdb/main.db")) (glob (conc *toppath* "/.mtdb/?.db"))(glob (conc *toppath* "/.mtdb/??.db")))) - (ttdat (make-tt areapath: *toppath*)) - ) - (format #t fmtstr "DB" "host:port" "PID" "age" "last mod" "state") - (for-each - (lambda (dbfile) - (let* ( - (dbfname (conc (pathname-file dbfile) ".db")) - (sfiles (tt:find-server *toppath* dbfname)) - ) - (for-each - (lambda (sfile) - (let ( - (sinfos (tt:get-server-info-sorted ttdat dbfname)) - ) - (for-each - (lambda (sinfo) - (let* ( - (db (list-ref sinfo 5)) - (pid (list-ref sinfo 4)) - (host (list-ref sinfo 0)) - (port (list-ref sinfo 1)) - (server-id (list-ref sinfo 3)) - (age (seconds->hr-min-sec (- (current-seconds) (list-ref sinfo 2)))) - (last-mod (seconds->string (list-ref sinfo 2))) - (killed (system (conc "ssh " host " kill " pid " > /dev/null"))) - (dummy2 (sleep 1)) - (state (if (> (system (conc "ssh " host " ps " pid " > /dev/null")) 0) "dead" "alive")) - ) - (format #t fmtstr db (conc host ":" port) pid age last-mod state) - (system (conc "rm " sfile)) - ) - ) - sinfos - ) - ) - ) - sfiles - ) - ) - ) - dbfiles - ) - ;; remove this db, because otherwise metadata contains records for old servers, and this causes a problem with db:no-sync-get-lock-with-id. - (if (file-exists? (conc *toppath* "/.mtdb/no-sync.db")) - (delete-file (conc *toppath* "/.mtdb/no-sync.db")) - ) - ) -) - -;; options: -;; -;; 'killservers - kills all servers -;; 'dejunk - removes junk records -;; 'adj-testids - move test-ids into correct ranges -;; 'old2new - sync megatest.db to /tmp/.../megatest.db and /tmp/.../megatest_ref.db -;; 'new2old - sync /tmp/.../megatest.db to megatest.db and /tmp/.../megatest_ref.db (and update data_synced) -;; 'closeall - close all opened dbs -;; 'schema - attempt to apply schema changes -;; run-ids: '(1 2 3 ...) or #f (for all) -;; -(define (db:multi-db-sync dbstruct . options) - (let* (;; (dbdat (db:open-db dbstruct #f dbfile:db-init-proc)) - (data-synced 0) ;; count of changed records - (tmp-area (common:make-tmpdir-name *toppath* "")) - (old2new (member 'old2new options)) - (dejunk (member 'dejunk options)) - (killservers (member 'killservers options)) - (src-area (if old2new *toppath* tmp-area)) - (dest-area (if old2new tmp-area (conc *toppath* "/.mtdb"))) - (dbfiles (if old2new (glob (conc *toppath* "/.mtdb/*.db")) - (glob (conc tmp-area "/*.db")))) - (keys (db:get-keys dbstruct)) - (sync-durations (make-hash-table))) - - ;; kill servers - ;; (if killservers (db:kill-servers)) - - (if (not dbfiles) - (debug:print-error 0 *default-log-port* "no dbfiles found in " (conc *toppath* "/.mtdb")) - (for-each - (lambda (srcfile) - (debug:print-info 3 *default-log-port* "file: " srcfile) - (let* ((fname (conc (pathname-file srcfile) ".db")) - (basename (pathname-file srcfile)) - (run-id (if (string= basename "main") #f (string->number basename))) - (destfile (conc dest-area "/" fname)) - (dest-directory dest-area) - (time1 (file-modification-time srcfile)) - (time2 (if (file-exists? destfile) - (begin - (debug:print-info 2 *default-log-port* "destfile " destfile " exists") - (file-modification-time destfile)) - (begin - (debug:print-info 0 *default-log-port* "Sync - I do not see file " destfile) - 0))) - (changed ( < (- time2 time1) 6.0)) ;; dest db not updated within last 6 seconds - - (do-cp (cond - ((not (file-exists? destfile)) ;; shouldn't happen, but this might recover - (debug:print-info 2 *default-log-port* "File " destfile " not found. Copying "srcfile" to "destfile) - ;; TODO: Need to fix this for WAL mod. Can't just copy. - (system (conc "/bin/mkdir -p " dest-directory)) - (system (conc "/bin/cp " srcfile " " destfile)) - #t) - (changed ;; (and changed - #t) - ((and changed *time-to-exit*) ;; last sync - #t) - (else - #f)))) - - (if (or dejunk do-cp) - (let* ((start-time (current-milliseconds)) - (subdb (or (dbfile:get-subdb dbstruct run-id) (dbfile:init-subdb dbstruct run-id dbfile:db-init-proc))) - (dbdat (or (dbfile:get-dbdat dbstruct run-id) (dbfile:open-db dbstruct run-id dbfile:db-init-proc))) - (mtdb (dbr:subdb-mtdbdat subdb)) - ;; - ;; BUG: -mrw- I think this next line is wrong. run-id should be the path to .mtdb/.db - ;; - (tmpdb (dbfile:open-db dbstruct run-id dbfile:db-init-proc))) - - (if dejunk - (begin - (debug:print 0 *default-log-port* "Cleaning tmp DB") - (db:clean-up run-id tmpdb) - (debug:print 0 *default-log-port* "Cleaning nfs DB") - (db:clean-up run-id mtdb) - ) - ) - (debug:print-info 2 *default-log-port* "delta syncing file: " srcfile ", time diff: " (- time1 time2) " seconds") - (if old2new - (begin - (db:sync-tables (db:sync-all-tables-list - (db:get-keys dbstruct)) - #f mtdb tmpdb)) - (begin - (db:sync-tables (db:sync-all-tables-list (db:get-keys dbstruct)) #f tmpdb mtdb))) - (hash-table-set! sync-durations (conc srcfile ".db") (- (current-milliseconds) start-time))) - (debug:print-info 2 *default-log-port* "skipping delta sync. " srcfile " is up to date")))) - dbfiles)) - data-synced)) - -;; Sync all changed db's -;; -(define (db:tmp->megatest.db-sync dbstruct run-id last-update) - (let* ((subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct))) - (res '())) - (for-each - (lambda (subdb) - (let* ((mtdb (dbr:subdb-mtdb subdb)) - (tmpdb (db:get-subdb dbstruct run-id)) - (refndb (dbr:subdb-refndb subdb)) - (newres (db:sync-tables (db:sync-all-tables-list (db:get-keys dbstruct)) last-update tmpdb refndb mtdb))) - ;; (stack-push! (dbr:subdb-dbstack subdb) tmpdb) - ;; BUG: verify this is really needed - (dbfile:add-dbdat dbstruct run-id tmpdb) - (set! res (cons newres res)))) - subdbs) - res)) - -;;;; run-ids -;; if #f use *db-local-sync* : or 'local-sync-flags -;; if #t use timestamps : or 'timestamps -;; -;; NB// no-sync-db is the db handle, not a flag! -;; -(define (db:sync-to-megatest.db dbstruct #!key (no-sync-db #f)) - (let* ((start-time (current-seconds)) - (last-full-update (if no-sync-db - (db:no-sync-get/default no-sync-db "LAST_FULL_UPDATE" 0) - 0)) - (full-sync-needed (> (- start-time last-full-update) 3600)) ;; every hour do a full sync - (last-update (if full-sync-needed - 0 - (if no-sync-db - (db:no-sync-get/default no-sync-db "LAST_UPDATE" 0) - 0))) ;; (or (db:get-var dbstruct "LAST_UPDATE") 0)) - (sync-needed (> (- start-time last-update) 6)) - (res (if (or sync-needed ;; don't sync if a sync already occurred in the past 6 seconds - full-sync-needed) - (begin - (if no-sync-db - (begin - (if full-sync-needed (db:no-sync-set no-sync-db "LAST_FULL_UPDATE" start-time)) - (db:no-sync-set no-sync-db "LAST_UPDATE" start-time))) - (db:tmp->megatest.db-sync dbstruct last-update)) - 0)) - (sync-time (- (current-seconds) start-time))) - (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id)) - (if (common:low-noise-print 30 "sync new to old") - (if sync-needed - (debug:print-info 0 *default-log-port* "Sync of " res " records from newdb to olddb completed in " sync-time " seconds pid="(current-process-id)) - (debug:print-info 0 *default-log-port* "No sync needed, last updated " (- start-time last-update) " seconds ago"))) - res)) - - -(define (db:initialize-main-db db) - (when (not *configinfo*) - (launch:setup)) ;; added because Elena was getting stack dump because *configinfo* below was #f. - (let* ((configdat (car *configinfo*)) ;; tut tut, global warning... - (keys (keys:config-get-fields configdat)) - (havekeys (> (length keys) 0)) - (keystr (keys->keystr keys)) - (fieldstr (keys:make-key/field-string configdat)) - #;(db (dbr:dbdat-dbh dbdat))) - (for-each (lambda (key) - (let ((keyn key)) - (if (member (string-downcase keyn) - (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" - "pass_count" "contour")) - (begin - (debug:print 0 *default-log-port* "ERROR: your key cannot be named " keyn " as this conflicts with the same named field in the runs table, you must remove your megatest.db and /.db before trying again.") - (exit 1))))) - keys) - (sqlite3:with-transaction - db - (lambda () - ;; handle-exceptions - ;; exn - ;; (begin - ;; (debug:print 0 "ERROR: Failed to create tables. Look at your [fields] section, should be: fieldname TEXT DEFAULT 'yourdefault'") - ;; (exit)) - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS keys (id INTEGER PRIMARY KEY, fieldname TEXT, fieldtype TEXT, CONSTRAINT keyconstraint UNIQUE (fieldname));") - (for-each - (lambda (key) - (let* ((fieldname #f) - (fieldtype #f)) - (sqlite3:for-each-row - (lambda (fn ft) - (set! fieldname fn) - (set! fieldtype ft)) - db - "SELECT fieldname,fieldtype FROM keys WHERE fieldname=?" key) - (if (not fieldname) - (sqlite3:execute db "INSERT INTO keys (fieldname,fieldtype) VALUES (?,?);" key "TEXT")))) - keys) - (sqlite3:execute db (conc - "CREATE TABLE IF NOT EXISTS runs (id INTEGER PRIMARY KEY, \n " - fieldstr (if havekeys "," "") " - runname TEXT DEFAULT 'norun', - contour TEXT DEFAULT '', - state TEXT DEFAULT '', - status TEXT DEFAULT '', - owner TEXT DEFAULT '', - event_time TIMESTAMP DEFAULT (strftime('%s','now')), - comment TEXT DEFAULT '', - fail_count INTEGER DEFAULT 0, - pass_count INTEGER DEFAULT 0, - last_update INTEGER DEFAULT (strftime('%s','now')), - CONSTRAINT runsconstraint UNIQUE (runname" (if havekeys "," "") keystr "));")) - ;; All triggers created at once in end - ;;(sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs - ;; FOR EACH ROW - ;; BEGIN - ;; UPDATE runs SET last_update=(strftime('%s','now')) - ;; WHERE id=old.id; - ;; END;") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS run_stats ( - id INTEGER PRIMARY KEY, - run_id INTEGER, - state TEXT, - status TEXT, - count INTEGER, - last_update INTEGER DEFAULT (strftime('%s','now')))") - ;; All triggers created at once in end - ;; (sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats - ;; FOR EACH ROW - ;; BEGIN - ;; UPDATE run_stats SET last_update=(strftime('%s','now')) - ;; WHERE id=old.id; - ;; END;") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_meta ( - id INTEGER PRIMARY KEY, - testname TEXT DEFAULT '', - author TEXT DEFAULT '', - owner TEXT DEFAULT '', - description TEXT DEFAULT '', - reviewed TIMESTAMP, - iterated TEXT DEFAULT '', - avg_runtime REAL, - avg_disk REAL, - tags TEXT DEFAULT '', - jobgroup TEXT DEFAULT 'default', - CONSTRAINT test_meta_constraint UNIQUE (testname));") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, - action TEXT DEFAULT '', - owner TEXT, - state TEXT DEFAULT 'new', - target TEXT DEFAULT '', - name TEXT DEFAULT '', - testpatt TEXT DEFAULT '', - keylock TEXT, - params TEXT, - creation_time TIMESTAMP DEFAULT (strftime('%s','now')), - execution_time TIMESTAMP);") - ;; archive disk areas, cached info from [archive-disks] - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_disks ( - id INTEGER PRIMARY KEY, - archive_area_name TEXT, - disk_path TEXT, - last_df INTEGER DEFAULT -1, - last_df_time TIMESTAMP DEFAULT (strftime('%s','now')), - creation_time TIMESTAMP DEFAULT (strftime('%s','now')));") - ;; individual bup (or tar) data chunks - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_blocks ( - id INTEGER PRIMARY KEY, - archive_disk_id INTEGER, - disk_path TEXT, - last_du INTEGER DEFAULT -1, - last_du_time TIMESTAMP DEFAULT (strftime('%s','now')), - creation_time TIMESTAMP DEFAULT (strftime('%s','now')));") - ;; tests allocated to what chunks. reusing a chunk for a test/item_path is very efficient - ;; NB// the per run/test recording of where the archive is stored is done in the test - ;; record. - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_allocations ( - id INTEGER PRIMARY KEY, - archive_block_id INTEGER, - testname TEXT, - item_path TEXT, - creation_time TIMESTAMP DEFAULT (strftime('%s','now')));") - ;; move this clean up call somewhere else - (sqlite3:execute db "DELETE FROM tasks_queue WHERE state='done' AND creation_time < ?;" (- (current-seconds)(* 24 60 60))) ;; remove older than 24 hrs - (sqlite3:execute db (conc "CREATE INDEX IF NOT EXISTS runs_index ON runs (runname" (if havekeys "," "") keystr ");")) - ;; (sqlite3:execute db "CREATE VIEW runs_tests AS SELECT * FROM runs INNER JOIN tests ON runs.id=tests.run_id;") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS extradat (id INTEGER PRIMARY KEY, run_id INTEGER, key TEXT, val TEXT);") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS metadat (id INTEGER PRIMARY KEY, var TEXT, val TEXT, - CONSTRAINT metadat_constraint UNIQUE (var));") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS access_log (id INTEGER PRIMARY KEY, user TEXT, accessed TIMESTAMP, args TEXT);") - ;; Must do this *after* running patch db !! No more. - ;; cannot use db:set-var since it will deadlock, hardwire the code here - (let* ((prev-version #f) - (curr-version (common:version-signature))) - (sqlite3:for-each-row - (lambda (ver) - (set! prev-version ver)) - db - "SELECT val FROM metadat WHERE var='MEGATEST_VERSION';") - (if prev-version - (if (not (equal? prev-version curr-version)) - (sqlite3:execute db "UPDATE metadat SET val=? WHERE var=?;" curr-version "MEGATEST_VERSION")) - (sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" "MEGATEST_VERSION" curr-version) )) - (debug:print-info 11 *default-log-port* "db:initialize END") ;; )))) - - ;;====================================================================== - ;; R U N S P E C I F I C D B - ;;====================================================================== - - ;; (define (db:initialize-run-id-db db) - ;; (sqlite3:with-transaction - ;; db - ;; (lambda () - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS tests - (id INTEGER PRIMARY KEY, - run_id INTEGER DEFAULT -1, - testname TEXT DEFAULT 'noname', - host TEXT DEFAULT 'n/a', - cpuload REAL DEFAULT -1, - diskfree INTEGER DEFAULT -1, - uname TEXT DEFAULT 'n/a', - rundir TEXT DEFAULT '/tmp/badname', - shortdir TEXT DEFAULT '/tmp/badname', - item_path TEXT DEFAULT '', - state TEXT DEFAULT 'NOT_STARTED', - status TEXT DEFAULT 'FAIL', - attemptnum INTEGER DEFAULT 0, - final_logf TEXT DEFAULT 'logs/final.log', - logdat TEXT DEFAULT '', - run_duration INTEGER DEFAULT 0, - comment TEXT DEFAULT '', - event_time TIMESTAMP DEFAULT (strftime('%s','now')), - fail_count INTEGER DEFAULT 0, - pass_count INTEGER DEFAULT 0, - archived INTEGER DEFAULT 0, -- 0=no, > 1=archive block id where test data can be found - last_update INTEGER DEFAULT (strftime('%s','now')), - CONSTRAINT testsconstraint UNIQUE (run_id, testname, item_path));") - ;; deprecated -- (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_index ON tests (run_id, testname, item_path, uname);") - - (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_run_id_index ON tests (run_id);") ;; new - (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_testname_index ON tests (testname,item_path);") ;; new - (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_state_status_index ON tests (state, status); ") ;; new - - ;; All triggers created at once in end - ;;(sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_tests_trigger AFTER UPDATE ON tests - ;; FOR EACH ROW - ;; BEGIN - ;; UPDATE tests SET last_update=(strftime('%s','now')) - ;; WHERE id=old.id; - ;; END;") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_steps - (id INTEGER PRIMARY KEY, - test_id INTEGER, - stepname TEXT, - state TEXT DEFAULT 'NOT_STARTED', - status TEXT DEFAULT 'n/a', - event_time TIMESTAMP, - comment TEXT DEFAULT '', - logfile TEXT DEFAULT '', - last_update INTEGER DEFAULT (strftime('%s','now')), - CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state));") - (sqlite3:execute db "CREATE INDEX IF NOT EXISTS teststeps_index ON test_steps (test_id, stepname, state);") - ;; All triggers created at once in end - ;;(sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps - ;; FOR EACH ROW - ;; BEGIN - ;; UPDATE test_steps SET last_update=(strftime('%s','now')) - ;; WHERE id=old.id; - ;; END;") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_data (id INTEGER PRIMARY KEY, - test_id INTEGER, - category TEXT DEFAULT '', - variable TEXT, - value REAL, - expected REAL, - tol REAL, - units TEXT, - comment TEXT DEFAULT '', - status TEXT DEFAULT 'n/a', - type TEXT DEFAULT '', - last_update INTEGER DEFAULT (strftime('%s','now')), - CONSTRAINT test_data_constraint UNIQUE (test_id,category,variable));") - (sqlite3:execute db "CREATE INDEX IF NOT EXISTS test_data_index ON test_data (test_id);") - ;; All triggers created at once in end - ;;(sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data - ;; FOR EACH ROW - ;; BEGIN - ;; UPDATE test_data SET last_update=(strftime('%s','now')) - ;; WHERE id=old.id; - ;; END;") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_rundat ( - id INTEGER PRIMARY KEY, - test_id INTEGER, - update_time TIMESTAMP, - cpuload INTEGER DEFAULT -1, - diskfree INTEGER DEFAULT -1, - diskusage INTGER DEFAULT -1, - run_duration INTEGER DEFAULT 0, - last_update INTEGER DEFAULT (strftime('%s','now')));") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archives ( - id INTEGER PRIMARY KEY, - test_id INTEGER, - state TEXT DEFAULT 'new', - status TEXT DEFAULT 'n/a', - archive_type TEXT DEFAULT 'bup', - du INTEGER, - archive_path TEXT, - last_update INTEGER DEFAULT (strftime('%s','now')));"))) - (db:create-triggers db) - db)) ;; ) - -;;====================================================================== -;; A R C H I V E S -;;====================================================================== - -;; dneeded is minimum space needed, scan for existing archives that -;; are on disks with adequate space and already have this test/itempath -;; archived -;; -(define (db:archive-get-allocations dbstruct testname itempath dneeded) - (let* ((dbdat (db:get-subdb dbstruct #f)) ;; archive tables are in main.db - (db (dbr:dbdat-dbh dbdat)) - (res '()) - (blocks '())) ;; a block is an archive chunck that can be added too if there is space - (sqlite3:for-each-row - (lambda (id archive-disk-id disk-path last-du last-du-time) - (set! res (cons (vector id archive-disk-id disk-path last-du last-du-time) res))) - db - "SELECT b.id,b.archive_disk_id,b.disk_path,b.last_du,b.last_du_time FROM archive_blocks AS b - INNER JOIN archive_allocations AS a ON a.archive_block_id=b.id - WHERE a.testname=? AND a.item_path=?;" - testname itempath) - ;; Now res has list of candidate paths, look in archive_disks for candidate with potential free space - (if (null? res) - '() - (sqlite3:for-each-row - (lambda (id archive-area-name disk-path last-df last-df-time) - (set! blocks (cons (vector id archive-area-name disk-path last-df last-df-time) blocks))) - db - (conc - "SELECT d.id,d.archive_area_name,disk_path,last_df,last_df_time FROM archive_disks AS d - INNER JOIN archive_blocks AS b ON d.id=b.archive_disk_id - WHERE b.id IN (" (string-intersperse (map conc res) ",") ") AND - last_df > ?;") - dneeded)) - ;; BUG: Verfify this is really needed - (dbfile:add-dbdat dbstruct #f dbdat) - blocks)) - -;; returns id of the record, register a disk allocated to archiving and record it's last known -;; available space -;; -(define (db:archive-register-disk dbstruct bdisk-name bdisk-path df) - (let* ((dbdat (db:get-subdb dbstruct #f)) ;; archive tables are in main.db - (db (dbr:dbdat-dbh dbdat)) - (res #f)) - (sqlite3:for-each-row - (lambda (id) - (set! res id)) - db - "SELECT id FROM archive_disks WHERE archive_area_name=? AND disk_path=?;" - bdisk-name bdisk-path) - (if res ;; record exists, update df and return id - (begin - (sqlite3:execute db "UPDATE archive_disks SET last_df=?,last_df_time=(strftime('%s','now')) - WHERE archive_area_name=? AND disk_path=?;" - df bdisk-name bdisk-path) - (dbfile:add-dbdat dbstruct #f dbdat) - res) - (begin - (sqlite3:execute - db - "INSERT OR REPLACE INTO archive_disks (archive_area_name,disk_path,last_df) - VALUES (?,?,?);" - bdisk-name bdisk-path df) - (dbfile:add-dbdat dbstruct #f dbdat) - (db:archive-register-disk dbstruct bdisk-name bdisk-path df))))) - -;; record an archive path created on a given archive disk (identified by it's bdisk-id) -;; if path starts with / then it is full, otherwise it is relative to the archive disk -;; preference is to store the relative path. -;; -(define (db:archive-register-block-name dbstruct bdisk-id archive-path #!key (du #f)) - (let* ((dbdat (db:get-subdb dbstruct #f)) ;; archive tables are in main.db - (db (dbr:dbdat-dbh dbdat)) - (res #f)) - ;; first look to see if this path is already registered - (sqlite3:for-each-row - (lambda (id) - (set! res id)) - db - "SELECT id FROM archive_blocks WHERE archive_disk_id=? AND disk_path=?;" - bdisk-id archive-path) - (if res ;; record exists, update du if applicable and return res - (if du (sqlite3:execute db "UPDATE archive_blocks SET last_du=?,last_du_time=(strftime('%s','now')) - WHERE archive_disk_id=? AND disk_path=?;" - bdisk-id archive-path du)) - (begin - (sqlite3:execute db "INSERT OR REPLACE INTO archive_blocks (archive_disk_id,disk_path,last_du) - VALUES (?,?,?);" - bdisk-id archive-path (or du 0)) - (set! res (db:archive-register-block-name dbstruct bdisk-id archive-path du: du)))) - (dbfile:add-dbdat dbstruct #f dbdat) - res)) - - -;; The "archived" field in tests is overloaded; 0 = not archived, > 0 archived in block with given id -;; -(define (db:test-set-archive-block-id dbstruct run-id test-id archive-block-id) - (db:with-db - dbstruct - run-id - #t - (lambda (dbdat db) - (sqlite3:execute db "UPDATE tests SET archived=? WHERE id=?;" - archive-block-id test-id)))) - -;; Look up the archive block info given a block-id -;; -(define (db:test-get-archive-block-info dbstruct archive-block-id) - (db:with-db - dbstruct - #f - #f - (lambda (dbdat db) - (let ((res #f)) - (sqlite3:for-each-row - ;; 0 1 2 3 4 5 - (lambda (id archive-disk-id disk-path last-du last-du-time creation-time) - (set! res (vector id archive-disk-id disk-path last-du last-du-time creation-time))) - db - "SELECT id,archive_disk_id,disk_path,last_du,last_du_time,creation_time FROM archive_blocks WHERE id=?;" - archive-block-id) - res)))) - -;; (define (db:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey) -;; (let* ((dbdat (db:get-subdb dbstruct #f)) ;; archive tables are in main.db -;; (db (dbr:dbdat-dbh dbdat)) -;; (res '()) -;; (blocks '())) ;; a block is an archive chunck that can be added too if there is space -;; (sqlite3:for-each-row #f) - -;;====================================================================== -;; D B U T I L S -;;====================================================================== - -;;====================================================================== -;; M A I N T E N A N C E -;;====================================================================== - -;; (define (db:have-incompletes? dbstruct run-id ovr-deadtime) -;; (let* ((incompleted '()) -;; (oldlaunched '()) -;; (toplevels '()) -;; (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) -;; (deadtime (if (and deadtime-str -;; (string->number deadtime-str)) -;; (string->number deadtime-str) -;; 72000))) ;; twenty hours -;; (db:with-db -;; dbstruct run-id #f -;; (lambda (dbdat db) -;; (if (number? ovr-deadtime)(set! deadtime ovr-deadtime)) -;; -;; ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes -;; ;; -;; ;; HOWEVER: this code in run:test seems to work fine -;; ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) -;; ;; (db:test-get-run_duration testdat))) -;; ;; 600) -;; ;; (db:delay-if-busy dbdat) -;; (sqlite3:for-each-row -;; (lambda (test-id run-dir uname testname item-path) -;; (if (and (equal? uname "n/a") -;; (equal? item-path "")) ;; this is a toplevel test -;; ;; what to do with toplevel? call rollup? -;; (begin -;; (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) -;; (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) -;; (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) -;; (db:get-cache-stmth dbdat db -;; "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');") -;; run-id deadtime) -;; -;; ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config -;; ;; -;; ;; (db:delay-if-busy dbdat) -;; (sqlite3:for-each-row -;; (lambda (test-id run-dir uname testname item-path) -;; (if (and (equal? uname "n/a") -;; (equal? item-path "")) ;; this is a toplevel test -;; ;; what to do with toplevel? call rollup? -;; (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) -;; (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) -;; (db:get-cache-stmth dbdat db -;; "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');") -;; run-id) -;; -;; (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") -;; (if (and (null? incompleted) -;; (null? oldlaunched) -;; (null? toplevels)) -;; #f -;; #t))))) - -;; BUG: Probably broken - does not explicitly use run-id in the query -;; -(define (db:top-test-set-per-pf-counts dbstruct run-id test-name) - (db:general-call dbstruct run-id 'top-test-set-per-pf-counts (list test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name))) - -;; Clean out old junk and vacuum the database -;; -;; Ultimately do something like this: -;; -;; 1. Look at test records either deleted or part of deleted run: -;; a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown' -;; b. If test dir gone, delete the test record -;; 2. Look at run records -;; a. If have tests that are not deleted, set state='unknown' -;; b. .... -;; -(define (db:clean-up run-id dbdat) - (if run-id - (begin - (debug:print 0 *default-log-port* "Cleaning run DB " run-id) - (db:clean-up-rundb dbdat run-id) - ) - (begin - (debug:print 0 *default-log-port* "Cleaning main DB ") - (db:clean-up-maindb dbdat) - ) - ) -) - - -;; Clean out old junk and vacuum the database -;; -;; Ultimately do something like this: -;; -;; 1. Look at test records either deleted or part of deleted run: -;; a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown' -;; b. If test dir gone, delete the test record -;; 2. Look at run records -;; a. If have tests that are not deleted, set state='unknown' -;; b. .... -;; -(define (db:clean-up-rundb dbdat run-id) - ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db") - (let* ((db (dbr:dbdat-dbh dbdat)) - (test-count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests);")) - (step-count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM test_steps);")) - (statements - (map (lambda (stmt) - (sqlite3:prepare db stmt)) - (list - "DELETE FROM tests WHERE state='DELETED';" - "DELETE FROM test_steps WHERE status = 'DELETED';" - "DELETE FROM tests WHERE run_id IN (SELECT id FROM runs WHERE state = 'deleted');" - )))) - (sqlite3:with-transaction - db - (lambda () - (sqlite3:for-each-row (lambda (tot) - (debug:print-info 0 *default-log-port* "Test records count before clean: " tot)) - test-count-stmt) - (sqlite3:for-each-row (lambda (tot) - (debug:print-info 0 *default-log-port* "Test_step records count before clean: " tot)) - step-count-stmt) - (map sqlite3:execute statements) - (sqlite3:for-each-row (lambda (tot) - (debug:print-info 0 *default-log-port* "Test records count after clean: " tot)) - test-count-stmt) - (sqlite3:for-each-row (lambda (tot) - (debug:print-info 0 *default-log-port* "Test_step records count after clean: " tot)) - step-count-stmt))) - (map sqlite3:finalize! statements) - (sqlite3:finalize! test-count-stmt) - (sqlite3:finalize! step-count-stmt) - (sqlite3:execute db "VACUUM;"))) - -;; Clean out old junk and vacuum the database -;; -;; Ultimately do something like this: -;; -;; 1. Look at test records either deleted or part of deleted run: -;; a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown' -;; b. If test dir gone, delete the test record -;; 2. Look at run records -;; a. If have tests that are not deleted, set state='unknown' -;; b. .... -;; -(define (db:clean-up-maindb dbdat) - ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db") - (let* ((db (dbr:dbdat-dbh dbdat)) - (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM runs);")) - (statements - (map (lambda (stmt) - (sqlite3:prepare db stmt)) - (list - ;; delete all tests that belong to runs that are 'deleted' - ;; (conc "DELETE FROM tests WHERE run_id NOT IN (" (string-intersperse (map conc valid-runs) ",") ");") - ;; delete all tests that are 'DELETED' - "DELETE FROM runs WHERE state='deleted';" - ))) - (dead-runs '())) - (sqlite3:for-each-row - (lambda (run-id) - (set! dead-runs (cons run-id dead-runs))) - db - "SELECT id FROM runs WHERE state='deleted';") - ;; (db:delay-if-busy dbdat) - (sqlite3:with-transaction - db - (lambda () - (sqlite3:for-each-row (lambda (tot) - (debug:print-info 0 *default-log-port* "Run records count before clean: " tot)) - count-stmt) - (map sqlite3:execute statements) - (sqlite3:for-each-row (lambda (tot) - (debug:print-info 0 *default-log-port* "Run records count after clean: " tot)) - count-stmt))) - (map sqlite3:finalize! statements) - (sqlite3:finalize! count-stmt) - ;; (db:find-and-mark-incomplete db) - ;; (db:delay-if-busy dbdat) - (sqlite3:execute db "VACUUM;") - dead-runs)) - -;;====================================================================== -;; no-sync.db - small bits of data to be shared between servers -;;====================================================================== - -(define (db:get-dbsync-path) - (case (rmt:transport-mode) - ((http)(common:make-tmpdir-name *toppath* "")) - ((tcp) (conc *toppath*"/.mtdb")) - ((nfs) (conc *toppath*"/.mtdb")) - (else "/tmp/dunno-this-gonna-exist"))) - -;; This is needed for api.scm -(define (db:open-no-sync-db) - (dbfile:open-no-sync-db (db:get-dbsync-path))) - -;; why get the keys from the db? why not get from the *configdat* -;; using keys:config-get-fields? - -(define (db:get-keys dbstruct) - (keys:config-get-fields *configdat*)) - -;; extract index number given a header/data structure -(define (db:get-index-by-header header field) - (list-index (lambda (x)(equal? x field)) header)) - -;; look up values in a header/data structure -(define (db:get-value-by-header row header field) - (let ((len (if (vector? row) - (vector-length row) - 0))) - (if (or (null? header) (not row)) - #f - (let loop ((hed (car header)) - (tal (cdr header)) - (n 0)) - (if (equal? hed field) - (handle-exceptions - exn - (begin - (debug:print 4 *default-log-port* "WARNING: attempt to read non-existant field, row=" - row " header=" header " field=" field ", exn=" exn) - #f) - (if (>= n len) - #f - (vector-ref row n))) - (if (null? tal) #f (loop (car tal)(cdr tal)(+ n 1)))))))) - -;; Accessors for the header/data structure -;; get rows and header from -(define (db:get-header vec)(vector-ref vec 0)) -(define (db:get-rows vec)(vector-ref vec 1)) - -;;====================================================================== -;; R U N S -;;====================================================================== - -(define (db:get-run-times dbstruct run-patt target-patt) -(let ((res `()) - (qry (conc "select runname, (max(end_time)-min(event_time))/60 as runtime, target from (select runname, run_id,tests.event_time,tests.event_time+run_duration AS end_time, " (string-join (db:get-keys dbstruct) " || '/' || ") " as target from tests inner join runs on tests.run_id = runs.id where runs.runname like ? and target like ?) group by run_id ;"))) - ;(print qry) - (db:with-db - dbstruct - #f ;; this is for the main runs db - #f ;; does not modify db - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (runname runtime target ) - (set! res (cons (vector runname runtime target) res))) - db - qry - run-patt target-patt) - res)))) - -(define (db:get-run-name-from-id dbstruct run-id) - (db:with-db - dbstruct - #f ;; this is for the main runs db - #f ;; does not modify db - (lambda (dbdat db) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (runname) - (set! res runname)) - db - "SELECT runname FROM runs WHERE id=?;" - run-id) - res)))) - -(define (db:get-run-key-val dbstruct run-id key) - (db:with-db - dbstruct - #f - #f - (lambda (dbdat db) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (val) - (set! res val)) - db - (conc "SELECT " key " FROM runs WHERE id=?;") - run-id) - res)))) - -;; keys list to key1,key2,key3 ... -(define (runs:get-std-run-fields keys remfields) - (let* ((header (append keys remfields)) - (keystr (conc (keys->keystr keys) "," - (string-intersperse remfields ",")))) - (list keystr header))) - -;; make a query (fieldname like 'patt1' OR fieldname -(define (db:patt->like fieldname pattstr #!key (comparator " OR ")) - (let ((patts (if (string? pattstr) - (string-split pattstr ",") - '("%")))) - (string-intersperse (map (lambda (patt) - (let ((wildtype (if (substring-index "%" patt) "LIKE" "GLOB"))) - (conc fieldname " " wildtype " '" patt "'"))) - (if (null? patts) - '("") - patts)) - comparator))) - - -;; register a test run with the db, this accesses the main.db and does NOT -;; use server api -;; -(define (db:register-run dbstruct keyvals runname state status user contour-in) - (let* ((keys (map car keyvals)) - (keystr (keys->keystr keys)) - (contour (or contour-in "")) ;; empty string to force no hierarcy and be backwards compatible. - (comma (if (> (length keys) 0) "," "")) - (andstr (if (> (length keys) 0) " AND " "")) - (valslots (keys->valslots keys)) ;; ?,?,? ... - (allvals (append (list runname state status user contour) (map cadr keyvals))) - (qryvals (append (list runname) (map cadr keyvals))) - (key=?str (string-intersperse (map (lambda (k)(conc k "=?")) keys) " AND "))) - ;; (debug:print 0 *default-log-port* "Got here 0.") - (debug:print 3 *default-log-port* "keys: " keys " allvals: " allvals " keyvals: " keyvals " key=?str is " key=?str) - (debug:print 2 *default-log-port* "NOTE: using target " (string-intersperse (map cadr keyvals) "/") " for this run") - (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and" - (db:with-db - dbstruct #f #t - (lambda (dbdat db) - ;; (debug:print 0 *default-log-port* "Got here 1.") - (let ((res #f)) - (apply sqlite3:execute db - (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time,contour" - comma keystr ") VALUES (?,?,?,?,strftime('%s','now'),?" comma valslots ");") - allvals) - (apply sqlite3:for-each-row - (lambda (id) - (set! res id)) - db - (let ((qry (conc "SELECT id FROM runs WHERE (runname=? " andstr key=?str ");"))) - qry) - qryvals) - (sqlite3:execute db "UPDATE runs SET state=?,status=?,event_time=strftime('%s','now') WHERE id=? AND state='deleted';" state status res) - res))) - (begin - (debug:print-error 0 *default-log-port* "Called without all necessary keys") - #f)))) - -(define (db:get-run-id dbstruct runname target) - (let ((runs (db:simple-get-runs dbstruct runname #f #f target #f))) ;; runpatt count offset target last-update - (if (null? runs) - #f - (simple-run-id (car runs))))) - -;; called with run-id=#f so will operate on main.db -;; -(define (db:insert-run dbstruct run-id target runname run-meta) - (let* ((keys (db:get-keys dbstruct)) - (runs (db:simple-get-runs dbstruct runname #f #f target #f))) ;; runpatt count offset target last-update - ;; need to insert run based on target and runname - (let* ((targvals (string-split target "/")) - (keystr (string-intersperse keys ",")) - (key?str (string-intersperse (make-list (length targvals) "?") ",")) - (qrystr (conc "INSERT INTO runs (id,runname,"keystr") VALUES (?,?,"key?str")")) - (get-var (lambda (db qrystr) - (let* ((res #f)) - (sqlite3:for-each-row - (lambda row - (set res (car row))) - db qrystr run-id runname) - res)))) - (if (null? runs) - (begin - (db:create-initial-run-record dbstruct run-id runname target) - ) - ) - (let* () - ;;(debug:print 0 *default-log-port* "db:insert-run: Calling db:with-db to update the run record") - (debug:print 0 *default-log-port* "db:insert-run: runid = " run-id) -#; (db:with-db - dbstruct - #f #t - (lambda (dbdat db) - (debug:print 0 *default-log-port* "In the lambda proc for " dbdat " " db) - (for-each - (lambda (keyval) - (debug:print 0 *default-log-port* "In the lambda proc for " keyval) - (let* ((fieldname (car keyval)) - (getqry (conc "SELECT "fieldname" FROM runs WHERE id=?;")) - (setqry (conc "UPDATE runs SET "fieldname"=? WHERE id=?;")) - (val (cdr keyval)) - (valnum (if (number? val) - val - (if (string? val) - (string->number val) - #f)))) - (debug:print 0 *default-log-port* "fieldname " fieldname " val " val " valnum " valnum) - (if (not (member fieldname (cons "runname" keys))) ;; don't attempt to tweak these - (let* ((curr-val (get-var db getqry)) - (have-it (or (equal? curr-val val) - (equal? curr-val valnum)))) - (debug:print 0 *default-log-port* "have-it = " have-it) - (if (not have-it) - (begin - (debug:print 0 *default-log-port* "Do sqlite3:execute") - ;; (sqlite3:execute db setqry (or valnum val) run-id) - ) - ) - ) - ) - (debug:print 0 *default-log-port* "Done with update") - ) - (debug:print 0 *default-log-port* "next keyval") - ) - run-meta))) - run-id)))) - -(define (db:create-initial-run-record dbstruct run-id runname target) - (let* ((keys (db:get-keys dbstruct)) - (targvals (string-split target "/")) - (keystr (string-intersperse keys ",")) - (key?str (string-intersperse (make-list (length targvals) "?") ",")) ;; a string with the same length as targvals, where each element is "?" and interspersed with commas. - (qrystr (conc "INSERT INTO runs (id,runname,"keystr") VALUES (?,?,"key?str")"))) - (debug:print 0 *default-log-port* "db:create-initial-run-record") - (debug:print 0 *default-log-port* "qrystr = " qrystr) - - (db:with-db - dbstruct #f #t ;; run-id writable - (lambda (dbdat db) - (debug:print 0 *default-log-port* "lambda proc: dbdat: " dbdat " db: " db) - (apply sqlite3:execute db qrystr run-id runname targvals))))) - -(define (db:insert-test dbstruct run-id test-rec) - (let* ((testname (alist-ref "testname" test-rec equal?)) - (item-path (alist-ref "item_path" test-rec equal?)) - (id (db:get-test-id dbstruct run-id testname item-path)) - (fieldvals (filter (lambda (x)(not (member (car x) '("id" "last_update")))) test-rec)) - (setqry (conc "UPDATE tests SET "(string-intersperse - (map (lambda (dat) - (conc (car dat)"=?")) - fieldvals) - ",")" WHERE id=?;")) - (insqry (conc "INSERT INTO tests ("(string-intersperse (map (lambda (x) (car x)) fieldvals) ",") - ") VALUES ("(string-intersperse (make-list (length fieldvals) "?") ",")");"))) - ;; (debug:print 0 *default-log-port* "id: "id"\nset: "setqry"\ninsqry: "insqry) - (db:with-db - dbstruct - run-id #t - (lambda (dbdat db) - (if id - (apply sqlite3:execute db setqry (append (map cdr fieldvals) (list id))) - (apply sqlite3:execute db insqry (map cdr fieldvals))))))) - -;; replace header and keystr with a call to runs:get-std-run-fields -;; -;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") ) -;; runpatts: patt1,patt2 ... -;; -(define (db:get-runs dbstruct runpatt count offset keypatts) - (let* ((res '()) - (keys (db:get-keys dbstruct)) - (runpattstr (db:patt->like "runname" runpatt)) - (remfields (list "id" "runname" "state" "status" "owner" "event_time")) - (header (append keys remfields)) - (keystr (conc (keys->keystr keys) "," - (string-intersperse remfields ","))) - (qrystr (conc "SELECT " keystr " FROM runs WHERE (" runpattstr ") " ;; runname LIKE ? " - ;; Generate: " AND x LIKE 'keypatt' ..." - (if (null? keypatts) "" - (conc " AND " - (string-join - (map (lambda (keypatt) - (let ((key (car keypatt)) - (patt (cadr keypatt))) - (db:patt->like key patt))) - keypatts) - " AND "))) - " AND state != 'deleted' ORDER BY event_time DESC " - (if (number? count) - (conc " LIMIT " count) - "") - (if (number? offset) - (conc " OFFSET " offset) - "")))) - (debug:print-info 11 *default-log-port* "db:get-runs START qrystr: " qrystr " keypatts: " keypatts " offset: " offset " limit: " count) - (db:with-db dbstruct #f #f - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (a . x) - (set! res (cons (apply vector a x) res))) - db - qrystr - ))) - (debug:print-info 11 *default-log-port* "db:get-runs END qrystr: " qrystr " keypatts: " keypatts " offset: " offset " limit: " count) - (vector header res))) - -;; simple get-runs -;; -;; records used defined in dbfile -;; -(define (db:simple-get-runs dbstruct runpatt count offset target last-update) - (let* ((res '()) - (keys (db:get-keys dbstruct)) - (runpattstr (db:patt->like "runname" runpatt)) - (remfields (list "id" "runname" "state" "status" "owner" "event_time")) - (targstr (string-intersperse keys "||'/'||")) - (keystr (conc targstr " AS target," - (string-intersperse remfields ","))) - (qrystr (conc "SELECT " keystr " FROM runs WHERE (" runpattstr ") " ;; runname LIKE ? " - ;; Generate: " AND x LIKE 'keypatt' ..." - " AND target LIKE '" target "'" - " AND state != 'deleted' " - (if (number? last-update) - (conc " AND last_update >= " last-update) - "") - " ORDER BY event_time DESC " - (if (number? count) - (conc " LIMIT " count) - "") - (if (number? offset) - (conc " OFFSET " offset) - ""))) - ) - (debug:print-info 11 *default-log-port* "db:get-runs START qrystr: " qrystr " target: " target " offset: " offset " limit: " count) - (db:with-db dbstruct #f #f - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (target id runname state status owner event_time) - (set! res (cons (make-simple-run target id runname state status owner event_time) res))) - db - qrystr - ))) - (debug:print-info 11 *default-log-port* "db:get-runs END qrystr: " qrystr " target: " target " offset: " offset " limit: " count) - res)) - -;; TODO: Switch this to use max(update_time) from each run db? Then if using a server there is no disk traffic (using cachedb db) ??? -;; -;; NOTE: This DOESN'T (necessarily) get the real run ids, but the number of the .db!! - -(define (db:get-changed-run-ids since-time) - (let* ((dbdir (common:make-tmpdir-name *toppath* "")) ;; (configf:lookup *configdat* "setup" "dbdir")) - (alldbs (glob (conc *toppath* "/.mtdb/[0-9]*.db*"))) - (changed (filter (lambda (dbfile) - (> (file-modification-time dbfile) since-time)) - alldbs))) - (delete-duplicates - (map (lambda (dbfile) - (let* ((res (string-match ".*\\/(\\d+)\\.db*" dbfile))) - (if res - (string->number (cadr res)) - (begin - (debug:print 2 *default-log-port* "WARNING: Failed to process " dbfile " for run-id") - 0)))) - changed)))) - -;; Get all targets from the db -;; -(define (db:get-targets dbstruct) - (let* ((res '()) - (keys (db:get-keys dbstruct)) - (header keys) ;; (map key:get-fieldname keys)) - (keystr (keys->keystr keys)) - (qrystr (conc "SELECT " keystr " FROM runs WHERE state != 'deleted';")) - (seen (make-hash-table))) - (db:with-db - dbstruct - #f - #f - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (a . x) - (let ((targ (cons a x))) - (if (not (hash-table-ref/default seen targ #f)) - (begin - (hash-table-set! seen targ #t) - (set! res (cons (apply vector targ) res)))))) - db - qrystr) - (debug:print-info 11 *default-log-port* "db:get-targets END qrystr: " qrystr ) - (vector header res))))) - -;; just get count of runs -(define (db:get-num-runs dbstruct runpatt) - (db:with-db - dbstruct - #f - #f - (lambda (dbdat db) - (let ((numruns 0)) - (debug:print-info 11 *default-log-port* "db:get-num-runs START " runpatt) - (sqlite3:for-each-row - (lambda (count) - (set! numruns count)) - db - "SELECT COUNT(id) FROM runs WHERE runname LIKE ? AND state != 'deleted';" runpatt) - (debug:print-info 11 *default-log-port* "db:get-num-runs END " runpatt) - numruns)))) - -;; just get count of runs -(define (db:get-runs-cnt-by-patt dbstruct runpatt targetpatt keys) - (db:with-db - dbstruct - #f - #f - (lambda (dbdat db) - (let ((numruns 0) - (qry-str #f) - (key-patt "") - (keyvals (if targetpatt (keys:target->keyval keys targetpatt) '()))) - - (for-each (lambda (keyval) - (let* ((key (car keyval)) - (patt (cadr keyval)) - (fulkey (conc ":" key)) - (wildtype (if (substring-index "%" patt) "like" "glob"))) - - (if patt - (set! key-patt (conc key-patt " AND " key " " wildtype " '" patt "'")) - (begin - (debug:print-error 0 *default-log-port* "searching for runs with no pattern set for " fulkey) - (exit 6))))) - keyvals) - ;(print runpatt " -- " key-patt) - (set! qry-str (conc "SELECT COUNT(id) FROM runs WHERE state != 'deleted' AND runname like '" runpatt "'" key-patt)) - ;(print qry-str ) - - (sqlite3:for-each-row - (lambda (count) - (set! numruns count)) - db - qry-str) - (debug:print-info 11 *default-log-port* "db:get-num-runs END " runpatt) - numruns)))) - - -;; (sqlite3#fold-row proc3670 init3671 db-or-stmt3672 . params3673)> -;; -(define (db:get-raw-run-stats dbstruct run-id) - (db:with-db - dbstruct - run-id - #f - (lambda (dbdat db) - (sqlite3:fold-row - (lambda (res state status count) - (cons (list state status count) res)) - '() - db - "SELECT state,status,count(id) AS count FROM tests WHERE run_id=? AND NOT(uname='n/a' AND item_path='') GROUP BY state,status;" - run-id)))) - -;; Update run_stats for given run_id -;; input data is a list (state status count) -;; -(define (db:update-run-stats dbstruct run-id stats) - (mutex-lock! *db-transaction-mutex*) - (db:with-db - dbstruct - #f - #t - (lambda (dbdat db) - ;; remove previous data - - (let* ((stmt1 (sqlite3:prepare db "DELETE FROM run_stats WHERE run_id=? AND state=? AND status=?;")) - (stmt2 (sqlite3:prepare db "INSERT INTO run_stats (run_id,state,status,count) VALUES (?,?,?,?);")) - (res - (sqlite3:with-transaction - db - (lambda () - (for-each - (lambda (dat) - (sqlite3:execute stmt1 run-id (car dat)(cadr dat)) - (apply sqlite3:execute stmt2 run-id dat)) - stats))))) - (sqlite3:finalize! stmt1) - (sqlite3:finalize! stmt2) - (mutex-unlock! *db-transaction-mutex*) - res)))) - -(define (db:get-main-run-stats dbstruct run-id) - (db:with-db - dbstruct - #f ;; this data comes from main - #f - (lambda (dbdat db) - (sqlite3:fold-row - (lambda (res state status count) - (cons (list state status count) res)) - '() - db - "SELECT state,status,count FROM run_stats WHERE run_id=? AND run_id IN (SELECT id FROM runs WHERE state NOT IN ('DELETED','deleted'));" - run-id)))) - -(define (db:print-current-query-stats) - ;; generate stats from *db-api-call-time* - (let ((ordered-keys (sort (hash-table-keys *db-api-call-time*) - (lambda (a b) - (let ((sum-a (common:sum (hash-table-ref *db-api-call-time* a))) - (sum-b (common:sum (hash-table-ref *db-api-call-time* b)))) - (> sum-a sum-b))))) - (total 0)) - (for-each - (lambda (cmd-key) - (let* ((dat (hash-table-ref *db-api-call-time* cmd-key)) - (num (length dat)) - (avg (if (> num 0) - (/ (common:sum dat)(length dat))))) - (set! total (+ total num)) - (debug:print-info 0 *default-log-port* cmd-key "\tavg: " avg " max: " (common:max dat) " min: " (common:min-max < dat) " num: " (length dat)))) - ordered-keys) - (debug:print-info 0 *default-log-port* "TOTAL: " total " api calls since start."))) - -(define (db:get-all-run-ids dbstruct) - (db:with-db - dbstruct - #f - #f - (lambda (dbdat db) - (let ((run-ids '())) - (sqlite3:for-each-row - (lambda (run-id) - (set! run-ids (cons run-id run-ids))) - db - "SELECT id FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;") - (reverse run-ids))))) - -;; get some basic run stats -;; -;; data structure: -;; -;; ( (runname (( state count ) ... )) -;; ( ... -;; -(define (db:get-run-stats dbstruct) - (let* ((totals (make-hash-table)) - (curr (make-hash-table)) - (res '()) - (runs-info '())) - ;; First get all the runname/run-ids - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (run-id runname) - (set! runs-info (cons (list run-id runname) runs-info))) - db - "SELECT id,runname FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;"))) ;; If you change this to the more logical ASC please adjust calls to db:get-run-stats - ;; for each run get stats data - (for-each - (lambda (run-info) - ;; get the net state/status counts for this run - (let* ((run-id (car run-info)) - (run-name (cadr run-info))) - (db:with-db - dbstruct - run-id - #f - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (state status count) - (let ((netstate (if (equal? state "COMPLETED") status state))) - (if (string? netstate) - (begin - (hash-table-set! totals netstate (+ (hash-table-ref/default totals netstate 0) count)) - (hash-table-set! curr netstate (+ (hash-table-ref/default curr netstate 0) count)))))) - db - "SELECT state,status,count(id) FROM tests AS t WHERE run_id=? GROUP BY state,status ORDER BY state,status DESC;" - run-id) - ;; add the per run counts to res - (for-each (lambda (state) - (set! res (cons (list run-name state (hash-table-ref curr state)) res))) - (sort (hash-table-keys curr) string>=)) - (set! curr (make-hash-table)))))) - runs-info) - (for-each (lambda (state) - (set! res (cons (list "Totals" state (hash-table-ref totals state)) res))) - (sort (hash-table-keys totals) string>=)) - res)) - -;; db:get-runs-by-patt -;; get runs by list of criteria -;; register a test run with the db -;; -;; Use: (db:get-value-by-header (db:get-header runinfo)(db:get-rows runinfo)) -;; to extract info from the structure returned -;; -(define (db:get-runs-by-patt dbstruct keys runnamepatt targpatt offset limit fields last-update sort-order ) ;; test-name) - (let* ((tmp (runs:get-std-run-fields keys (or fields '("id" "runname" "state" "status" "owner" "event_time")))) - (keystr (car tmp)) - (header (cadr tmp)) - (key-patt "") - (runwildtype (if (substring-index "%" runnamepatt) "like" "glob")) - (qry-str #f) - (keyvals (if targpatt (keys:target->keyval keys targpatt) '()))) - (for-each (lambda (keyval) - (let* ((key (car keyval)) - (patt (cadr keyval)) - (fulkey (conc ":" key)) - (wildtype (if (substring-index "%" patt) "like" "glob"))) - (if patt - (set! key-patt (conc key-patt " AND " key " " wildtype " '" patt "'")) - (begin - (debug:print-error 0 *default-log-port* "searching for runs with no pattern set for " fulkey) - (exit 6))))) - keyvals) - (set! qry-str (conc "SELECT " keystr " FROM runs WHERE state != 'deleted' AND runname " runwildtype " ? " key-patt - (if last-update - (conc " AND last_update >= " last-update " ") - " ") - " ORDER BY event_time " sort-order " " - (if limit (conc " LIMIT " limit) "") - (if offset (conc " OFFSET " offset) "") - ";")) - (debug:print-info 4 *default-log-port* "runs:get-runs-by-patt qry=" qry-str " " runnamepatt) - ;(print "runs:get-runs-by-patt qry=" qry-str " " runnamepatt) - - (vector header - (reverse - (db:with-db - dbstruct #f #f ;; reads db, does not write to it. - (lambda (dbdat db) - (sqlite3:fold-row - (lambda (res . r) - (cons (list->vector r) res)) - '() - db - qry-str - runnamepatt))))))) - -;; use (get-value-by-header (db:get-header runinfo)(db:get-rows runinfo)) -;; NOTE: Does NOT return a list of rows (or one row) for the first slot of the vector -;; this is inconsistent with get-runs but it makes some sense. -;; -(define (db:get-run-info dbstruct run-id) - ;;(if (hash-table-ref/default *run-info-cache* run-id #f) - ;; (hash-table-ref *run-info-cache* run-id) - (let* ((res (vector #f #f #f #f)) - (keys (db:get-keys dbstruct)) - (remfields (list "id" "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update")) ;; "area_id")) - (header (append keys remfields)) - (keystr (conc (keys->keystr keys) "," - (string-intersperse remfields ",")))) - (debug:print-info 11 *default-log-port* "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr) - - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (a . x) - (set! res (apply vector a x))) - db - (conc "SELECT " keystr " FROM runs WHERE id=?;") - run-id))) - (debug:print-info 11 *default-log-port* "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr) - (let ((finalres (vector header res))) - ;; (hash-table-set! *run-info-cache* run-id finalres) - finalres))) - -(define (db:set-comment-for-run dbstruct run-id comment) - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (sqlite3:execute db "UPDATE runs SET comment=? WHERE id=?;" comment ;; (sdb:qry 'getid comment) - run-id)))) - -;; does not (obviously!) removed dependent data. But why not!!? -(define (db:delete-run dbstruct run-id) - (db:with-db - dbstruct #f #t - (lambda (dbdat db) - (sqlite3:with-transaction - db - (lambda () - (sqlite3:execute db "DELETE FROM test_steps WHERE test_id IN (SELECT id FROM tests WHERE run_id=?);" run-id) - (sqlite3:execute db "DELETE FROM test_data WHERE test_id IN (SELECT id FROM tests WHERE run_id=?);" run-id) - (sqlite3:execute db "UPDATE tests SET state='DELETED',comment='' WHERE run_id=?;" run-id) - ;; (db:delay-if-busy dbdat) - (sqlite3:execute db "UPDATE runs SET state='deleted',comment='' WHERE id=?;" run-id)))))) - -(define (db:update-run-event_time dbstruct run-id) - (db:with-db - dbstruct #f #t - (lambda (dbdat db) - (sqlite3:execute db "UPDATE runs SET event_time=strftime('%s','now') WHERE id=?;" run-id)))) - -(define (db:lock/unlock-run dbstruct run-id lock unlock user) - (db:with-db - dbstruct #f #t - (lambda (dbdat db) - (let ((newlockval (if lock "locked" - (if unlock - "unlocked" - "locked")))) ;; semi-failsafe - (sqlite3:execute db "UPDATE runs SET state=? WHERE id=?;" newlockval run-id) - (sqlite3:execute db "INSERT INTO access_log (user,accessed,args) VALUES(?,strftime('%s','now'),?);" - user (conc newlockval " " run-id)) - (debug:print-info 1 *default-log-port* "" newlockval " run number " run-id))))) - -(define (db:set-run-status dbstruct run-id status msg) - (db:with-db - dbstruct #f #t - (lambda (dbdat db) - (if msg - (sqlite3:execute db "UPDATE runs SET status=?,comment=? WHERE id=?;" status msg run-id) - (sqlite3:execute db "UPDATE runs SET status=? WHERE id=?;" status run-id))))) - -(define (db:set-run-state-status-db dbdat db run-id state status ) - (sqlite3:execute - (db:get-cache-stmth - dbdat db "UPDATE runs SET status=?,state=? WHERE id=?;") status state run-id)) - -(define (db:set-run-state-status dbstruct run-id state status ) - (db:with-db - dbstruct #f #t - (lambda (dbdat db) - (db:set-run-state-status-db dbdat db run-id state status)))) - -(define (db:get-run-status dbstruct run-id) - (let ((res "n/a")) - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (status) - (set! res status)) - (db:get-cache-stmth - dbdat db - "SELECT status FROM runs WHERE id=?;" ) - run-id) - res)))) - -(define (db:get-run-state dbstruct run-id) - (let ((res "n/a")) - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (status) - (set! res status)) - (db:get-cache-stmth - dbdat db - "SELECT state FROM runs WHERE id=?;" ) - run-id) - res)))) - -(define (db:get-run-state-status dbstruct run-id) - (let ((res (cons "n/a" "n/a"))) - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (state status) - (set! res (cons state status))) - (db:get-cache-stmth - dbdat db - "SELECT state,status FROM runs WHERE id=?;" ) - run-id) - res)))) - - -;;====================================================================== -;; K E Y S -;;====================================================================== - -;; get key val pairs for a given run-id -;; ( (FIELDNAME1 keyval1) (FIELDNAME2 keyval2) ... ) -(define (db:get-key-val-pairs dbstruct run-id) - (let* ((keys (db:get-keys dbstruct)) - (res '())) - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (for-each - (lambda (key) - (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) - (sqlite3:for-each-row - (lambda (key-val) - (set! res (cons (list key (if (string? key-val) key-val "")) res))) ;; replace non-string bad values with empty string to prevent crashes. This scenario can happen when Megatest is killed on updating the db - db qry run-id))) - keys))) - (reverse res))) - -;; get key vals for a given run-id -(define (db:get-key-vals dbstruct run-id) - (let* ((keys (db:get-keys dbstruct)) - (res '())) - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (for-each - (lambda (key) - (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (key-val) - (set! res (cons (if (string? key-val) key-val "") res))) ;; check that the key-val is a string for cases where a crash injected bad data in the megatest.db - db qry run-id))) - keys))) - (let ((final-res (reverse res))) - (hash-table-set! *keyvals* run-id final-res) - final-res))) - -;; The target is keyval1/keyval2..., cached in *target* as it is used often -(define (db:get-target dbstruct run-id) - (let* ((keyvals (db:get-key-vals dbstruct run-id)) - (thekey (string-intersperse (map (lambda (x)(if x x "-na-")) keyvals) "/"))) - thekey)) - -;; Get run-ids for runs with same target but different runnames and NOT run-id -;; -(define (db:get-prev-run-ids dbstruct run-id) - (let* ((keyvals (db:get-key-val-pairs dbstruct run-id)) - (kvalues (map cadr keyvals)) - (keys (db:get-keys dbstruct)) - (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND "))) - (let ((prev-run-ids '())) - (if (null? keyvals) - '() - (begin - (db:with-db dbstruct #f #f ;; #f means work with the zeroth db - i.e. the runs db - (lambda (dbdat db) - (apply sqlite3:for-each-row - (lambda (id) - (set! prev-run-ids (cons id prev-run-ids))) - db - (conc "SELECT id FROM runs WHERE " qrystr " AND state != 'deleted' AND id != ?;") - (append kvalues (list run-id))))) - prev-run-ids))))) - -;;====================================================================== -;; T E S T S -;;====================================================================== - -;; states and statuses are lists, turn them into ("PASS","FAIL"...) and use NOT IN -;; i.e. these lists define what to NOT show. -;; states and statuses are required to be lists, empty is ok -;; not-in #t = above behaviour, #f = must match -;; mode: -;; 'dashboard - use state = 'COMPLETED' AND status in ( statuses ) OR state in ( states ) -;; -(define (db:get-tests-for-run dbstruct run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode) - (let* ((qryvalstr (case qryvals - ((shortlist) "id,run_id,testname,item_path,state,status") - ((#f) db:test-record-qry-selector) ;; "id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment") - (else qryvals))) - (res '()) - ;; if states or statuses are null then assume match all when not-in is false - (states-qry (if (null? states) - #f - (conc " state " - (if (eq? mode 'dashboard) - " IN ('" - (if not-in - " NOT IN ('" - " IN ('")) - (string-intersperse states "','") - "')"))) - (statuses-qry (if (null? statuses) - #f - (conc " status " - (if (eq? mode 'dashboard) - " IN ('" - (if not-in - " NOT IN ('" - " IN ('") ) - (string-intersperse statuses "','") - "')"))) - (interim-qry (conc " AND " (if not-in "NOT " "") "( state='COMPLETED' " (if statuses-qry (conc " AND " statuses-qry " ) ") " ) ") - (if states-qry - (conc (if not-in " AND " " OR ") states-qry ) ;; " ) ") - ""))) - (states-statuses-qry - (cond - ((and states-qry statuses-qry) - (case mode - ((dashboard) - (if not-in - (conc " AND (state='COMPLETED' AND status NOT IN ('" (string-intersperse statuses "','") "')) " - " OR (state != 'COMPLETED' AND state NOT IN ('" (string-intersperse states "','") "')) ") - (conc " AND (state='COMPLETED' AND status IN ('" (string-intersperse statuses "','") "')) " - " OR (state NOT IN ('COMPLETED','DELETED') AND state IN ('" (string-intersperse states "','") "')) "))) - (else (conc " AND ( " states-qry " AND " statuses-qry " ) ")))) - (states-qry - (case mode - ((dashboard) (conc " AND " (if not-in "NOT " "") " state IN ('" (string-intersperse states "','") "') ")) ;; interim-qry) - (else (conc " AND " states-qry)))) - (statuses-qry - (case mode - ((dashboard) (conc " AND " (if not-in "NOT " "") " status IN ('" (string-intersperse statuses "','") "') ")) ;; interim-qry) - (else (conc " AND " statuses-qry)))) - (else ""))) - (tests-match-qry (tests:match->sqlqry testpatt)) - (qry (conc "SELECT " qryvalstr - (if run-id - " FROM tests WHERE run_id=? " - " FROM tests WHERE ? > 0 ") ;; should work? - (if last-update " " " AND state != 'DELETED' ") ;; if using last-update we want deleted tests? - states-statuses-qry - (if tests-match-qry (conc " AND (" tests-match-qry ") ") "") - (if last-update (conc " AND last_update >= " last-update " ") "") - (case sort-by - ((rundir) " ORDER BY length(rundir) ") - ((testname) (conc " ORDER BY testname " (if sort-order (conc sort-order ",") "") " item_path ")) - ((statestatus) (conc " ORDER BY state " (if sort-order (conc sort-order ",") "") " status ")) - ((event_time) " ORDER BY event_time ") - (else (if (string? sort-by) - (conc " ORDER BY " sort-by " ") - " "))) - (if sort-order sort-order " ") - (if limit (conc " LIMIT " limit) " ") - (if offset (conc " OFFSET " offset) " ") - ";" - ))) - (debug:print-info 8 *default-log-port* "db:get-tests-for-run run-id=" run-id ", qry=" qry) - (let* ((res (db:with-db dbstruct run-id #f - (lambda (dbdat db) - ;; (let* ((stmth (db:get-cache-stmth dbstruct db qry))) ;; due to use of last-update we can't efficiently cache this query - (reverse - (sqlite3:fold-row - (lambda (res . row) - ;; id run-id testname state status event-time host cpuload - ;; diskfree uname rundir item-path run-duration final-logf comment) - (cons (list->vector row) res)) - '() - db qry ;; stmth - (or run-id 1) ;; 1 > 0 , for the case where we are seeking tests matching criteral for all runs - )))))) - (case qryvals - ((shortlist)(map db:test-short-record->norm res)) - ((#f) res) - (else res))))) - -(define (db:test-short-record->norm inrec) - ;; "id,run_id,testname,item_path,state,status" - ;; "id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment - (vector (vector-ref inrec 0) ;; id - (vector-ref inrec 1) ;; run_id - (vector-ref inrec 2) ;; testname - (vector-ref inrec 4) ;; state - (vector-ref inrec 5) ;; status - -1 "" -1 -1 "" "-" - (vector-ref inrec 3) ;; item-path - -1 "-" "-")) - -;; -;; 1. cache tests-match-qry -;; 2. compile qry and store in hash -;; 3. convert for-each-row to fold -;; -;; (define (db:get-tests-for-run-state-status dbstruct run-id testpatt) -;; (db:with-db -;; dbstruct run-id #f -;; (lambda (dbdat db) -;; (let* ((res '()) -;; (stmt-cache (dbr:dbstruct-stmt-cache dbstruct)) -;; (stmth (let* ((sh (db:hoh-get stmt-cache db testpatt))) -;; (or sh -;; (let* ((tests-match-qry (tests:match->sqlqry testpatt)) -;; (qry (conc "SELECT id,testname,item_path,state,status FROM tests WHERE run_id=? " -;; (if tests-match-qry (conc " AND (" tests-match-qry ") ") ""))) -;; (newsh (sqlite3:prepare db qry))) -;; (debug:print-info 8 *default-log-port* "db:get-tests-for-run qry=" qry) -;; (db:hoh-set! stmt-cache db testpatt newsh) -;; newsh))))) -;; (reverse -;; (sqlite3:fold-row -;; (lambda (res id testname item-path state status) -;; ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment -;; (cons (vector id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-") res)) -;; '() -;; stmth -;; run-id)))))) - -(define (db:get-tests-for-run-state-status dbstruct run-id testpatt #!optional (last-update 0)) - (let* ((res '()) - (tests-match-qry (tests:match->sqlqry testpatt)) - (qry (conc "SELECT id,testname,item_path,state,status,event_time,run_duration FROM tests WHERE run_id=? " - " AND last_update > ? " - (if tests-match-qry (conc " AND (" tests-match-qry ") ") "") - ))) - (debug:print-info 8 *default-log-port* "db:get-tests-for-run qry=" qry) - (db:with-db dbstruct run-id #f - (lambda (dbdat db) - (sqlite3:fold-row - (lambda (res id testname item-path state status event-time run-duration) - ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment - (cons (vector id run-id testname state status event-time "" -1 -1 "" "-" item-path run-duration "-" "-") res)) - '() - db - qry - run-id - (or last-update 0)))))) - -(define (db:get-testinfo-state-status dbstruct run-id test-id) - (db:with-db - dbstruct run-id #f - (lambda (dbdat db) - (let* ((res #f) - (stmth (db:get-cache-stmth dbdat db "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=? and run_id=?;"))) - (sqlite3:for-each-row - (lambda (run-id testname item-path state status) - ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment - (set! res (vector test-id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-"))) - ;; db - ;; "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=? and run_id=?;" - stmth - test-id run-id) - res)))) - -;; get a useful subset of the tests data (used in dashboard -;; use db:mintest-get-{id ,run_id,testname ...} -;; -(define (db:get-tests-for-run-mindata dbstruct run-id testpatt states statuses not-in) - (db:get-tests-for-run dbstruct run-id testpatt states statuses #f #f not-in #f #f "id,run_id,testname,state,status,event_time,item_path" 0 #f)) - -;; do not use. -;; -(define (db:get-tests-for-runs dbstruct run-ids testpatt states statuses #!key (not-in #f)(qryvals #f)) - ;; (db:delay-if-busy) - (let ((res '())) - (for-each - (lambda (run-id) - (set! res (append - res - (db:get-tests-for-run dbstruct run-id testpatt states statuses #f #f not-in #f #f qryvals #f 'normal)))) - (if run-ids - run-ids - (db:get-all-run-ids dbstruct))) - res)) - -;; Convert calling routines to get list of run-ids and loop, do not use the get-tests-for-runs -;; - -(define (db:delete-test-records dbstruct run-id test-id) - (db:general-call dbstruct run-id 'delete-test-step-records (list test-id)) - (db:general-call dbstruct run-id 'delete-test-data-records (list test-id)) - (db:with-db - dbstruct run-id #t - (lambda (dbdat db) - (sqlite3:execute db "UPDATE tests SET state='DELETED',status='n/a',comment='' WHERE id=?;" test-id)))) - -;; -(define (db:delete-old-deleted-test-records dbstruct run-id) - (let* ((targtime (- (current-seconds) - (or (configf:lookup-number *configdat* "setup" "keep-deleted-records") - (* 7 24 60 60)))) ;; cleanup if over one week old - (mtdbfile (dbmod:run-id->full-dbfname dbstruct run-id)) - (qry1 "DELETE FROM test_steps WHERE test_id IN (SELECT id FROM tests WHERE state='DELETED' AND event_timenumber fieldname fields) - (if (null? fields) - #f - (let loop ((hed (car fields)) - (tal (cdr fields)) - (indx 0)) - (if (equal? fieldname hed) - indx - (if (null? tal) - #f - (loop (car tal)(cdr tal)(+ indx 1))))))) - -(define db:test-record-qry-selector (string-intersperse db:test-record-fields ",")) - -(define (db:update-tesdata-on-repilcate-db dbstruct old-lt new-lt) - (db:with-db - dbstruct #f #t - (lambda (dbdat db) - (sqlite3:execute db "UPDATE tests SET rundir= replace(rundir,?,?), shortdir=replace(shortdir,?,?);" - old-lt new-lt old-lt new-lt)))) - -;; NOTE: Use db:test-get* to access records -;; NOTE: This needs rundir decoding? Decide, decode here or where used? For the moment decode where used. -(define (db:get-all-tests-info-by-run-id dbstruct run-id) - (let* ((res '())) - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir attemptnum archived) - ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 - (set! res (cons (vector id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir attemptnum archived) - res))) - db - (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE state != 'DELETED' AND run_id=?;") - run-id))) - res)) - -(define (db:replace-test-records dbstruct run-id testrecs) - (db:with-db dbstruct run-id #t - (lambda (dbdat db) - (let* ((qmarks (string-intersperse (make-list (length db:test-record-fields) "?") ",")) - (qrystr (conc "INSERT OR REPLACE INTO tests (" db:test-record-qry-selector ") VALUES (" qmarks ") WHERE run_id=?;")) - (qry (sqlite3:prepare db qrystr))) - (debug:print 0 *default-log-port* "INFO: migrating test records for run with id " run-id) - (sqlite3:with-transaction - db - (lambda () - (for-each - (lambda (rec) - ;; (debug:print 0 *default-log-port* "INFO: Inserting values: " (string-intersperse (map conc (vector->list rec)) ",") "\n") - (apply sqlite3:execute qry (append (vector->list rec)(list run-id)))) - testrecs))) - (sqlite3:finalize! qry))))) - -;; map a test-id into the proper range -;; -(define (db:adj-test-id mtdb min-test-id test-id) - (if (>= test-id min-test-id) - test-id - (let loop ((new-id min-test-id)) - (let ((test-id-found #f)) - (sqlite3:for-each-row - (lambda (id) - (set! test-id-found id)) - (dbr:dbdat-dbh mtdb) - "SELECT id FROM tests WHERE id=?;" - new-id) - ;; if test-id-found then need to try again - (if test-id-found - (loop (+ new-id 1)) - (begin - (debug:print-info 0 *default-log-port* "New test id " new-id " selected for test with id " test-id) - (sqlite3:execute mtdb "UPDATE tests SET id=? WHERE id=?;" new-id test-id))))))) - -;; move test ids into the 30k * run_id range -;; -(define (db:prep-megatest.db-adj-test-ids mtdb run-id testrecs) - (debug:print-info 0 *default-log-port* "Adjusting test ids in megatest.db for run " run-id) - (let ((min-test-id (* run-id 30000))) - (for-each - (lambda (testrec) - (let* ((test-id (vector-ref testrec (db:field->number "id" db:test-record-fields)))) - (db:adj-test-id (dbr:dbdat-dbh mtdb) min-test-id test-id))) - testrecs))) - -;; 1. move test ids into the 30k * run_id range -;; 2. move step ids into the 30k * run_id range -;; -(define (db:prep-megatest.db-for-migration mtdb) - (let* ((run-ids (db:get-all-run-ids mtdb))) - (for-each - (lambda (run-id) - (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id))) - (db:prep-megatest.db-adj-test-ids (dbr:dbdat-dbh mtdb) run-id testrecs))) - run-ids))) - -;; Get test data using test_id -;; -(define (db:get-test-info-by-id dbstruct run-id test-id) - (db:with-db - dbstruct - run-id - #f - (lambda (dbdat db) - (let ((res #f)) - (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test - (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update) - ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 - (set! res (vector id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update))) - db - ;; (db:get-cache-stmth dbdat db - ;; (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id=? AND run_id=?;")) - (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id=? AND run_id=?;") - test-id run-id) - res)))) - -;; Get test state, status using test_id -;; -(define (db:get-test-state-status-by-id dbstruct run-id test-id) - (db:with-db - dbstruct - run-id - #f - (lambda (dbdat db) - (let ((res (cons #f #f)) - (stmth (db:get-cache-stmth dbdat db "SELECT state,status FROM tests WHERE id=? AND run_id=?;"))) - (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test - (lambda (state status) - (cons state status)) - ;; db - stmth ;;"SELECT state,status FROM tests WHERE id=? AND run_id=?;" ;; stmth try not compiling this one - yes, this fixed the bind issue - test-id run-id) - res)))) - -;; Use db:test-get* to access -;; Get test data using test_ids. NB// Only works within a single run!! -;; -(define (db:get-test-info-by-ids dbstruct run-id test-ids) - (db:with-db - dbstruct - run-id - #f - (lambda (dbdat db) - (let ((res '())) - (sqlite3:for-each-row - (lambda (a . b) - ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 - (set! res (cons (apply vector a b) res))) - db - (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id in (" - (string-intersperse (map conc test-ids) ",") ");")) - res)))) - -;; try every second until tries times proc -;; -(define (db:keep-trying-until-true proc params tries) - (let* ((res (apply proc params))) - (if res - res - (if (> tries 0) - (begin - (thread-sleep! 1) - (db:keep-trying-until-true proc params (- tries 1))) - (begin - ;; (debug:print-info 0 *default-log-port* "proc never returned true, params="params) - (print"db:keep-trying-until-true proc never returned true, proc = " proc " params =" params " tries = " tries) - #f))))) - -(define (db:get-test-info dbstruct run-id test-name item-path) - (db:with-db - dbstruct - run-id - #f - (lambda (dbdat db) - (db:get-test-info-db db run-id test-name item-path)))) - -(define (db:get-test-info-db db run-id test-name item-path) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (a . b) - (set! res (apply vector a b))) - db - (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE testname=? AND item_path=? AND run_id=?;") - test-name item-path run-id) - res)) - -(define (db:test-get-rundir-from-test-id dbstruct run-id test-id) - (db:with-db - dbstruct - run-id - #f - (lambda (dbdat db) - (db:first-result-default - db - "SELECT rundir FROM tests WHERE id=? AND run_id=?;" - #f ;; default result - test-id run-id)))) - -(define (db:get-test-times dbstruct run-name target) - (let ((res `()) - (qry (conc "select testname, item_path, run_duration, " - (string-join (db:get-keys dbstruct) " || '/' || ") - " as target from tests inner join runs on tests.run_id = runs.id where runs.runname = ? and target = ? ;"))) - (db:with-db - dbstruct - #f ;; this is for the main runs db - #f ;; does not modify db - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (test-name item-path test-time target ) - (set! res (cons (vector test-name item-path test-time) res))) - db - qry - run-name target) - res)))) - -;;====================================================================== -;; S T E P S -;;====================================================================== - -(define (db:teststep-set-status! dbstruct run-id test-id teststep-name state-in status-in comment logfile) - (db:with-db - dbstruct - run-id - #t - (lambda (dbdat db) - (sqlite3:execute - db - "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);" - test-id teststep-name state-in status-in (current-seconds) - (if comment comment "") - (if logfile logfile ""))))) - - - -(define (db:delete-steps-for-test! dbstruct run-id test-id) - ;; TODO: figure out why status is the key field rather than state (note: CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state) ) - (db:with-db - dbstruct - run-id - #t - (lambda (dbdat db) - (sqlite3:execute - db - "UPDATE test_steps set status='DELETED' where test_id=?" ;; and run_id=? !! - run_id not in table (bummer) TODO: get run_id into schema for test_steps - test-id)))) - - -;; db-get-test-steps-for-run -(define (db:get-steps-for-test dbstruct run-id test-id) - (db:with-db - dbstruct - run-id - #f - (lambda (dbdat db) - (let* ((res '())) - (sqlite3:for-each-row - (lambda (id test-id stepname state status event-time logfile comment) - (set! res (cons (vector id test-id stepname state status event-time (if (string? logfile) logfile "") comment) res))) - db - "SELECT id,test_id,stepname,state,status,event_time,logfile,comment FROM test_steps WHERE status != 'DELETED' AND test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; - test-id) - (reverse res))))) - - (define (db:get-steps-info-by-id dbstruct run-id test-step-id) - (db:with-db - dbstruct - run-id - #f - (lambda (dbdat db) - (let* ((res (vector #f #f #f #f #f #f #f #f #f))) - (sqlite3:for-each-row - (lambda (id test-id stepname state status event-time logfile comment last-update) - (set! res (vector id test-id stepname state status event-time (if (string? logfile) logfile "") comment last-update))) - db - "SELECT id,test_id,stepname,state,status,event_time,logfile,comment,last_update FROM test_steps WHERE id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; - test-step-id) - res)))) - -(define (db:get-steps-data dbstruct run-id test-id) - (db:with-db - dbstruct - run-id - #f - (lambda (dbdat db) - (let ((res '())) - (sqlite3:for-each-row - (lambda (id test-id stepname state status event-time logfile) - (set! res (cons (vector id test-id stepname state status event-time (if (string? logfile) logfile "")) res))) - db - "SELECT id,test_id,stepname,state,status,event_time,logfile FROM test_steps WHERE status != 'DELETED' AND test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; - test-id) - (reverse res))))) - -;;====================================================================== -;; T E S T D A T A -;;====================================================================== - -(define (db:get-data-info-by-id dbstruct run-id test-data-id) - (let* ((stmt "SELECT id,test_id, category, variable, value, expected, tol, units, comment, status, type, last_update FROM test_data WHERE id=? ORDER BY id ASC;")) ;; event_time DESC,id ASC; - (db:with-db - dbstruct - run-id - #f - (lambda (dbdat db) - (let* ((stmth (db:get-cache-stmth dbdat db stmt)) - (res (sqlite3:fold-row - (lambda (res id test-id category variable value expected tol units comment status type last-update) - (vector id test-id category variable value expected tol units comment status type last-update)) - (vector #f #f #f #f #f #f #f #f #f #f #f #f) - stmth - test-data-id))) - res))))) - -;; WARNING: Do NOT call this for the parent test on an iterated test -;; Roll up test_data pass/fail results -;; look at the test_data status field, -;; if all are pass (any case) and the test status is PASS or NULL or '' then set test status to PASS. -;; if one or more are fail (any case) then set test status to PASS, non "pass" or "fail" are ignored -(define (db:test-data-rollup dbstruct run-id test-id status) - (let* ((fail-count 0) - (pass-count 0)) - (db:with-db - dbstruct run-id #t - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (fcount pcount) - (set! fail-count fcount) - (set! pass-count pcount)) - db - "SELECT (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'fail') AS fail_count, - (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'pass') AS pass_count;" - test-id test-id) - ;; Now rollup the counts to the central megatest.db - (db:general-call dbstruct run-id 'pass-fail-counts (list pass-count fail-count test-id)) - ;; if the test is not FAIL then set status based on the fail and pass counts. - (db:general-call dbstruct run-id 'test_data-pf-rollup (list test-id test-id test-id test-id)))))) - -;; each section is a rule except "final" which is the final result -;; -;; [rule-5] -;; operator in -;; section LogFileBody -;; desc Output voltage -;; status OK -;; expected 1.9 -;; measured 1.8 -;; type +/- -;; tolerance 0.1 -;; pass 1 -;; fail 0 -;; -;; [final] -;; exit-code 6 -;; exit-status SKIP -;; message If flagged we are asking for this to exit with code 6 -;; -;; recorded in steps table: -;; category: stepname -;; variable: rule-N -;; value: measured -;; expected: expected -;; tol: tolerance -;; units: - -;; comment: desc or message -;; status: status -;; type: type -;; -(define (db:logpro-dat->csv dat stepname) - (let ((res '())) - (for-each - (lambda (entry-name) - (if (equal? entry-name "final") - (set! res (append - res - (list - (list stepname - entry-name - (configf:lookup dat entry-name "exit-code") ;; 0 ;; Value - 0 ;; 1 ;; Expected - 0 ;; 2 ;; Tolerance - "n/a" ;; 3 ;; Units - (configf:lookup dat entry-name "message") ;; 4 ;; Comment - (configf:lookup dat entry-name "exit-status") ;; 5 ;; Status - "logpro" ;; 6 ;; Type - )))) - (let* ((value (or (configf:lookup dat entry-name "measured") "n/a")) - (expected (or (configf:lookup dat entry-name "expected") 0.0)) - (tolerance (or (configf:lookup dat entry-name "tolerance") 0.0)) - (comment (or (configf:lookup dat entry-name "comment") - (configf:lookup dat entry-name "desc") "n/a")) - (status (or (configf:lookup dat entry-name "status") "n/a")) - (type (or (configf:lookup dat entry-name "expected") "n/a"))) - (set! res (append - res - (list (list stepname - entry-name - value ;; 0 - expected ;; 1 - tolerance ;; 2 - "n/a" ;; 3 Units - comment ;; 4 - status ;; 5 - type ;; 6 - ))))))) - (hash-table-keys dat)) - res)) - -;; $MT_MEGATEST -load-test-data << EOF -;; foo,bar, 1.2, 1.9, > -;; foo,rab, 1.0e9, 10e9, 1e9 -;; foo,bla, 1.2, 1.9, < -;; foo,bal, 1.2, 1.2, < , ,Check for overload -;; foo,alb, 1.2, 1.2, <= , Amps,This is the high power circuit test -;; foo,abl, 1.2, 1.3, 0.1 -;; foo,bra, 1.2, pass, silly stuff -;; faz,bar, 10, 8mA, , ,"this is a comment" -;; EOF - -(define (db:csv->test-data dbstruct run-id test-id csvdata) - (debug:print 4 *default-log-port* "test-id " test-id ", csvdata: " csvdata) - (db:with-db - dbstruct #f #t - (lambda (dbdat db) - (let* ((csvlist (csv->list (make-csv-reader - (open-input-string csvdata) - '((strip-leading-whitespace? #t) - (strip-trailing-whitespace? #t)))))) ;; (csv->list csvdata))) - (for-each - (lambda (csvrow) - (let* ((padded-row (take (append csvrow (list #f #f #f #f #f #f #f #f #f)) 9)) - (category (list-ref padded-row 0)) - (variable (list-ref padded-row 1)) - (value (any->number-if-possible (list-ref padded-row 2))) - (expected (any->number-if-possible (list-ref padded-row 3))) - (tol (any->number-if-possible (list-ref padded-row 4))) ;; >, <, >=, <=, or a number - (units (list-ref padded-row 5)) - (comment (list-ref padded-row 6)) - (status (let ((s (list-ref padded-row 7))) - (if (and (string? s)(or (string-match (regexp "^\\s*$") s) - (string-match (regexp "^n/a$") s))) - #f - s))) ;; if specified on the input then use, else calculate - (type (list-ref padded-row 8))) - ;; look up expected,tol,units from previous best fit test if they are all either #f or '' - (debug:print 4 *default-log-port* "BEFORE: category: " category " variable: " variable " value: " value - ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment " type: " type) - - (if (and (or (not expected)(equal? expected "")) - (or (not tol) (equal? expected "")) - (or (not units) (equal? expected ""))) - (let-values (((new-expected new-tol new-units)(tdb:get-prev-tol-for-test #f test-id category variable))) - (set! expected new-expected) - (set! tol new-tol) - (set! units new-units))) - - (debug:print 4 *default-log-port* "AFTER: category: " category " variable: " variable " value: " value - ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment) - ;; calculate status if NOT specified - (if (and (not status)(number? expected)(number? value)) ;; need expected and value to be numbers - (if (number? tol) ;; if tol is a number then we do the standard comparison - (let* ((max-val (+ expected tol)) - (min-val (- expected tol)) - (result (and (>= value min-val)(<= value max-val)))) - (debug:print 4 *default-log-port* "max-val: " max-val " min-val: " min-val " result: " result) - (set! status (if result "pass" "fail"))) - (set! status ;; NB// need to assess each one (i.e. not return operator since need to act if not valid op. - (case (string->symbol tol) ;; tol should be >, <, >=, <= - ((>) (if (> value expected) "pass" "fail")) - ((<) (if (< value expected) "pass" "fail")) - ((>=) (if (>= value expected) "pass" "fail")) - ((<=) (if (<= value expected) "pass" "fail")) - (else (conc "ERROR: bad tol comparator " tol)))))) - (debug:print 4 *default-log-port* "AFTER2: category: " category " variable: " variable " value: " value - ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment) - ;; (db:delay-if-busy dbdat) - (sqlite3:execute db "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment,status,type) VALUES (?,?,?,?,?,?,?,?,?,?);" - test-id category variable value expected tol units (if comment comment "") status type))) - csvlist))))) - -;; This routine moved from tdb.scm, tdb:read-test-data -;; -(define (db:read-test-data dbstruct run-id test-id categorypatt) - (let* ((res '())) - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (id test_id category variable value expected tol units comment status type) - (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) - db - "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt) - (reverse res))))) - -;; This routine moved from tdb.scm, :read-test-data -;; -(define (db:read-test-data-varpatt dbstruct run-id test-id categorypatt varpatt) - (let* ((res '())) - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (id test_id category variable value expected tol units comment status type) - (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) - db - "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? AND variable LIKE ? ORDER BY category,variable;" test-id categorypatt varpatt) - (reverse res))))) - - -;;====================================================================== -;; Misc. test related queries -;;====================================================================== - -(define (db:get-run-ids-matching-target dbstruct keynames target res runname testpatt statepatt statuspatt) - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (let* ((row-ids '()) - (keystr (string-intersperse - (map (lambda (key val) - (conc key " like '" val "'")) - keynames - (string-split target "/")) - " AND ")) - ;; (testqry (tests:match->sqlqry testpatt)) - (runsqry (sqlite3:prepare db (conc "SELECT id FROM runs WHERE " keystr " AND runname LIKE '" runname "';")))) - ;; (debug:print 8 *default-log-port* "db:test-get-paths-matching-keynames-target-new\n runsqry=" runsqry "\n tstsqry=" testqry) - (sqlite3:for-each-row - (lambda (rid) - (set! row-ids (cons rid row-ids))) - runsqry) - (sqlite3:finalize! runsqry) - row-ids)))) - -;; finds latest matching all patts for given run-id -;; -(define (db:test-get-paths-matching-keynames-target-new dbstruct run-id keynames target res testpatt statepatt statuspatt runname) - (let* ((testqry (tests:match->sqlqry testpatt)) - (tstsqry (conc "SELECT rundir FROM tests WHERE run_id=? AND " testqry " AND state LIKE '" statepatt "' AND status LIKE '" statuspatt "' ORDER BY event_time ASC;"))) - (db:with-db - dbstruct - run-id - #f - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (p) - (set! res (cons p res))) - db - tstsqry - run-id) - res)))) - -(define (db:test-toplevel-num-items dbstruct run-id testname) - (db:with-db - dbstruct - run-id - #f - (lambda (dbdat db) - (let ((res 0)) - (sqlite3:for-each-row - (lambda (num-items) - (set! res num-items)) - db - "SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND state NOT IN ('DELETED');" - run-id - testname) - res)))) - -;;====================================================================== -;; QUEUE UP META, TEST STATUS AND STEPS REMOTE ACCESS -;;====================================================================== - -;; NOTE: Can remove the regex and base64 encoding for zmq -(define (db:obj->string obj #!key (transport 'http)) - (case transport - ;; ((fs) obj) - ((http fs) - (string-substitute - (regexp "=") "_" - (base64:base64-encode - (z3:encode-buffer - (with-output-to-string - (lambda ()(serialize obj))))) ;; BB: serialize - this is what causes problems between different builds of megatest communicating. serialize is sensitive to binary image of mtest. - #t)) - ((zmq nmsg)(with-output-to-string (lambda ()(serialize obj)))) - (else obj))) ;; rpc - -(define (db:string->obj msg #!key (transport 'http)) - (case transport - ;; ((fs) msg) - ((http fs) - (if (string? msg) - (with-input-from-string - (z3:decode-buffer - (base64:base64-decode - (string-substitute - (regexp "_") "=" msg #t))) - (lambda ()(deserialize))) - (begin - (debug:print-error 0 *default-log-port* "reception failed. Received \"" msg "\" but cannot translate it.") - (print-call-chain (current-error-port)) - msg))) ;; crude reply for when things go awry - ((zmq nmsg)(with-input-from-string msg (lambda ()(deserialize)))) - (else msg))) ;; rpc - -;; ; This is to be the big daddy call NOPE: Replaced by db:set-state-status-and-roll-up-items -;; ; -;; define (db:test-set-state-status dbstruct run-id test-id state status msg) -;; (let ((dbdat (db:get-subdb dbstruct run-id))) -;; (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) -;; (db:general-call dbdat 'set-test-start-time (list test-id))) -;; ;; (if msg -;; ;; (db:general-call dbdat 'state-status-msg (list state status msg test-id)) -;; ;; (db:general-call dbdat 'state-status (list state status test-id))) -;; (db:set-state-status-and-roll-up-items dbstruct run-id test-id #f state status msg) -;; ;; process the test_data table -;; (if (and test-id state status (equal? status "AUTO")) -;; (db:test-data-rollup dbstruct run-id test-id status)) -;; (mt:process-triggers dbstruct run-id test-id state status))) - -;; state is the priority rollup of all states -;; status is the priority rollup of all completed statesfu -;; -;; if test-name is an integer work off that as test-id instead of test-name test-path -;; -(define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) - ;; establish info on incoming test followed by info on top level test - ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met - (let* ((testdat (if (number? test-name) - (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id - (db:keep-trying-until-true ;; in our threaded stuff this call could happen before the test is registered (maybe?) - db:get-test-info - (list dbstruct run-id test-name item-path) - 10))) - (test-id (db:test-get-id testdat)) - (test-name (if (number? test-name) - (db:test-get-testname testdat) - test-name)) - (item-path (db:test-get-item-path testdat)) - (tl-testdat (db:get-test-info dbstruct run-id test-name "")) - (tl-test-id (if tl-testdat - (db:test-get-id tl-testdat) - #f)) - (new-state-eh #f) - (new-status-eh #f)) - (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) - (db:general-call dbstruct run-id 'set-test-start-time (list test-id))) - (mutex-lock! *db-transaction-mutex*) - (db:with-db - dbstruct run-id #t - (lambda (dbdat db) - (let ((tr-res - (sqlite3:with-transaction - db - (lambda () - ;; NB// Pass the db so it is part fo the transaction - (db:test-set-state-status-db db run-id test-id state status comment) ;; this call sets the item state/status - (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item - (let* ((state-status-counts (db:get-all-state-status-counts-for-test db run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test - (state-statuses (db:roll-up-rules state-status-counts state status)) - (newstate (car state-statuses)) - (newstatus (cadr state-statuses))) - (set! new-state-eh newstate) - (set! new-status-eh newstatus) - (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path" newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts) " state-status-counts: " - (apply conc - (map (lambda (x) - (conc - (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | ")) - state-status-counts))); end debug:print - (if tl-test-id - (db:test-set-state-status-db db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct - )))))) - (mutex-unlock! *db-transaction-mutex*) - (if (and test-id state status (equal? status "AUTO")) - (db:test-data-rollup dbstruct run-id test-id status)) - (if new-state-eh ;; moved from db:test-set-state-status - (mt:process-triggers dbstruct run-id test-id new-state-eh new-status-eh)) - tr-res))))) - -(define (db:roll-up-rules state-status-counts state status) - (if (null? state-status-counts) - '(#f #f) - (let* ((running (length (filter (lambda (x) - (member (dbr:counts-state x) *common:running-states*)) - state-status-counts))) - (bad-not-started (length (filter (lambda (x) - (and (equal? (dbr:counts-state x) "NOT_STARTED") - (not (member (dbr:counts-status x) *common:not-started-ok-statuses*)))) - state-status-counts))) - (all-curr-states (common:special-sort ;; worst -> best (sort of) - (delete-duplicates - (if (and state (not (member state *common:dont-roll-up-states*))) - (cons state (map dbr:counts-state state-status-counts)) - (map dbr:counts-state state-status-counts))) - *common:std-states* >)) - (all-curr-statuses (common:special-sort ;; worst -> best - (delete-duplicates - (if (and state status (not (member state *common:dont-roll-up-states*))) - (cons status (map dbr:counts-status state-status-counts)) - (map dbr:counts-status state-status-counts))) - *common:std-statuses* >)) - (non-completes (filter (lambda (x) - (not (member x (cons "COMPLETED" *common:dont-roll-up-states*)))) - all-curr-states)) - (preq-fails (filter (lambda (x) - (equal? x "PREQ_FAIL")) - all-curr-statuses)) - (num-non-completes (length non-completes)) - (newstate (cond - ((> running 0) "RUNNING") ;; anything running, call the situation running - ((> (length preq-fails) 0) "NOT_STARTED") - ((> bad-not-started 0) "COMPLETED") ;; we have an ugly situation, it is completed in the sense we cannot do more. - ((> num-non-completes 0) (car non-completes)) ;; (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED - (else (car all-curr-states)))) - (newstatus (cond - ((> (length preq-fails) 0) "PREQ_FAIL") - ((or (> bad-not-started 0) - (and (equal? newstate "NOT_STARTED") - (> num-non-completes 0))) - "STARTED") - (else (car all-curr-statuses))))) - (debug:print-info 2 *default-log-port* - "\n--> probe db:set-state-status-and-roll-up-items: " - "\n--> state-status-counts: "(map dbr:counts->alist state-status-counts) - "\n--> running: "running - "\n--> bad-not-started: "bad-not-started - "\n--> non-non-completes: "num-non-completes - "\n--> non-completes: "non-completes - "\n--> all-curr-states: "all-curr-states - "\n--> all-curr-statuses: "all-curr-statuses - "\n--> newstate "newstate - "\n--> newstatus "newstatus - "\n\n") - - ;; NB// Pass the db so it is part of the transaction - (list newstate newstatus)))) - -(define (db:set-state-status-and-roll-up-run dbstruct run-id curr-state curr-status) - (mutex-lock! *db-transaction-mutex*) - (db:with-db - dbstruct run-id #t - (lambda (dbdat db) - (let ((tr-res - (sqlite3:with-transaction - db - (lambda () - (let* ((state-status-counts (db:get-all-state-status-counts-for-run-db dbdat db run-id)) - (state-statuses (db:roll-up-rules state-status-counts #f #f )) - (newstate (car state-statuses)) - (newstatus (cadr state-statuses))) - (if (or (not (eq? newstate curr-state)) (not (eq? newstatus curr-status))) - (db:set-run-state-status-db dbdat db run-id newstate newstatus ))))))) - (mutex-unlock! *db-transaction-mutex*) - tr-res)))) - -(define (db:get-all-state-status-counts-for-run-db dbdat db run-id) - (sqlite3:map-row - (lambda (state status count) - (make-dbr:counts state: state status: status count: count)) - (db:get-cache-stmth - dbdat db - "SELECT state,status,count(id) FROM tests WHERE run_id=? GROUP BY state,status;") - run-id )) - -(define (db:get-all-state-status-counts-for-run dbstruct run-id) - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (db:get-all-state-status-counts-for-run-db dbdat db run-id)))) - -;; BBnote: db:get-all-state-status-counts-for-test returns dbr:counts object aggregating state and status of items of a given test, *not including rollup state/status* -;; -;; NOTE: This is called within a transaction -;; -(define (db:get-all-state-status-counts-for-test db run-id test-name item-path item-state-in item-status-in) - (let* ((test-info (db:get-test-info-db db run-id test-name item-path)) - (item-state (or item-state-in (db:test-get-state test-info))) - (item-status (or item-status-in (db:test-get-status test-info))) - (other-items-count-recs (sqlite3:map-row - (lambda (state status count) - (make-dbr:counts state: state status: status count: count)) - db - ;; ignore current item because we have changed its value in the current transation so this select will see the old value. - "SELECT state,status,count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND item_path !=? GROUP BY state,status;" - run-id test-name item-path)) - ;; add current item to tally outside of sql query - (match-countrec-lambda (lambda (countrec) - (and (equal? (dbr:counts-state countrec) item-state) - (equal? (dbr:counts-status countrec) item-status)))) - - (already-have-count-rec-list - (filter match-countrec-lambda other-items-count-recs)) ;; will have either 0 or 1 count recs depending if another item shares this item's state/status - - (updated-count-rec (if (null? already-have-count-rec-list) - (make-dbr:counts state: item-state status: item-status count: 1) - (let* ((our-count-rec (car already-have-count-rec-list)) - (new-count (add1 (dbr:counts-count our-count-rec)))) - (make-dbr:counts state: item-state status: item-status count: new-count)))) - - (nonmatch-countrec-lambda (lambda (countrec) (not (match-countrec-lambda countrec)))) - - (unrelated-rec-list - (filter nonmatch-countrec-lambda other-items-count-recs))) - (cons updated-count-rec unrelated-rec-list))) - -;; (define (db:get-all-item-states db run-id test-name) -;; (sqlite3:map-row -;; (lambda (a) a) -;; db -;; "SELECT DISTINCT state FROM tests WHERE item_path != '' AND state != 'DELETED' AND run_id=? AND testname=?" -;; run-id test-name)) -;; -;; (define (db:get-all-item-statuses db run-id test-name) -;; (sqlite3:map-row -;; (lambda (a) a) -;; db -;; "SELECT DISTINCT status FROM tests WHERE item_path != '' AND state != 'DELETED' AND state='COMPLETED' AND run_id=? AND testname=?" -;; run-id test-name)) - -(define (db:test-get-logfile-info dbstruct run-id test-name) - (db:with-db - dbstruct - run-id - #f - (lambda (dbdat db) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (path final_logf) - ;; (let ((path (sdb:qry 'getstr path-id)) - ;; (final_logf (sdb:qry 'getstr final_logf-id))) - (set! logf final_logf) - (set! res (list path final_logf)) - (if (directory? path) - (debug:print 2 *default-log-port* "Found path: " path) - (debug:print 2 *default-log-port* "No such path: " path))) ;; ) - db - "SELECT rundir,final_logf FROM tests WHERE testname=? AND item_path='' AND run_id=?;" - test-name run-id) - res)))) - -;;====================================================================== -;; A G R E G A T E D T R A N S A C T I O N D B W R I T E S -;;====================================================================== - -(define db:queries - (list '(update-run-duration "UPDATE tests SET run_duration=? WHERE id=?;") - - ;; TESTS - '(register-test "INSERT OR IGNORE INTO tests (run_id,testname,event_time,item_path,state,status) VALUES (?,?,strftime('%s','now'),?,'NOT_STARTED','n/a');") - ;; Test state and status - '(set-test-state "UPDATE tests SET state=? WHERE id=?;") - '(set-test-status "UPDATE tests SET state=? WHERE id=?;") - '(state-status "UPDATE tests SET state=?,status=? WHERE id=?;") ;; D/ONE - '(state-status-msg "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;") ;; DONE - ;; Test comment - '(set-test-comment "UPDATE tests SET comment=? WHERE id=?;") - '(set-test-start-time "UPDATE tests SET event_time=strftime('%s','now') WHERE id=?;") ;; DONE - '(pass-fail-counts "UPDATE tests SET pass_count=?,fail_count=? WHERE id=?;") - ;; test_data-pf-rollup is used to set a tests PASS/FAIL based on the pass/fail info from the steps - '(test_data-pf-rollup "UPDATE tests - SET status=CASE WHEN (SELECT fail_count FROM tests WHERE id=?) > 0 - THEN 'FAIL' - WHEN (SELECT pass_count FROM tests WHERE id=?) > 0 AND - (SELECT status FROM tests WHERE id=?) NOT IN ('WARN','FAIL') - THEN 'PASS' - ELSE status - END WHERE id=?;") ;; DONE - '(test-set-log "UPDATE tests SET final_logf=? WHERE id=?;") ;; DONE - ;; '(test-set-rundir-by-test-id "UPDATE tests SET rundir=? WHERE id=?") ;; DONE - ;; '(test-set-rundir "UPDATE tests SET rundir=? AND testname=? AND item_path=?;") ;; DONE - '(test-set-rundir-shortdir "UPDATE tests SET rundir=?,shortdir=? WHERE testname=? AND item_path=? AND run_id=?;") ;; BROKEN!!! NEEDS run-id - '(delete-tests-in-state ;; "DELETE FROM tests WHERE state=?;") ;; DONE - "UPDATE tests SET state='DELETED' WHERE state=?") - '(tests:test-set-toplog "UPDATE tests SET final_logf=? WHERE run_id=? AND testname=? AND item_path='';") - '(update-cpuload-diskfree "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;") ;; DONE - '(update-uname-host "UPDATE tests SET uname=?,host=? WHERE id=?;") ;; DONE - '(update-test-rundat "INSERT INTO test_rundat (test_id,update_time,cpuload,diskfree,diskusage,run_duration) VALUES (?,?,?,?,?,?);") - '(update-test-state "UPDATE tests SET state=? WHERE state=? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") - '(update-test-status "UPDATE tests SET status=? WHERE status like ? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") - ;; stuff for set-state-status-and-roll-up-items - '(update-pass-fail-counts "UPDATE tests - SET fail_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('FAIL','CHECK','INCOMPLETE','ABORT')), - pass_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('PASS','WARN','WAIVED')) - WHERE testname=? AND item_path='' AND run_id=?;") ;; DONE ;; BROKEN!!! NEEDS run-id - '(top-test-set-running "UPDATE tests SET state='RUNNING' WHERE testname=? AND item_path='' AND run_id=?;") ;; DONE ;; BROKEN!!! NEEDS run-id - - ;; NOT USED - ;; - ;; Might be the following top-test-set-per-pf-counts query could be better based off of something like this: - ;; - ;; select state,status,count(state) from tests where run_id=59 AND testname='runfirst' group by state,status; - ;; - '(top-test-set-per-pf-counts "UPDATE tests - SET state=CASE - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND status NOT IN ('n/a') - AND state in ('NOT_STARTED')) > 0 THEN 'UNKNOWN' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND (status NOT IN ('TEN_STRIKES','BLOCKED') OR status IN ('INCOMPLETE')) - AND state in ('RUNNING','NOT_STARTED','LAUNCHED','REMOTEHOSTSTART')) > 0 THEN 'RUNNING' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('COMPLETED','DELETED')) = 0 THEN 'COMPLETED' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state = 'NOT_STARTED') > 0 THEN 'NOT_STARTED' - ELSE 'UNKNOWN' END, - status=CASE - WHEN fail_count > 0 THEN 'FAIL' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state IN ('BLOCKED','INCOMPLETE')) > 0 THEN 'FAIL' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('DELETED') - AND status IN ('INCOMPLETE','ABORT')) > 0 THEN 'ABORT' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('DELETED') - AND status = 'AUTO') > 0 THEN 'AUTO' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('DELETED') - AND status IN ('STUCK/INCOMPLETE', 'INCOMPLETE')) > 0 THEN 'INCOMPLETE' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state IN ('COMPLETED','STUCK/INCOMPLETE','INCOMPLETE') - AND status = 'FAIL') > 0 THEN 'FAIL' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('DELETED') - AND status = 'CHECK') > 0 THEN 'CHECK' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('DELETED') - AND status = 'SKIP') > 0 THEN 'SKIP' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('DELETED') - AND status = 'WARN') > 0 THEN 'WARN' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('DELETED') - AND status = 'WAIVED') > 0 THEN 'WAIVED' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('DELETED') - AND status NOT IN ('PASS','FAIL','WARN','WAIVED')) > 0 THEN 'ABORT' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state='NOT_STARTED') > 0 THEN 'n/a' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state = 'COMPLETED' - AND status = 'PASS') > 0 THEN 'PASS' - WHEN pass_count > 0 AND fail_count=0 THEN 'PASS' - ELSE 'UNKNOWN' END - WHERE testname=? AND item_path='';") ;; DONE ;; BROKEN!!! NEEDS run-id - - ;; STEPS - '(delete-test-step-records "UPDATE test_steps SET status='DELETED' WHERE test_id=?;") - '(delete-test-data-records "UPDATE test_data SET status='DELETED' WHERE test_id=?;") ;; using status since no state field - )) - -(define (db:lookup-query qry-name) - (let ((q (alist-ref qry-name db:queries))) - (if q (car q) #f))) - -;; do not run these as part of the transaction -(define db:special-queries '(rollup-tests-pass-fail - ;; db:set-state-status-and-roll-up-items ;; WHY NOT!? - login - immediate - flush - sync - set-verbosity - killserver - )) - -(define (db:login dbstruct calling-path calling-version client-signature) - (cond - ((not (equal? calling-path *toppath*)) - (list #f "Login failed due to mismatch paths: " calling-path ", " *toppath*)) - ;; ((not (equal? *run-id* run-id)) - ;; (list #f "Login failed due to mismatch run-id: " run-id ", " *run-id*)) - ((not (equal? megatest-version calling-version)) - (list #t (conc "Login warning due to mismatch megatest version: " calling-version ", " megatest-version))) - - (else - (hash-table-set! *logged-in-clients* client-signature (current-seconds)) - '(#t "successful login")))) - -;; NO WAY TO KNOW IF IT MODIFIES THE DB BUT NEARLY ALL ARE UPDATES/INSERTS -;; -(define (db:general-call dbstruct run-id stmtname params) - ;; Why is db:lookup-query above not used here to get the query? - (let ((query (let ((q (alist-ref (if (string? stmtname) - (string->symbol stmtname) - stmtname) - db:queries))) - (if q (car q) #f)))) - (db:with-db - dbstruct run-id #t - (lambda (dbdat db) - (apply sqlite3:execute db query params) - #t)))) - -;; get a summary of state and status counts to calculate a rollup -;; -(define (db:get-state-status-summary dbstruct run-id testname) - (let ((res '())) - (db:with-db - dbstruct run-id #f - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (state status count) - (set! res (cons (vector state status count) res))) - db - "SELECT state,status,count(state) FROM tests WHERE run_id=? AND testname=? AND item_path='' GROUP BY state,status;" - run-id testname) - res)))) - -(define (db:get-latest-host-load dbstruct raw-hostname) - (let* ((hostname (string-substitute "\\..*$" "" raw-hostname)) - (res (cons -1 0))) - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (cpuload update-time) (set! res (cons cpuload update-time))) - db - "SELECT tr.cpuload, tr.update_time FROM test_rundat tr, tests t WHERE t.host=? AND tr.cpuload != -1 AND tr.test_id=t.id ORDER BY tr.update_time DESC LIMIT 1;" - hostname))) res )) - -(define (db:set-top-level-from-items dbstruct run-id testname) - (let* ((summ (db:get-state-status-summary dbstruct run-id testname)) - (find (lambda (state status) - (if (null? summ) - #f - (let loop ((hed (car summ)) - (tal (cdr summ))) - (if (and (string-match state (vector-ref hed 0)) - (string-match status (vector-ref hed 1))) - hed - (if (null? tal) - #f - (loop (car tal)(cdr tal))))))))) - - - ;;; E D I T M E ! ! - - - (cond - ((> (find "COMPLETED" ".*") 0) #f)))) - - - -;; get the previous records for when these tests were run where all keys match but runname -;; NB// Merge this with test:get-previous-test-run-records? This one looks for all matching tests -;; can use wildcards. Also can likely be factored in with get test paths? -;; -;; Run this remotely!! -;; -(define (db:get-matching-previous-test-run-records dbstruct run-id test-name item-path) - (let* ((keys (db:get-keys dbstruct)) - (selstr (string-intersperse keys ",")) - (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND ")) - (keyvals #f) - (tests-hash (make-hash-table))) - ;; first look up the key values from the run selected by run-id - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (a . b) - (set! keyvals (cons a b))) - db - (conc "SELECT " selstr " FROM runs WHERE id=? ORDER BY event_time DESC;") run-id))) - (if (not keyvals) - '() - (let ((prev-run-ids '())) - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (apply sqlite3:for-each-row - (lambda (id) - (set! prev-run-ids (cons id prev-run-ids))) - db - (conc "SELECT id FROM runs WHERE " qrystr " AND id != ?;") (append keyvals (list run-id))))) - ;; collect all matching tests for the runs then - ;; extract the most recent test and return that. - (debug:print 4 *default-log-port* "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals - ", previous run ids found: " prev-run-ids) - (if (null? prev-run-ids) '() ;; no previous runs? return null - (let loop ((hed (car prev-run-ids)) - (tal (cdr prev-run-ids))) - (let ((results (db:get-tests-for-run dbstruct hed (conc test-name "/" item-path) '() '() #f #f #f #f #f #f #f 'normal))) - (debug:print 4 *default-log-port* "Got tests for run-id " run-id ", test-name " test-name - ", item-path " item-path " results: " (intersperse results "\n")) - ;; Keep only the youngest of any test/item combination - (for-each - (lambda (testdat) - (let* ((full-testname (conc (db:test-get-testname testdat) "/" (db:test-get-item-path testdat))) - (stored-test (hash-table-ref/default tests-hash full-testname #f))) - (if (or (not stored-test) - (and stored-test - (> (db:test-get-event_time testdat)(db:test-get-event_time stored-test)))) - ;; this test is younger, store it in the hash - (hash-table-set! tests-hash full-testname testdat)))) - results) - (if (null? tal) - (map cdr (hash-table->alist tests-hash)) ;; return a list of the most recent tests - (loop (car tal)(cdr tal)))))))))) - -;; Function recursively checks if .journal exists; if yes means db busy; call itself after delayed interval -;; return the sqlite3 db handle if possible -;; -(define (db:delay-if-busy dbdat #!key (count 6)) - (if (not (configf:lookup *configdat* "server" "delay-on-busy")) - (and dbdat (dbr:dbdat-dbh dbdat)) - (if dbdat - (let* ((dbpath (dbr:dbdat-dbfile dbdat)) - (db (dbr:dbdat-dbh dbdat)) ;; we'll return this so (db:delay--if-busy can be called inline - (dbfj (conc dbpath "-journal"))) - (if (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "WARNING: failed to test for existance of " dbfj ", exn=" exn) - (thread-sleep! 1) - (db:delay-if-busy count (- count 1))) - (common:file-exists? dbfj)) - (case count - ((6) - (thread-sleep! 0.2) - (db:delay-if-busy count: 5)) - ((5) - (thread-sleep! 0.4) - (db:delay-if-busy count: 4)) - ((4) - (thread-sleep! 0.8) - (db:delay-if-busy count: 3)) - ((3) - (thread-sleep! 1.6) - (db:delay-if-busy count: 2)) - ((2) - (thread-sleep! 3.2) - (db:delay-if-busy count: 1)) - ((1) - (thread-sleep! 6.4) - (db:delay-if-busy count: 0)) - (else - (debug:print-info 0 *default-log-port* "delaying db access due to high database load.") - (thread-sleep! 12.8)))) - db) - "bogus result from db:delay-if-busy"))) - -(define (db:test-get-records-for-index-file dbstruct run-id test-name) - (let ((res '())) - (db:with-db - dbstruct - run-id - #f - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (id itempath state status run_duration logf comment) - (set! res (cons (vector id itempath state status run_duration logf comment) res))) - db - "SELECT id,item_path,state,status,run_duration,final_logf,comment FROM tests WHERE testname=? AND item_path != '' AND run_id=?;" ;; BUG! WHY NO run_id? - test-name - run-id) - res)))) - -;;====================================================================== -;; Tests meta data -;;====================================================================== - -;; returns a hash table of tags to tests -;; -(define (db:get-tests-tags dbstruct) - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (let* ((res (make-hash-table))) - (sqlite3:for-each-row - (lambda (testname tags-in) - (let ((tags (string-split tags-in ","))) - (for-each - (lambda (tag) - (hash-table-set! res tag - (delete-duplicates - (cons testname (hash-table-ref/default res tag '()))))) - tags))) - db - "SELECT testname,tags FROM test_meta") - (hash-table->alist res))))) - -;; testmeta doesn't change, we can cache it for up too an hour - -(define *db:testmeta-cache* (make-hash-table)) -(define *db:testmeta-last-update* 0) - -;; read the record given a testname -(define (db:testmeta-get-record dbstruct testname) - (if (and (< (- (current-seconds) *db:testmeta-last-update*) 600) - (hash-table-exists? *db:testmeta-cache* testname)) - (hash-table-ref *db:testmeta-cache* testname) - (let ((res #f)) - (db:with-db - dbstruct - #f - #f - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup) - (set! res (vector id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup))) - db - "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta WHERE testname=?;" - testname))) - (hash-table-set! *db:testmeta-cache* testname res) - (set! *db:testmeta-last-update* (current-seconds)) - res))) - -;; create a new record for a given testname -(define (db:testmeta-add-record dbstruct testname) - (db:with-db dbstruct #f #t - (lambda (dbdat db) - (sqlite3:execute - db - "INSERT OR IGNORE INTO test_meta (testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags) VALUES (?,'','','','','','','','');" testname)))) - -;; update one of the testmeta fields -(define (db:testmeta-update-field dbstruct testname field value) - (db:with-db dbstruct #f #t - (lambda (dbdat db) - (sqlite3:execute - db - (conc "UPDATE test_meta SET " field "=? WHERE testname=?;") value testname)))) - -(define (db:testmeta-get-all dbstruct) - (db:with-db dbstruct #f #f - (lambda (dbdat db) - (let ((res '())) - (sqlite3:for-each-row - (lambda (a . b) - (set! res (cons (apply vector a b) res))) - db - "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta;") - res)))) - -;;====================================================================== -;; M I S C M A N A G E M E N T I T E M S -;;====================================================================== - -;; A routine to map itempaths using a itemmap -;; patha and pathb must be strings or this will fail -;; -;; path-b is waiting on path-a -;; -(define (db:compare-itempaths test-b-name path-a path-b itemmaps ) - (debug:print-info 6 *default-log-port* "ITEMMAPS: " itemmaps) - (let* ((itemmap (tests:lookup-itemmap itemmaps test-b-name))) - (if itemmap - (let ((path-b-mapped (db:multi-pattern-apply path-b itemmap))) - (debug:print-info 6 *default-log-port* "ITEMMAP is " itemmap ", path: " path-b ", mapped path: " path-b-mapped) - (equal? path-a path-b-mapped)) - (equal? path-b path-a)))) - -;; A routine to convert test/itempath using a itemmap -;; NOTE: to process only an itempath (i.e. no prepended testname) -;; just call db:multi-pattern-apply -;; -(define (db:convert-test-itempath path-in itemmap) - (debug:print-info 6 *default-log-port* "ITEMMAP is " itemmap) - (let* ((path-parts (string-split path-in "/")) - (test-name (if (null? path-parts) "" (car path-parts))) - (item-path (string-intersperse (if (null? path-parts) '() (cdr path-parts)) "/"))) - (conc test-name "/" - (db:multi-pattern-apply item-path itemmap)))) - -;; patterns are: -;; "rx1" "replacement1"\n -;; "rx2" "replacement2" -;; etc. -;; -(define (db:multi-pattern-apply item-path itemmap) - (let ((all-patts (string-split itemmap "\n"))) - (if (null? all-patts) - item-path - (let loop ((hed (car all-patts)) - (tal (cdr all-patts)) - (res item-path)) - (let* ((parts (string-split hed)) - (patt (car parts)) - - (repl (if (> (length parts) 1)(cadr parts) "")) - - (newr (if (and patt repl) - (begin - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* - "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl ", exn=" exn) - res) - (string-substitute patt repl res)) - - - ) - (begin - (debug:print 0 *default-log-port* - "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl) - res)))) - (if (null? tal) - newr - (loop (car tal)(cdr tal) newr))))))) - - - - -;; the new prereqs calculation, looks also at itempath if specified -;; all prereqs must be met -;; if prereq test with itempath='' is in common:well-ended-states, then prereq is met -;; if prereq test with itempath=ref-item-path and COMPLETED with PASS, WARN, CHECK, or WAIVED then prereq is met -;; -;; Note: mode 'normal means that tests must be COMPLETED and ok (i.e. PASS, WARN, CHECK, SKIP or WAIVED) -;; mode 'toplevel means that tests must be COMPLETED only -;; mode 'itemmatch or 'itemwait means that tests items must be COMPLETED and (PASS|WARN|WAIVED|CHECK) [[ NB// NOT IMPLEMENTED YET ]] -;; mode 'exclusive means this test/item cannot run if the same test/item is LAUNCHED,REMOTEHOSTSTART or RUNNING -;; -;; IDEA for consideration: -;; 1. collect all tests "upstream" -;; 2. any NOT completed and good? if yes => return those as prereqs not met, if no => return null list -;; -;; (define (db:get-prereqs-not-met dbstruct run-id waitons ref-item-path mode) -(define (db:get-prereqs-not-met dbstruct run-id waitons ref-test-name ref-item-path mode itemmaps) ;; #!key (mode '(normal))(itemmap #f)) - ;; BBnote - rollup of an itemized test's overall state/status done in db:set-state-status-and-roll-up-items - (debug:print 4 *default-log-port* "db:get-prereqs-not-met: " waitons) - (append - (if (member 'exclusive mode) - (let ((running-tests (db:get-tests-for-run dbstruct - #f ;; run-id of #f means for all runs. - (if (string=? ref-item-path "") ;; testpatt - ref-test-name - (conc ref-test-name "/" ref-item-path)) - '("LAUNCHED" "REMOTEHOSTSTART" "RUNNING") ;; states - '() ;; statuses - #f ;; offset - #f ;; limit - #f ;; not-in - #f ;; sort by - #f ;; sort order - 'shortlist ;; query type - 0 ;; last update, beginning of time .... - #f ;; mode - ))) - ;;(map (lambda (testdat) - ;; (if (equal? (db:test-get-item-path testdat) "") - ;; (db:test-get-testname testdat) - ;; (conc (db:test-get-testname testdat) - ;; "/" - ;; (db:test-get-item-path testdat)))) - running-tests) ;; calling functions want the entire data - '()) - - - - ;; collection of: for each waiton - - ;; if this ref-test-name is an item in an itemized test and mode is itemwait/itemmatch: - ;; if waiton is not itemized - if waiton is not both completed and in ok status, add as unmet prerequisite - ;; if waiton is itemized: - ;; and waiton's items are not expanded, add as unmet prerequisite - ;; else if matching waiton item is not both completed and in an ok status, add as unmet prerequisite - ;; else - ;; if waiton toplevel is not in both completed and ok status, add as unmet prerequisite - - (if (or (not waitons) - (null? waitons)) - '() - (let* ((ref-test-itemized-mode (not (null? (lset-intersection eq? mode '(itemmatch itemwait))))) ;; how is this different from using member? - (ref-test-toplevel-mode (not (null? (lset-intersection eq? mode '(toplevel))))) - (ref-test-is-toplevel (equal? ref-item-path "")) - (ref-test-is-item (not ref-test-is-toplevel)) - (unmet-pre-reqs '()) - (result '()) - (unmet-prereq-items '()) - ) - (for-each ; waitons - (lambda (waitontest-name) - ;; by getting the tests with matching name we are looking only at the matching test - ;; and related sub items - ;; next should be using mt:get-tests-for-run? - - (let (;(waiton-is-itemized ...) - ;(waiton-items-are-expanded ...) - (waiton-tests (db:get-tests-for-run-state-status dbstruct run-id waitontest-name)) - (ever-seen #f) - (parent-waiton-met #f) - (item-waiton-met #f) - - ) - (for-each ; test expanded from waiton - (lambda (waiton-test) - (let* ((waiton-state (db:test-get-state waiton-test)) - (waiton-status (db:test-get-status waiton-test)) - (waiton-item-path (db:test-get-item-path waiton-test)) ;; BB- this is the upstream itempath - (waiton-test-name (db:test-get-testname waiton-test)) - (waiton-is-toplevel (equal? waiton-item-path "")) - (waiton-is-item (not waiton-is-toplevel)) - (waiton-is-completed (member waiton-state *common:ended-states*)) - (waiton-is-running (member waiton-state *common:running-states*)) - (waiton-is-killed (member waiton-state *common:badly-ended-states*)) - (waiton-is-ok (member waiton-status *common:well-ended-states*)) - ;; testname-b path-a path-b - (same-itempath (db:compare-itempaths ref-test-name waiton-item-path ref-item-path itemmaps)) ;; (equal? ref-item-path waiton-item-path))) - (real-ref-test-name (car (string-split ref-test-name "/"))) ;; I THINK ref-test-name SHOULD NEVER HAVE THE ITEM_PATH! - (test-and-ref-are-same (equal? real-ref-test-name waiton-test-name))) - (debug:print 4 *default-log-port* "waiton-test-name " waiton-test-name " ref-test-name: " ref-test-name " test-and-ref-are-same: " test-and-ref-are-same) - (set! ever-seen #t) - ;;(BB> "***consider waiton "waiton-test"/"waiton-item-path"***") - (cond - ;; case 0 - toplevel of an itemized test, at least one item in prereq has completed - ((and waiton-is-item ref-test-is-toplevel ref-test-itemized-mode waiton-is-completed) - (set! parent-waiton-met #t)) - - ;; case 1, non-item (parent test) is - ((and waiton-is-toplevel ;; this is the parent test of the waiton being examined - waiton-is-completed - ;;(BB> "cond1") - (or waiton-is-ok ref-test-toplevel-mode)) ;; itemmatch itemwait)))))) - (set! parent-waiton-met #t)) - ;; Special case for toplevel and KILLED - ((and waiton-is-toplevel ;; this is the parent test - waiton-is-killed - (member 'toplevel mode)) - ;;(BB> "cond2") - (set! parent-waiton-met #t)) - ;; For itemwait mode IFF the previous matching item is good the set parent-waiton-met - ((and ref-test-itemized-mode ref-test-is-item same-itempath) - ;;(BB> "cond3") - (if (and waiton-is-completed (or waiton-is-ok ref-test-toplevel-mode)) - (set! item-waiton-met #t) - (set! unmet-prereq-items (cons waiton-test unmet-prereq-items))) - (if (and waiton-is-toplevel ;; if upstream rollup test is completed, parent-waiton-met is set - (or waiton-is-completed waiton-is-running)) - (set! parent-waiton-met #t))) - ;; normal checking of parent items, any parent or parent item not ok blocks running - ((and waiton-is-completed - (or waiton-is-ok - (member 'toplevel mode)) ;; toplevel does not block on FAIL - (and waiton-is-ok (member 'itemmatch mode) ;; itemmatch blocks on not ok ;; TODO: THIS IS PROBABLY A BUG. ITEMMATCH AND ITEMWAIT ARE SYNONYMS!! WHAT HAPPENED OT ITEMWAIT??? - )) - ;;(BB> "cond4") - (set! item-waiton-met #t)) - ((and waiton-is-completed waiton-is-ok same-itempath) - ;;(BB> "cond5") - (set! item-waiton-met #t)) - ((and waiton-is-completed waiton-is-ok test-and-ref-are-same) ;; probably from [waitons] table - (set! item-waiton-met #t)) - (else - #t - ;;(BB> "condelse") - )))) - waiton-tests) - ;; both requirements, parent and item-waiton must be met to NOT add item to - ;; prereq's not met list - ;; (BB> - ;; "\n* waiton-tests "waiton-tests - ;; "\n* parent-waiton-met "parent-waiton-met - ;; "\n* item-waiton-met "item-waiton-met - ;; "\n* ever-seen "ever-seen - ;; "\n* ref-test-itemized-mode "ref-test-itemized-mode - ;; "\n* unmet-prereq-items "unmet-prereq-items - ;; "\n* result (pre) "result - ;; "\n* ever-seen "ever-seen - ;; "\n") - - (cond - ((and ref-test-itemized-mode ref-test-is-item (not (null? unmet-prereq-items))) - (set! result (append unmet-prereq-items result))) - ((not (or parent-waiton-met item-waiton-met)) - (set! result (append (if (null? waiton-tests) (list waitontest-name) waiton-tests) result))) ;; appends the string if the full record is not available - ;; if the test is not found then clearly the waiton is not met... - ;; (if (not ever-seen)(set! result (cons waitontest-name result))))) - ((not ever-seen) - (set! result (append (if (null? waiton-tests)(list waitontest-name) waiton-tests) result)))))) - waitons) - (delete-duplicates result))))) - -;;====================================================================== -;; To sync individual run -;;====================================================================== -(define (db:get-run-record-ids dbstruct target run keynames) - (let* ((backcons (lambda (lst item)(cons item lst))) - (all_tests '()) - (keystr (string-intersperse - (map (lambda (key val) - (conc key " like '" val "'")) - keynames - (string-split target "/")) - " AND ") - ) - (run-qry (conc "SELECT id FROM runs WHERE " keystr " and runname='" run"'")) - ; (test-qry (conc "SELECT id FROM tests WHERE run_id in (" run-qry ") and testname like '" test-patt "'")) - (run_ids - (db:with-db dbstruct #f #f - (lambda (dbdat db) - (sqlite3:fold-row backcons '() db run-qry)) - ) - ) - ) - run_ids) -) - -;;====================================================================== -;; Just for sync, procedures to make sync easy -;;====================================================================== - -;; get an alist of run ids and test/run, test_step/run pairs changed since time since-time -;; '((runs . (1 2 3 ...))(tests . ((5 . 1) (6 . 3) (6 . 2) (7 . 1) ... - -;; Retrieves record IDs from the database based on the timestamp of their last update. - -;; The function takes two arguments: dbstruct, which represents the database structure, and since-time, which is a timestamp indicating the time of the last update. -;; The function first defines a few helper functions, including backcons, which takes a list and an item and adds the item to the front of the list. -;; It then initializes several variables to empty lists: all_tests, all_test_steps, all_test_data, all_run_ids, and all_test_ids. -;; The function then retrieves a list of IDs for runs that have been changed since since-time using the db:get-changed-run-ids function. -;; It then filters the full list of run IDs to only include those that match the changed run IDs based on their modulo (num-run-dbs). -;; For each changed run ID, the function retrieves a list of test IDs, test step IDs, and test data IDs that have been updated since since-time. -;; It appends these IDs to the appropriate lists (all_tests, all_test_steps, and all_test_data) using the append and map functions. -;; The function then retrieves a list of run stat IDs that have been updated since since-time. -;; Finally, the function returns a list of associations between record types and their corresponding IDs: runs, tests, test_steps, test_data, and run_stats. -;; -(define (db:get-changed-record-ids dbstruct since-time) - ;; no transaction, allow the db to be accessed between the big queries - (let* ((backcons (lambda (lst item)(cons item lst))) - (all_tests '()) - (changed_run_dbs (db:get-changed-run-ids since-time)) ;; gets the rundb numbers - (all_run_ids - (db:with-db dbstruct #f #f - (lambda (dbdat db) - (sqlite3:fold-row backcons '() db "SELECT id FROM runs")) - ) - ) - (changed_run_ids (filter (lambda (run) (member (modulo run (num-run-dbs)) changed_run_dbs)) all_run_ids)) - (run_ids - (db:with-db dbstruct #f #f - (lambda (dbdat db) - (sqlite3:fold-row backcons '() db "SELECT id FROM runs WHERE last_update>=?" since-time)) - ) - ) - ) - (for-each - (lambda (run_id) - (set! all_tests - (append - (map (lambda (x) (cons x run_id)) - (db:with-db dbstruct run_id #f - (lambda (dbdat db) - (sqlite3:fold-row backcons '() db "SELECT id FROM tests WHERE run_id=? and last_update>=?" run_id since-time) - ) - ) - ) all_tests - ) - ) - ) - changed_run_ids - ) - (debug:print 2 *default-log-port* "run_ids = " run_ids) - (debug:print 2 *default-log-port* "all_tests = " all_tests) - - `((runs . ,run_ids) - (tests . ,all_tests) - ) - ) -) - - - -(define (db:get-changed-record-test-ids dbstruct since-time run-id) - (let* ((backcons (lambda (lst item)(cons item lst))) - (all-tests (db:with-db dbstruct run-id #f - (lambda (dbdat db) - (sqlite3:fold-row backcons '() db "SELECT id FROM tests WHERE run_id=? and last_update>=?" run-id since-time))))) - - all-tests)) - -(define (db:get-changed-record-run-ids dbstruct since-time) - ;; no transaction, allow the db to be accessed between the big queries - (let* ((backcons (lambda (lst item)(cons item lst))) - (run_ids (db:with-db dbstruct #f #f - (lambda (dbdat db) - (sqlite3:fold-row backcons '() db "SELECT id FROM runs WHERE last_update>=?" since-time))))) - (debug:print 2 *default-log-port* "run_ids = " run_ids) - run_ids) -) - -(define (db:get-all-runids dbstruct) - (let* ((backcons (lambda (lst item)(cons item lst))) - (all_run_ids (db:with-db dbstruct #f #f - (lambda (dbdat db) - (sqlite3:fold-row backcons '() db "SELECT id FROM runs"))))) - -all_run_ids)) - -;;====================================================================== -;; Extract ods file from the db -;;====================================================================== - -;; NOT REWRITTEN YET!!!!! - -;; runspatt is a comma delimited list of run patterns -;; keypatt-alist must contain *all* keys with an associated pattern: '( ("KEY1" "%") .. ) -(define (db:extract-ods-file dbstruct outputfile keypatt-alist runspatt pathmod) - (assert #f "FATAL: call to db:extract-ods-file which is not ported yet.") - (let* ((keysstr (string-intersperse (map car keypatt-alist) ",")) - (keyqry (string-intersperse (map (lambda (p)(conc (car p) " LIKE ? ")) keypatt-alist) " AND ")) - (numkeys (length keypatt-alist)) - (test-ids '()) - (dbdat (db:get-subdb dbstruct)) - (db (dbr:dbdat-dbh dbdat)) - (windows (and pathmod (substring-index "\\" pathmod))) - (tempdir (conc "/tmp/" (current-user-name) "/" runspatt "_" (random 10000) "_" (current-process-id))) - (runsheader (append (list "Run Id" "Runname") ; 0 1 - (map car keypatt-alist) ; + N = length keypatt-alist - (list "Testname" ; 2 - "Item Path" ; 3 - "Description" ; 4 - "State" ; 5 - "Status" ; 6 - "Final Log" ; 7 - "Run Duration" ; 8 - "When Run" ; 9 - "Tags" ; 10 - "Run Owner" ; 11 - "Comment" ; 12 - "Author" ; 13 - "Test Owner" ; 14 - "Reviewed" ; 15 - "Diskfree" ; 16 - "Uname" ; 17 - "Rundir" ; 18 - "Host" ; 19 - "Cpu Load" ; 20 - ))) - (results (list runsheader)) - (testdata-header (list "Run Id" "Testname" "Item Path" "Category" "Variable" "Value" "Expected" "Tol" "Units" "Status" "Comment")) - (mainqry (conc "SELECT - t.testname,r.id,runname," keysstr ",t.testname, - t.item_path,tm.description,t.state,t.status, - final_logf,run_duration, - strftime('%m/%d/%Y %H:%M:%S',datetime(t.event_time,'unixepoch'),'localtime'), - tm.tags,r.owner,t.comment, - author, - tm.owner,reviewed, - diskfree,uname,rundir, - host,cpuload - FROM tests AS t JOIN runs AS r ON t.run_id=r.id JOIN test_meta AS tm ON tm.testname=t.testname - WHERE runname LIKE ? AND " keyqry ";"))) - (debug:print 2 *default-log-port* "Using " tempdir " for constructing the ods file. keyqry: " keyqry " keystr: " keysstr " with keys: " (map cadr keypatt-alist) - "\n mainqry: " mainqry) - ;; "Expected Value" - ;; "Value Found" - ;; "Tolerance" - (apply sqlite3:for-each-row - (lambda (test-id . b) - (set! test-ids (cons test-id test-ids)) ;; test-id is now testname - (set! results (append results ;; note, drop the test-id - (list - (if pathmod - (let* ((vb (apply vector b)) - (keyvals (let loop ((i 0) - (res '())) - (if (>= i numkeys) - res - (loop (+ i 1) - (append res (list (vector-ref vb (+ i 2)))))))) - (runname (vector-ref vb 1)) - (testname (vector-ref vb (+ 2 numkeys))) - (item-path (vector-ref vb (+ 3 numkeys))) - (final-log (vector-ref vb (+ 7 numkeys))) - (run-dir (vector-ref vb (+ 18 numkeys))) - (log-fpath (conc run-dir "/" final-log))) ;; (string-intersperse keyvals "/") "/" testname "/" item-path "/" - (debug:print 4 *default-log-port* "log: " log-fpath " exists: " (common:file-exists? log-fpath)) - (vector-set! vb (+ 7 numkeys) (if (common:file-exists? log-fpath) - (let ((newpath (conc pathmod "/" - (string-intersperse keyvals "/") - "/" runname "/" testname "/" - (if (string=? item-path "") "" (conc "/" item-path)) - final-log))) - ;; for now throw away newpath and use the log-fpath conc'd with pathmod - (set! newpath (conc pathmod log-fpath)) - (if windows (string-translate newpath "/" "\\") newpath)) - (if (debug:debug-mode 1) - (conc final-log " not-found") - ""))) - (vector->list vb)) - b))))) - db - mainqry - runspatt (map cadr keypatt-alist)) - (debug:print 2 *default-log-port* "Found " (length test-ids) " records") - (set! results (list (cons "Runs" results))) - ;; now, for each test, collect the test_data info and add a new sheet - (for-each - (lambda (test-id) - (let ((test-data (list testdata-header)) - (curr-test-name #f)) - (sqlite3:for-each-row - (lambda (run-id testname item-path category variable value expected tol units status comment) - (set! curr-test-name testname) - (set! test-data (append test-data (list (list run-id testname item-path category variable value expected tol units status comment))))) - db - ;; "SELECT run_id,testname,item_path,category,variable,td.value AS value,expected,tol,units,td.status AS status,td.comment AS comment FROM test_data AS td INNER JOIN tests ON tests.id=td.test_id WHERE test_id=?;" - "SELECT run_id,testname,item_path,category,variable,td.value AS value,td.expected,td.tol,td.units,td.status AS status,td.comment AS comment FROM test_data AS td INNER JOIN tests ON tests.id=td.test_id WHERE testname=?;" - test-id) - (if curr-test-name - (set! results (append results (list (cons curr-test-name test-data))))) - )) - (sort (delete-duplicates test-ids) string<=)) - (system (conc "mkdir -p " tempdir)) - ;; (pp results) - (ods:list->ods - tempdir - (if (string-match (regexp "^[/~]+.*") outputfile) ;; full path? - outputfile - (begin - (debug:print 0 *default-log-port* "WARNING: path given, " outputfile " is relative, prefixing with current directory") - (conc (current-directory) "/" outputfile))) - results) - ;; brutal clean up - (dbfile:add-dbdat dbstruct #f dbdat) - (system "rm -rf tempdir"))) - -;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%") - -;;====================================================================== -;; moving watch dogs here due to dependencies -;;====================================================================== - -;;====================================================================== -;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp -;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server) -;; -(define (common:readonly-watchdog dbstruct) - (thread-sleep! 0.05) ;; delay for startup - (debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.") - ;; sync megatest.db to /tmp/.../megatst.db - (let* ((sync-cool-off-duration 3) - (golden-mtdb (dbr:dbstruct-mtdb dbstruct)) - (golden-mtpath (db:dbdat-get-path golden-mtdb)) - (tmp-mtdb (dbr:dbstruct-tmpdb dbstruct)) - (tmp-mtpath (db:dbdat-get-path tmp-mtdb))) - (debug:print-info 0 *default-log-port* "Read-only periodic sync thread started.") - (let loop ((last-sync-time 0)) - (debug:print-info 13 *default-log-port* "loop top tmp-mtpath="tmp-mtpath" golden-mtpath="golden-mtpath) - (let* ((duration-since-last-sync (- (current-seconds) last-sync-time))) - (debug:print-info 13 *default-log-port* "duration-since-last-sync="duration-since-last-sync) - (if (and (not *time-to-exit*) - (< duration-since-last-sync sync-cool-off-duration)) - (thread-sleep! (- sync-cool-off-duration duration-since-last-sync))) - (if (not *time-to-exit*) - (let ((golden-mtdb-mtime (file-modification-time golden-mtpath)) - (tmp-mtdb-mtime (file-modification-time tmp-mtpath))) - (if (> golden-mtdb-mtime tmp-mtdb-mtime) - (if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back - (let ((res (db:multi-db-sync dbstruct 'old2new))) - (debug:print-info 13 *default-log-port* "rosync called, " res " records transferred.")))) - (loop (current-seconds))) - #t))) - (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath))) - - -;; Get a lock from the no-sync-db for the from-db, then copy the from-db to the to-db, otherwise return #f - -(define (db:lock-and-sync no-sync-db from-db to-db) - (assert (not *db-sync-in-progress*) "FATAL: db:lock-and-sync called while a sync is in progress.") - (let* ((lockdat (db:no-sync-get-lock no-sync-db from-db)) - (gotlock (car lockdat)) - (locktime (cdr lockdat))) - (if gotlock - (begin - (file-copy from-db to-db #t) - (db:no-sync-del! no-sync-db from-db) - #t) - (begin - (debug:print 0 *default-log-port* "could not get lock for " from-db " from no-sync-db") - #f - )))) - -;; sync for filesystem local db writes -;; -(define (db:run-lock-and-sync no-sync-db) - (let* ((tmp-area (common:make-tmpdir-name *toppath* "")) - (dbfiles (glob (conc tmp-area"/.mtdb/*.db"))) - (sync-durations (make-hash-table))) - ;; (debug:print-info 0 *default-log-port* "lock-and-sync, dbfiles: "dbfiles) - (for-each - (lambda (file) - (let* ((fname (conc (pathname-file file) ".db")) - (fulln (conc *toppath*"/.mtdb/"fname)) - (time1 (if (file-exists? file) - (file-modification-time file) - (begin - (debug:print-info 0 *default-log-port* "Sync - I do not see file "file) - 1))) - (time2 (if (file-exists? fulln) - (file-modification-time fulln) - (begin - (debug:print-info 0 *default-log-port* "Sync - I do not see file "fulln) - 0))) - (changed (> time1 time2)) - (do-cp (cond - ((not (file-exists? fulln)) ;; shouldn't happen, but this might recover - (debug:print-info 0 *default-log-port* "File "fulln" not found! Copying "fname" to "fulln) - #t) - (changed ;; (and changed - ;; (> (- (current-seconds) time1) 3)) ;; if file is changed and three seconds have passed. - #t) - ((and changed *time-to-exit*) ;; last copy - #t) - (else - #f)))) - (if do-cp - (let* ((start-time (current-milliseconds))) - (debug:print-info 0 *default-log-port* "sync copy file: " fname", delta: " (- time1 time2) " seconds") - (db:lock-and-sync no-sync-db file fulln) - (hash-table-set! sync-durations (conc fname".db") (- (current-milliseconds) start-time))) - #;(debug:print-info 0 *default-log-port* "skipping sync...")))) - dbfiles) - (hash-table->alist sync-durations))) - -;; straight forward copy based sync -;; 1. for each .db fil -;; 2. next if file changed since last sync cycle -;; 2. next if time delta /tmp file to MTRA less than 3 seconds -;; 3. get a lock for the file in nosyncdb -;; 4. copy the file -;; 5. when copy is done release the lock -;; -;; DONE -(define (server:writable-watchdog-copysync dbstruct) - (thread-sleep! 0.05) ;; delay for startup - (let ((legacy-sync (common:run-sync?)) - (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300)) - (debug-mode (debug:debug-mode 1)) - (last-time (current-seconds)) ;; last time through the sync loop - (no-sync-db (db:open-no-sync-db)) - (sync-duration 0) ;; run time of the sync in milliseconds - (tmp-area (common:make-tmpdir-name *toppath* ""))) - ;; Sync moved to http-transport keep-running loop - (debug:print-info 2 *default-log-port* "Periodic copy-based sync thread started. syncer is copy-sync, tmp-area is " tmp-area) - (debug:print-info 3 *default-log-port* "watchdog starting. syncer is copy-sync pid="(current-process-id));; " this-wd-num="this-wd-num) - - (if (and legacy-sync (not *time-to-exit*)) - (begin - (debug:print-info 0 *default-log-port* "Server running, periodic copy-based sync started.") - (let loop () - - ;; run the sync and print out durations - (debug:print-info 0 *default-log-port* "Sync durations: "(db:run-lock-and-sync no-sync-db)) - ;; keep going unless time to exit - ;; - (if (not *time-to-exit*) - (let delay-loop ((count 0)) - ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) - - (if (and (not *time-to-exit*) - (< count 6)) ;; was 11, changing to 4. - (begin - (thread-sleep! 1) - (delay-loop (+ count 1)))) - (if (not *time-to-exit*) (loop)))) - - ;; ==> ;; time to exit, close the no-sync db here - ;; ==> (db:no-sync-close-db no-sync-db stmt-cache) - (if (common:low-noise-print 30) - (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " - *time-to-exit*" pid="(current-process-id) ))))))) - -(define (server:writable-watchdog-deltasync dbstruct) - ;; This is awful complex and convoluted. Plan to redo? - ;; for now ... skip it. - - (thread-sleep! 0.05) ;; delay for startup - (let ((legacy-sync (common:run-sync?))) - (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300)) - (debug-mode (debug:debug-mode 1)) - (last-time (current-seconds)) - (no-sync-db (db:open-no-sync-db)) - (stmt-cache #f) ;; (dbr:dbstruct-stmt-cache dbstruct)) - (sync-duration 0) ;; run time of the sync in milliseconds - (subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct)))) - (debug:print-info 2 *default-log-port* "Periodic sync thread started.") - (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id) );; " this-wd-num="this-wd-num) - - (if (and legacy-sync (not *time-to-exit*)) - (begin - (debug:print-info 0 *default-log-port* "Server running, periodic sync started.") - (let loop () - ;; sync for filesystem local db writes - ;; - (mutex-lock! *db-multi-sync-mutex*) - (let* ((start-file (conc tmp-area "/.start-sync")) - (end-file (conc tmp-area "/.end-sync")) - - (need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write - (sync-in-progress *db-sync-in-progress*) - (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5)) - (should-sync (and (not *time-to-exit*) - (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed - (start-time (current-seconds)) - (cpu-load-adj (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f))) - (mt-mod-time (file-modification-time mtpath)) - (last-sync-start (if (common:file-exists? start-file) - (file-modification-time start-file) - 0)) - (last-sync-end (if (common:file-exists? end-file) - (file-modification-time end-file) - 10)) - (sync-period (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period - (recently-synced (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db! - (< mt-mod-time last-sync-start))) - (sync-done (<= last-sync-start last-sync-end)) - (sync-stale (> start-time (+ last-sync-start sync-stale-seconds))) - (will-sync (and (not *time-to-exit*) ;; do not start a sync if we are in the process of exiting - (or need-sync should-sync) - (or sync-done sync-stale) - (not sync-in-progress) - (not recently-synced)))) - (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. need-sync="need-sync" sync-in-progress=" sync-in-progress - " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync - " sync-done=" sync-done " sync-period=" sync-period) - (if (and (> sync-period 5) - (common:low-noise-print 30 "sync-period")) - (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds.")) - ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced)) - ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) - (if will-sync (set! *db-sync-in-progress* #t)) - (mutex-unlock! *db-multi-sync-mutex*) - (if will-sync - (let (;; (max-sync-duration (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK! - (sync-start (current-milliseconds))) - (with-output-to-file start-file (lambda ()(print (current-process-id)))) - - ;; put lock here - - ;; (if (or (not max-sync-duration) - ;; (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally - - ;; - - (for-each - (lambda (subdb) - (let* (;;(dbstruct (db:setup)) - (mtdb (dbr:subdb-mtdb subdb)) - (mtpath (db:dbdat-get-path mtdb)) - (tmp-area (common:make-tmpdir-name *toppath* "")) - (res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive - (set! sync-duration (- (current-milliseconds) sync-start)) - (if (> res 0) ;; some records were transferred, keep the db alive - (begin - (mutex-lock! *heartbeat-mutex*) - (set! *db-last-access* (current-seconds)) - (mutex-unlock! *heartbeat-mutex*) - (debug:print-info 0 *default-log-port* "sync called, " res " records transferred.")) - (debug:print-info 2 *default-log-port* "sync called but zero records transferred"))) - ) - subdbs))) - - (if will-sync - (begin - (mutex-lock! *db-multi-sync-mutex*) - (set! *db-sync-in-progress* #f) - (set! *db-last-sync* start-time) - (with-output-to-file end-file (lambda ()(print (current-process-id)))) - - ;; release lock here - - (mutex-unlock! *db-multi-sync-mutex*))) - (if (and debug-mode - (> (- start-time last-time) 60)) - (begin - (set! last-time start-time) - (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) - - ;; keep going unless time to exit - ;; - (if (not *time-to-exit*) - (let delay-loop ((count 0)) - ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) - - (if (and (not *time-to-exit*) - (< count 6)) ;; was 11, changing to 4. - (begin - (thread-sleep! 1) - (delay-loop (+ count 1)))) - (if (not *time-to-exit*) (loop)))) - -;; ;; time to exit, close the no-sync db here -;; (db:no-sync-close-db no-sync-db stmt-cache) - (if (common:low-noise-print 30) - (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) )))) -)) - -(define (std-exit-procedure) - ;;(common:telemetry-log-close) - (on-exit (lambda () 0)) ;; why is this here? - ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*) - (let ((no-hurry (if *time-to-exit* ;; hurry up - #f - (begin - (set! *time-to-exit* #t) - #t)))) - (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.") - (if (and no-hurry - (debug:debug-mode 18)) - (dbmod:print-db-stats)) - (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds - (if *dbstruct-dbs* (db:close-all *dbstruct-dbs*)) ;; one second allocated - (if (list? *on-exit-procs*) - (for-each - (lambda (proc) - (proc)) - *on-exit-procs*)) - (if *task-db* - (let ((db (cdr *task-db*))) - (if (sqlite3:database? db) - (begin - (sqlite3:interrupt! db) - (sqlite3:finalize! db #t) - ;; (vector-set! *task-db* 0 #f) - (set! *task-db* #f))))) - (if (and *no-sync-db* - (sqlite3:database? *no-sync-db*)) - (sqlite3:finalize! *no-sync-db* #t)) - (if (and (not (args:get-arg "-server")) - *runremote* - (eq? (rmt:transport-mode) 'http)) - (begin - (debug:print-info 0 *default-log-port* "Closing all client connections...") - (http-transport:close-connections *runremote*) - #;(http-client#close-all-connections!))) - ;; (if (and *runremote* - ;; (remote-conndat *runremote*)) - ;; (begin - ;; (http-client#close-all-connections!))) ;; for http-client - (if (not (eq? *default-log-port* (current-error-port))) - (close-output-port *default-log-port*)) - (set! *default-log-port* (current-error-port))) "Cleanup db exit thread")) - (th2 (make-thread (lambda () - (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...") - (if no-hurry - (begin - (thread-sleep! 5)) ;; give the clean up few seconds to do it's stuff - (begin - (thread-sleep! 2))) - (debug:print 4 *default-log-port* " ... done") - ) - "clean exit"))) - (thread-start! th1) - (thread-start! th2) - (thread-join! th1) - ) - ) - - 0) - Index: db_records.scm ================================================================== --- db_records.scm +++ db_records.scm @@ -18,49 +18,49 @@ ;;====================================================================== ;; dbstruct ;;====================================================================== (define (make-db:test)(make-vector 20)) -(define-inline (db:test-get-id vec) (vector-ref vec 0)) -(define-inline (db:test-get-run_id vec) (vector-ref vec 1)) -(define-inline (db:test-get-testname vec) (vector-ref vec 2)) -(define-inline (db:test-get-state vec) (vector-ref vec 3)) -(define-inline (db:test-get-status vec) (vector-ref vec 4)) -(define-inline (db:test-get-event_time vec) (vector-ref vec 5)) -(define-inline (db:test-get-host vec) (vector-ref vec 6)) -(define-inline (db:test-get-cpuload vec) (vector-ref vec 7)) -(define-inline (db:test-get-diskfree vec) (vector-ref vec 8)) -(define-inline (db:test-get-uname vec) (vector-ref vec 9)) -;; (define-inline (db:test-get-rundir vec) (sdb:qry 'getstr (vector-ref vec 10))) -(define-inline (db:test-get-rundir vec) (vector-ref vec 10)) -(define-inline (db:test-get-item-path vec) (vector-ref vec 11)) -(define-inline (db:test-get-run_duration vec) (vector-ref vec 12)) -(define-inline (db:test-get-final_logf vec) (vector-ref vec 13)) -(define-inline (db:test-get-comment vec) (vector-ref vec 14)) -(define-inline (db:test-get-process_id vec) (vector-ref vec 16)) -(define-inline (db:test-get-archived vec) (vector-ref vec 17)) -(define-inline (db:test-get-last_update vec) (vector-ref vec 18)) - -;; (define-inline (db:test-get-pass_count vec) (vector-ref vec 15)) -;; (define-inline (db:test-get-fail_count vec) (vector-ref vec 16)) -(define-inline (db:test-get-fullname vec) +(define (db:test-get-id vec) (vector-ref vec 0)) +(define (db:test-get-run_id vec) (vector-ref vec 1)) +(define (db:test-get-testname vec) (vector-ref vec 2)) +(define (db:test-get-state vec) (vector-ref vec 3)) +(define (db:test-get-status vec) (vector-ref vec 4)) +(define (db:test-get-event_time vec) (vector-ref vec 5)) +(define (db:test-get-host vec) (vector-ref vec 6)) +(define (db:test-get-cpuload vec) (vector-ref vec 7)) +(define (db:test-get-diskfree vec) (vector-ref vec 8)) +(define (db:test-get-uname vec) (vector-ref vec 9)) +;; (define (db:test-get-rundir vec) (sdb:qry 'getstr (vector-ref vec 10))) +(define (db:test-get-rundir vec) (vector-ref vec 10)) +(define (db:test-get-item-path vec) (vector-ref vec 11)) +(define (db:test-get-run_duration vec) (vector-ref vec 12)) +(define (db:test-get-final_logf vec) (vector-ref vec 13)) +(define (db:test-get-comment vec) (vector-ref vec 14)) +(define (db:test-get-process_id vec) (vector-ref vec 16)) +(define (db:test-get-archived vec) (vector-ref vec 17)) +(define (db:test-get-last_update vec) (vector-ref vec 18)) + +;; (define (db:test-get-pass_count vec) (vector-ref vec 15)) +;; (define (db:test-get-fail_count vec) (vector-ref vec 16)) +(define (db:test-get-fullname vec) (conc (db:test-get-testname vec) "/" (db:test-get-item-path vec))) ;; replace runs:make-full-test-name with this routine (define (db:test-make-full-name testname itempath) (if (equal? itempath "") testname (conc testname "/" itempath))) -(define-inline (db:test-get-first_err vec) (printable (vector-ref vec 15))) -(define-inline (db:test-get-first_warn vec) (printable (vector-ref vec 16))) ;; RADT => reference 16 is repeated - -(define-inline (db:test-set-cpuload! vec val)(vector-set! vec 7 val)) -(define-inline (db:test-set-diskfree! vec val)(vector-set! vec 8 val)) -(define-inline (db:test-set-testname! vec val)(vector-set! vec 2 val)) -(define-inline (db:test-set-state! vec val)(vector-set! vec 3 val)) -(define-inline (db:test-set-status! vec val)(vector-set! vec 4 val)) -(define-inline (db:test-set-run_duration! vec val)(vector-set! vec 12 val)) -(define-inline (db:test-set-final_logf! vec val)(vector-set! vec 13 val)) +;; (define (db:test-get-first_err vec) (printable (vector-ref vec 15))) +;; (define (db:test-get-first_warn vec) (printable (vector-ref vec 16))) ;; RADT => reference 16 is repeated + +(define (db:test-set-cpuload! vec val)(vector-set! vec 7 val)) +(define (db:test-set-diskfree! vec val)(vector-set! vec 8 val)) +(define (db:test-set-testname! vec val)(vector-set! vec 2 val)) +(define (db:test-set-state! vec val)(vector-set! vec 3 val)) +(define (db:test-set-status! vec val)(vector-set! vec 4 val)) +(define (db:test-set-run_duration! vec val)(vector-set! vec 12 val)) +(define (db:test-set-final_logf! vec val)(vector-set! vec 13 val)) ;; Test record utility functions ;; Is a test a toplevel? ;; @@ -70,39 +70,39 @@ ;; make-vector-record "" db mintest id run_id testname state status event_time item_path ;; RADT => purpose of mintest?? ;; (define (make-db:mintest)(make-vector 7)) -(define-inline (db:mintest-get-id vec) (vector-ref vec 0)) -(define-inline (db:mintest-get-run_id vec) (vector-ref vec 1)) -(define-inline (db:mintest-get-testname vec) (vector-ref vec 2)) -(define-inline (db:mintest-get-state vec) (vector-ref vec 3)) -(define-inline (db:mintest-get-status vec) (vector-ref vec 4)) -(define-inline (db:mintest-get-event_time vec) (vector-ref vec 5)) -(define-inline (db:mintest-get-item_path vec) (vector-ref vec 6)) +(define (db:mintest-get-id vec) (vector-ref vec 0)) +(define (db:mintest-get-run_id vec) (vector-ref vec 1)) +(define (db:mintest-get-testname vec) (vector-ref vec 2)) +(define (db:mintest-get-state vec) (vector-ref vec 3)) +(define (db:mintest-get-status vec) (vector-ref vec 4)) +(define (db:mintest-get-event_time vec) (vector-ref vec 5)) +(define (db:mintest-get-item_path vec) (vector-ref vec 6)) ;; make-vector-record db testmeta id testname author owner description reviewed iterated avg_runtime avg_disk (define (make-db:testmeta)(make-vector 10 "")) -(define-inline (db:testmeta-get-id vec) (vector-ref vec 0)) -(define-inline (db:testmeta-get-testname vec) (vector-ref vec 1)) -(define-inline (db:testmeta-get-author vec) (vector-ref vec 2)) -(define-inline (db:testmeta-get-owner vec) (vector-ref vec 3)) -(define-inline (db:testmeta-get-description vec) (vector-ref vec 4)) -(define-inline (db:testmeta-get-reviewed vec) (vector-ref vec 5)) -(define-inline (db:testmeta-get-iterated vec) (vector-ref vec 6)) -(define-inline (db:testmeta-get-avg_runtime vec) (vector-ref vec 7)) -(define-inline (db:testmeta-get-avg_disk vec) (vector-ref vec 8)) -(define-inline (db:testmeta-get-tags vec) (vector-ref vec 9)) -(define-inline (db:testmeta-set-id! vec val)(vector-set! vec 0 val)) -(define-inline (db:testmeta-set-testname! vec val)(vector-set! vec 1 val)) -(define-inline (db:testmeta-set-author! vec val)(vector-set! vec 2 val)) -(define-inline (db:testmeta-set-owner! vec val)(vector-set! vec 3 val)) -(define-inline (db:testmeta-set-description! vec val)(vector-set! vec 4 val)) -(define-inline (db:testmeta-set-reviewed! vec val)(vector-set! vec 5 val)) -(define-inline (db:testmeta-set-iterated! vec val)(vector-set! vec 6 val)) -(define-inline (db:testmeta-set-avg_runtime! vec val)(vector-set! vec 7 val)) -(define-inline (db:testmeta-set-avg_disk! vec val)(vector-set! vec 8 val)) +(define (db:testmeta-get-id vec) (vector-ref vec 0)) +(define (db:testmeta-get-testname vec) (vector-ref vec 1)) +(define (db:testmeta-get-author vec) (vector-ref vec 2)) +(define (db:testmeta-get-owner vec) (vector-ref vec 3)) +(define (db:testmeta-get-description vec) (vector-ref vec 4)) +(define (db:testmeta-get-reviewed vec) (vector-ref vec 5)) +(define (db:testmeta-get-iterated vec) (vector-ref vec 6)) +(define (db:testmeta-get-avg_runtime vec) (vector-ref vec 7)) +(define (db:testmeta-get-avg_disk vec) (vector-ref vec 8)) +(define (db:testmeta-get-tags vec) (vector-ref vec 9)) +(define (db:testmeta-set-id! vec val)(vector-set! vec 0 val)) +(define (db:testmeta-set-testname! vec val)(vector-set! vec 1 val)) +(define (db:testmeta-set-author! vec val)(vector-set! vec 2 val)) +(define (db:testmeta-set-owner! vec val)(vector-set! vec 3 val)) +(define (db:testmeta-set-description! vec val)(vector-set! vec 4 val)) +(define (db:testmeta-set-reviewed! vec val)(vector-set! vec 5 val)) +(define (db:testmeta-set-iterated! vec val)(vector-set! vec 6 val)) +(define (db:testmeta-set-avg_runtime! vec val)(vector-set! vec 7 val)) +(define (db:testmeta-set-avg_disk! vec val)(vector-set! vec 8 val)) ;;====================================================================== ;; S I M P L E R U N ;;====================================================================== @@ -110,84 +110,84 @@ ;;====================================================================== ;; T E S T D A T A ;;====================================================================== (define (make-db:test-data)(make-vector 10)) -(define-inline (db:test-data-get-id vec) (vector-ref vec 0)) -(define-inline (db:test-data-get-test_id vec) (vector-ref vec 1)) -(define-inline (db:test-data-get-category vec) (vector-ref vec 2)) -(define-inline (db:test-data-get-variable vec) (vector-ref vec 3)) -(define-inline (db:test-data-get-value vec) (vector-ref vec 4)) -(define-inline (db:test-data-get-expected vec) (vector-ref vec 5)) -(define-inline (db:test-data-get-tol vec) (vector-ref vec 6)) -(define-inline (db:test-data-get-units vec) (vector-ref vec 7)) -(define-inline (db:test-data-get-comment vec) (vector-ref vec 8)) -(define-inline (db:test-data-get-status vec) (vector-ref vec 9)) -(define-inline (db:test-data-get-type vec) (vector-ref vec 10)) -(define-inline (db:test-data-get-last_update vec) (vector-ref vec 11)) - -(define-inline (db:test-data-set-id! vec val)(vector-set! vec 0 val)) -(define-inline (db:test-data-set-test_id! vec val)(vector-set! vec 1 val)) -(define-inline (db:test-data-set-category! vec val)(vector-set! vec 2 val)) -(define-inline (db:test-data-set-variable! vec val)(vector-set! vec 3 val)) -(define-inline (db:test-data-set-value! vec val)(vector-set! vec 4 val)) -(define-inline (db:test-data-set-expected! vec val)(vector-set! vec 5 val)) -(define-inline (db:test-data-set-tol! vec val)(vector-set! vec 6 val)) -(define-inline (db:test-data-set-units! vec val)(vector-set! vec 7 val)) -(define-inline (db:test-data-set-comment! vec val)(vector-set! vec 8 val)) -(define-inline (db:test-data-set-status! vec val)(vector-set! vec 9 val)) -(define-inline (db:test-data-set-type! vec val)(vector-set! vec 10 val)) +(define (db:test-data-get-id vec) (vector-ref vec 0)) +(define (db:test-data-get-test_id vec) (vector-ref vec 1)) +(define (db:test-data-get-category vec) (vector-ref vec 2)) +(define (db:test-data-get-variable vec) (vector-ref vec 3)) +(define (db:test-data-get-value vec) (vector-ref vec 4)) +(define (db:test-data-get-expected vec) (vector-ref vec 5)) +(define (db:test-data-get-tol vec) (vector-ref vec 6)) +(define (db:test-data-get-units vec) (vector-ref vec 7)) +(define (db:test-data-get-comment vec) (vector-ref vec 8)) +(define (db:test-data-get-status vec) (vector-ref vec 9)) +(define (db:test-data-get-type vec) (vector-ref vec 10)) +(define (db:test-data-get-last_update vec) (vector-ref vec 11)) + +(define (db:test-data-set-id! vec val)(vector-set! vec 0 val)) +(define (db:test-data-set-test_id! vec val)(vector-set! vec 1 val)) +(define (db:test-data-set-category! vec val)(vector-set! vec 2 val)) +(define (db:test-data-set-variable! vec val)(vector-set! vec 3 val)) +(define (db:test-data-set-value! vec val)(vector-set! vec 4 val)) +(define (db:test-data-set-expected! vec val)(vector-set! vec 5 val)) +(define (db:test-data-set-tol! vec val)(vector-set! vec 6 val)) +(define (db:test-data-set-units! vec val)(vector-set! vec 7 val)) +(define (db:test-data-set-comment! vec val)(vector-set! vec 8 val)) +(define (db:test-data-set-status! vec val)(vector-set! vec 9 val)) +(define (db:test-data-set-type! vec val)(vector-set! vec 10 val)) ;;====================================================================== ;; S T E P S ;;====================================================================== ;; Run steps ;; make-vector-record "Run steps" db step id test_id stepname step_complete step_pass event_time (define (make-db:step)(make-vector 9)) -(define-inline (tdb:step-get-id vec) (vector-ref vec 0)) -(define-inline (tdb:step-get-test_id vec) (vector-ref vec 1)) -(define-inline (tdb:step-get-stepname vec) (vector-ref vec 2)) -(define-inline (tdb:step-get-state vec) (vector-ref vec 3)) -(define-inline (tdb:step-get-status vec) (vector-ref vec 4)) -(define-inline (tdb:step-get-event_time vec) (vector-ref vec 5)) -(define-inline (tdb:step-get-logfile vec) (vector-ref vec 6)) -(define-inline (tdb:step-get-comment vec) (vector-ref vec 7)) -(define-inline (tdb:step-get-last_update vec) (vector-ref vec 8)) -(define-inline (tdb:step-set-id! vec val)(vector-set! vec 0 val)) -(define-inline (tdb:step-set-test_id! vec val)(vector-set! vec 1 val)) -(define-inline (tdb:step-set-stepname! vec val)(vector-set! vec 2 val)) -(define-inline (tdb:step-set-state! vec val)(vector-set! vec 3 val)) -(define-inline (tdb:step-set-status! vec val)(vector-set! vec 4 val)) -(define-inline (tdb:step-set-event_time! vec val)(vector-set! vec 5 val)) -(define-inline (tdb:step-set-logfile! vec val)(vector-set! vec 6 val)) -(define-inline (tdb:step-set-comment! vec val)(vector-set! vec 7 val)) +(define (tdb:step-get-id vec) (vector-ref vec 0)) +(define (tdb:step-get-test_id vec) (vector-ref vec 1)) +(define (tdb:step-get-stepname vec) (vector-ref vec 2)) +(define (tdb:step-get-state vec) (vector-ref vec 3)) +(define (tdb:step-get-status vec) (vector-ref vec 4)) +(define (tdb:step-get-event_time vec) (vector-ref vec 5)) +(define (tdb:step-get-logfile vec) (vector-ref vec 6)) +(define (tdb:step-get-comment vec) (vector-ref vec 7)) +(define (tdb:step-get-last_update vec) (vector-ref vec 8)) +(define (tdb:step-set-id! vec val)(vector-set! vec 0 val)) +(define (tdb:step-set-test_id! vec val)(vector-set! vec 1 val)) +(define (tdb:step-set-stepname! vec val)(vector-set! vec 2 val)) +(define (tdb:step-set-state! vec val)(vector-set! vec 3 val)) +(define (tdb:step-set-status! vec val)(vector-set! vec 4 val)) +(define (tdb:step-set-event_time! vec val)(vector-set! vec 5 val)) +(define (tdb:step-set-logfile! vec val)(vector-set! vec 6 val)) +(define (tdb:step-set-comment! vec val)(vector-set! vec 7 val)) ;; The steps table (define (make-db:steps-table)(make-vector 5)) -(define-inline (tdb:steps-table-get-stepname vec) (vector-ref vec 0)) -(define-inline (tdb:steps-table-get-start vec) (vector-ref vec 1)) -(define-inline (tdb:steps-table-get-end vec) (vector-ref vec 2)) -(define-inline (tdb:steps-table-get-status vec) (vector-ref vec 3)) -(define-inline (tdb:steps-table-get-runtime vec) (vector-ref vec 4)) -(define-inline (tdb:steps-table-get-log-file vec) (vector-ref vec 5)) - -(define-inline (tdb:step-stable-set-stepname! vec val)(vector-set! vec 0 val)) -(define-inline (tdb:step-stable-set-start! vec val)(vector-set! vec 1 val)) -(define-inline (tdb:step-stable-set-end! vec val)(vector-set! vec 2 val)) -(define-inline (tdb:step-stable-set-status! vec val)(vector-set! vec 3 val)) -(define-inline (tdb:step-stable-set-runtime! vec val)(vector-set! vec 4 val)) +(define (tdb:steps-table-get-stepname vec) (vector-ref vec 0)) +(define (tdb:steps-table-get-start vec) (vector-ref vec 1)) +(define (tdb:steps-table-get-end vec) (vector-ref vec 2)) +(define (tdb:steps-table-get-status vec) (vector-ref vec 3)) +(define (tdb:steps-table-get-runtime vec) (vector-ref vec 4)) +(define (tdb:steps-table-get-log-file vec) (vector-ref vec 5)) + +(define (tdb:step-stable-set-stepname! vec val)(vector-set! vec 0 val)) +(define (tdb:step-stable-set-start! vec val)(vector-set! vec 1 val)) +(define (tdb:step-stable-set-end! vec val)(vector-set! vec 2 val)) +(define (tdb:step-stable-set-status! vec val)(vector-set! vec 3 val)) +(define (tdb:step-stable-set-runtime! vec val)(vector-set! vec 4 val)) ;; The data structure for handing off requests via wire (define (make-cdb:packet)(make-vector 6)) -(define-inline (cdb:packet-get-client-sig vec) (vector-ref vec 0)) -(define-inline (cdb:packet-get-qtype vec) (vector-ref vec 1)) -(define-inline (cdb:packet-get-immediate vec) (vector-ref vec 2)) -(define-inline (cdb:packet-get-query-sig vec) (vector-ref vec 3)) -(define-inline (cdb:packet-get-params vec) (vector-ref vec 4)) -(define-inline (cdb:packet-get-qtime vec) (vector-ref vec 5)) -(define-inline (cdb:packet-set-client-sig! vec val)(vector-set! vec 0 val)) -(define-inline (cdb:packet-set-qtype! vec val)(vector-set! vec 1 val)) -(define-inline (cdb:packet-set-immediate! vec val)(vector-set! vec 2 val)) -(define-inline (cdb:packet-set-query-sig! vec val)(vector-set! vec 3 val)) -(define-inline (cdb:packet-set-params! vec val)(vector-set! vec 4 val)) -(define-inline (cdb:packet-set-qtime! vec val)(vector-set! vec 5 val)) +(define (cdb:packet-get-client-sig vec) (vector-ref vec 0)) +(define (cdb:packet-get-qtype vec) (vector-ref vec 1)) +(define (cdb:packet-get-immediate vec) (vector-ref vec 2)) +(define (cdb:packet-get-query-sig vec) (vector-ref vec 3)) +(define (cdb:packet-get-params vec) (vector-ref vec 4)) +(define (cdb:packet-get-qtime vec) (vector-ref vec 5)) +(define (cdb:packet-set-client-sig! vec val)(vector-set! vec 0 val)) +(define (cdb:packet-set-qtype! vec val)(vector-set! vec 1 val)) +(define (cdb:packet-set-immediate! vec val)(vector-set! vec 2 val)) +(define (cdb:packet-set-query-sig! vec val)(vector-set! vec 3 val)) +(define (cdb:packet-set-params! vec val)(vector-set! vec 4 val)) +(define (cdb:packet-set-qtime! vec val)(vector-set! vec 5 val)) Index: dbfile.scm ================================================================== --- dbfile.scm +++ dbfile.scm @@ -135,12 +135,11 @@ (dbfile #f) ;; path to the db file on disk (dbfname #f) ;; short name of db file on disk (used to validate accessing correct db) (ondiskdb #f) ;; handle for the on-disk file (dbtmpname #f) ;; path to db file in /tmp (non-imem method) (dbdat #f) ;; create a dbdat for the downstream calls such as db:with-db - (last-update 0) - (sync-proc #f) +grep (last-update 0) (sync-proc #f) ) ;; NOTE: Need one dbr:subdb per main.db, 1.db ... ;; (defstruct dbr:subdb @@ -147,10 +146,11 @@ (dbname #f) ;; .mtdb/1.db (mtdbfile #f) ;; mtrah/.mtdb/1.db (mtdbdat #f) ;; only need one of these for syncing ;; (dbdats (make-hash-table)) ;; id => dbdat (tmpdbfile #f) ;; /tmp/.../.mtdb/1.db + (refndb #f) ;; FIX THIS, IT SHOULD NOT BE REFERENCED! ;; (refndbfile #f) ;; /tmp/.../.mtdb/1.db_ref (dbstack (make-stack)) ;; stack for tmp dbr:dbdat, (homehost #f) ;; not used yet (on-homehost #f) ;; not used yet (read-only #f) @@ -1648,7 +1648,24 @@ ;; (define (db:with-mutex-for-stmth proc) ;; (mutex-lock! *mutex-stmth-call*) ;; (let* ((res (proc))) ;; (mutex-unlock! *mutex-stmth-call*) ;; res)) +;;====================================================================== +;; L O C K E R S A N D B L O C K E R S +;;====================================================================== + +;; block further accesses to databases. Call this before shutting db down +(define (common:db-block-further-queries) + (mutex-lock! *db-access-mutex*) + (set! *db-access-allowed* #f) + (mutex-unlock! *db-access-mutex*)) + +(define (common:db-access-allowed?) + (let ((val (begin + (mutex-lock! *db-access-mutex*) + *db-access-allowed* + (mutex-unlock! *db-access-mutex*)))) + val)) + ) Index: dbmod.scm ================================================================== --- dbmod.scm +++ dbmod.scm @@ -22,10 +22,12 @@ (declare (unit dbmod)) (declare (uses dbfile)) (declare (uses commonmod)) (declare (uses configfmod)) (declare (uses debugprint)) +(declare (uses mtargs)) +(declare (uses mtmod)) (module dbmod * (import scheme) @@ -32,15 +34,20 @@ (cond-expand (chicken-4 (import chicken data-structures + srfi-13 + + debugprint extras files - + (prefix mtargs args:) posix - + ports + csv-xml + )) (chicken-5 (import chicken.base chicken.condition chicken.file @@ -57,18 +64,32 @@ (import format (prefix sqlite3 sqlite3:) matchable typed-records + regex + s11n srfi-1 srfi-18 srfi-69 - + z3 + (prefix base64 base64:) + commonmod configfmod dbfile - debugprint) + debugprint + mtmod + ) + +(include "common_records.scm") +(include "db_records.scm") +(include "key_records.scm") +(include "run_records.scm") + +(define *number-of-writes* 0) +(define *number-non-write-queries* 0) ;; NOTE: This returns only the name "1.db", "main.db", not the path ;; (define (dbmod:run-id->dbfname run-id) (conc (dbfile:run-id->dbnum run-id)".db")) @@ -680,48 +701,16 @@ (debug:print 0 *default-log-port* "Skipping sync of table "table" due to transaction in flight.")))) table-names) (sqlite3:execute dbh1 "DETACH auxdb;")))) - - ;;====================================================================== ;; Moved from dbfile ;;====================================================================== ;; wait up to aprox n seconds for a journal to go away ;; -(define (tasks:wait-on-journal path n #!key (remove #f)(waiting-msg #f)) - (if (not (string? path)) - (debug:print-error 0 *default-log-port* "Called tasks:wait-on-journal with path=" path " (not a string)") - (let ((fullpath (conc path "-journal"))) - (handle-exceptions - exn - (begin - (print-call-chain (current-error-port)) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* " exn=" (condition->list exn)) - (debug:print 0 *default-log-port* "tasks:wait-on-journal failed. Continuing on, you can ignore this call-chain") - #t) ;; if stuff goes wrong just allow it to move on - (let loop ((journal-exists (file-exists? fullpath)) - (count n)) ;; wait ten times ... - (if journal-exists - (begin - (if (and waiting-msg - (eq? (modulo n 30) 0)) - (debug:print 0 *default-log-port* waiting-msg)) - (if (> count 0) - (begin - (thread-sleep! 1) - (loop (file-exists? fullpath) - (- count 1))) - (begin - (debug:print 0 *default-log-port* "ERROR: removing the journal file " fullpath ", this is not good. Look for disk full, write access and other issues.") - (if remove (system (conc "rm -rf " fullpath))) - #f))) - #t)))))) - ;;====================================================================== ;; M E T A G E T A N D S E T V A R S ;;====================================================================== @@ -931,53 +920,4371 @@ (let ((new-rec (make-dbstat))) (hash-table-set! *db-stats* modified-cmd new-rec) (set! rec new-rec))) (dbstat-cnt-set! rec (+ (dbstat-cnt rec) 1)) (dbstat-tottime-set! rec (+ (dbstat-tottime rec) delta)))) - - - -) - - -;; ATTIC - - #;(let* ((syncer-logfile (conc areapath"/logs/"dbfname"-syncer.log")) - (sync-cmd (if (eq? syncdir 'todisk) - (conc "(NBFAKE_LOG="syncer-logfile" nbfake megatest -db2db -from "tmpdb" -to "dbfullname" -period 5 -timeout 10 > /dev/null 2&>1)&") - (conc "(NBFAKE_LOG="syncer-logfile" nbfake megatest -db2db -from "dbfullname" -to "tmpdb" -period 5 -timeout 10 > /dev/null 2&>1)&"))) - (synclock-file (conc dbfullname".lock")) - (syncer-running-file (conc dbfullname"-sync-running")) - (synclock-mod-time (if (file-exists? synclock-file) - (handle-exceptions - exn - #f - (file-modification-time synclock-file)) - #f)) - (thethread (lambda () - (thread-start! - (make-thread - (lambda () - (set! *sync-in-progress* #t) - (debug:print-info "Running "sync-cmd) - (if (file-exists? syncer-running-file) - (debug:print-info 0 *default-log-port* "Syncer still running, skipping syncer start.") - (system sync-cmd)) - (set! *sync-in-progress* #f))))))) - (if ((if (eq? syncdir 'todisk) < >) ;; use less than for todisk, greater than for from disk - (file-modification-time tmpdb) - (file-modification-time dbfullname)) - (debug:print 4 *default-log-port* "Skipping sync, "tmpdb" older than "dbfullname) - (if synclock-mod-time - (if (> (- (current-seconds) synclock-mod-time) 20) ;; something wrong with sync, remove file - (begin - (handle-exceptions - exn - #f - (begin - (debug:print 0 *default-log-port* "Sync lock file " synclock-file "is older than 20 seconds (" synclock-mod-time " seconds). Removing it") - (delete-file synclock-file) - ) - ) - (thethread)) - (debug:print 0 *default-log-port* "Skipping sync, lockfile "synclock-file" found.")) - (thethread)))) + +;; record for keeping state,status and count for doing roll-ups in +;; iterated tests +;; +(defstruct dbr:counts + (state #f) + (status #f) + (count 0)) + +;; (define (db:with-db dbstruct run-id r/w proc . params) +;; (case (rmt:transport-mode) +;; ((http)(dbfile:with-db dbstruct run-id r/w proc params)) +;; ((tcp) (dbmod:with-db dbstruct run-id r/w proc params)) +;; ((nfs) (dbmod:with-db dbstruct run-id r/w proc params)) +;; (else (assert #f "FATAL: db:with-db called with non-existant transport mode")))) + +;;====================================================================== +;; hash of hashs +;;====================================================================== + + +(define (db:hoh-set! dat key1 key2 val) + (let* ((subhash (hash-table-ref/default dat key1 #f))) + (if subhash + (hash-table-set! subhash key2 val) + (begin + (hash-table-set! dat key1 (make-hash-table)) + (db:hoh-set! dat key1 key2 val))))) + +(define (db:hoh-get dat key1 key2) + (let* ((subhash (hash-table-ref/default dat key1 #f))) + (and subhash + (hash-table-ref/default subhash key2 #f)))) + +;;====================================================================== +;; SQLITE3 HELPERS +;;====================================================================== + +(define (db:general-sqlite-error-dump exn stmt . params) + (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) ;; RADT ... how does this work? + ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn) + ;; (print "err-status: " err-status) + (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn)) + (print-call-chain (current-error-port)))) + +;; convert to -inline +;; +(define (db:first-result-default db stmt default . params) + (handle-exceptions + exn + (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) + ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn) + (if (eq? err-status 'done) + default + (begin + (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) + (print-call-chain (current-error-port)) + default))) + (apply sqlite3:first-result db stmt params))) + +(define (db:setup) + (assert *toppath* "FATAL: db:setup called before launch:setup has been run.") + (let* ((tmpdir (common:make-tmpdir-name *toppath* ""))) + (if (not *dbstruct-dbs*) + (dbfile:setup (conc *toppath* "/.mtdb") tmpdir) + *dbstruct-dbs*))) + +;; moved from dbfile +;; +;; ADD run-id SUPPORT +;; +(define (db:create-all-triggers dbstruct) + (db:with-db + dbstruct #f #t + (lambda (dbdat db) + (db:create-triggers db)))) + +(define (db:create-triggers db) + (for-each (lambda (key) + (sqlite3:execute db (cadr key))) + db:trigger-list)) + +(define (db:drop-all-triggers dbstruct) + (db:with-db + dbstruct #f #t + (lambda (dbdat db) + (db:drop-triggers db)))) + +(define (db:have-incompletes? dbstruct run-id ovr-deadtime) + (let* ((incompleted '()) + (oldlaunched '()) + (toplevels '()) + ;; (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) + (deadtime (or ovr-deadtime 72000))) ;; twenty hours + (db:with-db + dbstruct run-id #f + (lambda (dbdat db) + + ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes + ;; + ;; HOWEVER: this code in run:test seems to work fine + ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) + ;; (db:test-get-run_duration testdat))) + ;; 600) + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (begin + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels))) + ;; (print-info "Found old toplevel test in RUNNING state, test-id=" test-id)) + (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) + (db:get-cache-stmth dbdat db + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');") + run-id deadtime) + + ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config + ;; + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) + (db:get-cache-stmth dbdat db + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');") + run-id) + + ;; (print-info "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") + (if (and (null? incompleted) + (null? oldlaunched) + (null? toplevels)) + #f + #t))))) + + +(define-inline (db:generic-error-printout exn . message) + (print-call-chain (current-error-port)) + (apply debug:print-error 0 *default-log-port* message) + (debug:print-error 0 *default-log-port* " params: " params + ", error: " ((condition-property-accessor 'exn 'message) exn) + ", arguments: " ((condition-property-accessor 'exn 'arguments) exn) + ", location: " ((condition-property-accessor 'exn 'location) exn) + )) + + +(define (db:set-sync db) + (let ((syncprag (configf:lookup *configdat* "setup" "sychronous"))) + (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 0) ";")))) + + +(define (db:get-last-update-time db) + (let ((last-update-time #f)) + (sqlite3:for-each-row + (lambda (lup) + (set! last-update-time lup)) + db + "select max(lup) from ( select max(last_update) as lup from tests union select max(last_update) as lup from runs);") + last-update-time)) + + +;; Open the classic megatest.db file (defaults to open in toppath) +;; +;; NOTE: returns a dbdat not a dbstruct! +;; +(define (db:open-megatest-db dbpath #!key (launch-setup #f)) + (let* ((dbexists (file-exists? dbpath)) + (db (db:lock-create-open dbpath + (lambda (db) + (db:initialize-main-db db launch-setup: launch-setup)))) + (write-access (file-write-access? dbpath))) + (debug:print-info 13 *default-log-port* "db:open-megatest-db "dbpath) + (if (and dbexists (not write-access)) + (set! *db-write-access* #f)) + ;; (cons db dbpath))) + (make-dbr:dbdat dbfile: dbpath dbh: db read-only: (not write-access)))) + + +;; use bunch of Unix commands to try to break the lock and recreate the db +;; +(define (db:move-and-recreate-db dbdat) + (let* ((dbpath (dbr:dbdat-dbfile dbdat)) + (dbdir (pathname-directory dbpath)) + (fname (pathname-strip-directory dbpath)) + (fnamejnl (conc fname "-journal")) + (tmpname (conc fname "." (current-process-id))) + (tmpjnl (conc fnamejnl "." (current-process-id)))) + (debug:print-error 0 *default-log-port* "" fname " appears corrupted. Making backup \"old/" fname "\"") + (system (conc "cd " dbdir ";mkdir -p old;cat " fname " > old/" tmpname)) + (system (conc "rm -f " dbpath)) + (if (common:file-exists? fnamejnl) + (begin + (debug:print-error 0 *default-log-port* "" fnamejnl " found, moving it to old dir as " tmpjnl) + (system (conc "cd " dbdir ";mkdir -p old;cat " fnamejnl " > old/" tmpjnl)) + (system (conc "rm -f " dbdir "/" fnamejnl)))) + ;; attempt to recreate database + (system (conc "cd " dbdir ";sqlite3 old/" tmpname " .dump | sqlite3 " fname)))) + +;; return #f to indicate the dbdat should be closed/reopened +;; else return dbdat +;; +(define (db:repair-db dbdat #!key (numtries 1)) + (let* ((dbpath (dbr:dbdat-dbfile dbdat)) + (dbdir (pathname-directory dbpath)) + (fname (pathname-strip-directory dbpath))) + (debug:print-info 0 *default-log-port* "Checking db " dbpath " for errors.") + (cond + ((not (file-write-access? dbdir)) + (debug:print 0 *default-log-port* "WARNING: can't write to " dbdir ", can't fix " fname) + #f) + + ;; handle special cases, megatest.db and monitor.db + ;; + ;; NOPE: apply this same approach to all db files + ;; + (else ;; ((equal? fname "megatest.db") ;; this file can be regenerated if needed + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "Problems trying to repair the db, exn=" exn) + ;; (db:move-and-recreate-db dbdat) + (if (> numtries 0) + (db:repair-db dbdat numtries: (- numtries 1)) + #f) + (debug:print 0 *default-log-port* "FATAL: file " dbpath " was found corrupted, an attempt to fix has been made but you must start over.") + (debug:print 0 *default-log-port* + " check the following:\n" + " 1. full directories, look in ~/ /tmp and " dbdir "\n" + " 2. write access to " dbdir "\n\n" + " if the automatic recovery failed you may be able to recover data by doing \"" + (if (member fname '("megatest.db" "monitor.db")) + "megatest -cleanup-db" + "megatest -import-megatest.db;megatest -cleanup-db") + "\"\n") + (exit) ;; we can not safely continue when a db was corrupted - even if fixed. + ) + ;; test read/write access to the database + (let ((db (sqlite3:open-database dbpath))) + (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) + (sqlite3:execute db "PRAGMA synchronous = 0;") + (cond + ((equal? fname "megatest.db") + (sqlite3:execute db "DELETE FROM tests WHERE state='DELETED';")) + ((equal? fname "main.db") + (sqlite3:execute db "DELETE FROM runs WHERE state='deleted';")) + ((string-match "\\d.db" fname) + (sqlite3:execute db "UPDATE tests SET state='DELETED' WHERE state='DELETED';")) + ((equal? fname "monitor.db") + (sqlite3:execute "DELETE FROM servers WHERE state LIKE 'defunct%';")) + (else + (sqlite3:execute db "vacuum;"))) + + (sqlite3:finalize! db) + #t)))))) + + + +(define (db:adj-target db) + (let ((fields (configf:get-section *configdat* "fields")) + (field-num 0)) + ;; because we will be refreshing the keys table it is best to clear it here + (sqlite3:execute db "DELETE FROM keys;") + (for-each + (lambda (field) + (let ((column (car field)) + (spec (cadr field))) + (handle-exceptions + exn + (if (string-match ".*duplicate.*" ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* "Target field " column " already exists in the runs table") + (db:general-sqlite-error-dump exn "alter table runs ..." #f "none")) + ;; Add the column if needed + (sqlite3:execute + db + (conc "ALTER TABLE runs ADD COLUMN " column " " spec))) + ;; correct the entry in the keys column + (sqlite3:execute + db + "INSERT INTO keys (id,fieldname,fieldtype) VALUES (?,?,?);" + field-num column spec) + ;; fill in blanks (not allowed as it would be part of the path + (sqlite3:execute + db + (conc "UPDATE runs SET " column "='x' WHERE " column "='';")) + (set! field-num (+ field-num 1)))) + fields))) + +(define *global-db-store* (make-hash-table)) + +(define (db:get-access-mode) + (if (args:get-arg "-use-db-cache") 'cached 'rmt)) + +;; Add db direct +;; +(define (db:dispatch-query access-mode rmt-cmd db-cmd . params) + (if (eq? access-mode 'cached) + (debug:print 2 *default-log-port* "not doing cached calls right now")) +;; (apply db:call-with-cached-db db-cmd params) + (apply rmt-cmd params)) +;;) + +;; return the target db handle so it can be used +;; +(define (db:cache-for-read-only source target #!key (use-last-update #f)(launch-setup #f)) + (if (and (hash-table-ref/default *global-db-store* target #f) + (>= (file-modification-time target)(file-modification-time source))) + (hash-table-ref *global-db-store* target) + (let* ((toppath (if *toppath* + *toppath* + (launch-setup))) + (targ-db-last-mod (db:get-sqlite3-mod-time target)) +;; (if (common:file-exists? target) +;; BUG: This needs to include wal mode stuff .shm etc. +;; (file-modification-time target) +;; 0)) + (cache-db (or (hash-table-ref/default *global-db-store* target #f) + (db:open-megatest-db path: target))) + (source-db (db:open-megatest-db path: source)) + (curr-time (current-seconds)) + (res '()) + (last-update (if use-last-update (cons "last_update" targ-db-last-mod) #f))) + (db:sync-tables (db:sync-main-list source-db) last-update source-db cache-db) + (db:sync-tables db:sync-tests-only last-update source-db cache-db) + (hash-table-set! *global-db-store* target cache-db) + cache-db))) + +(define (db:get-sqlite3-mod-time fname) + (let* ((wal-file (conc fname "-wal")) + (shm-file (conc fname "-shm")) + (get-mtime (lambda (f) + (if (and (file-exists? f) + (file-read-access? f)) + (file-modification-time f) + 0)))) + (max (get-mtime fname) + (get-mtime wal-file) + (get-mtime shm-file)))) + +;; (define (db:all-db-sync dbstruct) +;; (let* ((dbdat (db:open-db dbstruct #f db:initialize-main-db)) +;; (data-synced 0) ;; count of changed records +;; (tmp-area (common:make-tmpdir-name *toppath*)) +;; (dbfiles (glob (conc tmp-area"/.mtdb/*.db"))) +;; (sync-durations (make-hash-table)) +;; (no-sync-db (db:open-no-sync-db))) +;; (for-each +;; (lambda (file) ;; tmp db file +;; (debug:print-info 3 *default-log-port* "file: " file) +;; (let* ((fname (conc (pathname-file file) ".db")) ;; fname is tmp db file +;; (wal-file (conc fname "-wal")) +;; (shm-file (conc fname "-shm")) +;; (fulln (conc *toppath*"/,mtdb/"fname)) ;; fulln is nfs db name +;; (wal-time (if (file-exists? wal-file) +;; (file-modification-time wal-file) +;; 0)) +;; (shm-time (if (file-exists? shm-file) +;; (file-modification-time shm-file) +;; 0)) +;; (time1 (db:get-sqlite3-mod-time file)) +;; ;; (if (file-exists? file) ;; time1 is the max itime of the tmp db, -wal and -shm files. +;; ;; (max (file-modification-time file) wal-time shm-time) +;; ;; (begin +;; ;; (debug:print-info 2 *default-log-port* "Sync - I do not see file "file) +;; ;; 1))) +;; (time2 (db:get-sqlite3-mod-time fulln)) +;; ;; (if (file-exists? fulln) ;; time2 is nfs file time +;; ;; (file-modification-time fulln) +;; ;; (begin +;; ;; (debug:print-info 2 *default-log-port* "Sync - I do not see file "fulln) +;; ;; 0))) +;; (changed (> (- time1 time2) (+ (random 5) 1))) ;; it has been at some few seconds since last synced +;; (changed10 (> (- time1 time2) 10)) ;; it has been at least ten seconds since sync'd +;; (jfile-exists (file-exists? (conc file"-journal"))) ;; i.e. are we busy? +;; (do-cp (cond +;; ((not (file-exists? fulln)) ;; shouldn't happen, but this might recover +;; (cons #t (conc "File "fulln" not found! Copying "fname" to "fulln))) +;; ((and (not jfile-exists) changed) +;; (cons #t "not busy, changed")) ;; not busy and changed +;; ((and jfile-exists changed10) +;; (cons #t "busy but not synced in a while")) ;; busy but not sync'd in over 10 seconds +;; ((and changed *time-to-exit*) +;; (cons #t "Time to exit, forced final sync")) ;; last sync +;; (else +;; (cons #f "No sync needed"))))) +;; (if (car do-cp) +;; (let* ((start-time (current-milliseconds)) +;; (fname (pathname-file file)) +;; (runid (if (string= fname "main") #f (string->number fname)))) +;; (debug:print-info 3 *default-log-port* "db:all-db-sync: fname: " +;; fname", delta: " (- time1 time2) " seconds, reason: "(cdr do-cp)) +;; (db:lock-and-delta-sync no-sync-db dbstruct fname runid (db:get-keys dbstruct) db:initialize-main-db) +;; (hash-table-set! sync-durations (conc fname".db") +;; (- (current-milliseconds) start-time))) +;; (debug:print-info 3 *default-log-port* "skipping sync. " file " is up to date") +;; ))) +;; dbfiles) +;; ;; WHY does the dbdat need to be added back? +;; (if dbdat (dbfile:add-dbdat dbstruct #f dbdat)) +;; ) +;; #t) + + +;; options: +;; +;; 'killservers - kills all servers +;; 'dejunk - removes junk records +;; 'adj-testids - move test-ids into correct ranges +;; 'old2new - sync megatest.db to /tmp/.../megatest.db and /tmp/.../megatest_ref.db +;; 'new2old - sync /tmp/.../megatest.db to megatest.db and /tmp/.../megatest_ref.db (and update data_synced) +;; 'closeall - close all opened dbs +;; 'schema - attempt to apply schema changes +;; run-ids: '(1 2 3 ...) or #f (for all) +;; +(define (db:multi-db-sync dbstruct . options) + (let* (;; (dbdat (db:open-db dbstruct #f dbfile:db-init-proc)) + (data-synced 0) ;; count of changed records + (tmp-area (common:make-tmpdir-name *toppath* "")) + (old2new (member 'old2new options)) + (dejunk (member 'dejunk options)) + (killservers (member 'killservers options)) + (src-area (if old2new *toppath* tmp-area)) + (dest-area (if old2new tmp-area (conc *toppath* "/.mtdb"))) + (dbfiles (if old2new (glob (conc *toppath* "/.mtdb/*.db")) + (glob (conc tmp-area "/*.db")))) + (keys (db:get-keys dbstruct)) + (sync-durations (make-hash-table))) + + ;; kill servers + ;; (if killservers (db:kill-servers)) + + (if (not dbfiles) + (debug:print-error 0 *default-log-port* "no dbfiles found in " (conc *toppath* "/.mtdb")) + (for-each + (lambda (srcfile) + (debug:print-info 3 *default-log-port* "file: " srcfile) + (let* ((fname (conc (pathname-file srcfile) ".db")) + (basename (pathname-file srcfile)) + (run-id (if (string= basename "main") #f (string->number basename))) + (destfile (conc dest-area "/" fname)) + (dest-directory dest-area) + (time1 (file-modification-time srcfile)) + (time2 (if (file-exists? destfile) + (begin + (debug:print-info 2 *default-log-port* "destfile " destfile " exists") + (file-modification-time destfile)) + (begin + (debug:print-info 0 *default-log-port* "Sync - I do not see file " destfile) + 0))) + (changed ( < (- time2 time1) 6.0)) ;; dest db not updated within last 6 seconds + + (do-cp (cond + ((not (file-exists? destfile)) ;; shouldn't happen, but this might recover + (debug:print-info 2 *default-log-port* "File " destfile " not found. Copying "srcfile" to "destfile) + ;; TODO: Need to fix this for WAL mod. Can't just copy. + (system (conc "/bin/mkdir -p " dest-directory)) + (system (conc "/bin/cp " srcfile " " destfile)) + #t) + (changed ;; (and changed + #t) + ((and changed *time-to-exit*) ;; last sync + #t) + (else + #f)))) + + (if (or dejunk do-cp) + (let* ((start-time (current-milliseconds)) + (subdb (or (dbfile:get-subdb dbstruct run-id) (dbfile:init-subdb dbstruct run-id dbfile:db-init-proc))) + (dbdat (or (dbfile:get-dbdat dbstruct run-id) (dbfile:open-db dbstruct run-id dbfile:db-init-proc))) + (mtdb (dbr:subdb-mtdbdat subdb)) + ;; + ;; BUG: -mrw- I think this next line is wrong. run-id should be the path to .mtdb/.db + ;; + (tmpdb (dbfile:open-db dbstruct run-id dbfile:db-init-proc))) + + (if dejunk + (begin + (debug:print 0 *default-log-port* "Cleaning tmp DB") + (db:clean-up run-id tmpdb) + (debug:print 0 *default-log-port* "Cleaning nfs DB") + (db:clean-up run-id mtdb) + ) + ) + (debug:print-info 2 *default-log-port* "delta syncing file: " srcfile ", time diff: " (- time1 time2) " seconds") + (if old2new + (begin + (db:sync-tables (db:sync-all-tables-list + (db:get-keys dbstruct)) + #f mtdb tmpdb)) + (begin + (db:sync-tables (db:sync-all-tables-list (db:get-keys dbstruct)) #f tmpdb mtdb))) + (hash-table-set! sync-durations (conc srcfile ".db") (- (current-milliseconds) start-time))) + (debug:print-info 2 *default-log-port* "skipping delta sync. " srcfile " is up to date")))) + dbfiles)) + data-synced)) + +;; Sync all changed db's +;; +(define (db:tmp->megatest.db-sync dbstruct run-id last-update) + (let* ((subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct))) + (res '())) + (for-each + (lambda (subdb) + (let* ((mtdb (dbr:subdb-mtdbdat subdb)) + (tmpdb (db:get-subdb dbstruct run-id)) + (refndb (dbr:subdb-refndb subdb)) + (newres (db:sync-tables (db:sync-all-tables-list (db:get-keys dbstruct)) last-update tmpdb refndb mtdb))) + ;; (stack-push! (dbr:subdb-dbstack subdb) tmpdb) + ;; BUG: verify this is really needed + (dbfile:add-dbdat dbstruct run-id tmpdb) + (set! res (cons newres res)))) + subdbs) + res)) + +;;;; run-ids +;; if #f use *db-local-sync* : or 'local-sync-flags +;; if #t use timestamps : or 'timestamps +;; +;; NB// no-sync-db is the db handle, not a flag! +;; +(define (db:sync-to-megatest.db dbstruct #!key (no-sync-db #f)) + (let* ((start-time (current-seconds)) + (last-full-update (if no-sync-db + (db:no-sync-get/default no-sync-db "LAST_FULL_UPDATE" 0) + 0)) + (full-sync-needed (> (- start-time last-full-update) 3600)) ;; every hour do a full sync + (last-update (if full-sync-needed + 0 + (if no-sync-db + (db:no-sync-get/default no-sync-db "LAST_UPDATE" 0) + 0))) ;; (or (db:get-var dbstruct "LAST_UPDATE") 0)) + (sync-needed (> (- start-time last-update) 6)) + (res (if (or sync-needed ;; don't sync if a sync already occurred in the past 6 seconds + full-sync-needed) + (begin + (if no-sync-db + (begin + (if full-sync-needed (db:no-sync-set no-sync-db "LAST_FULL_UPDATE" start-time)) + (db:no-sync-set no-sync-db "LAST_UPDATE" start-time))) + (db:tmp->megatest.db-sync dbstruct last-update)) + 0)) + (sync-time (- (current-seconds) start-time))) + (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id)) + (if (common:low-noise-print 30 "sync new to old") + (if sync-needed + (debug:print-info 0 *default-log-port* "Sync of " res " records from newdb to olddb completed in " sync-time " seconds pid="(current-process-id)) + (debug:print-info 0 *default-log-port* "No sync needed, last updated " (- start-time last-update) " seconds ago"))) + res)) + + +(define (db:initialize-main-db db #!key (launch-setup #f)) + (when (not *configinfo*) + (if launch-setup + (launch-setup) ;; added because Elena was getting stack dump because *configinfo* below was #f. + (assert #f "db:initialize-main-db called and needs launch:setup but was not given it"))) + (let* ((configdat (car *configinfo*)) ;; tut tut, global warning... + (keys (keys:config-get-fields configdat)) + (havekeys (> (length keys) 0)) + (keystr (keys->keystr keys)) + (fieldstr (keys:make-key/field-string configdat)) + #;(db (dbr:dbdat-dbh dbdat))) + (for-each (lambda (key) + (let ((keyn key)) + (if (member (string-downcase keyn) + (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" + "pass_count" "contour")) + (begin + (debug:print 0 *default-log-port* "ERROR: your key cannot be named " keyn " as this conflicts with the same named field in the runs table, you must remove your megatest.db and /.db before trying again.") + (exit 1))))) + keys) + (sqlite3:with-transaction + db + (lambda () + ;; handle-exceptions + ;; exn + ;; (begin + ;; (debug:print 0 "ERROR: Failed to create tables. Look at your [fields] section, should be: fieldname TEXT DEFAULT 'yourdefault'") + ;; (exit)) + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS keys (id INTEGER PRIMARY KEY, fieldname TEXT, fieldtype TEXT, CONSTRAINT keyconstraint UNIQUE (fieldname));") + (for-each + (lambda (key) + (let* ((fieldname #f) + (fieldtype #f)) + (sqlite3:for-each-row + (lambda (fn ft) + (set! fieldname fn) + (set! fieldtype ft)) + db + "SELECT fieldname,fieldtype FROM keys WHERE fieldname=?" key) + (if (not fieldname) + (sqlite3:execute db "INSERT INTO keys (fieldname,fieldtype) VALUES (?,?);" key "TEXT")))) + keys) + (sqlite3:execute db (conc + "CREATE TABLE IF NOT EXISTS runs (id INTEGER PRIMARY KEY, \n " + fieldstr (if havekeys "," "") " + runname TEXT DEFAULT 'norun', + contour TEXT DEFAULT '', + state TEXT DEFAULT '', + status TEXT DEFAULT '', + owner TEXT DEFAULT '', + event_time TIMESTAMP DEFAULT (strftime('%s','now')), + comment TEXT DEFAULT '', + fail_count INTEGER DEFAULT 0, + pass_count INTEGER DEFAULT 0, + last_update INTEGER DEFAULT (strftime('%s','now')), + CONSTRAINT runsconstraint UNIQUE (runname" (if havekeys "," "") keystr "));")) + ;; All triggers created at once in end + ;;(sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs + ;; FOR EACH ROW + ;; BEGIN + ;; UPDATE runs SET last_update=(strftime('%s','now')) + ;; WHERE id=old.id; + ;; END;") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS run_stats ( + id INTEGER PRIMARY KEY, + run_id INTEGER, + state TEXT, + status TEXT, + count INTEGER, + last_update INTEGER DEFAULT (strftime('%s','now')))") + ;; All triggers created at once in end + ;; (sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats + ;; FOR EACH ROW + ;; BEGIN + ;; UPDATE run_stats SET last_update=(strftime('%s','now')) + ;; WHERE id=old.id; + ;; END;") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_meta ( + id INTEGER PRIMARY KEY, + testname TEXT DEFAULT '', + author TEXT DEFAULT '', + owner TEXT DEFAULT '', + description TEXT DEFAULT '', + reviewed TIMESTAMP, + iterated TEXT DEFAULT '', + avg_runtime REAL, + avg_disk REAL, + tags TEXT DEFAULT '', + jobgroup TEXT DEFAULT 'default', + CONSTRAINT test_meta_constraint UNIQUE (testname));") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, + action TEXT DEFAULT '', + owner TEXT, + state TEXT DEFAULT 'new', + target TEXT DEFAULT '', + name TEXT DEFAULT '', + testpatt TEXT DEFAULT '', + keylock TEXT, + params TEXT, + creation_time TIMESTAMP DEFAULT (strftime('%s','now')), + execution_time TIMESTAMP);") + ;; archive disk areas, cached info from [archive-disks] + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_disks ( + id INTEGER PRIMARY KEY, + archive_area_name TEXT, + disk_path TEXT, + last_df INTEGER DEFAULT -1, + last_df_time TIMESTAMP DEFAULT (strftime('%s','now')), + creation_time TIMESTAMP DEFAULT (strftime('%s','now')));") + ;; individual bup (or tar) data chunks + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_blocks ( + id INTEGER PRIMARY KEY, + archive_disk_id INTEGER, + disk_path TEXT, + last_du INTEGER DEFAULT -1, + last_du_time TIMESTAMP DEFAULT (strftime('%s','now')), + creation_time TIMESTAMP DEFAULT (strftime('%s','now')));") + ;; tests allocated to what chunks. reusing a chunk for a test/item_path is very efficient + ;; NB// the per run/test recording of where the archive is stored is done in the test + ;; record. + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_allocations ( + id INTEGER PRIMARY KEY, + archive_block_id INTEGER, + testname TEXT, + item_path TEXT, + creation_time TIMESTAMP DEFAULT (strftime('%s','now')));") + ;; move this clean up call somewhere else + (sqlite3:execute db "DELETE FROM tasks_queue WHERE state='done' AND creation_time < ?;" (- (current-seconds)(* 24 60 60))) ;; remove older than 24 hrs + (sqlite3:execute db (conc "CREATE INDEX IF NOT EXISTS runs_index ON runs (runname" (if havekeys "," "") keystr ");")) + ;; (sqlite3:execute db "CREATE VIEW runs_tests AS SELECT * FROM runs INNER JOIN tests ON runs.id=tests.run_id;") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS extradat (id INTEGER PRIMARY KEY, run_id INTEGER, key TEXT, val TEXT);") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS metadat (id INTEGER PRIMARY KEY, var TEXT, val TEXT, + CONSTRAINT metadat_constraint UNIQUE (var));") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS access_log (id INTEGER PRIMARY KEY, user TEXT, accessed TIMESTAMP, args TEXT);") + ;; Must do this *after* running patch db !! No more. + ;; cannot use db:set-var since it will deadlock, hardwire the code here + (let* ((prev-version #f) + (curr-version (common:version-signature))) + (sqlite3:for-each-row + (lambda (ver) + (set! prev-version ver)) + db + "SELECT val FROM metadat WHERE var='MEGATEST_VERSION';") + (if prev-version + (if (not (equal? prev-version curr-version)) + (sqlite3:execute db "UPDATE metadat SET val=? WHERE var=?;" curr-version "MEGATEST_VERSION")) + (sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" "MEGATEST_VERSION" curr-version) )) + (debug:print-info 11 *default-log-port* "db:initialize END") ;; )))) + + ;;====================================================================== + ;; R U N S P E C I F I C D B + ;;====================================================================== + + ;; (define (db:initialize-run-id-db db) + ;; (sqlite3:with-transaction + ;; db + ;; (lambda () + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS tests + (id INTEGER PRIMARY KEY, + run_id INTEGER DEFAULT -1, + testname TEXT DEFAULT 'noname', + host TEXT DEFAULT 'n/a', + cpuload REAL DEFAULT -1, + diskfree INTEGER DEFAULT -1, + uname TEXT DEFAULT 'n/a', + rundir TEXT DEFAULT '/tmp/badname', + shortdir TEXT DEFAULT '/tmp/badname', + item_path TEXT DEFAULT '', + state TEXT DEFAULT 'NOT_STARTED', + status TEXT DEFAULT 'FAIL', + attemptnum INTEGER DEFAULT 0, + final_logf TEXT DEFAULT 'logs/final.log', + logdat TEXT DEFAULT '', + run_duration INTEGER DEFAULT 0, + comment TEXT DEFAULT '', + event_time TIMESTAMP DEFAULT (strftime('%s','now')), + fail_count INTEGER DEFAULT 0, + pass_count INTEGER DEFAULT 0, + archived INTEGER DEFAULT 0, -- 0=no, > 1=archive block id where test data can be found + last_update INTEGER DEFAULT (strftime('%s','now')), + CONSTRAINT testsconstraint UNIQUE (run_id, testname, item_path));") + ;; deprecated -- (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_index ON tests (run_id, testname, item_path, uname);") + + (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_run_id_index ON tests (run_id);") ;; new + (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_testname_index ON tests (testname,item_path);") ;; new + (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_state_status_index ON tests (state, status); ") ;; new + + ;; All triggers created at once in end + ;;(sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_tests_trigger AFTER UPDATE ON tests + ;; FOR EACH ROW + ;; BEGIN + ;; UPDATE tests SET last_update=(strftime('%s','now')) + ;; WHERE id=old.id; + ;; END;") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_steps + (id INTEGER PRIMARY KEY, + test_id INTEGER, + stepname TEXT, + state TEXT DEFAULT 'NOT_STARTED', + status TEXT DEFAULT 'n/a', + event_time TIMESTAMP, + comment TEXT DEFAULT '', + logfile TEXT DEFAULT '', + last_update INTEGER DEFAULT (strftime('%s','now')), + CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state));") + (sqlite3:execute db "CREATE INDEX IF NOT EXISTS teststeps_index ON test_steps (test_id, stepname, state);") + ;; All triggers created at once in end + ;;(sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps + ;; FOR EACH ROW + ;; BEGIN + ;; UPDATE test_steps SET last_update=(strftime('%s','now')) + ;; WHERE id=old.id; + ;; END;") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_data (id INTEGER PRIMARY KEY, + test_id INTEGER, + category TEXT DEFAULT '', + variable TEXT, + value REAL, + expected REAL, + tol REAL, + units TEXT, + comment TEXT DEFAULT '', + status TEXT DEFAULT 'n/a', + type TEXT DEFAULT '', + last_update INTEGER DEFAULT (strftime('%s','now')), + CONSTRAINT test_data_constraint UNIQUE (test_id,category,variable));") + (sqlite3:execute db "CREATE INDEX IF NOT EXISTS test_data_index ON test_data (test_id);") + ;; All triggers created at once in end + ;;(sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data + ;; FOR EACH ROW + ;; BEGIN + ;; UPDATE test_data SET last_update=(strftime('%s','now')) + ;; WHERE id=old.id; + ;; END;") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_rundat ( + id INTEGER PRIMARY KEY, + test_id INTEGER, + update_time TIMESTAMP, + cpuload INTEGER DEFAULT -1, + diskfree INTEGER DEFAULT -1, + diskusage INTGER DEFAULT -1, + run_duration INTEGER DEFAULT 0, + last_update INTEGER DEFAULT (strftime('%s','now')));") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archives ( + id INTEGER PRIMARY KEY, + test_id INTEGER, + state TEXT DEFAULT 'new', + status TEXT DEFAULT 'n/a', + archive_type TEXT DEFAULT 'bup', + du INTEGER, + archive_path TEXT, + last_update INTEGER DEFAULT (strftime('%s','now')));"))) + (db:create-triggers db) + db)) ;; ) + +;;====================================================================== +;; A R C H I V E S +;;====================================================================== + +;; dneeded is minimum space needed, scan for existing archives that +;; are on disks with adequate space and already have this test/itempath +;; archived +;; +(define (db:archive-get-allocations dbstruct testname itempath dneeded) + (let* ((dbdat (db:get-subdb dbstruct #f)) ;; archive tables are in main.db + (db (dbr:dbdat-dbh dbdat)) + (res '()) + (blocks '())) ;; a block is an archive chunck that can be added too if there is space + (sqlite3:for-each-row + (lambda (id archive-disk-id disk-path last-du last-du-time) + (set! res (cons (vector id archive-disk-id disk-path last-du last-du-time) res))) + db + "SELECT b.id,b.archive_disk_id,b.disk_path,b.last_du,b.last_du_time FROM archive_blocks AS b + INNER JOIN archive_allocations AS a ON a.archive_block_id=b.id + WHERE a.testname=? AND a.item_path=?;" + testname itempath) + ;; Now res has list of candidate paths, look in archive_disks for candidate with potential free space + (if (null? res) + '() + (sqlite3:for-each-row + (lambda (id archive-area-name disk-path last-df last-df-time) + (set! blocks (cons (vector id archive-area-name disk-path last-df last-df-time) blocks))) + db + (conc + "SELECT d.id,d.archive_area_name,disk_path,last_df,last_df_time FROM archive_disks AS d + INNER JOIN archive_blocks AS b ON d.id=b.archive_disk_id + WHERE b.id IN (" (string-intersperse (map conc res) ",") ") AND + last_df > ?;") + dneeded)) + ;; BUG: Verfify this is really needed + (dbfile:add-dbdat dbstruct #f dbdat) + blocks)) + +;; returns id of the record, register a disk allocated to archiving and record it's last known +;; available space +;; +(define (db:archive-register-disk dbstruct bdisk-name bdisk-path df) + (let* ((dbdat (db:get-subdb dbstruct #f)) ;; archive tables are in main.db + (db (dbr:dbdat-dbh dbdat)) + (res #f)) + (sqlite3:for-each-row + (lambda (id) + (set! res id)) + db + "SELECT id FROM archive_disks WHERE archive_area_name=? AND disk_path=?;" + bdisk-name bdisk-path) + (if res ;; record exists, update df and return id + (begin + (sqlite3:execute db "UPDATE archive_disks SET last_df=?,last_df_time=(strftime('%s','now')) + WHERE archive_area_name=? AND disk_path=?;" + df bdisk-name bdisk-path) + (dbfile:add-dbdat dbstruct #f dbdat) + res) + (begin + (sqlite3:execute + db + "INSERT OR REPLACE INTO archive_disks (archive_area_name,disk_path,last_df) + VALUES (?,?,?);" + bdisk-name bdisk-path df) + (dbfile:add-dbdat dbstruct #f dbdat) + (db:archive-register-disk dbstruct bdisk-name bdisk-path df))))) + +;; record an archive path created on a given archive disk (identified by it's bdisk-id) +;; if path starts with / then it is full, otherwise it is relative to the archive disk +;; preference is to store the relative path. +;; +(define (db:archive-register-block-name dbstruct bdisk-id archive-path #!key (du #f)) + (let* ((dbdat (db:get-subdb dbstruct #f)) ;; archive tables are in main.db + (db (dbr:dbdat-dbh dbdat)) + (res #f)) + ;; first look to see if this path is already registered + (sqlite3:for-each-row + (lambda (id) + (set! res id)) + db + "SELECT id FROM archive_blocks WHERE archive_disk_id=? AND disk_path=?;" + bdisk-id archive-path) + (if res ;; record exists, update du if applicable and return res + (if du (sqlite3:execute db "UPDATE archive_blocks SET last_du=?,last_du_time=(strftime('%s','now')) + WHERE archive_disk_id=? AND disk_path=?;" + bdisk-id archive-path du)) + (begin + (sqlite3:execute db "INSERT OR REPLACE INTO archive_blocks (archive_disk_id,disk_path,last_du) + VALUES (?,?,?);" + bdisk-id archive-path (or du 0)) + (set! res (db:archive-register-block-name dbstruct bdisk-id archive-path du: du)))) + (dbfile:add-dbdat dbstruct #f dbdat) + res)) + + +;; The "archived" field in tests is overloaded; 0 = not archived, > 0 archived in block with given id +;; +(define (db:test-set-archive-block-id dbstruct run-id test-id archive-block-id) + (db:with-db + dbstruct + run-id + #t + (lambda (dbdat db) + (sqlite3:execute db "UPDATE tests SET archived=? WHERE id=?;" + archive-block-id test-id)))) + +;; Look up the archive block info given a block-id +;; +(define (db:test-get-archive-block-info dbstruct archive-block-id) + (db:with-db + dbstruct + #f + #f + (lambda (dbdat db) + (let ((res #f)) + (sqlite3:for-each-row + ;; 0 1 2 3 4 5 + (lambda (id archive-disk-id disk-path last-du last-du-time creation-time) + (set! res (vector id archive-disk-id disk-path last-du last-du-time creation-time))) + db + "SELECT id,archive_disk_id,disk_path,last_du,last_du_time,creation_time FROM archive_blocks WHERE id=?;" + archive-block-id) + res)))) + +;; (define (db:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey) +;; (let* ((dbdat (db:get-subdb dbstruct #f)) ;; archive tables are in main.db +;; (db (dbr:dbdat-dbh dbdat)) +;; (res '()) +;; (blocks '())) ;; a block is an archive chunck that can be added too if there is space +;; (sqlite3:for-each-row #f) + +;;====================================================================== +;; D B U T I L S +;;====================================================================== + +;;====================================================================== +;; M A I N T E N A N C E +;;====================================================================== + +;; (define (db:have-incompletes? dbstruct run-id ovr-deadtime) +;; (let* ((incompleted '()) +;; (oldlaunched '()) +;; (toplevels '()) +;; (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) +;; (deadtime (if (and deadtime-str +;; (string->number deadtime-str)) +;; (string->number deadtime-str) +;; 72000))) ;; twenty hours +;; (db:with-db +;; dbstruct run-id #f +;; (lambda (dbdat db) +;; (if (number? ovr-deadtime)(set! deadtime ovr-deadtime)) +;; +;; ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes +;; ;; +;; ;; HOWEVER: this code in run:test seems to work fine +;; ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) +;; ;; (db:test-get-run_duration testdat))) +;; ;; 600) +;; ;; (db:delay-if-busy dbdat) +;; (sqlite3:for-each-row +;; (lambda (test-id run-dir uname testname item-path) +;; (if (and (equal? uname "n/a") +;; (equal? item-path "")) ;; this is a toplevel test +;; ;; what to do with toplevel? call rollup? +;; (begin +;; (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) +;; (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) +;; (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) +;; (db:get-cache-stmth dbdat db +;; "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');") +;; run-id deadtime) +;; +;; ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config +;; ;; +;; ;; (db:delay-if-busy dbdat) +;; (sqlite3:for-each-row +;; (lambda (test-id run-dir uname testname item-path) +;; (if (and (equal? uname "n/a") +;; (equal? item-path "")) ;; this is a toplevel test +;; ;; what to do with toplevel? call rollup? +;; (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) +;; (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) +;; (db:get-cache-stmth dbdat db +;; "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');") +;; run-id) +;; +;; (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") +;; (if (and (null? incompleted) +;; (null? oldlaunched) +;; (null? toplevels)) +;; #f +;; #t))))) + +;; BUG: Probably broken - does not explicitly use run-id in the query +;; +(define (db:top-test-set-per-pf-counts dbstruct run-id test-name) + (db:general-call dbstruct run-id 'top-test-set-per-pf-counts (list test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name))) + +;; Clean out old junk and vacuum the database +;; +;; Ultimately do something like this: +;; +;; 1. Look at test records either deleted or part of deleted run: +;; a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown' +;; b. If test dir gone, delete the test record +;; 2. Look at run records +;; a. If have tests that are not deleted, set state='unknown' +;; b. .... +;; +(define (db:clean-up run-id dbdat) + (if run-id + (begin + (debug:print 0 *default-log-port* "Cleaning run DB " run-id) + (db:clean-up-rundb dbdat run-id) + ) + (begin + (debug:print 0 *default-log-port* "Cleaning main DB ") + (db:clean-up-maindb dbdat) + ) + ) +) + + +;; Clean out old junk and vacuum the database +;; +;; Ultimately do something like this: +;; +;; 1. Look at test records either deleted or part of deleted run: +;; a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown' +;; b. If test dir gone, delete the test record +;; 2. Look at run records +;; a. If have tests that are not deleted, set state='unknown' +;; b. .... +;; +(define (db:clean-up-rundb dbdat run-id) + ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db") + (let* ((db (dbr:dbdat-dbh dbdat)) + (test-count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests);")) + (step-count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM test_steps);")) + (statements + (map (lambda (stmt) + (sqlite3:prepare db stmt)) + (list + "DELETE FROM tests WHERE state='DELETED';" + "DELETE FROM test_steps WHERE status = 'DELETED';" + "DELETE FROM tests WHERE run_id IN (SELECT id FROM runs WHERE state = 'deleted');" + )))) + (sqlite3:with-transaction + db + (lambda () + (sqlite3:for-each-row (lambda (tot) + (debug:print-info 0 *default-log-port* "Test records count before clean: " tot)) + test-count-stmt) + (sqlite3:for-each-row (lambda (tot) + (debug:print-info 0 *default-log-port* "Test_step records count before clean: " tot)) + step-count-stmt) + (map sqlite3:execute statements) + (sqlite3:for-each-row (lambda (tot) + (debug:print-info 0 *default-log-port* "Test records count after clean: " tot)) + test-count-stmt) + (sqlite3:for-each-row (lambda (tot) + (debug:print-info 0 *default-log-port* "Test_step records count after clean: " tot)) + step-count-stmt))) + (map sqlite3:finalize! statements) + (sqlite3:finalize! test-count-stmt) + (sqlite3:finalize! step-count-stmt) + (sqlite3:execute db "VACUUM;"))) + +;; Clean out old junk and vacuum the database +;; +;; Ultimately do something like this: +;; +;; 1. Look at test records either deleted or part of deleted run: +;; a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown' +;; b. If test dir gone, delete the test record +;; 2. Look at run records +;; a. If have tests that are not deleted, set state='unknown' +;; b. .... +;; +(define (db:clean-up-maindb dbdat) + ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db") + (let* ((db (dbr:dbdat-dbh dbdat)) + (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM runs);")) + (statements + (map (lambda (stmt) + (sqlite3:prepare db stmt)) + (list + ;; delete all tests that belong to runs that are 'deleted' + ;; (conc "DELETE FROM tests WHERE run_id NOT IN (" (string-intersperse (map conc valid-runs) ",") ");") + ;; delete all tests that are 'DELETED' + "DELETE FROM runs WHERE state='deleted';" + ))) + (dead-runs '())) + (sqlite3:for-each-row + (lambda (run-id) + (set! dead-runs (cons run-id dead-runs))) + db + "SELECT id FROM runs WHERE state='deleted';") + ;; (db:delay-if-busy dbdat) + (sqlite3:with-transaction + db + (lambda () + (sqlite3:for-each-row (lambda (tot) + (debug:print-info 0 *default-log-port* "Run records count before clean: " tot)) + count-stmt) + (map sqlite3:execute statements) + (sqlite3:for-each-row (lambda (tot) + (debug:print-info 0 *default-log-port* "Run records count after clean: " tot)) + count-stmt))) + (map sqlite3:finalize! statements) + (sqlite3:finalize! count-stmt) + ;; (db:find-and-mark-incomplete db) + ;; (db:delay-if-busy dbdat) + (sqlite3:execute db "VACUUM;") + dead-runs)) + +;;====================================================================== +;; no-sync.db - small bits of data to be shared between servers +;;====================================================================== + +(define (db:get-dbsync-path) + (case (rmt:transport-mode) + ((http)(common:make-tmpdir-name *toppath* "")) + ((tcp) (conc *toppath*"/.mtdb")) + ((nfs) (conc *toppath*"/.mtdb")) + (else "/tmp/dunno-this-gonna-exist"))) + +;; This is needed for api.scm +(define (db:open-no-sync-db) + (dbfile:open-no-sync-db (db:get-dbsync-path))) + +;; why get the keys from the db? why not get from the *configdat* +;; using keys:config-get-fields? + +(define (db:get-keys dbstruct) + (keys:config-get-fields *configdat*)) + +;; extract index number given a header/data structure +(define (db:get-index-by-header header field) + (list-index (lambda (x)(equal? x field)) header)) + +;; look up values in a header/data structure +(define (db:get-value-by-header row header field) + (let ((len (if (vector? row) + (vector-length row) + 0))) + (if (or (null? header) (not row)) + #f + (let loop ((hed (car header)) + (tal (cdr header)) + (n 0)) + (if (equal? hed field) + (handle-exceptions + exn + (begin + (debug:print 4 *default-log-port* "WARNING: attempt to read non-existant field, row=" + row " header=" header " field=" field ", exn=" exn) + #f) + (if (>= n len) + #f + (vector-ref row n))) + (if (null? tal) #f (loop (car tal)(cdr tal)(+ n 1)))))))) + +;; Accessors for the header/data structure +;; get rows and header from +(define (db:get-header vec)(vector-ref vec 0)) +(define (db:get-rows vec)(vector-ref vec 1)) + +;;====================================================================== +;; R U N S +;;====================================================================== + +(define (db:get-run-times dbstruct run-patt target-patt) +(let ((res `()) + (qry (conc "select runname, (max(end_time)-min(event_time))/60 as runtime, target from (select runname, run_id,tests.event_time,tests.event_time+run_duration AS end_time, " (string-join (db:get-keys dbstruct) " || '/' || ") " as target from tests inner join runs on tests.run_id = runs.id where runs.runname like ? and target like ?) group by run_id ;"))) + ;(print qry) + (db:with-db + dbstruct + #f ;; this is for the main runs db + #f ;; does not modify db + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (runname runtime target ) + (set! res (cons (vector runname runtime target) res))) + db + qry + run-patt target-patt) + res)))) + +(define (db:get-run-name-from-id dbstruct run-id) + (db:with-db + dbstruct + #f ;; this is for the main runs db + #f ;; does not modify db + (lambda (dbdat db) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (runname) + (set! res runname)) + db + "SELECT runname FROM runs WHERE id=?;" + run-id) + res)))) + +(define (db:get-run-key-val dbstruct run-id key) + (db:with-db + dbstruct + #f + #f + (lambda (dbdat db) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (val) + (set! res val)) + db + (conc "SELECT " key " FROM runs WHERE id=?;") + run-id) + res)))) + + +;; register a test run with the db, this accesses the main.db and does NOT +;; use server api +;; +(define (db:register-run dbstruct keyvals runname state status user contour-in) + (let* ((keys (map car keyvals)) + (keystr (keys->keystr keys)) + (contour (or contour-in "")) ;; empty string to force no hierarcy and be backwards compatible. + (comma (if (> (length keys) 0) "," "")) + (andstr (if (> (length keys) 0) " AND " "")) + (valslots (keys->valslots keys)) ;; ?,?,? ... + (allvals (append (list runname state status user contour) (map cadr keyvals))) + (qryvals (append (list runname) (map cadr keyvals))) + (key=?str (string-intersperse (map (lambda (k)(conc k "=?")) keys) " AND "))) + ;; (debug:print 0 *default-log-port* "Got here 0.") + (debug:print 3 *default-log-port* "keys: " keys " allvals: " allvals " keyvals: " keyvals " key=?str is " key=?str) + (debug:print 2 *default-log-port* "NOTE: using target " (string-intersperse (map cadr keyvals) "/") " for this run") + (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and" + (db:with-db + dbstruct #f #t + (lambda (dbdat db) + ;; (debug:print 0 *default-log-port* "Got here 1.") + (let ((res #f)) + (apply sqlite3:execute db + (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time,contour" + comma keystr ") VALUES (?,?,?,?,strftime('%s','now'),?" comma valslots ");") + allvals) + (apply sqlite3:for-each-row + (lambda (id) + (set! res id)) + db + (let ((qry (conc "SELECT id FROM runs WHERE (runname=? " andstr key=?str ");"))) + qry) + qryvals) + (sqlite3:execute db "UPDATE runs SET state=?,status=?,event_time=strftime('%s','now') WHERE id=? AND state='deleted';" state status res) + res))) + (begin + (debug:print-error 0 *default-log-port* "Called without all necessary keys") + #f)))) + +(define (db:get-run-id dbstruct runname target) + (let ((runs (db:simple-get-runs dbstruct runname #f #f target #f))) ;; runpatt count offset target last-update + (if (null? runs) + #f + (simple-run-id (car runs))))) + +;; called with run-id=#f so will operate on main.db +;; +(define (db:insert-run dbstruct run-id target runname run-meta) + (let* ((keys (db:get-keys dbstruct)) + (runs (db:simple-get-runs dbstruct runname #f #f target #f))) ;; runpatt count offset target last-update + ;; need to insert run based on target and runname + (let* ((targvals (string-split target "/")) + (keystr (string-intersperse keys ",")) + (key?str (string-intersperse (make-list (length targvals) "?") ",")) + (qrystr (conc "INSERT INTO runs (id,runname,"keystr") VALUES (?,?,"key?str")")) + (get-var (lambda (db qrystr) + (let* ((res #f)) + (sqlite3:for-each-row + (lambda row + (set! res (car row))) + db qrystr run-id runname) + res)))) + (if (null? runs) + (begin + (db:create-initial-run-record dbstruct run-id runname target) + ) + ) + (let* () + ;;(debug:print 0 *default-log-port* "db:insert-run: Calling db:with-db to update the run record") + (debug:print 0 *default-log-port* "db:insert-run: runid = " run-id) + run-id)))) + +(define (db:create-initial-run-record dbstruct run-id runname target) + (let* ((keys (db:get-keys dbstruct)) + (targvals (string-split target "/")) + (keystr (string-intersperse keys ",")) + (key?str (string-intersperse (make-list (length targvals) "?") ",")) ;; a string with the same length as targvals, where each element is "?" and interspersed with commas. + (qrystr (conc "INSERT INTO runs (id,runname,"keystr") VALUES (?,?,"key?str")"))) + (debug:print 0 *default-log-port* "db:create-initial-run-record") + (debug:print 0 *default-log-port* "qrystr = " qrystr) + + (db:with-db + dbstruct #f #t ;; run-id writable + (lambda (dbdat db) + (debug:print 0 *default-log-port* "lambda proc: dbdat: " dbdat " db: " db) + (apply sqlite3:execute db qrystr run-id runname targvals))))) + +(define (db:insert-test dbstruct run-id test-rec) + (let* ((testname (alist-ref "testname" test-rec equal?)) + (item-path (alist-ref "item_path" test-rec equal?)) + (id (db:get-test-id dbstruct run-id testname item-path)) + (fieldvals (filter (lambda (x)(not (member (car x) '("id" "last_update")))) test-rec)) + (setqry (conc "UPDATE tests SET "(string-intersperse + (map (lambda (dat) + (conc (car dat)"=?")) + fieldvals) + ",")" WHERE id=?;")) + (insqry (conc "INSERT INTO tests ("(string-intersperse (map (lambda (x) (car x)) fieldvals) ",") + ") VALUES ("(string-intersperse (make-list (length fieldvals) "?") ",")");"))) + ;; (debug:print 0 *default-log-port* "id: "id"\nset: "setqry"\ninsqry: "insqry) + (db:with-db + dbstruct + run-id #t + (lambda (dbdat db) + (if id + (apply sqlite3:execute db setqry (append (map cdr fieldvals) (list id))) + (apply sqlite3:execute db insqry (map cdr fieldvals))))))) + +;; replace header and keystr with a call to runs:get-std-run-fields +;; +;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") ) +;; runpatts: patt1,patt2 ... +;; +(define (db:get-runs dbstruct runpatt count offset keypatts) + (let* ((res '()) + (keys (db:get-keys dbstruct)) + (runpattstr (db:patt->like "runname" runpatt)) + (remfields (list "id" "runname" "state" "status" "owner" "event_time")) + (header (append keys remfields)) + (keystr (conc (keys->keystr keys) "," + (string-intersperse remfields ","))) + (qrystr (conc "SELECT " keystr " FROM runs WHERE (" runpattstr ") " ;; runname LIKE ? " + ;; Generate: " AND x LIKE 'keypatt' ..." + (if (null? keypatts) "" + (conc " AND " + (string-join + (map (lambda (keypatt) + (let ((key (car keypatt)) + (patt (cadr keypatt))) + (db:patt->like key patt))) + keypatts) + " AND "))) + " AND state != 'deleted' ORDER BY event_time DESC " + (if (number? count) + (conc " LIMIT " count) + "") + (if (number? offset) + (conc " OFFSET " offset) + "")))) + (debug:print-info 11 *default-log-port* "db:get-runs START qrystr: " qrystr " keypatts: " keypatts " offset: " offset " limit: " count) + (db:with-db dbstruct #f #f + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (a . x) + (set! res (cons (apply vector a x) res))) + db + qrystr + ))) + (debug:print-info 11 *default-log-port* "db:get-runs END qrystr: " qrystr " keypatts: " keypatts " offset: " offset " limit: " count) + (vector header res))) + +;; simple get-runs +;; +;; records used defined in dbfile +;; +(define (db:simple-get-runs dbstruct runpatt count offset target last-update) + (let* ((res '()) + (keys (db:get-keys dbstruct)) + (runpattstr (db:patt->like "runname" runpatt)) + (remfields (list "id" "runname" "state" "status" "owner" "event_time")) + (targstr (string-intersperse keys "||'/'||")) + (keystr (conc targstr " AS target," + (string-intersperse remfields ","))) + (qrystr (conc "SELECT " keystr " FROM runs WHERE (" runpattstr ") " ;; runname LIKE ? " + ;; Generate: " AND x LIKE 'keypatt' ..." + " AND target LIKE '" target "'" + " AND state != 'deleted' " + (if (number? last-update) + (conc " AND last_update >= " last-update) + "") + " ORDER BY event_time DESC " + (if (number? count) + (conc " LIMIT " count) + "") + (if (number? offset) + (conc " OFFSET " offset) + ""))) + ) + (debug:print-info 11 *default-log-port* "db:get-runs START qrystr: " qrystr " target: " target " offset: " offset " limit: " count) + (db:with-db dbstruct #f #f + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (target id runname state status owner event_time) + (set! res (cons (make-simple-run target id runname state status owner event_time) res))) + db + qrystr + ))) + (debug:print-info 11 *default-log-port* "db:get-runs END qrystr: " qrystr " target: " target " offset: " offset " limit: " count) + res)) + +;; TODO: Switch this to use max(update_time) from each run db? Then if using a server there is no disk traffic (using cachedb db) ??? +;; +;; NOTE: This DOESN'T (necessarily) get the real run ids, but the number of the .db!! + +(define (db:get-changed-run-ids since-time) + (let* ((dbdir (common:make-tmpdir-name *toppath* "")) ;; (configf:lookup *configdat* "setup" "dbdir")) + (alldbs (glob (conc *toppath* "/.mtdb/[0-9]*.db*"))) + (changed (filter (lambda (dbfile) + (> (file-modification-time dbfile) since-time)) + alldbs))) + (delete-duplicates + (map (lambda (dbfile) + (let* ((res (string-match ".*\\/(\\d+)\\.db*" dbfile))) + (if res + (string->number (cadr res)) + (begin + (debug:print 2 *default-log-port* "WARNING: Failed to process " dbfile " for run-id") + 0)))) + changed)))) + +;; Get all targets from the db +;; +(define (db:get-targets dbstruct) + (let* ((res '()) + (keys (db:get-keys dbstruct)) + (header keys) ;; (map key:get-fieldname keys)) + (keystr (keys->keystr keys)) + (qrystr (conc "SELECT " keystr " FROM runs WHERE state != 'deleted';")) + (seen (make-hash-table))) + (db:with-db + dbstruct + #f + #f + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (a . x) + (let ((targ (cons a x))) + (if (not (hash-table-ref/default seen targ #f)) + (begin + (hash-table-set! seen targ #t) + (set! res (cons (apply vector targ) res)))))) + db + qrystr) + (debug:print-info 11 *default-log-port* "db:get-targets END qrystr: " qrystr ) + (vector header res))))) + +;; just get count of runs +(define (db:get-num-runs dbstruct runpatt) + (db:with-db + dbstruct + #f + #f + (lambda (dbdat db) + (let ((numruns 0)) + (debug:print-info 11 *default-log-port* "db:get-num-runs START " runpatt) + (sqlite3:for-each-row + (lambda (count) + (set! numruns count)) + db + "SELECT COUNT(id) FROM runs WHERE runname LIKE ? AND state != 'deleted';" runpatt) + (debug:print-info 11 *default-log-port* "db:get-num-runs END " runpatt) + numruns)))) + +;; just get count of runs +(define (db:get-runs-cnt-by-patt dbstruct runpatt targetpatt keys) + (db:with-db + dbstruct + #f + #f + (lambda (dbdat db) + (let ((numruns 0) + (qry-str #f) + (key-patt "") + (keyvals (if targetpatt (keys:target->keyval keys targetpatt) '()))) + + (for-each (lambda (keyval) + (let* ((key (car keyval)) + (patt (cadr keyval)) + (fulkey (conc ":" key)) + (wildtype (if (substring-index "%" patt) "like" "glob"))) + + (if patt + (set! key-patt (conc key-patt " AND " key " " wildtype " '" patt "'")) + (begin + (debug:print-error 0 *default-log-port* "searching for runs with no pattern set for " fulkey) + (exit 6))))) + keyvals) + ;(print runpatt " -- " key-patt) + (set! qry-str (conc "SELECT COUNT(id) FROM runs WHERE state != 'deleted' AND runname like '" runpatt "'" key-patt)) + ;(print qry-str ) + + (sqlite3:for-each-row + (lambda (count) + (set! numruns count)) + db + qry-str) + (debug:print-info 11 *default-log-port* "db:get-num-runs END " runpatt) + numruns)))) + + +;; (sqlite3#fold-row proc3670 init3671 db-or-stmt3672 . params3673)> +;; +(define (db:get-raw-run-stats dbstruct run-id) + (db:with-db + dbstruct + run-id + #f + (lambda (dbdat db) + (sqlite3:fold-row + (lambda (res state status count) + (cons (list state status count) res)) + '() + db + "SELECT state,status,count(id) AS count FROM tests WHERE run_id=? AND NOT(uname='n/a' AND item_path='') GROUP BY state,status;" + run-id)))) + +;; Update run_stats for given run_id +;; input data is a list (state status count) +;; +(define (db:update-run-stats dbstruct run-id stats) + (mutex-lock! *db-transaction-mutex*) + (db:with-db + dbstruct + #f + #t + (lambda (dbdat db) + ;; remove previous data + + (let* ((stmt1 (sqlite3:prepare db "DELETE FROM run_stats WHERE run_id=? AND state=? AND status=?;")) + (stmt2 (sqlite3:prepare db "INSERT INTO run_stats (run_id,state,status,count) VALUES (?,?,?,?);")) + (res + (sqlite3:with-transaction + db + (lambda () + (for-each + (lambda (dat) + (sqlite3:execute stmt1 run-id (car dat)(cadr dat)) + (apply sqlite3:execute stmt2 run-id dat)) + stats))))) + (sqlite3:finalize! stmt1) + (sqlite3:finalize! stmt2) + (mutex-unlock! *db-transaction-mutex*) + res)))) + +(define (db:get-main-run-stats dbstruct run-id) + (db:with-db + dbstruct + #f ;; this data comes from main + #f + (lambda (dbdat db) + (sqlite3:fold-row + (lambda (res state status count) + (cons (list state status count) res)) + '() + db + "SELECT state,status,count FROM run_stats WHERE run_id=? AND run_id IN (SELECT id FROM runs WHERE state NOT IN ('DELETED','deleted'));" + run-id)))) + +(define (db:print-current-query-stats) + ;; generate stats from *db-api-call-time* + (let ((ordered-keys (sort (hash-table-keys *db-api-call-time*) + (lambda (a b) + (let ((sum-a (common:sum (hash-table-ref *db-api-call-time* a))) + (sum-b (common:sum (hash-table-ref *db-api-call-time* b)))) + (> sum-a sum-b))))) + (total 0)) + (for-each + (lambda (cmd-key) + (let* ((dat (hash-table-ref *db-api-call-time* cmd-key)) + (num (length dat)) + (avg (if (> num 0) + (/ (common:sum dat)(length dat))))) + (set! total (+ total num)) + (debug:print-info 0 *default-log-port* cmd-key "\tavg: " avg " max: " (common:max dat) " min: " (common:min-max < dat) " num: " (length dat)))) + ordered-keys) + (debug:print-info 0 *default-log-port* "TOTAL: " total " api calls since start."))) + +(define (db:get-all-run-ids dbstruct) + (db:with-db + dbstruct + #f + #f + (lambda (dbdat db) + (let ((run-ids '())) + (sqlite3:for-each-row + (lambda (run-id) + (set! run-ids (cons run-id run-ids))) + db + "SELECT id FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;") + (reverse run-ids))))) + +;; get some basic run stats +;; +;; data structure: +;; +;; ( (runname (( state count ) ... )) +;; ( ... +;; +(define (db:get-run-stats dbstruct) + (let* ((totals (make-hash-table)) + (curr (make-hash-table)) + (res '()) + (runs-info '())) + ;; First get all the runname/run-ids + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (run-id runname) + (set! runs-info (cons (list run-id runname) runs-info))) + db + "SELECT id,runname FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;"))) ;; If you change this to the more logical ASC please adjust calls to db:get-run-stats + ;; for each run get stats data + (for-each + (lambda (run-info) + ;; get the net state/status counts for this run + (let* ((run-id (car run-info)) + (run-name (cadr run-info))) + (db:with-db + dbstruct + run-id + #f + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (state status count) + (let ((netstate (if (equal? state "COMPLETED") status state))) + (if (string? netstate) + (begin + (hash-table-set! totals netstate (+ (hash-table-ref/default totals netstate 0) count)) + (hash-table-set! curr netstate (+ (hash-table-ref/default curr netstate 0) count)))))) + db + "SELECT state,status,count(id) FROM tests AS t WHERE run_id=? GROUP BY state,status ORDER BY state,status DESC;" + run-id) + ;; add the per run counts to res + (for-each (lambda (state) + (set! res (cons (list run-name state (hash-table-ref curr state)) res))) + (sort (hash-table-keys curr) string>=)) + (set! curr (make-hash-table)))))) + runs-info) + (for-each (lambda (state) + (set! res (cons (list "Totals" state (hash-table-ref totals state)) res))) + (sort (hash-table-keys totals) string>=)) + res)) + +(define (mt:get-run-stats dbstruct run-id) +;; Get run stats from local access, move this ... but where? + (db:get-run-stats dbstruct run-id)) + +;; db:get-runs-by-patt +;; get runs by list of criteria +;; register a test run with the db +;; +;; Use: (db:get-value-by-header (db:get-header runinfo)(db:get-rows runinfo)) +;; to extract info from the structure returned +;; +(define (db:get-runs-by-patt dbstruct keys runnamepatt targpatt offset limit fields last-update sort-order ) ;; test-name) + (let* ((tmp (runs:get-std-run-fields keys (or fields '("id" "runname" "state" "status" "owner" "event_time")))) + (keystr (car tmp)) + (header (cadr tmp)) + (key-patt "") + (runwildtype (if (substring-index "%" runnamepatt) "like" "glob")) + (qry-str #f) + (keyvals (if targpatt (keys:target->keyval keys targpatt) '()))) + (for-each (lambda (keyval) + (let* ((key (car keyval)) + (patt (cadr keyval)) + (fulkey (conc ":" key)) + (wildtype (if (substring-index "%" patt) "like" "glob"))) + (if patt + (set! key-patt (conc key-patt " AND " key " " wildtype " '" patt "'")) + (begin + (debug:print-error 0 *default-log-port* "searching for runs with no pattern set for " fulkey) + (exit 6))))) + keyvals) + (set! qry-str (conc "SELECT " keystr " FROM runs WHERE state != 'deleted' AND runname " runwildtype " ? " key-patt + (if last-update + (conc " AND last_update >= " last-update " ") + " ") + " ORDER BY event_time " sort-order " " + (if limit (conc " LIMIT " limit) "") + (if offset (conc " OFFSET " offset) "") + ";")) + (debug:print-info 4 *default-log-port* "runs:get-runs-by-patt qry=" qry-str " " runnamepatt) + ;(print "runs:get-runs-by-patt qry=" qry-str " " runnamepatt) + + (vector header + (reverse + (db:with-db + dbstruct #f #f ;; reads db, does not write to it. + (lambda (dbdat db) + (sqlite3:fold-row + (lambda (res . r) + (cons (list->vector r) res)) + '() + db + qry-str + runnamepatt))))))) + +;; use (get-value-by-header (db:get-header runinfo)(db:get-rows runinfo)) +;; NOTE: Does NOT return a list of rows (or one row) for the first slot of the vector +;; this is inconsistent with get-runs but it makes some sense. +;; +(define (db:get-run-info dbstruct run-id) + ;;(if (hash-table-ref/default *run-info-cache* run-id #f) + ;; (hash-table-ref *run-info-cache* run-id) + (let* ((res (vector #f #f #f #f)) + (keys (db:get-keys dbstruct)) + (remfields (list "id" "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update")) ;; "area_id")) + (header (append keys remfields)) + (keystr (conc (keys->keystr keys) "," + (string-intersperse remfields ",")))) + (debug:print-info 11 *default-log-port* "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr) + + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (a . x) + (set! res (apply vector a x))) + db + (conc "SELECT " keystr " FROM runs WHERE id=?;") + run-id))) + (debug:print-info 11 *default-log-port* "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr) + (let ((finalres (vector header res))) + ;; (hash-table-set! *run-info-cache* run-id finalres) + finalres))) + +(define (db:set-comment-for-run dbstruct run-id comment) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (sqlite3:execute db "UPDATE runs SET comment=? WHERE id=?;" comment ;; (sdb:qry 'getid comment) + run-id)))) + +;; does not (obviously!) removed dependent data. But why not!!? +(define (db:delete-run dbstruct run-id) + (db:with-db + dbstruct #f #t + (lambda (dbdat db) + (sqlite3:with-transaction + db + (lambda () + (sqlite3:execute db "DELETE FROM test_steps WHERE test_id IN (SELECT id FROM tests WHERE run_id=?);" run-id) + (sqlite3:execute db "DELETE FROM test_data WHERE test_id IN (SELECT id FROM tests WHERE run_id=?);" run-id) + (sqlite3:execute db "UPDATE tests SET state='DELETED',comment='' WHERE run_id=?;" run-id) + ;; (db:delay-if-busy dbdat) + (sqlite3:execute db "UPDATE runs SET state='deleted',comment='' WHERE id=?;" run-id)))))) + +(define (db:update-run-event_time dbstruct run-id) + (db:with-db + dbstruct #f #t + (lambda (dbdat db) + (sqlite3:execute db "UPDATE runs SET event_time=strftime('%s','now') WHERE id=?;" run-id)))) + +(define (db:lock/unlock-run dbstruct run-id lock unlock user) + (db:with-db + dbstruct #f #t + (lambda (dbdat db) + (let ((newlockval (if lock "locked" + (if unlock + "unlocked" + "locked")))) ;; semi-failsafe + (sqlite3:execute db "UPDATE runs SET state=? WHERE id=?;" newlockval run-id) + (sqlite3:execute db "INSERT INTO access_log (user,accessed,args) VALUES(?,strftime('%s','now'),?);" + user (conc newlockval " " run-id)) + (debug:print-info 1 *default-log-port* "" newlockval " run number " run-id))))) + +(define (db:set-run-status dbstruct run-id status msg) + (db:with-db + dbstruct #f #t + (lambda (dbdat db) + (if msg + (sqlite3:execute db "UPDATE runs SET status=?,comment=? WHERE id=?;" status msg run-id) + (sqlite3:execute db "UPDATE runs SET status=? WHERE id=?;" status run-id))))) + +(define (db:set-run-state-status-db dbdat db run-id state status ) + (sqlite3:execute + (db:get-cache-stmth + dbdat db "UPDATE runs SET status=?,state=? WHERE id=?;") status state run-id)) + +(define (db:set-run-state-status dbstruct run-id state status ) + (db:with-db + dbstruct #f #t + (lambda (dbdat db) + (db:set-run-state-status-db dbdat db run-id state status)))) + +(define (db:get-run-status dbstruct run-id) + (let ((res "n/a")) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (status) + (set! res status)) + (db:get-cache-stmth + dbdat db + "SELECT status FROM runs WHERE id=?;" ) + run-id) + res)))) + +(define (db:get-run-state dbstruct run-id) + (let ((res "n/a")) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (status) + (set! res status)) + (db:get-cache-stmth + dbdat db + "SELECT state FROM runs WHERE id=?;" ) + run-id) + res)))) + +(define (db:get-run-state-status dbstruct run-id) + (let ((res (cons "n/a" "n/a"))) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (state status) + (set! res (cons state status))) + (db:get-cache-stmth + dbdat db + "SELECT state,status FROM runs WHERE id=?;" ) + run-id) + res)))) + + +;;====================================================================== +;; K E Y S +;;====================================================================== + +;; get key val pairs for a given run-id +;; ( (FIELDNAME1 keyval1) (FIELDNAME2 keyval2) ... ) +(define (db:get-key-val-pairs dbstruct run-id) + (let* ((keys (db:get-keys dbstruct)) + (res '())) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (for-each + (lambda (key) + (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) + (sqlite3:for-each-row + (lambda (key-val) + (set! res (cons (list key (if (string? key-val) key-val "")) res))) ;; replace non-string bad values with empty string to prevent crashes. This scenario can happen when Megatest is killed on updating the db + db qry run-id))) + keys))) + (reverse res))) + +;; get key vals for a given run-id +(define (db:get-key-vals dbstruct run-id) + (let* ((keys (db:get-keys dbstruct)) + (res '())) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (for-each + (lambda (key) + (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (key-val) + (set! res (cons (if (string? key-val) key-val "") res))) ;; check that the key-val is a string for cases where a crash injected bad data in the megatest.db + db qry run-id))) + keys))) + (let ((final-res (reverse res))) + (hash-table-set! *keyvals* run-id final-res) + final-res))) + +;; The target is keyval1/keyval2..., cached in *target* as it is used often +(define (db:get-target dbstruct run-id) + (let* ((keyvals (db:get-key-vals dbstruct run-id)) + (thekey (string-intersperse (map (lambda (x)(if x x "-na-")) keyvals) "/"))) + thekey)) + +;; Get run-ids for runs with same target but different runnames and NOT run-id +;; +(define (db:get-prev-run-ids dbstruct run-id) + (let* ((keyvals (db:get-key-val-pairs dbstruct run-id)) + (kvalues (map cadr keyvals)) + (keys (db:get-keys dbstruct)) + (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND "))) + (let ((prev-run-ids '())) + (if (null? keyvals) + '() + (begin + (db:with-db dbstruct #f #f ;; #f means work with the zeroth db - i.e. the runs db + (lambda (dbdat db) + (apply sqlite3:for-each-row + (lambda (id) + (set! prev-run-ids (cons id prev-run-ids))) + db + (conc "SELECT id FROM runs WHERE " qrystr " AND state != 'deleted' AND id != ?;") + (append kvalues (list run-id))))) + prev-run-ids))))) + +;;====================================================================== +;; T E S T S +;;====================================================================== + +;; states and statuses are lists, turn them into ("PASS","FAIL"...) and use NOT IN +;; i.e. these lists define what to NOT show. +;; states and statuses are required to be lists, empty is ok +;; not-in #t = above behaviour, #f = must match +;; mode: +;; 'dashboard - use state = 'COMPLETED' AND status in ( statuses ) OR state in ( states ) +;; +(define (db:get-tests-for-run dbstruct run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode) + (let* ((qryvalstr (case qryvals + ((shortlist) "id,run_id,testname,item_path,state,status") + ((#f) db:test-record-qry-selector) ;; "id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment") + (else qryvals))) + (res '()) + ;; if states or statuses are null then assume match all when not-in is false + (states-qry (if (null? states) + #f + (conc " state " + (if (eq? mode 'dashboard) + " IN ('" + (if not-in + " NOT IN ('" + " IN ('")) + (string-intersperse states "','") + "')"))) + (statuses-qry (if (null? statuses) + #f + (conc " status " + (if (eq? mode 'dashboard) + " IN ('" + (if not-in + " NOT IN ('" + " IN ('") ) + (string-intersperse statuses "','") + "')"))) + (interim-qry (conc " AND " (if not-in "NOT " "") "( state='COMPLETED' " (if statuses-qry (conc " AND " statuses-qry " ) ") " ) ") + (if states-qry + (conc (if not-in " AND " " OR ") states-qry ) ;; " ) ") + ""))) + (states-statuses-qry + (cond + ((and states-qry statuses-qry) + (case mode + ((dashboard) + (if not-in + (conc " AND (state='COMPLETED' AND status NOT IN ('" (string-intersperse statuses "','") "')) " + " OR (state != 'COMPLETED' AND state NOT IN ('" (string-intersperse states "','") "')) ") + (conc " AND (state='COMPLETED' AND status IN ('" (string-intersperse statuses "','") "')) " + " OR (state NOT IN ('COMPLETED','DELETED') AND state IN ('" (string-intersperse states "','") "')) "))) + (else (conc " AND ( " states-qry " AND " statuses-qry " ) ")))) + (states-qry + (case mode + ((dashboard) (conc " AND " (if not-in "NOT " "") " state IN ('" (string-intersperse states "','") "') ")) ;; interim-qry) + (else (conc " AND " states-qry)))) + (statuses-qry + (case mode + ((dashboard) (conc " AND " (if not-in "NOT " "") " status IN ('" (string-intersperse statuses "','") "') ")) ;; interim-qry) + (else (conc " AND " statuses-qry)))) + (else ""))) + (tests-match-qry (tests:match->sqlqry testpatt)) + (qry (conc "SELECT " qryvalstr + (if run-id + " FROM tests WHERE run_id=? " + " FROM tests WHERE ? > 0 ") ;; should work? + (if last-update " " " AND state != 'DELETED' ") ;; if using last-update we want deleted tests? + states-statuses-qry + (if tests-match-qry (conc " AND (" tests-match-qry ") ") "") + (if last-update (conc " AND last_update >= " last-update " ") "") + (case sort-by + ((rundir) " ORDER BY length(rundir) ") + ((testname) (conc " ORDER BY testname " (if sort-order (conc sort-order ",") "") " item_path ")) + ((statestatus) (conc " ORDER BY state " (if sort-order (conc sort-order ",") "") " status ")) + ((event_time) " ORDER BY event_time ") + (else (if (string? sort-by) + (conc " ORDER BY " sort-by " ") + " "))) + (if sort-order sort-order " ") + (if limit (conc " LIMIT " limit) " ") + (if offset (conc " OFFSET " offset) " ") + ";" + ))) + (debug:print-info 8 *default-log-port* "db:get-tests-for-run run-id=" run-id ", qry=" qry) + (let* ((res (db:with-db dbstruct run-id #f + (lambda (dbdat db) + ;; (let* ((stmth (db:get-cache-stmth dbstruct db qry))) ;; due to use of last-update we can't efficiently cache this query + (reverse + (sqlite3:fold-row + (lambda (res . row) + ;; id run-id testname state status event-time host cpuload + ;; diskfree uname rundir item-path run-duration final-logf comment) + (cons (list->vector row) res)) + '() + db qry ;; stmth + (or run-id 1) ;; 1 > 0 , for the case where we are seeking tests matching criteral for all runs + )))))) + (case qryvals + ((shortlist)(map db:test-short-record->norm res)) + ((#f) res) + (else res))))) + +(define (db:test-short-record->norm inrec) + ;; "id,run_id,testname,item_path,state,status" + ;; "id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment + (vector (vector-ref inrec 0) ;; id + (vector-ref inrec 1) ;; run_id + (vector-ref inrec 2) ;; testname + (vector-ref inrec 4) ;; state + (vector-ref inrec 5) ;; status + -1 "" -1 -1 "" "-" + (vector-ref inrec 3) ;; item-path + -1 "-" "-")) + +;; +;; 1. cache tests-match-qry +;; 2. compile qry and store in hash +;; 3. convert for-each-row to fold +;; +;; (define (db:get-tests-for-run-state-status dbstruct run-id testpatt) +;; (db:with-db +;; dbstruct run-id #f +;; (lambda (dbdat db) +;; (let* ((res '()) +;; (stmt-cache (dbr:dbstruct-stmt-cache dbstruct)) +;; (stmth (let* ((sh (db:hoh-get stmt-cache db testpatt))) +;; (or sh +;; (let* ((tests-match-qry (tests:match->sqlqry testpatt)) +;; (qry (conc "SELECT id,testname,item_path,state,status FROM tests WHERE run_id=? " +;; (if tests-match-qry (conc " AND (" tests-match-qry ") ") ""))) +;; (newsh (sqlite3:prepare db qry))) +;; (debug:print-info 8 *default-log-port* "db:get-tests-for-run qry=" qry) +;; (db:hoh-set! stmt-cache db testpatt newsh) +;; newsh))))) +;; (reverse +;; (sqlite3:fold-row +;; (lambda (res id testname item-path state status) +;; ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment +;; (cons (vector id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-") res)) +;; '() +;; stmth +;; run-id)))))) + +(define (db:get-tests-for-run-state-status dbstruct run-id testpatt #!optional (last-update 0)) + (let* ((res '()) + (tests-match-qry (tests:match->sqlqry testpatt)) + (qry (conc "SELECT id,testname,item_path,state,status,event_time,run_duration FROM tests WHERE run_id=? " + " AND last_update > ? " + (if tests-match-qry (conc " AND (" tests-match-qry ") ") "") + ))) + (debug:print-info 8 *default-log-port* "db:get-tests-for-run qry=" qry) + (db:with-db dbstruct run-id #f + (lambda (dbdat db) + (sqlite3:fold-row + (lambda (res id testname item-path state status event-time run-duration) + ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment + (cons (vector id run-id testname state status event-time "" -1 -1 "" "-" item-path run-duration "-" "-") res)) + '() + db + qry + run-id + (or last-update 0)))))) + +(define (db:get-testinfo-state-status dbstruct run-id test-id) + (db:with-db + dbstruct run-id #f + (lambda (dbdat db) + (let* ((res #f) + (stmth (db:get-cache-stmth dbdat db "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=? and run_id=?;"))) + (sqlite3:for-each-row + (lambda (run-id testname item-path state status) + ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment + (set! res (vector test-id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-"))) + ;; db + ;; "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=? and run_id=?;" + stmth + test-id run-id) + res)))) + +;; get a useful subset of the tests data (used in dashboard +;; use db:mintest-get-{id ,run_id,testname ...} +;; +(define (db:get-tests-for-run-mindata dbstruct run-id testpatt states statuses not-in) + (db:get-tests-for-run dbstruct run-id testpatt states statuses #f #f not-in #f #f "id,run_id,testname,state,status,event_time,item_path" 0 #f)) + +;; do not use. +;; +(define (db:get-tests-for-runs dbstruct run-ids testpatt states statuses #!key (not-in #f)(qryvals #f)) + ;; (db:delay-if-busy) + (let ((res '())) + (for-each + (lambda (run-id) + (set! res (append + res + (db:get-tests-for-run dbstruct run-id testpatt states statuses #f #f not-in #f #f qryvals #f 'normal)))) + (if run-ids + run-ids + (db:get-all-run-ids dbstruct))) + res)) + +;; Convert calling routines to get list of run-ids and loop, do not use the get-tests-for-runs +;; + +(define (db:delete-test-records dbstruct run-id test-id) + (db:general-call dbstruct run-id 'delete-test-step-records (list test-id)) + (db:general-call dbstruct run-id 'delete-test-data-records (list test-id)) + (db:with-db + dbstruct run-id #t + (lambda (dbdat db) + (sqlite3:execute db "UPDATE tests SET state='DELETED',status='n/a',comment='' WHERE id=?;" test-id)))) + +;; +(define (db:delete-old-deleted-test-records dbstruct run-id) + (let* ((targtime (- (current-seconds) + (or (configf:lookup-number *configdat* "setup" "keep-deleted-records") + (* 7 24 60 60)))) ;; cleanup if over one week old + (mtdbfile (dbmod:run-id->full-dbfname dbstruct run-id)) + (qry1 "DELETE FROM test_steps WHERE test_id IN (SELECT id FROM tests WHERE state='DELETED' AND event_timenumber fieldname fields) + (if (null? fields) + #f + (let loop ((hed (car fields)) + (tal (cdr fields)) + (indx 0)) + (if (equal? fieldname hed) + indx + (if (null? tal) + #f + (loop (car tal)(cdr tal)(+ indx 1))))))) + +(define db:test-record-qry-selector (string-intersperse db:test-record-fields ",")) + +(define (db:update-tesdata-on-repilcate-db dbstruct old-lt new-lt) + (db:with-db + dbstruct #f #t + (lambda (dbdat db) + (sqlite3:execute db "UPDATE tests SET rundir= replace(rundir,?,?), shortdir=replace(shortdir,?,?);" + old-lt new-lt old-lt new-lt)))) + +;; NOTE: Use db:test-get* to access records +;; NOTE: This needs rundir decoding? Decide, decode here or where used? For the moment decode where used. +(define (db:get-all-tests-info-by-run-id dbstruct run-id) + (let* ((res '())) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir attemptnum archived) + ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 + (set! res (cons (vector id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir attemptnum archived) + res))) + db + (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE state != 'DELETED' AND run_id=?;") + run-id))) + res)) + +(define (db:replace-test-records dbstruct run-id testrecs) + (db:with-db dbstruct run-id #t + (lambda (dbdat db) + (let* ((qmarks (string-intersperse (make-list (length db:test-record-fields) "?") ",")) + (qrystr (conc "INSERT OR REPLACE INTO tests (" db:test-record-qry-selector ") VALUES (" qmarks ") WHERE run_id=?;")) + (qry (sqlite3:prepare db qrystr))) + (debug:print 0 *default-log-port* "INFO: migrating test records for run with id " run-id) + (sqlite3:with-transaction + db + (lambda () + (for-each + (lambda (rec) + ;; (debug:print 0 *default-log-port* "INFO: Inserting values: " (string-intersperse (map conc (vector->list rec)) ",") "\n") + (apply sqlite3:execute qry (append (vector->list rec)(list run-id)))) + testrecs))) + (sqlite3:finalize! qry))))) + +;; map a test-id into the proper range +;; +(define (db:adj-test-id mtdb min-test-id test-id) + (if (>= test-id min-test-id) + test-id + (let loop ((new-id min-test-id)) + (let ((test-id-found #f)) + (sqlite3:for-each-row + (lambda (id) + (set! test-id-found id)) + (dbr:dbdat-dbh mtdb) + "SELECT id FROM tests WHERE id=?;" + new-id) + ;; if test-id-found then need to try again + (if test-id-found + (loop (+ new-id 1)) + (begin + (debug:print-info 0 *default-log-port* "New test id " new-id " selected for test with id " test-id) + (sqlite3:execute mtdb "UPDATE tests SET id=? WHERE id=?;" new-id test-id))))))) + +;; move test ids into the 30k * run_id range +;; +(define (db:prep-megatest.db-adj-test-ids mtdb run-id testrecs) + (debug:print-info 0 *default-log-port* "Adjusting test ids in megatest.db for run " run-id) + (let ((min-test-id (* run-id 30000))) + (for-each + (lambda (testrec) + (let* ((test-id (vector-ref testrec (db:field->number "id" db:test-record-fields)))) + (db:adj-test-id (dbr:dbdat-dbh mtdb) min-test-id test-id))) + testrecs))) + +;; 1. move test ids into the 30k * run_id range +;; 2. move step ids into the 30k * run_id range +;; +(define (db:prep-megatest.db-for-migration mtdb) + (let* ((run-ids (db:get-all-run-ids mtdb))) + (for-each + (lambda (run-id) + (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id))) + (db:prep-megatest.db-adj-test-ids (dbr:dbdat-dbh mtdb) run-id testrecs))) + run-ids))) + +;; Get test data using test_id +;; +(define (db:get-test-info-by-id dbstruct run-id test-id) + (db:with-db + dbstruct + run-id + #f + (lambda (dbdat db) + (let ((res #f)) + (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test + (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update) + ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 + (set! res (vector id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update))) + db + ;; (db:get-cache-stmth dbdat db + ;; (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id=? AND run_id=?;")) + (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id=? AND run_id=?;") + test-id run-id) + res)))) + +;; Get test state, status using test_id +;; +(define (db:get-test-state-status-by-id dbstruct run-id test-id) + (db:with-db + dbstruct + run-id + #f + (lambda (dbdat db) + (let ((res (cons #f #f)) + (stmth (db:get-cache-stmth dbdat db "SELECT state,status FROM tests WHERE id=? AND run_id=?;"))) + (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test + (lambda (state status) + (cons state status)) + ;; db + stmth ;;"SELECT state,status FROM tests WHERE id=? AND run_id=?;" ;; stmth try not compiling this one - yes, this fixed the bind issue + test-id run-id) + res)))) + +;; Use db:test-get* to access +;; Get test data using test_ids. NB// Only works within a single run!! +;; +(define (db:get-test-info-by-ids dbstruct run-id test-ids) + (db:with-db + dbstruct + run-id + #f + (lambda (dbdat db) + (let ((res '())) + (sqlite3:for-each-row + (lambda (a . b) + ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 + (set! res (cons (apply vector a b) res))) + db + (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id in (" + (string-intersperse (map conc test-ids) ",") ");")) + res)))) + +;; try every second until tries times proc +;; +(define (db:keep-trying-until-true proc params tries) + (let* ((res (apply proc params))) + (if res + res + (if (> tries 0) + (begin + (thread-sleep! 1) + (db:keep-trying-until-true proc params (- tries 1))) + (begin + ;; (debug:print-info 0 *default-log-port* "proc never returned true, params="params) + (print"db:keep-trying-until-true proc never returned true, proc = " proc " params =" params " tries = " tries) + #f))))) + +(define (db:get-test-info dbstruct run-id test-name item-path) + (db:with-db + dbstruct + run-id + #f + (lambda (dbdat db) + (db:get-test-info-db db run-id test-name item-path)))) + +(define (db:get-test-info-db db run-id test-name item-path) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (a . b) + (set! res (apply vector a b))) + db + (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE testname=? AND item_path=? AND run_id=?;") + test-name item-path run-id) + res)) + +(define (db:test-get-rundir-from-test-id dbstruct run-id test-id) + (db:with-db + dbstruct + run-id + #f + (lambda (dbdat db) + (db:first-result-default + db + "SELECT rundir FROM tests WHERE id=? AND run_id=?;" + #f ;; default result + test-id run-id)))) + +(define (db:get-test-times dbstruct run-name target) + (let ((res `()) + (qry (conc "select testname, item_path, run_duration, " + (string-join (db:get-keys dbstruct) " || '/' || ") + " as target from tests inner join runs on tests.run_id = runs.id where runs.runname = ? and target = ? ;"))) + (db:with-db + dbstruct + #f ;; this is for the main runs db + #f ;; does not modify db + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (test-name item-path test-time target ) + (set! res (cons (vector test-name item-path test-time) res))) + db + qry + run-name target) + res)))) + +;;====================================================================== +;; S T E P S +;;====================================================================== + +(define (db:teststep-set-status! dbstruct run-id test-id teststep-name state-in status-in comment logfile) + (db:with-db + dbstruct + run-id + #t + (lambda (dbdat db) + (sqlite3:execute + db + "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);" + test-id teststep-name state-in status-in (current-seconds) + (if comment comment "") + (if logfile logfile ""))))) + + + +(define (db:delete-steps-for-test! dbstruct run-id test-id) + ;; TODO: figure out why status is the key field rather than state (note: CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state) ) + (db:with-db + dbstruct + run-id + #t + (lambda (dbdat db) + (sqlite3:execute + db + "UPDATE test_steps set status='DELETED' where test_id=?" ;; and run_id=? !! - run_id not in table (bummer) TODO: get run_id into schema for test_steps + test-id)))) + + +;; db-get-test-steps-for-run +(define (db:get-steps-for-test dbstruct run-id test-id) + (db:with-db + dbstruct + run-id + #f + (lambda (dbdat db) + (let* ((res '())) + (sqlite3:for-each-row + (lambda (id test-id stepname state status event-time logfile comment) + (set! res (cons (vector id test-id stepname state status event-time (if (string? logfile) logfile "") comment) res))) + db + "SELECT id,test_id,stepname,state,status,event_time,logfile,comment FROM test_steps WHERE status != 'DELETED' AND test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; + test-id) + (reverse res))))) + + (define (db:get-steps-info-by-id dbstruct run-id test-step-id) + (db:with-db + dbstruct + run-id + #f + (lambda (dbdat db) + (let* ((res (vector #f #f #f #f #f #f #f #f #f))) + (sqlite3:for-each-row + (lambda (id test-id stepname state status event-time logfile comment last-update) + (set! res (vector id test-id stepname state status event-time (if (string? logfile) logfile "") comment last-update))) + db + "SELECT id,test_id,stepname,state,status,event_time,logfile,comment,last_update FROM test_steps WHERE id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; + test-step-id) + res)))) + +(define (db:get-steps-data dbstruct run-id test-id) + (db:with-db + dbstruct + run-id + #f + (lambda (dbdat db) + (let ((res '())) + (sqlite3:for-each-row + (lambda (id test-id stepname state status event-time logfile) + (set! res (cons (vector id test-id stepname state status event-time (if (string? logfile) logfile "")) res))) + db + "SELECT id,test_id,stepname,state,status,event_time,logfile FROM test_steps WHERE status != 'DELETED' AND test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; + test-id) + (reverse res))))) + +;;====================================================================== +;; T E S T D A T A +;;====================================================================== + +(define (db:get-data-info-by-id dbstruct run-id test-data-id) + (let* ((stmt "SELECT id,test_id, category, variable, value, expected, tol, units, comment, status, type, last_update FROM test_data WHERE id=? ORDER BY id ASC;")) ;; event_time DESC,id ASC; + (db:with-db + dbstruct + run-id + #f + (lambda (dbdat db) + (let* ((stmth (db:get-cache-stmth dbdat db stmt)) + (res (sqlite3:fold-row + (lambda (res id test-id category variable value expected tol units comment status type last-update) + (vector id test-id category variable value expected tol units comment status type last-update)) + (vector #f #f #f #f #f #f #f #f #f #f #f #f) + stmth + test-data-id))) + res))))) + +;; WARNING: Do NOT call this for the parent test on an iterated test +;; Roll up test_data pass/fail results +;; look at the test_data status field, +;; if all are pass (any case) and the test status is PASS or NULL or '' then set test status to PASS. +;; if one or more are fail (any case) then set test status to PASS, non "pass" or "fail" are ignored +(define (db:test-data-rollup dbstruct run-id test-id status) + (let* ((fail-count 0) + (pass-count 0)) + (db:with-db + dbstruct run-id #t + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (fcount pcount) + (set! fail-count fcount) + (set! pass-count pcount)) + db + "SELECT (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'fail') AS fail_count, + (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'pass') AS pass_count;" + test-id test-id) + ;; Now rollup the counts to the central megatest.db + (db:general-call dbstruct run-id 'pass-fail-counts (list pass-count fail-count test-id)) + ;; if the test is not FAIL then set status based on the fail and pass counts. + (db:general-call dbstruct run-id 'test_data-pf-rollup (list test-id test-id test-id test-id)))))) + +;; each section is a rule except "final" which is the final result +;; +;; [rule-5] +;; operator in +;; section LogFileBody +;; desc Output voltage +;; status OK +;; expected 1.9 +;; measured 1.8 +;; type +/- +;; tolerance 0.1 +;; pass 1 +;; fail 0 +;; +;; [final] +;; exit-code 6 +;; exit-status SKIP +;; message If flagged we are asking for this to exit with code 6 +;; +;; recorded in steps table: +;; category: stepname +;; variable: rule-N +;; value: measured +;; expected: expected +;; tol: tolerance +;; units: - +;; comment: desc or message +;; status: status +;; type: type +;; +(define (db:logpro-dat->csv dat stepname) + (let ((res '())) + (for-each + (lambda (entry-name) + (if (equal? entry-name "final") + (set! res (append + res + (list + (list stepname + entry-name + (configf:lookup dat entry-name "exit-code") ;; 0 ;; Value + 0 ;; 1 ;; Expected + 0 ;; 2 ;; Tolerance + "n/a" ;; 3 ;; Units + (configf:lookup dat entry-name "message") ;; 4 ;; Comment + (configf:lookup dat entry-name "exit-status") ;; 5 ;; Status + "logpro" ;; 6 ;; Type + )))) + (let* ((value (or (configf:lookup dat entry-name "measured") "n/a")) + (expected (or (configf:lookup dat entry-name "expected") 0.0)) + (tolerance (or (configf:lookup dat entry-name "tolerance") 0.0)) + (comment (or (configf:lookup dat entry-name "comment") + (configf:lookup dat entry-name "desc") "n/a")) + (status (or (configf:lookup dat entry-name "status") "n/a")) + (type (or (configf:lookup dat entry-name "expected") "n/a"))) + (set! res (append + res + (list (list stepname + entry-name + value ;; 0 + expected ;; 1 + tolerance ;; 2 + "n/a" ;; 3 Units + comment ;; 4 + status ;; 5 + type ;; 6 + ))))))) + (hash-table-keys dat)) + res)) + +;; $MT_MEGATEST -load-test-data << EOF +;; foo,bar, 1.2, 1.9, > +;; foo,rab, 1.0e9, 10e9, 1e9 +;; foo,bla, 1.2, 1.9, < +;; foo,bal, 1.2, 1.2, < , ,Check for overload +;; foo,alb, 1.2, 1.2, <= , Amps,This is the high power circuit test +;; foo,abl, 1.2, 1.3, 0.1 +;; foo,bra, 1.2, pass, silly stuff +;; faz,bar, 10, 8mA, , ,"this is a comment" +;; EOF + +(define (tdb:get-prev-tol-for-test tdb test-id category variable) + ;; Finish me? + (values #f #f #f)) + +(define (db:csv->test-data dbstruct run-id test-id csvdata) + (debug:print 4 *default-log-port* "test-id " test-id ", csvdata: " csvdata) + (db:with-db + dbstruct #f #t + (lambda (dbdat db) + (let* ((csvlist (csv->list (make-csv-reader + (open-input-string csvdata) + '((strip-leading-whitespace? #t) + (strip-trailing-whitespace? #t)))))) ;; (csv->list csvdata))) + (for-each + (lambda (csvrow) + (let* ((padded-row (take (append csvrow (list #f #f #f #f #f #f #f #f #f)) 9)) + (category (list-ref padded-row 0)) + (variable (list-ref padded-row 1)) + (value (any->number-if-possible (list-ref padded-row 2))) + (expected (any->number-if-possible (list-ref padded-row 3))) + (tol (any->number-if-possible (list-ref padded-row 4))) ;; >, <, >=, <=, or a number + (units (list-ref padded-row 5)) + (comment (list-ref padded-row 6)) + (status (let ((s (list-ref padded-row 7))) + (if (and (string? s)(or (string-match (regexp "^\\s*$") s) + (string-match (regexp "^n/a$") s))) + #f + s))) ;; if specified on the input then use, else calculate + (type (list-ref padded-row 8))) + ;; look up expected,tol,units from previous best fit test if they are all either #f or '' + (debug:print 4 *default-log-port* "BEFORE: category: " category " variable: " variable " value: " value + ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment " type: " type) + + (if (and (or (not expected)(equal? expected "")) + (or (not tol) (equal? expected "")) + (or (not units) (equal? expected ""))) + (let-values (((new-expected new-tol new-units)(tdb:get-prev-tol-for-test #f test-id category variable))) + (set! expected new-expected) + (set! tol new-tol) + (set! units new-units))) + + (debug:print 4 *default-log-port* "AFTER: category: " category " variable: " variable " value: " value + ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment) + ;; calculate status if NOT specified + (if (and (not status)(number? expected)(number? value)) ;; need expected and value to be numbers + (if (number? tol) ;; if tol is a number then we do the standard comparison + (let* ((max-val (+ expected tol)) + (min-val (- expected tol)) + (result (and (>= value min-val)(<= value max-val)))) + (debug:print 4 *default-log-port* "max-val: " max-val " min-val: " min-val " result: " result) + (set! status (if result "pass" "fail"))) + (set! status ;; NB// need to assess each one (i.e. not return operator since need to act if not valid op. + (case (string->symbol tol) ;; tol should be >, <, >=, <= + ((>) (if (> value expected) "pass" "fail")) + ((<) (if (< value expected) "pass" "fail")) + ((>=) (if (>= value expected) "pass" "fail")) + ((<=) (if (<= value expected) "pass" "fail")) + (else (conc "ERROR: bad tol comparator " tol)))))) + (debug:print 4 *default-log-port* "AFTER2: category: " category " variable: " variable " value: " value + ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment) + ;; (db:delay-if-busy dbdat) + (sqlite3:execute db "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment,status,type) VALUES (?,?,?,?,?,?,?,?,?,?);" + test-id category variable value expected tol units (if comment comment "") status type))) + csvlist))))) + +;; This routine moved from tdb.scm, tdb:read-test-data +;; +(define (db:read-test-data dbstruct run-id test-id categorypatt) + (let* ((res '())) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (id test_id category variable value expected tol units comment status type) + (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) + db + "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt) + (reverse res))))) + +;; This routine moved from tdb.scm, :read-test-data +;; +(define (db:read-test-data-varpatt dbstruct run-id test-id categorypatt varpatt) + (let* ((res '())) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (id test_id category variable value expected tol units comment status type) + (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) + db + "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? AND variable LIKE ? ORDER BY category,variable;" test-id categorypatt varpatt) + (reverse res))))) + + +;;====================================================================== +;; Misc. test related queries +;;====================================================================== + +(define (db:get-run-ids-matching-target dbstruct keynames target res runname testpatt statepatt statuspatt) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (let* ((row-ids '()) + (keystr (string-intersperse + (map (lambda (key val) + (conc key " like '" val "'")) + keynames + (string-split target "/")) + " AND ")) + ;; (testqry (tests:match->sqlqry testpatt)) + (runsqry (sqlite3:prepare db (conc "SELECT id FROM runs WHERE " keystr " AND runname LIKE '" runname "';")))) + ;; (debug:print 8 *default-log-port* "db:test-get-paths-matching-keynames-target-new\n runsqry=" runsqry "\n tstsqry=" testqry) + (sqlite3:for-each-row + (lambda (rid) + (set! row-ids (cons rid row-ids))) + runsqry) + (sqlite3:finalize! runsqry) + row-ids)))) + +;; finds latest matching all patts for given run-id +;; +(define (db:test-get-paths-matching-keynames-target-new dbstruct run-id keynames target res testpatt statepatt statuspatt runname) + (let* ((testqry (tests:match->sqlqry testpatt)) + (tstsqry (conc "SELECT rundir FROM tests WHERE run_id=? AND " testqry " AND state LIKE '" statepatt "' AND status LIKE '" statuspatt "' ORDER BY event_time ASC;"))) + (db:with-db + dbstruct + run-id + #f + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (p) + (set! res (cons p res))) + db + tstsqry + run-id) + res)))) + +(define (db:test-toplevel-num-items dbstruct run-id testname) + (db:with-db + dbstruct + run-id + #f + (lambda (dbdat db) + (let ((res 0)) + (sqlite3:for-each-row + (lambda (num-items) + (set! res num-items)) + db + "SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND state NOT IN ('DELETED');" + run-id + testname) + res)))) + +;;====================================================================== +;; QUEUE UP META, TEST STATUS AND STEPS REMOTE ACCESS +;;====================================================================== + +;; NOTE: Can remove the regex and base64 encoding for zmq +(define (db:obj->string obj #!key (transport 'http)) + (case transport + ;; ((fs) obj) + ((http fs) + (string-substitute + (regexp "=") "_" + (base64:base64-encode + (z3:encode-buffer + (with-output-to-string + (lambda ()(serialize obj))))) ;; BB: serialize - this is what causes problems between different builds of megatest communicating. serialize is sensitive to binary image of mtest. + #t)) + ((zmq nmsg)(with-output-to-string (lambda ()(serialize obj)))) + (else obj))) ;; rpc + +(define (db:string->obj msg #!key (transport 'http)) + (case transport + ;; ((fs) msg) + ((http fs) + (if (string? msg) + (with-input-from-string + (z3:decode-buffer + (base64:base64-decode + (string-substitute + (regexp "_") "=" msg #t))) + (lambda ()(deserialize))) + (begin + (debug:print-error 0 *default-log-port* "reception failed. Received \"" msg "\" but cannot translate it.") + (print-call-chain (current-error-port)) + msg))) ;; crude reply for when things go awry + ((zmq nmsg)(with-input-from-string msg (lambda ()(deserialize)))) + (else msg))) ;; rpc + + +(define (db:roll-up-rules state-status-counts state status) + (if (null? state-status-counts) + '(#f #f) + (let* ((running (length (filter (lambda (x) + (member (dbr:counts-state x) *common:running-states*)) + state-status-counts))) + (bad-not-started (length (filter (lambda (x) + (and (equal? (dbr:counts-state x) "NOT_STARTED") + (not (member (dbr:counts-status x) *common:not-started-ok-statuses*)))) + state-status-counts))) + (all-curr-states (common:special-sort ;; worst -> best (sort of) + (delete-duplicates + (if (and state (not (member state *common:dont-roll-up-states*))) + (cons state (map dbr:counts-state state-status-counts)) + (map dbr:counts-state state-status-counts))) + *common:std-states* >)) + (all-curr-statuses (common:special-sort ;; worst -> best + (delete-duplicates + (if (and state status (not (member state *common:dont-roll-up-states*))) + (cons status (map dbr:counts-status state-status-counts)) + (map dbr:counts-status state-status-counts))) + *common:std-statuses* >)) + (non-completes (filter (lambda (x) + (not (member x (cons "COMPLETED" *common:dont-roll-up-states*)))) + all-curr-states)) + (preq-fails (filter (lambda (x) + (equal? x "PREQ_FAIL")) + all-curr-statuses)) + (num-non-completes (length non-completes)) + (newstate (cond + ((> running 0) "RUNNING") ;; anything running, call the situation running + ((> (length preq-fails) 0) "NOT_STARTED") + ((> bad-not-started 0) "COMPLETED") ;; we have an ugly situation, it is completed in the sense we cannot do more. + ((> num-non-completes 0) (car non-completes)) ;; (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED + (else (car all-curr-states)))) + (newstatus (cond + ((> (length preq-fails) 0) "PREQ_FAIL") + ((or (> bad-not-started 0) + (and (equal? newstate "NOT_STARTED") + (> num-non-completes 0))) + "STARTED") + (else (car all-curr-statuses))))) + (debug:print-info 2 *default-log-port* + "\n--> probe db:set-state-status-and-roll-up-items: " + "\n--> state-status-counts: "(map dbr:counts->alist state-status-counts) + "\n--> running: "running + "\n--> bad-not-started: "bad-not-started + "\n--> non-non-completes: "num-non-completes + "\n--> non-completes: "non-completes + "\n--> all-curr-states: "all-curr-states + "\n--> all-curr-statuses: "all-curr-statuses + "\n--> newstate "newstate + "\n--> newstatus "newstatus + "\n\n") + + ;; NB// Pass the db so it is part of the transaction + (list newstate newstatus)))) + +(define (db:set-state-status-and-roll-up-run dbstruct run-id curr-state curr-status) + (mutex-lock! *db-transaction-mutex*) + (db:with-db + dbstruct run-id #t + (lambda (dbdat db) + (let ((tr-res + (sqlite3:with-transaction + db + (lambda () + (let* ((state-status-counts (db:get-all-state-status-counts-for-run-db dbdat db run-id)) + (state-statuses (db:roll-up-rules state-status-counts #f #f )) + (newstate (car state-statuses)) + (newstatus (cadr state-statuses))) + (if (or (not (eq? newstate curr-state)) (not (eq? newstatus curr-status))) + (db:set-run-state-status-db dbdat db run-id newstate newstatus ))))))) + (mutex-unlock! *db-transaction-mutex*) + tr-res)))) + +(define (db:get-all-state-status-counts-for-run-db dbdat db run-id) + (sqlite3:map-row + (lambda (state status count) + (make-dbr:counts state: state status: status count: count)) + (db:get-cache-stmth + dbdat db + "SELECT state,status,count(id) FROM tests WHERE run_id=? GROUP BY state,status;") + run-id )) + +(define (db:get-all-state-status-counts-for-run dbstruct run-id) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (db:get-all-state-status-counts-for-run-db dbdat db run-id)))) + +;; BBnote: db:get-all-state-status-counts-for-test returns dbr:counts object aggregating state and status of items of a given test, *not including rollup state/status* +;; +;; NOTE: This is called within a transaction +;; +(define (db:get-all-state-status-counts-for-test db run-id test-name item-path item-state-in item-status-in) + (let* ((test-info (db:get-test-info-db db run-id test-name item-path)) + (item-state (or item-state-in (db:test-get-state test-info))) + (item-status (or item-status-in (db:test-get-status test-info))) + (other-items-count-recs (sqlite3:map-row + (lambda (state status count) + (make-dbr:counts state: state status: status count: count)) + db + ;; ignore current item because we have changed its value in the current transation so this select will see the old value. + "SELECT state,status,count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND item_path !=? GROUP BY state,status;" + run-id test-name item-path)) + ;; add current item to tally outside of sql query + (match-countrec-lambda (lambda (countrec) + (and (equal? (dbr:counts-state countrec) item-state) + (equal? (dbr:counts-status countrec) item-status)))) + + (already-have-count-rec-list + (filter match-countrec-lambda other-items-count-recs)) ;; will have either 0 or 1 count recs depending if another item shares this item's state/status + + (updated-count-rec (if (null? already-have-count-rec-list) + (make-dbr:counts state: item-state status: item-status count: 1) + (let* ((our-count-rec (car already-have-count-rec-list)) + (new-count (add1 (dbr:counts-count our-count-rec)))) + (make-dbr:counts state: item-state status: item-status count: new-count)))) + + (nonmatch-countrec-lambda (lambda (countrec) (not (match-countrec-lambda countrec)))) + + (unrelated-rec-list + (filter nonmatch-countrec-lambda other-items-count-recs))) + (cons updated-count-rec unrelated-rec-list))) + +;; (define (db:get-all-item-states db run-id test-name) +;; (sqlite3:map-row +;; (lambda (a) a) +;; db +;; "SELECT DISTINCT state FROM tests WHERE item_path != '' AND state != 'DELETED' AND run_id=? AND testname=?" +;; run-id test-name)) +;; +;; (define (db:get-all-item-statuses db run-id test-name) +;; (sqlite3:map-row +;; (lambda (a) a) +;; db +;; "SELECT DISTINCT status FROM tests WHERE item_path != '' AND state != 'DELETED' AND state='COMPLETED' AND run_id=? AND testname=?" +;; run-id test-name)) + +(define (db:test-get-logfile-info dbstruct run-id test-name) + (db:with-db + dbstruct + run-id + #f + (lambda (dbdat db) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (path final_logf) + ;; (let ((path (sdb:qry 'getstr path-id)) + ;; (final_logf (sdb:qry 'getstr final_logf-id))) + (set! logf final_logf) + (set! res (list path final_logf)) + (if (directory? path) + (debug:print 2 *default-log-port* "Found path: " path) + (debug:print 2 *default-log-port* "No such path: " path))) ;; ) + db + "SELECT rundir,final_logf FROM tests WHERE testname=? AND item_path='' AND run_id=?;" + test-name run-id) + res)))) + +;;====================================================================== +;; A G R E G A T E D T R A N S A C T I O N D B W R I T E S +;;====================================================================== + +(define db:queries + (list '(update-run-duration "UPDATE tests SET run_duration=? WHERE id=?;") + + ;; TESTS + '(register-test "INSERT OR IGNORE INTO tests (run_id,testname,event_time,item_path,state,status) VALUES (?,?,strftime('%s','now'),?,'NOT_STARTED','n/a');") + ;; Test state and status + '(set-test-state "UPDATE tests SET state=? WHERE id=?;") + '(set-test-status "UPDATE tests SET state=? WHERE id=?;") + '(state-status "UPDATE tests SET state=?,status=? WHERE id=?;") ;; D/ONE + '(state-status-msg "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;") ;; DONE + ;; Test comment + '(set-test-comment "UPDATE tests SET comment=? WHERE id=?;") + '(set-test-start-time "UPDATE tests SET event_time=strftime('%s','now') WHERE id=?;") ;; DONE + '(pass-fail-counts "UPDATE tests SET pass_count=?,fail_count=? WHERE id=?;") + ;; test_data-pf-rollup is used to set a tests PASS/FAIL based on the pass/fail info from the steps + '(test_data-pf-rollup "UPDATE tests + SET status=CASE WHEN (SELECT fail_count FROM tests WHERE id=?) > 0 + THEN 'FAIL' + WHEN (SELECT pass_count FROM tests WHERE id=?) > 0 AND + (SELECT status FROM tests WHERE id=?) NOT IN ('WARN','FAIL') + THEN 'PASS' + ELSE status + END WHERE id=?;") ;; DONE + '(test-set-log "UPDATE tests SET final_logf=? WHERE id=?;") ;; DONE + ;; '(test-set-rundir-by-test-id "UPDATE tests SET rundir=? WHERE id=?") ;; DONE + ;; '(test-set-rundir "UPDATE tests SET rundir=? AND testname=? AND item_path=?;") ;; DONE + '(test-set-rundir-shortdir "UPDATE tests SET rundir=?,shortdir=? WHERE testname=? AND item_path=? AND run_id=?;") ;; BROKEN!!! NEEDS run-id + '(delete-tests-in-state ;; "DELETE FROM tests WHERE state=?;") ;; DONE + "UPDATE tests SET state='DELETED' WHERE state=?") + '(tests:test-set-toplog "UPDATE tests SET final_logf=? WHERE run_id=? AND testname=? AND item_path='';") + '(update-cpuload-diskfree "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;") ;; DONE + '(update-uname-host "UPDATE tests SET uname=?,host=? WHERE id=?;") ;; DONE + '(update-test-rundat "INSERT INTO test_rundat (test_id,update_time,cpuload,diskfree,diskusage,run_duration) VALUES (?,?,?,?,?,?);") + '(update-test-state "UPDATE tests SET state=? WHERE state=? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") + '(update-test-status "UPDATE tests SET status=? WHERE status like ? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") + ;; stuff for set-state-status-and-roll-up-items + '(update-pass-fail-counts "UPDATE tests + SET fail_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('FAIL','CHECK','INCOMPLETE','ABORT')), + pass_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('PASS','WARN','WAIVED')) + WHERE testname=? AND item_path='' AND run_id=?;") ;; DONE ;; BROKEN!!! NEEDS run-id + '(top-test-set-running "UPDATE tests SET state='RUNNING' WHERE testname=? AND item_path='' AND run_id=?;") ;; DONE ;; BROKEN!!! NEEDS run-id + + ;; NOT USED + ;; + ;; Might be the following top-test-set-per-pf-counts query could be better based off of something like this: + ;; + ;; select state,status,count(state) from tests where run_id=59 AND testname='runfirst' group by state,status; + ;; + '(top-test-set-per-pf-counts "UPDATE tests + SET state=CASE + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND status NOT IN ('n/a') + AND state in ('NOT_STARTED')) > 0 THEN 'UNKNOWN' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND (status NOT IN ('TEN_STRIKES','BLOCKED') OR status IN ('INCOMPLETE')) + AND state in ('RUNNING','NOT_STARTED','LAUNCHED','REMOTEHOSTSTART')) > 0 THEN 'RUNNING' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('COMPLETED','DELETED')) = 0 THEN 'COMPLETED' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state = 'NOT_STARTED') > 0 THEN 'NOT_STARTED' + ELSE 'UNKNOWN' END, + status=CASE + WHEN fail_count > 0 THEN 'FAIL' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state IN ('BLOCKED','INCOMPLETE')) > 0 THEN 'FAIL' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('DELETED') + AND status IN ('INCOMPLETE','ABORT')) > 0 THEN 'ABORT' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('DELETED') + AND status = 'AUTO') > 0 THEN 'AUTO' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('DELETED') + AND status IN ('STUCK/INCOMPLETE', 'INCOMPLETE')) > 0 THEN 'INCOMPLETE' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state IN ('COMPLETED','STUCK/INCOMPLETE','INCOMPLETE') + AND status = 'FAIL') > 0 THEN 'FAIL' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('DELETED') + AND status = 'CHECK') > 0 THEN 'CHECK' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('DELETED') + AND status = 'SKIP') > 0 THEN 'SKIP' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('DELETED') + AND status = 'WARN') > 0 THEN 'WARN' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('DELETED') + AND status = 'WAIVED') > 0 THEN 'WAIVED' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('DELETED') + AND status NOT IN ('PASS','FAIL','WARN','WAIVED')) > 0 THEN 'ABORT' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state='NOT_STARTED') > 0 THEN 'n/a' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state = 'COMPLETED' + AND status = 'PASS') > 0 THEN 'PASS' + WHEN pass_count > 0 AND fail_count=0 THEN 'PASS' + ELSE 'UNKNOWN' END + WHERE testname=? AND item_path='';") ;; DONE ;; BROKEN!!! NEEDS run-id + + ;; STEPS + '(delete-test-step-records "UPDATE test_steps SET status='DELETED' WHERE test_id=?;") + '(delete-test-data-records "UPDATE test_data SET status='DELETED' WHERE test_id=?;") ;; using status since no state field + )) + +(define (db:lookup-query qry-name) + (let ((q (alist-ref qry-name db:queries))) + (if q (car q) #f))) + +;; do not run these as part of the transaction +(define db:special-queries '(rollup-tests-pass-fail + ;; db:set-state-status-and-roll-up-items ;; WHY NOT!? + login + immediate + flush + sync + set-verbosity + killserver + )) + +(define (db:login dbstruct calling-path calling-version client-signature) + (cond + ((not (equal? calling-path *toppath*)) + (list #f "Login failed due to mismatch paths: " calling-path ", " *toppath*)) + ;; ((not (equal? *run-id* run-id)) + ;; (list #f "Login failed due to mismatch run-id: " run-id ", " *run-id*)) + ((not (equal? megatest-version calling-version)) + (list #t (conc "Login warning due to mismatch megatest version: " calling-version ", " megatest-version))) + + (else + (hash-table-set! *logged-in-clients* client-signature (current-seconds)) + '(#t "successful login")))) + +;; NO WAY TO KNOW IF IT MODIFIES THE DB BUT NEARLY ALL ARE UPDATES/INSERTS +;; +(define (db:general-call dbstruct run-id stmtname params) + ;; Why is db:lookup-query above not used here to get the query? + (let ((query (let ((q (alist-ref (if (string? stmtname) + (string->symbol stmtname) + stmtname) + db:queries))) + (if q (car q) #f)))) + (db:with-db + dbstruct run-id #t + (lambda (dbdat db) + (apply sqlite3:execute db query params) + #t)))) + +;; get a summary of state and status counts to calculate a rollup +;; +(define (db:get-state-status-summary dbstruct run-id testname) + (let ((res '())) + (db:with-db + dbstruct run-id #f + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (state status count) + (set! res (cons (vector state status count) res))) + db + "SELECT state,status,count(state) FROM tests WHERE run_id=? AND testname=? AND item_path='' GROUP BY state,status;" + run-id testname) + res)))) + +(define (db:get-latest-host-load dbstruct raw-hostname) + (let* ((hostname (string-substitute "\\..*$" "" raw-hostname)) + (res (cons -1 0))) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (cpuload update-time) (set! res (cons cpuload update-time))) + db + "SELECT tr.cpuload, tr.update_time FROM test_rundat tr, tests t WHERE t.host=? AND tr.cpuload != -1 AND tr.test_id=t.id ORDER BY tr.update_time DESC LIMIT 1;" + hostname))) res )) + +(define (db:set-top-level-from-items dbstruct run-id testname) + (let* ((summ (db:get-state-status-summary dbstruct run-id testname)) + (find (lambda (state status) + (if (null? summ) + #f + (let loop ((hed (car summ)) + (tal (cdr summ))) + (if (and (string-match state (vector-ref hed 0)) + (string-match status (vector-ref hed 1))) + hed + (if (null? tal) + #f + (loop (car tal)(cdr tal))))))))) + + + ;;; E D I T M E ! ! + + + (cond + ((> (find "COMPLETED" ".*") 0) #f)))) + + + +;; get the previous records for when these tests were run where all keys match but runname +;; NB// Merge this with test:get-previous-test-run-records? This one looks for all matching tests +;; can use wildcards. Also can likely be factored in with get test paths? +;; +;; Run this remotely!! +;; +(define (db:get-matching-previous-test-run-records dbstruct run-id test-name item-path) + (let* ((keys (db:get-keys dbstruct)) + (selstr (string-intersperse keys ",")) + (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND ")) + (keyvals #f) + (tests-hash (make-hash-table))) + ;; first look up the key values from the run selected by run-id + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (a . b) + (set! keyvals (cons a b))) + db + (conc "SELECT " selstr " FROM runs WHERE id=? ORDER BY event_time DESC;") run-id))) + (if (not keyvals) + '() + (let ((prev-run-ids '())) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (apply sqlite3:for-each-row + (lambda (id) + (set! prev-run-ids (cons id prev-run-ids))) + db + (conc "SELECT id FROM runs WHERE " qrystr " AND id != ?;") (append keyvals (list run-id))))) + ;; collect all matching tests for the runs then + ;; extract the most recent test and return that. + (debug:print 4 *default-log-port* "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals + ", previous run ids found: " prev-run-ids) + (if (null? prev-run-ids) '() ;; no previous runs? return null + (let loop ((hed (car prev-run-ids)) + (tal (cdr prev-run-ids))) + (let ((results (db:get-tests-for-run dbstruct hed (conc test-name "/" item-path) '() '() #f #f #f #f #f #f #f 'normal))) + (debug:print 4 *default-log-port* "Got tests for run-id " run-id ", test-name " test-name + ", item-path " item-path " results: " (intersperse results "\n")) + ;; Keep only the youngest of any test/item combination + (for-each + (lambda (testdat) + (let* ((full-testname (conc (db:test-get-testname testdat) "/" (db:test-get-item-path testdat))) + (stored-test (hash-table-ref/default tests-hash full-testname #f))) + (if (or (not stored-test) + (and stored-test + (> (db:test-get-event_time testdat)(db:test-get-event_time stored-test)))) + ;; this test is younger, store it in the hash + (hash-table-set! tests-hash full-testname testdat)))) + results) + (if (null? tal) + (map cdr (hash-table->alist tests-hash)) ;; return a list of the most recent tests + (loop (car tal)(cdr tal)))))))))) + +;; Function recursively checks if .journal exists; if yes means db busy; call itself after delayed interval +;; return the sqlite3 db handle if possible +;; +(define (db:delay-if-busy dbdat #!key (count 6)) + (if (not (configf:lookup *configdat* "server" "delay-on-busy")) + (and dbdat (dbr:dbdat-dbh dbdat)) + (if dbdat + (let* ((dbpath (dbr:dbdat-dbfile dbdat)) + (db (dbr:dbdat-dbh dbdat)) ;; we'll return this so (db:delay--if-busy can be called inline + (dbfj (conc dbpath "-journal"))) + (if (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "WARNING: failed to test for existance of " dbfj ", exn=" exn) + (thread-sleep! 1) + (db:delay-if-busy count (- count 1))) + (common:file-exists? dbfj)) + (case count + ((6) + (thread-sleep! 0.2) + (db:delay-if-busy count: 5)) + ((5) + (thread-sleep! 0.4) + (db:delay-if-busy count: 4)) + ((4) + (thread-sleep! 0.8) + (db:delay-if-busy count: 3)) + ((3) + (thread-sleep! 1.6) + (db:delay-if-busy count: 2)) + ((2) + (thread-sleep! 3.2) + (db:delay-if-busy count: 1)) + ((1) + (thread-sleep! 6.4) + (db:delay-if-busy count: 0)) + (else + (debug:print-info 0 *default-log-port* "delaying db access due to high database load.") + (thread-sleep! 12.8)))) + db) + "bogus result from db:delay-if-busy"))) + +(define (db:test-get-records-for-index-file dbstruct run-id test-name) + (let ((res '())) + (db:with-db + dbstruct + run-id + #f + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (id itempath state status run_duration logf comment) + (set! res (cons (vector id itempath state status run_duration logf comment) res))) + db + "SELECT id,item_path,state,status,run_duration,final_logf,comment FROM tests WHERE testname=? AND item_path != '' AND run_id=?;" ;; BUG! WHY NO run_id? + test-name + run-id) + res)))) + +;;====================================================================== +;; Tests meta data +;;====================================================================== + +;; returns a hash table of tags to tests +;; +(define (db:get-tests-tags dbstruct) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (let* ((res (make-hash-table))) + (sqlite3:for-each-row + (lambda (testname tags-in) + (let ((tags (string-split tags-in ","))) + (for-each + (lambda (tag) + (hash-table-set! res tag + (delete-duplicates + (cons testname (hash-table-ref/default res tag '()))))) + tags))) + db + "SELECT testname,tags FROM test_meta") + (hash-table->alist res))))) + +;; testmeta doesn't change, we can cache it for up too an hour + +(define *db:testmeta-cache* (make-hash-table)) +(define *db:testmeta-last-update* 0) + +;; read the record given a testname +(define (db:testmeta-get-record dbstruct testname) + (if (and (< (- (current-seconds) *db:testmeta-last-update*) 600) + (hash-table-exists? *db:testmeta-cache* testname)) + (hash-table-ref *db:testmeta-cache* testname) + (let ((res #f)) + (db:with-db + dbstruct + #f + #f + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup) + (set! res (vector id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup))) + db + "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta WHERE testname=?;" + testname))) + (hash-table-set! *db:testmeta-cache* testname res) + (set! *db:testmeta-last-update* (current-seconds)) + res))) + +;; create a new record for a given testname +(define (db:testmeta-add-record dbstruct testname) + (db:with-db dbstruct #f #t + (lambda (dbdat db) + (sqlite3:execute + db + "INSERT OR IGNORE INTO test_meta (testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags) VALUES (?,'','','','','','','','');" testname)))) + +;; update one of the testmeta fields +(define (db:testmeta-update-field dbstruct testname field value) + (db:with-db dbstruct #f #t + (lambda (dbdat db) + (sqlite3:execute + db + (conc "UPDATE test_meta SET " field "=? WHERE testname=?;") value testname)))) + +(define (db:testmeta-get-all dbstruct) + (db:with-db dbstruct #f #f + (lambda (dbdat db) + (let ((res '())) + (sqlite3:for-each-row + (lambda (a . b) + (set! res (cons (apply vector a b) res))) + db + "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta;") + res)))) + +;;====================================================================== +;; M I S C M A N A G E M E N T I T E M S +;;====================================================================== + +;; A routine to map itempaths using a itemmap +;; patha and pathb must be strings or this will fail +;; +;; path-b is waiting on path-a +;; +(define (db:compare-itempaths test-b-name path-a path-b itemmaps ) + (debug:print-info 6 *default-log-port* "ITEMMAPS: " itemmaps) + (let* ((itemmap (tests:lookup-itemmap itemmaps test-b-name))) + (if itemmap + (let ((path-b-mapped (db:multi-pattern-apply path-b itemmap))) + (debug:print-info 6 *default-log-port* "ITEMMAP is " itemmap ", path: " path-b ", mapped path: " path-b-mapped) + (equal? path-a path-b-mapped)) + (equal? path-b path-a)))) + +;; A routine to convert test/itempath using a itemmap +;; NOTE: to process only an itempath (i.e. no prepended testname) +;; just call db:multi-pattern-apply +;; +(define (db:convert-test-itempath path-in itemmap) + (debug:print-info 6 *default-log-port* "ITEMMAP is " itemmap) + (let* ((path-parts (string-split path-in "/")) + (test-name (if (null? path-parts) "" (car path-parts))) + (item-path (string-intersperse (if (null? path-parts) '() (cdr path-parts)) "/"))) + (conc test-name "/" + (db:multi-pattern-apply item-path itemmap)))) + +;; patterns are: +;; "rx1" "replacement1"\n +;; "rx2" "replacement2" +;; etc. +;; +(define (db:multi-pattern-apply item-path itemmap) + (let ((all-patts (string-split itemmap "\n"))) + (if (null? all-patts) + item-path + (let loop ((hed (car all-patts)) + (tal (cdr all-patts)) + (res item-path)) + (let* ((parts (string-split hed)) + (patt (car parts)) + + (repl (if (> (length parts) 1)(cadr parts) "")) + + (newr (if (and patt repl) + (begin + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* + "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl ", exn=" exn) + res) + (string-substitute patt repl res)) + + + ) + (begin + (debug:print 0 *default-log-port* + "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl) + res)))) + (if (null? tal) + newr + (loop (car tal)(cdr tal) newr))))))) + + + + +;; the new prereqs calculation, looks also at itempath if specified +;; all prereqs must be met +;; if prereq test with itempath='' is in common:well-ended-states, then prereq is met +;; if prereq test with itempath=ref-item-path and COMPLETED with PASS, WARN, CHECK, or WAIVED then prereq is met +;; +;; Note: mode 'normal means that tests must be COMPLETED and ok (i.e. PASS, WARN, CHECK, SKIP or WAIVED) +;; mode 'toplevel means that tests must be COMPLETED only +;; mode 'itemmatch or 'itemwait means that tests items must be COMPLETED and (PASS|WARN|WAIVED|CHECK) [[ NB// NOT IMPLEMENTED YET ]] +;; mode 'exclusive means this test/item cannot run if the same test/item is LAUNCHED,REMOTEHOSTSTART or RUNNING +;; +;; IDEA for consideration: +;; 1. collect all tests "upstream" +;; 2. any NOT completed and good? if yes => return those as prereqs not met, if no => return null list +;; +;; (define (db:get-prereqs-not-met dbstruct run-id waitons ref-item-path mode) +(define (db:get-prereqs-not-met dbstruct run-id waitons ref-test-name ref-item-path mode itemmaps) ;; #!key (mode '(normal))(itemmap #f)) + ;; BBnote - rollup of an itemized test's overall state/status done in db:set-state-status-and-roll-up-items + (debug:print 4 *default-log-port* "db:get-prereqs-not-met: " waitons) + (append + (if (member 'exclusive mode) + (let ((running-tests (db:get-tests-for-run dbstruct + #f ;; run-id of #f means for all runs. + (if (string=? ref-item-path "") ;; testpatt + ref-test-name + (conc ref-test-name "/" ref-item-path)) + '("LAUNCHED" "REMOTEHOSTSTART" "RUNNING") ;; states + '() ;; statuses + #f ;; offset + #f ;; limit + #f ;; not-in + #f ;; sort by + #f ;; sort order + 'shortlist ;; query type + 0 ;; last update, beginning of time .... + #f ;; mode + ))) + ;;(map (lambda (testdat) + ;; (if (equal? (db:test-get-item-path testdat) "") + ;; (db:test-get-testname testdat) + ;; (conc (db:test-get-testname testdat) + ;; "/" + ;; (db:test-get-item-path testdat)))) + running-tests) ;; calling functions want the entire data + '()) + + + + ;; collection of: for each waiton - + ;; if this ref-test-name is an item in an itemized test and mode is itemwait/itemmatch: + ;; if waiton is not itemized - if waiton is not both completed and in ok status, add as unmet prerequisite + ;; if waiton is itemized: + ;; and waiton's items are not expanded, add as unmet prerequisite + ;; else if matching waiton item is not both completed and in an ok status, add as unmet prerequisite + ;; else + ;; if waiton toplevel is not in both completed and ok status, add as unmet prerequisite + + (if (or (not waitons) + (null? waitons)) + '() + (let* ((ref-test-itemized-mode (not (null? (lset-intersection eq? mode '(itemmatch itemwait))))) ;; how is this different from using member? + (ref-test-toplevel-mode (not (null? (lset-intersection eq? mode '(toplevel))))) + (ref-test-is-toplevel (equal? ref-item-path "")) + (ref-test-is-item (not ref-test-is-toplevel)) + (unmet-pre-reqs '()) + (result '()) + (unmet-prereq-items '()) + ) + (for-each ; waitons + (lambda (waitontest-name) + ;; by getting the tests with matching name we are looking only at the matching test + ;; and related sub items + ;; next should be using mt:get-tests-for-run? + + (let (;(waiton-is-itemized ...) + ;(waiton-items-are-expanded ...) + (waiton-tests (db:get-tests-for-run-state-status dbstruct run-id waitontest-name)) + (ever-seen #f) + (parent-waiton-met #f) + (item-waiton-met #f) + + ) + (for-each ; test expanded from waiton + (lambda (waiton-test) + (let* ((waiton-state (db:test-get-state waiton-test)) + (waiton-status (db:test-get-status waiton-test)) + (waiton-item-path (db:test-get-item-path waiton-test)) ;; BB- this is the upstream itempath + (waiton-test-name (db:test-get-testname waiton-test)) + (waiton-is-toplevel (equal? waiton-item-path "")) + (waiton-is-item (not waiton-is-toplevel)) + (waiton-is-completed (member waiton-state *common:ended-states*)) + (waiton-is-running (member waiton-state *common:running-states*)) + (waiton-is-killed (member waiton-state *common:badly-ended-states*)) + (waiton-is-ok (member waiton-status *common:well-ended-states*)) + ;; testname-b path-a path-b + (same-itempath (db:compare-itempaths ref-test-name waiton-item-path ref-item-path itemmaps)) ;; (equal? ref-item-path waiton-item-path))) + (real-ref-test-name (car (string-split ref-test-name "/"))) ;; I THINK ref-test-name SHOULD NEVER HAVE THE ITEM_PATH! + (test-and-ref-are-same (equal? real-ref-test-name waiton-test-name))) + (debug:print 4 *default-log-port* "waiton-test-name " waiton-test-name " ref-test-name: " ref-test-name " test-and-ref-are-same: " test-and-ref-are-same) + (set! ever-seen #t) + ;;(BB> "***consider waiton "waiton-test"/"waiton-item-path"***") + (cond + ;; case 0 - toplevel of an itemized test, at least one item in prereq has completed + ((and waiton-is-item ref-test-is-toplevel ref-test-itemized-mode waiton-is-completed) + (set! parent-waiton-met #t)) + + ;; case 1, non-item (parent test) is + ((and waiton-is-toplevel ;; this is the parent test of the waiton being examined + waiton-is-completed + ;;(BB> "cond1") + (or waiton-is-ok ref-test-toplevel-mode)) ;; itemmatch itemwait)))))) + (set! parent-waiton-met #t)) + ;; Special case for toplevel and KILLED + ((and waiton-is-toplevel ;; this is the parent test + waiton-is-killed + (member 'toplevel mode)) + ;;(BB> "cond2") + (set! parent-waiton-met #t)) + ;; For itemwait mode IFF the previous matching item is good the set parent-waiton-met + ((and ref-test-itemized-mode ref-test-is-item same-itempath) + ;;(BB> "cond3") + (if (and waiton-is-completed (or waiton-is-ok ref-test-toplevel-mode)) + (set! item-waiton-met #t) + (set! unmet-prereq-items (cons waiton-test unmet-prereq-items))) + (if (and waiton-is-toplevel ;; if upstream rollup test is completed, parent-waiton-met is set + (or waiton-is-completed waiton-is-running)) + (set! parent-waiton-met #t))) + ;; normal checking of parent items, any parent or parent item not ok blocks running + ((and waiton-is-completed + (or waiton-is-ok + (member 'toplevel mode)) ;; toplevel does not block on FAIL + (and waiton-is-ok (member 'itemmatch mode) ;; itemmatch blocks on not ok ;; TODO: THIS IS PROBABLY A BUG. ITEMMATCH AND ITEMWAIT ARE SYNONYMS!! WHAT HAPPENED OT ITEMWAIT??? + )) + ;;(BB> "cond4") + (set! item-waiton-met #t)) + ((and waiton-is-completed waiton-is-ok same-itempath) + ;;(BB> "cond5") + (set! item-waiton-met #t)) + ((and waiton-is-completed waiton-is-ok test-and-ref-are-same) ;; probably from [waitons] table + (set! item-waiton-met #t)) + (else + #t + ;;(BB> "condelse") + )))) + waiton-tests) + ;; both requirements, parent and item-waiton must be met to NOT add item to + ;; prereq's not met list + ;; (BB> + ;; "\n* waiton-tests "waiton-tests + ;; "\n* parent-waiton-met "parent-waiton-met + ;; "\n* item-waiton-met "item-waiton-met + ;; "\n* ever-seen "ever-seen + ;; "\n* ref-test-itemized-mode "ref-test-itemized-mode + ;; "\n* unmet-prereq-items "unmet-prereq-items + ;; "\n* result (pre) "result + ;; "\n* ever-seen "ever-seen + ;; "\n") + + (cond + ((and ref-test-itemized-mode ref-test-is-item (not (null? unmet-prereq-items))) + (set! result (append unmet-prereq-items result))) + ((not (or parent-waiton-met item-waiton-met)) + (set! result (append (if (null? waiton-tests) (list waitontest-name) waiton-tests) result))) ;; appends the string if the full record is not available + ;; if the test is not found then clearly the waiton is not met... + ;; (if (not ever-seen)(set! result (cons waitontest-name result))))) + ((not ever-seen) + (set! result (append (if (null? waiton-tests)(list waitontest-name) waiton-tests) result)))))) + waitons) + (delete-duplicates result))))) + +;;====================================================================== +;; To sync individual run +;;====================================================================== +(define (db:get-run-record-ids dbstruct target run keynames) + (let* ((backcons (lambda (lst item)(cons item lst))) + (all_tests '()) + (keystr (string-intersperse + (map (lambda (key val) + (conc key " like '" val "'")) + keynames + (string-split target "/")) + " AND ") + ) + (run-qry (conc "SELECT id FROM runs WHERE " keystr " and runname='" run"'")) + ; (test-qry (conc "SELECT id FROM tests WHERE run_id in (" run-qry ") and testname like '" test-patt "'")) + (run_ids + (db:with-db dbstruct #f #f + (lambda (dbdat db) + (sqlite3:fold-row backcons '() db run-qry)) + ) + ) + ) + run_ids) +) + +;;====================================================================== +;; Just for sync, procedures to make sync easy +;;====================================================================== + +;; get an alist of run ids and test/run, test_step/run pairs changed since time since-time +;; '((runs . (1 2 3 ...))(tests . ((5 . 1) (6 . 3) (6 . 2) (7 . 1) ... + +;; Retrieves record IDs from the database based on the timestamp of their last update. + +;; The function takes two arguments: dbstruct, which represents the database structure, and since-time, which is a timestamp indicating the time of the last update. +;; The function first defines a few helper functions, including backcons, which takes a list and an item and adds the item to the front of the list. +;; It then initializes several variables to empty lists: all_tests, all_test_steps, all_test_data, all_run_ids, and all_test_ids. +;; The function then retrieves a list of IDs for runs that have been changed since since-time using the db:get-changed-run-ids function. +;; It then filters the full list of run IDs to only include those that match the changed run IDs based on their modulo (num-run-dbs). +;; For each changed run ID, the function retrieves a list of test IDs, test step IDs, and test data IDs that have been updated since since-time. +;; It appends these IDs to the appropriate lists (all_tests, all_test_steps, and all_test_data) using the append and map functions. +;; The function then retrieves a list of run stat IDs that have been updated since since-time. +;; Finally, the function returns a list of associations between record types and their corresponding IDs: runs, tests, test_steps, test_data, and run_stats. +;; +(define (db:get-changed-record-ids dbstruct since-time) + ;; no transaction, allow the db to be accessed between the big queries + (let* ((backcons (lambda (lst item)(cons item lst))) + (all_tests '()) + (changed_run_dbs (db:get-changed-run-ids since-time)) ;; gets the rundb numbers + (all_run_ids + (db:with-db dbstruct #f #f + (lambda (dbdat db) + (sqlite3:fold-row backcons '() db "SELECT id FROM runs")) + ) + ) + (changed_run_ids (filter (lambda (run) (member (modulo run (num-run-dbs)) changed_run_dbs)) all_run_ids)) + (run_ids + (db:with-db dbstruct #f #f + (lambda (dbdat db) + (sqlite3:fold-row backcons '() db "SELECT id FROM runs WHERE last_update>=?" since-time)) + ) + ) + ) + (for-each + (lambda (run_id) + (set! all_tests + (append + (map (lambda (x) (cons x run_id)) + (db:with-db dbstruct run_id #f + (lambda (dbdat db) + (sqlite3:fold-row backcons '() db "SELECT id FROM tests WHERE run_id=? and last_update>=?" run_id since-time) + ) + ) + ) all_tests + ) + ) + ) + changed_run_ids + ) + (debug:print 2 *default-log-port* "run_ids = " run_ids) + (debug:print 2 *default-log-port* "all_tests = " all_tests) + + `((runs . ,run_ids) + (tests . ,all_tests) + ) + ) +) + + +(define (db:get-changed-record-test-ids dbstruct since-time run-id) + (let* ((backcons (lambda (lst item)(cons item lst))) + (all-tests (db:with-db dbstruct run-id #f + (lambda (dbdat db) + (sqlite3:fold-row backcons '() db "SELECT id FROM tests WHERE run_id=? and last_update>=?" run-id since-time))))) + + all-tests)) + +(define (db:get-changed-record-run-ids dbstruct since-time) + ;; no transaction, allow the db to be accessed between the big queries + (let* ((backcons (lambda (lst item)(cons item lst))) + (run_ids (db:with-db dbstruct #f #f + (lambda (dbdat db) + (sqlite3:fold-row backcons '() db "SELECT id FROM runs WHERE last_update>=?" since-time))))) + (debug:print 2 *default-log-port* "run_ids = " run_ids) + run_ids) +) + +(define (db:get-all-runids dbstruct) + (let* ((backcons (lambda (lst item)(cons item lst))) + (all_run_ids (db:with-db dbstruct #f #f + (lambda (dbdat db) + (sqlite3:fold-row backcons '() db "SELECT id FROM runs"))))) + +all_run_ids)) + +;;====================================================================== +;; moving watch dogs here due to dependencies +;;====================================================================== + +;; =not-used= ;;====================================================================== +;; =not-used= ;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp +;; =not-used= ;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server) +;; =not-used= ;; +;; =not-used= (define (common:readonly-watchdog dbstruct) +;; =not-used= (thread-sleep! 0.05) ;; delay for startup +;; =not-used= (debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.") +;; =not-used= ;; sync megatest.db to /tmp/.../megatst.db +;; =not-used= (let* ((sync-cool-off-duration 3) +;; =not-used= (golden-mtdb (dbr:dbstruct-mtdb dbstruct)) +;; =not-used= (golden-mtpath (db:dbdat-get-path golden-mtdb)) +;; =not-used= (tmp-mtdb (dbr:dbstruct-tmpdb dbstruct)) +;; =not-used= (tmp-mtpath (db:dbdat-get-path tmp-mtdb))) +;; =not-used= (debug:print-info 0 *default-log-port* "Read-only periodic sync thread started.") +;; =not-used= (let loop ((last-sync-time 0)) +;; =not-used= (debug:print-info 13 *default-log-port* "loop top tmp-mtpath="tmp-mtpath" golden-mtpath="golden-mtpath) +;; =not-used= (let* ((duration-since-last-sync (- (current-seconds) last-sync-time))) +;; =not-used= (debug:print-info 13 *default-log-port* "duration-since-last-sync="duration-since-last-sync) +;; =not-used= (if (and (not *time-to-exit*) +;; =not-used= (< duration-since-last-sync sync-cool-off-duration)) +;; =not-used= (thread-sleep! (- sync-cool-off-duration duration-since-last-sync))) +;; =not-used= (if (not *time-to-exit*) +;; =not-used= (let ((golden-mtdb-mtime (file-modification-time golden-mtpath)) +;; =not-used= (tmp-mtdb-mtime (file-modification-time tmp-mtpath))) +;; =not-used= (if (> golden-mtdb-mtime tmp-mtdb-mtime) +;; =not-used= (if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back +;; =not-used= (let ((res (db:multi-db-sync dbstruct 'old2new))) +;; =not-used= (debug:print-info 13 *default-log-port* "rosync called, " res " records transferred.")))) +;; =not-used= (loop (current-seconds))) +;; =not-used= #t))) +;; =not-used= (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath))) +;; =not-used= + +;; Get a lock from the no-sync-db for the from-db, then copy the from-db to the to-db, otherwise return #f + +(define (db:lock-and-sync no-sync-db from-db to-db) + (assert (not *db-sync-in-progress*) "FATAL: db:lock-and-sync called while a sync is in progress.") + (let* ((lockdat (db:no-sync-get-lock no-sync-db from-db)) + (gotlock (car lockdat)) + (locktime (cdr lockdat))) + (if gotlock + (begin + (file-copy from-db to-db #t) + (db:no-sync-del! no-sync-db from-db) + #t) + (begin + (debug:print 0 *default-log-port* "could not get lock for " from-db " from no-sync-db") + #f + )))) + +;; sync for filesystem local db writes +;; +(define (db:run-lock-and-sync no-sync-db) + (let* ((tmp-area (common:make-tmpdir-name *toppath* "")) + (dbfiles (glob (conc tmp-area"/.mtdb/*.db"))) + (sync-durations (make-hash-table))) + ;; (debug:print-info 0 *default-log-port* "lock-and-sync, dbfiles: "dbfiles) + (for-each + (lambda (file) + (let* ((fname (conc (pathname-file file) ".db")) + (fulln (conc *toppath*"/.mtdb/"fname)) + (time1 (if (file-exists? file) + (file-modification-time file) + (begin + (debug:print-info 0 *default-log-port* "Sync - I do not see file "file) + 1))) + (time2 (if (file-exists? fulln) + (file-modification-time fulln) + (begin + (debug:print-info 0 *default-log-port* "Sync - I do not see file "fulln) + 0))) + (changed (> time1 time2)) + (do-cp (cond + ((not (file-exists? fulln)) ;; shouldn't happen, but this might recover + (debug:print-info 0 *default-log-port* "File "fulln" not found! Copying "fname" to "fulln) + #t) + (changed ;; (and changed + ;; (> (- (current-seconds) time1) 3)) ;; if file is changed and three seconds have passed. + #t) + ((and changed *time-to-exit*) ;; last copy + #t) + (else + #f)))) + (if do-cp + (let* ((start-time (current-milliseconds))) + (debug:print-info 0 *default-log-port* "sync copy file: " fname", delta: " (- time1 time2) " seconds") + (db:lock-and-sync no-sync-db file fulln) + (hash-table-set! sync-durations (conc fname".db") (- (current-milliseconds) start-time))) + #;(debug:print-info 0 *default-log-port* "skipping sync...")))) + dbfiles) + (hash-table->alist sync-durations))) + +;; =not-used= ;; straight forward copy based sync +;; =not-used= ;; 1. for each .db fil +;; =not-used= ;; 2. next if file changed since last sync cycle +;; =not-used= ;; 2. next if time delta /tmp file to MTRA less than 3 seconds +;; =not-used= ;; 3. get a lock for the file in nosyncdb +;; =not-used= ;; 4. copy the file +;; =not-used= ;; 5. when copy is done release the lock +;; =not-used= ;; +;; =not-used= ;; DONE +;; =not-used= (define (server:writable-watchdog-copysync dbstruct) +;; =not-used= (thread-sleep! 0.05) ;; delay for startup +;; =not-used= (let ((legacy-sync (common:run-sync?)) +;; =not-used= (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300)) +;; =not-used= (debug-mode (debug:debug-mode 1)) +;; =not-used= (last-time (current-seconds)) ;; last time through the sync loop +;; =not-used= (no-sync-db (db:open-no-sync-db)) +;; =not-used= (sync-duration 0) ;; run time of the sync in milliseconds +;; =not-used= (tmp-area (common:make-tmpdir-name *toppath* ""))) +;; =not-used= ;; Sync moved to http-transport keep-running loop +;; =not-used= (debug:print-info 2 *default-log-port* "Periodic copy-based sync thread started. syncer is copy-sync, tmp-area is " tmp-area) +;; =not-used= (debug:print-info 3 *default-log-port* "watchdog starting. syncer is copy-sync pid="(current-process-id));; " this-wd-num="this-wd-num) +;; =not-used= +;; =not-used= (if (and legacy-sync (not *time-to-exit*)) +;; =not-used= (begin +;; =not-used= (debug:print-info 0 *default-log-port* "Server running, periodic copy-based sync started.") +;; =not-used= (let loop () +;; =not-used= +;; =not-used= ;; run the sync and print out durations +;; =not-used= (debug:print-info 0 *default-log-port* "Sync durations: "(db:run-lock-and-sync no-sync-db)) +;; =not-used= ;; keep going unless time to exit +;; =not-used= ;; +;; =not-used= (if (not *time-to-exit*) +;; =not-used= (let delay-loop ((count 0)) +;; =not-used= ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) +;; =not-used= +;; =not-used= (if (and (not *time-to-exit*) +;; =not-used= (< count 6)) ;; was 11, changing to 4. +;; =not-used= (begin +;; =not-used= (thread-sleep! 1) +;; =not-used= (delay-loop (+ count 1)))) +;; =not-used= (if (not *time-to-exit*) (loop)))) +;; =not-used= +;; =not-used= ;; ==> ;; time to exit, close the no-sync db here +;; =not-used= ;; ==> (db:no-sync-close-db no-sync-db stmt-cache) +;; =not-used= (if (common:low-noise-print 30) +;; =not-used= (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " +;; =not-used= *time-to-exit*" pid="(current-process-id) ))))))) + + +;; =not-used= (define (server:writable-watchdog-deltasync dbstruct) +;; =not-used= ;; This is awful complex and convoluted. Plan to redo? +;; =not-used= ;; for now ... skip it. +;; =not-used= +;; =not-used= (thread-sleep! 0.05) ;; delay for startup +;; =not-used= (let ((legacy-sync (common:run-sync?))) +;; =not-used= (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300)) +;; =not-used= (debug-mode (debug:debug-mode 1)) +;; =not-used= (last-time (current-seconds)) +;; =not-used= (no-sync-db (db:open-no-sync-db)) +;; =not-used= (stmt-cache #f) ;; (dbr:dbstruct-stmt-cache dbstruct)) +;; =not-used= (sync-duration 0) ;; run time of the sync in milliseconds +;; =not-used= (subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct)))) +;; =not-used= (debug:print-info 2 *default-log-port* "Periodic sync thread started.") +;; =not-used= (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id) );; " this-wd-num="this-wd-num) +;; =not-used= +;; =not-used= (if (and legacy-sync (not *time-to-exit*)) +;; =not-used= (begin +;; =not-used= (debug:print-info 0 *default-log-port* "Server running, periodic sync started.") +;; =not-used= (let loop () +;; =not-used= ;; sync for filesystem local db writes +;; =not-used= ;; +;; =not-used= (mutex-lock! *db-multi-sync-mutex*) +;; =not-used= (let* ((start-file (conc tmp-area "/.start-sync")) +;; =not-used= (end-file (conc tmp-area "/.end-sync")) +;; =not-used= +;; =not-used= (need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write +;; =not-used= (sync-in-progress *db-sync-in-progress*) +;; =not-used= (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5)) +;; =not-used= (should-sync (and (not *time-to-exit*) +;; =not-used= (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed +;; =not-used= (start-time (current-seconds)) +;; =not-used= (cpu-load-adj (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f))) +;; =not-used= (mt-mod-time (file-modification-time mtpath)) +;; =not-used= (last-sync-start (if (common:file-exists? start-file) +;; =not-used= (file-modification-time start-file) +;; =not-used= 0)) +;; =not-used= (last-sync-end (if (common:file-exists? end-file) +;; =not-used= (file-modification-time end-file) +;; =not-used= 10)) +;; =not-used= (sync-period (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period +;; =not-used= (recently-synced (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db! +;; =not-used= (< mt-mod-time last-sync-start))) +;; =not-used= (sync-done (<= last-sync-start last-sync-end)) +;; =not-used= (sync-stale (> start-time (+ last-sync-start sync-stale-seconds))) +;; =not-used= (will-sync (and (not *time-to-exit*) ;; do not start a sync if we are in the process of exiting +;; =not-used= (or need-sync should-sync) +;; =not-used= (or sync-done sync-stale) +;; =not-used= (not sync-in-progress) +;; =not-used= (not recently-synced)))) +;; =not-used= (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. need-sync="need-sync" sync-in-progress=" sync-in-progress +;; =not-used= " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync +;; =not-used= " sync-done=" sync-done " sync-period=" sync-period) +;; =not-used= (if (and (> sync-period 5) +;; =not-used= (common:low-noise-print 30 "sync-period")) +;; =not-used= (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds.")) +;; =not-used= ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced)) +;; =not-used= ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) +;; =not-used= (if will-sync (set! *db-sync-in-progress* #t)) +;; =not-used= (mutex-unlock! *db-multi-sync-mutex*) +;; =not-used= (if will-sync +;; =not-used= (let (;; (max-sync-duration (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK! +;; =not-used= (sync-start (current-milliseconds))) +;; =not-used= (with-output-to-file start-file (lambda ()(print (current-process-id)))) +;; =not-used= +;; =not-used= ;; put lock here +;; =not-used= +;; =not-used= ;; (if (or (not max-sync-duration) +;; =not-used= ;; (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally +;; =not-used= +;; =not-used= ;; +;; =not-used= +;; =not-used= (for-each +;; =not-used= (lambda (subdb) +;; =not-used= (let* (;;(dbstruct (db:setup)) +;; =not-used= (mtdb (dbr:subdb-mtdbdat subdb)) +;; =not-used= (mtdb (dbr:subdb-mtdbdat subdb)) +;; =not-used= (mtpath (db:dbdat-get-path mtdb)) +;; =not-used= (tmp-area (common:make-tmpdir-name *toppath* "")) +;; =not-used= (res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive +;; =not-used= (set! sync-duration (- (current-milliseconds) sync-start)) +;; =not-used= (if (> res 0) ;; some records were transferred, keep the db alive +;; =not-used= (begin +;; =not-used= (mutex-lock! *heartbeat-mutex*) +;; =not-used= (set! *db-last-access* (current-seconds)) +;; =not-used= (mutex-unlock! *heartbeat-mutex*) +;; =not-used= (debug:print-info 0 *default-log-port* "sync called, " res " records transferred.")) +;; =not-used= (debug:print-info 2 *default-log-port* "sync called but zero records transferred"))) +;; =not-used= ) +;; =not-used= subdbs))) +;; =not-used= +;; =not-used= (if will-sync +;; =not-used= (begin +;; =not-used= (mutex-lock! *db-multi-sync-mutex*) +;; =not-used= (set! *db-sync-in-progress* #f) +;; =not-used= (set! *db-last-sync* start-time) +;; =not-used= (with-output-to-file end-file (lambda ()(print (current-process-id)))) +;; =not-used= +;; =not-used= ;; release lock here +;; =not-used= +;; =not-used= (mutex-unlock! *db-multi-sync-mutex*))) +;; =not-used= (if (and debug-mode +;; =not-used= (> (- start-time last-time) 60)) +;; =not-used= (begin +;; =not-used= (set! last-time start-time) +;; =not-used= (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) +;; =not-used= +;; =not-used= ;; keep going unless time to exit +;; =not-used= ;; +;; =not-used= (if (not *time-to-exit*) +;; =not-used= (let delay-loop ((count 0)) +;; =not-used= ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) +;; =not-used= +;; =not-used= (if (and (not *time-to-exit*) +;; =not-used= (< count 6)) ;; was 11, changing to 4. +;; =not-used= (begin +;; =not-used= (thread-sleep! 1) +;; =not-used= (delay-loop (+ count 1)))) +;; =not-used= (if (not *time-to-exit*) (loop)))) +;; =not-used= +;; =not-used= ;; ;; time to exit, close the no-sync db here +;; =not-used= ;; (db:no-sync-close-db no-sync-db stmt-cache) +;; =not-used= (if (common:low-noise-print 30) +;; =not-used= (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) )))) +;; =not-used= )) + +(define (std-exit-procedure) + ;;(common:telemetry-log-close) + (on-exit (lambda () 0)) ;; why is this here? + ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*) + (let ((no-hurry (if *time-to-exit* ;; hurry up + #f + (begin + (set! *time-to-exit* #t) + #t)))) + (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.") + (if (and no-hurry + (debug:debug-mode 18)) + (dbmod:print-db-stats)) + (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds + (if *dbstruct-dbs* (db:close-all *dbstruct-dbs*)) ;; one second allocated + (if (list? *on-exit-procs*) + (for-each + (lambda (proc) + (proc)) + *on-exit-procs*)) + (if *task-db* + (let ((db (cdr *task-db*))) + (if (sqlite3:database? db) + (begin + (sqlite3:interrupt! db) + (sqlite3:finalize! db #t) + ;; (vector-set! *task-db* 0 #f) + (set! *task-db* #f))))) + (if (and *no-sync-db* + (sqlite3:database? *no-sync-db*)) + (sqlite3:finalize! *no-sync-db* #t)) + (if (and (not (args:get-arg "-server")) + *runremote* + (eq? (rmt:transport-mode) 'http)) + (begin + (debug:print-info 0 *default-log-port* "Closing all client connections...") + + ;; (http-transport:close-connections *runremote*) ;; <== no definition for this + + #;(http-client#close-all-connections!))) + ;; (if (and *runremote* + ;; (remote-conndat *runremote*)) + ;; (begin + ;; (http-client#close-all-connections!))) ;; for http-client + (if (not (eq? *default-log-port* (current-error-port))) + (close-output-port *default-log-port*)) + (set! *default-log-port* (current-error-port))) "Cleanup db exit thread")) + (th2 (make-thread (lambda () + (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...") + (if no-hurry + (begin + (thread-sleep! 5)) ;; give the clean up few seconds to do it's stuff + (begin + (thread-sleep! 2))) + (debug:print 4 *default-log-port* " ... done") + ) + "clean exit"))) + (thread-start! th1) + (thread-start! th2) + (thread-join! th1) + ) + ) + + 0) + +;; open an sql database inside a file lock +;; returns: db existed-prior-to-opening +;; RA => Returns a db handler; sets the lock if opened in writable mode +;; +;; (define *db-open-mutex* (make-mutex)) +;; +(define (db:lock-create-open fname initproc) + (let* ((parent-dir (or (pathname-directory fname)(current-directory))) ;; no parent? go local + (raw-fname (pathname-file fname)) + (dir-writable (file-write-access? parent-dir)) + (file-exists (common:file-exists? fname)) + (file-write (if file-exists + (file-write-access? fname) + dir-writable ))) + ;; (mutex-lock! *db-open-mutex*) ;; tried this mutex, not clear it helped. + (if file-write ;; dir-writable + (condition-case + (let* ((lockfname (conc fname ".lock")) + (readyfname (conc parent-dir "/.ready-" raw-fname)) + (readyexists (common:file-exists? readyfname))) + (if (not readyexists) + (common:simple-file-lock-and-wait lockfname)) + (let ((db (sqlite3:open-database fname))) + (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) + (sqlite3:execute db "PRAGMA synchronous = 0;") + (if (and (configf:lookup *configdat* "setup" "tmp_mode") (string-match "^/tmp/.*" fname)) + (begin + ;;(print "DEBUG: Setting tmp_mode for " fname) + (sqlite3:execute db (configf:lookup *configdat* "setup" "tmp_mode")) + ) + ) + (if (and (configf:lookup *configdat* "setup" "nfs_mode") (not (string-match "^/tmp/.*" fname))) + (begin + ;;(print "DEBUG: Setting nfs_mode for " fname) + (sqlite3:execute db (configf:lookup *configdat* "setup" "nfs_mode")) + ) + ) + (if (and (not (or (configf:lookup *configdat* "setup" "tmp_mode") (configf:lookup *configdat* "setup" "nfs_mode"))) + (configf:lookup *configdat* "setup" "use-wal") + (string-match "^/tmp/.*" fname)) ;; this is a file in /tmp + (sqlite3:execute db "PRAGMA journal_mode=WAL;") + (debug:print 2 *default-log-port* "Creating " fname " in NON-WAL mode.")) + (if (not file-exists) + (initproc db)) + (if (not readyexists) + (begin + (common:simple-file-release-lock lockfname) + (with-output-to-file + readyfname + (lambda () + (print "Ready at " + (seconds->year-work-week/day-time + (current-seconds))))))) + db)) + (exn (io-error) (debug:print 0 *default-log-port* "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")) + (exn (corrupt) (debug:print 0 *default-log-port* "ERROR: database " fname " is corrupt. Repair it to proceed.")) + (exn (busy) (debug:print 0 *default-log-port* "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back.")) + (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem.")) + (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn)))) + + (condition-case + (begin + (debug:print 2 *default-log-port* "WARNING: opening db in non-writable dir " fname) + (let ((db (sqlite3:open-database fname))) + (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) + (sqlite3:execute db "PRAGMA synchronous = 0;") + ;; (mutex-unlock! *db-open-mutex*) + db)) + (exn (io-error) (debug:print 0 *default-log-port* "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")) + (exn (corrupt) (debug:print 0 *default-log-port* "ERROR: database " fname " is corrupt. Repair it to proceed.")) + (exn (busy) (debug:print 0 *default-log-port* "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back.")) + (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem.")) + (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn)))) + ))) + + +;; traps to catch usage of functions that need to be tracked down + +(define (db:get-subdb . params) + (assert #f "FATAL: Call to db:get-subdb - needs to be fixed.")) + +(define (tasks:find-task-queue-records dbstruct target run-name test-patt state-patt action-patt) + (db:with-db + dbstruct + #f #f + (lambda (dbdat db) + (let ((res '())) + (sqlite3:for-each-row + (lambda (a . b) + (set! res (cons (cons a b) res))) + db "SELECT id,action,owner,state,target,name,testpatt,keylock,params FROM tasks_queue + WHERE + target = ? AND name = ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;" + target run-name state-patt action-patt test-patt) + res)))) + +(define (tasks:get-last dbstruct target runname) + (let ((res #f)) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (id . rem) + (set! res (apply vector id rem))) + db + (conc "SELECT id,action,owner,state,target,name,testpatt,keylock,params,creation_time,execution_time + FROM tasks_queue + WHERE + target = ? AND name =? + ORDER BY creation_time DESC LIMIT 1;") + target runname) + res)))) + +(define (tasks:set-state-given-param-key dbstruct param-key new-state) + (db:with-db + dbstruct #f #t + (lambda (dbdat db) + (sqlite3:execute db "UPDATE tasks_queue SET state=? WHERE params LIKE ?;" new-state param-key)))) + + +;; register a task +(define (tasks:add dbstruct action owner target runname testpatt params) + (db:with-db + dbstruct #f #t + (lambda (dbdat db) + (sqlite3:execute db "INSERT INTO tasks_queue (action,owner,state,target,name,testpatt,params,creation_time,execution_time) + VALUES (?,?,'new',?,?,?,?,strftime('%s','now'),0);" + action + owner + target + runname + testpatt + (if params params ""))))) + + + +) Index: debugprint.scm ================================================================== --- debugprint.scm +++ debugprint.scm @@ -32,10 +32,11 @@ srfi-1 (prefix mtargs args:)) (define setenv set-environment-variable!) + (define unsetenv unset-environment-variable!) )) ;;====================================================================== ;; debug stuff ;;====================================================================== Index: ezsteps.scm ================================================================== --- ezsteps.scm +++ ezsteps.scm @@ -27,378 +27,18 @@ (declare (uses debugprint)) (declare (uses items)) (declare (uses runconfig)) (declare (uses rmtmod)) (declare (uses mtargs)) +(declare (uses tasksmod)) (use srfi-1 posix regex srfi-69 directory-utils call-with-environment-variables posix-extras z3 csv typed-records pathname-expand matchable) (import commonmod configfmod debugprint rmtmod - (prefix mtargs args:)) - -(include "common_records.scm") -(include "key_records.scm") -(include "db_records.scm") -(include "run_records.scm") - - -;;(rmt:get-test-info-by-id run-id test-id) -> testdat - -;; TODO: deprecate me in favor of ezsteps.scm -;; -(define (ezsteps:runstep ezstep run-id test-id exit-info m tal testconfig all-steps-dat) - (let* ((stepname (car ezstep)) ;; do stuff to run the step - (stepinfo (cadr ezstep)) - ;; (let ((info (cadr ezstep))) - ;; (if (proc? info) "" info))) - ;; (stepproc (let ((info (cadr ezstep))) - ;; (if (proc? info) info #f))) - (stepparts (string-match (regexp "^(\\{([^\\}\\{]*)\\}\\s*|)(.*)$") stepinfo)) - (stepparams (if (and (list? stepparts) - (> (length stepparts) 1)) - (list-ref stepparts 2) - #f)) ;; for future use, {VAR=1,2,3}, run step for each - (paramparts (if (string? stepparams) - (map (lambda (x)(string-split x "=")) (string-split-fields "[^;]*=[^;]*" stepparams)) - '())) - (subrun (alist-ref "subrun" paramparts equal?)) - (stepcmd (if (and (list? stepparts) - (> (length stepparts) 2)) - (list-ref stepparts 3) - (conc "# error, no command for step "stepname))) - (script "") ; "#!/bin/bash\n") ;; yep, we depend on bin/bash FIXME!!!\ - (logpro-file (conc stepname ".logpro")) - (html-file (conc stepname ".html")) - (dat-file (conc stepname ".dat")) - (tconfig-logpro (configf:lookup testconfig "logpro" stepname)) - (logpro-used (common:file-exists? logpro-file)) - (mtexepath (common:get-megatest-exe-path))) - (setenv "MT_STEP_NAME" stepname) - (hash-table-set! all-steps-dat stepname `((params . ,paramparts))) - (debug:print 0 *default-log-port* "stepparts: " stepparts ", stepparams: " stepparams - ", paramparts: " paramparts ", subrun: " subrun ", stepcmd: " stepcmd) - - (if (and tconfig-logpro - (not logpro-used)) ;; no logpro file found but have a defn in the testconfig - (begin - (with-output-to-file logpro-file - (lambda () - (print ";; logpro file extracted from testconfig\n" - ";;") - (print tconfig-logpro))) - (set! logpro-used #t))) - - ;; NB// can safely assume we are in test-area directory - (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts - " stepparams: " stepparams " stepcmd: " stepcmd) - - ;; ;; first source the previous environment - ;; (let ((prev-env (conc ".ezsteps/" prevstep (if (string-search (regexp "csh") - ;; (get-environment-variable "SHELL")) ".csh" ".sh")))) - ;; (if (and prevstep (common:file-exists? prev-env)) - ;; (set! script (conc script "source " prev-env)))) - - ;; call the command using mt_ezstep - ;; (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd)) - - (debug:print 4 *default-log-port* "script: " script) - (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f) - ;; now launch the actual process - (call-with-environment-variables - (list (cons "PATH" mtexepath)) - (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1") - (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 - (pid #f)) - (let ((proc (lambda () - (set! pid (process-run "/bin/bash" (list "-c" cmd)))))) - (if subrun - (begin - (debug:print-info 0 *default-log-port* "Running "cmd" without MT_.* environment variables.") - (common:without-vars proc "^MT_.*")) - (proc))) - - (with-output-to-file "Makefile.ezsteps" - (lambda () - (print stepname ".log :") - (print "\t" cmd) - (if (common:file-exists? (conc stepname ".logpro")) - (print "\tlogpro " stepname ".logpro " stepname ".html < " stepname ".log")) - (print) - (print stepname " : " stepname ".log") - (print)) - #:append) - - (rmt:test-set-top-process-pid run-id test-id pid) - (let processloop ((i 0)) - (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) - (mutex-lock! m) - (launch:einf-pid-set! exit-info pid) ;; (vector-set! exit-info 0 pid) - (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status) - (launch:einf-exit-code-set! exit-info exit-code) ;; (vector-set! exit-info 2 exit-code) - (mutex-unlock! m) - (if (eq? pid-val 0) - (begin - (thread-sleep! 2) - (processloop (+ i 1)))) - ))))) - (debug:print-info 0 *default-log-port* "step " stepname " completed with exit code " (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2)) - ;; now run logpro if needed - (if logpro-used - (let* ((logpro-exe (or (getenv "LOGPRO_EXE") "logpro")) - (pid (process-run (conc "/bin/sh -c '"logpro-exe" "logpro-file " " (conc stepname ".html") " < " stepname ".log > /dev/null'")))) - (let processloop ((i 0)) - (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) - (mutex-lock! m) - ;; (make-launch:einf pid: pid exit-status: exit-status exit-code: exit-code) - (launch:einf-pid-set! exit-info pid) ;; (vector-set! exit-info 0 pid) - (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status) - (launch:einf-exit-code-set! exit-info exit-code) ;; (vector-set! exit-info 2 exit-code) - (mutex-unlock! m) - (if (eq? pid-val 0) - (begin - (thread-sleep! 2) - (processloop (+ i 1))))) - (debug:print-info 0 *default-log-port* "logpro for step " stepname " exited with code " (launch:einf-exit-code exit-info))))) ;; (vector-ref exit-info 2))))) - - (let ((exinfo (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2)) - (logfna (if logpro-used (conc stepname ".html") "")) - (comment #f)) - (if logpro-used - (let ((datfile (conc stepname ".dat"))) - ;; load the .dat file into the test_data table if it exists - (if (common:file-exists? datfile) - (set! comment (launch:load-logpro-dat run-id test-id stepname))) - (rmt:test-set-log! run-id test-id (conc stepname ".html")))) - (rmt:teststep-set-status! run-id test-id stepname "end" exinfo comment logfna)) - ;; set the test final status - (let* ((process-exit-status (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2)) - (this-step-status (cond - ((and (eq? process-exit-status 2) logpro-used) 'warn) ;; logpro 2 = warnings - ((and (eq? process-exit-status 3) logpro-used) 'check) ;; logpro 3 = check - ((and (eq? process-exit-status 4) logpro-used) 'waived) ;; logpro 4 = waived - ((and (eq? process-exit-status 5) logpro-used) 'abort) ;; logpro 5 = abort - ((and (eq? process-exit-status 6) logpro-used) 'skip) ;; logpro 6 = skip - ((eq? process-exit-status 0) 'pass) ;; logpro 0 = pass - (else 'fail))) - (overall-status - (cond - ((eq? (launch:einf-rollup-status exit-info) 2) 'warn) ;; rollup-status (vector-ref exit-info 3) - ((eq? (launch:einf-rollup-status exit-info) 3) 'check) - ((eq? (launch:einf-rollup-status exit-info) 4) 'waived) - ((eq? (launch:einf-rollup-status exit-info) 5) 'abort) - ((eq? (launch:einf-rollup-status exit-info) 6) 'skip) - ((eq? (launch:einf-rollup-status exit-info) 0) 'pass) - (else 'fail))) - (next-status (common:worse-status-sym this-step-status overall-status)) - - (next-state ;; "RUNNING") ;; WHY WAS THIS CHANGED TO NOT USE (null? tal) ?? - (cond - ((null? tal) ;; more to run? - "COMPLETED") - (else "RUNNING")))) - (debug:print 4 *default-log-port* "Exit value received: " (launch:einf-exit-code exit-info) " logpro-used: " logpro-used - " this-step-status: " this-step-status " overall-status: " overall-status - " next-status: " next-status " rollup-status: " (launch:einf-rollup-status exit-info)) ;; (vector-ref exit-info 3)) - (case next-status - ((warn) - (launch:einf-rollup-status-set! exit-info 2) ;; (vector-set! exit-info 3 2) ;; rollup-status - ;; NB// test-set-status! does rdb calls under the hood - (tests:test-set-status! run-id test-id next-state "WARN" - (if (eq? this-step-status 'warn) "Logpro warning found" #f) - #f)) - ((check) - (launch:einf-rollup-status-set! exit-info 3) ;; (vector-set! exit-info 3 3) ;; rollup-status - ;; NB// test-set-status! does rdb calls under the hood - (tests:test-set-status! run-id test-id next-state "CHECK" - (if (eq? this-step-status 'check) "Logpro check found" #f) - #f)) - ((waived) - (launch:einf-rollup-status-set! exit-info 4) ;; (vector-set! exit-info 3 3) ;; rollup-status - ;; NB// test-set-status! does rdb calls under the hood - (tests:test-set-status! run-id test-id next-state "WAIVED" - (if (eq? this-step-status 'check) "Logpro waived found" #f) - #f)) - ((abort) - (launch:einf-rollup-status-set! exit-info 5) ;; (vector-set! exit-info 3 4) ;; rollup-status - ;; NB// test-set-status! does rdb calls under the hood - (tests:test-set-status! run-id test-id next-state "ABORT" - (if (eq? this-step-status 'abort) "Logpro abort found" #f) - #f)) - ((skip) - (launch:einf-rollup-status-set! exit-info 6) ;; (vector-set! exit-info 3 4) ;; rollup-status - ;; NB// test-set-status! does rdb calls under the hood - (tests:test-set-status! run-id test-id next-state "SKIP" - (if (eq? this-step-status 'skip) "Logpro skip found" #f) - #f)) - ((pass) - (tests:test-set-status! run-id test-id next-state "PASS" #f #f)) - (else ;; 'fail - (launch:einf-rollup-status-set! exit-info 1) ;; (vector-set! exit-info 3 1) ;; force fail, this used to be next-state but that doesn't make sense. should always be "COMPLETED" - (tests:test-set-status! run-id test-id "COMPLETED" "FAIL" (conc "Failed at step " stepname) #f) - ))) - logpro-used)) - -(define (ezsteps:run-from testdat start-step-name run-one) - ;;# TODO - recapture item variables, debug repeated step eval; regen logpro from test - (let* ((do-update-test-state-status #f) - (test-run-dir ;; (filedb:get-path *fdb* - (db:test-get-rundir testdat)) ;; ) - (testconfig (read-config (conc test-run-dir "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) - (ezstepslst (hash-table-ref/default testconfig "ezsteps" '())) - (run-mutex (make-mutex)) - (rollup-status 0) - (rollup-status-string #f) - (rollup-status-sym #f) - (exit-info (vector #t #t #t)) - (test-id (db:test-get-id testdat)) - (run-id (db:test-get-run_id testdat)) - (test-name (db:test-get-testname testdat)) - (orig-test-state (db:test-get-state testdat)) - (orig-test-status (db:test-get-status testdat)) - (kill-job #f) ;; for future use (on re-factoring with launch.scm code - (the-step-params '())) ;; not exactly "functional" - - ;; keep trying till NFS deigns to populate test run dir on this host - (let loop ((count 5)) - (if (not (common:file-exists? test-run-dir)) - ;;(push-directory test-run-dir) - (if (> count 0) - (begin - (debug:print 0 *default-log-port* "WARNING: ezsteps attempting to run but test run directory " test-run-dir " is not there. Waiting and trying again " count " more times") - (sleep 3) - (loop (- count 1)))))) - - (debug:print-info 0 *default-log-port* "Running in directory " test-run-dir) - (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps")) - ;; if ezsteps was defined then we are sure to have at least one step but check anyway - - (if (not (> (length ezstepslst) 0)) - (message-window "ERROR: You can only re-run steps defined via ezsteps") - (begin - (let loop ((ezstep (car ezstepslst)) - (tal (cdr ezstepslst)) - (status-sym-so-far 'pass) - ;;(runflag #f) - (saw-start-step-name #f)) ;; flag used to skip steps when not starting at the beginning - (if (or (vector-ref exit-info 1) - (equal? (alist-ref 'keep-going prev-step-params) 'yes)) - (let* ((prev-step-params the-step-params) ;; need to snag this now - (stepname (car ezstep)) ;; do stuff to run the step - (logpro-used (common:file-exists? (conc test-run-dir "/" stepname ".logpro"))) - (stepinfo (cadr ezstep)) - (stepparts (string-match (regexp "^(\\{([^\\}]*)\\}\\s*|)(.*)$") stepinfo)) - (stepparms (list-ref stepparts 2)) ;; for future use, {VAR=1,2,3}, run step for each - (stepcmd (list-ref stepparts 3)) - (script (conc "mt_ezstep '"test-run-dir"' '"stepname"' '"stepcmd"'")) ;; call the command using mt_ezstep - (saw-start-step-name-next (or saw-start-step-name (equal? stepname start-step-name))) - (proceed-with-this-step - (or (not start-step-name) - (equal? stepname start-step-name) - (and saw-start-step-name (not run-one)) - saw-start-step-name-next - (and start-step-name (equal? stepname start-step-name)))) - ) - (debug:print 0 *default-log-port* "NOTE: stepparms=" stepparms) - (set! prev-step-params stepparms) - (set! do-update-test-state-status (and proceed-with-this-step (null? tal))) - ;;(BB> "stepname="stepname" proceed-with-this-step="proceed-with-this-step " do-update-test-state-status="do-update-test-state-status " orig-test-state="orig-test-state" orig-test-status="orig-test-status) - (cond - ((and (not proceed-with-this-step) (null? tal)) - 'done) - ((not proceed-with-this-step) - (loop (car tal) - (cdr tal) - status-sym-so-far - saw-start-step-name-next)) - (else - (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts - " stepparms: " stepparms " stepcmd: " stepcmd) - (debug:print 4 *default-log-port* "script: " script) - (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f) - - ;; now launch the script - (let ((pid (process-run script))) - (let processloop ((i 0)) - (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) - (mutex-lock! run-mutex) - (vector-set! exit-info 0 pid) - (vector-set! exit-info 1 exit-status) - (vector-set! exit-info 2 exit-code) - (mutex-unlock! run-mutex) - (if (eq? pid-val 0) - (begin - (thread-sleep! 1) - (processloop (+ i 1)))) - )) - (let ((exinfo (vector-ref exit-info 2)) - (logfna (if logpro-used (conc stepname ".html") ""))) - (rmt:teststep-set-status! run-id test-id stepname "end" exinfo #f logfna)) - - (if logpro-used - (rmt:test-set-log! run-id test-id (conc stepname ".html"))) - - ;; set the test final status - (let* ((this-step-status (cond - (logpro-used - (common:logpro-exit-code->status-sym (vector-ref exit-info 2))) - ((eq? (vector-ref exit-info 2) 0) - 'pass) - (else - 'fail))) - (overall-status-sym (common:worse-status-sym this-step-status status-sym-so-far)) - (overall-status-string (status-sym->string overall-status-sym))) - (debug:print 4 *default-log-port* "Exit value received: " (vector-ref exit-info 2) " logpro-used: " logpro-used - " this-step-status: " this-step-status " overall-status: " overall-status-sym) - ;;" next-status: " next-status " rollup-status: " rollup-status) - (set! rollup-status-string overall-status-string) - (set! rollup-status-sym overall-status-sym) - (tests:test-set-status! run-id test-id "RUNNING" overall-status-string #f #f))) - - (if (and - (not run-one) - (common:steps-can-proceed-given-status-sym rollup-status-sym) - (not (null? tal))) - (loop (car tal) - (cdr tal) - rollup-status-sym - saw-start-step-name-next))))) - (debug:print 4 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep))) - - ;; Once done with step/steps update the test record - ;; - (let* ((item-path (db:test-get-item-path testdat)) ;; (item-list->path itemdat)) - (testinfo (rmt:get-testinfo-state-status run-id test-id))) ;; refresh the testdat, call it iteminfo in case need prev/curr - ;; Am I completed? - (if (equal? (db:test-get-state testinfo) "RUNNING") ;; (not (equal? (db:test-get-state testinfo) "COMPLETED")) - (let ((new-state (if kill-job "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status - ;; "COMPLETED" - ;; (db:test-get-state testinfo))) ;; else preseve the state as set within the test - ) - (new-status rollup-status-string) - ) ;; (db:test-get-status testinfo))) - (debug:print-info 2 *default-log-port* "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result, rollup-status is " rollup-status) - (tests:test-set-status! run-id test-id - (if do-update-test-state-status new-state orig-test-state) - (if do-update-test-state-status new-status orig-test-status) - (args:get-arg "-m") #f) - ;; need to update the top test record if PASS or FAIL and this is a subtest - (if (and (not (equal? item-path "")) do-update-test-state-status) - (rmt:set-state-status-and-roll-up-items run-id test-name item-path new-state new-status #f)))) - ;; for automated creation of the rollup html file this is a good place... - (if (not (equal? item-path "")) - (tests:summarize-items run-id test-id test-name #f)) ;; don't force - just update if no - ))) - ;;(pop-directory) - rollup-status-string)) - -(define (ezsteps:spawn-run-from testdat start-step-name run-one) - (thread-start! - (make-thread - (lambda () - (ezsteps:run-from testdat start-step-name run-one)) - (conc "ezstep run single step " start-step-name " run-one="run-one))) - ) + (prefix mtargs args:) + tasksmod + ) ADDED ezstepsmod.scm Index: ezstepsmod.scm ================================================================== --- /dev/null +++ ezstepsmod.scm @@ -0,0 +1,309 @@ +;;====================================================================== +;; Copyright 2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +;;====================================================================== +;; Cpumod: +;; +;; Put things here don't fit anywhere else +;;====================================================================== + +(declare (unit ezstepsmod)) + +(declare (uses debugprint)) +(declare (uses mtargs)) +(declare (uses commonmod)) +(declare (uses configfmod)) +(declare (uses dbfile)) +(declare (uses dbmod)) +(declare (uses rmtmod)) +(declare (uses servermod)) +(declare (uses processmod)) +(declare (uses pgdb)) +(declare (uses mtmod)) +(declare (uses megatestmod)) +(declare (uses tasksmod)) +(declare (uses subrunmod)) +(declare (uses testsmod)) +(declare (uses runsmod)) +(declare (uses fsmod)) + +(use srfi-69) + +(module ezstepsmod + * + +(import scheme) +(cond-expand + (chicken-4 + + (import chicken + ports + data-structures + extras + files + matchable + pathname-expand + posix + posix-extras + regex + regex-case + sparse-vectors + + ) + (use srfi-69)) + (chicken-5 + (import (prefix sqlite3 sqlite3:) + chicken.base + chicken.condition + chicken.file + chicken.file.posix + chicken.io + chicken.pathname + chicken.port + chicken.process + chicken.process-context + chicken.process-context.posix + chicken.sort + chicken.string + chicken.time + chicken.time.posix + + matchable + md5 + message-digest + pathname-expand + regex + regex-case + system-information + + ))) + +;; imports common to ck4 and ck5 +(import srfi-1 + srfi-13 + srfi-18 + srfi-69 + typed-records + (prefix base64 base64:) + (prefix sqlite3 sqlite3:) + md5 + message-digest + z3 + csv + directory-utils + + debugprint + commonmod + configfmod + (prefix mtargs args:) + dbmod + dbfile + rmtmod + servermod + processmod + pgdb + mtmod + megatestmod + tasksmod + subrunmod + testsmod + runsmod + fsmod + ) + +(include "common_records.scm") +(include "key_records.scm") +(include "db_records.scm") +(include "run_records.scm") + + +;;(rmt:get-test-info-by-id run-id test-id) -> testdat + + +;; (define (message-window msg) +;; (iup:show +;; (iup:dialog +;; (iup:vbox +;; (iup:label msg #:margin "40x40"))))) + +(define (ezsteps:run-from testdat start-step-name run-one) + ;;# TODO - recapture item variables, debug repeated step eval; regen logpro from test + (let* ((do-update-test-state-status #f) + (test-run-dir ;; (filedb:get-path *fdb* + (db:test-get-rundir testdat)) ;; ) + (testconfig (read-config (conc test-run-dir "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) + (ezstepslst (hash-table-ref/default testconfig "ezsteps" '())) + (run-mutex (make-mutex)) + (rollup-status 0) + (rollup-status-string #f) + (rollup-status-sym #f) + (exit-info (vector #t #t #t)) + (test-id (db:test-get-id testdat)) + (run-id (db:test-get-run_id testdat)) + (test-name (db:test-get-testname testdat)) + (orig-test-state (db:test-get-state testdat)) + (orig-test-status (db:test-get-status testdat)) + (kill-job #f) ;; for future use (on re-factoring with launch.scm code + (the-step-params '())) ;; not exactly "functional" + + ;; keep trying till NFS deigns to populate test run dir on this host + (let loop ((count 5)) + (if (not (common:file-exists? test-run-dir)) + ;;(push-directory test-run-dir) + (if (> count 0) + (begin + (debug:print 0 *default-log-port* "WARNING: ezsteps attempting to run but test run directory " test-run-dir " is not there. Waiting and trying again " count " more times") + (sleep 3) + (loop (- count 1)))))) + + (debug:print-info 0 *default-log-port* "Running in directory " test-run-dir) + (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps")) + ;; if ezsteps was defined then we are sure to have at least one step but check anyway + + (if (not (> (length ezstepslst) 0)) + (debug:print 0 *default-log-port* "ERROR: You can only re-run steps defined via ezsteps") ;; convert to message-window somehow? + (begin + (let loop ((ezstep (car ezstepslst)) + (tal (cdr ezstepslst)) + (status-sym-so-far 'pass) + ;;(runflag #f) + (saw-start-step-name #f)) ;; flag used to skip steps when not starting at the beginning + (if (or (vector-ref exit-info 1) + (equal? (alist-ref 'keep-going the-step-params) 'yes)) ;; not sure this is the intent. was prev-step-params + (let* ((prev-step-params the-step-params) ;; need to snag this now + (stepname (car ezstep)) ;; do stuff to run the step + (logpro-used (common:file-exists? (conc test-run-dir "/" stepname ".logpro"))) + (stepinfo (cadr ezstep)) + (stepparts (string-match (regexp "^(\\{([^\\}]*)\\}\\s*|)(.*)$") stepinfo)) + (stepparms (list-ref stepparts 2)) ;; for future use, {VAR=1,2,3}, run step for each + (stepcmd (list-ref stepparts 3)) + (script (conc "mt_ezstep '"test-run-dir"' '"stepname"' '"stepcmd"'")) ;; call the command using mt_ezstep + (saw-start-step-name-next (or saw-start-step-name (equal? stepname start-step-name))) + (proceed-with-this-step + (or (not start-step-name) + (equal? stepname start-step-name) + (and saw-start-step-name (not run-one)) + saw-start-step-name-next + (and start-step-name (equal? stepname start-step-name)))) + ) + (debug:print 0 *default-log-port* "NOTE: stepparms=" stepparms) + (set! prev-step-params stepparms) + (set! do-update-test-state-status (and proceed-with-this-step (null? tal))) + ;;(BB> "stepname="stepname" proceed-with-this-step="proceed-with-this-step " do-update-test-state-status="do-update-test-state-status " orig-test-state="orig-test-state" orig-test-status="orig-test-status) + (cond + ((and (not proceed-with-this-step) (null? tal)) + 'done) + ((not proceed-with-this-step) + (loop (car tal) + (cdr tal) + status-sym-so-far + saw-start-step-name-next)) + (else + (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts + " stepparms: " stepparms " stepcmd: " stepcmd) + (debug:print 4 *default-log-port* "script: " script) + (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f) + + ;; now launch the script + (let ((pid (process-run script))) + (let processloop ((i 0)) + (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) + (mutex-lock! run-mutex) + (vector-set! exit-info 0 pid) + (vector-set! exit-info 1 exit-status) + (vector-set! exit-info 2 exit-code) + (mutex-unlock! run-mutex) + (if (eq? pid-val 0) + (begin + (thread-sleep! 1) + (processloop (+ i 1)))) + )) + (let ((exinfo (vector-ref exit-info 2)) + (logfna (if logpro-used (conc stepname ".html") ""))) + (rmt:teststep-set-status! run-id test-id stepname "end" exinfo #f logfna)) + + (if logpro-used + (rmt:test-set-log! run-id test-id (conc stepname ".html"))) + + ;; set the test final status + (let* ((this-step-status (cond + (logpro-used + (common:logpro-exit-code->status-sym (vector-ref exit-info 2))) + ((eq? (vector-ref exit-info 2) 0) + 'pass) + (else + 'fail))) + (overall-status-sym (common:worse-status-sym this-step-status status-sym-so-far)) + (overall-status-string (status-sym->string overall-status-sym))) + (debug:print 4 *default-log-port* "Exit value received: " (vector-ref exit-info 2) " logpro-used: " logpro-used + " this-step-status: " this-step-status " overall-status: " overall-status-sym) + ;;" next-status: " next-status " rollup-status: " rollup-status) + (set! rollup-status-string overall-status-string) + (set! rollup-status-sym overall-status-sym) + (tests:test-set-status! run-id test-id "RUNNING" overall-status-string #f #f))) + + (if (and + (not run-one) + (common:steps-can-proceed-given-status-sym rollup-status-sym) + (not (null? tal))) + (loop (car tal) + (cdr tal) + rollup-status-sym + saw-start-step-name-next))))) + (debug:print 4 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep))) + + ;; Once done with step/steps update the test record + ;; + (let* ((item-path (db:test-get-item-path testdat)) ;; (item-list->path itemdat)) + (testinfo (rmt:get-testinfo-state-status run-id test-id))) ;; refresh the testdat, call it iteminfo in case need prev/curr + ;; Am I completed? + (if (equal? (db:test-get-state testinfo) "RUNNING") ;; (not (equal? (db:test-get-state testinfo) "COMPLETED")) + (let ((new-state (if kill-job "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status + ;; "COMPLETED" + ;; (db:test-get-state testinfo))) ;; else preseve the state as set within the test + ) + (new-status rollup-status-string) + ) ;; (db:test-get-status testinfo))) + (debug:print-info 2 *default-log-port* "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result, rollup-status is " rollup-status) + (tests:test-set-status! run-id test-id + (if do-update-test-state-status new-state orig-test-state) + (if do-update-test-state-status new-status orig-test-status) + (args:get-arg "-m") #f) + ;; need to update the top test record if PASS or FAIL and this is a subtest + (if (and (not (equal? item-path "")) do-update-test-state-status) + (rmt:set-state-status-and-roll-up-items run-id test-name item-path new-state new-status #f)))) + ;; for automated creation of the rollup html file this is a good place... + (if (not (equal? item-path "")) + (tests:summarize-items run-id test-id test-name #f)) ;; don't force - just update if no + ))) + ;;(pop-directory) + rollup-status-string)) + +(define (ezsteps:spawn-run-from testdat start-step-name run-one) + (thread-start! + (make-thread + (lambda () + (ezsteps:run-from testdat start-step-name run-one)) + (conc "ezstep run single step " start-step-name " run-one="run-one))) + ) + + + +) ADDED fsmod.scm Index: fsmod.scm ================================================================== --- /dev/null +++ fsmod.scm @@ -0,0 +1,274 @@ +;;====================================================================== +;; Copyright 2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +;;====================================================================== +;; Megatestmod: +;; +;; Put things here don't fit anywhere else +;;====================================================================== + +(declare (unit fsmod)) + +(declare (uses debugprint)) +(declare (uses mtargs)) +(declare (uses configfmod)) +(declare (uses commonmod)) +(declare (uses processmod)) + +(use srfi-69) + +(module fsmod + * + +(import scheme) +(cond-expand + (chicken-4 + + (import chicken + ports + (prefix base64 base64:) + + (prefix sqlite3 sqlite3:) + data-structures + extras + files + matchable + md5 + message-digest + pathname-expand + posix + posix-extras + regex + regex-case + sparse-vectors + srfi-1 + srfi-18 + srfi-69 + typed-records + z3 + + (prefix mtargs args:) + ) + (use srfi-69)) + (chicken-5 + (import (prefix sqlite3 sqlite3:) + ;; data-structures + ;; extras + ;; files + ;; posix + ;; posix-extras + chicken.base + chicken.condition + chicken.file + chicken.file.posix + chicken.io + chicken.pathname + chicken.port + chicken.process + chicken.process-context + chicken.process-context.posix + chicken.sort + chicken.string + chicken.time + chicken.time.posix + + matchable + md5 + message-digest + pathname-expand + regex + regex-case + srfi-1 + srfi-18 + srfi-69 + typed-records + system-information + + ))) + +(import configfmod + commonmod + debugprint + processmod + ) + +;;====================================================================== +;; given path get free space, allows override in [setup] +;; with free-space-script /path/to/some/script.sh +;; +(define (get-df path) + (if (configf:lookup *configdat* "setup" "free-space-script") + (with-input-from-pipe + (conc (configf:lookup *configdat* "setup" "free-space-script") " " path) + (lambda () + (let ((res (read-line))) + (if (string? res) + (string->number res))))) + (get-unix-df path))) + +(define (get-free-inodes path) + (if (configf:lookup *configdat* "setup" "free-inodes-script") + (with-input-from-pipe + (conc (configf:lookup *configdat* "setup" "free-inodes-script") " " path) + (lambda () + (let ((res (read-line))) + (if (string? res) + (string->number res))))) + (get-unix-inodes path))) + +;;====================================================================== +;; D I S K S P A C E +;;====================================================================== + +(define (common:get-disk-space-used fpath) + (with-input-from-pipe (conc "/usr/bin/du -s " fpath) read)) + +(define (get-unix-df path) + (let* ((df-results (process:cmd-run->list (conc "df " path))) + (space-rx (regexp "([0-9]+)\\s+([0-9]+)%")) + (freespc #f)) + ;; (write df-results) + (for-each (lambda (l) + (let ((match (string-search space-rx l))) + (if match + (let ((newval (string->number (cadr match)))) + (if (number? newval) + (set! freespc newval)))))) + (car df-results)) + freespc)) + +(define (get-unix-inodes path) + (let* ((df-results (process:cmd-run->list (conc "df -i " path))) + (space-rx (regexp "([0-9]+)\\s+([0-9]+)%")) + (freenodes 0)) ;; 0 is a better failsafe than #f here. + ;; (write df-results) + (for-each (lambda (l) + (let ((match (string-search space-rx l))) + (if match + (let ((newval (string->number (cadr match)))) + (if (number? newval) + (set! freenodes newval)))))) + (car df-results)) + freenodes)) + +(define (common:check-space-in-dir dirpath required) + (let* ((dbspace (if (directory? dirpath) + (get-df dirpath) + 0))) + (list (> dbspace required) + dbspace + required + dirpath))) + +(define (get-uname . params) + (let* ((uname-res (process:cmd-run->list (conc "uname " (if (null? params) "-a" (car params))))) + (uname #f)) + (if (null? (car uname-res)) + "unknown" + (caar uname-res)))) + +;;====================================================================== +;; check space in dbdir and in megatest dir +;; returns: ok/not dbspace required-space +;; +(define (common:check-db-dir-space) + (let* ((required (string->number + ;; default is 1GB (or actually a billion bytes) This is the number of 1 kB blocks. + (or (configf:lookup *configdat* "setup" "dbdir-space-required") + "1000000"))) + (dbdir (common:make-tmpdir-name *toppath* "")) ;; (db:get-dbdir)) + (tdbspace (common:check-space-in-dir dbdir required)) + (mdbspace (common:check-space-in-dir *toppath* required))) + (sort (list tdbspace mdbspace) (lambda (a b) + (< (cadr a)(cadr b)))))) + +;;====================================================================== +;; check available space in dbdir, exit if insufficient +;; +(define (common:check-db-dir-and-exit-if-insufficient) + (let* ((spacedat (car (common:check-db-dir-space))) ;; look only at worst for now + (is-ok (car spacedat)) + (dbspace (cadr spacedat)) + (required (caddr spacedat)) + (dbdir (cadddr spacedat))) + (if (not is-ok) + (begin + (debug:print-error 0 *default-log-port* "Insufficient space in " dbdir ", require " required ", have " dbspace ", exiting now.") + (exit 1))))) + +;;====================================================================== +;; paths is list of lists ((name path) ... ) +;; +(define (common:get-disk-with-most-free-space disks minsize) + (let* ((best #f) + (bestsize 0) + (default-min-inodes-string "1000000") + (default-min-inodes (string->number default-min-inodes-string)) + (min-inodes (or (string->number (if (configf:lookup *configdat* "setup" "min_inodes") (configf:lookup *configdat* "setup" "min_inodes") default-min-inodes-string)) default-min-inodes))) + + (for-each + (lambda (disk-num) + (let* ((dirpath (cadr (assoc disk-num disks))) + (freespc (cond + ((not (directory? dirpath)) + (if (common:low-noise-print 300 "disks not a dir " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it.")) + -1) + ((not (file-write-access? dirpath)) + (if (common:low-noise-print 300 "disks not writeable " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it.")) + -1) + ((not (eq? (string-ref dirpath 0) #\/)) + (if (common:low-noise-print 300 "disks not a proper path " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it.")) + -1) + (else + (get-df dirpath)))) + (free-inodes (cond + ((not (directory? dirpath)) + (if (common:low-noise-print 300 "disks not a dir " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it.")) + -1) + ((not (file-write-access? dirpath)) + (if (common:low-noise-print 300 "disks not writeable " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it.")) + -1) + ((not (eq? (string-ref dirpath 0) #\/)) + (if (common:low-noise-print 300 "disks not a proper path " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it.")) + -1) + (else + (get-free-inodes dirpath)))) + ;;(free-inodes (get-free-inodes dirpath)) + ) + (debug:print 2 *default-log-port* "INFO: disk " disk-num " path " dirpath " free space " freespc " free inodes " free-inodes) + (if (and (> freespc bestsize)(> free-inodes min-inodes )) + (begin + (set! best (cons disk-num dirpath)) + (set! bestsize freespc))) + ;;(print "Processing: " disk-num " bestsize: " bestsize " best: " best " freespc: " freespc " min-inodes: " min-inodes " free-inodes: " free-inodes) + )) + (map car disks)) + (if (and best (> bestsize minsize)) + best + #f))) ;; #f means no disk candidate found + + +) Index: items.scm ================================================================== --- items.scm +++ items.scm @@ -30,190 +30,5 @@ (import commonmod configfmod debugprint) (include "common_records.scm") - -;; Puts out all combinations -(define (process-itemlist hierdepth curritemkey itemlist) - (let ((res '())) - (if (not hierdepth) - (set! hierdepth (length itemlist))) - (let loop ((hed (car itemlist)) - (tal (cdr itemlist))) - (if (null? tal) - (for-each (lambda (item) - (if (> (length curritemkey) (- hierdepth 2)) - (set! res (append res (list (append curritemkey (list (list (car hed) item)))))))) - (cadr hed)) - (begin - (for-each (lambda (item) - (set! res (append res (process-itemlist hierdepth (append curritemkey (list (list (car hed) item))) tal)))) - (cadr hed)) - (loop (car tal)(cdr tal))))) - res)) - -;; (item-assoc->item-list '(("ANIMAL" "Elephant Lion")("SEASON" "Spring Fall"))) -;; => ((("ANIMAL" "Elephant") ("SEASON" "Spring")) -;; (("ANIMAL" "Elephant") ("SEASON" "Fall")) -;; (("ANIMAL" "Lion") ("SEASON" "Spring")) -;; (("ANIMAL" "Lion") ("SEASON" "Fall"))) -(define (item-assoc->item-list itemsdat) - (if (and itemsdat (not (null? itemsdat))) - (let ((itemlst (filter (lambda (x) - (list? x)) - (map (lambda (x) - (debug:print 6 *default-log-port* "item-assoc->item-list x: " x) - (if (< (length x) 2) - (begin - (debug:print-error 0 *default-log-port* "malformed items spec " (string-intersperse x " ")) - (list (car x)'())) - (let* ((name (car x)) - (items (cadr x)) - (ilist (list name (if (string? items) - (string-split items) - '())))) - (if (null? ilist) - (debug:print-error 0 *default-log-port* "No items specified for " name)) - ilist))) - itemsdat)))) - (let ((debuglevel 5)) - (debug:print 5 *default-log-port* "item-assoc->item-list: itemsdat => itemlst ") - (if (debug:debug-mode 5) - (begin - (pp itemsdat) - (print " => ") - (pp itemlst)))) - (if (> (length itemlst) 0) - (process-itemlist #f '() itemlst) - '())) - '())) ;; return a list consisting on a single null list for non-item runs - ;; Nope, not now, return null as of 6/6/2011 - -;; (item-table->item-list '(("ANIMAL" "Elephant Lion")("SEASON" "Spring Winter"))) -;; => ((("ANIMAL" "Elephant")("SEASON" "Spring")) -;; (("ANIMAL" "Lion") ("SEASON" "Winter"))) -(define (item-table->item-list itemtable) - (let ((newlst (map (lambda (x) - (if (> (length x) 1) - (list (car x) - (string-split (cadr x))) - (list x '()))) - itemtable)) - (res '())) ;; a list of items - (let loop ((indx 0) - (item '()) ;; an item will be ((KEYNAME1 VAL1)(KEYNAME2 VAL2) ...) - (elflag #f)) - (for-each (lambda (row) - (let ((rowname (car row)) - (rowdat (cadr row))) - (set! item (append item - (list - (if (< indx (length rowdat)) - (let ((new (list rowname (list-ref rowdat indx)))) - ;; (debug:print 0 *default-log-port* "New: " new) - (set! elflag #t) - new - ) ;; i.e. had at least on legit value to use - (list rowname "-"))))))) - newlst) - (if elflag - (begin - (set! res (append res (list item))) - (loop (+ indx 1) - '() - #f))) - res))) - ;; Nope, not now, return null as of 6/6/2011 - -(define (items:check-valid-items class item) - (let ((valid-values (let ((s (configf:lookup *configdat* "validvalues" class))) - (if s (string-split s) #f)))) - (if valid-values - (if (member item valid-values) - item #f) - item))) - -;; '(("k1" "k2" "k3") -;; ("a" "b" "c") -;; ("d" "e" "f")) -;; -;; => '((("k1" "a")("k2" "b")("k3" "c")) -;; (("k1" "d")("k2" "e")("k3" "f"))) -;; -(define (items:first-row-intersperse data) - (if (< (length data) 2) - '() - (let ((header (car data)) - (rows (cdr data))) - (map (lambda (row) - (map list header row)) - rows)))) - -;; k1/k2/k3 -;; a/b/c -;; d/e/f -;; => '(("k1" "k2" "k3") -;; ("a" "b" "c") -;; ("d" "e" "f")) -;; -;; => '((("k1" "a")("k2" "b")("k3" "c")) -;; (("k1" "d")("k2" "e")("k3" "f"))) -;; -(define (items:read-items-file fname ftype) ;; 'sxml 'slash 'space - (if (and fname (file-exists? fname)) - (items:first-row-intersperse (case ftype - ((slash space) - (let ((splitter (case ftype - ((slash) (lambda (x)(string-split x "/"))) - (else string-split)))) - (debug:print 0 *default-log-port* "Reading " fname " of type " ftype) - (with-input-from-file fname - (lambda () - (let loop ((inl (read-line)) - (res '())) - (if (eof-object? inl) - res - (loop (read-line)(cons (splitter inl) res)))))))) - ((sxml)(with-input-from-file fname read)) - (else (debug:print 0 *default-log-port* "items file type " ftype " not recognised")))) - (begin - (if fname (debug:print 0 *default-log-port* "no items file " fname " found")) - '()))) - -(define (items:get-items-from-config tconfig) - (let* ((slashf (configf:lookup tconfig "itemopts" "slash")) ;; a/b/c\nd/e/f\n ... - (sxmlf (configf:lookup tconfig "itemopts" "sxml")) ;; '(("a" "b" "c")("d" "e" "f") ...) - (spacef (configf:lookup tconfig "itemopts" "space")) ;; a b c\nd e f\n ... - (have-items (hash-table-ref/default tconfig "items" #f)) - (have-itable (hash-table-ref/default tconfig "itemstable" #f)) - (items (hash-table-ref/default tconfig "items" '())) - (itemstable (hash-table-ref/default tconfig "itemstable" '()))) - (debug:print 5 *default-log-port* "items: " items " itemstable: " itemstable) - (set! items (map (lambda (item) - (if (procedure? (cadr item)) - (list (car item)((cadr item))) ;; evaluate the proc - item)) - items)) - (set! itemstable (map (lambda (item) - (if (procedure? (cadr item)) - (list (car item)((cadr item))) ;; evaluate the proc - item)) - itemstable)) - (if (and have-items (null? items)) (debug:print 0 *default-log-port* "WARNING:[items] section in testconfig but no entries defined")) - (if (and have-itable (null? itemstable))(debug:print 0 *default-log-port* "WARNNG:[itemstable] section in testconfig but no entries defined")) - (if (or (not (null? items)) - (not (null? itemstable)) - slashf - sxmlf - spacef) - (append (item-assoc->item-list items) - (item-table->item-list itemstable) - (items:read-items-file slashf 'slash) - (items:read-items-file sxmlf 'sxml) - (items:read-items-file spacef 'space)) - '(())))) - -;; (pp (item-assoc->item-list itemdat)) - - - Index: js-path.scm ================================================================== --- js-path.scm +++ js-path.scm @@ -13,6 +13,7 @@ ;; GNU General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;; + (define *java-script-lib* (conc (common:get-install-area) "/share/js/jquery-3.1.0.slim.min.js")) Index: keys.scm ================================================================== --- keys.scm +++ keys.scm @@ -34,61 +34,5 @@ (import commonmod configfmod debugprint) -(include "key_records.scm") -(include "common_records.scm") - -(define (keys->keystr keys) ;; => key1,key2,key3,additiona1, ... - (string-intersperse keys ",")) - -(define (args:usage . a) #f) - -;;====================================================================== -;; key <=> target routines -;;====================================================================== - -;; This invalidates using "/" in item names. Every key will be -;; available via args:get-arg as :keyfield. Since this only needs to -;; be called once let's use it to set the environment vars -;; -;; The setting of :keyfield in args should be turned off ASAP -;; -(define (keys:target-set-args keys target ht) - (if target - (let ((vals (string-split target "/"))) - (if (eq? (length vals)(length keys)) - (for-each (lambda (key val) - (setenv key val) - (if ht (hash-table-set! ht (conc ":" key) val))) - keys - vals) - (debug:print-error 0 *default-log-port* "wrong number of values in " target ", should match " keys)) - vals) - (debug:print 4 *default-log-port* "ERROR: keys:target-set-args called with no target."))) - -;; given the keys (a list of vectors or a list of keys) and a target return a keyval list -;; keyval list ( (key1 val1) (key2 val2) ...) -(define (keys:target->keyval keys target) - (let* ((targlist (string-split target "/")) - (numkeys (length keys)) - (numtarg (length targlist)) - (targtweaked (if (> numkeys numtarg) - (append targlist (make-list (- numkeys numtarg) "")) - targlist))) - (map (lambda (key targ) - (list key targ)) - keys targtweaked))) - -;;====================================================================== -;; config file related routines -;;====================================================================== - -(define keys:config-get-fields common:get-fields) -(define (keys:make-key/field-string confdat) - (let ((fields (configf:get-section confdat "fields"))) - (string-join - (map (lambda (field)(conc (car field) " " (cadr field))) - fields) - ","))) - Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -32,11 +32,15 @@ (declare (uses db)) (declare (uses rmtmod)) (declare (uses ezsteps)) ;; (declare (uses dbmod)) (declare (uses dbfile)) +(declare (uses dbmod)) (declare (uses mtargs)) +(declare (uses mtmod)) +(declare (uses megatestmod)) +(declare (uses tasksmod)) (use regex regex-case base64 sqlite3 srfi-18 directory-utils posix posix-extras z3 call-with-environment-variables csv hostinfo typed-records pathname-expand matchable) @@ -53,1725 +57,12 @@ (import commonmod processmod configfmod rmtmod debugprint - ;; dbmod - dbfile) - -;;====================================================================== -;; ezsteps -;;====================================================================== - -;; ezsteps were going to be coded as -;; stepname[,predstep1,predstep2 ...] [{VAR1=first,second,third}] command to execute -;; BUT -;; now are -;; stepname {VAR=first,second,third ...} command ... -;; where the {VAR=first,second,third ...} is optional. - -;; given an exit code and whether or not logpro was used calculate OK/BAD -;; return #t if we are ok, #f otherwise -(define (steprun-good? logpro exitcode stepparms) - (or (eq? exitcode 0) - (and logpro (member exitcode '( 2 4 6))) - (let* ((params (alist-ref 'params stepparms)) ;; get the params section - (keep-going (if params - (alist-ref "keep-going" params equal?) - #f))) - (debug:print 0 *default-log-port* "keep-going=" keep-going) - (and keep-going (equal? (car keep-going) "yes"))))) - -;; if handed a string, process it, else look for MT_CMDINFO -(define (launch:get-cmdinfo-assoc-list #!key (encoded-cmd #f)) - (let ((enccmd (if encoded-cmd encoded-cmd (getenv "MT_CMDINFO")))) - (if enccmd - (common:read-encoded-string enccmd) - '()))) - -;; 0 1 2 3 -(defstruct launch:einf (pid #t)(exit-status #t)(exit-code #t)(rollup-status 0)) - -;; return (conc status ": " comment) from the final section so that -;; the comment can be set in the step record in launch.scm -;; -(define (launch:load-logpro-dat run-id test-id stepname) - (let ((cname (conc stepname ".dat"))) - (if (common:file-exists? cname) - (let* ((dat (read-config cname #f #f)) - (csvr (db:logpro-dat->csv dat stepname)) - (csvt (let-values (((fmt-cell fmt-record fmt-csv) (make-format ","))) - (fmt-csv (map list->csv-record csvr)))) - (status (configf:lookup dat "final" "exit-status")) - (msg (configf:lookup dat "final" "message"))) - (if csvt ;; this if blocked stack dump caused by .dat file from logpro being 0-byte. fixed by upgrading logpro - (rmt:csv->test-data run-id test-id csvt) - (debug:print 0 *default-log-port* "ERROR: no csvdat exists for run-id: " run-id " test-id: " test-id " stepname: " stepname ", check that logpro version is 1.15 or newer")) - ;; (debug:print-info 13 *default-log-port* "Error: run-id/test-id/stepname="run-id"/"test-id"/"stepname" => bad csvr="csvr) - ;; ) - (cond - ((equal? status "PASS") "PASS") ;; skip the message part if status is pass - (status (conc (configf:lookup dat "final" "exit-status") ": " (if msg msg "no message"))) - (else #f))) - #f))) - -(define (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m) - ;; (let-values - ;; (((pid exit-status exit-code) - ;; (run-n-wait fullrunscript))) - ;; (tests:test-set-status! test-id "RUNNING" "n/a" #f #f) - ;; Since we should have a clean slate at this time there is no need to do - ;; any of the other stuff that tests:test-set-status! does. Let's just - ;; force RUNNING/n/a - - ;; (thread-sleep! 0.3) - ;; (tests:test-force-state-status! run-id test-id "RUNNING" "n/a") - (rmt:set-state-status-and-roll-up-items run-id test-name item-path "RUNNING" #f #f) - ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here - - ;; if there is a runscript do it first - (if fullrunscript - (let ((pid (process-run fullrunscript))) - (rmt:test-set-top-process-pid run-id test-id pid) - (let loop ((i 0)) - (let-values - (((pid-val exit-status exit-code) (process-wait pid #t))) - (mutex-lock! m) - (launch:einf-pid-set! exit-info pid) ;; (vector-set! exit-info 0 pid) - (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status) - (launch:einf-exit-code-set! exit-info exit-code) ;; (vector-set! exit-info 2 exit-code) - (launch:einf-rollup-status-set! exit-info exit-code) ;; (vector-set! exit-info 3 exit-code) ;; rollup status - (mutex-unlock! m) - (if (eq? pid-val 0) - (begin - (thread-sleep! 2) - (loop (+ i 1))) - ))))) - ;; then, if runscript ran ok (or did not get called) - ;; do all the ezsteps (if any) - (if (or ezsteps subrun) - (let* ((test-run-dir (tests:get-test-path-from-environment)) - (testconfig ;; (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here? - ;; NOTE: it is tempting to turn off force-create of testconfig but dynamic - ;; ezstep names need a full re-eval here. - (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs))) - (ezstepslst (if (hash-table? testconfig) - (hash-table-ref/default testconfig "ezsteps" '()) - #f))) - (if testconfig - (hash-table-set! *testconfigs* test-name testconfig) ;; cached for lazy reads later ... - (begin - (launch:setup) - (debug:print 0 *default-log-port* "WARNING: no testconfig found for " test-name " in search path:\n " - (string-intersperse (tests:get-tests-search-path *configdat*) "\n ")))) - ;; after all that, still no testconfig? Time to abort - (if (not testconfig) - (begin - (debug:print-error 0 *default-log-port* "Failed to resolve megatest.config, runconfigs.config and testconfig issues. Giving up now") - (exit 1))) - - ;; create a proc for the subrun if requested, save that proc in the ezsteps table as the last entry - ;; 1. get section [runarun] - ;; 2. unset MT_* vars - ;; 3. fix target - ;; 4. fix runname - ;; 5. fix testpatt or calculate it from contour - ;; 6. launch the run - ;; 7. roll up the run result and or roll up the logpro processed result - (when (configf:lookup testconfig "subrun" "runwait") ;; we use runwait as the flag that a subrun is requested - (subrun:initialize-toprun-test testconfig test-run-dir) - (let* ((mt-cmd (subrun:launch-cmd test-run-dir (configf:lookup testconfig "subrun" "runwait")))) - (debug:print-info 0 *default-log-port* "Subrun command is \"" mt-cmd "\"") - (set! ezsteps #t) ;; set the needed flag - (set! ezstepslst - (append (or ezstepslst '()) - (list (list "subrun" (conc "{subrun=true} " mt-cmd))))))) - - ;; process the ezsteps - (if ezsteps - (let* ((all-steps-dat (make-hash-table))) ;; keep all the info around as stepname ==> alist; where 'params is the params list (add other stuff as needed) - (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps")) - ;; if ezsteps was defined then we are sure to have at least one step but check anyway - (if (not (> (length ezstepslst) 0)) - (debug:print-error 0 *default-log-port* "ezsteps defined but ezstepslst is zero length") - (let ((all-step-names (map car ezstepslst)) - (status-file (file-open "ezsteps.status" (+ open/append open/wronly open/creat))) - ) - (setenv "MT_STEP_NAMES" (string-intersperse all-step-names " ")) - (let loop ((ezstep (car ezstepslst)) - (tal (cdr ezstepslst)) - (prevstep #f)) - (debug:print-info 0 *default-log-port* "Processing ezstep \"" (string-intersperse ezstep " ") "\"") - ;; check exit-info (vector-ref exit-info 1) - (if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1) - (let* ((logpro-used (ezsteps:runstep ezstep run-id test-id exit-info m tal testconfig all-steps-dat)) - (stepname (car ezstep)) - (stepparms (hash-table-ref all-steps-dat stepname))) - (setenv "MT_STEP_NAME" stepname) - (pp (hash-table->alist all-steps-dat)) - ;; if logpro-used read in the stepname.dat file - (if (and logpro-used (common:file-exists? (conc stepname ".dat"))) - (launch:load-logpro-dat run-id test-id stepname)) - - (file-write status-file (conc stepname " " (launch:einf-exit-code exit-info) "\n")) - - (if (steprun-good? logpro-used (launch:einf-exit-code exit-info) stepparms) - (if (not (null? tal)) - (loop (car tal) (cdr tal) stepname)) - (debug:print 0 *default-log-port* "WARNING: step " (car ezstep) " failed. Stopping"))) - (debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep))) - (file-close status-file) - ) - - )))))) - -(define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags) - (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "60"))) - (start-seconds (current-seconds)) - (calc-minutes (lambda () - (inexact->exact - (round - (- - (current-seconds) - start-seconds))))) - (kill-tries 0)) - ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area) - ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) - (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10) - - (let loop ((minutes (calc-minutes)) - (cpu-load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) - (disk-free (get-df (current-directory))) - (last-sync (current-seconds))) - (let* ((over-time (> (current-seconds) (+ last-sync update-period))) - (new-cpu-load (let* ((load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) - (delta (abs (- load cpu-load)))) - (if (> delta 0.1) ;; don't bother updating with small changes - load - #f))) - (new-disk-free (let* ((df (if over-time ;; only get df every 30 seconds - (get-df (current-directory)) - disk-free)) - (delta (abs (- df disk-free)))) - (if (and (> df 0) - (> (/ delta df) 0.1)) ;; (> delta 200) ;; ignore changes under 200 Meg - df - #f))) - (do-sync (or new-cpu-load new-disk-free over-time)) - - (test-info (rmt:get-test-state-status-by-id run-id test-id)) - (state (car test-info));; (db:test-get-state test-info)) - (status (cdr test-info));; (db:test-get-status test-info)) - (killreq (equal? state "KILLREQ")) - (kill-reason "no kill reason specified") - (kill-job? #f)) - ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period)) - (cond - (killreq - (set! kill-reason "KILLING TEST since received kill request (KILLREQ)") - (set! kill-job? #t)) - ((and runtlim (> (- (current-seconds) start-seconds) runtlim)) - (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim)) - (set! kill-job? #t)) - ((equal? status "DEAD") - (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) - (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.") - ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING - (set! kill-job? #f))) - - (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync) - (if (common:low-noise-print 600 "run zombie") ;; every five minutes is plenty - (launch:handle-zombie-tests run-id)) - (when do-sync - (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)) - - (if kill-job? - (begin - (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason) - (mutex-lock! m) - ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this - ;; section and the runit section? Or add a loop that tries three times with a 1/4 second - ;; between tries? - (let* ((pid1 (launch:einf-pid exit-info)) ;; (vector-ref exit-info 0)) - (pid2 (rmt:test-get-top-process-pid run-id test-id)) - (pids (delete-duplicates (filter number? (list pid1 pid2))))) - (if (not (null? pids)) - (begin - (for-each - (lambda (pid) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "Unable to kill process with pid " pid ", possibly already killed.") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) - (debug:print 0 *default-log-port* "WARNING: Request received to kill job " pid) ;; " (attempt # " kill-tries ")") - (debug:print-info 0 *default-log-port* "Signal mask=" (signal-mask)) - ;; (if (process:alive? pid) - ;; (begin - (map (lambda (pid-num) - (process-signal pid-num signal/term)) - (process:get-sub-pids pid)) - (thread-sleep! 5) - ;; (if (process:process-alive? pid) - (map (lambda (pid-num) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* " .... had trouble sending kill to " pid-num ", exn=" exn) - #f) - (process-signal pid-num signal/kill))) - (process:get-sub-pids pid)))) - ;; (debug:print-info 0 *default-log-port* "not killing process " pid " as it is not alive")))) - pids) - ;; BB: question to Matt -- does the tests:test-state-status! encompass rollup to toplevel? If not, should it? - (tests:test-set-status! run-id test-id "KILLED" "KILLED" (conc (args:get-arg "-m")" "kill-reason) #f)) ;; BB ADDED kill-reason -- confirm OK with Matt - (begin - (debug:print-error 0 *default-log-port* "Nothing to kill, pid1=" pid1 ", pid2=" pid2) - (tests:test-set-status! run-id test-id "KILLED" "FAILED TO KILL" (conc (args:get-arg "-m")" "kill-reason) #f) ;; BB ADDED kill-reason -- confirm OK with Matt - ))) - (mutex-unlock! m) - ;; no point in sticking around. Exit now. But run end of run before exiting? - (launch:end-of-run-check run-id) - (exit))) - (if (hash-table-ref/default misc-flags 'keep-going #f) ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta - (begin - (thread-sleep! 6) ;; was 3 - (loop (calc-minutes) - (or new-cpu-load cpu-load) - (or new-disk-free disk-free) - (if do-sync (current-seconds) last-sync)))))) - (tests:update-central-meta-info run-id test-id (commonmod:get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional - - -;; read testconfig and create .logpro and script files -;; - use #f for tconfigreg to re-read the testconfigs from disk -;; -(define (launch:extract-scripts-logpro test-dir test-name item-path tconfigreg-in) - (let* ((tconfigreg (or tconfigreg-in - (tests:get-all))) - (tconfig-fname (conc test-dir "/.testconfig")) - (tconfig-tmpfile (conc tconfig-fname ".tmp")) - (tconfig (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs))) - (scripts (configf:get-section tconfig "scripts")) - (logpros (configf:get-section tconfig "logpro"))) - ;; create .testconfig file - (configf:write-alist tconfig tconfig-tmpfile) - (file-move tconfig-tmpfile tconfig-fname #t) - (delete-file* ".final-status") - - ;; extract scripts from testconfig and write them to files in test run dir - (for-each - (lambda (scriptdat) - (match scriptdat - ((name content) - (debug:print-info 2 *default-log-port* "Creating script "(current-directory)"/"name) - (with-output-to-file name - (lambda () - (print content))) - (change-file-mode name (bitwise-ior perm/irwxg perm/irwxu))) - (else - (debug:print-info 0 "Invalid script definiton found in [scripts] section of testconfig. \"" scriptdat "\"")))) - scripts) - - ;; extract logpro from testconfig and write them to files in test run dir - (for-each - (lambda (logprodat) - (match logprodat - ((name content) - (debug:print-info 2 *default-log-port* "Creating logpro file "(current-directory)"/"name".logpro") - (with-output-to-file (conc name".logpro") - (lambda () - (print content) - ;; (change-file-mode name (bitwise-ior perm/irwxg perm/irwxu)) - ))) - (else - (debug:print-info 0 "Invalid logpro definiton found in [logpro] section of testconfig. \"" logprodat "\"")))) - logpros))) - -(define (launch:execute encoded-cmd) - (let* ((cmdinfo (common:read-encoded-string encoded-cmd)) - (tconfigreg #f)) - (setenv "MT_CMDINFO" encoded-cmd) - ;;(bb-check-path msg: "launch:execute incoming") - (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed) - ;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1)) - (let* ((testpath (assoc/default 'testpath cmdinfo)) ;; testpath is the test spec area - (top-path (assoc/default 'toppath cmdinfo)) - (work-area (assoc/default 'work-area cmdinfo)) ;; work-area is the test run area - (test-name (assoc/default 'test-name cmdinfo)) - (runscript (assoc/default 'runscript cmdinfo)) - (ezsteps (assoc/default 'ezsteps cmdinfo)) - (subrun (assoc/default 'subrun cmdinfo)) - ;; (runremote (assoc/default 'runremote cmdinfo)) - ;; (transport (assoc/default 'transport cmdinfo)) ;; not used - ;; (serverinf (assoc/default 'serverinf cmdinfo)) - ;; (port (assoc/default 'port cmdinfo)) - (serverurl (assoc/default 'serverurl cmdinfo)) - (homehost (assoc/default 'homehost cmdinfo)) - (run-id (assoc/default 'run-id cmdinfo)) - (test-id (assoc/default 'test-id cmdinfo)) - (target (assoc/default 'target cmdinfo)) - (areaname (assoc/default 'areaname cmdinfo)) - (itemdat (assoc/default 'itemdat cmdinfo)) - (env-ovrd (assoc/default 'env-ovrd cmdinfo)) - (set-vars (assoc/default 'set-vars cmdinfo)) ;; pre-overrides from -setvar - (runname (assoc/default 'runname cmdinfo)) - (megatest (assoc/default 'megatest cmdinfo)) - (runtlim (assoc/default 'runtlim cmdinfo)) - (contour (assoc/default 'contour cmdinfo)) - (item-path (item-list->path itemdat)) - (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo)) - (keys #f) - (keyvals #f) - (fullrunscript (if (not runscript) - #f - (if (substring-index "/" runscript) - runscript ;; use unadultered if contains slashes - (let ((fulln (conc work-area "/" runscript))) - (if (and (common:file-exists? fulln) - (file-execute-access? fulln)) - fulln - runscript))))) ;; assume it is on the path - (check-work-area (lambda () - ;; NFS might not have propagated the directory meta data to the run host - give it time if needed - (let loop ((count 0)) - (if (or (common:directory-exists? work-area) - (> count 10)) - (change-directory work-area) - (begin - (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found") - (thread-sleep! 10) - (loop (+ count 1))))) - - (if (not (string=? (common:real-path work-area)(common:real-path (current-directory)))) - (begin - (debug:print 0 *default-log-port* - "INFO: we are expecting to be in directory " work-area "\n" - " but we are actually in the directory " (current-directory) "\n" - " doing another change dir.") - (change-directory work-area))) - - ;; spot check that the files in testpath are available. Too often NFS delays cause problems here. - (let ((files (glob (conc testpath "/*"))) - (bad-files '())) - (for-each - (lambda (fullname) - (let* ((fname (pathname-strip-directory fullname)) - (targn (conc work-area "/" fname))) - (if (not (file-exists? targn)) - (set! bad-files (cons fname bad-files))))) - files) - (if (not (null? bad-files)) - (begin - (debug:print 0 *default-log-port* "INFO: test data from " testpath " not copied properly or filesystem problems causing data to not be found. Re-running the copy command.") - (debug:print 0 *default-log-port* "INFO: missing files from " work-area ": " (string-intersperse bad-files ", ")) - (launch:test-copy testpath work-area)))) - ;; one more time, change to the work-area directory - (change-directory work-area))) - ) ;; let* - - (if contour (setenv "MT_CONTOUR" contour)) - - ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ... - ;; - (setenv "MT_TESTSUITENAME" areaname) - (setenv "MT_RUN_AREA_HOME" top-path) - (set! *toppath* top-path) - (change-directory *toppath*) ;; temporarily switch to the run area home - (setenv "MT_TEST_RUN_DIR" work-area) - - (launch:setup) ;; should be properly in the run area home now - - (if contour (setenv "MT_CONTOUR" contour)) - - ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ... - ;; - (setenv "MT_TESTSUITENAME" areaname) - (setenv "MT_RUN_AREA_HOME" top-path) - (set! *toppath* top-path) - (change-directory *toppath*) ;; temporarily switch to the run area home - (setenv "MT_TEST_RUN_DIR" work-area) - - (launch:setup) ;; should be properly in the run area home now - - (set! tconfigreg (tests:get-all)) ;; mapping of testname => test source path - (let ((sighand (lambda (signum) - ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting - (if (eq? signum signal/stop) - (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting.")) - (set! *time-to-exit* #t) - (debug:print 0 *default-log-port* "Received signal " signum ", cleaning up before exit (set this test to COMPLETED/ABORT) . Please wait...") - (let ((th1 (make-thread (lambda () - (debug:print 0 *default-log-port* "set test to COMPLETED/ABORT begin.") - (rmt:test-set-state-status run-id test-id "COMPLETED" "ABORT" "received kill signal") - (debug:print 0 *default-log-port* "set test to COMPLETED/ABORT complete.") - (debug:print 0 *default-log-port* "Killed by signal " signum ". Exiting") - (exit 1)))) - (th2 (make-thread (lambda () - (thread-sleep! 20) - (debug:print 0 *default-log-port* "Done") - (exit 4))))) - (thread-start! th2) - (thread-start! th1) - (thread-join! th2))))) - (set-signal-handler! signal/int sighand) - (set-signal-handler! signal/term sighand) - ) ;; (set-signal-handler! signal/stop sighand) - - ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART, - ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY* - ;; - (let* ((test-info (let loop ((tries 0)) - (let ((tinfo (rmt:get-test-info-by-id run-id test-id))) - (if tinfo - tinfo - (if (> tries 5) - #f - (begin - (thread-sleep! (+ 1 (* tries 10))) - (loop (+ tries 1)))))))) - (test-host (if test-info - (db:test-get-host test-info) - (begin - (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.") - (exit)))) - (test-pid (db:test-get-process_id test-info))) - (cond - ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag. - ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun - (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request") - ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") - - (rmt:general-call 'set-test-start-time run-id test-id) - (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) - ) ;; prime it for running - ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART")) - (if (process:alive-on-host? test-host test-pid) - (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed") - (exit 1))) - ((member (db:test-get-state test-info) '("COMPLETED")) ;; we do NOT want to re-run COMPLETED jobs. Mark as NOT_STARTED to run! - (debug:print 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed") - (debug:print 0 *default-log-port* "exiting with status 1") - (exit 1)) - ((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))) - ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") - (rmt:general-call 'set-test-start-time run-id test-id) - (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)) - (else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")) - (debug:print 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed") - (debug:print 0 *default-log-port* "exiting with status 1") - (exit 1)))) - - ;; cleanup prior execution's steps - (rmt:delete-steps-for-test! run-id test-id) - - (debug:print 2 *default-log-port* "Executing " test-name " (id: " test-id ") on " (get-host-name)) - (set! keys (rmt:get-keys)) - ;; (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) ;; these may be needed by the launching process - ;; one of these is defunct/redundant ... - (if (not (launch:setup force-reread: #t)) - (begin - (debug:print 0 *default-log-port* "Failed to setup, exiting") - ;; (sqlite3:finalize! db) - ;; (sqlite3:finalize! tdb) - (exit 1))) - ;; validate that the test run area is available - (check-work-area) - - ;; still need to go back to run area home for next couple steps - (change-directory *toppath*) - - ;; NOTE: Current order is to process runconfigs *before* setting the MT_ vars. This - ;; seems non-ideal but could well break stuff - ;; BUG? BUG? BUG? - - (let ((rconfig (full-runconfigs-read)) ;; (read-config (conc *toppath* "/runconfigs.config") #f #t sections: (list "default" target)))) - (wconfig (read-config "waivers.config" #f #t sections: `( "default" ,target )))) ;; read the waivers config if it exists - ;; (setup-env-defaults (conc *toppath* "/runconfigs.config") run-id (make-hash-table) keyvals target) - ;; (set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id)) - ;; Now have runconfigs data loaded, set environment vars - (for-each - (lambda (section) - (for-each - (lambda (varval) - (let ((var (car varval)) - (val (cadr varval))) - (if (and (string? var)(string? val)) - (begin - (safe-setenv var (configf:eval-string-in-environment val))) ;; val) - (debug:print-error 0 *default-log-port* "bad variable spec, " var "=" val)))) - (configf:get-section rconfig section))) - (list "default" target))) - ;;(bb-check-path msg: "launch:execute post block 1") - - ;; NFS might not have propagated the directory meta data to the run host - give it time if needed - (let loop ((count 0)) - (if (or (common:file-exists? work-area) - (> count 10)) - (change-directory work-area) - (begin - (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found") - (thread-sleep! 10) - (loop (+ count 1))))) - - ;; now we can switch to the work-area? - (change-directory work-area) - ;;(bb-check-path msg: "launch:execute post block 1.5") - ;; (change-directory work-area) - (set! keyvals (keys:target->keyval keys target)) - ;; apply pre-overrides before other variables. The pre-override vars must not - ;; clobbers things from the official sources such as megatest.config and runconfigs.config - (if (string? set-vars) - (let ((varpairs (string-split set-vars ","))) - (debug:print 4 *default-log-port* "varpairs: " varpairs) - (map (lambda (varpair) - (let ((varval (string-split varpair "="))) - (if (eq? (length varval) 2) - (let ((var (car varval)) - (val (cadr varval))) - (debug:print 1 *default-log-port* "Adding pre-var/val " var " = " val " to the environment") - (setenv var val))))) - varpairs))) - ;;(bb-check-path msg: "launch:execute post block 2") - (for-each - (lambda (varval) - (let ((var (car varval)) - (val (cadr varval))) - (if val - (setenv var val) - (begin - (debug:print-error 0 *default-log-port* "required variable " var " does not have a valid value. Exiting") - (exit))))) - (list - (list "MT_TEST_RUN_DIR" work-area) - (list "MT_TEST_NAME" test-name) - (list "MT_ITEM_INFO" (conc itemdat)) - (list "MT_ITEMPATH" item-path) - (list "MT_RUNNAME" runname) - (list "MT_MEGATEST" megatest) - (list "MT_TARGET" target) - (list "MT_LINKTREE" (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree")) - (list "MT_TESTSUITENAME" (common:get-testsuite-name)))) - ;;(bb-check-path msg: "launch:execute post block 3") - - (let ((tmppath (getenv "PATH"))) - (if (string-search tmppath " ") - (debug:print 0 *default-log-port* "WARNING: spaces in PATH are not supported.")) - (if mt-bindir-path (setenv "PATH" (conc tmppath":"mt-bindir-path)))) - ;;(bb-check-path msg: "launch:execute post block 4") - ;; (change-directory top-path) - ;; Can setup as client for server mode now - ;; (client:setup) - - ;; environment overrides are done *before* the remaining critical envars. - (alist->env-vars env-ovrd) - ;;(bb-check-path msg: "launch:execute post block 41") - (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) - ;;(bb-check-path msg: "launch:execute post block 42") - (set-item-env-vars itemdat) - ;;(bb-check-path msg: "launch:execute post block 43") - (let ((blacklist (configf:lookup *configdat* "setup" "blacklistvars"))) - (if blacklist - (let ((vars (string-split blacklist))) - (save-environment-as-files "megatest" ignorevars: vars) - (for-each (lambda (var) - (unsetenv var)) - vars)) - (save-environment-as-files "megatest"))) - ;;(bb-check-path msg: "launch:execute post block 44") - ;; open-run-close not needed for test-set-meta-info - ;; (tests:set-full-meta-info #f test-id run-id 0 work-area) - ;; (tests:set-full-meta-info test-id run-id 0 work-area) - (tests:set-full-meta-info #f test-id run-id 0 work-area 10) - - ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here - - (if (args:get-arg "-xterm") - (set! fullrunscript "xterm") - (if (and fullrunscript - (common:file-exists? fullrunscript) - (not (file-execute-access? fullrunscript))) - (system (conc "chmod ug+x " fullrunscript)))) - (launch:extract-scripts-logpro work-area test-name item-path tconfigreg) - -;;;;; ;; We are about to actually kick off the test -;;;;; ;; so this is a good place to remove the records for -;;;;; ;; any previous runs -;;;;; ;; (db:test-remove-steps db run-id testname itemdat) -;;;;; ;; now is also a good time to write the .testconfig file -;;;;; (let* ((tconfig-fname (conc work-area "/.testconfig")) -;;;;; (tconfig-tmpfile (conc tconfig-fname ".tmp")) -;;;;; (tconfig (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs))) -;;;;; (scripts (configf:get-section tconfig "scripts")) -;;;;; (precmd (configf:lookup tconfig ) -;;;;; ;; create .testconfig file -;;;;; (configf:write-alist tconfig tconfig-tmpfile) -;;;;; (file-move tconfig-tmpfile tconfig-fname #t) -;;;;; (delete-file* ".final-status") -;;;;; -;;;;; ;; extract scripts from testconfig and write them to files in test run dir -;;;;; (for-each -;;;;; (lambda (scriptdat) -;;;;; (match scriptdat -;;;;; ((name content) -;;;;; (with-output-to-file name -;;;;; (lambda () -;;;;; (print content) -;;;;; (change-file-mode name (bitwise-ior perm/irwxg perm/irwxu))))) -;;;;; (else -;;;;; (debug:print-info 0 "Invalid script definiton found in [scripts] section of testconfig. \"" scriptdat "\"")))) -;;;;; scripts)) - ;; - - (let* ((m (make-mutex)) - (kill-job? #f) - (exit-info (make-launch:einf pid: #t exit-status: #t exit-code: #t rollup-status: 0)) ;; pid exit-status exit-code (i.e. process was successfully run) rollup-status - (job-thread #f) - ;; (keep-going #t) - (misc-flags (let ((ht (make-hash-table))) - (hash-table-set! ht 'keep-going #t) - ht)) - (runit (lambda () - (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m))) - (monitorjob (lambda () - (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags))) - (th1 (make-thread monitorjob "monitor job")) - (th2 (make-thread runit "run job")) - (tconfig (tests:get-testconfig test-name item-path tconfigreg #t)) - (propagate-exit-code (configf:lookup *configdat* "setup" "propagate-exit-code")) - (propagate-status-list '("FAIL" "KILLED" "ABORT" "DEAD" "CHECK" "SKIP" "WAIVED")) - (test-status "not set") - (precmd (configf:lookup tconfig "setup" "precmd")) - (postcmd (configf:lookup tconfig "setup" "postcmd"))) - ;; first, if set, run the precmd - (if precmd ;; (file-exists? precmd)(file-execute-access? precmd)) - (system precmd)) ;; up to test author to put nbfake if desired. - (set! job-thread th2) - (thread-start! th1) - (thread-start! th2) - (thread-join! th2) - (debug:print-info 0 *default-log-port* "Megatest execute of test " test-name ", item path " item-path " complete. Notifying the db ...") - (debug:print-info 2 *default-log-port* "exit-info = " exit-info) - (hash-table-set! misc-flags 'keep-going #f) - (thread-join! th1) - (thread-sleep! 1) ;; givbe thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec. - (mutex-lock! m) - (let* ((item-path (item-list->path itemdat)) - ;; only state and status needed - use lazy routine - (testinfo (rmt:get-testinfo-state-status run-id test-id))) - ;; Am I completed? - (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) - (let ((new-state (if kill-job? "KILLED" "COMPLETED")) - (new-status (cond - ((not (launch:einf-exit-status exit-info)) "FAIL") ;; job failed to run ... (vector-ref exit-info 1) - ((eq? (launch:einf-rollup-status exit-info) 0) ;; (vector-ref exit-info 3) - ;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO) - (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS")) - ((eq? (launch:einf-rollup-status exit-info) 1) "FAIL") ;; (vector-ref exit-info 3) - ((eq? (launch:einf-rollup-status exit-info) 2) ;; (vector-ref exit-info 3) - ;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN) - (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN")) - ((eq? (launch:einf-rollup-status exit-info) 3) "CHECK") - ((eq? (launch:einf-rollup-status exit-info) 4) "WAIVED") - ((eq? (launch:einf-rollup-status exit-info) 5) "ABORT") - ((eq? (launch:einf-rollup-status exit-info) 6) "SKIP") - (else "FAIL"))) - ) ;; (db:test-get-status testinfo))) - (debug:print-info 0 *default-log-port* "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (launch:einf-exit-status exit-info) " and rollup-status of " (launch:einf-rollup-status exit-info)) - - ;; Leave a .final-status file for each sub-test - (tests:save-final-status run-id test-id) - - (tests:test-set-status! run-id - test-id - new-state - new-status - (args:get-arg "-m") #f) - ;; need to update the top test record if PASS or FAIL and this is a subtest - ;; NO NEED TO CALL set-state-status-and-roll-up-items HERE, THIS IS DONE IN set-state-status-and-roll-up-items called by tests:test-set-status! - ) - ) - - - ;; for automated creation of the rollup html file this is a good place... - (if (not (equal? item-path "")) - (tests:summarize-items run-id test-id test-name #f)) - ;; BUG was this meant to be the antecnt of the if above? - ;; BUG was this meant to be the antecnt of the if above? - (tests:summarize-test run-id test-id) ;; don't force - just update if no - ;; Leave a .final-status file for the top level test - (tests:save-final-status run-id test-id) - (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) ;; end of let* - - (mutex-unlock! m) - (launch:end-of-run-check run-id ) - (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " - work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n") - - - (set! test-status (db:test-get-status (rmt:get-testinfo-state-status run-id test-id))) - - ;; If the propagate-exit-code option has been set in the megatest config, and the test status matches the list, set the exit code to 1. - - (if postcmd - (system postcmd)) - - (if (and propagate-exit-code (string=? propagate-exit-code "yes") (member test-status propagate-status-list)) - (begin - (debug:print 1 *default-log-port* "Setting exit status to 1 because of test status of " test-status) - (set! *globalexitstatus* 1) - ) - ) - - (if (not (launch:einf-exit-status exit-info)) - (exit 4)))) - ))) - -;; Spec for End of test -;; At end of each test call, after marking self as COMPLETED do run-state-status-rollup -;; At transition to run COMPLETED/X do hooks -;; Definition: test_dead if event_time + duration + 1 minute? < current_time AND -;; we can prove the process is not alive (ssh host pstree -A pid) -;; if dead safe to mark the test as killed in the db -;; State/status table -;; new -;; 100% COMPLETED/ (PASS,FAIL,ABORT etc.) ==> COMPLETED / X where X is same as itemized rollup -;; > 3 RUNNING with not test_dead do nothing (run should already be RUNNING/ na -;; > 0 RUNNING and test_dead then send KILLREQ ==> COMPLETED -;; 0 RUNNING ==> this is actually the first condition, should not get here -(define *last-rollup* 0) -(define (launch:end-of-run-check run-id ) - (let* ((not-completed-cnt (rmt:get-not-completed-cnt run-id)) - (running-cnt (rmt:get-count-tests-running-for-run-id run-id)) - (all-test-launched (rmt:get-var (conc "lunch-complete-" run-id))) - (current-state-status (rmt:get-run-state-status run-id)) - (current-state (car current-state-status)) ;; (rmt:get-run-state run-id)) - (current-status (cdr current-state-status))) ;; (rmt:get-run-status run-id))) - ;;get-vars run-id to query metadata table to check if all completed. if all-test-launched = yes then only not-completed-cnt = 0 means everyting is completed if no entry found in the table do nothing - (debug:print 0 *default-log-port* "Running test cnt :" running-cnt) - ;; - ;; TODO: add a final rollup when run is done (if there isn't one already) - ;; - (if (or (< running-cnt 3) ;; have only few running - (> (- (current-seconds) *last-rollup*) 10)) ;; or haven't rolled up in past ten seconds - (begin - (rmt:set-state-status-and-roll-up-run run-id current-state current-status) - (set! *last-rollup* (current-seconds)))) - (runs:update-junit-test-reporter-xml run-id) - (cond - ((and all-test-launched (eq? not-completed-cnt 0) (equal? all-test-launched "yes" )) - (if (and (equal? (rmt:get-var (conc "end-of-run-" run-id)) "no") (common:simple-lock (conc "endOfRun" run-id))) - (begin - (debug:print 4 *default-log-port* "look for post hook. currseconds: " (current-seconds) " EOR " (rmt:get-var (conc "end-of-run-" run-id))) - (debug:print 0 *default-log-port* "End of Run Detected.") - (rmt:set-var (conc "end-of-run-" run-id) "yes") - ;(thread-sleep! 10) - (runs:run-post-hook run-id) - (debug:print 4 *default-log-port* "currseconds: " (current-seconds)" eor: " (rmt:get-var (conc "end-of-run-" run-id))) - (common:simple-unlock (conc "endOfRun" run-id))) - (debug:print 0 *default-log-port* "End of Run Detected but not running post hook. This should happen when eor is set to yes. This will happen only when 2 tests exit at smae time. eor= " (rmt:get-var (conc "end-of-run-" run-id))))) - ((> running-cnt 3) - (debug:print 0 *default-log-port* "There are " running-cnt " tests running." )) - ((> running-cnt 0) - (debug:print 0 *default-log-port* "running cnt > 0 but <= 3 kill-running-tests-if-dead" ) - (let ((kill-cnt (launch:kill-tests-if-dead run-id))) - (if (and all-test-launched (equal? all-test-launched "yes") (eq? kill-cnt running-cnt)) - (launch:end-of-run-check run-id)))) ;;todo - (else (debug:print 0 *default-log-port* "Should it get here?? May be everything is not launched yet. Running test cnt:" running-cnt " Not completed test cnt:" not-completed-cnt) - (let* ((not-completed-tests (rmt:get-tests-for-run run-id "%" `("NOT_STARTED" "RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f))) - (if (> (length not-completed-tests) 0) - (let loop ((running-test (car not-completed-tests)) - (tal (cdr not-completed-tests))) - (let* ((test-name (vector-ref running-test 2)) - (item-path (vector-ref running-test 11))) - (debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed") - (if (not (null? tal)) - (loop (car tal) (cdr tal))))))))))) - -(define (launch:kill-tests-if-dead run-id) - (let* ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f))) - (let loop ((running-test (car running-tests)) - (tal (cdr running-tests)) - (kill-cnt 0)) - (let* ((test-name (vector-ref running-test 2)) - (item-path (vector-ref running-test 11)) - (test-id (vector-ref running-test 0)) - (host (vector-ref running-test 6)) - (pid (rmt:test-get-top-process-pid run-id test-id)) - (event-time (vector-ref running-test 5)) - (duration (vector-ref running-test 12)) - (flag 0) - (curr-time (current-seconds))) - (if (and (< (+ event-time duration 600) curr-time) (not (commonmod:is-test-alive host pid))) ;;test has not updated duration in last 10 min then likely its not running but confirm before marking it as killed - (begin - (debug:print 0 *default-log-port* "test " test-name "/" item-path " needs to be killed") - (set! flag 1) - (rmt:set-state-status-and-roll-up-items run-id test-name item-path "KILLREQ" "n/a" #f))) - (if (not (null? tal)) - (loop (car tal) (cdr tal) (+ kill-cnt flag)) - (+ kill-cnt flag)))))) - -;; DO NOT USE - caching of configs is handled in launch:setup now. -;; -(define (launch:cache-config) - ;; if we have a linktree and -runtests and -target and the directory exists dump the config - ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg - (if (and *configdat* - (or (args:get-arg "-run") - (args:get-arg "-runtests") - (args:get-arg "-execute"))) - (let* ((linktree (common:get-linktree)) ;; (get-environment-variable "MT_LINKTREE")) - (target (common:args-get-target exit-if-bad: #t)) - (runname (or (args:get-arg "-runname") - (args:get-arg ":runname") - (getenv "MT_RUNNAME"))) - (fulldir (conc linktree "/" - target "/" - runname))) - (if (and linktree (common:file-exists? linktree)) ;; can't proceed without linktree - (begin - (debug:print-info 0 *default-log-port* "Have -run with target=" target ", runname=" runname ", fulldir=" fulldir ", testpatt=" (or (args:get-arg "-testpatt") "%")) - (if (not (common:file-exists? fulldir)) - (create-directory fulldir #t)) ;; need to protect with exception handler - (if (and target - runname - (common:file-exists? fulldir)) - (let ((tmpfile (conc fulldir "/.megatest.cfg." (current-seconds))) - (targfile (conc fulldir "/.megatest.cfg-" megatest-version "-" megatest-fossil-hash)) - (rconfig (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash))) - (if (common:file-exists? rconfig) ;; only cache megatest.config AFTER runconfigs has been cached - (begin - (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile) - (if (not (common:in-running-test?)) - (configf:write-alist *configdat* tmpfile)) - (system (conc "ln -sf " tmpfile " " targfile)))) - ))) - (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs."))))) - - -;; gather available information, if legit read configs in this order: -;; -;; if have cache; -;; read it a return it -;; else -;; megatest.config (do not cache) -;; runconfigs.config (cache if all vars avail) -;; megatest.config (cache if all vars avail) -;; returns: -;; *toppath* -;; side effects: -;; sets; *configdat* (megatest.config info) -;; *runconfigdat* (runconfigs.config info) -;; *configstatus* (status of the read data) -;; -(define (launch:setup #!key (force-reread #f) (areapath #f)) - (mutex-lock! *launch-setup-mutex*) - ;; this stops the train quickly for new processes - (if (and *toppath* - (file-exists? (conc *toppath*"/stop-the-train"))) - (begin - (debug:print 0 *default-log-port* "ERROR: found file "*toppath*"/stop-the-train, exiting immediately") - (exit 1))) - (if (and *toppath* - (eq? *configstatus* 'fulldata) (not force-reread)) ;; got it all - (begin - (debug:print 2 *default-log-port* "NOTE: skipping launch:setup-body call since we have fulldata") - (mutex-unlock! *launch-setup-mutex*) - *toppath*) - (let ((res (launch:setup-body force-reread: force-reread areapath: areapath))) - (mutex-unlock! *launch-setup-mutex*) - res))) - -;; return paths depending on what info is available. -;; -(define (launch:get-cache-file-paths areapath toppath target mtconfig) - (let* ((use-cache (common:use-cache?)) - (runname (common:args-get-runname)) - (linktree (common:get-linktree)) - (testname (common:get-full-test-name)) - (rundir (if (and runname target linktree) - (common:directory-writable? (conc linktree "/" target "/" runname)) - #f)) - (testdir (if (and rundir testname) - (common:directory-writable? (conc rundir "/" testname)) - #f)) - (cachedir (or testdir rundir)) - (mtcachef (and cachedir (conc cachedir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash))) - (rccachef (and cachedir (conc cachedir "/" ".runconfigs.cfg-" megatest-version "-" megatest-fossil-hash)))) - (debug:print-info 6 *default-log-port* - "runname=" runname - "\n linktree=" linktree - "\n testname=" testname - "\n rundir=" rundir - "\n testdir=" testdir - "\n cachedir=" cachedir - "\n mtcachef=" mtcachef - "\n rccachef=" rccachef) - (cons mtcachef rccachef))) - -(define (launch:setup-body #!key (force-reread #f) (areapath #f)) - (if (and (eq? *configstatus* 'fulldata) - *toppath* - (not force-reread)) ;; no need to reprocess - *toppath* ;; return toppath - (let* ((use-cache (common:use-cache?)) ;; BB- use-cache checks *configdat* for use-cache setting. We do not have *configdat*. Bootstrapping problem here. - (toppath (common:get-toppath areapath)) - (target (common:args-get-target)) - (sections (if target (list "default" target) #f)) ;; for runconfigs - (mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config - (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) - ;; checking for null cachefiles should not be necessary, I was seeing error car of '(), might be a chicken bug or a red herring ... - (mtcachef (if (null? cachefiles) - #f - (car cachefiles))) ;; (and cachedir (conc cachedir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash))) - (rccachef (if (null? cachefiles) - #f - (cdr cachefiles)))) ;; (and cachedir (conc cachedir "/" ".runconfigs.cfg-" megatest-version "-" megatest-fossil-hash))) - ;; (cancreate (and cachedir (common:file-exists? cachedir)(file-write-access? cachedir) (not (common:in-running-test?))))) - (set! *toppath* toppath) ;; This is needed when we are running as a test using CMDINFO as a datasource - ;;(BB> "launch:setup-body -- cachefiles="cachefiles) - (cond - ;; if mtcachef exists just read it, however we need to assume toppath is available in $MT_RUN_AREA_HOME - ((and (not force-reread) - mtcachef rccachef - use-cache - (get-environment-variable "MT_RUN_AREA_HOME") - (common:file-exists? mtcachef) - (common:file-exists? rccachef)) - ;;(BB> "launch:setup-body -- cond branch 1 - use-cache") - (set! *configdat* (configf:read-alist mtcachef)) - ;;(BB> "launch:setup-body -- 1 set! *configdat*="*configdat*) - (set! *runconfigdat* (configf:read-alist rccachef)) - (set! *configinfo* (list *configdat* (get-environment-variable "MT_RUN_AREA_HOME"))) - (set! *configstatus* 'fulldata) - (set! *toppath* (get-environment-variable "MT_RUN_AREA_HOME")) - *toppath*) - ;; there are no existing cached configs, do full reads of the configs and cache them - ;; we have all the info needed to fully process runconfigs and megatest.config - ((and ;; (not force-reread) ;; force-reread is irrelevant in the AND, could however OR it? - mtcachef - rccachef) ;; BB- why are we doing this without asking if caching is desired? - ;;(BB> "launch:setup-body -- cond branch 2") - (let* ((first-pass (find-and-read-config ;; NB// sets MT_RUN_AREA_HOME as side effect - mtconfig - environ-patt: "env-override" - given-toppath: toppath - pathenvvar: "MT_RUN_AREA_HOME")) - (first-rundat (let ((toppath (if toppath - toppath - (car first-pass)))) - (read-config ;; (conc toppath "/runconfigs.config") ;; this should be converted to runconfig:read but it is non-trivial, leaving it for now. - (conc (if (string? toppath) - toppath - (get-environment-variable "MT_RUN_AREA_HOME")) - "/runconfigs.config") - *runconfigdat* #t - sections: sections)))) - (set! *runconfigdat* first-rundat) - (if first-pass ;; - (begin - ;;(BB> "launch:setup-body -- \"first-pass\"=first-pass") - (set! *configdat* (car first-pass)) - ;;(BB> "launch:setup-body -- 2 set! *configdat*="*configdat*) - (set! *configinfo* first-pass) - (set! *toppath* (or toppath (cadr first-pass))) ;; use the gathered data unless already have it - (set! toppath *toppath*) - (if (not *toppath*) - (begin - (debug:print-error 0 *default-log-port* "you are not in a megatest area!") - (exit 1))) - (setenv "MT_RUN_AREA_HOME" *toppath*) - ;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it - (let* ((keys (common:list-or-null (rmt:get-keys) - message: "Failed to retrieve keys in launch.scm. Please report this to the developers.")) - (key-vals (keys:target->keyval keys target)) - (linktree (common:get-linktree)) ;; (or (getenv "MT_LINKTREE")(if *configdat* (configf:lookup *configdat* "setup" "linktree") #f))) - ; (if *configdat* - ; (configf:lookup *configdat* "setup" "linktree") - ; (conc *toppath* "/lt")))) - (second-pass (find-and-read-config - mtconfig - environ-patt: "env-override" - given-toppath: toppath - pathenvvar: "MT_RUN_AREA_HOME")) - (runconfigdat (begin ;; this read of the runconfigs will see any adjustments made by re-reading megatest.config - (for-each (lambda (kt) - (setenv (car kt) (cadr kt))) - key-vals) - (read-config (conc toppath "/runconfigs.config") *runconfigdat* #t ;; consider using runconfig:read some day ... - sections: sections))) - (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) - (mtcachef (car cachefiles)) - (rccachef (cdr cachefiles))) - ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "/p/fdk/gwa/lefkowit/mtTesting/qa/primbeqa/links/p1222/11/PDK_r1.1.1/prim/clean/pcell_testgen/.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342 - ;; TODO - consider 1) using simple-lock to bracket cache write - ;; 2) cache in hash on server, since need to do rmt: anyway to lock. - - (if rccachef - (common:fail-safe - (lambda () - (configf:write-alist runconfigdat rccachef)) - (conc "Could not write cache file - "rccachef))) - (if mtcachef - (common:fail-safe - (lambda () - (configf:write-alist *configdat* mtcachef)) - (conc "Could not write cache file - "mtcachef))) - (set! *runconfigdat* runconfigdat) - (if (and rccachef mtcachef) (set! *configstatus* 'fulldata)))) - ;; no configs found? should not happen but let's try to recover gracefully, return an empty hash-table - (set! *configdat* (make-hash-table)) - ))) - - ;; else read what you can and set the flag accordingly - ;; here we don't have either mtconfig or rccachef - (else - ;;(BB> "launch:setup-body -- cond branch 3 - else") - (let* ((cfgdat (find-and-read-config - (or (args:get-arg "-config") "megatest.config") - environ-patt: "env-override" - given-toppath: (get-environment-variable "MT_RUN_AREA_HOME") - pathenvvar: "MT_RUN_AREA_HOME"))) - - (if (and cfgdat (list? cfgdat) (> (length cfgdat) 0) (hash-table? (car cfgdat))) - (let* ((toppath (or (get-environment-variable "MT_RUN_AREA_HOME")(cadr cfgdat))) - (rdat (read-config (conc toppath ;; convert this to use runconfig:read! - "/runconfigs.config") *runconfigdat* #t sections: sections))) - (set! *configinfo* cfgdat) - (set! *configdat* (car cfgdat)) - (set! *runconfigdat* rdat) - (set! *toppath* toppath) - (set! *configstatus* 'partial)) - (begin - (debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.") - (exit 2)))))) - ;; COND ends here. - - ;; additional house keeping - (let* ((linktree (or (common:get-linktree) - (conc *toppath* "/lt")))) - (if linktree - (begin - (if (not (common:file-exists? linktree)) - (begin - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "Something went wrong when trying to create linktree dir at " linktree) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) - (exit 1)) - (create-directory linktree #t)))) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "Something went wrong when trying to create link to linktree at " *toppath*) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) - (let ((tlink (conc *toppath* "/lt"))) - (if (not (common:file-exists? tlink)) - (create-symbolic-link linktree tlink))))) - (begin - (debug:print-error 0 *default-log-port* "linktree not defined in [setup] section of megatest.config") - ))) - (if (and *toppath* - (directory-exists? *toppath*)) - (begin - (setenv "MT_RUN_AREA_HOME" *toppath*) - (setenv "MT_TESTSUITENAME" (common:get-testsuite-name))) - (begin - (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.") - (set! *toppath* #f) ;; force it to be false so we return #f - #f)) - - ;; needed by various transport and db modules - (dbfile:testsuite-name (common:get-testsuite-name)) ;; (get-testsuite-name *toppath* *configdat*)) - - ;; one more attempt to cache the configs for future reading - (let* ((cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) - (mtcachef (car cachefiles)) - (rccachef (cdr cachefiles))) - - ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "...somepath.../.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342 - ;; TODO - consider 1) using simple-lock to bracket cache write - ;; 2) cache in hash on server, since need to do rmt: anyway to lock. - (if (and rccachef *runconfigdat* (not (common:file-exists? rccachef))) - (common:fail-safe - (lambda () - (configf:write-alist *runconfigdat* rccachef)) - (conc "Could not write cache file - "rccachef)) - ) - (if (and mtcachef *configdat* (not (common:file-exists? mtcachef))) - (common:fail-safe - (lambda () - (configf:write-alist *configdat* mtcachef)) - (conc "Could not write cache file - "mtcachef)) - ) - (if (and rccachef mtcachef *runconfigdat* *configdat*) - (set! *configstatus* 'fulldata))) - - ;; if have -append-config then read and append here - (let ((cfname (args:get-arg "-append-config"))) - (if (and cfname - (file-read-access? cfname)) - (read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special. - - ;; have config at this time, this is a good place to set params based on config file settings - (let* ((dbmode (configf:lookup *configdat* "setup" "dbcache-mode")) - (syncmode (configf:lookup *configdat* "setup" "sync-mode")) - (srvdebug (configf:lookup *configdat* "server" "debug-parameter"))) - (if dbmode - (begin - (debug:print-info 0 *default-log-port* "Overriding dbmode to "dbmode) - (dbcache-mode (string->symbol dbmode)))) - (if syncmode - (begin - (debug:print-info 0 *default-log-port* "Overriding syncmode to "syncmode) - (dbfile:sync-method (string->symbol syncmode)))) - (if srvdebug - (begin - (debug:print-info 0 *default-log-port* "Overriding server debug parameter to "srvdebug) - (tt-server-profile-string srvdebug))) - ) - - *toppath*))) - - -(define (get-best-disk confdat testconfig) - (let* ((disks (or (and testconfig (hash-table-ref/default testconfig "disks" #f)) - (hash-table-ref/default confdat "disks" #f))) - (minspace (let ((m (configf:lookup confdat "setup" "minspace"))) - (string->number (or m "10000"))))) - (if disks - (let ((res (common:get-disk-with-most-free-space disks minspace))) - (if res - (cdr res) - ;; else if no valid disks... - (begin - (debug:print 0 *default-log-port* "WARNING: No valid disks or no disk with enough space found from " disks) - (if (null? disks) - (cons 1 (conc *toppath* "/runs")) - - ;; else try to create the directories anyway. - (let ((paths (sort disks (lambda (x y) (> (string-length (cadr x)) (string-length (cadr y))))))) - (let loop ((head (car paths)) (tail (cdr paths))) - (let ((result (handle-exceptions exn - (begin - (debug:print 0 *default-log-port* "failed to create dir " (cadr head) ", exn=" exn) - #f) - (create-directory (cadr head) #t)))) - (if result - result - (if (null? tail) - (begin - (debug:print 0 *default-log-port* "Using toppath/runs") - (conc *toppath* "/runs") - ) - (loop (car tail) (cdr tail)))))) - ) - ) ;; if null? disks - ) ;; if not res - ) - ) - ;; no disks definition - use toppath/runs, fall back to currdir/runs - (let* ((toppath (or *toppath* - (common:get-toppath *toppath*) - (begin - (debug:print-error 0 *default-log-port* "Creating runs dir in current directory, this is probably not what you wanted. Please check your setup.") - (current-directory)))) - (runsdir (conc toppath "/runs"))) - (if (not (file-exists? runsdir))(create-directory runsdir)) - runsdir) - ))) ;; the code creates the necessary directories if it does not exist and returns the path. - - - -(define (launch:test-copy test-src-path test-path) - (let* ((ovrcmd (let ((cmd (configf:lookup *configdat* "setup" "testcopycmd"))) - (if cmd - ;; substitute the TEST_SRC_PATH and TEST_TARG_PATH - (string-substitute "TEST_TARG_PATH" test-path - (string-substitute "TEST_SRC_PATH" test-src-path cmd #t) #t) - #f))) - (cmd (if ovrcmd - ovrcmd - (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/" - " >> " test-path "/mt_launch.log 2>> " test-path "/mt_launch.log"))) - (status (system cmd))) - (if (not (eq? status 0)) - (debug:print 2 *default-log-port* "ERROR: problem with running \"" cmd "\"")))) - - -;; Desired directory structure: -;; -;; - - -. -;; | -;; v -;; - - -|- -;; -;; dir stored in test is: -;; -;; - - [ - ] -;; -;; All log file links should be stored relative to the top of link path -;; -;; - [ - ] -;; -(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat #!key (remtries 2)) - (let* ((item-path (if (string? itemdat) itemdat (item-list->path itemdat))) ;; if pass in string - just use it - (runname (if (string? run-info) ;; if we pass in a string as run-info use it as run-name. - run-info - (db:get-value-by-header (db:get-rows run-info) - (db:get-header run-info) - "runname"))) - (contour #f) ;; NOT READY FOR THIS (args:get-arg "-contour")) - ;; convert back to db: from rdb: - this is always run at server end - (target (string-intersperse (map cadr keyvals) "/")) - - (not-iterated (equal? "" item-path)) - - ;; all tests are found at /test-base or /test-base - (testtop-base (conc target "/" runname "/" testname)) - (test-base (conc testtop-base (if not-iterated "" "/") item-path)) - - ;; nb// if itempath is not "" then it is prefixed with "/" - (toptest-path (conc disk-path (if contour (conc "/" contour) "") "/" testtop-base)) - (test-path (conc disk-path (if contour (conc "/" contour) "") "/" test-base)) - - ;; ensure this exists first as links to subtests must be created there - (linktree (common:get-linktree)) - ;; WAS: (let ((rd (configf:lookup *configdat* "setup" "linktree"))) - ;; (if rd rd (conc *toppath* "/runs")))) - ;; which seems wrong ... - - (lnkbase (conc linktree (if contour (conc "/" contour) "") "/" target "/" runname)) - (lnkpath (conc lnkbase "/" testname)) - (lnkpathf (conc lnkpath (if not-iterated "" "/") item-path)) - (lnktarget (conc lnkpath "/" item-path))) - - ;; Update the rundir path in the test record for all, rundir=physical, shortdir=logical - ;; rundir shortdir - (rmt:general-call 'test-set-rundir-shortdir run-id lnkpathf test-path testname item-path run-id) - - (debug:print 2 *default-log-port* "INFO:\n lnkbase=" lnkbase "\n lnkpath=" lnkpath "\n toptest-path=" toptest-path "\n test-path=" test-path) - (if (not (common:file-exists? linktree)) - (begin - (debug:print 0 *default-log-port* "WARNING: linktree did not exist! Creating it now at " linktree) - (create-directory linktree #t))) ;; (system (conc "mkdir -p " linktree)))) - ;; create the directory for the tests dir links, this is needed no matter what... try up to three times - (let loop ((done 3)) - (let ((success (if (and (not (common:directory-exists? lnkbase)) - (not (common:file-exists? lnkbase))) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "Problem creating linktree base at " lnkbase ", exn=" exn) - (print-error-message exn (current-error-port)) - #t) - (create-directory lnkbase #t) - #f)))) - (if (and (not success)(> done 0)) - (loop (- done 1))))) - - ;; update the toptest record with its location rundir, cache the path - ;; This wass highly inefficient, one db write for every subtest, potentially - ;; thousands of unnecessary updates, cache the fact it was set and don't set it - ;; again. - - ;; Now create the link from the test path to the link tree, however - ;; if the test is iterated it is necessary to create the parent path - ;; to the iteration. use pathname-directory to trim the path by one - ;; level - (if (not not-iterated) ;; i.e. iterated - (let ((iterated-parent (pathname-directory (conc lnkpath "/" item-path)))) - (debug:print-info 2 *default-log-port* "Creating iterated parent " iterated-parent) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* " Failed to create directory " iterated-parent ((condition-property-accessor 'exn 'message) exn) - ", continuing but link tree may be corrupted, exn=" exn) - #;(exit 1)) - (create-directory iterated-parent #t)))) - - (if (symbolic-link? lnkpath) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* " Failed to remove symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) - ", continuing but link tree may be corrupted. exn=" exn) - #;(exit 1)) - (delete-file lnkpath))) - - (if (not (or (common:file-exists? lnkpath) - (symbolic-link? lnkpath))) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* " Failed to create symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) - ", continuing but link tree may be corrupted. exn=" exn) - #;(exit 1)) - (create-symbolic-link toptest-path lnkpath))) - - ;; NB - This was not working right - some top tests are not getting the path set!!! - ;; - ;; Do the setting of this record after the paths are created so that the shortdir can - ;; be set to the real directory location. This is safer for future clean up if the link - ;; tree is damaged or lost. - ;; - (if (not (hash-table-ref/default *toptest-paths* testname #f)) - (let* ((testinfo (rmt:get-test-info-by-id run-id test-id)) ;; run-id testname item-path)) - (curr-test-path (if testinfo ;; (filedb:get-path *fdb* - ;; (db:get-path dbstruct - ;; (rmt:sdb-qry 'getstr - (db:test-get-rundir testinfo) ;; ) ;; ) - #f))) - (hash-table-set! *toptest-paths* testname curr-test-path) - ;; NB// Was this for the test or for the parent in an iterated test? - (rmt:general-call 'test-set-rundir-shortdir run-id lnkpath - (if (common:file-exists? lnkpath) - ;; (resolve-pathname lnkpath) - (common:nice-path lnkpath) - lnkpath) - testname "" run-id) - ;; (rmt:general-call 'test-set-rundir run-id lnkpath testname "") ;; toptest-path) - (if (or (not curr-test-path) - (not (directory-exists? toptest-path))) - (begin - (debug:print-info 2 *default-log-port* "Creating " toptest-path " and link " lnkpath) - (handle-exceptions - exn - (if (directory-exists? toptest-path) ;; it was likely created in parallel - #t - (begin - (debug:print-info 0 *default-log-port* "failed to create directory " toptest-path ", exn=" exn) - #f)) - (create-directory toptest-path #t)) - (hash-table-set! *toptest-paths* testname toptest-path))))) - - ;; The toptest path has been created, the link to the test in the linktree has - ;; been created. Now, if this is an iterated test the real test dir must be created - (if (not not-iterated) ;; this is an iterated test - (begin ;; (let ((lnktarget (conc lnkpath "/" item-path))) - (debug:print 2 *default-log-port* "Setting up sub test run area") - (debug:print 2 *default-log-port* " - creating run area in " test-path) - (handle-exceptions - exn - (if (directory-exists? test-path) - #t - (begin - (debug:print-error 0 *default-log-port* " Failed to create directory " test-path ((condition-property-accessor 'exn 'message) exn) - ", exiting, exn=" exn) - (exit 1))) - (create-directory test-path #t)) - (debug:print 2 *default-log-port* - " - creating link from: " test-path "\n" - " to: " lnktarget) - - ;; If there is already a symlink delete it and recreate it. - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* " Failed to re-create link " lnktarget ((condition-property-accessor 'exn 'message) exn) ", exiting, exn=" exn) - (exit)) - (if (symbolic-link? lnktarget) (delete-file lnktarget)) - (if (not (common:file-exists? lnktarget)) (create-symbolic-link test-path lnktarget))))) - - (if (not (directory? test-path)) - (create-directory test-path #t)) ;; this is a hack, I don't know why out of the blue this path does not exist sometimes - - (if (and test-src-path (directory? test-path)) - (begin - (launch:test-copy test-src-path test-path) - (list lnkpathf lnkpath )) - (if (and test-src-path (> remtries 0)) - (begin - (debug:print-error 0 *default-log-port* "Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries) - ;; - (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat remtries: (- remtries 1))) - (list #f #f))))) - - -(define (launch:handle-zombie-tests run-id) - (let* ((key (conc "zombiescan-runid-"run-id)) - (now (current-seconds)) - (threshold (- (current-seconds) (* 2 (or (configf:lookup-number *configdat* "setup" "deadtime") 120)))) - (val (rmt:get-var key)) - (do-scan? - (cond - ((not val) - #t) - ((< val threshold) - #t) - (else #f)))) - (when do-scan? - (debug:print 1 *default-log-port* "INFO: search and mark zombie tests") - (rmt:set-var key (current-seconds)) - (rmt:find-and-mark-incomplete run-id #f)))) - - - - - -;; 1. look though disks list for disk with most space -;; 2. create run dir on disk, path name is meaningful -;; 3. create link from run dir to megatest runs area -;; 4. remotely run the test on allocated host -;; - could be ssh to host from hosts table (update regularly with load) -;; - could be netbatch -;; (launch-test db (cadr status) test-conf)) -(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params) - (assert runname "FATAL: launch-test called with no runname") - (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex - (let* ( ;; (lock-key (conc "test-" test-id)) - ;; (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) - ;; (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds - ;; (if (car lock) - ;; #t - ;; (if (> (current-seconds) expire-time) - ;; (begin - ;; (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path) - ;; (rmt:no-sync-del! lock-key) ;; destroy the lock - ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; - ;; (begin - ;; (thread-sleep! 1) - ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)))))) - (item-path (item-list->path itemdat)) - (contour #f)) ;; NOT READY FOR THIS (args:get-arg "-contour"))) - (let loop ((delta (- (current-seconds) *last-launch*)) - (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 0))) - (if (> launch-delay delta) - (begin - ;; (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay. - ;; (debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds")) - (thread-sleep! (- launch-delay delta)) - (loop (- (current-seconds) *last-launch*) launch-delay)))) - (change-directory *toppath*) - (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars) - (append - (list - (list "MT_RUN_AREA_HOME" *toppath*) - (list "MT_TEST_NAME" test-name) - (list "MT_RUNNAME" runname) - (list "MT_ITEMPATH" item-path) - (list "MT_CONTOUR" contour) - ) - itemdat)) - (let* ((tregistry (tests:get-all)) ;; third param (below) is system-allowed - ;; for tconfig, why do we allow fallback to test-conf? - (tconfig (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t) - (begin - (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.") - test-conf))) ;; force re-read now that all vars are set - (useshell (let ((ush (configf:lookup *configdat* "jobtools" "useshell"))) - (if ush - (if (equal? ush "no") ;; must use "no" to NOT use shell - #f - ush) - #t))) ;; default is yes - (runscript (configf:lookup tconfig "setup" "runscript")) - (ezsteps (> (length (hash-table-ref/default tconfig "ezsteps" '())) 0)) ;; don't send all the steps, could be big, just send a flag - (subrun (> (length (hash-table-ref/default tconfig "subrun" '())) 0)) ;; send a flag to process a subrun - ;; (diskspace (configf:lookup tconfig "requirements" "diskspace")) - ;; (memory (configf:lookup tconfig "requirements" "memory")) - ;; (hosts (configf:lookup *configdat* "jobtools" "workhosts")) ;; I'm pretty sure this was never completed - (remote-megatest (configf:lookup *configdat* "setup" "executable")) - (run-time-limit (or (configf:lookup tconfig "requirements" "runtimelim") - (configf:lookup *configdat* "setup" "runtimelim"))) - ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to - ;; allow running from dashboard. Extract the path - ;; from the called megatest and convert dashboard - ;; or dboard to megatest - (local-megatest (common:find-local-megatest)) - #;(local-megatest (let* ((lm (car (argv))) - (dir (pathname-directory lm)) - (exe (pathname-strip-directory lm))) - (conc (if dir (conc dir "/") "") - (case (string->symbol exe) - ((dboard) "../megatest") - ((mtest) "../megatest") - ((dashboard) "megatest") - (else exe))))) - (launcher (common:get-launcher *configdat* test-name item-path)) ;; (configf:lookup *configdat* "jobtools" "launcher")) - (test-sig (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path - (work-area #f) - (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all - (diskpath #f) - (cmdparms #f) - (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) - (mt-bindir-path #f) - (testinfo (rmt:get-test-info-by-id run-id test-id)) - (mt_target (string-intersperse (map cadr keyvals) "/")) - (debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '()) - (if (args:get-arg "-logging")(list "-logging") '()) - (if (configf:lookup *configdat* "misc" "profilesw") - (list (configf:lookup *configdat* "misc" "profilesw")) - '())))) - ;; (if hosts (set! hosts (string-split hosts))) - ;; set the megatest to be called on the remote host - (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest")) - (set! mt-bindir-path (pathname-directory remote-megatest)) - (if launcher (set! launcher (string-split launcher))) - ;; set up the run work area for this test - (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run - (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir - (begin - (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path) - (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record - - ;; prevent overlapping actions - set to LAUNCHED as early as possible - ;; - ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail - (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) - (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f) - ;; (pp (hash-table->alist tconfig)) - (set! diskpath (get-best-disk *configdat* tconfig)) - (debug:print 2 *default-log-port* "best disk path = " diskpath) - (if diskpath - (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat))) - (set! work-area (car dat)) - (set! toptest-work-area (cadr dat)) - (debug:print-info 2 *default-log-port* "Using work area " work-area)) - (begin - (set! work-area (conc test-path "/tmp_run")) - (create-directory work-area #t) - (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run"))) - (set! cmdparms (base64:base64-encode - (z3:encode-buffer - (with-output-to-string - (lambda () ;; (list 'hosts hosts) - (write (list (list 'testpath test-path) - ;; (list 'transport (conc *transport-type*)) - ;; (list 'serverinf *server-info*) - #;(list 'homehost (let* ((hhdat (server:get-homehost))) - (if hhdat - (car hhdat) - #f))) - #;(list 'serverurl (if *runremote* ;; would like to add this back ... WORK NEEDED - (remote-server-url *runremote*) - #f)) ;; - (list 'areaname (common:get-testsuite-name)) - (list 'toppath *toppath*) - (list 'work-area work-area) - (list 'test-name test-name) - (list 'runscript runscript) - (list 'run-id run-id ) - (list 'test-id test-id ) - ;; (list 'item-path item-path ) - (list 'itemdat itemdat ) - (list 'megatest remote-megatest) - (list 'ezsteps ezsteps) - (list 'subrun subrun) - (list 'target mt_target) - (list 'contour contour) - (list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f)) - (list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '())) - (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) - (list 'runname runname) - (list 'mt-bindir-path mt-bindir-path)))))))) - - (setenv "MT_CMDINFO" cmdparms) ;; setting this for use in nblauncher - - ;; clean out step records from previous run if they exist - ;; (rmt:delete-test-step-records run-id test-id) - ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway - (if (common:file-exists? work-area) - (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir - (cond - ;; ((and launcher hosts) ;; must be using ssh hostname - ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) - ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms)))) - (launcher - (set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) - ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms)))) - (else - (if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section")) - (set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" "")))))) - ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" ""))))) - (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm")))) - (debug:print 1 *default-log-port* "Launching " work-area) - ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done - (debug:print 4 *default-log-port* "fullcmd: " fullcmd) - (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible. - (let* ((commonprevvals (alist->env-vars - (hash-table-ref/default *configdat* "env-override" '()))) - (miscprevvals (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute" - (append (list (list "MT_TEST_RUN_DIR" work-area) - (list "MT_TEST_NAME" test-name) - (list "MT_ITEM_INFO" (conc itemdat)) - (list "MT_RUNNAME" runname) - (list "MT_TARGET" mt_target) - (list "MT_ITEMPATH" item-path) - ) - itemdat))) - (testprevvals (alist->env-vars - (hash-table-ref/default tconfig "pre-launch-env-overrides" '()))) - ;; Launchwait defaults to true, must override it to turn off wait - (launchwait (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t)) - (launch-results-prev (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed. - process:cmd-run-with-stderr-and-exitcode->list - process-run) - (if useshell - (let ((cmdstr (string-intersperse fullcmd " "))) - (if launchwait - cmdstr - (conc cmdstr " >> mt_launch.log 2>&1 &"))) - (car fullcmd)) - (if useshell - '() - (cdr fullcmd)))) - (success (if launchwait (equal? 0 (cadr launch-results-prev)) #t)) - (launch-results (if launchwait (car launch-results-prev) launch-results-prev))) - (if (not success) - (tests:test-set-status! run-id test-id "COMPLETED" "DEAD" "launcher failed; exited non-zero; check mt_launch.log" #f)) ;; (if launch-results launch-results "FAILED")) - (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork. - ;; (rmt:no-sync-del! lock-key) ;; release the lock for starting this test - (if (not launchwait) ;; give the OS a little time to allow the process to start - (thread-sleep! 0.01)) - (with-output-to-file "mt_launch.log" - (lambda () - (print "LAUNCHCMD: " (string-intersperse fullcmd " ")) - (if (list? launch-results) - (apply print launch-results) - (print "NOTE: launched \"" fullcmd "\"\n but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n if you have problems with this")) - #:append)) - (debug:print 2 *default-log-port* "Launching completed, updating db") - (debug:print 2 *default-log-port* "Launch results: " launch-results) - (if (not launch-results) - (begin - (debug:print 0 *default-log-port* "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now") - ;; (sqlite3:finalize! db) - ;; good ole "exit" seems not to work - ;; (_exit 9) - ;; but this hack will work! Thanks go to Alan Post of the Chicken email list - ;; NB// Is this still needed? Should be safe to go back to "exit" now? - (process-signal (current-process-id) signal/kill) - )) - (alist->env-vars miscprevvals) - (alist->env-vars testprevvals) - (alist->env-vars commonprevvals) - launch-results)) - (change-directory *toppath*) - (thread-sleep! (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.0)))) - -;; recover a test where the top controlling mtest may have died -;; -(define (launch:recover-test run-id test-id) - ;; this function is called on the test run host via ssh - ;; - ;; 1. look at the process from pid - ;; - is it owned by calling user - ;; - it it's run directory correct for the test - ;; - is there a controlling mtest (maybe stuck) - ;; 2. if recovery is needed watch pid - ;; - when it exits take the exit code and do the needful - ;; - (let* ((pid (rmt:test-get-top-process-pid run-id test-id)) - (psres (with-input-from-pipe - (conc "ps -F -u " (current-user-name) " | grep -E '" pid " ' | grep -v 'grep -E " pid "'") - (lambda () - (read-line)))) - (rundir (if (string? psres) ;; real process owned by user - (read-symbolic-link (conc "/proc/" pid "/cwd")) - #f))) - ;; now wait on that process if all is correct - ;; periodically update the db with runtime - ;; when the process exits look at the db, if still RUNNING after 10 seconds set - ;; state/status appropriately - (process-wait pid))) + dbmod + dbfile + mtmod + megatestmod + tasksmod + ) + ADDED launchmod.scm Index: launchmod.scm ================================================================== --- /dev/null +++ launchmod.scm @@ -0,0 +1,1359 @@ +;;====================================================================== +;; Copyright 2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +;;====================================================================== +;; Cpumod: +;; +;; Put things here don't fit anywhere else +;;====================================================================== + +(declare (unit launchmod)) +(declare (uses debugprint)) +(declare (uses mtargs)) +(declare (uses commonmod)) +(declare (uses configfmod)) +(declare (uses dbfile)) +(declare (uses dbmod)) +(declare (uses rmtmod)) +(declare (uses servermod)) +(declare (uses processmod)) +(declare (uses pgdb)) +(declare (uses mtmod)) +(declare (uses megatestmod)) +(declare (uses tasksmod)) +(declare (uses subrunmod)) +(declare (uses testsmod)) +(declare (uses runsmod)) +(declare (uses fsmod)) + +(use srfi-69) + +(module launchmod + * + +(import scheme) +(cond-expand + (chicken-4 + + (import chicken + ports + data-structures + extras + files + matchable + pathname-expand + posix + posix-extras + regex + regex-case + sparse-vectors + + ) + (use srfi-69)) + (chicken-5 + (import (prefix sqlite3 sqlite3:) + chicken.base + chicken.condition + chicken.file + chicken.file.posix + chicken.io + chicken.pathname + chicken.port + chicken.process + chicken.process-context + chicken.process-context.posix + chicken.sort + chicken.string + chicken.time + chicken.time.posix + + matchable + md5 + message-digest + pathname-expand + regex + regex-case + system-information + + ))) + +;; imports common to ck4 and ck5 +(import srfi-1 + srfi-13 + srfi-18 + srfi-69 + typed-records + (prefix base64 base64:) + (prefix sqlite3 sqlite3:) + md5 + message-digest + z3 + csv + directory-utils + call-with-environment-variables + + debugprint + commonmod + configfmod + (prefix mtargs args:) + dbmod + dbfile + rmtmod + servermod + processmod + pgdb + mtmod + megatestmod + tasksmod + subrunmod + testsmod + runsmod + fsmod + ) + +(include "common_records.scm") +(include "key_records.scm") +(include "db_records.scm") +(include "megatest-fossil-hash.scm") + +;;====================================================================== +;; ezsteps +;;====================================================================== + +;; ezsteps were going to be coded as +;; stepname[,predstep1,predstep2 ...] [{VAR1=first,second,third}] command to execute +;; BUT +;; now are +;; stepname {VAR=first,second,third ...} command ... +;; where the {VAR=first,second,third ...} is optional. + +;; given an exit code and whether or not logpro was used calculate OK/BAD +;; return #t if we are ok, #f otherwise +(define (steprun-good? logpro exitcode stepparms) + (or (eq? exitcode 0) + (and logpro (member exitcode '( 2 4 6))) + (let* ((params (alist-ref 'params stepparms)) ;; get the params section + (keep-going (if params + (alist-ref "keep-going" params equal?) + #f))) + (debug:print 0 *default-log-port* "keep-going=" keep-going) + (and keep-going (equal? (car keep-going) "yes"))))) + +;; if handed a string, process it, else look for MT_CMDINFO +(define (launch:get-cmdinfo-assoc-list #!key (encoded-cmd #f)) + (let ((enccmd (if encoded-cmd encoded-cmd (getenv "MT_CMDINFO")))) + (if enccmd + (common:read-encoded-string enccmd) + '()))) + +;; 0 1 2 3 +(defstruct launch:einf (pid #t)(exit-status #t)(exit-code #t)(rollup-status 0)) + +;; return (conc status ": " comment) from the final section so that +;; the comment can be set in the step record in launch.scm +;; +(define (launch:load-logpro-dat run-id test-id stepname) + (let ((cname (conc stepname ".dat"))) + (if (common:file-exists? cname) + (let* ((dat (read-config cname #f #f)) + (csvr (db:logpro-dat->csv dat stepname)) + (csvt (let-values (((fmt-cell fmt-record fmt-csv) (make-format ","))) + (fmt-csv (map list->csv-record csvr)))) + (status (configf:lookup dat "final" "exit-status")) + (msg (configf:lookup dat "final" "message"))) + (if csvt ;; this if blocked stack dump caused by .dat file from logpro being 0-byte. fixed by upgrading logpro + (rmt:csv->test-data run-id test-id csvt) + (debug:print 0 *default-log-port* "ERROR: no csvdat exists for run-id: " run-id " test-id: " test-id " stepname: " stepname ", check that logpro version is 1.15 or newer")) + ;; (debug:print-info 13 *default-log-port* "Error: run-id/test-id/stepname="run-id"/"test-id"/"stepname" => bad csvr="csvr) + ;; ) + (cond + ((equal? status "PASS") "PASS") ;; skip the message part if status is pass + (status (conc (configf:lookup dat "final" "exit-status") ": " (if msg msg "no message"))) + (else #f))) + #f))) + +(define (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m) + ;; (let-values + ;; (((pid exit-status exit-code) + ;; (run-n-wait fullrunscript))) + ;; (tests:test-set-status! test-id "RUNNING" "n/a" #f #f) + ;; Since we should have a clean slate at this time there is no need to do + ;; any of the other stuff that tests:test-set-status! does. Let's just + ;; force RUNNING/n/a + + ;; (thread-sleep! 0.3) + ;; (tests:test-force-state-status! run-id test-id "RUNNING" "n/a") + (rmt:set-state-status-and-roll-up-items run-id test-name item-path "RUNNING" #f #f) + ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here + + ;; if there is a runscript do it first + (if fullrunscript + (let ((pid (process-run fullrunscript))) + (rmt:test-set-top-process-pid run-id test-id pid) + (let loop ((i 0)) + (let-values + (((pid-val exit-status exit-code) (process-wait pid #t))) + (mutex-lock! m) + (launch:einf-pid-set! exit-info pid) ;; (vector-set! exit-info 0 pid) + (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status) + (launch:einf-exit-code-set! exit-info exit-code) ;; (vector-set! exit-info 2 exit-code) + (launch:einf-rollup-status-set! exit-info exit-code) ;; (vector-set! exit-info 3 exit-code) ;; rollup status + (mutex-unlock! m) + (if (eq? pid-val 0) + (begin + (thread-sleep! 2) + (loop (+ i 1))) + ))))) + ;; then, if runscript ran ok (or did not get called) + ;; do all the ezsteps (if any) + (if (or ezsteps subrun) + (let* ((test-run-dir (tests:get-test-path-from-environment)) + (testconfig ;; (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here? + ;; NOTE: it is tempting to turn off force-create of testconfig but dynamic + ;; ezstep names need a full re-eval here. + (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs))) + (ezstepslst (if (hash-table? testconfig) + (hash-table-ref/default testconfig "ezsteps" '()) + #f))) + (if testconfig + (hash-table-set! *testconfigs* test-name testconfig) ;; cached for lazy reads later ... + (begin + (launch:setup) + (debug:print 0 *default-log-port* "WARNING: no testconfig found for " test-name " in search path:\n " + (string-intersperse (tests:get-tests-search-path *configdat*) "\n ")))) + ;; after all that, still no testconfig? Time to abort + (if (not testconfig) + (begin + (debug:print-error 0 *default-log-port* "Failed to resolve megatest.config, runconfigs.config and testconfig issues. Giving up now") + (exit 1))) + + ;; create a proc for the subrun if requested, save that proc in the ezsteps table as the last entry + ;; 1. get section [runarun] + ;; 2. unset MT_* vars + ;; 3. fix target + ;; 4. fix runname + ;; 5. fix testpatt or calculate it from contour + ;; 6. launch the run + ;; 7. roll up the run result and or roll up the logpro processed result + (when (configf:lookup testconfig "subrun" "runwait") ;; we use runwait as the flag that a subrun is requested + (subrun:initialize-toprun-test testconfig test-run-dir) + (let* ((mt-cmd (subrun:launch-cmd test-run-dir (configf:lookup testconfig "subrun" "runwait")))) + (debug:print-info 0 *default-log-port* "Subrun command is \"" mt-cmd "\"") + (set! ezsteps #t) ;; set the needed flag + (set! ezstepslst + (append (or ezstepslst '()) + (list (list "subrun" (conc "{subrun=true} " mt-cmd))))))) + + ;; process the ezsteps + (if ezsteps + (let* ((all-steps-dat (make-hash-table))) ;; keep all the info around as stepname ==> alist; where 'params is the params list (add other stuff as needed) + (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps")) + ;; if ezsteps was defined then we are sure to have at least one step but check anyway + (if (not (> (length ezstepslst) 0)) + (debug:print-error 0 *default-log-port* "ezsteps defined but ezstepslst is zero length") + (let ((all-step-names (map car ezstepslst)) + (status-file (file-open "ezsteps.status" (+ open/append open/wronly open/creat))) + ) + (setenv "MT_STEP_NAMES" (string-intersperse all-step-names " ")) + (let loop ((ezstep (car ezstepslst)) + (tal (cdr ezstepslst)) + (prevstep #f)) + (debug:print-info 0 *default-log-port* "Processing ezstep \"" (string-intersperse ezstep " ") "\"") + ;; check exit-info (vector-ref exit-info 1) + (if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1) + (let* ((logpro-used (ezsteps:runstep ezstep run-id test-id exit-info m tal testconfig all-steps-dat)) + (stepname (car ezstep)) + (stepparms (hash-table-ref all-steps-dat stepname))) + (setenv "MT_STEP_NAME" stepname) + (pp (hash-table->alist all-steps-dat)) + ;; if logpro-used read in the stepname.dat file + (if (and logpro-used (common:file-exists? (conc stepname ".dat"))) + (launch:load-logpro-dat run-id test-id stepname)) + + (file-write status-file (conc stepname " " (launch:einf-exit-code exit-info) "\n")) + + (if (steprun-good? logpro-used (launch:einf-exit-code exit-info) stepparms) + (if (not (null? tal)) + (loop (car tal) (cdr tal) stepname)) + (debug:print 0 *default-log-port* "WARNING: step " (car ezstep) " failed. Stopping"))) + (debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep))) + (file-close status-file) + ) + + )))))) + +(define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags) + (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "60"))) + (start-seconds (current-seconds)) + (calc-minutes (lambda () + (inexact->exact + (round + (- + (current-seconds) + start-seconds))))) + (kill-tries 0)) + ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area) + ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) + (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10) + + (let loop ((minutes (calc-minutes)) + (cpu-load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) + (disk-free (get-df (current-directory))) + (last-sync (current-seconds))) + (let* ((over-time (> (current-seconds) (+ last-sync update-period))) + (new-cpu-load (let* ((load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) + (delta (abs (- load cpu-load)))) + (if (> delta 0.1) ;; don't bother updating with small changes + load + #f))) + (new-disk-free (let* ((df (if over-time ;; only get df every 30 seconds + (get-df (current-directory)) + disk-free)) + (delta (abs (- df disk-free)))) + (if (and (> df 0) + (> (/ delta df) 0.1)) ;; (> delta 200) ;; ignore changes under 200 Meg + df + #f))) + (do-sync (or new-cpu-load new-disk-free over-time)) + + (test-info (rmt:get-test-state-status-by-id run-id test-id)) + (state (car test-info));; (db:test-get-state test-info)) + (status (cdr test-info));; (db:test-get-status test-info)) + (killreq (equal? state "KILLREQ")) + (kill-reason "no kill reason specified") + (kill-job? #f)) + ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period)) + (cond + (killreq + (set! kill-reason "KILLING TEST since received kill request (KILLREQ)") + (set! kill-job? #t)) + ((and runtlim (> (- (current-seconds) start-seconds) runtlim)) + (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim)) + (set! kill-job? #t)) + ((equal? status "DEAD") + (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) + (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.") + ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING + (set! kill-job? #f))) + + (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync) + (if (common:low-noise-print 600 "run zombie") ;; every five minutes is plenty + (launch:handle-zombie-tests run-id)) + (when do-sync + (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)) + + (if kill-job? + (begin + (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason) + (mutex-lock! m) + ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this + ;; section and the runit section? Or add a loop that tries three times with a 1/4 second + ;; between tries? + (let* ((pid1 (launch:einf-pid exit-info)) ;; (vector-ref exit-info 0)) + (pid2 (rmt:test-get-top-process-pid run-id test-id)) + (pids (delete-duplicates (filter number? (list pid1 pid2))))) + (if (not (null? pids)) + (begin + (for-each + (lambda (pid) + (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "Unable to kill process with pid " pid ", possibly already killed.") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) + (debug:print 0 *default-log-port* "WARNING: Request received to kill job " pid) ;; " (attempt # " kill-tries ")") + (debug:print-info 0 *default-log-port* "Signal mask=" (signal-mask)) + ;; (if (process:alive? pid) + ;; (begin + (map (lambda (pid-num) + (process-signal pid-num signal/term)) + (process:get-sub-pids pid)) + (thread-sleep! 5) + ;; (if (process:process-alive? pid) + (map (lambda (pid-num) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* " .... had trouble sending kill to " pid-num ", exn=" exn) + #f) + (process-signal pid-num signal/kill))) + (process:get-sub-pids pid)))) + ;; (debug:print-info 0 *default-log-port* "not killing process " pid " as it is not alive")))) + pids) + ;; BB: question to Matt -- does the tests:test-state-status! encompass rollup to toplevel? If not, should it? + (tests:test-set-status! run-id test-id "KILLED" "KILLED" (conc (args:get-arg "-m")" "kill-reason) #f)) ;; BB ADDED kill-reason -- confirm OK with Matt + (begin + (debug:print-error 0 *default-log-port* "Nothing to kill, pid1=" pid1 ", pid2=" pid2) + (tests:test-set-status! run-id test-id "KILLED" "FAILED TO KILL" (conc (args:get-arg "-m")" "kill-reason) #f) ;; BB ADDED kill-reason -- confirm OK with Matt + ))) + (mutex-unlock! m) + ;; no point in sticking around. Exit now. But run end of run before exiting? + (launch:end-of-run-check run-id) + (exit))) + (if (hash-table-ref/default misc-flags 'keep-going #f) ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta + (begin + (thread-sleep! 6) ;; was 3 + (loop (calc-minutes) + (or new-cpu-load cpu-load) + (or new-disk-free disk-free) + (if do-sync (current-seconds) last-sync)))))) + (tests:update-central-meta-info run-id test-id (commonmod:get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional + + +;; read testconfig and create .logpro and script files +;; - use #f for tconfigreg to re-read the testconfigs from disk +;; +(define (launch:extract-scripts-logpro test-dir test-name item-path tconfigreg-in) + (let* ((tconfigreg (or tconfigreg-in + (tests:get-all))) + (tconfig-fname (conc test-dir "/.testconfig")) + (tconfig-tmpfile (conc tconfig-fname ".tmp")) + (tconfig (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs))) + (scripts (configf:get-section tconfig "scripts")) + (logpros (configf:get-section tconfig "logpro"))) + ;; create .testconfig file + (configf:write-alist tconfig tconfig-tmpfile) + (file-move tconfig-tmpfile tconfig-fname #t) + (delete-file* ".final-status") + + ;; extract scripts from testconfig and write them to files in test run dir + (for-each + (lambda (scriptdat) + (match scriptdat + ((name content) + (debug:print-info 2 *default-log-port* "Creating script "(current-directory)"/"name) + (with-output-to-file name + (lambda () + (print content))) + (change-file-mode name (bitwise-ior perm/irwxg perm/irwxu))) + (else + (debug:print-info 0 "Invalid script definiton found in [scripts] section of testconfig. \"" scriptdat "\"")))) + scripts) + + ;; extract logpro from testconfig and write them to files in test run dir + (for-each + (lambda (logprodat) + (match logprodat + ((name content) + (debug:print-info 2 *default-log-port* "Creating logpro file "(current-directory)"/"name".logpro") + (with-output-to-file (conc name".logpro") + (lambda () + (print content) + ;; (change-file-mode name (bitwise-ior perm/irwxg perm/irwxu)) + ))) + (else + (debug:print-info 0 "Invalid logpro definiton found in [logpro] section of testconfig. \"" logprodat "\"")))) + logpros))) + +(define (launch:execute encoded-cmd) + (let* ((cmdinfo (common:read-encoded-string encoded-cmd)) + (tconfigreg #f)) + (setenv "MT_CMDINFO" encoded-cmd) + ;;(bb-check-path msg: "launch:execute incoming") + (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed) + ;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1)) + (let* ((testpath (assoc/default 'testpath cmdinfo)) ;; testpath is the test spec area + (top-path (assoc/default 'toppath cmdinfo)) + (work-area (assoc/default 'work-area cmdinfo)) ;; work-area is the test run area + (test-name (assoc/default 'test-name cmdinfo)) + (runscript (assoc/default 'runscript cmdinfo)) + (ezsteps (assoc/default 'ezsteps cmdinfo)) + (subrun (assoc/default 'subrun cmdinfo)) + ;; (runremote (assoc/default 'runremote cmdinfo)) + ;; (transport (assoc/default 'transport cmdinfo)) ;; not used + ;; (serverinf (assoc/default 'serverinf cmdinfo)) + ;; (port (assoc/default 'port cmdinfo)) + (serverurl (assoc/default 'serverurl cmdinfo)) + (homehost (assoc/default 'homehost cmdinfo)) + (run-id (assoc/default 'run-id cmdinfo)) + (test-id (assoc/default 'test-id cmdinfo)) + (target (assoc/default 'target cmdinfo)) + (areaname (assoc/default 'areaname cmdinfo)) + (itemdat (assoc/default 'itemdat cmdinfo)) + (env-ovrd (assoc/default 'env-ovrd cmdinfo)) + (set-vars (assoc/default 'set-vars cmdinfo)) ;; pre-overrides from -setvar + (runname (assoc/default 'runname cmdinfo)) + (megatest (assoc/default 'megatest cmdinfo)) + (runtlim (assoc/default 'runtlim cmdinfo)) + (contour (assoc/default 'contour cmdinfo)) + (item-path (item-list->path itemdat)) + (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo)) + (keys #f) + (keyvals #f) + (fullrunscript (if (not runscript) + #f + (if (substring-index "/" runscript) + runscript ;; use unadultered if contains slashes + (let ((fulln (conc work-area "/" runscript))) + (if (and (common:file-exists? fulln) + (file-execute-access? fulln)) + fulln + runscript))))) ;; assume it is on the path + (check-work-area (lambda () + ;; NFS might not have propagated the directory meta data to the run host - give it time if needed + (let loop ((count 0)) + (if (or (common:directory-exists? work-area) + (> count 10)) + (change-directory work-area) + (begin + (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found") + (thread-sleep! 10) + (loop (+ count 1))))) + + (if (not (string=? (common:real-path work-area)(common:real-path (current-directory)))) + (begin + (debug:print 0 *default-log-port* + "INFO: we are expecting to be in directory " work-area "\n" + " but we are actually in the directory " (current-directory) "\n" + " doing another change dir.") + (change-directory work-area))) + + ;; spot check that the files in testpath are available. Too often NFS delays cause problems here. + (let ((files (glob (conc testpath "/*"))) + (bad-files '())) + (for-each + (lambda (fullname) + (let* ((fname (pathname-strip-directory fullname)) + (targn (conc work-area "/" fname))) + (if (not (file-exists? targn)) + (set! bad-files (cons fname bad-files))))) + files) + (if (not (null? bad-files)) + (begin + (debug:print 0 *default-log-port* "INFO: test data from " testpath " not copied properly or filesystem problems causing data to not be found. Re-running the copy command.") + (debug:print 0 *default-log-port* "INFO: missing files from " work-area ": " (string-intersperse bad-files ", ")) + (launch:test-copy testpath work-area)))) + ;; one more time, change to the work-area directory + (change-directory work-area))) + ) ;; let* + + (if contour (setenv "MT_CONTOUR" contour)) + + ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ... + ;; + (setenv "MT_TESTSUITENAME" areaname) + (setenv "MT_RUN_AREA_HOME" top-path) + (set! *toppath* top-path) + (change-directory *toppath*) ;; temporarily switch to the run area home + (setenv "MT_TEST_RUN_DIR" work-area) + + (launch:setup) ;; should be properly in the run area home now + + (if contour (setenv "MT_CONTOUR" contour)) + + ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ... + ;; + (setenv "MT_TESTSUITENAME" areaname) + (setenv "MT_RUN_AREA_HOME" top-path) + (set! *toppath* top-path) + (change-directory *toppath*) ;; temporarily switch to the run area home + (setenv "MT_TEST_RUN_DIR" work-area) + + (launch:setup) ;; should be properly in the run area home now + + (set! tconfigreg (tests:get-all)) ;; mapping of testname => test source path + (let ((sighand (lambda (signum) + ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting + (if (eq? signum signal/stop) + (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting.")) + (set! *time-to-exit* #t) + (debug:print 0 *default-log-port* "Received signal " signum ", cleaning up before exit (set this test to COMPLETED/ABORT) . Please wait...") + (let ((th1 (make-thread (lambda () + (debug:print 0 *default-log-port* "set test to COMPLETED/ABORT begin.") + (rmt:test-set-state-status run-id test-id "COMPLETED" "ABORT" "received kill signal") + (debug:print 0 *default-log-port* "set test to COMPLETED/ABORT complete.") + (debug:print 0 *default-log-port* "Killed by signal " signum ". Exiting") + (exit 1)))) + (th2 (make-thread (lambda () + (thread-sleep! 20) + (debug:print 0 *default-log-port* "Done") + (exit 4))))) + (thread-start! th2) + (thread-start! th1) + (thread-join! th2))))) + (set-signal-handler! signal/int sighand) + (set-signal-handler! signal/term sighand) + ) ;; (set-signal-handler! signal/stop sighand) + + ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART, + ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY* + ;; + (let* ((test-info (let loop ((tries 0)) + (let ((tinfo (rmt:get-test-info-by-id run-id test-id))) + (if tinfo + tinfo + (if (> tries 5) + #f + (begin + (thread-sleep! (+ 1 (* tries 10))) + (loop (+ tries 1)))))))) + (test-host (if test-info + (db:test-get-host test-info) + (begin + (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.") + (exit)))) + (test-pid (db:test-get-process_id test-info))) + (cond + ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag. + ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun + (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request") + ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") + + (rmt:general-call 'set-test-start-time run-id test-id) + (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) + ) ;; prime it for running + ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART")) + (if (process:alive-on-host? test-host test-pid) + (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed") + (exit 1))) + ((member (db:test-get-state test-info) '("COMPLETED")) ;; we do NOT want to re-run COMPLETED jobs. Mark as NOT_STARTED to run! + (debug:print 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed") + (debug:print 0 *default-log-port* "exiting with status 1") + (exit 1)) + ((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))) + ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") + (rmt:general-call 'set-test-start-time run-id test-id) + (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)) + (else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")) + (debug:print 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed") + (debug:print 0 *default-log-port* "exiting with status 1") + (exit 1)))) + + ;; cleanup prior execution's steps + (rmt:delete-steps-for-test! run-id test-id) + + (debug:print 2 *default-log-port* "Executing " test-name " (id: " test-id ") on " (get-host-name)) + (set! keys (rmt:get-keys)) + ;; (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) ;; these may be needed by the launching process + ;; one of these is defunct/redundant ... + (if (not (launch:setup force-reread: #t)) + (begin + (debug:print 0 *default-log-port* "Failed to setup, exiting") + ;; (sqlite3:finalize! db) + ;; (sqlite3:finalize! tdb) + (exit 1))) + ;; validate that the test run area is available + (check-work-area) + + ;; still need to go back to run area home for next couple steps + (change-directory *toppath*) + + ;; NOTE: Current order is to process runconfigs *before* setting the MT_ vars. This + ;; seems non-ideal but could well break stuff + ;; BUG? BUG? BUG? + + (let ((rconfig (full-runconfigs-read)) ;; (read-config (conc *toppath* "/runconfigs.config") #f #t sections: (list "default" target)))) + (wconfig (read-config "waivers.config" #f #t sections: `( "default" ,target )))) ;; read the waivers config if it exists + ;; (setup-env-defaults (conc *toppath* "/runconfigs.config") run-id (make-hash-table) keyvals target) + ;; (set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id)) + ;; Now have runconfigs data loaded, set environment vars + (for-each + (lambda (section) + (for-each + (lambda (varval) + (let ((var (car varval)) + (val (cadr varval))) + (if (and (string? var)(string? val)) + (begin + (safe-setenv var (configf:eval-string-in-environment val))) ;; val) + (debug:print-error 0 *default-log-port* "bad variable spec, " var "=" val)))) + (configf:get-section rconfig section))) + (list "default" target))) + ;;(bb-check-path msg: "launch:execute post block 1") + + ;; NFS might not have propagated the directory meta data to the run host - give it time if needed + (let loop ((count 0)) + (if (or (common:file-exists? work-area) + (> count 10)) + (change-directory work-area) + (begin + (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found") + (thread-sleep! 10) + (loop (+ count 1))))) + + ;; now we can switch to the work-area? + (change-directory work-area) + ;;(bb-check-path msg: "launch:execute post block 1.5") + ;; (change-directory work-area) + (set! keyvals (keys:target->keyval keys target)) + ;; apply pre-overrides before other variables. The pre-override vars must not + ;; clobbers things from the official sources such as megatest.config and runconfigs.config + (if (string? set-vars) + (let ((varpairs (string-split set-vars ","))) + (debug:print 4 *default-log-port* "varpairs: " varpairs) + (map (lambda (varpair) + (let ((varval (string-split varpair "="))) + (if (eq? (length varval) 2) + (let ((var (car varval)) + (val (cadr varval))) + (debug:print 1 *default-log-port* "Adding pre-var/val " var " = " val " to the environment") + (setenv var val))))) + varpairs))) + ;;(bb-check-path msg: "launch:execute post block 2") + (for-each + (lambda (varval) + (let ((var (car varval)) + (val (cadr varval))) + (if val + (setenv var val) + (begin + (debug:print-error 0 *default-log-port* "required variable " var " does not have a valid value. Exiting") + (exit))))) + (list + (list "MT_TEST_RUN_DIR" work-area) + (list "MT_TEST_NAME" test-name) + (list "MT_ITEM_INFO" (conc itemdat)) + (list "MT_ITEMPATH" item-path) + (list "MT_RUNNAME" runname) + (list "MT_MEGATEST" megatest) + (list "MT_TARGET" target) + (list "MT_LINKTREE" (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree")) + (list "MT_TESTSUITENAME" (common:get-testsuite-name)))) + ;;(bb-check-path msg: "launch:execute post block 3") + + (let ((tmppath (getenv "PATH"))) + (if (string-search tmppath " ") + (debug:print 0 *default-log-port* "WARNING: spaces in PATH are not supported.")) + (if mt-bindir-path (setenv "PATH" (conc tmppath":"mt-bindir-path)))) + ;;(bb-check-path msg: "launch:execute post block 4") + ;; (change-directory top-path) + ;; Can setup as client for server mode now + ;; (client:setup) + + ;; environment overrides are done *before* the remaining critical envars. + (alist->env-vars env-ovrd) + ;;(bb-check-path msg: "launch:execute post block 41") + (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) + ;;(bb-check-path msg: "launch:execute post block 42") + (set-item-env-vars itemdat) + ;;(bb-check-path msg: "launch:execute post block 43") + (let ((blacklist (configf:lookup *configdat* "setup" "blacklistvars"))) + (if blacklist + (let ((vars (string-split blacklist))) + (save-environment-as-files "megatest" ignorevars: vars) + (for-each (lambda (var) + (unsetenv var)) + vars)) + (save-environment-as-files "megatest"))) + ;;(bb-check-path msg: "launch:execute post block 44") + ;; open-run-close not needed for test-set-meta-info + ;; (tests:set-full-meta-info #f test-id run-id 0 work-area) + ;; (tests:set-full-meta-info test-id run-id 0 work-area) + (tests:set-full-meta-info #f test-id run-id 0 work-area 10) + + ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here + + (if (args:get-arg "-xterm") + (set! fullrunscript "xterm") + (if (and fullrunscript + (common:file-exists? fullrunscript) + (not (file-execute-access? fullrunscript))) + (system (conc "chmod ug+x " fullrunscript)))) + (launch:extract-scripts-logpro work-area test-name item-path tconfigreg) + +;;;;; ;; We are about to actually kick off the test +;;;;; ;; so this is a good place to remove the records for +;;;;; ;; any previous runs +;;;;; ;; (db:test-remove-steps db run-id testname itemdat) +;;;;; ;; now is also a good time to write the .testconfig file +;;;;; (let* ((tconfig-fname (conc work-area "/.testconfig")) +;;;;; (tconfig-tmpfile (conc tconfig-fname ".tmp")) +;;;;; (tconfig (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs))) +;;;;; (scripts (configf:get-section tconfig "scripts")) +;;;;; (precmd (configf:lookup tconfig ) +;;;;; ;; create .testconfig file +;;;;; (configf:write-alist tconfig tconfig-tmpfile) +;;;;; (file-move tconfig-tmpfile tconfig-fname #t) +;;;;; (delete-file* ".final-status") +;;;;; +;;;;; ;; extract scripts from testconfig and write them to files in test run dir +;;;;; (for-each +;;;;; (lambda (scriptdat) +;;;;; (match scriptdat +;;;;; ((name content) +;;;;; (with-output-to-file name +;;;;; (lambda () +;;;;; (print content) +;;;;; (change-file-mode name (bitwise-ior perm/irwxg perm/irwxu))))) +;;;;; (else +;;;;; (debug:print-info 0 "Invalid script definiton found in [scripts] section of testconfig. \"" scriptdat "\"")))) +;;;;; scripts)) + ;; + + (let* ((m (make-mutex)) + (kill-job? #f) + (exit-info (make-launch:einf pid: #t exit-status: #t exit-code: #t rollup-status: 0)) ;; pid exit-status exit-code (i.e. process was successfully run) rollup-status + (job-thread #f) + ;; (keep-going #t) + (misc-flags (let ((ht (make-hash-table))) + (hash-table-set! ht 'keep-going #t) + ht)) + (runit (lambda () + (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m))) + (monitorjob (lambda () + (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags))) + (th1 (make-thread monitorjob "monitor job")) + (th2 (make-thread runit "run job")) + (tconfig (tests:get-testconfig test-name item-path tconfigreg #t)) + (propagate-exit-code (configf:lookup *configdat* "setup" "propagate-exit-code")) + (propagate-status-list '("FAIL" "KILLED" "ABORT" "DEAD" "CHECK" "SKIP" "WAIVED")) + (test-status "not set") + (precmd (configf:lookup tconfig "setup" "precmd")) + (postcmd (configf:lookup tconfig "setup" "postcmd"))) + ;; first, if set, run the precmd + (if precmd ;; (file-exists? precmd)(file-execute-access? precmd)) + (system precmd)) ;; up to test author to put nbfake if desired. + (set! job-thread th2) + (thread-start! th1) + (thread-start! th2) + (thread-join! th2) + (debug:print-info 0 *default-log-port* "Megatest execute of test " test-name ", item path " item-path " complete. Notifying the db ...") + (debug:print-info 2 *default-log-port* "exit-info = " exit-info) + (hash-table-set! misc-flags 'keep-going #f) + (thread-join! th1) + (thread-sleep! 1) ;; givbe thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec. + (mutex-lock! m) + (let* ((item-path (item-list->path itemdat)) + ;; only state and status needed - use lazy routine + (testinfo (rmt:get-testinfo-state-status run-id test-id))) + ;; Am I completed? + (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) + (let ((new-state (if kill-job? "KILLED" "COMPLETED")) + (new-status (cond + ((not (launch:einf-exit-status exit-info)) "FAIL") ;; job failed to run ... (vector-ref exit-info 1) + ((eq? (launch:einf-rollup-status exit-info) 0) ;; (vector-ref exit-info 3) + ;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO) + (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS")) + ((eq? (launch:einf-rollup-status exit-info) 1) "FAIL") ;; (vector-ref exit-info 3) + ((eq? (launch:einf-rollup-status exit-info) 2) ;; (vector-ref exit-info 3) + ;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN) + (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN")) + ((eq? (launch:einf-rollup-status exit-info) 3) "CHECK") + ((eq? (launch:einf-rollup-status exit-info) 4) "WAIVED") + ((eq? (launch:einf-rollup-status exit-info) 5) "ABORT") + ((eq? (launch:einf-rollup-status exit-info) 6) "SKIP") + (else "FAIL"))) + ) ;; (db:test-get-status testinfo))) + (debug:print-info 0 *default-log-port* "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (launch:einf-exit-status exit-info) " and rollup-status of " (launch:einf-rollup-status exit-info)) + + ;; Leave a .final-status file for each sub-test + (tests:save-final-status run-id test-id) + + (tests:test-set-status! run-id + test-id + new-state + new-status + (args:get-arg "-m") #f) + ;; need to update the top test record if PASS or FAIL and this is a subtest + ;; NO NEED TO CALL set-state-status-and-roll-up-items HERE, THIS IS DONE IN set-state-status-and-roll-up-items called by tests:test-set-status! + ) + ) + + + ;; for automated creation of the rollup html file this is a good place... + (if (not (equal? item-path "")) + (tests:summarize-items run-id test-id test-name #f)) + ;; BUG was this meant to be the antecnt of the if above? + ;; BUG was this meant to be the antecnt of the if above? + (tests:summarize-test run-id test-id) ;; don't force - just update if no + ;; Leave a .final-status file for the top level test + (tests:save-final-status run-id test-id) + (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) ;; end of let* + + (mutex-unlock! m) + (launch:end-of-run-check run-id ) + (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " + work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n") + + + (set! test-status (db:test-get-status (rmt:get-testinfo-state-status run-id test-id))) + + ;; If the propagate-exit-code option has been set in the megatest config, and the test status matches the list, set the exit code to 1. + + (if postcmd + (system postcmd)) + + (if (and propagate-exit-code (string=? propagate-exit-code "yes") (member test-status propagate-status-list)) + (begin + (debug:print 1 *default-log-port* "Setting exit status to 1 because of test status of " test-status) + (set! *globalexitstatus* 1) + ) + ) + + (if (not (launch:einf-exit-status exit-info)) + (exit 4)))) + ))) + +;; DO NOT USE - caching of configs is handled in launch:setup now. +;; +(define (launch:cache-config) + ;; if we have a linktree and -runtests and -target and the directory exists dump the config + ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg + (if (and *configdat* + (or (args:get-arg "-run") + (args:get-arg "-runtests") + (args:get-arg "-execute"))) + (let* ((linktree (common:get-linktree)) ;; (get-environment-variable "MT_LINKTREE")) + (target (common:args-get-target exit-if-bad: #t)) + (runname (or (args:get-arg "-runname") + (args:get-arg ":runname") + (getenv "MT_RUNNAME"))) + (fulldir (conc linktree "/" + target "/" + runname))) + (if (and linktree (common:file-exists? linktree)) ;; can't proceed without linktree + (begin + (debug:print-info 0 *default-log-port* "Have -run with target=" target ", runname=" runname ", fulldir=" fulldir ", testpatt=" (or (args:get-arg "-testpatt") "%")) + (if (not (common:file-exists? fulldir)) + (create-directory fulldir #t)) ;; need to protect with exception handler + (if (and target + runname + (common:file-exists? fulldir)) + (let ((tmpfile (conc fulldir "/.megatest.cfg." (current-seconds))) + (targfile (conc fulldir "/.megatest.cfg-" megatest-version "-" megatest-fossil-hash)) + (rconfig (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash))) + (if (common:file-exists? rconfig) ;; only cache megatest.config AFTER runconfigs has been cached + (begin + (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile) + (if (not (common:in-running-test?)) + (configf:write-alist *configdat* tmpfile)) + (system (conc "ln -sf " tmpfile " " targfile)))) + ))) + (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs."))))) + +(define (launch:handle-zombie-tests run-id) + (let* ((key (conc "zombiescan-runid-"run-id)) + (now (current-seconds)) + (threshold (- (current-seconds) (* 2 (or (configf:lookup-number *configdat* "setup" "deadtime") 120)))) + (val (rmt:get-var key)) + (do-scan? + (cond + ((not val) + #t) + ((< val threshold) + #t) + (else #f)))) + (when do-scan? + (debug:print 1 *default-log-port* "INFO: search and mark zombie tests") + (rmt:set-var key (current-seconds)) + (rmt:find-and-mark-incomplete run-id #f)))) + +;; recover a test where the top controlling mtest may have died +;; +(define (launch:recover-test run-id test-id) + ;; this function is called on the test run host via ssh + ;; + ;; 1. look at the process from pid + ;; - is it owned by calling user + ;; - it it's run directory correct for the test + ;; - is there a controlling mtest (maybe stuck) + ;; 2. if recovery is needed watch pid + ;; - when it exits take the exit code and do the needful + ;; + (let* ((pid (rmt:test-get-top-process-pid run-id test-id)) + (psres (with-input-from-pipe + (conc "ps -F -u " (current-user-name) " | grep -E '" pid " ' | grep -v 'grep -E " pid "'") + (lambda () + (read-line)))) + (rundir (if (string? psres) ;; real process owned by user + (read-symbolic-link (conc "/proc/" pid "/cwd")) + #f))) + ;; now wait on that process if all is correct + ;; periodically update the db with runtime + ;; when the process exits look at the db, if still RUNNING after 10 seconds set + ;; state/status appropriately + (process-wait pid))) + +;;====================================================================== +;; Maintenance +;;====================================================================== + +(define (rmt:find-and-mark-incomplete run-id ovr-deadtime) + (let* ((cfg-deadtime (configf:lookup-number *configdat* "setup" "deadtime")) + (test-stats-update-period (configf:lookup-number *configdat* "setup" "test-stats-update-period"))) + (rmt:find-and-mark-incomplete-engine run-id ovr-deadtime cfg-deadtime test-stats-update-period) + ;;call end of eud of run detection for posthook + (launch:end-of-run-check run-id))) + +;; select end_time-now from +;; (select testname,item_path,event_time+run_duration as +;; end_time,strftime('%s','now') as now from tests where state in +;; ('RUNNING','REMOTEHOSTSTART','LAUNCHED')); +;; +;; NOT EASY TO MIGRATE TO db{file,mod} +;; +(define (rmt:find-and-mark-incomplete-engine run-id ovr-deadtime cfg-deadtime test-stats-update-period) + (let* ((incompleted '()) + (oldlaunched '()) + (toplevels '()) + ;; The default running-deadtime is 720 seconds = 12 minutes. + ;; "(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))" = 200 + (2 * (200 + 30 + 30)) + (deadtime-trim (or ovr-deadtime cfg-deadtime)) + (server-start-allowance 200) + (server-overloaded-budget 200) + (launch-monitor-off-time (or test-stats-update-period 30)) + (launch-monitor-on-time-budget 30) + (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget)) + (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30)) + (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default)) + (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period))) + (running-deadtime (or deadtime-trim running-deadtime-default))) ;; two minutes (30 seconds between updates, this leaves 3x grace period) + + (debug:print-info 4 *default-log-port* "running-deadtime = " running-deadtime) + (debug:print-info 4 *default-log-port* "deadtime-trim = " deadtime-trim) + + (let* ((dat (rmt:get-toplevels-and-incompletes run-id running-deadtime remotehoststart-deadtime))) + (set! oldlaunched (list-ref dat 1)) + (set! toplevels (list-ref dat 2)) + (set! incompleted (list-ref dat 0))) + + (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " + (length toplevels) " old LAUNCHED toplevel tests and " + (length incompleted) " tests marked RUNNING but apparently dead.") + + ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE. + ;; + ;; (db:delay-if-busy dbdat) + (let* ((min-incompleted-ids (map car incompleted)) ;; do 'em all + (all-ids (append min-incompleted-ids (map car oldlaunched)))) + (if (> (length all-ids) 0) + (begin + ;; (launch:is-test-alive "localhost" 435) + (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") + " as DEAD") + (for-each + (lambda (test-id) + (let* ((tinfo (rmt:get-test-info-by-id run-id test-id)) + (run-dir (db:test-get-rundir tinfo)) + (host (db:test-get-host tinfo)) + (pid (db:test-get-process_id tinfo)) + (result (rmt:get-status-from-final-status-file run-dir))) + (if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result))) + (begin + (debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD") + (rmt:set-state-status-and-roll-up-items + run-id test-id 'foo "COMPLETED" "PASS" + "Test stopped responding but it has PASSED; marking it PASS in the DB.")) + (let ((is-alive (and (not (eq? pid 0)) ;; 0 is default in re-used field "attemptnum" where pid stored. + (commonmod:is-test-alive host pid)))) + (if is-alive + (debug:print 0 *default-log-port* "INFO: test " test-id " on host " host + " has a process on pid " pid ", NOT setting to DEAD.") + (begin + (debug:print 0 *default-log-port* "INFO: test " test-id + " final state/status is not COMPLETED/PASS. It is " result) + (rmt:set-state-status-and-roll-up-items + run-id test-id 'foo "COMPLETED" "DEAD" + "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead."))))))) + ;; call end of eud of run detection for posthook - from merge, is it needed? + ;; (launch:end-of-run-check run-id) + all-ids) + ))))) + +;; set up needed environment variables given a run-id and optionally a target, itempath etc. +;; +(define (runs:set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)(intarget #f)(testname #f)(itempath #f)) + ;;(bb-check-path msg: "runs:set-megatest-env-vars entry") + (let* ((target (or intarget + (common:args-get-target) + (get-environment-variable "MT_TARGET"))) + (keys (if inkeys inkeys (rmt:get-keys))) + (keyvals (if inkeyvals inkeyvals (keys:target->keyval keys target))) + (vals (hash-table-ref/default *env-vars-by-run-id* run-id #f)) + (link-tree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree"))) + (if testname (setenv "MT_TEST_NAME" testname)) + (if itempath (setenv "MT_ITEMPATH" itempath)) + + ;; get the info from the db and put it in the cache + (if link-tree + (setenv "MT_LINKTREE" link-tree) + (debug:print-error 0 *default-log-port* "linktree not set, should be set in megatest.config in [setup] section.")) + (if (not vals) + (let ((ht (make-hash-table))) + (hash-table-set! *env-vars-by-run-id* run-id ht) + (set! vals ht) + (for-each + (lambda (key) + (hash-table-set! vals (car key) (cadr key))) + keyvals))) + ;; from the cached data set the vars + + (hash-table-for-each + vals + (lambda (key val) + (debug:print 2 *default-log-port* "setenv " key " " val) + (safe-setenv key val))) + ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1") + ;;(BB> "*env-vars-by-run-id*/runid("run-id" vals="(hash-table->alist vals)) + + (if (not (get-environment-variable "MT_TARGET"))(setenv "MT_TARGET" target)) + ;; we had a case where there was an exception generated by the hash-table-ref + ;; due to *configdat* being #f Adding a handle and exit + (let fatal-loop ((count 0)) + (handle-exceptions + exn + (let ((call-chain (get-call-chain)) + (msg ((condition-property-accessor 'exn 'message) exn))) + (if (< count 5) + (begin ;; this call is colliding, do some crude stuff to fix it. + (debug:print 0 *default-log-port* "ERROR: *configdat* was inaccessible! This should never happen. Retry #" count + ", exn=" exn) + (launch:setup force-reread: #t) + (fatal-loop (+ count 1))) + (begin + (debug:print 0 *default-log-port* "FATAL: *configdat* was inaccessible! This should never happen. Retried " count + " times. Message: " msg) + (debug:print 0 *default-log-port* "Call chain:") + (with-output-to-port *default-log-port* + (lambda () + (print "*configdat* is >>"*configdat*"<<") + (pp *configdat*) + (pp call-chain))) + + (exit 1)))) + ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1.5") + (when (or (not *configdat*) (not (hash-table? *configdat*))) + (debug:print 0 *default-log-port* "WARNING: *configdat* was inaccessible! This should never happen. Brute force reread.") + ;;(BB> "ERROR: *configdat* was inaccessible! This should never happen. Brute force reread.") + (thread-sleep! 2) ;; assuming nfs lag. + (launch:setup force-reread: #t)) + (alist->env-vars (hash-table-ref/default *configdat* "env-override" '())))) ;;;; environment is tainted HERE in this let block. + ;;(bb-check-path msg: "runs:set-megatest-env-vars block 2") + ;; Lets use this as an opportunity to put MT_RUNNAME in the environment + (let ((runname (if inrunname inrunname (rmt:get-run-name-from-id run-id)))) + (if runname + (setenv "MT_RUNNAME" runname) + (debug:print-error 0 *default-log-port* "no value for runname for id " run-id))) + (setenv "MT_RUN_AREA_HOME" *toppath*) + ;; if a testname and itempath are available set the remaining appropriate variables + (if testname (setenv "MT_TEST_NAME" testname)) + (if itempath (setenv "MT_ITEMPATH" itempath)) + ;;(bb-check-path msg: "runs:set-megatest-env-vars block 3") + (if (and testname link-tree) + (setenv "MT_TEST_RUN_DIR" (conc (getenv "MT_LINKTREE") "/" + (getenv "MT_TARGET") "/" + (getenv "MT_RUNNAME") "/" + (getenv "MT_TEST_NAME") + (if (and itempath + (not (equal? itempath ""))) + (conc "/" itempath) + "")))))) +(define (set-item-env-vars itemdat) + (for-each (lambda (item) + (debug:print 2 *default-log-port* "setenv " (car item) " " (cadr item)) + (setenv (car item) (cadr item))) + itemdat)) + + +;; TODO: deprecate me in favor of ezsteps.scm +;; +(define (ezsteps:runstep ezstep run-id test-id exit-info m tal testconfig all-steps-dat) + (let* ((stepname (car ezstep)) ;; do stuff to run the step + (stepinfo (cadr ezstep)) + ;; (let ((info (cadr ezstep))) + ;; (if (proc? info) "" info))) + ;; (stepproc (let ((info (cadr ezstep))) + ;; (if (proc? info) info #f))) + (stepparts (string-match (regexp "^(\\{([^\\}\\{]*)\\}\\s*|)(.*)$") stepinfo)) + (stepparams (if (and (list? stepparts) + (> (length stepparts) 1)) + (list-ref stepparts 2) + #f)) ;; for future use, {VAR=1,2,3}, run step for each + (paramparts (if (string? stepparams) + (map (lambda (x)(string-split x "=")) (string-split-fields "[^;]*=[^;]*" stepparams)) + '())) + (subrun (alist-ref "subrun" paramparts equal?)) + (stepcmd (if (and (list? stepparts) + (> (length stepparts) 2)) + (list-ref stepparts 3) + (conc "# error, no command for step "stepname))) + (script "") ; "#!/bin/bash\n") ;; yep, we depend on bin/bash FIXME!!!\ + (logpro-file (conc stepname ".logpro")) + (html-file (conc stepname ".html")) + (dat-file (conc stepname ".dat")) + (tconfig-logpro (configf:lookup testconfig "logpro" stepname)) + (logpro-used (common:file-exists? logpro-file)) + (mtexepath (common:get-megatest-exe-path))) + (setenv "MT_STEP_NAME" stepname) + (hash-table-set! all-steps-dat stepname `((params . ,paramparts))) + (debug:print 0 *default-log-port* "stepparts: " stepparts ", stepparams: " stepparams + ", paramparts: " paramparts ", subrun: " subrun ", stepcmd: " stepcmd) + + (if (and tconfig-logpro + (not logpro-used)) ;; no logpro file found but have a defn in the testconfig + (begin + (with-output-to-file logpro-file + (lambda () + (print ";; logpro file extracted from testconfig\n" + ";;") + (print tconfig-logpro))) + (set! logpro-used #t))) + + ;; NB// can safely assume we are in test-area directory + (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts + " stepparams: " stepparams " stepcmd: " stepcmd) + + ;; ;; first source the previous environment + ;; (let ((prev-env (conc ".ezsteps/" prevstep (if (string-search (regexp "csh") + ;; (get-environment-variable "SHELL")) ".csh" ".sh")))) + ;; (if (and prevstep (common:file-exists? prev-env)) + ;; (set! script (conc script "source " prev-env)))) + + ;; call the command using mt_ezstep + ;; (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd)) + + (debug:print 4 *default-log-port* "script: " script) + (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f) + ;; now launch the actual process + (call-with-environment-variables + (list (cons "PATH" mtexepath)) + (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1") + (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 + (pid #f)) + (let ((proc (lambda () + (set! pid (process-run "/bin/bash" (list "-c" cmd)))))) + (if subrun + (begin + (debug:print-info 0 *default-log-port* "Running "cmd" without MT_.* environment variables.") + (common:without-vars proc "^MT_.*")) + (proc))) + + (with-output-to-file "Makefile.ezsteps" + (lambda () + (print stepname ".log :") + (print "\t" cmd) + (if (common:file-exists? (conc stepname ".logpro")) + (print "\tlogpro " stepname ".logpro " stepname ".html < " stepname ".log")) + (print) + (print stepname " : " stepname ".log") + (print)) + #:append) + + (rmt:test-set-top-process-pid run-id test-id pid) + (let processloop ((i 0)) + (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) + (mutex-lock! m) + (launch:einf-pid-set! exit-info pid) ;; (vector-set! exit-info 0 pid) + (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status) + (launch:einf-exit-code-set! exit-info exit-code) ;; (vector-set! exit-info 2 exit-code) + (mutex-unlock! m) + (if (eq? pid-val 0) + (begin + (thread-sleep! 2) + (processloop (+ i 1)))) + ))))) + (debug:print-info 0 *default-log-port* "step " stepname " completed with exit code " (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2)) + ;; now run logpro if needed + (if logpro-used + (let* ((logpro-exe (or (getenv "LOGPRO_EXE") "logpro")) + (pid (process-run (conc "/bin/sh -c '"logpro-exe" "logpro-file " " (conc stepname ".html") " < " stepname ".log > /dev/null'")))) + (let processloop ((i 0)) + (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) + (mutex-lock! m) + ;; (make-launch:einf pid: pid exit-status: exit-status exit-code: exit-code) + (launch:einf-pid-set! exit-info pid) ;; (vector-set! exit-info 0 pid) + (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status) + (launch:einf-exit-code-set! exit-info exit-code) ;; (vector-set! exit-info 2 exit-code) + (mutex-unlock! m) + (if (eq? pid-val 0) + (begin + (thread-sleep! 2) + (processloop (+ i 1))))) + (debug:print-info 0 *default-log-port* "logpro for step " stepname " exited with code " (launch:einf-exit-code exit-info))))) ;; (vector-ref exit-info 2))))) + + (let ((exinfo (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2)) + (logfna (if logpro-used (conc stepname ".html") "")) + (comment #f)) + (if logpro-used + (let ((datfile (conc stepname ".dat"))) + ;; load the .dat file into the test_data table if it exists + (if (common:file-exists? datfile) + (set! comment (launch:load-logpro-dat run-id test-id stepname))) + (rmt:test-set-log! run-id test-id (conc stepname ".html")))) + (rmt:teststep-set-status! run-id test-id stepname "end" exinfo comment logfna)) + ;; set the test final status + (let* ((process-exit-status (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2)) + (this-step-status (cond + ((and (eq? process-exit-status 2) logpro-used) 'warn) ;; logpro 2 = warnings + ((and (eq? process-exit-status 3) logpro-used) 'check) ;; logpro 3 = check + ((and (eq? process-exit-status 4) logpro-used) 'waived) ;; logpro 4 = waived + ((and (eq? process-exit-status 5) logpro-used) 'abort) ;; logpro 5 = abort + ((and (eq? process-exit-status 6) logpro-used) 'skip) ;; logpro 6 = skip + ((eq? process-exit-status 0) 'pass) ;; logpro 0 = pass + (else 'fail))) + (overall-status + (cond + ((eq? (launch:einf-rollup-status exit-info) 2) 'warn) ;; rollup-status (vector-ref exit-info 3) + ((eq? (launch:einf-rollup-status exit-info) 3) 'check) + ((eq? (launch:einf-rollup-status exit-info) 4) 'waived) + ((eq? (launch:einf-rollup-status exit-info) 5) 'abort) + ((eq? (launch:einf-rollup-status exit-info) 6) 'skip) + ((eq? (launch:einf-rollup-status exit-info) 0) 'pass) + (else 'fail))) + (next-status (common:worse-status-sym this-step-status overall-status)) + + (next-state ;; "RUNNING") ;; WHY WAS THIS CHANGED TO NOT USE (null? tal) ?? + (cond + ((null? tal) ;; more to run? + "COMPLETED") + (else "RUNNING")))) + (debug:print 4 *default-log-port* "Exit value received: " (launch:einf-exit-code exit-info) " logpro-used: " logpro-used + " this-step-status: " this-step-status " overall-status: " overall-status + " next-status: " next-status " rollup-status: " (launch:einf-rollup-status exit-info)) ;; (vector-ref exit-info 3)) + (case next-status + ((warn) + (launch:einf-rollup-status-set! exit-info 2) ;; (vector-set! exit-info 3 2) ;; rollup-status + ;; NB// test-set-status! does rdb calls under the hood + (tests:test-set-status! run-id test-id next-state "WARN" + (if (eq? this-step-status 'warn) "Logpro warning found" #f) + #f)) + ((check) + (launch:einf-rollup-status-set! exit-info 3) ;; (vector-set! exit-info 3 3) ;; rollup-status + ;; NB// test-set-status! does rdb calls under the hood + (tests:test-set-status! run-id test-id next-state "CHECK" + (if (eq? this-step-status 'check) "Logpro check found" #f) + #f)) + ((waived) + (launch:einf-rollup-status-set! exit-info 4) ;; (vector-set! exit-info 3 3) ;; rollup-status + ;; NB// test-set-status! does rdb calls under the hood + (tests:test-set-status! run-id test-id next-state "WAIVED" + (if (eq? this-step-status 'check) "Logpro waived found" #f) + #f)) + ((abort) + (launch:einf-rollup-status-set! exit-info 5) ;; (vector-set! exit-info 3 4) ;; rollup-status + ;; NB// test-set-status! does rdb calls under the hood + (tests:test-set-status! run-id test-id next-state "ABORT" + (if (eq? this-step-status 'abort) "Logpro abort found" #f) + #f)) + ((skip) + (launch:einf-rollup-status-set! exit-info 6) ;; (vector-set! exit-info 3 4) ;; rollup-status + ;; NB// test-set-status! does rdb calls under the hood + (tests:test-set-status! run-id test-id next-state "SKIP" + (if (eq? this-step-status 'skip) "Logpro skip found" #f) + #f)) + ((pass) + (tests:test-set-status! run-id test-id next-state "PASS" #f #f)) + (else ;; 'fail + (launch:einf-rollup-status-set! exit-info 1) ;; (vector-set! exit-info 3 1) ;; force fail, this used to be next-state but that doesn't make sense. should always be "COMPLETED" + (tests:test-set-status! run-id test-id "COMPLETED" "FAIL" (conc "Failed at step " stepname) #f) + ))) + logpro-used)) + +) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -27,17 +27,56 @@ ;; (declare (uses margs)) (declare (uses mtargs)) (declare (uses mtargs.import)) (declare (uses debugprint)) (declare (uses debugprint.import)) +(declare (uses cookie)) +(declare (uses cookie.import)) +(declare (uses stml2)) +(declare (uses stml2.import)) (declare (uses commonmod)) (declare (uses commonmod.import)) (declare (uses processmod)) (declare (uses processmod.import)) (declare (uses configfmod)) (declare (uses configfmod.import)) +(declare (uses pgdb)) +(declare (uses pgdb.import)) +(declare (uses mtmod)) +(declare (uses mtmod.import)) +(declare (uses servermod)) +(declare (uses servermod.import)) +(declare (uses dbfile)) +(declare (uses dbfile.import)) +(declare (uses dbmod)) +(declare (uses dbmod.import)) +(declare (uses portlogger)) +(declare (uses portlogger.import)) +(declare (uses tcp-transportmod)) +(declare (uses tcp-transportmod.import)) +(declare (uses fsmod)) +(declare (uses fsmod.import)) +(declare (uses megatestmod)) +(declare (uses megatestmod.import)) +(declare (uses apimod)) +(declare (uses apimod.import)) +(declare (uses rmtmod)) +(declare (uses rmtmod.import)) +(declare (uses fsmod)) +(declare (uses fsmod.import)) +(declare (uses cpumod)) +(declare (uses cpumod.import)) +(declare (uses tasksmod)) +(declare (uses tasksmod.import)) +(declare (uses tdb)) +(declare (uses mt)) +(declare (uses api)) +(declare (uses tasks)) ;; only used for debugging. +(declare (uses env)) +(declare (uses diff-report)) +(declare (uses db)) (declare (uses runs)) (declare (uses launch)) (declare (uses server)) (declare (uses tests)) (declare (uses genexample)) @@ -44,30 +83,10 @@ ;; (declare (uses daemon)) (declare (uses db)) ;; (declare (uses dcommon)) -(declare (uses tdb)) -(declare (uses mt)) -(declare (uses api)) -(declare (uses tasks)) ;; only used for debugging. -(declare (uses env)) -(declare (uses diff-report)) -(declare (uses db)) -(declare (uses dbfile)) -(declare (uses dbfile.import)) -(declare (uses dbmod)) -(declare (uses dbmod.import)) -(declare (uses portlogger)) -(declare (uses portlogger.import)) -(declare (uses tcp-transportmod)) -(declare (uses tcp-transportmod.import)) -(declare (uses apimod)) -(declare (uses apimod.import)) -(declare (uses rmtmod)) -(declare (uses rmtmod.import)) - ;; (declare (uses debugprint)) ;; (declare (uses debugprint.import)) ;; (declare (uses ftail)) ;; (import ftail) @@ -81,10 +100,15 @@ dbfile portlogger tcp-transportmod rmtmod apimod + stml2 + mtmod + megatestmod + servermod + tasksmod ) (define *db* #f) ;; this is only for the repl, do not use in general!!!! (include "common_records.scm") @@ -100,10 +124,13 @@ ;; Added for csv stuff - will be removed ;; (use sparse-vectors) (require-library mutils) + +;; remove when configf fully modularized +(read-config-set! configf:read-file) (define *usage-log-file* #f) ;; put path to file for logging usage in this var in the ~/.megatestrc file (define *usage-use-seconds* #t) ;; for Epoc seconds in usage logging change this to #t in ~/.megatestrc file ;; set some parameters here - these need to be put in something that can be loaded from other @@ -1136,53 +1163,10 @@ (json-write targets)) (else (debug:print-error 0 *default-log-port* "dump output format " (args:get-arg "-dumpmode") " not supported for -list-targets"))) (set! *didsomething* #t)))) -;; cache the runconfigs in $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME/.runconfig -;; -(define (full-runconfigs-read) -;; in the envprocessing branch the below code replaces the further below code -;; (if (eq? *configstatus* 'fulldata) -;; *runconfigdat* -;; (begin -;; (launch:setup) -;; *runconfigdat*))) - - (let* ((rundir (if (and (getenv "MT_LINKTREE")(getenv "MT_TARGET")(getenv "MT_RUNNAME")) - (conc (getenv "MT_LINKTREE") "/" (getenv "MT_TARGET") "/" (getenv "MT_RUNNAME")) - #f)) - (cfgf (if rundir (conc rundir "/.runconfig." megatest-version "-" megatest-fossil-hash) #f))) - (if (and cfgf - (common:file-exists? cfgf) - (file-write-access? cfgf) - (common:use-cache?)) - (configf:read-alist cfgf) - (let* ((keys (rmt:get-keys)) - (target (common:args-get-target)) - (key-vals (if target (keys:target->keyval keys target) #f)) - (sections (if target (list "default" target) #f)) - (data (begin - (setenv "MT_RUN_AREA_HOME" *toppath*) - (if key-vals - (for-each (lambda (kt) - (setenv (car kt) (cadr kt))) - key-vals)) - ;; (read-config (conc *toppath* "/runconfigs.config") #f #t sections: sections)))) - (runconfig:read (conc *toppath* "/runconfigs.config") target #f)))) - (if (and rundir ;; have all needed variabless - (directory-exists? rundir) - (file-write-access? rundir)) - (begin - (if (not (common:in-running-test?)) - (configf:write-alist data cfgf)) - ;; force re-read of megatest.config - this resolves circular references between megatest.config - (launch:setup force-reread: #t) - ;; (launch:cache-config) ;; there are two independent config cache locations, turning this one off for now. MRW. - )) ;; we can safely cache megatest.config since we have a valid runconfig - data)))) - (if (args:get-arg "-show-runconfig") (let ((tl (launch:setup))) (push-directory *toppath*) (let ((data (full-runconfigs-read))) ;; keep this one local ADDED megatestmod.scm Index: megatestmod.scm ================================================================== --- /dev/null +++ megatestmod.scm @@ -0,0 +1,805 @@ +;;====================================================================== +;; Copyright 2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +;;====================================================================== +;; Megatestmod: +;; +;; Put things here don't fit anywhere else +;;====================================================================== + +(declare (unit megatestmod)) +(declare (uses debugprint)) +(declare (uses mtargs)) +(declare (uses dbmod)) +(declare (uses dbfile)) +(declare (uses commonmod)) +(declare (uses configfmod)) +(declare (uses processmod)) +(declare (uses mtmod)) +(declare (uses pkts)) +(declare (uses servermod)) +(declare (uses fsmod)) + +(use srfi-69) + +(module megatestmod + * + +(import scheme) +(cond-expand + (chicken-4 + + (import chicken + ports + (prefix base64 base64:) + + (prefix sqlite3 sqlite3:) + data-structures + extras + files + pathname-expand + posix + posix-extras + (prefix dbi dbi:) + + directory-utils + ) + (use srfi-69)) + (chicken-5 + (import (prefix sqlite3 sqlite3:) + ;; data-structures + ;; extras + ;; files + ;; posix + ;; posix-extras + chicken.base + chicken.condition + chicken.file + chicken.file.posix + chicken.io + chicken.pathname + chicken.port + chicken.process + chicken.process-context + chicken.process-context.posix + chicken.sort + chicken.string + chicken.time + chicken.time.posix + + matchable + md5 + message-digest + pathname-expand + system-information + + ))) + +(import call-with-environment-variables + matchable + md5 + message-digest + regex + regex-case + sparse-vectors + srfi-1 + srfi-13 + srfi-18 + srfi-69 + typed-records + z3 + + (prefix mtargs args:) + commonmod + configfmod + dbfile + dbmod + debugprint + mtmod + pkts + processmod + servermod + fsmod + ) + +;;====================================================================== +;; '(print (string-intersperse (map cadr (hash-table-ref/default (read-config "megatest.config" \#f \#t) "disks" '"'"'("none" ""))) "\n"))' +(define (common:get-disks #!key (configf #f)) + (hash-table-ref/default + (or configf (read-config "megatest.config" #f #t)) + "disks" '("none" ""))) + +(define (common:get-install-area) + (let ((exe-path (car (argv)))) + (if (common:file-exists? exe-path) + (handle-exceptions + exn + #f + (pathname-directory + (pathname-directory + (pathname-directory exe-path)))) + #f))) + + +;;====================================================================== +;; T A R G E T S , S T A T E , S T A T U S , +;; R U N N A M E A N D T E S T P A T T +;;====================================================================== + +;;====================================================================== +;; (map print (map car (hash-table->alist (read-config "runconfigs.config" #f #t)))) +;; +(define (common:get-runconfig-targets #!key (configf #f)) + (let ((targs (sort (map car (hash-table->alist + (or configf ;; NOTE: There is no value in using runconfig:read here. + (read-config (conc *toppath* "/runconfigs.config") + #f #t) + (make-hash-table)))) + stringsymbol force-setting) #f)) + (force-result (case force-type + ((#f) #f) + ((always) #t) + ((test) (if (args:get-arg "-execute") ;; we are in a test + #t + #f)) + (else + (debug:print 0 *default-log-port* "ERROR: Bad server force setting " force-setting ", forcing server.") + #t)))) ;; default to requiring server + (if force-result + (begin + (debug:print-info 0 *default-log-port* "ATTENTION! Forcing use of server, force setting is \"" force-setting "\".") + #t) + #f))) + +;; -mrw- this appears to not be used +;; +;; (define (common:print-delay-table) +;; (let loop ((x 0)) +;; (print x "," (common:get-delay x 1)) +;; (if (< x 2) +;; (loop (+ x 0.1))))) + +;; (define (get-cpu-load #!key (remote-host #f)) +;; (car (common:get-cpu-load remote-host))) + +;;====================================================================== +;; (let* ((load-res (process:cmd-run->list "uptime")) +;; (load-rx (regexp "load average:\\s+(\\d+)")) +;; (cpu-load #f)) +;; (for-each (lambda (l) +;; (let ((match (string-search load-rx l))) +;; (if match +;; (let ((newval (string->number (cadr match)))) +;; (if (number? newval) +;; (set! cpu-load newval)))))) +;; (car load-res)) +;; cpu-load)) + +(define (common:get-pkts-dirs mtconf use-lt) + (let* ((pktsdirs-str (or (configf:lookup mtconf "setup" "pktsdirs") + (and use-lt + (conc (or *toppath* + (current-directory)) + "/lt/.pkts")))) + (pktsdirs (if pktsdirs-str + (string-split pktsdirs-str " ") + #f))) + pktsdirs)) + +(define (common:with-queue-db mtconf proc #!key (use-lt #f)(toppath-in #f)) + (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt)) + (pktsdir (if pktsdirs (car pktsdirs) #f)) + (toppath (or (configf:lookup mtconf "scratchdat" "toppath") + toppath-in)) + (pdbpath (or (configf:lookup mtconf "setup" "pdbpath") pktsdir))) + (cond + ((not (and pktsdir toppath pdbpath)) + (debug:print 0 *default-log-port* "ERROR: settings are missing in your megatest.config for area management.") + (debug:print 0 *default-log-port* " you need to have pktsdirs in the [setup] section.")) + ((not (common:file-exists? pktsdir)) + (debug:print 0 *default-log-port* "ERROR: pkts directory not found " pktsdir)) + ((not (equal? (file-owner pktsdir)(current-effective-user-id))) + (debug:print 0 *default-log-port* "ERROR: directory " pktsdir " is not owned by " (current-effective-user-name))) + (else + (let* ((pdb (open-queue-db pdbpath "pkts.db" + schema: '("CREATE TABLE groups (id INTEGER PRIMARY KEY,groupname TEXT, CONSTRAINT group_constraint UNIQUE (groupname));")))) + (proc pktsdirs pktsdir pdb) + (dbi:close pdb)))))) + +(define (common:load-pkts-to-db mtconf #!key (use-lt #f)) + (common:with-queue-db + mtconf + (lambda (pktsdirs pktsdir pdb) + (for-each + (lambda (pktsdir) ;; look at all + (cond + ((not (common:file-exists? pktsdir)) + (debug:print 0 *default-log-port* "ERROR: packets directory " pktsdir " does not exist.")) + ((not (directory? pktsdir)) + (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not a directory.")) + ((not (file-read-access? pktsdir)) + (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not readable.")) + (else + (debug:print-info 0 *default-log-port* "Loading packets found in " pktsdir) + (let ((pkts (glob (conc pktsdir "/*.pkt"))) + (sqdb (dbi:db-conn pdb)) + ) + ;; Put this in a transaction to avoid issues overloading the db + (sqlite3:with-transaction + sqdb + (lambda () + (for-each + (lambda (pkt) + (let* ((uuid (cadr (string-match ".*/([0-9a-f]+).pkt" pkt))) + (exists (lookup-by-uuid pdb uuid #f))) + (if (not exists) + (let* ((pktdat (string-intersperse + (with-input-from-file pkt read-lines) + "\n")) + (apkt (pkt->alist pktdat)) + (ptype (alist-ref 'T apkt))) + (add-to-queue pdb pktdat uuid (or ptype 'cmd) #f 0) + (debug:print 4 *default-log-port* "Added " uuid " of type " ptype " to queue")) + (debug:print 4 *default-log-port* "pkt: " uuid " exists, skipping...") + ))) + pkts))))))) + pktsdirs)) + use-lt: use-lt)) + + +;;====================================================================== +;; use-lt is use linktree "lt" link to find pkts dir +(define (common:save-pkt pktalist-in mtconf use-lt #!key (add-only #f)) ;; add-only saves the pkt only if there is a parent already + (if (or (not add-only) + (hash-table-exists? *pkts-info* 'last-parent)) + (let* ((parent (hash-table-ref/default *pkts-info* 'last-parent #f)) + (pktalist (if parent + (cons `(parent . ,parent) + pktalist-in) + pktalist-in))) + (let-values (((uuid pkt) + (alist->pkt pktalist common:pkts-spec))) + (hash-table-set! *pkts-info* 'last-parent uuid) + (let ((pktsdir (or (hash-table-ref/default *pkts-info* 'pkts-dir #f) + (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt)) + (pktsdir (car pktsdirs))) ;; assume it is there + (hash-table-set! *pkts-info* 'pkts-dir pktsdir) + pktsdir)))) + (debug:print 0 *default-log-port* "pktsdir: "pktsdir) + (handle-exceptions + exn + (debug:print-info 0 "failed to write out packet to " pktsdir ", exn=" exn) ;; don't care if this failed for now but MUST FIX - BUG!! + (if (not (file-exists? pktsdir)) + (create-directory pktsdir #t)) + (with-output-to-file + (conc pktsdir "/" uuid ".pkt") + (lambda () + (print pkt))))))))) + +;;====================================================================== +;; Lookup a value in runconfigs based on -reqtarg or -target +;; +(define (runconfigs-get config var) + (let ((targ (common:args-get-target))) ;; (or (args:get-arg "-reqtarg")(args:get-arg "-target")(getenv "MT_TARGET")))) + (if targ + (or (configf:lookup config targ var) + (configf:lookup config "default" var)) + (configf:lookup config "default" var)))) + +;;====================================================================== +;; R U N S +;;====================================================================== + +;; set tests with state currstate and status currstatus to newstate and newstatus +;; use currstate = #f and or currstatus = #f to apply to any state or status respectively +;; WARNING: SQL injection risk. NB// See new but not yet used "faster" version below +;; +;; AND NOT (item_path='' AND testname in (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));"))) +;; (debug:print 0 *default-log-port* "QRY: " qry) +;; (db:delay-if-busy) +;; +;; NB// This call only operates on toplevel tests. Consider replacing it with more general call +;; +(define (db:set-tests-state-status dbstruct run-id testnames currstate currstatus newstate newstatus) + (let ((test-ids '())) + (for-each + (lambda (testname) + (let ((qry (conc "UPDATE tests SET state=?,status=? WHERE " + (if currstate (conc "state='" currstate "' AND ") "") + (if currstatus (conc "status='" currstatus "' AND ") "") + " run_id=? AND testname LIKE ?;")) + (test-id (db:get-test-id dbstruct run-id testname ""))) + (db:with-db + dbstruct + run-id + #t + (lambda (dbdat db) + (sqlite3:execute db qry + (or newstate currstate "NOT_STARTED") + (or newstatus currstate "UNKNOWN") + run-id testname))) + (if test-id + (begin + (set! test-ids (cons test-id test-ids)) + (mt:process-triggers dbstruct run-id test-id newstate newstatus))))) + testnames) + test-ids)) + +;; ; This is to be the big daddy call NOPE: Replaced by db:set-state-status-and-roll-up-items +;; ; +;; define (db:test-set-state-status dbstruct run-id test-id state status msg) +;; (let ((dbdat (db:get-subdb dbstruct run-id))) +;; (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) +;; (db:general-call dbdat 'set-test-start-time (list test-id))) +;; ;; (if msg +;; ;; (db:general-call dbdat 'state-status-msg (list state status msg test-id)) +;; ;; (db:general-call dbdat 'state-status (list state status test-id))) +;; (db:set-state-status-and-roll-up-items dbstruct run-id test-id #f state status msg) +;; ;; process the test_data table +;; (if (and test-id state status (equal? status "AUTO")) +;; (db:test-data-rollup dbstruct run-id test-id status)) +;; (mt:process-triggers dbstruct run-id test-id state status))) + +;; state is the priority rollup of all states +;; status is the priority rollup of all completed statesfu +;; +;; if test-name is an integer work off that as test-id instead of test-name test-path +;; +(define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) + ;; establish info on incoming test followed by info on top level test + ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met + (let* ((testdat (if (number? test-name) + (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id + (db:keep-trying-until-true ;; in our threaded stuff this call could happen before the test is registered (maybe?) + db:get-test-info + (list dbstruct run-id test-name item-path) + 10))) + (test-id (db:test-get-id testdat)) + (test-name (if (number? test-name) + (db:test-get-testname testdat) + test-name)) + (item-path (db:test-get-item-path testdat)) + (tl-testdat (db:get-test-info dbstruct run-id test-name "")) + (tl-test-id (if tl-testdat + (db:test-get-id tl-testdat) + #f)) + (new-state-eh #f) + (new-status-eh #f)) + (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) + (db:general-call dbstruct run-id 'set-test-start-time (list test-id))) + (mutex-lock! *db-transaction-mutex*) + (db:with-db + dbstruct run-id #t + (lambda (dbdat db) + (let ((tr-res + (sqlite3:with-transaction + db + (lambda () + ;; NB// Pass the db so it is part fo the transaction + (db:test-set-state-status-db db run-id test-id state status comment) ;; this call sets the item state/status + (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item + (let* ((state-status-counts (db:get-all-state-status-counts-for-test db run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test + (state-statuses (db:roll-up-rules state-status-counts state status)) + (newstate (car state-statuses)) + (newstatus (cadr state-statuses))) + (set! new-state-eh newstate) + (set! new-status-eh newstatus) + (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path" newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts) " state-status-counts: " + (apply conc + (map (lambda (x) + (conc + (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | ")) + state-status-counts))); end debug:print + (if tl-test-id + (db:test-set-state-status-db db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct + )))))) + (mutex-unlock! *db-transaction-mutex*) + (if (and test-id state status (equal? status "AUTO")) + (db:test-data-rollup dbstruct run-id test-id status)) + (if new-state-eh ;; moved from db:test-set-state-status + (mt:process-triggers dbstruct run-id test-id new-state-eh new-status-eh)) + tr-res))))) + +(define (mt:lazy-read-test-config test-name) + (let ((tconf (hash-table-ref/default *testconfigs* test-name #f))) + (if tconf + tconf + (let ((test-dirs (tests:get-tests-search-path *configdat*))) + (let loop ((hed (car test-dirs)) + (tal (cdr test-dirs))) + ;; Setting MT_LINKTREE here is almost certainly unnecessary. + (let ((tconfig-file (conc hed "/" test-name "/testconfig"))) + (if (and (common:file-exists? tconfig-file) + (file-read-access? tconfig-file)) + (let ((link-tree-path (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree")) + (old-link-tree (get-environment-variable "MT_LINKTREE"))) + (if link-tree-path (setenv "MT_LINKTREE" link-tree-path)) + (let ((newtcfg (read-config tconfig-file #f #f))) ;; NOTE: Does NOT run [system ...] + (hash-table-set! *testconfigs* test-name newtcfg) + (if old-link-tree + (setenv "MT_LINKTREE" old-link-tree) + (unsetenv "MT_LINKTREE")) + newtcfg)) + (if (null? tal) + (begin + (debug:print-error 0 *default-log-port* "No readable testconfig found for " test-name) + #f) + (loop (car tal)(cdr tal)))))))))) + +(define (mt:process-triggers dbstruct run-id test-id newstate newstatus) + (if test-id + (let* ((test-dat (db:get-test-info-by-id dbstruct run-id test-id))) + (if test-dat + (let* ((test-rundir (db:test-get-rundir test-dat)) ;; ) ;; ) + (test-name (db:test-get-testname test-dat)) + (item-path (db:test-get-item-path test-dat)) + (duration (db:test-get-run_duration test-dat)) + (comment (db:test-get-comment test-dat)) + (event-time (db:test-get-event_time test-dat)) + (tconfig #f) + (state (if newstate newstate (db:test-get-state test-dat))) + (status (if newstatus newstatus (db:test-get-status test-dat))) + (target (getenv "MT_TARGET")) + (runname (getenv "MT_RUNNAME"))) + ;; (mutex-lock! *triggers-mutex*) + ;;;;;; (handle-exceptions + ;;;;;; exn + ;;;;;; (begin + ;;;;;; (debug:print-error 0 *default-log-port* " Exception in mt:process-triggers for run-id="run-id" test-id="test-id" newstate="newstate" newstatus="newstatus + ;;;;;; "\n error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn + ;;;;;; "\n test-rundir="test-rundir + ;;;;;; "\n test-name="test-name + ;;;;;; "\n item-path="item-path + ;;;;;; "\n state="state + ;;;;;; "\n status="status + ;;;;;; "\n") + ;;;;;; (print-call-chain (current-error-port)) + ;;;;;; (with-output-to-port *default-log-port* + ;;;;;; (lambda () + ;;;;;; (print (condition->list exn)))) + ;;;;;; #f) + (if (and test-name + test-rundir) ;; #f means no dir set yet + ;; (common:file-exists? test-rundir) + ;; (directory? test-rundir)) + (call-with-environment-variables + (list (cons "MT_TEST_NAME" (or test-name "no such test")) + (cons "MT_TEST_RUN_DIR" (or test-rundir "no test directory yet")) + (cons "MT_ITEMPATH" (or item-path ""))) + (lambda () + (if (directory-exists? test-rundir) + (push-directory test-rundir) + (push-directory *toppath*)) + (set! tconfig (mt:lazy-read-test-config test-name)) + (for-each (lambda (trigger) + (let* ((munged-trigger (string-translate trigger "/ " "--")) + (logname (conc "last-trigger-" munged-trigger ".log"))) + ;; first any triggers from the testconfig + (let ((cmd (configf:lookup tconfig "triggers" trigger))) + (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "tconfig-" logname) test-name item-path event-time state status target runname))) + ;; next any triggers from megatest.config + (let ((cmd (configf:lookup *configdat* "triggers" trigger))) + (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "mtconfig-" logname) test-name item-path event-time state status target runname))))) + (list + (conc state "/" status) + (conc state "/") + (conc "/" status))) + (pop-directory)) + )) ;; ) + ;; (mutex-unlock! *triggers-mutex*) + ))))) + + +;; Call this one to do all the work and get a standardized list of tests +;; gets paths from configs and finds valid tests +;; returns hash of testname --> fullpath +;; +(define (tests:get-all) + (let* ((test-search-path (tests:get-tests-search-path *configdat*))) + (debug:print 8 *default-log-port* "test-search-path: " test-search-path) + (tests:get-valid-tests (make-hash-table) test-search-path))) + +(define (tests:get-tests-search-path cfgdat) + (let ((paths (let ((section (if cfgdat + (configf:get-section cfgdat "tests-paths") + #f))) + (if section + (map cadr section) + '())))) + (filter (lambda (d) + (if (directory-exists? d) + d + (begin + ;; (if (common:low-noise-print 60 "tests:get-tests-search-path" d) + ;; (debug:print 0 *default-log-port* "WARNING: problem with directory " d ", dropping it from tests path")) + #f))) + (append paths (list (conc *toppath* "/tests")))))) + +(define (tests:get-valid-tests test-registry tests-paths) + (if (null? tests-paths) + test-registry + (let loop ((hed (car tests-paths)) + (tal (cdr tests-paths))) + (if (common:file-exists? hed) + (for-each (lambda (test-path) + (let* ((tname (last (string-split test-path "/"))) + (tconfig (conc test-path "/testconfig"))) + (if (and (not (hash-table-ref/default test-registry tname #f)) + (common:file-exists? tconfig)) + (hash-table-set! test-registry tname test-path)))) + (glob (conc hed "/*")))) + (if (null? tal) + test-registry + (loop (car tal)(cdr tal)))))) + +(define (tests:filter-test-names-not-matched test-names test-patts) + (delete-duplicates + (filter (lambda (testname) + (not (tests:match test-patts testname #f))) + test-names))) + + +(define (tests:filter-test-names test-names test-patts) + (delete-duplicates + (filter (lambda (testname) + (tests:match test-patts testname #f)) + test-names))) + +;; itemmap is a list of testname patterns to maps +;; test1 .*/bar/(\d+) foo/\1 +;; % foo/([^/]+) \1/bar +;; +;; # NOTE: the line with the single % could be the result of +;; # itemmap entry in requirements (legacy). The itemmap +;; # requirements entry is deprecated +;; +(define (tests:get-itemmaps tconfig) + (let ((base-itemmap (configf:lookup tconfig "requirements" "itemmap")) + (itemmap-table (configf:get-section tconfig "itemmap"))) + (append (if base-itemmap + (list (list "%" base-itemmap)) + '()) + (if itemmap-table + itemmap-table + '())))) + + + +(define (tests:get-global-waitons rconfig) + (let* ((global-waitons (runconfigs-get rconfig "!GLOBAL_WAITONS"))) + (if (string? global-waitons) + (string-split global-waitons) + '()))) + +;; return items given config +;; +(define (tests:get-items tconfig) + (let ((items (hash-table-ref/default tconfig "items" #f)) ;; items 4 + (itemstable (hash-table-ref/default tconfig "itemstable" #f))) + ;; if either items or items table is a proc return it so test running + ;; process can know to call items:get-items-from-config + ;; if either is a list and none is a proc go ahead and call get-items + ;; otherwise return #f - this is not an iterated test + (cond + ((procedure? items) + (debug:print-info 4 *default-log-port* "items is a procedure, will calc later") + items) ;; calc later + ((procedure? itemstable) + (debug:print-info 4 *default-log-port* "itemstable is a procedure, will calc later") + itemstable) ;; calc later + ((filter (lambda (x) + (let ((val (car x))) + (if (procedure? val) val #f))) + (append (if (list? items) items '()) + (if (list? itemstable) itemstable '()))) + 'have-procedure) + ((or (list? items)(list? itemstable)) ;; calc now + (debug:print-info 4 *default-log-port* "items and itemstable are lists, calc now\n" + " items: " items " itemstable: " itemstable) + (items:get-items-from-config tconfig)) + (else #f)))) ;; not iterated + + +;; given waiting-test that is waiting on waiton-test extend test-patt appropriately +;; +;; genlib/testconfig sim/testconfig +;; genlib/sch sim/sch/cell1 +;; +;; [requirements] [requirements] +;; mode itemwait +;; # trim off the cell to determine what to run for genlib +;; itemmap /.* +;; +;; waiting-test is waiting on waiton-test so we need to create a pattern for waiton-test given waiting-test and itemmap +;; BB> (tests:extend-test-patts "normal-second/2" "normal-second" "normal-first" '()) +;; observed -> "normal-first/2,normal-first/,normal-second/2,normal-second/" +;; expected -> "normal-first,normal-second/2,normal-second/" +;; testpatt = normal-second/2 +;; waiting-test = normal-second +;; waiton-test = normal-first +;; itemmaps = () + +(define (tests:extend-test-patts test-patt waiting-test waiton-test itemmaps itemized-waiton) + (cond + (itemized-waiton + (let* ((itemmap (tests:lookup-itemmap itemmaps waiton-test)) + (patts (string-split test-patt ",")) + (waiting-test-len (+ (string-length waiting-test) 1)) + (patts-waiton (map (lambda (x) ;; for each incoming patt that matches the waiting test + (let* ((modpatt (if itemmap (db:convert-test-itempath x itemmap) x)) + (newpatt (conc waiton-test "/" (substring modpatt waiting-test-len (string-length modpatt))))) + ;; (conc waiting-test "/," waiting-test "/" (substring modpatt waiton-test-len (string-length modpatt))))) + ;; (print "in map, x=" x ", newpatt=" newpatt) + newpatt)) + (filter (lambda (x) + (eq? (substring-index (conc waiting-test "/") x) 0)) ;; is this patt pertinent to the waiting test + patts))) + (extended-test-patt (append patts (if (null? patts-waiton) + (list (conc waiton-test "/%")) ;; really shouldn't add the waiton forcefully like this + patts-waiton))) + (extended-test-patt-with-toplevels + (fold (lambda (testpatt-item accum ) + (let ((my-match (string-match "^([^%\\/]+)\\/.+$" testpatt-item))) + (cons testpatt-item + (if my-match + (cons + (conc (cadr my-match) "/") + accum) + accum)))) + '() + extended-test-patt))) + (string-intersperse (delete-duplicates extended-test-patt-with-toplevels) ","))) + (else ;; not waiting on items, waiting on entire waiton test. + (let* ((patts (string-split test-patt ",")) + (new-patts (if (member waiton-test patts) + patts + (cons waiton-test patts)))) + (string-intersperse (delete-duplicates new-patts) ","))))) + +) Index: mt.scm ================================================================== --- mt.scm +++ mt.scm @@ -33,15 +33,20 @@ (declare (uses tests)) (declare (uses server)) (declare (uses runs)) (declare (uses rmt)) (declare (uses rmtmod)) +(declare (uses megatestmod)) (import debugprint commonmod configfmod - rmtmod) + rmtmod + megatestmod) + +;; make mt: calls in megatestmod work +;; (read-config-set! read-config) (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") (include "run_records.scm") @@ -48,284 +53,5 @@ (include "test_records.scm") ;; This is the Megatest API. All generally "useful" routines will be wrapped or extended ;; here. -;;====================================================================== -;; R U N S -;;====================================================================== - -;; runs:get-runs-by-patt -;; get runs by list of criteria -;; register a test run with the db -;; -;; Use: (db-get-value-by-header (db:get-header runinfo)(db:get-rows runinfo)) -;; to extract info from the structure returned -;; -(define (mt:get-runs-by-patt keys runnamepatt targpatt) - (let loop ((runsdat (rmt:get-runs-by-patt keys runnamepatt targpatt 0 500 #f 0)) - (res '()) - (offset 0) - (limit 500)) - ;; (print "runsdat: " runsdat) - (let* ((header (vector-ref runsdat 0)) - (runslst (vector-ref runsdat 1)) - (full-list (append res runslst)) - (have-more (eq? (length runslst) limit))) - ;; (debug:print 0 *default-log-port* "header: " header " runslst: " runslst " have-more: " have-more) - (if have-more - (let ((new-offset (+ offset limit)) - (next-batch (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit #f 0))) - (debug:print-info 4 *default-log-port* "More than " limit " runs, have " (length full-list) " runs so far.") - (debug:print-info 0 *default-log-port* "next-batch: " next-batch) - (loop next-batch - full-list - new-offset - limit)) - (vector header full-list))))) - -;;====================================================================== -;; T E S T S -;;====================================================================== - -(define (mt:get-tests-for-run run-id testpatt states status #!key (not-in #t) (sort-by 'event_time) (sort-order "ASC") (qryvals #f)(last-update #f)) - (let loop ((testsdat (rmt:get-tests-for-run run-id testpatt states status 0 500 not-in sort-by sort-order qryvals last-update 'normal)) - (res '()) - (offset 0) - (limit 500)) - (let* ((full-list (append res testsdat)) - (have-more (eq? (length testsdat) limit))) - (if have-more - (let ((new-offset (+ offset limit))) - (debug:print-info 4 *default-log-port* "More than " limit " tests, have " (length full-list) " tests so far.") - (loop (rmt:get-tests-for-run run-id testpatt states status new-offset limit not-in sort-by sort-order qryvals last-update 'normal) - full-list - new-offset - limit)) - full-list)))) - -(define (mt:lazy-get-prereqs-not-met run-id waitons ref-item-path #!key (mode '(normal))(itemmaps #f) ) - (let* ((key (list run-id waitons ref-item-path mode)) - (res (hash-table-ref/default *pre-reqs-met-cache* key #f)) - (useres (let ((last-time (if (vector? res) (vector-ref res 0) #f))) - (if last-time - (< (current-seconds)(+ last-time 5)) - #f)))) - (if useres - (let ((result (vector-ref res 1))) - (debug:print 4 *default-log-port* "Using lazy value res: " result) - result) - (let ((newres (rmt:get-prereqs-not-met run-id waitons ref-item-path mode: mode itemmaps: itemmaps))) - (hash-table-set! *pre-reqs-met-cache* key (vector (current-seconds) newres)) - newres)))) - -(define (mt:get-run-stats dbstruct run-id) -;; Get run stats from local access, move this ... but where? - (db:get-run-stats dbstruct run-id)) - -(define (mt:discard-blocked-tests run-id failed-test tests test-records) - (if (null? tests) - tests - (begin - (debug:print-info 1 *default-log-port* "Discarding tests from " tests " that are waiting on " failed-test) - (let loop ((testn (car tests)) - (remt (cdr tests)) - (res '())) - (let* ((test-dat (hash-table-ref/default test-records testn (vector #f #f '()))) - (waitons (vector-ref test-dat 2))) - ;; (print "mt:discard-blocked-tests run-id: " run-id " failed-test: " failed-test " testn: " testn " with waitons: " waitons) - (if (null? remt) - (let ((new-res (reverse res))) - ;; (print " new-res: " new-res) - new-res) - (loop (car remt) - (cdr remt) - (if (member failed-test waitons) - (begin - (debug:print 0 *default-log-port* "Discarding test " testn "(" test-dat ") due to " failed-test) - res) - (cons testn res))))))))) - -;;====================================================================== -;; T R I G G E R S -;;====================================================================== - -(define (mt:run-trigger cmd test-id test-rundir trigger logname test-name item-path event-time actual-state actual-status target runname) - ;; Putting the commandline into ( )'s means no control over the shell. - ;; stdout and stderr will be caught in the NBFAKE or mt_launch.log files - ;; or equivalent. No need to do this. Just run it? - (let* ((new-trigger-format (configf:lookup *configdat* "setup" "new-trigger-format")) - (fullcmd - (if (and new-trigger-format (string=? new-trigger-format "yes")) - (conc "nbfake " - cmd " " - test-id " " - test-rundir " " - trigger " " - actual-state " " - actual-status " " - event-time " " - target " " - runname " " - test-name " " - item-path - ) - (conc "nbfake " - cmd " " - test-id " " - test-rundir " " - trigger " " - test-name " " - item-path " " - actual-state " " - actual-status " " - event-time - ) - )) - (prev-nbfake-log (get-environment-variable "NBFAKE_LOG"))) - (setenv "NBFAKE_LOG" (conc (cond - ((and (directory-exists? test-rundir) - (file-write-access? test-rundir)) - test-rundir) - ((and (directory-exists? *toppath*) - (file-write-access? *toppath*)) - *toppath*) - (else (conc "/tmp/" (current-user-name)))) - "/" logname)) - (debug:print-info 0 *default-log-port* "TRIGGERED on " trigger ", running command " fullcmd " output at " (get-environment-variable "NBFAKE_LOG")) - (process-run fullcmd) - (if prev-nbfake-log - (setenv "NBFAKE_LOG" prev-nbfake-log) - (unsetenv "NBFAKE_LOG")) - )) - -(define (mt:process-triggers dbstruct run-id test-id newstate newstatus) - (if test-id - (let* ((test-dat (db:get-test-info-by-id dbstruct run-id test-id))) - (if test-dat - (let* ((test-rundir (db:test-get-rundir test-dat)) ;; ) ;; ) - (test-name (db:test-get-testname test-dat)) - (item-path (db:test-get-item-path test-dat)) - (duration (db:test-get-run_duration test-dat)) - (comment (db:test-get-comment test-dat)) - (event-time (db:test-get-event_time test-dat)) - (tconfig #f) - (state (if newstate newstate (db:test-get-state test-dat))) - (status (if newstatus newstatus (db:test-get-status test-dat))) - (target (getenv "MT_TARGET")) - (runname (getenv "MT_RUNNAME"))) - ;; (mutex-lock! *triggers-mutex*) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* " Exception in mt:process-triggers for run-id="run-id" test-id="test-id" newstate="newstate" newstatus="newstatus - "\n error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn - "\n test-rundir="test-rundir - "\n test-name="test-name - "\n item-path="item-path - "\n state="state - "\n status="status - "\n") - (print-call-chain (current-error-port)) - #f) - (if (and test-name - test-rundir) ;; #f means no dir set yet - ;; (common:file-exists? test-rundir) - ;; (directory? test-rundir)) - (call-with-environment-variables - (list (cons "MT_TEST_NAME" (or test-name "no such test")) - (cons "MT_TEST_RUN_DIR" (or test-rundir "no test directory yet")) - (cons "MT_ITEMPATH" (or item-path ""))) - (lambda () - (if (directory-exists? test-rundir) - (push-directory test-rundir) - (push-directory *toppath*)) - (set! tconfig (mt:lazy-read-test-config test-name)) - (for-each (lambda (trigger) - (let* ((munged-trigger (string-translate trigger "/ " "--")) - (logname (conc "last-trigger-" munged-trigger ".log"))) - ;; first any triggers from the testconfig - (let ((cmd (configf:lookup tconfig "triggers" trigger))) - (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "tconfig-" logname) test-name item-path event-time state status target runname))) - ;; next any triggers from megatest.config - (let ((cmd (configf:lookup *configdat* "triggers" trigger))) - (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "mtconfig-" logname) test-name item-path event-time state status target runname))))) - (list - (conc state "/" status) - (conc state "/") - (conc "/" status))) - (pop-directory)) - ))) - ;; (mutex-unlock! *triggers-mutex*) - ))))) - -;;====================================================================== -;; S T A T E A N D S T A T U S F O R T E S T S -;;====================================================================== - -;; speed up for common cases with a little logic -(define (mt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment) - (if (not (and run-id test-id)) - (begin - (debug:print-error 0 *default-log-port* "bad data handed to mt:test-set-state-status-by-id, run-id=" run-id ", test-id=" test-id ", newstate=" newstate) - (print-call-chain (current-error-port)) - #f) - (begin - ;; cond - ;; ((and newstate newstatus newcomment) - ;; (rmt:general-call 'state-status-msg run-id newstate newstatus newcomment test-id)) - ;; ((and newstate newstatus) - ;; (rmt:general-call 'state-status run-id newstate newstatus test-id)) - ;; (else - ;; (if newstate (rmt:general-call 'set-test-state run-id newstate test-id)) - ;; (if newstatus (rmt:general-call 'set-test-status run-id newstatus test-id)) - ;; (if newcomment (rmt:general-call 'set-test-comment run-id newcomment test-id)))) - (rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment) - ;; (mt:process-triggers run-id test-id newstate newstatus) - #t))) - - -(define (mt:test-set-state-status-by-id-unless-completed run-id test-id newstate newstatus newcomment) - (let* ((test-vec (rmt:get-testinfo-state-status run-id test-id)) - (state (vector-ref test-vec 3))) - (if (equal? state "COMPLETED") - #t - (rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment)))) - - -(define (mt:test-set-state-status-by-testname run-id test-name item-path new-state new-status new-comment) - ;(let ((test-id (rmt:get-test-id run-id test-name item-path))) - (rmt:set-state-status-and-roll-up-items run-id test-name item-path new-state new-status new-comment) - ;; (mt:process-triggers run-id test-id new-state new-status) - #t);) - ;;(mt:test-set-state-status-by-id run-id test-id new-state new-status new-comment))) - -(define (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path new-state new-status new-comment) - (let ((test-id (rmt:get-test-id run-id test-name item-path))) - (mt:test-set-state-status-by-id-unless-completed run-id test-id new-state new-status new-comment))) - -(define (mt:lazy-read-test-config test-name) - (let ((tconf (hash-table-ref/default *testconfigs* test-name #f))) - (if tconf - tconf - (let ((test-dirs (tests:get-tests-search-path *configdat*))) - (let loop ((hed (car test-dirs)) - (tal (cdr test-dirs))) - ;; Setting MT_LINKTREE here is almost certainly unnecessary. - (let ((tconfig-file (conc hed "/" test-name "/testconfig"))) - (if (and (common:file-exists? tconfig-file) - (file-read-access? tconfig-file)) - (let ((link-tree-path (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree")) - (old-link-tree (get-environment-variable "MT_LINKTREE"))) - (if link-tree-path (setenv "MT_LINKTREE" link-tree-path)) - (let ((newtcfg (read-config tconfig-file #f #f))) ;; NOTE: Does NOT run [system ...] - (hash-table-set! *testconfigs* test-name newtcfg) - (if old-link-tree - (setenv "MT_LINKTREE" old-link-tree) - (unsetenv "MT_LINKTREE")) - newtcfg)) - (if (null? tal) - (begin - (debug:print-error 0 *default-log-port* "No readable testconfig found for " test-name) - #f) - (loop (car tal)(cdr tal)))))))))) - ADDED mtmod.scm Index: mtmod.scm ================================================================== --- /dev/null +++ mtmod.scm @@ -0,0 +1,493 @@ +;;====================================================================== +;; Copyright 2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +;;====================================================================== +;; Megatestmod: +;; +;; Put things here don't fit anywhere else +;;====================================================================== + +(declare (unit mtmod)) +(declare (uses debugprint)) +(declare (uses mtargs)) +(declare (uses commonmod)) +(declare (uses configfmod)) +;; (declare (uses tcp-transportmod)) ;; we don't want mtmod depending on tcp + +(use srfi-69) + +(module mtmod + * + +(import scheme) +(cond-expand + (chicken-4 + + (import chicken + ports + (prefix base64 base64:) + + (prefix sqlite3 sqlite3:) + data-structures + directory-utils + extras + files + matchable + md5 + message-digest + pathname-expand + posix + posix-extras + regex + regex-case + sparse-vectors + srfi-1 + srfi-18 + srfi-69 + typed-records + z3 + + debugprint + commonmod + configfmod + ;; tcp-transportmod + (prefix mtargs args:) + ) + (use srfi-69)) + (chicken-5 + (import (prefix sqlite3 sqlite3:) + ;; data-structures + ;; extras + ;; files + ;; posix + ;; posix-extras + chicken.base + chicken.condition + chicken.file + chicken.file.posix + chicken.io + chicken.pathname + chicken.port + chicken.process + chicken.process-context + chicken.process-context.posix + chicken.sort + chicken.string + chicken.time + chicken.time.posix + + matchable + md5 + message-digest + pathname-expand + regex + regex-case + srfi-1 + srfi-18 + srfi-69 + typed-records + system-information + + debugprint + ))) + +;; imports common to chk5 and ck4 +(import srfi-13) + +(include "db_records.scm") + +;;====================================================================== +;; stuff from keys that can't be in commonmod. Maybe move all from commonmod to here? +;;====================================================================== + +(define (keys:make-key/field-string confdat) + (let ((fields (configf:get-section confdat "fields"))) + (string-join + (map (lambda (field)(conc (car field) " " (cadr field))) + fields) + ","))) + +;;====================================================================== +;; testsuite and area utilites +;;====================================================================== + +(define (common:get-linktree) + (or (getenv "MT_LINKTREE") + (if *configdat* + (configf:lookup *configdat* "setup" "linktree") + #f) + (if (or *toppath* (getenv "MT_RUN_AREA_HOME")) + (conc (or *toppath* (getenv "MT_RUN_AREA_HOME")) "/lt") + #f) + (let* ((tp (common:get-toppath #f)) + (lt (conc tp "/lt"))) + (if (not tp)(debug:print 0 *default-log-port* "WARNING: cannot calculate best path for linktree, using " lt)) + lt))) + +(define (get-testsuite-name toppath configdat) + (or (lookup configdat "setup" "area-name") + (lookup configdat "setup" "testsuite") + (get-environment-variable "MT_TESTSUITE_NAME") + (if (string? toppath) + (pathname-file toppath) + #f))) + +(define (common:get-testsuite-name) + (or (configf:lookup *configdat* "setup" "area-name") ;; megatest is a flexible tool, testsuite is too limiting a description. + (configf:lookup *configdat* "setup" "testsuite" ) + (getenv "MT_TESTSUITE_NAME") + (pathname-file (or (if (string? *toppath* ) + (pathname-file *toppath*) + #f) + (common:get-toppath #f))) + "please-set-setup-area-name")) ;; (pathname-file (current-directory))))) + +;; need generic find-record-with-var-nmatching-val +;; +(define (path->area-record cfgdat path) + (let* ((areadat (get-cfg-areas cfgdat)) + (all (filter (lambda (x) + (let* ((keyvals (cdr x)) + (pth (alist-ref 'path keyvals))) + (equal? path pth))) + areadat))) + (if (null? all) + #f + (car all)))) ;; return first match + +(define (get-area-name configdat toppath #!optional (short #f)) + ;; look up my area name in areas table (future) + ;; generate auto name + (conc (get-area-path-signature toppath short) + "-" + (get-testsuite-name toppath configdat))) + +;; given a config return an alist of alists +;; area-name => data +;; +(define (get-cfg-areas cfgdat) + (let ((adat (get-section cfgdat "areas"))) + (map (lambda (entry) + `(,(car entry) . + ,(val->alist (cadr entry)))) + adat))) + +;;====================================================================== +;; redefine for future cleanup (converge on area-name, the more generic +;; +(define common:get-area-name common:get-testsuite-name) + +(define (common:get-db-tmp-area . junk) + (if *db-cache-path* + *db-cache-path* + (if *toppath* ;; common:get-create-writeable-dir + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* "Couldn't create path to " *db-cache-path* ", exn=" exn) + (exit 1)) + (let* ((toppath (common:real-path *toppath*)) + (tsname (common:get-testsuite-name)) + (dbpath (common:get-create-writeable-dir + (list (conc "/tmp/" (current-user-name) + "/megatest_localdb/" + tsname "/" + (string-translate toppath "/" ".")) + (conc "/tmp/" (current-process-id) ;; just in case we have an issue with the dir by own user name + "/"(current-user-name) "/megatest_localdb/" + tsname + (string-translate toppath "/" ".")) + )))) + (set! *db-cache-path* dbpath) + ;; ensure megatest area has .mtdb + (let ((dbarea (conc *toppath* "/.mtdb"))) + (if (not (file-exists? dbarea)) + (create-directory dbarea))) + ;; ensure tmp area has .mtdb + (let ((dbarea (conc dbpath "/.mtdb"))) + (if (not (file-exists? dbarea)) + (create-directory dbarea))) + dbpath)) + #f))) + +;====================================================================== +;; T R I G G E R S +;;====================================================================== + +(define (mt:run-trigger cmd test-id test-rundir trigger logname test-name item-path event-time actual-state actual-status target runname) + ;; Putting the commandline into ( )'s means no control over the shell. + ;; stdout and stderr will be caught in the NBFAKE or mt_launch.log files + ;; or equivalent. No need to do this. Just run it? + (let* ((new-trigger-format (configf:lookup *configdat* "setup" "new-trigger-format")) + (fullcmd + (if (and new-trigger-format (string=? new-trigger-format "yes")) + (conc "nbfake " + cmd " " + test-id " " + test-rundir " " + trigger " " + actual-state " " + actual-status " " + event-time " " + target " " + runname " " + test-name " " + item-path + ) + (conc "nbfake " + cmd " " + test-id " " + test-rundir " " + trigger " " + test-name " " + item-path " " + actual-state " " + actual-status " " + event-time + ) + )) + (prev-nbfake-log (get-environment-variable "NBFAKE_LOG"))) + (setenv "NBFAKE_LOG" (conc (cond + ((and (directory-exists? test-rundir) + (file-write-access? test-rundir)) + test-rundir) + ((and (directory-exists? *toppath*) + (file-write-access? *toppath*)) + *toppath*) + (else (conc "/tmp/" (current-user-name)))) + "/" logname)) + (debug:print-info 0 *default-log-port* "TRIGGERED on " trigger ", running command " fullcmd " output at " (get-environment-variable "NBFAKE_LOG")) + (process-run fullcmd) + (if prev-nbfake-log + (setenv "NBFAKE_LOG" prev-nbfake-log) + (unsetenv "NBFAKE_LOG")) + )) + + +(define (mt:discard-blocked-tests run-id failed-test tests test-records) + (if (null? tests) + tests + (begin + (debug:print-info 1 *default-log-port* "Discarding tests from " tests " that are waiting on " failed-test) + (let loop ((testn (car tests)) + (remt (cdr tests)) + (res '())) + (let* ((test-dat (hash-table-ref/default test-records testn (vector #f #f '()))) + (waitons (vector-ref test-dat 2))) + ;; (print "mt:discard-blocked-tests run-id: " run-id " failed-test: " failed-test " testn: " testn " with waitons: " waitons) + (if (null? remt) + (let ((new-res (reverse res))) + ;; (print " new-res: " new-res) + new-res) + (loop (car remt) + (cdr remt) + (if (member failed-test waitons) + (begin + (debug:print 0 *default-log-port* "Discarding test " testn "(" test-dat ") due to " failed-test) + res) + (cons testn res))))))))) + + +;; Puts out all combinations +(define (process-itemlist hierdepth curritemkey itemlist) + (let ((res '())) + (if (not hierdepth) + (set! hierdepth (length itemlist))) + (let loop ((hed (car itemlist)) + (tal (cdr itemlist))) + (if (null? tal) + (for-each (lambda (item) + (if (> (length curritemkey) (- hierdepth 2)) + (set! res (append res (list (append curritemkey (list (list (car hed) item)))))))) + (cadr hed)) + (begin + (for-each (lambda (item) + (set! res (append res (process-itemlist hierdepth (append curritemkey (list (list (car hed) item))) tal)))) + (cadr hed)) + (loop (car tal)(cdr tal))))) + res)) + +;; (item-assoc->item-list '(("ANIMAL" "Elephant Lion")("SEASON" "Spring Fall"))) +;; => ((("ANIMAL" "Elephant") ("SEASON" "Spring")) +;; (("ANIMAL" "Elephant") ("SEASON" "Fall")) +;; (("ANIMAL" "Lion") ("SEASON" "Spring")) +;; (("ANIMAL" "Lion") ("SEASON" "Fall"))) +(define (item-assoc->item-list itemsdat) + (if (and itemsdat (not (null? itemsdat))) + (let ((itemlst (filter (lambda (x) + (list? x)) + (map (lambda (x) + (debug:print 6 *default-log-port* "item-assoc->item-list x: " x) + (if (< (length x) 2) + (begin + (debug:print-error 0 *default-log-port* "malformed items spec " (string-intersperse x " ")) + (list (car x)'())) + (let* ((name (car x)) + (items (cadr x)) + (ilist (list name (if (string? items) + (string-split items) + '())))) + (if (null? ilist) + (debug:print-error 0 *default-log-port* "No items specified for " name)) + ilist))) + itemsdat)))) + (let ((debuglevel 5)) + (debug:print 5 *default-log-port* "item-assoc->item-list: itemsdat => itemlst ") + (if (debug:debug-mode 5) + (begin + (pp itemsdat) + (print " => ") + (pp itemlst)))) + (if (> (length itemlst) 0) + (process-itemlist #f '() itemlst) + '())) + '())) ;; return a list consisting on a single null list for non-item runs + ;; Nope, not now, return null as of 6/6/2011 + +;; (item-table->item-list '(("ANIMAL" "Elephant Lion")("SEASON" "Spring Winter"))) +;; => ((("ANIMAL" "Elephant")("SEASON" "Spring")) +;; (("ANIMAL" "Lion") ("SEASON" "Winter"))) +(define (item-table->item-list itemtable) + (let ((newlst (map (lambda (x) + (if (> (length x) 1) + (list (car x) + (string-split (cadr x))) + (list x '()))) + itemtable)) + (res '())) ;; a list of items + (let loop ((indx 0) + (item '()) ;; an item will be ((KEYNAME1 VAL1)(KEYNAME2 VAL2) ...) + (elflag #f)) + (for-each (lambda (row) + (let ((rowname (car row)) + (rowdat (cadr row))) + (set! item (append item + (list + (if (< indx (length rowdat)) + (let ((new (list rowname (list-ref rowdat indx)))) + ;; (debug:print 0 *default-log-port* "New: " new) + (set! elflag #t) + new + ) ;; i.e. had at least on legit value to use + (list rowname "-"))))))) + newlst) + (if elflag + (begin + (set! res (append res (list item))) + (loop (+ indx 1) + '() + #f))) + res))) + ;; Nope, not now, return null as of 6/6/2011 + +(define (items:check-valid-items class item) + (let ((valid-values (let ((s (configf:lookup *configdat* "validvalues" class))) + (if s (string-split s) #f)))) + (if valid-values + (if (member item valid-values) + item #f) + item))) + +;; '(("k1" "k2" "k3") +;; ("a" "b" "c") +;; ("d" "e" "f")) +;; +;; => '((("k1" "a")("k2" "b")("k3" "c")) +;; (("k1" "d")("k2" "e")("k3" "f"))) +;; +(define (items:first-row-intersperse data) + (if (< (length data) 2) + '() + (let ((header (car data)) + (rows (cdr data))) + (map (lambda (row) + (map list header row)) + rows)))) + +;; k1/k2/k3 +;; a/b/c +;; d/e/f +;; => '(("k1" "k2" "k3") +;; ("a" "b" "c") +;; ("d" "e" "f")) +;; +;; => '((("k1" "a")("k2" "b")("k3" "c")) +;; (("k1" "d")("k2" "e")("k3" "f"))) +;; +(define (items:read-items-file fname ftype) ;; 'sxml 'slash 'space + (if (and fname (file-exists? fname)) + (items:first-row-intersperse (case ftype + ((slash space) + (let ((splitter (case ftype + ((slash) (lambda (x)(string-split x "/"))) + (else string-split)))) + (debug:print 0 *default-log-port* "Reading " fname " of type " ftype) + (with-input-from-file fname + (lambda () + (let loop ((inl (read-line)) + (res '())) + (if (eof-object? inl) + res + (loop (read-line)(cons (splitter inl) res)))))))) + ((sxml)(with-input-from-file fname read)) + (else (debug:print 0 *default-log-port* "items file type " ftype " not recognised")))) + (begin + (if fname (debug:print 0 *default-log-port* "no items file " fname " found")) + '()))) + +(define (items:get-items-from-config tconfig) + (let* ((slashf (configf:lookup tconfig "itemopts" "slash")) ;; a/b/c\nd/e/f\n ... + (sxmlf (configf:lookup tconfig "itemopts" "sxml")) ;; '(("a" "b" "c")("d" "e" "f") ...) + (spacef (configf:lookup tconfig "itemopts" "space")) ;; a b c\nd e f\n ... + (have-items (hash-table-ref/default tconfig "items" #f)) + (have-itable (hash-table-ref/default tconfig "itemstable" #f)) + (items (hash-table-ref/default tconfig "items" '())) + (itemstable (hash-table-ref/default tconfig "itemstable" '()))) + (debug:print 5 *default-log-port* "items: " items " itemstable: " itemstable) + (set! items (map (lambda (item) + (if (procedure? (cadr item)) + (list (car item)((cadr item))) ;; evaluate the proc + item)) + items)) + (set! itemstable (map (lambda (item) + (if (procedure? (cadr item)) + (list (car item)((cadr item))) ;; evaluate the proc + item)) + itemstable)) + (if (and have-items (null? items)) (debug:print 0 *default-log-port* "WARNING:[items] section in testconfig but no entries defined")) + (if (and have-itable (null? itemstable))(debug:print 0 *default-log-port* "WARNNG:[itemstable] section in testconfig but no entries defined")) + (if (or (not (null? items)) + (not (null? itemstable)) + slashf + sxmlf + spacef) + (append (item-assoc->item-list items) + (item-table->item-list itemstable) + (items:read-items-file slashf 'slash) + (items:read-items-file sxmlf 'sxml) + (items:read-items-file spacef 'space)) + '(())))) + +;; (pp (item-assoc->item-list itemdat)) + + + + +) Index: mtut.scm ================================================================== --- mtut.scm +++ mtut.scm @@ -19,16 +19,17 @@ (declare (uses common)) (declare (uses mtargs)) (declare (uses mtargs.import)) (declare (uses debugprint)) (declare (uses debugprint.import)) +(declare (uses commonmod)) +(declare (uses commonmod.import)) (declare (uses configfmod)) (declare (uses configfmod.import)) (declare (uses configf)) -;; (declare (uses rmt)) -(declare (uses commonmod)) -(declare (uses commonmod.import)) +(declare (uses rmtmod)) +(declare (uses rmtmod.import)) (include "megatest-version.scm") ;; fake out readline usage of toplevel-command (define (toplevel-command . a) #f) Index: ods.scm ================================================================== --- ods.scm +++ ods.scm @@ -20,208 +20,5 @@ (declare (unit ods)) (declare (uses common)) (declare (uses commonmod)) (import commonmod) -(define ods:dirs - '("Configurations2" - "Configurations2/toolpanel" - "Configurations2/menubar" - "Configurations2/toolbar" - "Configurations2/progressbar" - "Configurations2/floater" - "Configurations2/images" - "Configurations2/images/Bitmaps" - "Configurations2/statusbar" - "Configurations2/popupmenu" - "Configurations2/accelerator" - "META-INF" - "Thumbnails")) - -(define ods:0-len-files - '("Configurations2/accelerator/current.xml" - ;; "Thumbnails/thumbnail.png" - "content.xml" - )) - -(define ods:files - '(("META-INF/manifest.xml" - ("\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n")) - ("styles.xml" - ("\n" - "$-$???Page 1??? (???)09/06/2011, 20:48:51Page 1 / 99\n")) - ("settings.xml" - ("\n" - "0045161799view100000020000010060true04000020000010060trueSheet2270010060falsetruetruetrue12632256truetruetruetruefalsefalse1270127011truefalsetrue3falsetruetruetrue12701270false1truetrue1true12632256falsefalsetrue0truetruetruefalsetrue\n")) - ("mimetype" - ("application/vnd.oasis.opendocument.spreadsheet")) - ("meta.xml" - ("\n" - "Matt Welland2011-09-06T20:46:232011-09-06T20:48:51Matt WellandPT2M29S1LibreOffice/3.3$Linux LibreOffice_project/330m19$Build-301\n")))) - -(define ods:content-header - '("\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n")) - -(define ods:content-footer - '("\n" - "\n" - "\n")) - -(define (ods:make-thumbnail path) - (let ((oup (open-output-pipe (conc "uudecode -o " path "/Thumbnails/thumbnail.png")))) - (with-output-to-port oup - (lambda () - (print "begin-base64 640 Thumbnail.png -iVBORw0KGgoAAAANSUhEUgAAAL4AAAEACAIAAACCoVt7AAAEWElEQVR4nO3X -MU4bWQCA4bGUo5gUKCcgJwCaVNvShdI06VKmSxNKp6PdKg3xCcgJIhr7Ll6P -DTgBRbv5i11W+r7Gw7yZx0jv5415sV6vB/h9L/7rB+D/apfO4nxy8nk8OPq0 -vDm9Pr8+nc+mv75pcXl5MNtfsLp8fXDxbRjefl3Pj//xb340yW+N8gyM6awu -vxwu1+txnVar1Xj2z7PJpoUxhYNdFmNSs+EukdHRcHpzt7Kr69s/luub6Wa1 -V8Px9tx9TLsSH2a4OxwjWx5+uLgYhtOr4ezXo8Ori4tt0b8XJf+KMZ3p7N3w -ejIZV227hMP3V+/XNweX59erxZddK98uPi5eDvfdbC672u8I09l8tvlYDC/v -z93HNJa4+Hj7fr0+3mxs54vTw1e7BM+vh9n7T8PBbPlx8jD/k9HT4WzsRzfP -0/aFtVi+vNl9W75b4MODhwv2C7c4vz/e7C8/zzK+8Iav6ycLPJ1Ol3/zAPv5 -N5vfo7tnN+vZuIFNJvJ5frYvrOHLh8nJyfjjuOsM1/slPH53uNmPTnYDD8dH -R5ut4uGFdf9F6WQy3C3wdPbmdjKZDNsw7u56PPMw3F6cXS6vDs/u57/66cE2 -o+e3w+fP203p7RvdPDvbF9bx/GY935/bvYDuPsa//IeBH473jufrH+9+cu54 -f9dPM893u9QPcz4dnT+emGfDP+dE0iGSDpF0iKRDJB0i6RBJh0g6RNIhkg6R -dIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE0iGSDpF0iKRDJB0i -6RBJh0g6RNIhkg6RdIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE -0iGSDpF0iKRDJB0i6RBJh0g6RNIhkg6RdIikQyQdIukQSYdIOkTSIZIOkXSI -pEMkHSLpEEmHSDpE0iGSDpF0iKRDJB0i6RBJh0g6RNIhkg6RdIikQyQdIukQ -SYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE0iGSDpF0iKRDJB0i6RBJh0g6RNIh -kg6RdIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE0iGSDpF0iKRD -JB0i6RBJh0g6RNIhkg6RdIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLpEEmH -SDpE0iGSDpF0iKRDJB0i6RBJh0g6RNIhkg6RdIikQyQdIukQSYdIOkTSIZIO -kXSIpEMkHSLpEEmHSDpE0iGSDpF0iKRDJB0i6RBJh0g6RNIhkg6RdIikQyQd -IukQSYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE0iGSDpF0iKRDJB0i6RBJh0g6 -RNIhkg6RdIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE0iGSDpF0 -iKRDJB0i6RBJh0g6RNIhkg6RdIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLp -EEmHSDpE0iGSDpF0iKRDJB0i6RBJh+gv8TgE/jVPQbMAAAAASUVORK5CYII= -===="))))) - -;; sheetdat is '("sheetname" (r1c1 r2c2 ...)(r2c1 r2c2 ...) ...) -(define (ods:sheet sheetdat) - (let ((name (car sheetdat)) - (rows (cdr sheetdat))) - (conc "\n" - (conc (ods:column) - (string-join (map ods:row rows) "")) - ""))) - -;; seems to be called once at top of each sheet, i.e. a column of rows -(define (ods:column) - "\n") - -;; cells is a list of ... -(define (ods:row cells) - (conc "\n" - (string-join (map ods:cell cells) "") - "\n")) - -;; types are "string" or "float" -(define (ods:cell value) - (let* ((type (cond - ((string? value) "string") - ((symbol? value) "string") - ((number? value) "float") - (else #f))) - (tmpval (if (symbol? value) - (symbol->string value) - (if type value ""))) ;; convert everything else to an empty string - (escval (if (string? tmpval)(string-substitute (regexp "<") "<" (string-substitute (regexp ">") ">" tmpval)) tmpval))) - (conc "\n" - "" escval "" "\n" - "" "\n"))) - -;; create the directories -(define (ods:construct-dir path) - (for-each - (lambda (subdir) - (system (conc "mkdir -p " path "/" subdir))) - ods:dirs)) - -;; populate the necessary, non-constructed, files -(define (ods:add-non-content-files path) - ;; first the zero-length files, nb// the dir should already be created - (for-each - (lambda (fname) - (system (conc "touch " path "/" fname))) - ods:0-len-files) - ;; create the files with stuff in them - (for-each - (lambda (fdat) - (let* ((name (car fdat)) - (lines (cadr fdat))) - (with-output-to-file (conc path "/" name) - (lambda () - (for-each - (lambda (line) - (display line)) - lines))))) - ods:files)) - -;; data format: -;; '( (sheet1 (r1c1 r1c2 r1c3 ...) -;; (r2c1 r2c3 r2c3 ...) ) -;; (sheet2 ( ... ) -;; ( ... ) ) ) -(define (ods:list->ods path fname data) - (if (not (common:file-exists? path)) - (print "ERROR: path to create ods data must pre-exist") - (begin - (with-output-to-file (conc path "/content.xml") - (lambda () - (ods:construct-dir path) - (ods:add-non-content-files path) - (ods:make-thumbnail path) - (map display ods:content-header) - ;; process each sheet - (map print - (map ods:sheet data)) - (map display ods:content-footer))) - (system (conc "cd " path "; zip " fname " -n mimetype mimetype `find . |grep -v mimetype` > /dev/null"))))) - ADDED odsmod.scm Index: odsmod.scm ================================================================== --- /dev/null +++ odsmod.scm @@ -0,0 +1,381 @@ +;; Copyright 2011, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . +;; + +(use csv-xml regex) +(declare (unit odsmod)) +(declare (uses common)) +(declare (uses commonmod)) +(declare (uses debugprint)) +(declare (uses dbfile)) +(declare (uses dbmod)) + +(module odsmod + * + +(import scheme + chicken + data-structures + extras + posix + ports + regex + srfi-1 + srfi-13 + (prefix sqlite3 sqlite3:) + + commonmod + debugprint + dbfile + dbmod + ) + +(define ods:dirs + '("Configurations2" + "Configurations2/toolpanel" + "Configurations2/menubar" + "Configurations2/toolbar" + "Configurations2/progressbar" + "Configurations2/floater" + "Configurations2/images" + "Configurations2/images/Bitmaps" + "Configurations2/statusbar" + "Configurations2/popupmenu" + "Configurations2/accelerator" + "META-INF" + "Thumbnails")) + +(define ods:0-len-files + '("Configurations2/accelerator/current.xml" + ;; "Thumbnails/thumbnail.png" + "content.xml" + )) + +(define ods:files + '(("META-INF/manifest.xml" + ("\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n")) + ("styles.xml" + ("\n" + "$-$???Page 1??? (???)09/06/2011, 20:48:51Page 1 / 99\n")) + ("settings.xml" + ("\n" + "0045161799view100000020000010060true04000020000010060trueSheet2270010060falsetruetruetrue12632256truetruetruetruefalsefalse1270127011truefalsetrue3falsetruetruetrue12701270false1truetrue1true12632256falsefalsetrue0truetruetruefalsetrue\n")) + ("mimetype" + ("application/vnd.oasis.opendocument.spreadsheet")) + ("meta.xml" + ("\n" + "Matt Welland2011-09-06T20:46:232011-09-06T20:48:51Matt WellandPT2M29S1LibreOffice/3.3$Linux LibreOffice_project/330m19$Build-301\n")))) + +(define ods:content-header + '("\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n")) + +(define ods:content-footer + '("\n" + "\n" + "\n")) + +(define (ods:make-thumbnail path) + (let ((oup (open-output-pipe (conc "uudecode -o " path "/Thumbnails/thumbnail.png")))) + (with-output-to-port oup + (lambda () + (print "begin-base64 640 Thumbnail.png +iVBORw0KGgoAAAANSUhEUgAAAL4AAAEACAIAAACCoVt7AAAEWElEQVR4nO3X +MU4bWQCA4bGUo5gUKCcgJwCaVNvShdI06VKmSxNKp6PdKg3xCcgJIhr7Ll6P +DTgBRbv5i11W+r7Gw7yZx0jv5415sV6vB/h9L/7rB+D/apfO4nxy8nk8OPq0 +vDm9Pr8+nc+mv75pcXl5MNtfsLp8fXDxbRjefl3Pj//xb340yW+N8gyM6awu +vxwu1+txnVar1Xj2z7PJpoUxhYNdFmNSs+EukdHRcHpzt7Kr69s/luub6Wa1 +V8Px9tx9TLsSH2a4OxwjWx5+uLgYhtOr4ezXo8Ori4tt0b8XJf+KMZ3p7N3w +ejIZV227hMP3V+/XNweX59erxZddK98uPi5eDvfdbC672u8I09l8tvlYDC/v +z93HNJa4+Hj7fr0+3mxs54vTw1e7BM+vh9n7T8PBbPlx8jD/k9HT4WzsRzfP +0/aFtVi+vNl9W75b4MODhwv2C7c4vz/e7C8/zzK+8Iav6ycLPJ1Ol3/zAPv5 +N5vfo7tnN+vZuIFNJvJ5frYvrOHLh8nJyfjjuOsM1/slPH53uNmPTnYDD8dH +R5ut4uGFdf9F6WQy3C3wdPbmdjKZDNsw7u56PPMw3F6cXS6vDs/u57/66cE2 +o+e3w+fP203p7RvdPDvbF9bx/GY935/bvYDuPsa//IeBH473jufrH+9+cu54 +f9dPM893u9QPcz4dnT+emGfDP+dE0iGSDpF0iKRDJB0i6RBJh0g6RNIhkg6R +dIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE0iGSDpF0iKRDJB0i +6RBJh0g6RNIhkg6RdIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE +0iGSDpF0iKRDJB0i6RBJh0g6RNIhkg6RdIikQyQdIukQSYdIOkTSIZIOkXSI +pEMkHSLpEEmHSDpE0iGSDpF0iKRDJB0i6RBJh0g6RNIhkg6RdIikQyQdIukQ +SYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE0iGSDpF0iKRDJB0i6RBJh0g6RNIh +kg6RdIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE0iGSDpF0iKRD +JB0i6RBJh0g6RNIhkg6RdIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLpEEmH +SDpE0iGSDpF0iKRDJB0i6RBJh0g6RNIhkg6RdIikQyQdIukQSYdIOkTSIZIO +kXSIpEMkHSLpEEmHSDpE0iGSDpF0iKRDJB0i6RBJh0g6RNIhkg6RdIikQyQd +IukQSYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE0iGSDpF0iKRDJB0i6RBJh0g6 +RNIhkg6RdIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE0iGSDpF0 +iKRDJB0i6RBJh0g6RNIhkg6RdIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLp +EEmHSDpE0iGSDpF0iKRDJB0i6RBJh+gv8TgE/jVPQbMAAAAASUVORK5CYII= +===="))))) + +;; sheetdat is '("sheetname" (r1c1 r2c2 ...)(r2c1 r2c2 ...) ...) +(define (ods:sheet sheetdat) + (let ((name (car sheetdat)) + (rows (cdr sheetdat))) + (conc "\n" + (conc (ods:column) + (string-join (map ods:row rows) "")) + ""))) + +;; seems to be called once at top of each sheet, i.e. a column of rows +(define (ods:column) + "\n") + +;; cells is a list of ... +(define (ods:row cells) + (conc "\n" + (string-join (map ods:cell cells) "") + "\n")) + +;; types are "string" or "float" +(define (ods:cell value) + (let* ((type (cond + ((string? value) "string") + ((symbol? value) "string") + ((number? value) "float") + (else #f))) + (tmpval (if (symbol? value) + (symbol->string value) + (if type value ""))) ;; convert everything else to an empty string + (escval (if (string? tmpval)(string-substitute (regexp "<") "<" (string-substitute (regexp ">") ">" tmpval)) tmpval))) + (conc "\n" + "" escval "" "\n" + "" "\n"))) + +;; create the directories +(define (ods:construct-dir path) + (for-each + (lambda (subdir) + (system (conc "mkdir -p " path "/" subdir))) + ods:dirs)) + +;; populate the necessary, non-constructed, files +(define (ods:add-non-content-files path) + ;; first the zero-length files, nb// the dir should already be created + (for-each + (lambda (fname) + (system (conc "touch " path "/" fname))) + ods:0-len-files) + ;; create the files with stuff in them + (for-each + (lambda (fdat) + (let* ((name (car fdat)) + (lines (cadr fdat))) + (with-output-to-file (conc path "/" name) + (lambda () + (for-each + (lambda (line) + (display line)) + lines))))) + ods:files)) + +;; data format: +;; '( (sheet1 (r1c1 r1c2 r1c3 ...) +;; (r2c1 r2c3 r2c3 ...) ) +;; (sheet2 ( ... ) +;; ( ... ) ) ) +(define (ods:list->ods path fname data) + (if (not (common:file-exists? path)) + (print "ERROR: path to create ods data must pre-exist") + (begin + (with-output-to-file (conc path "/content.xml") + (lambda () + (ods:construct-dir path) + (ods:add-non-content-files path) + (ods:make-thumbnail path) + (map display ods:content-header) + ;; process each sheet + (map print + (map ods:sheet data)) + (map display ods:content-footer))) + (system (conc "cd " path "; zip " fname " -n mimetype mimetype `find . |grep -v mimetype` > /dev/null"))))) + +;;====================================================================== +;; Extract ods file from the db +;;====================================================================== + +;; NOT REWRITTEN YET!!!!! + +;; runspatt is a comma delimited list of run patterns +;; keypatt-alist must contain *all* keys with an associated pattern: '( ("KEY1" "%") .. ) +(define (db:extract-ods-file dbstruct outputfile keypatt-alist runspatt pathmod) + (assert #f "FATAL: call to db:extract-ods-file which is not ported yet.") + (let* ((keysstr (string-intersperse (map car keypatt-alist) ",")) + (keyqry (string-intersperse (map (lambda (p)(conc (car p) " LIKE ? ")) keypatt-alist) " AND ")) + (numkeys (length keypatt-alist)) + (test-ids '()) + (dbdat (db:get-subdb dbstruct)) + (db (dbr:dbdat-dbh dbdat)) + (windows (and pathmod (substring-index "\\" pathmod))) + (tempdir (conc "/tmp/" (current-user-name) "/" runspatt "_" (random 10000) "_" (current-process-id))) + (runsheader (append (list "Run Id" "Runname") ; 0 1 + (map car keypatt-alist) ; + N = length keypatt-alist + (list "Testname" ; 2 + "Item Path" ; 3 + "Description" ; 4 + "State" ; 5 + "Status" ; 6 + "Final Log" ; 7 + "Run Duration" ; 8 + "When Run" ; 9 + "Tags" ; 10 + "Run Owner" ; 11 + "Comment" ; 12 + "Author" ; 13 + "Test Owner" ; 14 + "Reviewed" ; 15 + "Diskfree" ; 16 + "Uname" ; 17 + "Rundir" ; 18 + "Host" ; 19 + "Cpu Load" ; 20 + ))) + (results (list runsheader)) + (testdata-header (list "Run Id" "Testname" "Item Path" "Category" "Variable" "Value" "Expected" "Tol" "Units" "Status" "Comment")) + (mainqry (conc "SELECT + t.testname,r.id,runname," keysstr ",t.testname, + t.item_path,tm.description,t.state,t.status, + final_logf,run_duration, + strftime('%m/%d/%Y %H:%M:%S',datetime(t.event_time,'unixepoch'),'localtime'), + tm.tags,r.owner,t.comment, + author, + tm.owner,reviewed, + diskfree,uname,rundir, + host,cpuload + FROM tests AS t JOIN runs AS r ON t.run_id=r.id JOIN test_meta AS tm ON tm.testname=t.testname + WHERE runname LIKE ? AND " keyqry ";"))) + (debug:print 2 *default-log-port* "Using " tempdir " for constructing the ods file. keyqry: " keyqry " keystr: " keysstr " with keys: " (map cadr keypatt-alist) + "\n mainqry: " mainqry) + ;; "Expected Value" + ;; "Value Found" + ;; "Tolerance" + (apply sqlite3:for-each-row + (lambda (test-id . b) + (set! test-ids (cons test-id test-ids)) ;; test-id is now testname + (set! results (append results ;; note, drop the test-id + (list + (if pathmod + (let* ((vb (apply vector b)) + (keyvals (let loop ((i 0) + (res '())) + (if (>= i numkeys) + res + (loop (+ i 1) + (append res (list (vector-ref vb (+ i 2)))))))) + (runname (vector-ref vb 1)) + (testname (vector-ref vb (+ 2 numkeys))) + (item-path (vector-ref vb (+ 3 numkeys))) + (final-log (vector-ref vb (+ 7 numkeys))) + (run-dir (vector-ref vb (+ 18 numkeys))) + (log-fpath (conc run-dir "/" final-log))) ;; (string-intersperse keyvals "/") "/" testname "/" item-path "/" + (debug:print 4 *default-log-port* "log: " log-fpath " exists: " (common:file-exists? log-fpath)) + (vector-set! vb (+ 7 numkeys) (if (common:file-exists? log-fpath) + (let ((newpath (conc pathmod "/" + (string-intersperse keyvals "/") + "/" runname "/" testname "/" + (if (string=? item-path "") "" (conc "/" item-path)) + final-log))) + ;; for now throw away newpath and use the log-fpath conc'd with pathmod + (set! newpath (conc pathmod log-fpath)) + (if windows (string-translate newpath "/" "\\") newpath)) + (if (debug:debug-mode 1) + (conc final-log " not-found") + ""))) + (vector->list vb)) + b))))) + db + mainqry + runspatt (map cadr keypatt-alist)) + (debug:print 2 *default-log-port* "Found " (length test-ids) " records") + (set! results (list (cons "Runs" results))) + ;; now, for each test, collect the test_data info and add a new sheet + (for-each + (lambda (test-id) + (let ((test-data (list testdata-header)) + (curr-test-name #f)) + (sqlite3:for-each-row + (lambda (run-id testname item-path category variable value expected tol units status comment) + (set! curr-test-name testname) + (set! test-data (append test-data (list (list run-id testname item-path category variable value expected tol units status comment))))) + db + ;; "SELECT run_id,testname,item_path,category,variable,td.value AS value,expected,tol,units,td.status AS status,td.comment AS comment FROM test_data AS td INNER JOIN tests ON tests.id=td.test_id WHERE test_id=?;" + "SELECT run_id,testname,item_path,category,variable,td.value AS value,td.expected,td.tol,td.units,td.status AS status,td.comment AS comment FROM test_data AS td INNER JOIN tests ON tests.id=td.test_id WHERE testname=?;" + test-id) + (if curr-test-name + (set! results (append results (list (cons curr-test-name test-data))))) + )) + (sort (delete-duplicates test-ids) string<=)) + (system (conc "mkdir -p " tempdir)) + ;; (pp results) + (ods:list->ods + tempdir + (if (string-match (regexp "^[/~]+.*") outputfile) ;; full path? + outputfile + (begin + (debug:print 0 *default-log-port* "WARNING: path given, " outputfile " is relative, prefixing with current directory") + (conc (current-directory) "/" outputfile))) + results) + ;; brutal clean up + (dbfile:add-dbdat dbstruct #f dbdat) + (system "rm -rf tempdir"))) + +;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%") + +) ADDED pgdb.scm Index: pgdb.scm ================================================================== --- /dev/null +++ pgdb.scm @@ -0,0 +1,23 @@ +;;====================================================================== +;; Copyright 2019, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +(declare (unit pgdb)) + +(include "cgisetup/models/pgdb.scm") Index: portlogger.scm ================================================================== --- portlogger.scm +++ portlogger.scm @@ -18,11 +18,12 @@ ;; (declare (unit portlogger)) (declare (uses debugprint)) -(declare (uses dbmod)) +(declare (uses commonmod)) +;; (declare (uses dbmod)) (module portlogger * (import scheme) @@ -35,11 +36,10 @@ ;; dot-locking extras ) (import (prefix sqlite3 sqlite3:)) - (import debugprint dbmod) ) (chicken-5 (import chicken.base chicken.condition chicken.file @@ -58,11 +58,14 @@ )) (import srfi-1 srfi-69 z3 (srfi 18) s11n) (import (prefix sqlite3 sqlite3:)) -(import debugprint dbmod) +(import debugprint + ;; dbmod + commonmod + ) ;; lsof -i (define (portlogger:open-db fname) (let* ((avail (tasks:wait-on-journal fname 5 remove: #t)) ;; wait up to about 10 seconds for the journal to go away Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -48,750 +48,5 @@ ;; ;; generate entries for ~/.megatestrc with the following ;; ;; grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u - -;;====================================================================== -;; S U P P O R T F U N C T I O N S -;;====================================================================== - -(define (rmt:on-homehost? runremote) - (let* ((hh-dat (remote-hh-dat runremote))) - (if (pair? hh-dat) - (cdr hh-dat) - (begin - (debug:print-info 0 *default-log-port* "hh-dat="hh-dat) - #f)))) - -(define (make-and-init-remote areapath) - (case (rmt:transport-mode) - ((http)(make-remote)) - ((tcp) (tt:make-remote areapath)) - (else #f))) - -;;====================================================================== - -(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id -(define *ttdat* #f) -;; how to make area-dat -(define (rmt:set-ttdat areapath ttdat) - (if ttdat - ttdat - (if *ttdat* - *ttdat* - (begin - (debug:print-info 2 *default-log-port* "rmt:set-ttdat: Initialize new ttdat") - (let* ((newremote (make-and-init-remote areapath))) - (set! *ttdat* newremote) - newremote - ) - ) - ) - ) -) - -;; NB// area-dat replaced by ttdat -;; -(define (rmt:send-receive cmd run-id params #!key (attemptnum 1)(ttdat #f)) - (assert (or (not run-id) (number? run-id)) "FATAL: run-id is required to be a number or #f") - (assert *toppath* "FATAL: rmt:send-receive called with *toppath* not set.") - (let* ((areapath *toppath*) ;; TODO - resolve from dbstruct to be compatible with multiple areas - (readonly-mode (rmtmod:calc-ro-mode ttdat *toppath*)) - (testsuite (common:get-testsuite-name))) - (case (rmt:transport-mode) - ((tcp) - (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value - (attemptnum (+ 1 attemptnum)) - (mtexe (common:find-local-megatest)) - (dbfname (conc (dbfile:run-id->dbnum run-id)".db")) - (ttdat (rmt:set-ttdat areapath ttdat)) - (conn (tt:get-conn ttdat dbfname)) - (is-main (equal? dbfname "main.db")) ;; why not (not run-id) ? - (server-start-proc (if is-main - #f - (lambda () - ;; (debug:print-info 0 *default-log-port* "starting server for dbfname: "dbfname) - (rmt:start-server ;; tt:server-process-run - areapath - testsuite ;; (dbfile:testsuite-name) - mtexe - run-id))))) - ;; here we look at ttdat, if dbfname is NOT main.db we check that a conn exists for it - ;; and if there is no conn we first send a request to the main.db server to start a - ;; server for the dbfname. - #;(if (and (not is-main)(not conn)) ;; no existing connection to non-main server, call in a start up request - (begin - (server-start-proc) - (thread-sleep! 1))) - (tt:handler ttdat cmd run-id params attemptnum readonly-mode dbfname testsuite mtexe server-start-proc))) - ((nfs) - (nfs-transport-handler cmd run-id params attemptnum areapath readonly-mode testsuite)) - (else - (debug:print-info 0 *default-log-port* "rmt:transport-mode is "(rmt:transport-mode)) - (assert #f "FATAL: rmt:transport-mode set to invalid value."))))) - -(define (nfs-transport-handler cmd run-id params attemptnum areapath readonly-mode testsuite) - (let* ((keys (common:get-fields *configdat*)) - (dbstruct (dbmod:nfs-get-dbstruct run-id keys (dbfile:db-init-proc) areapath))) - (api:dispatch-request dbstruct cmd run-id params))) - -(define (rmt:get-max-query-average run-id) - (mutex-lock! *db-stats-mutex*) - (let* ((runkey (conc "run-id=" run-id " ")) - (cmds (filter (lambda (x) - (substring-index runkey x)) - (hash-table-keys *db-stats*))) - (res (if (null? cmds) - (cons 'none 0) - (let loop ((cmd (car cmds)) - (tal (cdr cmds)) - (max-cmd (car cmds)) - (res 0)) - (let* ((cmd-dat (hash-table-ref *db-stats* cmd)) - (tot (vector-ref cmd-dat 0)) - (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction - (currmax (max res curravg)) - (newmax-cmd (if (> curravg res) cmd max-cmd))) - (if (null? tal) - (if (> tot 10) - (cons newmax-cmd currmax) - (cons 'none 0)) - (loop (car tal)(cdr tal) newmax-cmd currmax))))))) - (mutex-unlock! *db-stats-mutex*) - res)) - -(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5)) - (let* ((qry-is-write (not (member cmd api:read-only-queries))) - (db-file-path (common:make-tmpdir-name *toppath* "")) ;; 0)) - (dbstructs-local (db:setup)) - (read-only (not (file-write-access? db-file-path))) - (start (current-milliseconds)) - (resdat (if (not (and read-only qry-is-write)) - (let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params)))) - ;; (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong.. - ;; exn ;; This is an attempt to detect that situation and recover gracefully - ;; (begin - ;; (debug:print 0 *default-log-port* "ERROR: bad data from server " v " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) - ;; (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy - (if (and (vector? v) - (> (vector-length v) 1)) - (let ((newvec (vector (vector-ref v 0)(vector-ref v 1)))) - newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record - (vector #t '()))) ;; ) ;; we could also check that the returned types are valid - (vector #t '()))) - (success (vector-ref resdat 0)) - (res (vector-ref resdat 1)) - (duration (- (current-milliseconds) start))) - (if (and read-only qry-is-write) - (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd)) - (if (not success) - (if (> remretries 0) - (begin - (debug:print-error 0 *default-log-port* "local query failed. Trying again.") - (thread-sleep! (/ (random 5000) 1000)) ;; some random delay - (rmt:open-qry-close-locally cmd run-id params remretries: (- remretries 1))) - (begin - (debug:print-error 0 *default-log-port* "too many retries in rmt:open-qry-close-locally, giving up") - #f)) - (begin - ;; (rmt:update-db-stats run-id cmd params duration) - ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it - (if qry-is-write - (let ((start-time (current-seconds))) - (mutex-lock! *db-multi-sync-mutex*) - (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client) - (mutex-unlock! *db-multi-sync-mutex*))))) - res)) - -;;====================================================================== -;; -;; A C T U A L A P I C A L L S -;; -;;====================================================================== - -;;====================================================================== -;; S E R V E R -;;====================================================================== - -(define (rmt:kill-server run-id) - (rmt:send-receive 'kill-server run-id (list run-id))) - -(define (rmt:start-server areapath testsuite mtexe run-id) ;; run on main.db server - (rmt:send-receive 'start-server #f (list areapath testsuite mtexe run-id))) - -;;====================================================================== -;; M I S C -;;====================================================================== - -(define (rmt:login run-id) - (rmt:send-receive 'login run-id (list *toppath* megatest-version (client:get-signature)))) - -;; This login does no retries under the hood - it acts a bit like a ping. -;; Deprecated for nmsg-transport. -;; -;; (define (rmt:login-no-auto-client-setup runremote) -;; (rmt:send-receive-no-auto-client-setup runremote 'login #f (list *toppath* megatest-version (client:get-signature)))) - - -;; given a hostname, return a pair of cpu load and update time representing latest intelligence from tests running on that host -(define (rmt:get-latest-host-load hostname) - (rmt:send-receive 'get-latest-host-load #f (list hostname))) - -(define (rmt:sdb-qry qry val run-id) - ;; add caching if qry is 'getid or 'getstr - (rmt:send-receive 'sdb-qry run-id (list qry val))) - -;; NOT COMPLETED -(define (rmt:runtests user run-id testpatt params) - (rmt:send-receive 'runtests run-id testpatt)) - -(define (rmt:get-run-record-ids target run keynames ) - (rmt:send-receive 'get-run-record-ids #f (list target run keynames ))) - -(define (rmt:get-changed-record-ids since-time) - (rmt:send-receive 'get-changed-record-ids #f (list since-time)) ) - -(define (rmt:get-all-runids) - (rmt:send-receive 'get-all-run-ids #f '()) ) - -(define (rmt:get-changed-record-run-ids since-time) - (rmt:send-receive 'get-changed-record-run-ids #f (list since-time))) - -(define (rmt:get-changed-record-test-ids run-id since-time) - (rmt:send-receive 'get-changed-record-test-ids run-id (list since-time run-id))) - - - -(define (rmt:drop-all-triggers) - (rmt:send-receive 'drop-all-triggers #f '())) - -(define (rmt:create-all-triggers) - (rmt:send-receive 'create-all-triggers #f '())) - -;;====================================================================== -;; T E S T M E T A -;;====================================================================== - -(define (rmt:get-tests-tags) - (rmt:send-receive 'get-tests-tags #f '())) - -;;====================================================================== -;; K E Y S -;;====================================================================== - -;; These require run-id because the values come from the run! -;; however the query must still apply to main.db -;; -(define (rmt:get-key-val-pairs run-id) - (rmt:send-receive 'get-key-val-pairs #f (list run-id))) - -(define (rmt:get-keys) - (if *db-keys* *db-keys* - (let ((res (rmt:send-receive 'get-keys #f '()))) - (set! *db-keys* res) - res))) - -(define (rmt:get-keys-write) ;; dummy query to force server start - (let ((res (rmt:send-receive 'get-keys-write #f '()))) - (set! *db-keys* res) - res)) - -;; we don't reuse run-id's (except possibly *after* a db cleanup) so it is safe -;; to cache the resuls in a hash -;; -(define (rmt:get-key-vals run-id) - (or (hash-table-ref/default *keyvals* run-id #f) - (let ((res (rmt:send-receive 'get-key-vals #f (list run-id)))) - (hash-table-set! *keyvals* run-id res) - res))) - -(define (rmt:get-targets) - (rmt:send-receive 'get-targets #f '())) - -(define (rmt:get-target run-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-target #f (list run-id))) - -(define (rmt:get-run-times runpatt targetpatt) - (rmt:send-receive 'get-run-times #f (list runpatt targetpatt ))) - - -;;====================================================================== -;; T E S T S -;;====================================================================== - -;; IDEA: Threadify these - they spend a lot of time waiting ... -;; -(define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in) - (let ((multi-run-mutex (make-mutex)) - (run-id-list (if run-ids - run-ids - (rmt:get-all-run-ids))) - (result '())) - (if (null? run-id-list) - '() - (let loop ((hed (car run-id-list)) - (tal (cdr run-id-list)) - (threads '())) - (if (> (length threads) 5) - (loop hed tal (filter (lambda (th)(not (member (thread-state th) '(terminated dead)))) threads)) - (let* ((newthread (make-thread - (lambda () - (let ((res (rmt:send-receive 'get-tests-for-run-mindata hed (list hed testpatt states status not-in)))) - (if (list? res) - (begin - (mutex-lock! multi-run-mutex) - (set! result (append result res)) - (mutex-unlock! multi-run-mutex)) - (debug:print-error 0 *default-log-port* "get-tests-for-run-mindata failed for run-id " hed ", testpatt " testpatt ", states " states ", status " status ", not-in " not-in)))) - (conc "multi-run-thread for run-id " hed))) - (newthreads (cons newthread threads))) - (thread-start! newthread) - (thread-sleep! 0.05) ;; give that thread some time to start - (if (null? tal) - newthreads - (loop (car tal)(cdr tal) newthreads)))))) - result)) - -;; ;; IDEA: Threadify these - they spend a lot of time waiting ... -;; ;; -;; (define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in) -;; (let ((run-id-list (if run-ids -;; run-ids -;; (rmt:get-all-run-ids)))) -;; (apply append (map (lambda (run-id) -;; (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-ids testpatt states status not-in))) -;; run-id-list)))) - -(define (rmt:delete-test-records run-id test-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'delete-test-records run-id (list run-id test-id))) - -(define (rmt:test-set-state-status run-id test-id state status msg) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'test-set-state-status run-id (list run-id test-id state status msg))) - -(define (rmt:test-toplevel-num-items run-id test-name) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'test-toplevel-num-items run-id (list run-id test-name))) - -;; (define (rmt:get-previous-test-run-record run-id test-name item-path) -;; (rmt:send-receive 'get-previous-test-run-record run-id (list run-id test-name item-path))) - -(define (rmt:get-matching-previous-test-run-records run-id test-name item-path) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-matching-previous-test-run-records run-id (list run-id test-name item-path))) - -(define (rmt:test-get-logfile-info run-id test-name) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'test-get-logfile-info run-id (list run-id test-name))) - -(define (rmt:test-get-records-for-index-file run-id test-name) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'test-get-records-for-index-file run-id (list run-id test-name))) - -(define (rmt:get-testinfo-state-status run-id test-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-testinfo-state-status run-id (list run-id test-id))) - -(define (rmt:test-set-log! run-id test-id logf) - (assert (number? run-id) "FATAL: Run id required.") - (if (string? logf)(rmt:general-call 'test-set-log run-id logf test-id))) - -(define (rmt:test-set-top-process-pid run-id test-id pid) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'test-set-top-process-pid run-id (list run-id test-id pid))) - -(define (rmt:test-get-top-process-pid run-id test-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'test-get-top-process-pid run-id (list run-id test-id))) - -(define (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt) - (rmt:send-receive 'get-run-ids-matching-target #f (list keynames target res runname testpatt statepatt statuspatt))) - -;; NOTE: This will open and access ALL run databases. -;; -(define (rmt:test-get-paths-matching-keynames-target-new keynames target res testpatt statepatt statuspatt runname) - (let ((run-ids (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt))) - (apply append - (map (lambda (run-id) - (rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list run-id keynames target res testpatt statepatt statuspatt runname))) - run-ids)))) - - - -(define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f)) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps))) - -(define (rmt:get-count-tests-running-for-run-id run-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id))) - -(define (rmt:get-not-completed-cnt run-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-not-completed-cnt run-id (list run-id))) - - -;; Statistical queries - -(define (rmt:get-count-tests-running run-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-count-tests-running run-id (list run-id))) - -(define (rmt:get-count-tests-running-for-testname run-id testname) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-count-tests-running-for-testname run-id (list run-id testname))) - -(define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id (list run-id jobgroup))) - -(define (rmt:set-state-status-and-roll-up-run run-id state status) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'set-state-status-and-roll-up-run run-id (list run-id state status))) - - -(define (rmt:update-pass-fail-counts run-id test-name) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:general-call 'update-pass-fail-counts run-id test-name test-name test-name)) - -(define (rmt:top-test-set-per-pf-counts run-id test-name) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'top-test-set-per-pf-counts run-id (list run-id test-name))) - -(define (rmt:get-raw-run-stats run-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-raw-run-stats run-id (list run-id))) - -(define (rmt:get-test-times runname target) - (rmt:send-receive 'get-test-times #f (list runname target ))) - -;;====================================================================== -;; R U N S -;;====================================================================== - -;; BUG - LOOK AT HOW THIS WORKS!!! -;; -(define (rmt:get-run-info run-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-run-info #f (list run-id))) - -(define (rmt:get-num-runs runpatt) - (rmt:send-receive 'get-num-runs #f (list runpatt))) - -(define (rmt:get-runs-cnt-by-patt runpatt targetpatt keys) - (rmt:send-receive 'get-runs-cnt-by-patt #f (list runpatt targetpatt keys))) - -;; Use the special run-id == #f scenario here since there is no run yet -(define (rmt:register-run keyvals runname state status user contour) - (rmt:send-receive 'register-run #f (list keyvals runname state status user contour))) - -(define (rmt:get-run-name-from-id run-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-run-name-from-id #f (list run-id))) - -(define (rmt:delete-run run-id) - (rmt:send-receive 'delete-run #f (list run-id))) - -(define (rmt:update-run-stats run-id stats) - (rmt:send-receive 'update-run-stats #f (list run-id stats))) - -(define (rmt:delete-old-deleted-test-records run-id) - (rmt:send-receive 'delete-old-deleted-test-records run-id (list run-id))) - -(define (rmt:get-runs runpatt count offset keypatts) - (rmt:send-receive 'get-runs #f (list runpatt count offset keypatts))) - -(define (rmt:simple-get-runs runpatt count offset target last-update) - (rmt:send-receive 'simple-get-runs #f (list runpatt count offset target last-update))) - -(define (rmt:get-all-run-ids) - (rmt:send-receive 'get-all-run-ids #f '())) - -(define (rmt:get-prev-run-ids run-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-prev-run-ids #f (list run-id))) - -(define (rmt:lock/unlock-run run-id lock unlock user) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'lock/unlock-run #f (list run-id lock unlock user))) - -;; set/get status -(define (rmt:get-run-status run-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-run-status #f (list run-id))) - -(define (rmt:get-run-state run-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-run-state #f (list run-id))) - -(define (rmt:get-run-state-status run-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-run-state-status #f (list run-id))) - -(define (rmt:set-run-status run-id run-status #!key (msg #f)) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'set-run-status #f (list run-id run-status msg))) - -(define (rmt:set-run-state-status run-id state status ) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'set-run-state-status #f (list run-id state status))) - -(define (rmt:update-tesdata-on-repilcate-db old-lt new-lt) -(rmt:send-receive 'update-tesdata-on-repilcate-db #f (list old-lt new-lt))) - -(define (rmt:update-run-event_time run-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'update-run-event_time #f (list run-id))) - -(define (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-runs-update #!key (sort-order "asc")) ;; fields of #f uses default - (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit fields last-runs-update sort-order))) - -(define (rmt:find-and-mark-incomplete run-id ovr-deadtime) - (assert (number? run-id) "FATAL: Run id required.") - ;; (if (rmt:send-receive 'have-incompletes? run-id (list run-id ovr-deadtime)) - (rmt:send-receive 'mark-incomplete run-id (list run-id ovr-deadtime))) ;; ) - -(define (rmt:get-main-run-stats run-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-main-run-stats #f (list run-id))) - -(define (rmt:get-var varname) - (rmt:send-receive 'get-var #f (list varname))) - -(define (rmt:del-var varname) - (rmt:send-receive 'del-var #f (list varname))) - -(define (rmt:set-var varname value) - (rmt:send-receive 'set-var #f (list varname value))) - -(define (rmt:inc-var varname) - (rmt:send-receive 'inc-var #f (list varname))) - -(define (rmt:dec-var varname) - (rmt:send-receive 'dec-var #f (list varname))) - -(define (rmt:add-var varname value) - (rmt:send-receive 'add-var #f (list varname value))) - -;;====================================================================== -;; M U L T I R U N Q U E R I E S -;;====================================================================== - -;; Need to move this to multi-run section and make associated changes -(define (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f)) - (let ((run-ids (rmt:get-all-run-ids))) - (for-each (lambda (run-id) - (rmt:find-and-mark-incomplete run-id ovr-deadtime)) - run-ids))) - -;; get the previous record for when this test was run where all keys match but runname -;; returns #f if no such test found, returns a single test record if found -;; -;; Run this at the client end since we have to connect to multiple run-id dbs -;; -(define (rmt:get-previous-test-run-record run-id test-name item-path) - (let* ((keyvals (rmt:get-key-val-pairs run-id)) - (keys (rmt:get-keys)) - (selstr (string-intersperse keys ",")) - (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND "))) - (if (not keyvals) - #f - (let ((prev-run-ids (rmt:get-prev-run-ids run-id))) - ;; for each run starting with the most recent look to see if there is a matching test - ;; if found then return that matching test record - (debug:print 4 *default-log-port* "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals ", previous run ids found: " prev-run-ids) - (if (null? prev-run-ids) #f - (let loop ((hed (car prev-run-ids)) - (tal (cdr prev-run-ids))) - (let ((results (rmt:get-tests-for-run hed (conc test-name "/" item-path) '() '() ;; run-id testpatt states statuses - #f #f #f ;; offset limit not-in hide/not-hide - #f #f #f #f 'normal))) ;; sort-by sort-order qryvals last-update mode - (debug:print 4 *default-log-port* "Got tests for run-id " run-id ", test-name " test-name ", item-path " item-path ": " results) - (if (and (null? results) - (not (null? tal))) - (loop (car tal)(cdr tal)) - (if (null? results) #f - (car results)))))))))) - -(define (rmt:get-run-stats) - (rmt:send-receive 'get-run-stats #f '())) - -;;====================================================================== -;; S T E P S -;;====================================================================== - -;; Getting steps is more complicated. -;; -;; If given work area -;; 1. Find the testdat.db file -;; 2. Open the testdat.db file and do the query -;; If not given the work area -;; 1. Do a remote call to get the test path -;; 2. Continue as above -;; -;;(define (rmt:get-steps-for-test run-id test-id) -;; (rmt:send-receive 'get-steps-data run-id (list test-id))) - -(define (rmt:teststep-set-status! run-id test-id teststep-name state-in status-in comment logfile) - (assert (number? run-id) "FATAL: Run id required.") - (let* ((state (items:check-valid-items "state" state-in)) - (status (items:check-valid-items "status" status-in))) - (if (or (not state)(not status)) - (debug:print 3 *default-log-port* "WARNING: Invalid " (if status "status" "state") - " value \"" (if status state-in status-in) "\", update your validvalues section in megatest.config")) - (rmt:send-receive 'teststep-set-status! run-id (list run-id test-id teststep-name state-in status-in comment logfile)))) - - -(define (rmt:delete-steps-for-test! run-id test-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'delete-steps-for-test! run-id (list run-id test-id))) - -(define (rmt:get-steps-for-test run-id test-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-steps-for-test run-id (list run-id test-id))) - -(define (rmt:get-steps-info-by-id run-id test-step-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-steps-info-by-id #f (list run-id test-step-id))) - -;;====================================================================== -;; T E S T D A T A -;;====================================================================== - -(define (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f)) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'read-test-data run-id (list run-id test-id categorypatt))) - -(define (rmt:read-test-data-varpatt run-id test-id categorypatt varpatt #!key (work-area #f)) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'read-test-data-varpatt run-id (list run-id test-id categorypatt varpatt))) - -(define (rmt:get-data-info-by-id run-id test-data-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-data-info-by-id #f (list run-id test-data-id))) - -(define (rmt:testmeta-add-record testname) - (rmt:send-receive 'testmeta-add-record #f (list testname))) - -(define (rmt:testmeta-get-record testname) - (rmt:send-receive 'testmeta-get-record #f (list testname))) - -(define (rmt:testmeta-update-field test-name fld val) - (rmt:send-receive 'testmeta-update-field #f (list test-name fld val))) - -(define (rmt:test-data-rollup run-id test-id status) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'test-data-rollup run-id (list run-id test-id status))) - -(define (rmt:csv->test-data run-id test-id csvdata) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'csv->test-data run-id (list run-id test-id csvdata))) - -;;====================================================================== -;; T A S K S -;;====================================================================== - -(define (rmt:tasks-find-task-queue-records target run-name test-patt state-patt action-patt) - (rmt:send-receive 'find-task-queue-records #f (list target run-name test-patt state-patt action-patt))) - -(define (rmt:tasks-add action owner target runname testpatt params) - (rmt:send-receive 'tasks-add #f (list action owner target runname testpatt params))) - -(define (rmt:tasks-set-state-given-param-key param-key new-state) - (rmt:send-receive 'tasks-set-state-given-param-key #f (list param-key new-state))) - -(define (rmt:tasks-get-last target runname) - (rmt:send-receive 'tasks-get-last #f (list target runname))) - -;;====================================================================== -;; N O S Y N C D B -;;====================================================================== - -(define (rmt:no-sync-set var val) - (rmt:send-receive 'no-sync-set #f `(,var ,val))) - -(define (rmt:no-sync-get/default var default) - (rmt:send-receive 'no-sync-get/default #f `(,var ,default))) - -(define (rmt:no-sync-del! var) - (rmt:send-receive 'no-sync-del! #f `(,var))) - -(define (rmt:no-sync-get-lock keyname) - (rmt:send-receive 'no-sync-get-lock #f `(,keyname))) - -;; process registration - -(define (rmt:register-process host port pid starttime status purpose dbname mtversion) - (rmt:send-receive 'register-process #f (list host port pid starttime status purpose dbname mtversion))) - -(define (rmt:set-process-done host pid reason) - (rmt:send-receive 'set-process-done #f (list host pid reason))) - -(define (rmt:set-process-status host pid newstatus) - (rmt:send-receive 'set-process-status #f (list host pid newstatus))) - -(define (rmt:get-process-options purpose dbname) - (rmt:get-process-options 'get-process-options #f (list purpose dbname))) - -;;====================================================================== -;; A R C H I V E S -;;====================================================================== - -(define (rmt:archive-get-allocations testname itempath dneeded) - (rmt:send-receive 'archive-get-allocations #f (list testname itempath dneeded))) - -(define (rmt:archive-register-block-name bdisk-id archive-path) - (rmt:send-receive 'archive-register-block-name #f (list bdisk-id archive-path))) - -(define (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey) - (rmt:send-receive 'archive-allocate-test-to-block #f (list block-id testsuite-name areakey))) - -(define (rmt:archive-register-disk bdisk-name bdisk-path df) - (rmt:send-receive 'archive-register-disk #f (list bdisk-name bdisk-path df))) - -(define (rmt:test-set-archive-block-id run-id test-id archive-block-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id))) - -(define (rmt:test-get-archive-block-info archive-block-id) - (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id))) - -(define (rmtmod:calc-ro-mode runremote *toppath*) - (case (rmt:transport-mode) - ((http) - (if (and runremote - (remote-ro-mode-checked runremote)) - (remote-ro-mode runremote) - (let* ((mtcfgfile (conc *toppath* "/megatest.config")) - (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future - (if runremote - (begin - (remote-ro-mode-set! runremote ro-mode) - (remote-ro-mode-checked-set! runremote #t) - ro-mode) - ro-mode)))) - ((tcp) - (if (and runremote - (tt-ro-mode-checked runremote)) - (tt-ro-mode runremote) - (let* ((mtcfgfile (conc *toppath* "/megatest.config")) - (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future - (if runremote - (begin - (tt-ro-mode-set! runremote ro-mode) - (tt-ro-mode-checked-set! runremote #t) - ro-mode) - ro-mode)))))) - -;;====================================================================== -;; Maintenance -;;====================================================================== - -(define (rmt:find-and-mark-incomplete run-id ovr-deadtime) - (let* ((cfg-deadtime (configf:lookup-number *configdat* "setup" "deadtime")) - (test-stats-update-period (configf:lookup-number *configdat* "setup" "test-stats-update-period"))) - (rmt:find-and-mark-incomplete-engine run-id ovr-deadtime cfg-deadtime test-stats-update-period) - ;;call end of eud of run detection for posthook - (launch:end-of-run-check run-id))) Index: rmtmod.scm ================================================================== --- rmtmod.scm +++ rmtmod.scm @@ -19,33 +19,51 @@ ;;====================================================================== (declare (unit rmtmod)) (declare (uses debugprint)) (declare (uses commonmod)) +(declare (uses configfmod)) (declare (uses dbfile)) ;; needed for records - -;; (declare (uses apimod)) -;; (declare (uses apimod.import)) -;; (declare (uses ulex)) - -;; (include "ulex/ulex.scm") +(declare (uses dbmod)) +(declare (uses mtmod)) +(declare (uses tcp-transportmod)) +(declare (uses apimod)) +(declare (uses servermod)) (module rmtmod * -(import scheme chicken data-structures extras matchable srfi-69) -(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18) -(import commonmod dbfile debugprint) ;; (prefix commonmod cmod:)) -;; (import apimod) -;; (import (prefix ulex ulex:)) +(import scheme + chicken + data-structures + regex + extras + matchable + srfi-1 + srfi-69 + (prefix sqlite3 sqlite3:) + posix + typed-records + srfi-18) +(import commonmod + configfmod + tcp-transportmod + dbfile + dbmod + debugprint + apimod + mtmod + servermod + ) (include "db_records.scm") (defstruct alldat (areapath #f) (ulexdat #f) ) + ;; hold the send-receive proc in this parameter (define rmtmod:send-receive #f) ;; (make-parameter #f)) ;;====================================================================== @@ -219,82 +237,920 @@ (if (null? res) #f res))))) ;; (string-split (car res))))))) <== I would have preferred a single line STATE STATUS without "'s ;; (string-split (car res))))))) ;; DUNNO WHICH IS CORRECT -;; select end_time-now from -;; (select testname,item_path,event_time+run_duration as -;; end_time,strftime('%s','now') as now from tests where state in -;; ('RUNNING','REMOTEHOSTSTART','LAUNCHED')); -;; -;; NOT EASY TO MIGRATE TO db{file,mod} -;; -(define (rmt:find-and-mark-incomplete-engine run-id ovr-deadtime cfg-deadtime test-stats-update-period) - (let* ((incompleted '()) - (oldlaunched '()) - (toplevels '()) - ;; The default running-deadtime is 720 seconds = 12 minutes. - ;; "(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))" = 200 + (2 * (200 + 30 + 30)) - (deadtime-trim (or ovr-deadtime cfg-deadtime)) - (server-start-allowance 200) - (server-overloaded-budget 200) - (launch-monitor-off-time (or test-stats-update-period 30)) - (launch-monitor-on-time-budget 30) - (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget)) - (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30)) - (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default)) - (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period))) - (running-deadtime (or deadtime-trim running-deadtime-default))) ;; two minutes (30 seconds between updates, this leaves 3x grace period) - - (debug:print-info 4 *default-log-port* "running-deadtime = " running-deadtime) - (debug:print-info 4 *default-log-port* "deadtime-trim = " deadtime-trim) - - (let* ((dat (rmt:get-toplevels-and-incompletes run-id running-deadtime remotehoststart-deadtime))) - (set! oldlaunched (list-ref dat 1)) - (set! toplevels (list-ref dat 2)) - (set! incompleted (list-ref dat 0))) - - (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " - (length toplevels) " old LAUNCHED toplevel tests and " - (length incompleted) " tests marked RUNNING but apparently dead.") - - ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE. - ;; - ;; (db:delay-if-busy dbdat) - (let* ((min-incompleted-ids (map car incompleted)) ;; do 'em all - (all-ids (append min-incompleted-ids (map car oldlaunched)))) - (if (> (length all-ids) 0) - (begin - ;; (launch:is-test-alive "localhost" 435) - (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") - " as DEAD") - (for-each - (lambda (test-id) - (let* ((tinfo (rmt:get-test-info-by-id run-id test-id)) - (run-dir (db:test-get-rundir tinfo)) - (host (db:test-get-host tinfo)) - (pid (db:test-get-process_id tinfo)) - (result (rmt:get-status-from-final-status-file run-dir))) - (if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result))) - (begin - (debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD") - (rmt:set-state-status-and-roll-up-items - run-id test-id 'foo "COMPLETED" "PASS" - "Test stopped responding but it has PASSED; marking it PASS in the DB.")) - (let ((is-alive (and (not (eq? pid 0)) ;; 0 is default in re-used field "attemptnum" where pid stored. - (commonmod:is-test-alive host pid)))) - (if is-alive - (debug:print 0 *default-log-port* "INFO: test " test-id " on host " host - " has a process on pid " pid ", NOT setting to DEAD.") - (begin - (debug:print 0 *default-log-port* "INFO: test " test-id - " final state/status is not COMPLETED/PASS. It is " result) - (rmt:set-state-status-and-roll-up-items - run-id test-id 'foo "COMPLETED" "DEAD" - "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead."))))))) - ;; call end of eud of run detection for posthook - from merge, is it needed? - ;; (launch:end-of-run-check run-id) - all-ids) - ))))) +;;====================================================================== + +(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id +(define *ttdat* #f) + +;; NB// area-dat replaced by ttdat +;; +(define (rmt:send-receive cmd run-id params #!key (attemptnum 1)(ttdat #f)) + (assert (or (not run-id) (number? run-id)) "FATAL: run-id is required to be a number or #f") + (assert *toppath* "FATAL: rmt:send-receive called with *toppath* not set.") + (let* ((areapath *toppath*) ;; TODO - resolve from dbstruct to be compatible with multiple areas + (readonly-mode (rmtmod:calc-ro-mode ttdat *toppath*)) + (testsuite (common:get-testsuite-name))) + (case (rmt:transport-mode) + ((tcp) + (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value + (attemptnum (+ 1 attemptnum)) + (mtexe (common:find-local-megatest)) + (dbfname (conc (dbfile:run-id->dbnum run-id)".db")) + (ttdat (rmt:set-ttdat areapath ttdat)) + (conn (tt:get-conn ttdat dbfname)) + (is-main (equal? dbfname "main.db")) ;; why not (not run-id) ? + (server-start-proc (if is-main + #f + (lambda () + ;; (debug:print-info 0 *default-log-port* "starting server for dbfname: "dbfname) + (rmt:start-server ;; tt:server-process-run + areapath + testsuite ;; (dbfile:testsuite-name) + mtexe + run-id))))) + ;; here we look at ttdat, if dbfname is NOT main.db we check that a conn exists for it + ;; and if there is no conn we first send a request to the main.db server to start a + ;; server for the dbfname. + #;(if (and (not is-main)(not conn)) ;; no existing connection to non-main server, call in a start up request + (begin + (server-start-proc) + (thread-sleep! 1))) + (tt:handler ttdat cmd run-id params attemptnum readonly-mode dbfname testsuite mtexe server-start-proc))) + ((nfs) + (nfs-transport-handler cmd run-id params attemptnum areapath readonly-mode testsuite)) + (else + (debug:print-info 0 *default-log-port* "rmt:transport-mode is "(rmt:transport-mode)) + (assert #f "FATAL: rmt:transport-mode set to invalid value."))))) + +(define (nfs-transport-handler cmd run-id params attemptnum areapath readonly-mode testsuite) + (let* ((keys (common:get-fields *configdat*)) + (dbstruct (dbmod:nfs-get-dbstruct run-id keys (dbfile:db-init-proc) areapath))) + (api:dispatch-request dbstruct cmd run-id params))) + +(define (rmt:get-max-query-average run-id) + (mutex-lock! *db-stats-mutex*) + (let* ((runkey (conc "run-id=" run-id " ")) + (cmds (filter (lambda (x) + (substring-index runkey x)) + (hash-table-keys *db-stats*))) + (res (if (null? cmds) + (cons 'none 0) + (let loop ((cmd (car cmds)) + (tal (cdr cmds)) + (max-cmd (car cmds)) + (res 0)) + (let* ((cmd-dat (hash-table-ref *db-stats* cmd)) + (tot (vector-ref cmd-dat 0)) + (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction + (currmax (max res curravg)) + (newmax-cmd (if (> curravg res) cmd max-cmd))) + (if (null? tal) + (if (> tot 10) + (cons newmax-cmd currmax) + (cons 'none 0)) + (loop (car tal)(cdr tal) newmax-cmd currmax))))))) + (mutex-unlock! *db-stats-mutex*) + res)) + +;; =not-used= (define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5)) +;; =not-used= (let* ((qry-is-write (not (member cmd api:read-only-queries))) +;; =not-used= (db-file-path (common:make-tmpdir-name *toppath* "")) ;; 0)) +;; =not-used= (dbstructs-local (db:setup)) +;; =not-used= (read-only (not (file-write-access? db-file-path))) +;; =not-used= (start (current-milliseconds)) +;; =not-used= (resdat (if (not (and read-only qry-is-write)) +;; =not-used= (let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params)))) +;; =not-used= ;; (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong.. +;; =not-used= ;; exn ;; This is an attempt to detect that situation and recover gracefully +;; =not-used= ;; (begin +;; =not-used= ;; (debug:print 0 *default-log-port* "ERROR: bad data from server " v " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) +;; =not-used= ;; (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy +;; =not-used= (if (and (vector? v) +;; =not-used= (> (vector-length v) 1)) +;; =not-used= (let ((newvec (vector (vector-ref v 0)(vector-ref v 1)))) +;; =not-used= newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record +;; =not-used= (vector #t '()))) ;; ) ;; we could also check that the returned types are valid +;; =not-used= (vector #t '()))) +;; =not-used= (success (vector-ref resdat 0)) +;; =not-used= (res (vector-ref resdat 1)) +;; =not-used= (duration (- (current-milliseconds) start))) +;; =not-used= (if (and read-only qry-is-write) +;; =not-used= (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd)) +;; =not-used= (if (not success) +;; =not-used= (if (> remretries 0) +;; =not-used= (begin +;; =not-used= (debug:print-error 0 *default-log-port* "local query failed. Trying again.") +;; =not-used= (thread-sleep! (/ (random 5000) 1000)) ;; some random delay +;; =not-used= (rmt:open-qry-close-locally cmd run-id params remretries: (- remretries 1))) +;; =not-used= (begin +;; =not-used= (debug:print-error 0 *default-log-port* "too many retries in rmt:open-qry-close-locally, giving up") +;; =not-used= #f)) +;; =not-used= (begin +;; =not-used= ;; (rmt:update-db-stats run-id cmd params duration) +;; =not-used= ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it +;; =not-used= (if qry-is-write +;; =not-used= (let ((start-time (current-seconds))) +;; =not-used= (mutex-lock! *db-multi-sync-mutex*) +;; =not-used= (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client) +;; =not-used= (mutex-unlock! *db-multi-sync-mutex*))))) +;; =not-used= res)) + +;;====================================================================== +;; +;; A C T U A L A P I C A L L S +;; +;;====================================================================== + +;;====================================================================== +;; S E R V E R +;;====================================================================== + +(define (rmt:kill-server run-id) + (rmt:send-receive 'kill-server run-id (list run-id))) + +(define (rmt:start-server areapath testsuite mtexe run-id) ;; run on main.db server + (rmt:send-receive 'start-server #f (list areapath testsuite mtexe run-id))) + +;;====================================================================== +;; M I S C +;;====================================================================== + +(define (rmt:login run-id) + (rmt:send-receive 'login run-id (list *toppath* megatest-version (client:get-signature)))) + +;; This login does no retries under the hood - it acts a bit like a ping. +;; Deprecated for nmsg-transport. +;; +;; (define (rmt:login-no-auto-client-setup runremote) +;; (rmt:send-receive-no-auto-client-setup runremote 'login #f (list *toppath* megatest-version (client:get-signature)))) + + +;; given a hostname, return a pair of cpu load and update time representing latest intelligence from tests running on that host +(define (rmt:get-latest-host-load hostname) + (rmt:send-receive 'get-latest-host-load #f (list hostname))) + +(define (rmt:sdb-qry qry val run-id) + ;; add caching if qry is 'getid or 'getstr + (rmt:send-receive 'sdb-qry run-id (list qry val))) + +;; NOT COMPLETED +(define (rmt:runtests user run-id testpatt params) + (rmt:send-receive 'runtests run-id testpatt)) + +(define (rmt:get-run-record-ids target run keynames ) + (rmt:send-receive 'get-run-record-ids #f (list target run keynames ))) + +(define (rmt:get-changed-record-ids since-time) + (rmt:send-receive 'get-changed-record-ids #f (list since-time)) ) + +(define (rmt:get-all-runids) + (rmt:send-receive 'get-all-run-ids #f '()) ) + +(define (rmt:get-changed-record-run-ids since-time) + (rmt:send-receive 'get-changed-record-run-ids #f (list since-time))) + +(define (rmt:get-changed-record-test-ids run-id since-time) + (rmt:send-receive 'get-changed-record-test-ids run-id (list since-time run-id))) + + + +(define (rmt:drop-all-triggers) + (rmt:send-receive 'drop-all-triggers #f '())) + +(define (rmt:create-all-triggers) + (rmt:send-receive 'create-all-triggers #f '())) + +;;====================================================================== +;; T E S T M E T A +;;====================================================================== + +(define (rmt:get-tests-tags) + (rmt:send-receive 'get-tests-tags #f '())) + +;;====================================================================== +;; K E Y S +;;====================================================================== + +;; These require run-id because the values come from the run! +;; however the query must still apply to main.db +;; +(define (rmt:get-key-val-pairs run-id) + (rmt:send-receive 'get-key-val-pairs #f (list run-id))) + +(define (rmt:get-keys) + (if *db-keys* *db-keys* + (let ((res (rmt:send-receive 'get-keys #f '()))) + (set! *db-keys* res) + res))) + +(define (rmt:get-keys-write) ;; dummy query to force server start + (let ((res (rmt:send-receive 'get-keys-write #f '()))) + (set! *db-keys* res) + res)) + +;; we don't reuse run-id's (except possibly *after* a db cleanup) so it is safe +;; to cache the resuls in a hash +;; +(define (rmt:get-key-vals run-id) + (or (hash-table-ref/default *keyvals* run-id #f) + (let ((res (rmt:send-receive 'get-key-vals #f (list run-id)))) + (hash-table-set! *keyvals* run-id res) + res))) + +(define (rmt:get-targets) + (rmt:send-receive 'get-targets #f '())) + +(define (rmt:get-target run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-target #f (list run-id))) + +(define (rmt:get-run-times runpatt targetpatt) + (rmt:send-receive 'get-run-times #f (list runpatt targetpatt ))) + + +;;====================================================================== +;; T E S T S +;;====================================================================== + +;; IDEA: Threadify these - they spend a lot of time waiting ... +;; +(define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in) + (let ((multi-run-mutex (make-mutex)) + (run-id-list (if run-ids + run-ids + (rmt:get-all-run-ids))) + (result '())) + (if (null? run-id-list) + '() + (let loop ((hed (car run-id-list)) + (tal (cdr run-id-list)) + (threads '())) + (if (> (length threads) 5) + (loop hed tal (filter (lambda (th)(not (member (thread-state th) '(terminated dead)))) threads)) + (let* ((newthread (make-thread + (lambda () + (let ((res (rmt:send-receive 'get-tests-for-run-mindata hed (list hed testpatt states status not-in)))) + (if (list? res) + (begin + (mutex-lock! multi-run-mutex) + (set! result (append result res)) + (mutex-unlock! multi-run-mutex)) + (debug:print-error 0 *default-log-port* "get-tests-for-run-mindata failed for run-id " hed ", testpatt " testpatt ", states " states ", status " status ", not-in " not-in)))) + (conc "multi-run-thread for run-id " hed))) + (newthreads (cons newthread threads))) + (thread-start! newthread) + (thread-sleep! 0.05) ;; give that thread some time to start + (if (null? tal) + newthreads + (loop (car tal)(cdr tal) newthreads)))))) + result)) + +;; ;; IDEA: Threadify these - they spend a lot of time waiting ... +;; ;; +;; (define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in) +;; (let ((run-id-list (if run-ids +;; run-ids +;; (rmt:get-all-run-ids)))) +;; (apply append (map (lambda (run-id) +;; (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-ids testpatt states status not-in))) +;; run-id-list)))) + +(define (rmt:delete-test-records run-id test-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'delete-test-records run-id (list run-id test-id))) + +(define (rmt:test-set-state-status run-id test-id state status msg) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'test-set-state-status run-id (list run-id test-id state status msg))) + +(define (rmt:test-toplevel-num-items run-id test-name) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'test-toplevel-num-items run-id (list run-id test-name))) + +;; (define (rmt:get-previous-test-run-record run-id test-name item-path) +;; (rmt:send-receive 'get-previous-test-run-record run-id (list run-id test-name item-path))) + +(define (rmt:get-matching-previous-test-run-records run-id test-name item-path) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-matching-previous-test-run-records run-id (list run-id test-name item-path))) + +(define (rmt:test-get-logfile-info run-id test-name) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'test-get-logfile-info run-id (list run-id test-name))) + +(define (rmt:test-get-records-for-index-file run-id test-name) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'test-get-records-for-index-file run-id (list run-id test-name))) + +(define (rmt:get-testinfo-state-status run-id test-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-testinfo-state-status run-id (list run-id test-id))) + +(define (rmt:test-set-log! run-id test-id logf) + (assert (number? run-id) "FATAL: Run id required.") + (if (string? logf)(rmt:general-call 'test-set-log run-id logf test-id))) + +(define (rmt:test-set-top-process-pid run-id test-id pid) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'test-set-top-process-pid run-id (list run-id test-id pid))) + +(define (rmt:test-get-top-process-pid run-id test-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'test-get-top-process-pid run-id (list run-id test-id))) + +(define (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt) + (rmt:send-receive 'get-run-ids-matching-target #f (list keynames target res runname testpatt statepatt statuspatt))) + +;; NOTE: This will open and access ALL run databases. +;; +(define (rmt:test-get-paths-matching-keynames-target-new keynames target res testpatt statepatt statuspatt runname) + (let ((run-ids (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt))) + (apply append + (map (lambda (run-id) + (rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list run-id keynames target res testpatt statepatt statuspatt runname))) + run-ids)))) + + + +(define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f)) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps))) + +(define (rmt:get-count-tests-running-for-run-id run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id))) + +(define (rmt:get-not-completed-cnt run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-not-completed-cnt run-id (list run-id))) + + +;; Statistical queries + +(define (rmt:get-count-tests-running run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-count-tests-running run-id (list run-id))) + +(define (rmt:get-count-tests-running-for-testname run-id testname) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-count-tests-running-for-testname run-id (list run-id testname))) + +(define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id (list run-id jobgroup))) + +(define (rmt:set-state-status-and-roll-up-run run-id state status) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'set-state-status-and-roll-up-run run-id (list run-id state status))) + + +(define (rmt:update-pass-fail-counts run-id test-name) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:general-call 'update-pass-fail-counts run-id test-name test-name test-name)) + +(define (rmt:top-test-set-per-pf-counts run-id test-name) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'top-test-set-per-pf-counts run-id (list run-id test-name))) + +(define (rmt:get-raw-run-stats run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-raw-run-stats run-id (list run-id))) + +(define (rmt:get-test-times runname target) + (rmt:send-receive 'get-test-times #f (list runname target ))) + +;;====================================================================== +;; R U N S +;;====================================================================== + +;; BUG - LOOK AT HOW THIS WORKS!!! +;; +(define (rmt:get-run-info run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-run-info #f (list run-id))) + +(define (rmt:get-num-runs runpatt) + (rmt:send-receive 'get-num-runs #f (list runpatt))) + +(define (rmt:get-runs-cnt-by-patt runpatt targetpatt keys) + (rmt:send-receive 'get-runs-cnt-by-patt #f (list runpatt targetpatt keys))) + +;; Use the special run-id == #f scenario here since there is no run yet +(define (rmt:register-run keyvals runname state status user contour) + (rmt:send-receive 'register-run #f (list keyvals runname state status user contour))) + +(define (rmt:get-run-name-from-id run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-run-name-from-id #f (list run-id))) + +(define (rmt:delete-run run-id) + (rmt:send-receive 'delete-run #f (list run-id))) + +(define (rmt:update-run-stats run-id stats) + (rmt:send-receive 'update-run-stats #f (list run-id stats))) + +(define (rmt:delete-old-deleted-test-records run-id) + (rmt:send-receive 'delete-old-deleted-test-records run-id (list run-id))) + +(define (rmt:get-runs runpatt count offset keypatts) + (rmt:send-receive 'get-runs #f (list runpatt count offset keypatts))) + +(define (rmt:simple-get-runs runpatt count offset target last-update) + (rmt:send-receive 'simple-get-runs #f (list runpatt count offset target last-update))) + +(define (rmt:get-all-run-ids) + (rmt:send-receive 'get-all-run-ids #f '())) + +(define (rmt:get-prev-run-ids run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-prev-run-ids #f (list run-id))) + +(define (rmt:lock/unlock-run run-id lock unlock user) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'lock/unlock-run #f (list run-id lock unlock user))) + +;; set/get status +(define (rmt:get-run-status run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-run-status #f (list run-id))) + +(define (rmt:get-run-state run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-run-state #f (list run-id))) + +(define (rmt:get-run-state-status run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-run-state-status #f (list run-id))) + +(define (rmt:set-run-status run-id run-status #!key (msg #f)) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'set-run-status #f (list run-id run-status msg))) + +(define (rmt:set-run-state-status run-id state status ) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'set-run-state-status #f (list run-id state status))) + +(define (rmt:update-tesdata-on-repilcate-db old-lt new-lt) +(rmt:send-receive 'update-tesdata-on-repilcate-db #f (list old-lt new-lt))) + +(define (rmt:update-run-event_time run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'update-run-event_time #f (list run-id))) + +(define (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-runs-update #!key (sort-order "asc")) ;; fields of #f uses default + (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit fields last-runs-update sort-order))) + +(define (rmt:find-and-mark-incomplete run-id ovr-deadtime) + (assert (number? run-id) "FATAL: Run id required.") + ;; (if (rmt:send-receive 'have-incompletes? run-id (list run-id ovr-deadtime)) + (rmt:send-receive 'mark-incomplete run-id (list run-id ovr-deadtime))) ;; ) + +(define (rmt:get-main-run-stats run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-main-run-stats #f (list run-id))) + +(define (rmt:get-var varname) + (rmt:send-receive 'get-var #f (list varname))) + +(define (rmt:del-var varname) + (rmt:send-receive 'del-var #f (list varname))) + +(define (rmt:set-var varname value) + (rmt:send-receive 'set-var #f (list varname value))) + +(define (rmt:inc-var varname) + (rmt:send-receive 'inc-var #f (list varname))) + +(define (rmt:dec-var varname) + (rmt:send-receive 'dec-var #f (list varname))) + +(define (rmt:add-var varname value) + (rmt:send-receive 'add-var #f (list varname value))) + +;;====================================================================== +;; M U L T I R U N Q U E R I E S +;;====================================================================== + +;; Need to move this to multi-run section and make associated changes +(define (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f)) + (let ((run-ids (rmt:get-all-run-ids))) + (for-each (lambda (run-id) + (rmt:find-and-mark-incomplete run-id ovr-deadtime)) + run-ids))) + +;; get the previous record for when this test was run where all keys match but runname +;; returns #f if no such test found, returns a single test record if found +;; +;; Run this at the client end since we have to connect to multiple run-id dbs +;; +(define (rmt:get-previous-test-run-record run-id test-name item-path) + (let* ((keyvals (rmt:get-key-val-pairs run-id)) + (keys (rmt:get-keys)) + (selstr (string-intersperse keys ",")) + (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND "))) + (if (not keyvals) + #f + (let ((prev-run-ids (rmt:get-prev-run-ids run-id))) + ;; for each run starting with the most recent look to see if there is a matching test + ;; if found then return that matching test record + (debug:print 4 *default-log-port* "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals ", previous run ids found: " prev-run-ids) + (if (null? prev-run-ids) #f + (let loop ((hed (car prev-run-ids)) + (tal (cdr prev-run-ids))) + (let ((results (rmt:get-tests-for-run hed (conc test-name "/" item-path) '() '() ;; run-id testpatt states statuses + #f #f #f ;; offset limit not-in hide/not-hide + #f #f #f #f 'normal))) ;; sort-by sort-order qryvals last-update mode + (debug:print 4 *default-log-port* "Got tests for run-id " run-id ", test-name " test-name ", item-path " item-path ": " results) + (if (and (null? results) + (not (null? tal))) + (loop (car tal)(cdr tal)) + (if (null? results) #f + (car results)))))))))) + +(define (rmt:get-run-stats) + (rmt:send-receive 'get-run-stats #f '())) + +;;====================================================================== +;; S T E P S +;;====================================================================== + +;; Getting steps is more complicated. +;; +;; If given work area +;; 1. Find the testdat.db file +;; 2. Open the testdat.db file and do the query +;; If not given the work area +;; 1. Do a remote call to get the test path +;; 2. Continue as above +;; +;;(define (rmt:get-steps-for-test run-id test-id) +;; (rmt:send-receive 'get-steps-data run-id (list test-id))) + +(define (rmt:teststep-set-status! run-id test-id teststep-name state-in status-in comment logfile) + (assert (number? run-id) "FATAL: Run id required.") + (let* ((state (items:check-valid-items "state" state-in)) + (status (items:check-valid-items "status" status-in))) + (if (or (not state)(not status)) + (debug:print 3 *default-log-port* "WARNING: Invalid " (if status "status" "state") + " value \"" (if status state-in status-in) "\", update your validvalues section in megatest.config")) + (rmt:send-receive 'teststep-set-status! run-id (list run-id test-id teststep-name state-in status-in comment logfile)))) + + +(define (rmt:delete-steps-for-test! run-id test-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'delete-steps-for-test! run-id (list run-id test-id))) + +(define (rmt:get-steps-for-test run-id test-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-steps-for-test run-id (list run-id test-id))) + +(define (rmt:get-steps-info-by-id run-id test-step-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-steps-info-by-id #f (list run-id test-step-id))) + +;;====================================================================== +;; T E S T D A T A +;;====================================================================== + +(define (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f)) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'read-test-data run-id (list run-id test-id categorypatt))) + +(define (rmt:read-test-data-varpatt run-id test-id categorypatt varpatt #!key (work-area #f)) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'read-test-data-varpatt run-id (list run-id test-id categorypatt varpatt))) + +(define (rmt:get-data-info-by-id run-id test-data-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-data-info-by-id #f (list run-id test-data-id))) + +(define (rmt:testmeta-add-record testname) + (rmt:send-receive 'testmeta-add-record #f (list testname))) + +(define (rmt:testmeta-get-record testname) + (rmt:send-receive 'testmeta-get-record #f (list testname))) + +(define (rmt:testmeta-update-field test-name fld val) + (rmt:send-receive 'testmeta-update-field #f (list test-name fld val))) + +(define (rmt:test-data-rollup run-id test-id status) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'test-data-rollup run-id (list run-id test-id status))) + +(define (rmt:csv->test-data run-id test-id csvdata) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'csv->test-data run-id (list run-id test-id csvdata))) + +;;====================================================================== +;; T A S K S +;;====================================================================== + +(define (rmt:tasks-find-task-queue-records target run-name test-patt state-patt action-patt) + (rmt:send-receive 'find-task-queue-records #f (list target run-name test-patt state-patt action-patt))) + +(define (rmt:tasks-add action owner target runname testpatt params) + (rmt:send-receive 'tasks-add #f (list action owner target runname testpatt params))) + +(define (rmt:tasks-set-state-given-param-key param-key new-state) + (rmt:send-receive 'tasks-set-state-given-param-key #f (list param-key new-state))) + +(define (rmt:tasks-get-last target runname) + (rmt:send-receive 'tasks-get-last #f (list target runname))) + +;;====================================================================== +;; N O S Y N C D B +;;====================================================================== + +(define (rmt:no-sync-set var val) + (rmt:send-receive 'no-sync-set #f `(,var ,val))) + +(define (rmt:no-sync-get/default var default) + (rmt:send-receive 'no-sync-get/default #f `(,var ,default))) + +(define (rmt:no-sync-del! var) + (rmt:send-receive 'no-sync-del! #f `(,var))) + +(define (rmt:no-sync-get-lock keyname) + (rmt:send-receive 'no-sync-get-lock #f `(,keyname))) + +;; process registration + +(define (rmt:register-process host port pid starttime status purpose dbname mtversion) + (rmt:send-receive 'register-process #f (list host port pid starttime status purpose dbname mtversion))) + +(define (rmt:set-process-done host pid reason) + (rmt:send-receive 'set-process-done #f (list host pid reason))) + +(define (rmt:set-process-status host pid newstatus) + (rmt:send-receive 'set-process-status #f (list host pid newstatus))) + +(define (rmt:get-process-options purpose dbname) + (rmt:get-process-options 'get-process-options #f (list purpose dbname))) + +;;====================================================================== +;; A R C H I V E S +;;====================================================================== + +(define (rmt:archive-get-allocations testname itempath dneeded) + (rmt:send-receive 'archive-get-allocations #f (list testname itempath dneeded))) + +(define (rmt:archive-register-block-name bdisk-id archive-path) + (rmt:send-receive 'archive-register-block-name #f (list bdisk-id archive-path))) + +(define (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey) + (rmt:send-receive 'archive-allocate-test-to-block #f (list block-id testsuite-name areakey))) + +(define (rmt:archive-register-disk bdisk-name bdisk-path df) + (rmt:send-receive 'archive-register-disk #f (list bdisk-name bdisk-path df))) + +(define (rmt:test-set-archive-block-id run-id test-id archive-block-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id))) + +(define (rmt:test-get-archive-block-info archive-block-id) + (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id))) + +(define (rmtmod:calc-ro-mode runremote *toppath*) + (case (rmt:transport-mode) + ((http) + (if (and runremote + (remote-ro-mode-checked runremote)) + (remote-ro-mode runremote) + (let* ((mtcfgfile (conc *toppath* "/megatest.config")) + (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future + (if runremote + (begin + (remote-ro-mode-set! runremote ro-mode) + (remote-ro-mode-checked-set! runremote #t) + ro-mode) + ro-mode)))) + ((tcp) + (if (and runremote + (tt-ro-mode-checked runremote)) + (tt-ro-mode runremote) + (let* ((mtcfgfile (conc *toppath* "/megatest.config")) + (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future + (if runremote + (begin + (tt-ro-mode-set! runremote ro-mode) + (tt-ro-mode-checked-set! runremote #t) + ro-mode) + ro-mode)))))) + + +;;====================================================================== +;; S U P P O R T F U N C T I O N S +;;====================================================================== + +(define (rmt:on-homehost? #!optional (runremote-in #f)) + (let* ((runremote (or runremote-in *runremote*))) + (if runremote + (let* ((hh-dat (remote-hh-dat runremote))) + (if (pair? hh-dat) + (cdr hh-dat) + (begin + (debug:print-info 0 *default-log-port* "hh-dat="hh-dat) + #f))) + #f))) ;; not true strickly speaking, might be runremote was not yet initialized. + +(define (make-and-init-remote areapath) + (case (rmt:transport-mode) + ((http)(make-remote)) + ((tcp) (tt:make-remote areapath)) + (else #f))) + +;; how to make area-dat +(define (rmt:set-ttdat areapath ttdat) + (if ttdat + ttdat + (if *ttdat* + *ttdat* + (begin + (debug:print-info 2 *default-log-port* "rmt:set-ttdat: Initialize new ttdat") + (let* ((newremote (make-and-init-remote areapath))) + (set! *ttdat* newremote) + newremote + ) + ) + ) + ) +) + +;;====================================================================== +;; from metadat lookup MEGATEST_VERSION +;; +(define (common:get-last-run-version) ;; RADT => How does this work in send-receive function??; assume it is the value saved in some DB + (rmt:get-var "MEGATEST_VERSION")) + +(define (common:get-last-run-version-number) + (string->number + (substring (common:get-last-run-version) 0 6))) + +(define (common:set-last-run-version) + (rmt:set-var "MEGATEST_VERSION" (common:version-signature))) + +;;====================================================================== +;; postive number if megatest version > db version +;; negative number if megatest version < db version +(define (common:version-db-delta) + (- megatest-version (common:get-last-run-version-number))) + +(define (common:version-changed?) + (not (equal? (common:get-last-run-version) + (common:version-signature)))) + + +;; From 1.70 to 1.80, db's are compatible. + +(define (common:api-changed?) + (let* ((megatest-major-version (substring (->string megatest-version) 0 4)) + (run-major-version (substring (conc (common:get-last-run-version)) 0 4))) + (and (not (equal? megatest-major-version "1.80")) + (not (equal? megatest-major-version run-major-version))))) + +;;====================================================================== +;; Move me elsewhere ... +;; RADT => Why do we meed the version check here, this is called only if version misma +;; +(define (common:cleanup-db dbstruct #!key (full #f)) + (case (rmt:transport-mode) + ((http) + (apply db:multi-db-sync + dbstruct + 'schema + 'killservers + 'adj-target + 'new2old + '(dejunk) + )) + ((tcp nfs) + (apply db:multi-db-sync + dbstruct + 'schema + 'killservers + 'adj-target + 'new2old + '(dejunk) + ))) + (if (common:api-changed?) + (common:set-last-run-version))) + + +(define (mt:lazy-get-prereqs-not-met run-id waitons ref-item-path #!key (mode '(normal))(itemmaps #f) ) + (let* ((key (list run-id waitons ref-item-path mode)) + (res (hash-table-ref/default *pre-reqs-met-cache* key #f)) + (useres (let ((last-time (if (vector? res) (vector-ref res 0) #f))) + (if last-time + (< (current-seconds)(+ last-time 5)) + #f)))) + (if useres + (let ((result (vector-ref res 1))) + (debug:print 4 *default-log-port* "Using lazy value res: " result) + result) + (let ((newres (rmt:get-prereqs-not-met run-id waitons ref-item-path mode: mode itemmaps: itemmaps))) + (hash-table-set! *pre-reqs-met-cache* key (vector (current-seconds) newres)) + newres)))) + +;;====================================================================== +;; T E S T S +;;====================================================================== + +(define (mt:get-tests-for-run run-id testpatt states status #!key (not-in #t) (sort-by 'event_time) (sort-order "ASC") (qryvals #f)(last-update #f)) + (let loop ((testsdat (rmt:get-tests-for-run run-id testpatt states status 0 500 not-in sort-by sort-order qryvals last-update 'normal)) + (res '()) + (offset 0) + (limit 500)) + (let* ((full-list (append res testsdat)) + (have-more (eq? (length testsdat) limit))) + (if have-more + (let ((new-offset (+ offset limit))) + (debug:print-info 4 *default-log-port* "More than " limit " tests, have " (length full-list) " tests so far.") + (loop (rmt:get-tests-for-run run-id testpatt states status new-offset limit not-in sort-by sort-order qryvals last-update 'normal) + full-list + new-offset + limit)) + full-list)))) + + +;; runs:get-runs-by-patt +;; get runs by list of criteria +;; register a test run with the db +;; +;; Use: (db-get-value-by-header (db:get-header runinfo)(db:get-rows runinfo)) +;; to extract info from the structure returned +;; +(define (mt:get-runs-by-patt keys runnamepatt targpatt) + (let loop ((runsdat (rmt:get-runs-by-patt keys runnamepatt targpatt 0 500 #f 0)) + (res '()) + (offset 0) + (limit 500)) + ;; (print "runsdat: " runsdat) + (let* ((header (vector-ref runsdat 0)) + (runslst (vector-ref runsdat 1)) + (full-list (append res runslst)) + (have-more (eq? (length runslst) limit))) + ;; (debug:print 0 *default-log-port* "header: " header " runslst: " runslst " have-more: " have-more) + (if have-more + (let ((new-offset (+ offset limit)) + (next-batch (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit #f 0))) + (debug:print-info 4 *default-log-port* "More than " limit " runs, have " (length full-list) " runs so far.") + (debug:print-info 0 *default-log-port* "next-batch: " next-batch) + (loop next-batch + full-list + new-offset + limit)) + (vector header full-list))))) + +;;;====================================================================== +;; S T A T E A N D S T A T U S F O R T E S T S +;;====================================================================== + +;; speed up for common cases with a little logic +(define (mt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment) + (if (not (and run-id test-id)) + (begin + (debug:print-error 0 *default-log-port* "bad data handed to mt:test-set-state-status-by-id, run-id=" run-id ", test-id=" test-id ", newstate=" newstate) + (print-call-chain (current-error-port)) + #f) + (begin + ;; cond + ;; ((and newstate newstatus newcomment) + ;; (rmt:general-call 'state-status-msg run-id newstate newstatus newcomment test-id)) + ;; ((and newstate newstatus) + ;; (rmt:general-call 'state-status run-id newstate newstatus test-id)) + ;; (else + ;; (if newstate (rmt:general-call 'set-test-state run-id newstate test-id)) + ;; (if newstatus (rmt:general-call 'set-test-status run-id newstatus test-id)) + ;; (if newcomment (rmt:general-call 'set-test-comment run-id newcomment test-id)))) + (rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment) + ;; (mt:process-triggers run-id test-id newstate newstatus) + #t))) + +(define (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path new-state new-status new-comment) + (let ((test-id (rmt:get-test-id run-id test-name item-path))) + (mt:test-set-state-status-by-id-unless-completed run-id test-id new-state new-status new-comment))) + +(define (mt:test-set-state-status-by-id-unless-completed run-id test-id newstate newstatus newcomment) + (let* ((test-vec (rmt:get-testinfo-state-status run-id test-id)) + (state (vector-ref test-vec 3))) + (if (equal? state "COMPLETED") + #t + (rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment)))) + + + +(define (mt:test-set-state-status-by-testname run-id test-name item-path new-state new-status new-comment) + ;(let ((test-id (rmt:get-test-id run-id test-name item-path))) + (rmt:set-state-status-and-roll-up-items run-id test-name item-path new-state new-status new-comment) + ;; (mt:process-triggers run-id test-id new-state new-status) + #t);) + ;;(mt:test-set-state-status-by-id run-id test-id new-state new-status new-comment))) + +(define (tests:test-set-toplog! run-id test-name logf) + (rmt:general-call 'tests:test-set-toplog run-id logf run-id test-name)) + ) Index: runconfig.scm ================================================================== --- runconfig.scm +++ runconfig.scm @@ -30,140 +30,5 @@ (import commonmod debugprint) (include "common_records.scm") -(define (runconfig:read fname target environ-patt) - (let ((ht (make-hash-table))) - (if target (hash-table-set! ht target '())) - (read-config fname ht #t environ-patt: environ-patt sections: (if target (list "default" target) #f)))) - -;; NB// to process a runconfig ensure to use environ-patt with target! -;; -(define (setup-env-defaults fname run-id already-seen keyvals #!key (environ-patt #f)(change-env #t)) - (let* ((keys (map car keyvals)) - (thekey (if keyvals - (string-intersperse (map (lambda (x)(if x x "-na-")) (map cadr keyvals)) "/") - (or (common:args-get-target) - (get-environment-variable "MT_TARGET") - (begin - (debug:print-error 0 *default-log-port* "setup-env-defaults called with no run-id or -target or -reqtarg") - "nothing matches this I hope")))) - ;; Why was system disallowed in the reading of the runconfigs file? - ;; NOTE: Should be setting env vars based on (target|default) - (confdat (runconfig:read fname thekey environ-patt)) - (whatfound (make-hash-table)) - (finaldat (make-hash-table)) - (sections (list "default" thekey))) - (if (not *target*)(set! *target* thekey)) ;; may save a db access or two but repeats db:get-target code - (debug:print 4 *default-log-port* "Using key=\"" thekey "\"") - - (if change-env - (for-each ;; NB// This can be simplified with new content of keyvals having all that is needed. - (lambda (keyval) - (safe-setenv (car keyval)(cadr keyval))) - keyvals)) - - (for-each - (lambda (section) - (let ((section-dat (hash-table-ref/default confdat section #f))) - (if section-dat - (for-each - (lambda (envvar) - (let ((val (cadr (assoc envvar section-dat)))) - (hash-table-set! whatfound section (+ (hash-table-ref/default whatfound section 0) 1)) - (if (and (string? envvar) - (string? val) - change-env) - (safe-setenv envvar val)) - (hash-table-set! finaldat envvar val))) - (map car section-dat))))) - sections) - (if already-seen - (begin - (debug:print 2 *default-log-port* "Key settings found in runconfigs.config:") - (for-each (lambda (fullkey) - (debug:print 2 *default-log-port* (format #f "~20a ~a\n" fullkey (hash-table-ref/default whatfound fullkey 0)))) - sections) - (debug:print 2 *default-log-port* "---") - (set! *already-seen-runconfig-info* #t))) - ;; finaldat ;; was returning this "finaldat" which would be good but conflicts with other uses - confdat - )) - -(define (set-run-config-vars run-id keyvals targ-from-db) - (push-directory *toppath*) ;; the push/pop doesn't appear to do anything ... - (let ((runconfigf (conc *toppath* "/runconfigs.config")) - (targ (or (common:args-get-target) - targ-from-db - (get-environment-variable "MT_TARGET")))) - (pop-directory) - (if (common:file-exists? runconfigf) - (setup-env-defaults runconfigf run-id #t keyvals - environ-patt: (conc "(default" - (if targ - (conc "|" targ ")") - ")"))) - (debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf)))) - -;; given (a (b c) d) return ((a b d)(a c d)) -;; NOTE: this feels like it has been done before - perhaps with items handling? -;; -(define (runconfig:combinations inlst) - (let loop ((hed (car inlst)) - (tal (cdr inlst)) - (res '())) - ;; (print "res: " res " hed: " hed) - (if (list? hed) - (let ((newres (if (null? res) ;; first time through convert incoming items to list of items - (map list hed) - (apply append - (map (lambda (r) ;; iterate over items in res - (map (lambda (h) ;; iterate over items in hed - (append r (list h))) - hed)) - res))))) - ;; (print "newres1: " newres) - (if (null? tal) - newres - (loop (car tal)(cdr tal) newres))) - (let ((newres (if (null? res) - (list (list hed)) - (map (lambda (r) - (append r (list hed))) - res)))) - ;; (print "newres2: " newres) - (if (null? tal) - newres - (loop (car tal)(cdr tal) newres)))))) - -;; multi-part expand -;; Given a/b,c,d/e,f return a/b/e a/b/f a/c/e a/c/f a/d/e a/d/f -;; -(define (runconfig:expand target) - (let* ((parts (map (lambda (x) - (string-split x ",")) - (string-split target "/")))) - (map (lambda (x) - (string-intersperse x "/")) - (runconfig:combinations parts)))) - -;; multi-target expansion -;; a/b/c/x,y,z a/b/d/x,y => a/b/c/x a/b/c/y a/b/c/z a/b/d/x a/b/d/y -;; -(define (runconfig:expand-target target-strs) - (delete-duplicates - (apply append (map runconfig:expand (string-split target-strs " "))))) - -#| - (if (null? target-strs) - '() - (let loop ((hed (car target-strs)) - (tal (cdr target-strs)) - (res '())) - ;; first break all parts into individual target patterns - (if (string-index hed " ") ;; this is a multi-target target - (let ((newres (append (string-split hed " ") res))) - (runconfig:expand-target newres)) - (if (string-index hed ",") ;; this is a multi-target where one or more parts are comma separated - -|# Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -16,25 +16,31 @@ ;; along with Megatest. If not, see . ;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') (declare (unit runs)) -(declare (uses db)) -(declare (uses common)) (declare (uses debugprint)) (declare (uses commonmod)) (declare (uses processmod)) (declare (uses configfmod)) +(declare (uses mtargs)) +(declare (uses rmtmod)) +(declare (uses dbfile)) +(declare (uses dbmod)) +(declare (uses megatestmod)) +(declare (uses mtmod)) +(declare (uses tasksmod)) +(declare (uses servermod)) + +(declare (uses db)) +(declare (uses common)) (declare (uses items)) (declare (uses runconfig)) (declare (uses tests)) (declare (uses server)) (declare (uses mt)) (declare (uses archive)) -(declare (uses mtargs)) -(declare (uses rmtmod)) -(declare (uses dbfile)) (use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (srfi 18) posix-extras directory-utils pathname-expand typed-records format sxml-serializer sxml-modifications matchable) @@ -52,3077 +58,13 @@ processmod configfmod debugprint rmtmod dbfile - (prefix mtargs args:)) - -;; use this struct to facilitate refactoring -;; - -(defstruct runs:dat - reglen regfull - runname max-concurrent-jobs run-id - test-patts required-tests test-registry - registry-mutex flags keyvals run-info all-tests-registry - ;; stores results from last runs:can-run-more-tests - (can-run-more-tests #f) ;; (list can-run-more-flag num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit) - ((can-run-more-tests-count 0) : fixnum) - (last-fuel-check 0) ;; time when we last checked fuel - (beginning-of-time (current-seconds)) - (load-mgmt-function #f) - (wait-for-jobs-function #f) - (last-load-check-time 0) - (last-jobs-check-time 0) - ) - -(defstruct runs:testdat - hed tal reg reruns test-record - test-name item-path jobgroup - waitons testmode newtal itemmaps prereqs-not-met) - -;; look in the $MT_RUN_AREA_HOME/.softlocks directory for key-host-pid.softlock files -;; - remove any that are over 3600 seconds old -;; - if there are any that are younger than 10 seconds -;; * sleep 10 seconds -;; * touch my key-host-pid.softlock file -;; * return -;; - if there are no files younger than 10 seconds -;; * touch my key-host-pid.softlock file -;; * return -;; -(define (runs:wait-on-softlock rdat key) - (if (not (and *toppath* (file-exists? *toppath*))) ;; don't seem to have toppath yet - (debug:print-info 0 *default-log-port* "Can't create softlocks - don't see MTRAH yet.") - (let* ((softlocks-dir (conc *toppath* "/.softlocks"))) - (if (not (file-exists? softlocks-dir)) - (create-directory softlocks-dir #t)) - (let* ((my-lock-file (conc softlocks-dir "/" key "-" (get-host-name) "-" (current-process-id) ".softlock")) - (lock-files (filter (lambda (x) - (not (equal? x my-lock-file))) - (glob (conc softlocks-dir "/" key "*.softlock")))) - (fresh-locks (any (lambda (x) ;; do we have any locks younger than 10 seconds - (let* ((mod-time (file-modification-time x)) - (age (- (current-seconds) mod-time))) - (cond - ((> age 3600) ;; too old to keep, remove it - (delete-file* x) #f) - ((< age 10) #t) - (else #f)))) - lock-files))) - (if fresh-locks - (begin - (if (runs:lownoise "runners-softlock-wait" 360) - (debug:print-info 0 *default-log-port* "Other runners in flight, giving up some time...")) - (thread-sleep! 2)) - (begin - (if (runs:lownoise "runners-softlock-nowait" 360) - (debug:print-info 0 *default-log-port* "No runners in flight, updating softlock")) - (let* ((ouf (open-output-file my-lock-file))) - (with-output-to-port ouf (lambda ()(print (current-seconds)))) - (close-output-port ouf)))) - (runs:dat-last-fuel-check-set! rdat (current-seconds)))))) - -;; Fourth try, do accounting through time.... -;; -(define (runs:parallel-runners-mgmt rdat) - (let ((time-to-check (configf:lookup-number *configdat* "runners" "time-to-check" default: 10)) ;; 28 - (time-to-wait (configf:lookup-number *configdat* "runners" "time-to-wait" default: 30)) - (now-time (current-seconds))) - (if (> (- now-time (runs:dat-last-fuel-check rdat)) time-to-check) ;; time to check - (runs:wait-on-softlock rdat "runners")))) - -;; To test parallel-runners management start a repl: -;; megatest -repl -;; then run: -;; (runs:test-parallel-runners 60) -;; -(define (runs:test-parallel-runners duration #!optional (proc #f)) - (let* ((rdat (make-runs:dat)) - (rtime 0) - (startt (current-seconds)) - (endt (+ startt duration))) - ((or proc runs:parallel-runners-mgmt) rdat) - (let loop () - (let* ((wstart (current-seconds))) - (if (< wstart endt) - (let* ((work-time (random 10))) - #;(debug:print-info 0 *default-log-port* "working for " work-time - " seconds. Total work: " rtime ", elapsed time: " (- wstart startt)) - (thread-sleep! work-time) - (set! rtime (+ rtime work-time)) - ((or proc runs:parallel-runners-mgmt) rdat) - (loop))))) - (let* ((done-time (current-seconds))) - (debug:print-info 0 *default-log-port* "DONE: rtime=" rtime ", elapsed time=" (- done-time startt) - ", ratio=" (/ rtime (- done-time startt)))))) - -(define (runs:get-mt-env-alist run-id runname target testname itempath) - ;;(bb-check-path msg: "runs:set-megatest-env-vars entry") - `(("MT_TEST_NAME" . ,testname) - - ("MT_ITEMPATH" . ,itempath) - - ("MT_TARGET" . ,target) - - ("MT_RUNNAME" . ,runname) - - ("MT_RUN_AREA_HOME" . ,*toppath*) - - ,@(let* ((link-tree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree"))) - (if link-tree - (list (cons "MT_LINKTREE" link-tree) - - (cons "MT_TEST_RUN_DIR" - (conc link-tree "/" target "/" runname "/" testname - (if (and (string? itempath) (not (equal? itempath ""))) - (conc "/" itempath) - ""))) - ) - '())) - - ,@(map - (lambda (key) - (cons (car key) (cadr key))) - (keys:target->keyval (rmt:get-keys) target)) - - ,@(map (lambda (var) - (let ((val (configf:lookup *configdat* "env-override" var))) - (cons var val))) - (configf:section-vars *configdat* "env-override")))) - -;; set up needed environment variables given a run-id and optionally a target, itempath etc. -;; -(define (runs:set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)(intarget #f)(testname #f)(itempath #f)) - ;;(bb-check-path msg: "runs:set-megatest-env-vars entry") - (let* ((target (or intarget - (common:args-get-target) - (get-environment-variable "MT_TARGET"))) - (keys (if inkeys inkeys (rmt:get-keys))) - (keyvals (if inkeyvals inkeyvals (keys:target->keyval keys target))) - (vals (hash-table-ref/default *env-vars-by-run-id* run-id #f)) - (link-tree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree"))) - (if testname (setenv "MT_TEST_NAME" testname)) - (if itempath (setenv "MT_ITEMPATH" itempath)) - - ;; get the info from the db and put it in the cache - (if link-tree - (setenv "MT_LINKTREE" link-tree) - (debug:print-error 0 *default-log-port* "linktree not set, should be set in megatest.config in [setup] section.")) - (if (not vals) - (let ((ht (make-hash-table))) - (hash-table-set! *env-vars-by-run-id* run-id ht) - (set! vals ht) - (for-each - (lambda (key) - (hash-table-set! vals (car key) (cadr key))) - keyvals))) - ;; from the cached data set the vars - - (hash-table-for-each - vals - (lambda (key val) - (debug:print 2 *default-log-port* "setenv " key " " val) - (safe-setenv key val))) - ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1") - ;;(BB> "*env-vars-by-run-id*/runid("run-id" vals="(hash-table->alist vals)) - - (if (not (get-environment-variable "MT_TARGET"))(setenv "MT_TARGET" target)) - ;; we had a case where there was an exception generated by the hash-table-ref - ;; due to *configdat* being #f Adding a handle and exit - (let fatal-loop ((count 0)) - (handle-exceptions - exn - (let ((call-chain (get-call-chain)) - (msg ((condition-property-accessor 'exn 'message) exn))) - (if (< count 5) - (begin ;; this call is colliding, do some crude stuff to fix it. - (debug:print 0 *default-log-port* "ERROR: *configdat* was inaccessible! This should never happen. Retry #" count - ", exn=" exn) - (launch:setup force-reread: #t) - (fatal-loop (+ count 1))) - (begin - (debug:print 0 *default-log-port* "FATAL: *configdat* was inaccessible! This should never happen. Retried " count - " times. Message: " msg) - (debug:print 0 *default-log-port* "Call chain:") - (with-output-to-port *default-log-port* - (lambda () - (print "*configdat* is >>"*configdat*"<<") - (pp *configdat*) - (pp call-chain))) - - (exit 1)))) - ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1.5") - (when (or (not *configdat*) (not (hash-table? *configdat*))) - (debug:print 0 *default-log-port* "WARNING: *configdat* was inaccessible! This should never happen. Brute force reread.") - ;;(BB> "ERROR: *configdat* was inaccessible! This should never happen. Brute force reread.") - (thread-sleep! 2) ;; assuming nfs lag. - (launch:setup force-reread: #t)) - (alist->env-vars (hash-table-ref/default *configdat* "env-override" '())))) ;;;; environment is tainted HERE in this let block. - ;;(bb-check-path msg: "runs:set-megatest-env-vars block 2") - ;; Lets use this as an opportunity to put MT_RUNNAME in the environment - (let ((runname (if inrunname inrunname (rmt:get-run-name-from-id run-id)))) - (if runname - (setenv "MT_RUNNAME" runname) - (debug:print-error 0 *default-log-port* "no value for runname for id " run-id))) - (setenv "MT_RUN_AREA_HOME" *toppath*) - ;; if a testname and itempath are available set the remaining appropriate variables - (if testname (setenv "MT_TEST_NAME" testname)) - (if itempath (setenv "MT_ITEMPATH" itempath)) - ;;(bb-check-path msg: "runs:set-megatest-env-vars block 3") - (if (and testname link-tree) - (setenv "MT_TEST_RUN_DIR" (conc (getenv "MT_LINKTREE") "/" - (getenv "MT_TARGET") "/" - (getenv "MT_RUNNAME") "/" - (getenv "MT_TEST_NAME") - (if (and itempath - (not (equal? itempath ""))) - (conc "/" itempath) - "")))))) - -(define (set-item-env-vars itemdat) - (for-each (lambda (item) - (debug:print 2 *default-log-port* "setenv " (car item) " " (cadr item)) - (setenv (car item) (cadr item))) - itemdat)) - -;; Every time can-run-more-tests is called increment the delay -;; -;; NOTE: We run this server-side!! Do not use this global except in the runs:can-run-more-tests routine -;; -(define *last-num-running-tests* 0) -;; (define *runs:can-run-more-tests-count* 0) -(define (runs:shrink-can-run-more-tests-count runsdat) - (runs:dat-can-run-more-tests-count-set! runsdat 0)) - -(define (runs:inc-can-run-more-tests-count runsdat) - (runs:dat-can-run-more-tests-count-set! - runsdat - (+ (runs:dat-can-run-more-tests-count runsdat) 1))) - -;; (set! *runs:can-run-more-tests-count* 0)) ;; (/ *runs:can-run-more-tests-count* 2))) - -;; Temporary globals. Move these into the logic or into common -;; -(define *seen-cant-run-tests* (make-hash-table)) ;; use to track tests that we suspect cannot be run -(define (runs:inc-cant-run-tests testname) - (hash-table-set! *seen-cant-run-tests* testname - (+ (hash-table-ref/default *seen-cant-run-tests* testname 0) 1))) - -(define (runs:can-keep-running? testname n) - (< (hash-table-ref/default *seen-cant-run-tests* testname 0) n)) - -(define *runs:denoise* (make-hash-table)) ;; key => last-time-ran - -;; mechanism to limit printing info to the screen that is repetitive. -;; -;; Example: -;; (if (runs:lownoise "waiting on tasks" 60) -;; (debug:print-info 2 *default-log-port* "waiting for tasks to complete, sleeping briefly ...")) -;; -(define (runs:lownoise key waitval) - (let ((lasttime (hash-table-ref/default *runs:denoise* key 0)) - (currtime (current-seconds))) - (if (> (- currtime lasttime) waitval) - (begin - (hash-table-set! *runs:denoise* key currtime) - #t) - #f))) - -(define *last-test-launch* 0) -(define *too-soon-delays* (make-hash-table)) - -;; to-soon delay, when matching event happened in less than dseconds delay wseconds -;; -(define (runs:too-soon-delay key dseconds wseconds) - (let* ((last-time (hash-table-ref/default *too-soon-delays* key #f))) - (if (and last-time - (< (- (current-seconds) last-time) dseconds)) - (begin - (if (runs:lownoise (conc "too-soon-delay"key) 60) - (debug:print-info 2 *default-log-port* "Polling throttle for "key)) - (thread-sleep! wseconds))) - (hash-table-set! *too-soon-delays* key (current-seconds)))) - -(define (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs) - - ;; Take advantage of a good place to exit if running the one-pass methodology - (if (and (> (runs:dat-can-run-more-tests-count runsdat) 20) - (args:get-arg "-one-pass")) - (exit 0)) - - (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat))) - - (let* ((num-running (rmt:get-count-tests-running-for-run-id run-id)) - (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)) - (job-group-limit (let ((jobg-count (configf:lookup *configdat* "jobgroups" jobgroup))) - (if (string? jobg-count) - (string->number jobg-count) - jobg-count)))) - (if (> (+ num-running num-running-in-jobgroup) 0) - (runs:inc-can-run-more-tests-count runsdat)) ;; (set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1))) - (if (not (eq? *last-num-running-tests* num-running)) - (begin - (debug:print 2 *default-log-port* "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running) - (set! *last-num-running-tests* num-running))) - (if (not (eq? 0 *globalexitstatus*)) - (list #f num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit) - (let* ((can-not-run-more (cond - ;; if max-concurrent-jobs is set and the number running is greater - ;; than it then cannot run more jobs - ((and max-concurrent-jobs (>= num-running max-concurrent-jobs)) - (if (runs:lownoise "mcj msg" 60) - (debug:print 0 *default-log-port* "WARNING: Max running jobs exceeded, current number running: " num-running - ", max_concurrent_jobs: " max-concurrent-jobs)) - #t) - ;; if job-group-limit is set and number of jobs in the group is greater - ;; than the limit then cannot run more jobs of this kind - ((and job-group-limit - (>= num-running-in-jobgroup job-group-limit)) - (if (runs:lownoise (conc "maxjobgroup " jobgroup) 60) - (debug:print 1 *default-log-port* "WARNING: number of jobs " num-running-in-jobgroup - " in jobgroup \"" jobgroup "\" exceeds limit of " job-group-limit)) - #t) - (else #f)))) - (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit))))) - -(define (runs:run-pre-hook run-id) - (let* ((run-pre-hook (configf:lookup *configdat* "runs" "pre-hook")) - (existing-tests (if run-pre-hook - (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses - #f #f ;; offset limit - #f ;; not-in - #f ;; sort-by - #f ;; sort-order - #f ;; get full data (not 'shortlist) - 0 ;; (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time - 'dashboard) - '())) - (log-dir (conc *toppath* "/logs")) - (log-file (conc "pre-hook-" (string-translate (getenv "MT_TARGET") "/" "-") "-" (getenv "MT_RUNNAME") ".log")) - (full-log-fname (conc log-dir "/" log-file))) - (if run-pre-hook - (if (null? existing-tests) - (let* ((use-log-dir (if (not (directory-exists? log-dir)) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir ", exn=" exn) - #f) - (create-directory log-dir #t) - #t) - #t)) - (start-time (current-seconds)) - (actual-logf (if use-log-dir full-log-fname log-file))) - (handle-exceptions - exn - (begin - (print-call-chain *default-log-port*) - (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) - (debug:print 0 *default-log-port* "ERROR: failed to run pre-hook " run-pre-hook ", check the log " log-file)) - (debug:print-info 0 *default-log-port* "running run-pre-hook: \"" run-pre-hook "\", log is " actual-logf) - (system (conc run-pre-hook " >> " actual-logf " 2>&1")) - (debug:print-info 0 *default-log-port* "pre-hook \"" run-pre-hook "\" took " (- (current-seconds) start-time) " seconds to run."))) - (debug:print 0 *default-log-port* "Skipping pre-hook call \"" run-pre-hook "\" as there are existing tests for this run."))))) - - - -(define (runs:run-post-hook run-id) - (let* ((run-post-hook (configf:lookup *configdat* "runs" "post-hook")) - (existing-tests (if run-post-hook - (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses - #f #f ;; offset limit - #f ;; not-in - #f ;; sort-by - #f ;; sort-order - #f ;; get full data (not 'shortlist) - 0 ;; (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time - 'dashboard) - '())) - (log-dir (conc *toppath* "/logs")) - (log-file (conc "post-hook-" (string-translate (getenv "MT_TARGET") "/" "-") "-" (getenv "MT_RUNNAME") ".log")) - (full-log-fname (conc log-dir "/" log-file))) - (if run-post-hook - ;; (if (null? existing-tests) - ;; (debug:print 0 *default-log-port* "Skipping post-hook call \"" run-post-hook "\" as there are existing tests for this run."))))) - (let* ((use-log-dir (if (not (directory-exists? log-dir)) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir ", exn=" exn) - #f) - (create-directory log-dir #t) - #t) - #t)) - (start-time (current-seconds)) - (actual-logf (if use-log-dir full-log-fname log-file))) - (handle-exceptions - exn - (begin - (print-call-chain *default-log-port*) - (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) - (debug:print 0 *default-log-port* "ERROR: failed to run post-hook " run-post-hook ", check the log " log-file)) - (debug:print-info 0 *default-log-port* "running run-post-hook: \"" run-post-hook "\", log is " actual-logf) - (system (conc run-post-hook " >> " actual-logf " 2>&1")) - (debug:print-info 0 *default-log-port* "post-hook \"" run-post-hook "\" took " (- (current-seconds) start-time) " seconds to run.")))))) - - -(define (runs:rerun-hook test-id new-test-path testdat rerunlst) - (let* ((rerun-hook (configf:lookup *configdat* "runs" "rerun-hook")) - (log-dir (conc *toppath* "/reruns/logs")) - (target (getenv "MT_TARGET")) - (runname (common:args-get-runname)) - (rundir (db:test-get-rundir testdat)) - (tarfiledir (conc *toppath* "/reruns")) - (status (db:test-get-status testdat)) - (comment (conc "\"" (db:test-get-comment testdat) "\"" )) - (testname (db:test-get-testname testdat)) - (itempath (db:test-get-item-path testdat)) - (file-body (conc status "-" (string-translate target "/" "-") "-" runname "." testname (if (not (string=? itempath "")) (conc "." (string-translate itempath "/" "-")) ""))) - (log-file (conc file-body ".log")) - ;; (log-file (conc status "-" (string-translate target "/" "-") "-" runname "." testname (if (not (string=? itempath "")) (conc "." (string-translate itempath "/" "-")) "") ".log")) - (full-log-fname (conc log-dir "/" log-file)) - (tarfilename (conc file-body ".tar")) - ;; (tarfilename (conc status "." (string-translate target "/" "-") "." runname "." testname (if (not (string=? itempath "")) (conc "." (string-translate itempath "/" "-")) "") ".tar")) - ) - (if rerun-hook - (let* ((use-log-dir (if (not (directory-exists? log-dir)) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir ", exn=" exn) - #f) - (create-directory log-dir #t) - #t) - #t)) - (start-time (current-seconds)) - (actual-logf (if use-log-dir full-log-fname log-file)) - (sys-call-text (conc rerun-hook " " tarfilename " " rundir " " actual-logf " " runname " " tarfiledir " " status " " target " " comment " " testname " " itempath " >> " actual-logf " 2>&1")) - ) - (debug:print 2 *default-log-port* "Found rerun-hook in config:" rerun-hook) - (handle-exceptions - exn - (begin - (print-call-chain *default-log-port*) - (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) - (debug:print 0 *default-log-port* "ERROR: failed to run rerun-hook " rerun-hook ", check the log " log-file)) - (debug:print-info 0 *default-log-port* "running rerun-hook: \"" rerun-hook "\", log is " actual-logf) - ;; call the hook - (debug:print-info 0 *default-log-port* "Calling rerun-hook for " test-id new-test-path testdat rerunlst) - (debug:print-info 0 *default-log-port* "rerun hook: " rerun-hook) - (debug:print-info 0 *default-log-port* "tarfilename: " tarfilename) - (debug:print-info 0 *default-log-port* "rundir: " rundir) - (debug:print-info 0 *default-log-port* "actual-logf: " actual-logf) - (debug:print-info 0 *default-log-port* "runname: " runname) - (debug:print-info 0 *default-log-port* "sys-call-text: " sys-call-text) - (system sys-call-text) - (debug:print-info 0 *default-log-port* "rerun-hook \"" rerun-hook "\" took " (- (current-seconds) start-time) " seconds to run.")))))) - - - - -;; return #t when all items in waitors-upon list are represented in test-patt, #f otherwise. -(define (runs:testpatts-mention-waitors-upon? test-patt waitors-upon) - (null? (tests:filter-test-names-not-matched waitors-upon test-patt))) - -;;====================================================================== -;; runs:run-tests is called from megatest.scm and itself -;;====================================================================== -;; -;; test-names: Comma separated patterns same as test-patts but used in selection -;; of tests to run. The item portions are not respected. -;; FIXME: error out if /patt specified -;; -;; run-count is passed from megatest.scm as configf:lookup *configdat* "setup" "reruns", or defaults to 1. -(define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names - (let* ((keys (keys:config-get-fields *configdat*)) - (keyvals (keys:target->keyval keys target)) - (run-id (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour"))) ;; test-name))) - ;; (deferred '()) ;; delay running these since they have a waiton clause - (runconfigf (conc *toppath* "/runconfigs.config")) - (mtconfig (conc *toppath* "/megatest.config")) - (readonly-mode (not (file-write-access? mtconfig))) - (test-records (make-hash-table)) - ;; need to process runconfigs before generating these lists - (all-tests-registry #f) ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names - (all-test-names #f) ;; (hash-table-keys all-tests-registry)) - (test-names #f) ;; Generated by a call to (tests:filter-test-names all-test-names test-patts)) - (required-tests #f) ;; Put fully qualified test/testpath names in this list to be done - (waitors-upon (make-hash-table)) ;; given a test, return list of tests waiting upon this test. - (task-key (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id))) - ;; (tdbdat (tasks:open-db)) - (config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns"))) - (if x (string->number x) #f))) - (allowed-tests #f) - (runconf #f)) - - ;; check if readonly - (when readonly-mode - (debug:print-error 0 *default-log-port* "Megatest database is readonly. Cannot proceed.") - (exit 1)) - - ;; per user request. If less than 100Meg space on dbdir partition, bail out with error - ;; this will reduce issues in database corruption - (common:check-db-dir-and-exit-if-insufficient) - - ;; override the number of reruns from the configs - ;; this needs to be done at the place where is first runs:run-tests called - ;(if (and config-reruns - ; (> run-count config-reruns)) - ;(set! run-count config-reruns)) - - ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) - - (let ((sighand (lambda (signum) - ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting - (set! *time-to-exit* #t) - (debug:print 0 *default-log-port* "Received signal " signum ", cleaning up before exit. Please wait...") - (let ((th1 (make-thread (lambda () - ;; (let ((tdbdat (tasks:open-db))) - (rmt:tasks-set-state-given-param-key task-key "killed") ;; ) - (debug:print 0 *default-log-port* "Killed by signal " signum ". Exiting") - (thread-sleep! 3) - (exit)))) - (th2 (make-thread (lambda () - (thread-sleep! 5) - (debug:print 0 *default-log-port* "Done") - (exit 4))))) - (thread-start! th2) - (thread-start! th1) - (thread-join! th2))))) - (set-signal-handler! signal/int sighand) - (set-signal-handler! signal/term sighand)) - - ;; force the starting of a server -- removed BB 17ww28 - no longer needed. - ;;(debug:print 0 *default-log-port* "waiting on server...") - ;;(server:start-and-wait *toppath*) - - (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process - (set! runconf (if (common:file-exists? runconfigf) - (setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target) - (begin - (debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf) - #f))) - - (if (not test-patts) ;; first time in - adjust testpatt - (set! test-patts (common:args-get-testpatt runconf))) - ;; if test-patts is #f at this point there is something wrong and we need to bail out - (if (not test-patts) - (begin - (debug:print 0 *default-log-port* "WARNING: there is no test pattern for this run. Exiting now.") - (exit 0))) - - (if (args:get-arg "-tagexpr") - (begin - (set! allowed-tests (string-join (runs:get-tests-matching-tags (args:get-arg "-tagexpr")) ",")) - (debug:print-info 0 *default-log-port* "filtering initial test list with tagexpr: " (args:get-arg "-tagexpr") " => " allowed-tests) - ));; tests will be ANDed with this list - - ;; register this run in monitor.db - (rmt:tasks-add "run-tests" user target runname test-patts task-key) ;; params) - (rmt:tasks-set-state-given-param-key task-key "running") - - #;(common:telemetry-log "run-tests" - payload: - `( (target . ,target) - (run-name . ,runname) - (test-patts . ,test-patts) ) ) - - - ;; Now generate all the tests lists - (set! all-tests-registry (tests:get-all)) ;; hash of testname => path-to-test - (set! all-test-names (hash-table-keys all-tests-registry)) - ;; filter first for allowed-tests (from -tagexpr) then for test-patts. - (set! test-names (tests:filter-test-names - (if allowed-tests - (tests:filter-test-names all-test-names allowed-tests) - all-test-names) - test-patts)) - - ;; I think seeding required-tests with all test-names makes sense but lack analysis to back that up. - - ;; NEW STRATEGY HERE: - ;; 1. fill required tests with test-patts - ;; 2. scan testconfigs and if waitons, itemwait, itempatt calc prior test test-patt - ;; 3. repeat until all deps propagated - - ;; any tests with direct mention in test-patts can be added to required - ;;(set! required-tests (lset-intersection equal? (string-split test-patts ",") all-test-names)) - (set! required-tests (tests:filter-test-names all-test-names test-patts)) - ;; - ;; (set! required-tests (lset-intersection equal? test-names all-test-names)) - - ;; look up all tests matching the comma separated list of globs in - ;; test-patts (using % as wildcard) - - ;; (set! test-names (delete-duplicates (tests:get-valid-tests *toppath* test-patts))) - (debug:print-info 0 *default-log-port* "tests search path: " (string-intersperse (tests:get-tests-search-path *configdat*) " ")) - (debug:print-info 2 *default-log-port* "all tests: " (string-intersperse (sort all-test-names string<) " ")) - (debug:print-info 0 *default-log-port* "test names: " (string-intersperse (sort test-names string<) " ")) - (debug:print-info 0 *default-log-port* "required tests: " (string-intersperse (sort required-tests string<) " ")) - - ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if - ;; -keepgoing is specified - (if (eq? *passnum* 0) - (begin - ;; Is this still necessary? I think not. Unreachable tests are marked as such and - ;; should not cause problems here. - ;; - ;; have to delete test records where NOT_STARTED since they can cause -keepgoing to - ;; get stuck due to becoming inaccessible from a failed test. I.e. if test B depends - ;; on test A but test B reached the point on being registered as NOT_STARTED and test - ;; A failed for some reason then on re-run using -keepgoing the run can never complete. - ;; - ;; (rmt:general-call 'delete-tests-in-state run-id "NOT_STARTED") - - ;; Now convert anything in allow-auto-rerun to NOT_STARTED - ;; - (for-each - (lambda (state-status) - (let* ((ss-lst (string-split-fields "/" state-status #:infix)) - (state (if (> (length ss-lst) 0)(car ss-lst) #f)) - (status (if (> (length ss-lst) 1)(cadr ss-lst) #f))) - (rmt:set-tests-state-status run-id test-names state status "NOT_STARTED" status))) - ;; list of state/status pairs separated by spaces - (string-split (or (configf:lookup *configdat* "setup" "allow-auto-rerun") ""))))) - - ;; Ensure all tests are registered in the test_meta table - (runs:update-all-test_meta #f) - - ;; run the run prehook if there are no tests yet run for this run: - ;; - (runs:run-pre-hook run-id) - ;; mark all test launched flag as false in the meta table - (rmt:set-var (conc "lunch-complete-" run-id) "no") - (debug:print-info 1 *default-log-port* "Setting end-of-run to no") - (let* ((config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns"))) - (if x (string->number x) #f))) - (config-rerun-cnt (if config-reruns - config-reruns - 1))) - (if (eq? config-rerun-cnt run-count) - (rmt:set-var (conc "end-of-run-" run-id) "no"))) - - (rmt:set-run-state-status run-id "new" "n/a") - ;; now add non-directly referenced dependencies (i.e. waiton) - ;;====================================================================== - ;; refactoring this block into tests:get-full-data - ;; - ;; What happended, this code is now duplicated in tests!? - ;; - ;;====================================================================== - - (if (not (null? test-names)) ;; BEGIN test-names loop - (let loop ((hed (car test-names)) ;; NOTE: This is the main loop that iterates over the test-names - (tal (cdr test-names))) ;; 'return-procs tells the config reader to prep running system but return a proc - (debug:print-info 4 *default-log-port* "\n\ntestpatt elaboration loop => hed="hed " tal="tal" test-patts="test-patts" test-names="test-names) - (change-directory *toppath*) ;; PLEASE OPTIMIZE ME!!! I think this should be a no-op but there are several places where change-directories could be happening. - (setenv "MT_TEST_NAME" hed) ;; - (let*-values (((waitons waitors config) (tests:get-waitons hed all-tests-registry (tests:get-global-waitons *runconfigdat*))) - - ;; NOTE: Have the config - can extract [waitons] section - - ((hed-mode) - (let ((m (configf:lookup config "requirements" "mode"))) - (if m (map string->symbol (string-split m)) '(normal)))) - ((hed-itemized-waiton) ;; are items in hed waiting on items of waiton? - (not (null? (lset-intersection eq? hed-mode '(itemmatch itemwait))))) - ) - (debug:print-info 8 *default-log-port* "waitons: " waitons) - ;; check for hed in waitons => this would be circular, remove it and issue an - ;; error - (if (or (member hed waitons) - (member hed waitors)) - (begin - (debug:print-error 0 *default-log-port* "test " hed " has listed itself as a waiton or waitor, please correct this!") - (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons)) - (set! waitors (filter (lambda (x)(not (equal? x hed))) waitors)))) - - ;; (items (items:get-items-from-config config))) - (if (not (hash-table-ref/default test-records hed #f)) ;; waiton-tconfig below will be #f until that test is visted here at least once - (hash-table-set! test-records ;; BB: we are doing a manual make-tests:testqueue - hed (vector hed ;; 0 ;; testname - config ;; 1 - waitons ;; 2 - (configf:lookup config "requirements" "priority") ;; priority 3 - (tests:get-items config) ;; 4 ;; expand the [items] and or [itemstable] into explict items - #f ;; itemsdat 5 - #f ;; spare - used for item-path - waitors ;; - ))) - ;; update waitors-upon here - (for-each - (lambda (waiton) - (let* ((current-waitors-upon (hash-table-ref/default waitors-upon waiton '()))) - (debug:print-info 8 *default-log-port* " current-waiters-upon["waiton"] is "current-waitors-upon ) - (when (not (member hed current-waitors-upon)) - (debug:print-info 8 *default-log-port* " current-waiters-upon["waiton"] << "hed ) - (hash-table-set! waitors-upon waiton (cons hed current-waitors-upon))))) - (if (list? waitons) waitons '())) - (debug:print-info 8 *default-log-port* " process waitons&waitors of "hed": "(delete-duplicates (append waitons waitors))) - (for-each - (lambda (waiton) - (if (and waiton (not (member waiton test-names))) - (let* ((waitors-in-testpatt (runs:testpatts-mention-waitors-upon? test-patts (hash-table-ref/default waitors-upon waiton '()))) - (waiton-record (hash-table-ref/default test-records waiton #f)) - (waiton-tconfig (if waiton-record (vector-ref waiton-record 1) #f)) - (waiton-itemized (and waiton-tconfig - (or (hash-table-ref/default waiton-tconfig "items" #f) - (hash-table-ref/default waiton-tconfig "itemstable" #f)))) - (itemmaps (tests:get-itemmaps config)) ;; (configf:lookup config "requirements" "itemmap")) - (new-test-patts (tests:extend-test-patts test-patts hed waiton itemmaps hed-itemized-waiton))) - (debug:print-info 2 *default-log-port* "Test " waiton " has " (if waiton-record "a" "no") " waiton-record and" (if waiton-itemized " " " no ") "items") - ;; need to account for test-patt here, if I am test "a", selected with a test-patt of "hed/b%" - ;; and we are waiting on "waiton" we need to add "waiton/,waiton/b%" to test-patt - ;; is this satisfied by merely appending "/" to the waiton name added to the list? - ;; - ;; This approach causes all of the items in an upstream test to be run - ;; if we have this waiton already processed once we can analzye it for extending - ;; tests to be run, since we can't properly process waitons unless they have been - ;; initially added we add them again to be processed on second round AND add the hed - ;; back in to also be processed on second round - (if waiton-tconfig ;; BB: waiter should be in test-patts as well as the waiton have a tconfig. - (if waiton-itemized - (if waitors-in-testpatt - (begin - (debug:print-info 0 *default-log-port* "New test patts: " new-test-patts ", prev test patts: " test-patts) - (set! test-names (cons waiton test-names)) ;; need to process this one, only add once the waiton tconfig read - (set! required-tests (cons (conc waiton "/") required-tests)) - (set! test-patts new-test-patts)) - (begin - (debug:print-info 0 *default-log-port* "Waitor(s) not yet on testpatt for " waiton ", setting up to re-process it") - (set! tal (append (cons waiton tal)(list hed))))) - (begin - (debug:print-info 2 *default-log-port* "Adding non-itemized test " waiton " to required-tests") - (set! required-tests (cons waiton required-tests)) - (set! test-patts new-test-patts))) - (begin - (debug:print-info 0 *default-log-port* "No testconfig info yet for " waiton ", setting up to re-process it") - (set! tal (append (cons waiton tal)(list hed))))) ;; (cons (conc waiton "/") required-tests)) - ;; NOPE: didn't work. required needs to be plain test names. Try tacking on to test-patts - ;; - doesn't work - ;; (set! test-patts (conc test-patts "," waiton "/")) - ;; (set! test-names (cons waiton test-names))))) ;; was an append, now a cons - ))) - (delete-duplicates (append waitons waitors))) - (let ((remtests (delete-duplicates (append waitons tal)))) - (debug:print-info 8 *default-log-port* " remtests are "remtests) - (if (not (null? remtests)) - (begin - ;; (debug:print-info 0 *default-log-port* "Preprocessing continues for " (string-intersperse remtests ", ")) - (loop (car remtests)(cdr remtests)))))))) ;; END test-names loop - - (if (not (null? required-tests)) - (debug:print-info 1 *default-log-port* "Adding \"" (string-intersperse required-tests " ") "\" to the run queue")) - ;; NOTE: these are all parent tests, items are not expanded yet. - (debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records)) - (let ((reglen (configf:lookup *configdat* "setup" "runqueue"))) - (if (> (length (hash-table-keys test-records)) 0) - (let* ((keep-going #t) - (run-queue-retries 5) - (run-ids (rmt:get-all-run-ids))) - #;(for-each (lambda (run-id) - (if keep-going - (handle-exceptions - exn - (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn) - (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27) - run-ids) - (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests - (any->number reglen) all-tests-registry) - (set! keep-going #f) - (if (> run-count 0) ;; handle reruns - (begin - (if (not (hash-table-ref/default flags "-preclean" #f)) - (hash-table-set! flags "-preclean" #t)) - (if (not (hash-table-ref/default flags "-rerun" #f)) - (hash-table-set! flags "-rerun" "ABORT,STUCK/DEAD,n/a,ZERO_ITEMS")) - (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))) - (launch:end-of-run-check run-id))) - (debug:print-info 0 *default-log-port* "No tests to run"))) - (debug:print-info 4 *default-log-port* "All done by here") - ;; TODO: try putting post hook call here - - ; (debug:print-info 2 *default-log-port* " run-count " run-count) - ; (runs:run-post-hook run-id)) - ; (debug:print-info 2 *default-log-port* "Not calling post hook runcount = " run-count )) - (rmt:tasks-set-state-given-param-key task-key "done") - - ;; (sqlite3:finalize! tasks-db) - )) - - -;; loop logic. These are used in runs:run-tests-queue to make it a bit more readable. -;; -;; If reg not full and have items in tal then loop with (car tal)(cdr tal) reg reruns -;; If reg is full (i.e. length >= n -;; loop with (car reg) tal (cdr reg) reruns -;; If tal is empty -;; but have items in reg; loop with (car reg)(cdr reg) '() reruns -;; If reg is empty => all done - -(define (runs:queue-next-hed tal reg n regfull) - (if regfull - (if (null? reg) #f (car reg)) - (if (null? tal) ;; tal is used up, pop from reg - (if (null? reg) #f (car reg)) - (car tal)))) - -(define (runs:queue-next-tal tal reg n regfull) - (if regfull - tal - (if (null? tal) ;; must transfer from reg - (if (null? reg) '() (cdr reg)) - (cdr tal)))) - -(define (runs:queue-next-reg tal reg n regfull) - (if regfull - (if (null? reg) '() (cdr reg)) ;; EXPLORE: reorder (cdr reg) such that looping is more efficient - (if (null? tal) ;; if tal is null and reg not full then '() as reg contents moved to tal - '() - reg))) - -;; this is the list of parameters to the named loop "loop" near the top of runs:run-tests-queue, look around line 1216 -;; -(define (runs:loop-values tal reg reglen regfull reruns) - (list (runs:queue-next-hed tal reg reglen regfull) ;; hed - (runs:queue-next-tal tal reg reglen regfull) ;; tal - (runs:queue-next-reg tal reg reglen regfull) ;; reg - reruns)) ;; reruns - -;; objective - iterate thru tests -;; => want to prioritize tests we haven't seen before -;; => sometimes need to squeeze things in (added to reg) -;; => review of a previously seen test is higher priority of never visited test -;; reg - list of previously visited tests -;; tal - list of never visited tests -;; prefer next hed to be from reg than tal. - -(define runs:nothing-left-in-queue-count 0) - -;;====================================================================== -;; runs:expand-items is called by runs:run-tests-queue -;;====================================================================== -;; -;; return value of runs:expand-items is passed back to runs-tests-queue and is fed to named loop with this signature: -;; (let loop ((hed (car sorted-test-names)) -;; (tal (cdr sorted-test-names)) -;; (reg '()) ;; registered, put these at the head of tal -;; (reruns '())) -(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps) - (let* ((loop-list (list hed tal reg reruns)) - (junk (debug:print-info 4 *default-log-port* "expand-items calling rmt:get-prereqs-not-met")) - (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))) - (if (list? res) - res - (begin - (debug:print 0 *default-log-port* - "ERROR: rmt:get-prereqs-not-met returned non-list!\n" - " res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" testmode " itemmaps=" itemmaps) - '())))) - (have-itemized (not (null? (lset-intersection eq? testmode '(itemmatch itemwait))))) - ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) - (fails (runs:calc-fails prereqs-not-met)) - (prereq-fails (runs:calc-prereq-fail prereqs-not-met)) - (non-completed (runs:calc-not-completed prereqs-not-met)) - (runnables (runs:calc-runnable prereqs-not-met)) - (unexpanded-prereqs - (filter (lambda (testname) - (let* ((test-rec (hash-table-ref test-records testname)) - (items (tests:testqueue-get-items test-rec))) - ;;(BB> "HEY " testname "=>"items) - (or (procedure? items)(eq? items 'have-procedure)))) - waitons)) - - - ) - (debug:print-info 4 *default-log-port* "START OF INNER COND #2 " - "\n can-run-more: " can-run-more - "\n testname: " hed - "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met) - "\n non-completed: " (runs:pretty-string non-completed) - "\n prereq-fails: " (runs:pretty-string prereq-fails) - "\n fails: " (runs:pretty-string fails) - "\n testmode: " testmode - "\n (member 'toplevel testmode): " (member 'toplevel testmode) - "\n (null? non-completed): " (null? non-completed) - "\n reruns: " reruns - "\n items: " items - "\n can-run-more: " can-run-more) - - (cond - ;; all prereqs met, fire off the test - ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch - - ((and (not (member 'toplevel testmode)) - (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a) - '(DONOTRUN removed CANNOTRUN))) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here - (debug:print-info 4 *default-log-port* "cond branch - " "ei-1") - (debug:print-info 1 *default-log-port* "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue") - (if (or (not (null? tal)) - (not (null? reg))) - (runs:loop-values tal reg reglen regfull reruns) - (begin - (debug:print-info 0 *default-log-port* "Nothing left in the queue!") - ;; If get here twice then we know we've tried to expand all items - ;; since there must be a logic issue with the handling of loops in the - ;; items expand phase we will brute force an exit here. - (if (> runs:nothing-left-in-queue-count 2) - (begin - (debug:print 0 *default-log-port* "WARNING: this condition is triggered when there were no items to expand and nothing to run. Please check your run for completeness") - (exit 0)) - (set! runs:nothing-left-in-queue-count (+ runs:nothing-left-in-queue-count 1))) - #f))) - - ;; desired result of below cond branch: - ;; we want to expand items in our test of interest (hed) in the following cases: - ;; case 1 - mode is itemmatch or itemwait: - ;; - all prereq tests have been expanded - ;; - at least one prereq's items have completed - ;; case 2 - mode is toplevel - ;; - prereqs are completed. - ;; - or no prereqs can complete - ;; case 3 - mode not specified - ;; - prereqs are completed and passed (we could consider removing "and passed" -- it would change behavior from current) - ((or (null? prereqs-not-met) - (and (member 'toplevel testmode) - (null? non-completed))) - (debug:print-info 4 *default-log-port* "cond branch - " "ei-2") - (debug:print-info 4 *default-log-port* "runs:expand-items: (or (null? prereqs-not-met) (and (member 'toplevel testmode)(null? non-completed)))") - (let ((test-name (tests:testqueue-get-testname test-record))) - (setenv "MT_TEST_NAME" test-name) ;; - (setenv "MT_RUNNAME" runname) - (runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process - (let ((items-list (items:get-items-from-config tconfig))) - (if (list? items-list) - (begin - (if (null? items-list) - (let ((test-id (rmt:get-test-id run-id test-name "")) - (num-items (rmt:test-toplevel-num-items run-id test-name))) - (if (and test-id - (not (> num-items 0))) - (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "Failed to run due to failed prerequisites")))) - (tests:testqueue-set-items! test-record items-list) - (list hed tal reg reruns)) - (begin - (debug:print-error 0 *default-log-port* "The proc from reading the items table did not yield a list - please report this") - (exit 1)))))) - - ((and (null? fails) - (null? prereq-fails) - (not (null? non-completed))) - (debug:print-info 4 *default-log-port* "cond branch - " "ei-3") - (let* ((allinqueue (map (lambda (x)(if (string? x) x (db:test-get-testname x))) - (append newtal reruns))) - ;; prereqstrs is a list of test names as strings that are prereqs for hed - (prereqstrs (delete-duplicates (map (lambda (x)(if (string? x) x (db:test-get-testname x))) - prereqs-not-met))) - ;; a prereq that is not found in allinqueue will be put in the notinqueue list - ;; - ;; (notinqueue (filter (lambda (x) - ;; (not (member x allinqueue))) - ;; prereqstrs)) - (give-up #f)) - - ;; We can get here when a prereq has not been run due to *it* having a prereq that failed. - ;; We need to use this to dequeue this item as CANNOTRUN - ;; - (if (member 'toplevel testmode) ;; '(toplevel)) ;; NOTE: this probably should be (member 'toplevel testmode) - (for-each (lambda (prereq) - (if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN) - (set! give-up #t))) - prereqstrs)) - - (if (and give-up - (not (and (null? tal)(null? reg)))) - (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records)) - (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records))) - (debug:print 1 *default-log-port* "WARNING: test " hed " has discarded prerequisites, removing it from the queue") - - (let ((test-id (rmt:get-test-id run-id hed ""))) - (if test-id (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites"))) - - (if (and (null? trimmed-tal) - (null? trimmed-reg)) - #f - (runs:loop-values trimmed-tal trimmed-reg reglen regfull reruns) - )) - (list (car newtal)(append (cdr newtal) reg) '() reruns)))) - - ((and (null? fails) ;; have not-started tests, but unable to run them. everything looks completed with no prospect of unsticking something that is stuck. we should mark hed as moribund and exit or continue if there are more tests to consider - (null? prereq-fails) - (null? non-completed)) - (debug:print-info 4 *default-log-port* "cond branch - " "ei-4") - (if (runs:can-keep-running? hed 20) - (begin - (runs:inc-cant-run-tests hed) - (debug:print-info 0 *default-log-port* "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0) ", going to wait 60 sec.") ;; - ;; getting here likely means the system is way overloaded, kill a full minute before continuing - ;; (thread-sleep! 60) ;; TODO: gate by normalized server load > 1.0 (maxload config thing) CHECKTHIS!!! - ;; No runsdat, can't do this yet - ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat))) - ;; - (thread-sleep! 5) ;; TODO: gate by normalized server load > 1.0 (maxload config thing) - ;; num-retries code was here - ;; we use this opportunity to move contents of reg to tal - (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met? - (begin - (debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue") - (let ((test-id (rmt:get-test-id run-id hed ""))) - (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while."))) - (runs:loop-values tal reg reglen regfull reruns) - ))) - - ((and - (or (not (null? fails)) - (not (null? prereq-fails))) - (member 'normal testmode)) - (debug:print-info 4 *default-log-port* "cond branch - " "ei-5") - (debug:print-info 1 *default-log-port* "test " hed " (mode=" testmode ") has failed prerequisite(s); " - (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") - ", removing it from to-do list") - (let ((test-id (rmt:get-test-id run-id hed ""))) - (if test-id - (if (not (null? prereq-fails)) - (mt:test-set-state-status-by-id-unless-completed run-id test-id "COMPLETED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites") - (mt:test-set-state-status-by-id-unless-completed run-id test-id "COMPLETED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))) - ;; (debug:print 4 *default-log-port*"BB> set PREQ_FAIL on "hed) - ;; (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))) ;; BB: this works, btu equivalent for itemwait mode does not work. - (if (or (not (null? reg))(not (null? tal))) - (begin - (hash-table-set! test-registry hed 'CANNOTRUN) - (runs:loop-values tal reg reglen regfull (cons hed reruns)) - ) - #f)) ;; #f flags do not loop - - ((and (not (null? fails))(member 'toplevel testmode)) - (debug:print-info 4 *default-log-port* "cond branch - " "ei-6") - (if (or (not (null? reg))(not (null? tal))) - (list (car newtal)(append (cdr newtal) reg) '() reruns) - #f)) - ((null? runnables) - (debug:print-info 4 *default-log-port* "cond branch - " "ei-7") - #f) ;; if we get here and non-completed is null then it is all over. - (else - (debug:print-info 4 *default-log-port* "cond branch - " "ei-8") - (debug:print 2 *default-log-port* "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now") - (list (car newtal)(cdr newtal) reg reruns))))) - -(define (runs:mixed-list-testname-and-testrec->list-of-strings inlst) - (if (null? inlst) - '() - (map (lambda (t) - (cond - ((vector? t) - (let ((test-name (db:test-get-testname t)) - (item-path (db:test-get-item-path t)) - (test-state (db:test-get-state t)) - (test-status (db:test-get-status t))) - (conc test-name (if (equal? item-path "") "" "/") item-path ":" test-state "/" test-status))) - ((string? t) - t) - (else - (conc t)))) - inlst))) - - -;; hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmaps) -(define (runs:process-expanded-tests runsdat testdat) - ;; unroll the contents of runsdat and testdat (due to ongoing refactoring). - (debug:print 2 *default-log-port* "runs:process-expanded-tests; testdat:" ) - (debug:print 2 *default-log-port* (with-output-to-string - (lambda () (pp (runs:testdat->alist testdat) )))) - (let* ((hed (runs:testdat-hed testdat)) - (tal (runs:testdat-tal testdat)) - (reg (runs:testdat-reg testdat)) - (reruns (runs:testdat-reruns testdat)) - (test-name (runs:testdat-test-name testdat)) - (item-path (runs:testdat-item-path testdat)) - (jobgroup (runs:testdat-jobgroup testdat)) - (waitons (runs:testdat-waitons testdat)) - (item-path (runs:testdat-item-path testdat)) - (testmode (runs:testdat-testmode testdat)) - (newtal (runs:testdat-newtal testdat)) - (itemmaps (runs:testdat-itemmaps testdat)) - (test-record (runs:testdat-test-record testdat)) - (prereqs-not-met (runs:testdat-prereqs-not-met testdat)) - - (reglen (runs:dat-reglen runsdat)) - (regfull (runs:dat-regfull runsdat)) - (runname (runs:dat-runname runsdat)) - (max-concurrent-jobs (runs:dat-max-concurrent-jobs runsdat)) - (run-id (runs:dat-run-id runsdat)) - (test-patts (runs:dat-test-patts runsdat)) - (required-tests (runs:dat-required-tests runsdat)) - (test-registry (runs:dat-test-registry runsdat)) - (registry-mutex (runs:dat-registry-mutex runsdat)) - (flags (runs:dat-flags runsdat)) - (keyvals (runs:dat-keyvals runsdat)) - (run-info (runs:dat-run-info runsdat)) - (all-tests-registry (runs:dat-all-tests-registry runsdat)) - (run-limits-info (runs:dat-can-run-more-tests runsdat)) - ;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running - (have-resources (car run-limits-info)) - (num-running (list-ref run-limits-info 1)) - (num-running-in-jobgroup(list-ref run-limits-info 2)) - (max-concurrent-jobs (list-ref run-limits-info 3)) - (job-group-limit (list-ref run-limits-info 4)) - ;; (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) - ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) - (fails (if (list? prereqs-not-met) ;; TODO: rename fails to failed-prereqs - (runs:calc-fails prereqs-not-met) - (begin - (debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met) - '()))) - (non-completed (filter (lambda (x) ;; remove hed from not completed list, duh, of course it is not completed! - (not (equal? x hed))) - (runs:calc-not-completed prereqs-not-met))) - (loop-list (list hed tal reg reruns)) - ;; configure the load runner - (maxload (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3.0"))) ;; use a non-number string to disable - (maxhomehostload (string->number (or (configf:lookup *configdat* "jobtools" "maxhomehostload") "2.0"))) ;; use a non-number string to disable - (waitdelay (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60")))) - (debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: (" - (string-intersperse - (map (lambda (t) - (if (vector? t) - (conc (db:test-get-state t) "/" (db:test-get-status t)) - (conc " WARNING: t is not a vector=" t ))) - prereqs-not-met) - ", ") ") fails: " fails - "\nregistered? " (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) - - ;; well, first lets see if cpu load throttling is enabled. If so wait around until the - ;; average cpu load is under the threshold before continuing - ;; - (if (runs:dat-load-mgmt-function runsdat) - ((runs:dat-load-mgmt-function runsdat)) - (runs:dat-load-mgmt-function-set! - runsdat - (lambda () - ;; jobtools maxload is useful for where the full Megatest run is done on one machine - (if (and (not (common:on-homehost?)) - maxload) ;; only gate if maxload is specified, NOTE: maxload is normalized, i.e. load=1 means all cpus fully utilized - (common:wait-for-normalized-load maxload "Waiting for load to drop before starting more tests" #f)) - - ;; jobtools maxhomehostload is intended to prevent overloading on the homehost which can cause database corruption issues - (if maxhomehostload - (common:wait-for-homehost-load maxhomehostload - (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload)))))) - - - - (if (and (not (null? prereqs-not-met)) - (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) - (debug:print-info 2 *default-log-port* "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", "))) - - ;; Don't know at this time if the test have been launched at some time in the past - ;; i.e. is this a re-launch? - (debug:print-info 4 *default-log-port* "run-limits-info = " run-limits-info) - - (cond ; cond 894- 1067 - - ;; Check item path against item-patts, - ;; - ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run - ;; else the run is stuck, temporarily or permanently - ;; but should check if it is due to lack of resources vs. prerequisites - (debug:print-info 1 *default-log-port* "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts) - (if (or (not (null? tal))(not (null? reg))) - (runs:loop-values tal reg reglen regfull reruns) - #f)) - - ;; Register tests - ;; - ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) - (debug:print-info 4 *default-log-port* "Pre-registering test " test-name "/" item-path " to create placeholder" ) - ;; always do firm registration now in v1.60 and greater ;; (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs - (let register-loop ((numtries 15)) - (rmt:register-test run-id test-name item-path) - (if (rmt:get-test-id run-id test-name item-path) - (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'done) - (if (> numtries 0) - (begin - (thread-sleep! 0.5) - (register-loop (- numtries 1))) - (debug:print-error 0 *default-log-port* "failed to register test " (db:test-make-full-name test-name item-path))))) - (if (not (eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f) 'done)) - (begin - (rmt:register-test run-id test-name "") - (if (rmt:get-test-id run-id test-name "") - (hash-table-set! test-registry (db:test-make-full-name test-name "") 'done)))) - (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) - (if (and (null? tal)(null? reg)) - (list hed tal (append reg (list hed)) reruns) - (list (runs:queue-next-hed tal reg reglen regfull) ;; cannot replace with a call to runs:loop-values as the logic is different for reg - (runs:queue-next-tal tal reg reglen regfull) - ;; NB// Here we are building reg as we register tests - ;; if regfull we must pop the front item off reg - (if regfull - (append (cdr reg) (list hed)) - (append reg (list hed))) - reruns))) - - ;; At this point hed test registration must be completed. - ;; - ((eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f) - 'start) - (debug:print-info 0 *default-log-port* "Waiting on test registration(s): " - (string-intersperse - (filter (lambda (x) - (eq? (hash-table-ref/default test-registry x #f) 'start)) - (hash-table-keys test-registry)) - ", ")) - (thread-sleep! 0.051) - (list hed tal reg reruns)) - - ;; If no resources are available just kill time and loop again - ;; - ((not have-resources) ;; simply try again after waiting a second - (if (runs:lownoise "no resources" 600) - (debug:print-info 1 *default-log-port* "no resources to run new tests, waiting ...")) - - ;; Have gone back and forth on this but db starvation is an issue. - ;; wait one second before looking again to run jobs. - ;; (thread-sleep! 0.25) - - ;; new logic. - ;; If it has been more than 10 seconds since we were last here don't wait at all - ;; otherwise sleep 2 seconds to give db a rest and let dashboard read data - (if (runs:lownoise "frequent-no-resources" 10) - (thread-sleep! 0.25) ;; no significant delay - (thread-sleep! 2)) - ;; could have done hed tal here but doing car/cdr of newtal to rotate tests - (list (car newtal)(cdr newtal) reg reruns)) - - ;; This is the final stage, everything is in place so launch the test - ;; - ((and have-resources - (or (null? prereqs-not-met) - (and (member 'toplevel testmode) ;; 'toplevel) - (null? non-completed) - (not (member 'exclusive testmode))))) - ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path)) - ;; we are going to reset all the counters for test retries by setting a new hash table - ;; this means they will increment only when nothing can be run - (set! *max-tries-hash* (make-hash-table)) - - (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry runsdat testdat) - (set! *last-test-launch* (current-seconds)) - (runs:incremental-print-results run-id) - (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running) - (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) - ;; (thread-sleep! *global-delta*) - (if (or (not (null? tal))(not (null? reg))) - (runs:loop-values tal reg reglen regfull reruns) ;; hed should be dropped at this time - #f)) - - ;; must be we have unmet prerequisites - ;; - (else - (debug:print 4 *default-log-port* "FAILS: " fails) - ;; If one or more of the prereqs-not-met are FAIL then we can issue - ;; a message and drop hed from the items to be processed. - ;; (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) - (if (and (not (null? prereqs-not-met)) - (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) - (debug:print-info 1 *default-log-port* "waiting on tests; " (string-intersperse - (runs:mixed-list-testname-and-testrec->list-of-strings - prereqs-not-met) ", "))) - (if (or (null? fails) - (member 'toplevel testmode)) - (begin - ;; couldn't run, take a breather - (if (runs:lownoise "Waiting for more work to do..." 60) - (debug:print-info 0 *default-log-port* "Waiting for more work to do...")) - - ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat))) - (thread-sleep! 5) - (list (car newtal)(cdr newtal) reg reruns)) - ;; the waiton is FAIL so no point in trying to run hed ever again - (begin - (let ((my-test-id (rmt:get-test-id run-id test-name item-path))) - (mt:test-set-state-status-by-id-unless-completed run-id my-test-id "COMPLETED" "PREQ_FAIL" "Failed to run due to failed prerequisites2")) - (if (or (not (null? reg))(not (null? tal))) - (if (vector? hed) - (begin - (debug:print 1 *default-log-port* "WARNING: Dropping test " test-name "/" item-path - " from the launch list as it has prerequistes that are FAIL") - (let ((test-id (rmt:get-test-id run-id hed ""))) - (if test-id (mt:test-set-state-status-by-id-unless-completed run-id test-id "COMPLETED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))) - (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) - ;; (thread-sleep! *global-delta*) - ;; This next is for the items - - (if (not (null? fails)) - ;;(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "PREQ_FAIL" #f) - (rmt:set-state-status-and-roll-up-items run-id test-name item-path "NOT_STARTED" "PREQ_FAIL" #f) - ;;(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) - (rmt:set-state-status-and-roll-up-items run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) ) - (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed) - (runs:loop-values tal reg reglen regfull reruns)) - (let ((nth-try (hash-table-ref/default test-registry hed 0))) ;; hed not a vector... - (debug:print 2 *default-log-port* "nth-try("hed")="nth-try) - (cond - ((member "RUNNING" (map db:test-get-state prereqs-not-met)) - (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60) - (debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet.")) - (thread-sleep! 0.1) - (runs:loop-values tal reg reglen regfull reruns)) - ((or (not nth-try) ;; BB: condition on subsequent tries, condition below fires on first try - (and (number? nth-try) - (< nth-try 2))) - (hash-table-set! test-registry hed (if (number? nth-try) - (+ nth-try 1) - 0)) - (if (runs:lownoise (conc "not removing test " hed) 60) - (debug:print 1 *default-log-port* "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites")) - ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") - (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) - (runs:loop-values newtal reg reglen regfull reruns)) - ((symbol? nth-try) ;; BB: 'done matches here in one case where prereq itemwait failed. This is first "try" - (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW - (if (null? tal) - #f ;; yes, really - (list (car tal)(cdr tal) reg reruns)) - (begin - (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60) - (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state >" nth-try "< will be overridden and we'll retry.")) - ;; was: (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f) - (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path "COMPLETED" "PREQ_FAIL" #f) - (hash-table-set! test-registry hed 'removed) ;; was 0 - (if (not (and (null? reg) (null? tal))) - (runs:loop-values tal reg reglen regfull reruns) - #f)))) - (else - (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60) - (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now.")) - ;; (debug:print 0 *default-log-port* " prereqs: " prereqs-not-met) - (hash-table-set! test-registry hed 'removed) - (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f) - ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug. - (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL - (list (if (null? tal)(car newtal)(car tal)) - tal - reg - reruns))))) - ;; ELSE: can't drop this - maybe running? Just keep trying - - ;;(if (not (or (not (null? reg))(not (null? tal)))) ;; old experiment - (let ((runable-tests (runs:runable-tests prereqs-not-met))) ;; SUSPICIOUS: Should look at more than just prereqs-not-met? - (if (null? runable-tests) - #f ;; I think we are truly done here - (runs:loop-values newtal reg reglen regfull reruns))) - ;;) ;;from old experiment - ) ;; end if (or (not (null? reg))(not (null? tal))) - )))))) - -;; scan a list of tests looking to see if any are potentially runnable -;; -(define (runs:runable-tests tests) - (filter (lambda (t) - (if (not (vector? t)) - t - (let ((state (db:test-get-state t)) - (status (db:test-get-status t))) - (case (string->symbol state) - ((COMPLETED INCOMPLETE) #f) - ((NOT_STARTED) - (if (member status '("TEN_STRIKES" "BLOCKED" "PREQ_FAIL" "ZERO_ITEMS" "PREQ_DISCARDED" "TIMED_OUT" )) - #f - t)) - ((DELETED) #f) - (else t))))) - tests)) - -;; move all the miscellanea into this struct -;; -(defstruct runs:gendat inc-results inc-results-last-update inc-results-fmt run-info runname target) - -(define *runs:general-data* - (make-runs:gendat - inc-results: (make-hash-table) - inc-results-last-update: 0 - inc-results-fmt: "~12a~12a~20a~12a~40a\n" ;; state status time duration test-name item-path - run-info: #f - runname: #f - target: #f - ) - ) - -(define (runs:incremental-print-results run-id) - (let ((curr-sec (current-seconds)) - (last-update (runs:gendat-inc-results-last-update *runs:general-data*))) - (if (> (- curr-sec last-update) 5) ;; at least five seconds since last update - (let* ((run-dat (or (runs:gendat-run-info *runs:general-data*)(rmt:get-run-info run-id))) - (runname (or (runs:gendat-runname *runs:general-data*) - (db:get-value-by-header (db:get-rows run-dat) - (db:get-header run-dat) "runname"))) - (target (or (runs:gendat-target *runs:general-data*)(rmt:get-target run-id))) - (testsdat (let ((res (rmt:get-tests-for-run - run-id "%" '() '() ;; run-id testpatt states statuses - #f #f ;; offset limit - #f ;; not-in - #f ;; sort-by - #f ;; sort-order - #f ;; get full data (not 'shortlist) - last-update - 'dashboard))) - (if (list? res) - res - (begin - (debug:print-error - 0 *default-log-port* - "FAILED TO GET DATA using rmt:get-tests-for-run. Notify developers if you see this. result: " res) - '()))))) - (runs:gendat-inc-results-last-update-set! *runs:general-data* (- curr-sec 1)) - (if (not (runs:gendat-run-info *runs:general-data*)) - (runs:gendat-run-info-set! *runs:general-data* run-dat)) - (if (not (runs:gendat-runname *runs:general-data*)) - (runs:gendat-runname-set! *runs:general-data* runname)) - (if (not (runs:gendat-target *runs:general-data*)) - (runs:gendat-target-set! *runs:general-data* target)) - (for-each - (lambda (testdat) - (let* ((test-id (db:test-get-id testdat)) - (prevdat (hash-table-ref/default (runs:gendat-inc-results *runs:general-data*) - (conc run-id "," test-id) #f)) - (test-name (db:test-get-testname testdat)) - (item-path (db:test-get-item-path testdat)) - (state (db:test-get-state testdat)) - (status (db:test-get-status testdat)) - (event-time (db:test-get-event_time testdat)) - (duration (db:test-get-run_duration testdat))) - (if (and (not (member state '("DELETED" "REMOTEHOSTSTART" "RUNNING" "LAUNCHED""NOT_STARTED"))) - (not (and prevdat - (equal? state (db:test-get-state prevdat)) - (equal? status (db:test-get-status prevdat))))) - (let ((fmt (runs:gendat-inc-results-fmt *runs:general-data*)) - (dtime (seconds->year-work-week/day-time event-time))) - (if (runs:lownoise "inc-print" 600) - (format #t fmt "State" "Status" "Start Time" "Duration" "Test path")) - ;; (debug:print 0 *default-log-port* "fmt: " fmt " state: " state " status: " status " test-name: " test-name " item-path: " item-path " dtime: " dtime) - ;; (debug:print 0 #f "event-time: " event-time " duration: " duration) - (format #t fmt - state - status - dtime - (seconds->hr-min-sec duration) - (conc "lt/" target "/" runname "/" test-name (if (string-null? item-path) "" (conc "/" item-path)))) - (hash-table-set! (runs:gendat-inc-results *runs:general-data*) (conc run-id "," test-id) testdat))))) - testsdat))) - - ;; I don't think this should be here? -- Matt - #;(runs:gendat-inc-results-last-update-set! *runs:general-data* (- curr-sec 10)) - - )) - -;; every time though the loop increment the test/itempatt val. -;; when the min is > max-allowed and none running then force exit -;; -(define *max-tries-hash* (make-hash-table)) - -(define (runs:pretty-long-list lst) - (if (> (length lst) 8)(append (take lst 3)(list "...")) lst)) - -(define *last-loop-time-ms* 0) - -;;====================================================================== -;; runs:run-tests-queue is called by runs:run-tests -;;====================================================================== -;; -;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... > -(define (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests reglen-in all-tests-registry) - ;; At this point the list of parent tests is expanded - ;; NB// Should expand items here and then insert into the run queue. - (debug:print 5 *default-log-port* "test-records: " test-records ", flags: " (hash-table->alist flags)) - - ;; Do mark-and-find clean up of db before starting runing of quue - ;; - ;; (rmt:find-and-mark-incomplete) - - (let* ((run-info (rmt:get-run-info run-id)) - (tests-info (mt:get-tests-for-run run-id #f '() '())) ;; qryvals: "id,testname,item_path")) - (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) - (test-registry (make-hash-table)) - (registry-mutex (make-mutex)) - (num-retries 0) - (max-retries (configf:lookup *configdat* "setup" "maxretries")) - (max-concurrent-jobs (configf:lookup-number *configdat* "setup" "max_concurrent_jobs" default: 50)) - (reglen (if (number? reglen-in) reglen-in 1)) - (last-time-incomplete (- (current-seconds) 900)) ;; force at least one clean up cycle - (last-time-some-running (current-seconds)) - (incoming-tests '()) ;; queue up incoming tests here to tack on to tal when it gets low - ;; (tdbdat (tasks:open-db)) - (runsdat (make-runs:dat - ;; hed: hed - ;; tal: tal - ;; reg: reg - ;; reruns: reruns - reglen: reglen - regfull: #f ;; regfull - ;; test-record: test-record - runname: runname - ;; test-name: test-name - ;; item-path: item-path - ;; jobgroup: jobgroup - max-concurrent-jobs: max-concurrent-jobs - run-id: run-id - ;; waitons: waitons - ;; testmode: testmode - test-patts: test-patts - required-tests: required-tests - test-registry: test-registry - registry-mutex: registry-mutex - flags: flags - keyvals: keyvals - run-info: run-info - ;; newtal: newtal - all-tests-registry: all-tests-registry - ;; itemmaps: itemmaps - ;; prereqs-not-met: (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps) - ;; can-run-more-tests: (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) ;; look at the test jobgroup and tot jobs running - ))) - - ;; Initialize the test-registery hash with tests that already have a record - ;; convert state to symbol and use that as the hash value - (for-each (lambda (trec) - (let ((id (db:test-get-id trec)) - (tn (db:test-get-testname trec)) - (ip (db:test-get-item-path trec)) - (st (db:test-get-state trec))) - (if (not (equal? st "DELETED")) - (hash-table-set! test-registry (db:test-make-full-name tn ip) (string->symbol st))))) - tests-info) - (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) - - (let loop ((hed (car sorted-test-names)) - (tal (cdr sorted-test-names)) - (reg '()) ;; registered, put these at the head of tal - (reruns '())) - - (runs:incremental-print-results run-id) - - (if (not (null? reruns))(debug:print-info 4 *default-log-port* "reruns=" reruns)) - - ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes - ;; moving this to a parallel thread and just run it once. - ;; - (if (> (current-seconds)(+ last-time-incomplete 900)) - (begin - (set! last-time-incomplete (current-seconds)) - ;; (rmt:find-and-mark-incomplete-all-runs) - )) - - ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) - (let* ((test-record (hash-table-ref test-records hed)) - (test-name (tests:testqueue-get-testname test-record)) - (tconfig (tests:testqueue-get-testconfig test-record)) - (jobgroup (configf:lookup tconfig "test_meta" "jobgroup")) - (testmode (let ((m (configf:lookup tconfig "requirements" "mode"))) - (if m (map string->symbol (string-split m)) '(normal)))) - (itemmaps (tests:get-itemmaps tconfig)) ;; (configf:lookup tconfig "requirements" "itemmap")) - (priority (tests:testqueue-get-priority test-record)) - (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f - (items (tests:testqueue-get-items test-record)) - (item-path (item-list->path itemdat)) - (tfullname (db:test-make-full-name test-name item-path)) - ;; these are hard coded item-item waits test/item-path => test/item-path2 ... - (extra-waits (let* ((section (configf:get-section (tests:testqueue-get-testconfig test-record) "waitons")) - (myextra (alist-ref tfullname section equal?))) - (if myextra - (let ((extras (string-split (car myextra)))) - (if (runs:lownoise (conc tfullname "extra-waitons" tfullname) 60) - (debug:print-info 0 *default-log-port* "HAVE EXTRA WAITONS for test " tfullname ": " myextra)) - (for-each - (lambda (extra) - ;; (debug:print 0 *default-log-port* "FYI: extra = " extra " reruns = " reruns) - (let ((basetestname (car (string-split extra "/")))) - #;(if (not (member extra tal)) - (set! reruns (append tal (list extra)))) - (if (not (member basetestname tal)) - (set! reruns (append tal (list basetestname)))) - )) - extras) - extras) - '()))) - (waitons (delete-duplicates (append (tests:testqueue-get-waitons test-record) extra-waits) equal?)) - (newtal (append tal (list hed))) - (regfull (>= (length reg) reglen)) - (num-running (rmt:get-count-tests-running-for-run-id run-id)) - (testdat (make-runs:testdat - hed: hed - tal: tal - reg: reg - reruns: reruns - test-record: test-record - test-name: test-name - item-path: item-path - jobgroup: jobgroup - waitons: waitons - testmode: testmode - newtal: newtal - itemmaps: itemmaps - prereqs-not-met: '() - ))) - - ;; too-tight loop detection and delay, this might hide issues - ;; that occur in long run times. Consider commenting when debugging - ;; - (if (and (>= num-running max-concurrent-jobs) - (< (- (current-milliseconds) *last-loop-time-ms*) 500)) - (begin - (if (runs:lownoise "too-tight-loop" 5) - (debug:print-info 2 *default-log-port* "Excessively fast loop, delaying 1/2 second")) - (thread-sleep! 0.5))) - (set! *last-loop-time-ms* (current-milliseconds)) - - (runs:dat-regfull-set! runsdat regfull) - - - (if (> (- (current-seconds) *last-test-launch*) 5) ;; be pretty aggressive for five seconds after - (runs:too-soon-delay (conc "loop delay " hed) 1 0.6) ;; starting a test then apply more delay - (runs:too-soon-delay (conc "loop delay " hed) 1 0.1)) - - (if (> num-running 0) - (set! last-time-some-running (current-seconds))) - - (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000))) - (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1))) - ;; (debug:print 0 *default-log-port* "max-tries-hash: " (hash-table->alist *max-tries-hash*)) - - ;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard - ;; and it is clear they *should* have run but did not. - (if (not (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f)) - (begin - (rmt:register-test run-id test-name "") - (hash-table-set! test-registry (db:test-make-full-name test-name "") 'done))) - - ;; Fast skip of tests that are already "COMPLETED" - NO! Cannot do that as the items may not have been expanded yet :( - ;; - (if (member (hash-table-ref/default test-registry tfullname #f) - '(DONOTRUN removed)) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) - (begin - (if (runs:lownoise (conc "been marked do not run " tfullname) 60) - (debug:print-info 0 *default-log-port* "Skipping test " tfullname " as it has been marked do not run due to being completed or not runnable")) - (if (or (not (null? tal))(not (null? reg))) - (loop (runs:queue-next-hed tal reg reglen regfull) - (runs:queue-next-tal tal reg reglen regfull) - (runs:queue-next-reg tal reg reglen regfull) - reruns)))) - ;; (loop (car tal)(cdr tal) reg reruns)))) - (runs:incremental-print-results run-id) - (debug:print 4 *default-log-port* "TOP OF LOOP => " - "test-name: " test-name - "\n hed: " hed - "\n tal: " (runs:pretty-long-list tal) - "\n reg: " reg - "\n test-record " test-record - "\n itemdat: " itemdat - "\n items: " items - "\n item-path: " item-path - "\n waitons: " waitons - "\n num-retries: " num-retries - "\n reruns: " reruns - "\n regfull: " regfull - "\n reglen: " reglen - "\n length reg: " (length reg) - ) - - ;; (runs:parallel-runners-mgmt runsdat) - - ;; check for hed in waitons => this would be circular, remove it and issue an - ;; error - (if (member test-name waitons) - (begin - (debug:print-error 0 *default-log-port* "test " test-name " has listed itself as a waiton, please correct this!") - (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) - - (cond - - ;; We want to catch tests that have waitons that are NOT in the queue and discard them IFF - ;; they have been through the wringer 10 or more times - ((and (list? waitons) - (not (null? waitons)) - (> (hash-table-ref/default *max-tries-hash* tfullname 0) 10) - (not (null? (filter - number? - (map (lambda (waiton) - (if (and (not (member waiton tal)) ;; this waiton is not in the list to be tried to run - (not (member waiton reruns))) - 1 - #f)) - waitons))))) ;; could do this more elegantly with a marker.... - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-1") - (debug:print 0 *default-log-port* "WARNING: Marking test " tfullname " as not runnable. It is waiting on tests that cannot be run. Giving up now.") - (hash-table-set! test-registry tfullname 'removed)) - - ;; items is #f then the test is ok to be handed off to launch (but not before) - ;; - ((not items) - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-2") - (debug:print-info 4 *default-log-port* "OUTER COND: (not items)") - (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) - (not (null? tal))) - (loop (car tal)(cdr tal) reg reruns)) - - ;; gonna try a strategy change here. - ;; - ;; check if can run more tests. if yes, continue, if no, rest until can run more - ;; look at the test jobgroup and tot jobs running - ;; - ;; NOTE: This does NOT actually gate here, only captures the proc to be called later - ;; - (if (not (runs:dat-wait-for-jobs-function runsdat)) - (runs:dat-wait-for-jobs-function-set! - runsdat - (lambda (testdat-in) - (let* ((jobgroup (runs:testdat-jobgroup testdat-in)) - (can-run-more-tests (runs:dat-can-run-more-tests runsdat)) - (last-jobs-check-time (runs:dat-last-jobs-check-time runsdat)) - (should-check-jobs (match can-run-more-tests - ((can-run-more-flag num-running nr-in-jobgroup max-concurrent-jobs . params) - (if (< (- max-concurrent-jobs num-running) 25) - (begin - (debug:print-info 2 *default-log-port* - "less than 20 jobs headroom, ("max-concurrent-jobs - "-"num-running")>20. Forcing prelaunch check.") - #t) - #f)) - (else #f)))) ;; no record yet - (if should-check-jobs - (let loop-can-run-more - ((res (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)) - (remtries 1440)) ;; we can wait for up to two hours for jobs to get done - (match res - ((run-more num-running . rem) - (if (or run-more - (< remtries 1)) - (begin - (if (runs:lownoise "num-running" 30) - (debug:print-info 0 *default-log-port* "Have "num-running" tests of max " max-concurrent-jobs)) - (runs:dat-can-run-more-tests-set! runsdat res)) ;; capture the result and then drop through - (begin - (if (runs:lownoise "num-running" 10) - (debug:print-info 0 *default-log-port* "Can't run more tests, have "num-running" tests of " - max-concurrent-jobs " allowed.")) - (thread-sleep! 5) ;; if we've hit max concurrent jobs take a breather, nb// make this configurable - - ;; wait for load here - (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat))) - (loop-can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs) - (- remtries 1))))))) - ))))) - - ;; I'm not clear on why prereqs are gathered here TODO: verfiy this is needed - (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) - - ;; I'm not clear on why we'd capture running job counts here TODO: verify this is needed - (runs:dat-can-run-more-tests-set! runsdat (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)) - - (let ((loop-list (runs:process-expanded-tests runsdat testdat))) ;; in process-expanded-tests ultimately run:test -> launch-test -> test actually running - (if loop-list (apply loop loop-list)))) - - ;; items processed into a list but not came in as a list been processed - ;; - ((and (list? items) ;; thus we know our items are already calculated - (not itemdat)) ;; and not yet expanded into the list of things to be done - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-3") - (debug:print-info 4 *default-log-port* "OUTER COND: (and (list? items)(not itemdat))") - ;; Must determine if the items list is valid. Discard the test if it is not. - (if (and (list? items) - (> (length items) 0) - (and (list? (car items)) - (> (length (car items)) 0)) - (debug:debug-mode 1)) - (debug:print 2 *default-log-port* (map (lambda (row) - (conc (string-intersperse - (map (lambda (varval) - (string-intersperse varval "=")) - row) - " ") - "\n")) - items))) - - (let* ((items-in-testpatt - (filter - (lambda (my-itemdat) - (tests:match test-patts hed (item-list->path my-itemdat) )) - ;; was: (tests:match test-patts hed (item-list->path my-itemdat) required: required-tests)) - items) )) - (if (null? items-in-testpatt) - (debug:print-error 0 *default-log-port* "Test " (tests:testqueue-get-testname test-record) " is itemized but has no items matching the test pattern") - - (for-each (lambda (my-itemdat) - (let* ((new-test-record (let ((newrec (make-tests:testqueue))) - (vector-copy! test-record newrec) - newrec)) - (my-item-path (item-list->path my-itemdat)) - - (newtestname (db:test-make-full-name hed my-item-path))) ;; test names are unique on testname/item-path - (tests:testqueue-set-items! new-test-record #f) - (tests:testqueue-set-itemdat! new-test-record my-itemdat) - (tests:testqueue-set-item_path! new-test-record my-item-path) - (hash-table-set! test-records newtestname new-test-record) - ;; BUG: This next line sucks up a lot of horsepower - ;; (set! tal (append tal (list newtestname))) - ;; (set! tal (cons newtestname tal)) ;; 4/6/2023 - try using cons, does it matter if the test gets added at the beginning? - (set! incoming-tests (cons newtestname incoming-tests)) - )) ;; since these are itemized create new test names testname/itempath - items-in-testpatt))) - - (if (and (< (length tal) 20) - (not (null? incoming-tests))) - (begin - (set! tal (append tal (reverse incoming-tests))) - (set! incoming-tests '()))) - - ;; At this point we have possibly added items to tal but all must be handed off to - ;; INNER COND logic. I think loop without rotating the queue - ;; (loop hed tal reg reruns)) - ;; (let ((newtal (append tal (list hed)))) ;; We should discard hed as it has been expanded into it's items? Yes, but only if this *is* an itemized test - ;; (loop (car newtal)(cdr newtal) reg reruns) - (if (null? tal) - #f - (loop (car tal)(cdr tal) reg reruns))) - - ;; if items is a proc then need to run items:get-items-from-config, get the list and loop - ;; - but only do that if resources exist to kick off the job - ;; EXPAND ITEMS - ((or (procedure? items)(eq? items 'have-procedure)) - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-4") - (let ((can-run-more #f)) ;; (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) - (if (not can-run-more) #;(and (list? can-run-more) - (car can-run-more)) - (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) ;; itemized test expanded here - (if loop-list - (apply loop loop-list) - (debug:print-info 4 *default-log-port* " -- Can't expand hed="hed) - ) - ) - ;; if can't run more just loop with next possible test - (loop (car newtal)(cdr newtal) reg reruns)))) - - - ;; this case should not happen, added to help catch any bugs - ((and (list? items) itemdat) - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-5") - (debug:print-error 0 *default-log-port* "Should not have a list of items in a test and the itemspath set - please report this") - (exit 1)) - - ((not (null? reruns)) - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-6") - (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED, - (junked (lset-difference equal? tal newlst))) - (debug:print-info 4 *default-log-port* "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal) - (if (< num-retries max-retries) - (set! newlst (append reruns newlst))) - (set! num-retries (+ num-retries 1)) - ;; (thread-sleep! (+ 1 *global-delta*)) - (if (not (null? newlst)) - ;; since reruns have been tacked on to newlst create new reruns from junked - (loop (car newlst)(cdr newlst) reg (delete-duplicates junked))))) - - ((not (null? tal)) - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-7") - (debug:print-info 4 *default-log-port* "I'm pretty sure I shouldn't get here.")) - ((not (null? reg)) ;; could we get here with leftovers? - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-8") - (debug:print-info 0 *default-log-port* "Have leftovers!") - (loop (car reg)(cdr reg) '() reruns)) - (else - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-9") - (debug:print-info 4 *default-log-port* "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) - ))) ;; end loop on sorted test names - ;; this is the point where everything is launched and now you can mark the run in metadata table as all launched - (rmt:set-var (conc "lunch-complete-" run-id) "yes") - - ;; now *if* -run-wait we wait for all tests to be done - ;; Now wait for any RUNNING tests to complete (if in run-wait mode) - ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat))) - (thread-sleep! 0.1) ;; I think there is a race condition here. Let states/statuses settle - - (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id)) - (prev-num-running 0)) - ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running) - (if (and (or (args:get-arg "-run-wait") - (equal? (configf:lookup *configdat* "setup" "run-wait") "yes")) - (> num-running 0)) - (begin - ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes - ;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0)) - (if (> (current-seconds)(+ last-time-incomplete 900)) - (let ((actual-num-running (rmt:get-count-tests-running-for-run-id run-id))) - (debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id - ". Running as pid " (current-process-id) " on " (get-host-name)) - (set! last-time-incomplete (current-seconds)) ;; FIXME, this might be causing slow down - use of set! - (rmt:find-and-mark-incomplete run-id #f) - (debug:print-info 0 *default-log-port* "run-wait specified, waiting on " actual-num-running - " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " - (time->string (seconds->local-time (current-seconds)))))) - ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat))) - (thread-sleep! 5) ;; (if (>= num-running max-concurrent-jobs) 5 1)) - (wait-loop (rmt:get-count-tests-running-for-run-id run-id) - num-running)))) - ;; LET* ((test-record - ;; we get here on "drop through". All done! - ;; this is moved to runs:run-testes since this function is getting called twice to ensure everthing is completed. - ;; (debug:print-info 0 *default-log-port* "Calling Post Hook") - ;; (runs:run-post-hook run-id) - (debug:print-info 1 *default-log-port* "All tests launched"))) - -(define (runs:calc-fails prereqs-not-met) - (filter (lambda (test) - (and (vector? test) ;; not (string? test)) - (member (db:test-get-state test) '("INCOMPLETE" "COMPLETED")) ;; TODO: pull from *common:stuff...* - (not (member (db:test-get-status test) - '("PASS" "WARN" "WAIVED" "SKIP"))))) - prereqs-not-met)) - -(define (runs:calc-prereq-fail prereqs-not-met) ;; REMOVEME since NOT_STARTED/PREQ_FAIL is now COMPLETED/PREQ_FAIL - (filter (lambda (test) - (and (vector? test) ;; not (string? test)) - (equal? (db:test-get-state test) "NOT_STARTED") - (not (member (db:test-get-status test) - '("n/a" "KEEP_TRYING"))))) - prereqs-not-met)) - -(define (runs:calc-not-completed prereqs-not-met) - (filter - (lambda (t) - (or (not (vector? t)) - (not (member (db:test-get-state t) '("INCOMPLETE" "COMPLETED"))))) - prereqs-not-met)) - -;; (define (runs:calc-not-completed prereqs-not-met) -;; (filter -;; (lambda (t) -;; (or (not (vector? t)) -;; (not (equal? "COMPLETED" (db:test-get-state t))))) -;; prereqs-not-met)) - -(define (runs:calc-runnable prereqs-not-met) - (filter - (lambda (t) - (or (not (vector? t)) - (and (equal? "NOT_STARTED" (db:test-get-state t)) - (member (db:test-get-status t) - '("n/a" "KEEP_TRYING"))) - (and (equal? "RUNNING" (db:test-get-state t))))) ;; account for a test that is running - prereqs-not-met)) - -(define (runs:pretty-string lst) - (map (lambda (t) - (if (not (vector? t)) - (conc t) - (conc (db:test-get-testname t) ":" (db:test-get-state t) "/" (db:test-get-status t)))) - lst)) - -;; parent-test is there as a placeholder for when parent-tests can be run as a setup step -;; -(define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry runsdat testdat-rec) - ;; All these vars might be referenced by the testconfig file reader - ;; - ;; NEED to reprocess testconfig here, ensuring that item variables are available. - ;; This is for Tal's issue with item-specific env vars not being set for use in skip. - ;; HSD https://hsdes.intel.com/appstore/icf/index.html#/article?articleId=1408763273 - ;; Also later HSD https://hsdes.intel.com/appstore/article/#/14012138487 - ;; - (let* ((test-name (tests:testqueue-get-testname test-record)) - (test-waitons (tests:testqueue-get-waitons test-record)) - (itemdat (tests:testqueue-get-itemdat test-record)) - (item-path "") - (db #f) - (full-test-name #f)) - - ;; setting itemdat to a list if it is #f - (if (not itemdat)(set! itemdat '())) - (set! item-path (item-list->path itemdat)) - (set-item-env-vars itemdat) - (set! full-test-name (db:test-make-full-name test-name item-path)) - (runs:set-megatest-env-vars run-id inrunname: runname testname: test-name itempath: item-path) ;; these may be needed by the launching process - - (let* ((test-conf ;; re-instated the tests:get-testconfig to fix HSD https://hsdes.intel.com/appstore/article/#/14012138487, need to be able to skip using [items], [itemstable] variables. - ;; (tests:testqueue-get-testconfig test-record )) ;; vector-ref test-record 3 - (tests:get-testconfig test-name item-path all-tests-registry #t force-create: #t)) - (test-path (hash-table-ref all-tests-registry test-name)) ;; (conc *toppath* "/tests/" test-name)) ;; could use tests:get-testconfig here ... - - (force (hash-table-ref/default flags "-force" #f)) - (rerun (hash-table-ref/default flags "-rerun" #f)) - (keepgoing (hash-table-ref/default flags "-keepgoing" #f)) - (incomplete-timeout (string->number (or (configf:lookup *configdat* "setup" "incomplete-timeout") "x"))) - ) - - (debug:print-info 4 *default-log-port* - "\nTESTNAME: " full-test-name - "\n test-config: " (hash-table->alist test-conf) - "\n itemdat: " itemdat - ) - (debug:print 2 *default-log-port* "Attempting to launch test " full-test-name) - ;; (setenv "MT_TEST_NAME" test-name) ;; - ;; (setenv "MT_ITEMPATH" item-path) - ;; (setenv "MT_RUNNAME" runname) - (change-directory *toppath*) - - ;; Here is where the test_meta table is best updated - ;; Yes, another use of a global for caching. Need a better way? - ;; - ;; There is now a single call to runs:update-all-test_meta and this - ;; per-test call is not needed. Given the delicacy of the move to - ;; v1.55 this code is being left in place for the time being. - ;; - (if (not (hash-table-exists? *test-meta-updated* test-name)) - (begin - (hash-table-set! *test-meta-updated* test-name #t) - (runs:update-test_meta test-name test-conf))) - - ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer")) - (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/")) - (test-id (rmt:get-test-id run-id test-name item-path)) - (testdat (if test-id (rmt:get-test-info-by-id run-id test-id) #f))) - (if (not testdat) - (let loop () - ;; ensure that the path exists before registering the test - ;; NOPE: Cannot! Don't know yet which disk area will be assigned.... - ;; (system (conc "mkdir -p " new-test-path)) - ;; - ;; (open-run-close tests:register-test db run-id test-name item-path) - ;; - ;; NB// for the above line. I want the test to be registered long before this routine gets called! - ;; - (if (not test-id)(set! test-id (rmt:get-test-id run-id test-name item-path))) - (if (not test-id) - (begin - (debug:print 2 *default-log-port* "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id) - (rmt:register-test run-id test-name item-path) - (set! test-id (rmt:get-test-id run-id test-name item-path)))) - (debug:print-info 4 *default-log-port* "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"") - (set! testdat (rmt:get-test-info-by-id run-id test-id)) - (if (not testdat) - (begin - (debug:print-info 0 *default-log-port* "WARNING: server is overloaded, trying again in two seconds") - ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat))) - (thread-sleep! 2) - (loop))))) - (if (not testdat) ;; should NOT happen - (debug:print-error 0 *default-log-port* "failed to get test record for test-id " test-id)) - (set! test-id (db:test-get-id testdat)) - (if (common:file-exists? test-path) - (change-directory test-path) - (begin - (debug:print-error 0 *default-log-port* "test run path not created before attempting to run the test. Perhaps you are running -remove-runs at the same time?") - (change-directory *toppath*))) - (case (if force ;; (args:get-arg "-force") - 'NOT_STARTED - (if testdat - (string->symbol (test:get-state testdat)) - 'failed-to-insert)) - ((failed-to-insert) - (debug:print-error 0 *default-log-port* "Failed to insert the record into the db")) - ((NOT_STARTED COMPLETED DELETED INCOMPLETE) - (let ((runflag #f)) - (cond - ;; -force, run no matter what - (force (set! runflag #t)) - ;; NOT_STARTED, run no matter what - ((member (test:get-state testdat) '("DELETED" "NOT_STARTED" "INCOMPLETE"))(set! runflag #t)) - ;; not -rerun and PASS, WARN or CHECK, do no run - ((and (or (not rerun) - keepgoing) - ;; Require to force re-run for COMPLETED or *anything* + PASS,WARN or CHECK - (or (member (test:get-status testdat) '("PASS" "WARN" "CHECK" "SKIP" "WAIVED")) - (member (test:get-state testdat) '("COMPLETED")))) - (debug:print-info 2 *default-log-port* "running test " test-name "/" item-path " suppressed as it is " (test:get-state testdat) " and " (test:get-status testdat)) - (hash-table-set! test-registry full-test-name 'DONOTRUN) ;; COMPLETED) - (set! runflag #f)) - - ;; -rerun and status is one of the specifed, run it - ((and rerun - (let* ((rerunlst (string-split rerun ",")) - (must-rerun (member (test:get-status testdat) rerunlst))) - (debug:print-info 3 *default-log-port* "-rerun list: " rerun ", test-status: " (test:get-status testdat)", must-rerun: " must-rerun) - must-rerun)) - (debug:print-info 2 *default-log-port* "Rerun forced for test " test-name "/" item-path) - (set! runflag #t) - (debug:print-info 2 *default-log-port* "Calling rerun hook") - (runs:rerun-hook test-id new-test-path testdat rerun) - ) - - - - ;; -keepgoing, do not rerun FAIL - ((and keepgoing - (member (test:get-status testdat) '("FAIL"))) - (set! runflag #f)) - - ((and (not rerun) - (member (test:get-status testdat) '("FAIL" "n/a"))) - (set! runflag #t)) - - (else (set! runflag #f))) - (debug:print 4 *default-log-port* "RUNNING => runflag: " runflag " STATE: " (test:get-state testdat) " STATUS: " (test:get-status testdat)) - (if (not runflag) - (if (not parent-test) - (if (runs:lownoise (conc "not starting test" full-test-name) 60) - (debug:print 3 *default-log-port* "NOTE: Not starting test " full-test-name " as it is state \"" (test:get-state testdat) - "\" and status \"" (test:get-status testdat) "\", use -rerun \"" (test:get-status testdat) - "\" or -force to override"))) - ;; NOTE: No longer be checking prerequisites here! Will never get here unless prereqs are - ;; already met. - ;; This would be a great place to do the process-fork - ;; - (let ((skip-test #f) - (skip-check (configf:get-section test-conf "skip"))) - (cond - ;; Have to check for skip conditions. This one skips if there are same-named tests - ;; currently running - ((and skip-check - (configf:lookup test-conf "skip" "prevrunning")) - ;; run-ids = #f means *all* runs - (let ((running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f))) - (if (not (null? running-tests)) ;; have to skip - (set! skip-test "Skipping due to previous tests running")))) - - ;; split the string and OR of file-exists? - ((and skip-check - (configf:lookup test-conf "skip" "fileexists")) - (let* ((files (string-split (configf:lookup test-conf "skip" "fileexists"))) - (existing (filter common:file-exists? files))) - (if (not (null? existing)) ;; (common:file-exists? (configf:lookup test-conf "skip" "fileexists")) - (set! skip-test (conc "Skipping due to existance of file(s) " (string-intersperse existing ", ")))))) ;; (configf:lookup test-conf "skip" "fileexists"))))) - - ((and skip-check - (configf:lookup test-conf "skip" "filenotexists")) - (let* ((files (string-split (configf:lookup test-conf "skip" "filenotexists"))) - (existing (filter common:file-exists? files))) - (if (null? existing) ;; (common:file-exists? (configf:lookup test-conf "skip" "filenotexists"))) - (set! skip-test (conc "Skipping due to non existance of files " (string-intersperse files ", ")))))) ;; (configf:lookup test-conf "skip" "filenotexists"))))) - - ((and skip-check - (configf:lookup test-conf "skip" "script")) - (if (= (system (configf:lookup test-conf "skip" "script")) 0) - (set! skip-test (conc "Skipping due to zero return value of script " (configf:lookup test-conf "skip" "script"))))) - - ((and skip-check - (configf:lookup test-conf "skip" "rundelay")) - ;; run-ids = #f means *all* runs - (let* ((numseconds (common:hms-string->seconds (configf:lookup test-conf "skip" "rundelay"))) - (running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f)) - (completed-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("COMPLETED" "INCOMPLETE") '("PASS" "FAIL" "ABORT") #f)) ;; ironically INCOMPLETE is same as COMPLETED in this contex - (last-run-times (map db:mintest-get-event_time completed-tests)) - (time-since-last (- (current-seconds) (if (null? last-run-times) 0 (common:max last-run-times))))) - (if (or (not (null? running-tests)) ;; have to skip if test is running - (> numseconds time-since-last)) - (set! skip-test (conc "Skipping due to previous test run less than " (configf:lookup test-conf "skip" "rundelay") " ago")))))) - - (if skip-test - (begin - (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test) - (debug:print-info 1 *default-log-port* "SKIPPING Test " full-test-name " due to " skip-test)) - ;; - ;; Here the test is handed off to launch.scm for launch-test to complete the launch process - ;; - (begin - ;; wait for less than max jobs here - (if (runs:dat-wait-for-jobs-function runsdat) - ((runs:dat-wait-for-jobs-function runsdat) testdat-rec)) - - (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags)) - (begin - (debug:print 0 *default-log-port* "ERROR: Failed to launch the test. Exiting as soon as possible") - (set! *globalexitstatus* 1) ;; - (process-signal (current-process-id) signal/kill)) - ) - ;; wait again here? - )))))) - ((KILLED) - (debug:print 1 *default-log-port* "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.") - (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)) ;; KILLED)) - ((LAUNCHED REMOTEHOSTSTART RUNNING) - (debug:print 2 *default-log-port* "NOTE: " test-name " is already running")) - ;; (if (> (- (current-seconds)(+ (db:test-get-event_time testdat) - ;; (db:test-get-run_duration testdat))) - ;; (or incomplete-timeout - ;; 6000)) ;; i.e. no update for more than 6000 seconds - ;; (begin - ;; (debug:print 0 *default-log-port* "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD") - ;; (tests:test-set-status! run-id test-id "INCOMPLETE" "STUCK/DEAD" "" #f)) - ;; ;; (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "" #f)) - ;; (debug:print 2 *default-log-port* "NOTE: " test-name " is already running"))) - (else - (debug:print-error 0 *default-log-port* "Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat)) - (case (string->symbol (test:get-state testdat)) - ((COMPLETED INCOMPLETE) - (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)) - (else - (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN))))))))) - -;;====================================================================== -;; END OF NEW STUFF -;;====================================================================== - -(define (get-dir-up-n dir . params) - (let ((dparts (string-split dir "/")) - (count (if (null? params) 1 (car params)))) - (conc "/" (string-intersperse - (take dparts (- (length dparts) count)) - "/")))) - -(define (runs:recursive-delete-with-error-msg real-dir) - (if (> (system (conc "rm -rf " real-dir)) 0) - (begin - ;; FAILED, possibly due to permissions, do chmod a+rwx then try one more time - (system (conc "chmod -R a+rwx " real-dir)) - (if (> (system (conc "rm -rf " real-dir)) 0) - (debug:print-error 0 *default-log-port* "There was a problem removing " real-dir " with rm -f"))))) - -(define (runs:safe-delete-test-dir real-dir) - ;; first delete all sub-directories - (directory-fold - (lambda (f x) - (let ((fullname (conc real-dir "/" f))) - (if (directory? fullname)(runs:recursive-delete-with-error-msg fullname))) - (+ 1 x)) - 0 real-dir) - ;; then files other than *testdat.db* - (directory-fold - (lambda (f x) - (let ((fullname (conc real-dir "/" f))) - (if (not (string-search (regexp "testdat.db") f)) - (runs:recursive-delete-with-error-msg fullname))) - (+ 1 x)) - 0 real-dir #t) - ;; then the entire directory - (runs:recursive-delete-with-error-msg real-dir)) - -;; cleanup often needs to remove all but the last N runs per target -;; -;; target-patts a1/b1/c1,a2/b2/c2 ... -;; -;; This will fail if called with empty target or a bad target (i.e. missing or extra fields) -;; -(define (runs:get-hash-by-target target-patts runpatt) - (let* ((targets (string-split target-patts ",")) - (keys (rmt:get-keys)) - (res-ht (make-hash-table))) ;; target -> ( runrecord1 runrecord2 ... ) - (for-each - (lambda (target-patt) - (let ((runs (rmt:simple-get-runs runpatt #f #f target-patt #f))) - (for-each - (lambda (run) - (let ((target (simple-run-target run))) - (hash-table-set! res-ht target (cons run (hash-table-ref/default res-ht target '()))))) - runs))) - targets) - res-ht)) - -;; delete runs older than X (weeks, days, months years etc.) -;; delete redundant runs within a target - N is the input -;; delete redundant runs within a target IFF older than given date/time AND keep at least N -;; -(define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep #!key (actions '(print))) - (let* ((runs-ht (runs:get-hash-by-target target-patts runpatt)) - (age (if (args:get-arg "-age")(common:hms-string->seconds (args:get-arg "-age")) #f)) - (age-mark (if age (- (current-seconds) age) (+ (current-seconds) 86400))) - (precmd (or (args:get-arg "-precmd") "")) - (action-chk (member (string->symbol "remove-runs") actions))) - ;; check the sequence of actions archive must comme before remove-runs - (if (and action-chk (member (string->symbol "archive") action-chk)) - (begin - (debug:print-error 0 *default-log-port* "action remove-runs must come after archive") - (exit 1))) - (print "Actions: " actions " age: " age) - (for-each - (lambda (action) - (for-each - (lambda (target) - (let* ((runs (hash-table-ref runs-ht target)) - (sorted (sort runs (lambda (a b)(< (simple-run-event_time a)(simple-run-event_time b))))) - (to-remove (let* ((len (length sorted)) - (trim-amt (- len num-to-keep))) - (if (> trim-amt 0) - (take sorted trim-amt) - '())))) - (hash-table-set! runs-ht target to-remove) - (print target ":") - (for-each - (lambda (run) - (let ((remove (member run to-remove (lambda (a b) - (eq? (simple-run-id a) - (simple-run-id b)))))) - (if (and age (> (simple-run-event_time run) age-mark)) - (print "Skipping handling of " target "/" (simple-run-runname run) " as it is younger than " (args:get-arg "-age")) - (case action - ((print) - (print " " (simple-run-runname run) - " " (time->string (seconds->local-time (simple-run-event_time run)) "WW%V.%u %H:%M:%S") - " " (if remove "REMOVE" ""))) - ((remove-runs) - (if remove (system (conc precmd " megatest -remove-runs -target " target " -runname " (simple-run-runname run) " -testpatt %" - (if (member 'kill-runs actions) ;; if kill-runs is specified then set -kill-wait to 0 - " -kill-wait 0" - ""))))) - ((archive) - (if remove (system (conc precmd " megatest -archive save-remove -target " target " -runname " (simple-run-runname run) " -testpatt %")))) - ((kill-runs) - (if remove (system (conc precmd " megatest -kill-runs -target " target " -runname " (simple-run-runname run) " -testpatt %")))))))) - sorted))) - (hash-table-keys runs-ht))) - actions) - runs-ht)) - -(define (remove-last-path-directory path-in) - (let* ((dparts (string-split path-in "/")) - (path-out (conc "/" (string-intersperse (take dparts (- (length dparts) 1)) "/"))) - ) - path-out - ) -) - - -(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(mode #f)(options '())) - (common:clear-caches) ;; clear all caches - (let* ((db #f) - ;; (tdbdat (tasks:open-db)) - (keys (rmt:get-keys)) - (rundat (mt:get-runs-by-patt keys runnamepatt target)) - (header (vector-ref rundat 0)) - (runs (vector-ref rundat 1)) - (states (if state (string-split state ",") '())) - (statuses (if status (string-split status ",") '())) - (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f))) - (rp-mutex (make-mutex)) - (bup-mutex (make-mutex)) - (keep-records (args:get-arg "-keep-records")) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode". - (test-records '())) ;; for tasks that we wish to operate on all tests in one fell swoop - - (let* ((write-access-actions '(remove-runs set-state-status archive run-wait kill-runs)) - (dbfile (conc *toppath* "/.mtdb/main.db")) - (readonly-mode (not (file-write-access? dbfile)))) - (when (and readonly-mode - (member action write-access-actions)) - (debug:print-error 0 *default-log-port* dbfile " is readonly. Cannot proceed with action ["action"] in which write-access isrequired .") - (exit 1))) - - (debug:print-info 4 *default-log-port* "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status) - (if (> 2 (length state-status)) - (begin - (debug:print-error 0 *default-log-port* "the parameter to -set-state-status is a comma delimited string. E.g. COMPLETED,FAIL") - (exit))) - (for-each - (lambda (run) - (let ((runkey (string-intersperse (map (lambda (k) - (db:get-value-by-header run header k)) keys) "/")) - (dirs-to-remove (make-hash-table)) - (proc-get-tests (lambda (run-id) - (mt:get-tests-for-run run-id - testpatt states statuses - not-in: #f - sort-by: (case action - ((remove-runs) 'rundir) - (else 'event_time)))))) - (let* ((run-id (db:get-value-by-header run header "id")) - (run-state (db:get-value-by-header run header "state")) - (run-name (db:get-value-by-header run header "runname")) - (tests (if (not (equal? run-state "locked")) - (proc-get-tests run-id) - '())) - (lasttpath "/does/not/exist/I/hope") - (lastrealpath "/does/not/exist/I/hope") - ;; there may be a number of different disks used in the same run. - (run-paths-hash (make-hash-table)) - (worker-thread #f)) - (debug:print-info 4 *default-log-port* "runs:operate-on run=" run ", header=" header) - (if (not (null? tests)) - (begin - (case action - ((kill-runs) - (tasks:kill-runner target run-name "%") - (debug:print 1 *default-log-port* "Killing tests for run: " runkey " " (db:get-value-by-header run header "runname")) - ) - ((remove-runs) - ;; use this location to cleanup old DELETED records? No. See below for same call - ;; (rmt:delete-old-deleted-test-records run-id) - ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) - ;; seek and kill in flight -runtests with % as testpatt here - ;; (if (equal? testpatt "%") - (tasks:kill-runner target run-name testpatt) - ;; (debug:print 0 *default-log-port* "not attempting to kill any run launcher processes as testpatt is " testpatt)) - (debug:print 1 *default-log-port* "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname"))) - ((set-state-status) - ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) - (debug:print 2 *default-log-port* "Modifying state and status for tests for run: " runkey " " (db:get-value-by-header run header "runname"))) - ((print-run) - (debug:print 1 *default-log-port* "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header) - action) - ((run-wait) - (debug:print 1 *default-log-port* "Waiting for run " runkey ", run=" runnamepatt " to complete")) - ((archive) - (debug:print 1 *default-log-port* "Archiving/restoring (" (args:get-arg "-archive") ") data for run: " runkey " " (db:get-value-by-header run header "runname")) - (let ((op (string->symbol (args:get-arg "-archive")))) - (set! worker-thread - (make-thread - (lambda () - (case op - ((save save-remove keep-html) - (archive:run-bup op run-id run-name tests rp-mutex bup-mutex)) - ((restore) - (archive:bup-restore op run-id run-name tests rp-mutex bup-mutex)) - ((get) ;;; NOTE: This is a special case. We wish to operate on ALL tests in one go - (set! test-records (append tests test-records))) - (else - (debug:print-error 0 *default-log-port* "unrecognised sub command " op " for -archive. Run \"megatest\" to see help") - (exit)))) - "archive-bup-thread")) - (thread-start! worker-thread) - (if (eq? op 'get) - (thread-join! worker-thread)) ;; we need the test-records set to not overlap - )) - (else - (debug:print-info 0 *default-log-port* "action not recognised " action))) - - ;; actions that operate on one test at a time can be handled below - ;; - (let ((sorted-tests (filter - vector? - (sort tests (lambda (a b)(let ((dira ;; (rmt:sdb-qry 'getstr - (db:test-get-rundir a)) ;; ) ;; (filedb:get-path *fdb* (db:test-get-rundir a))) - (dirb ;; (rmt:sdb-qry 'getstr - (db:test-get-rundir b))) ;; ) ;; ((filedb:get-path *fdb* (db:test-get-rundir b)))) - (if (and (string? dira)(string? dirb)) - (> (string-length dira)(string-length dirb)) - #f)))))) - (toplevel-retries (make-hash-table)) ;; try three times to loop through and remove top level tests - (test-retry-time (make-hash-table)) - (backgrounded-remove-status (make-hash-table)) - (backgrounded-remove-last-visit (make-hash-table)) - (backgrounded-remove-result (make-hash-table)) - (allow-run-time (string->number (or (args:get-arg "-kill-wait") "10")))) ;; seconds to allow for killing tests before just brutally killing 'em - (let loop ((test (car sorted-tests)) - (tal (cdr sorted-tests))) - (let* ((test-id (db:test-get-id test)) - (new-test-dat (rmt:get-test-info-by-id run-id test-id))) - (if (not new-test-dat) - (begin - (debug:print-error 0 *default-log-port* "We have a test-id of " test-id " but no record was found. NOTE: No locking of records is done between processes, do not simultaneously remove the same run from two processes!") - (if (not (null? tal)) - (loop (car tal)(cdr tal)))) - (let* ((item-path (db:test-get-item-path new-test-dat)) - (test-name (db:test-get-testname new-test-dat)) - (run-dir ;;(filedb:get-path *fdb* - ;; (rmt:sdb-qry 'getid - (db:test-get-rundir new-test-dat)) ;; ) ;; run dir is from the link tree - (has-subrun (and (subrun:subrun-test-initialized? run-dir) - (not (subrun:subrun-removed? run-dir)))) - (test-state (db:test-get-state new-test-dat)) - (test-status (db:test-get-status new-test-dat)) - (test-fulln (db:test-get-fullname new-test-dat)) - (uname (db:test-get-uname new-test-dat)) - (toplevel-with-children (and (db:test-get-is-toplevel test) - (> (rmt:test-toplevel-num-items run-id test-name) 0)))) - - (case action - ((remove-runs) - ;; if the test is a toplevel-with-children issue an error and do not remove - (cond - (toplevel-with-children - (debug:print 0 *default-log-port* "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests") - (hash-table-set! toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1)) - (if (> (hash-table-ref toplevel-retries test-fulln) 3) - (if (not (null? tal)) - (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries - (let ((newtal (append tal (list test)))) - (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue - (has-subrun - ;; - (let ((last-visit (hash-table-ref/default backgrounded-remove-last-visit test-fulln 0)) - (now (current-seconds)) - (rem-status (hash-table-ref/default backgrounded-remove-status test-fulln 'not-started))) - (case rem-status - ((not-started) - (debug:print 0 *default-log-port* "WARNING: postponing removal of " test-fulln " with run-id " run-id " as it has a subrun") - (hash-table-set! backgrounded-remove-status test-fulln 'started) - (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) - (common:send-thunk-to-background-thread - (lambda () - (let* ((subrun-remove-succeeded - (subrun:remove-subrun run-dir keep-records))) - (hash-table-set! backgrounded-remove-result test-fulln subrun-remove-succeeded) - (hash-table-set! backgrounded-remove-status test-fulln 'done))) - name: (conc "remove-subrun:"test-fulln)) - - ;; send to back of line, loop - (let ((newtal (append tal (list test)))) - (loop (car newtal)(cdr newtal))) - ) - ((started) - ;; if last visit was within last second, sleep 1 second - (if (< (- now last-visit) 1.0) - (thread-sleep! 1.0)) - (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) - ;; send to back of line, loop - (let ((newtal (append tal (list test)))) - (loop (car newtal)(cdr newtal))) - ) - ((done) - ;; drop this one; if remaining, loop, else finish - (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) - (let ((subrun-remove-succeeded (hash-table-ref/default backgrounded-remove-result test-fulln 'exception))) - (cond - ((eq? subrun-remove-succeeded 'exception) - (let* ((logfile (subrun:get-log-path run-dir "remove"))) - (debug:print 0 *default-log-port* "ERROR: removing subrun of of " test-fulln " with run-id " run-id " ; see logfile @ "logfile)) - (if (not (null? tal)) - (loop (car tal)(cdr tal)))) - (subrun-remove-succeeded - (debug:print 0 *default-log-port* "Now removing of " test-fulln " with run-id " run-id " since subrun was removed.") - ;;(runs:remove-test-directory new-test-dat mode) ;; let normal case handle this. it will go thru loop again as non-subrun - (let ((newtal (append tal (list test)))) - (loop (car newtal)(cdr newtal)))) - (else - (let* ((logfile (subrun:get-log-path run-dir "remove"))) - (debug:print 0 *default-log-port* "WARNING: removal of subrun failed. Please check "logfile" for details.")) - ;; send to back of line, loop (will not match has-subrun next time through) - (if (not (null? tal)) - (loop (car tal)(cdr tal)))))) - ) - ) ; end case rem-status - ) ; end let - ); end cond has-subrun - - (else - ;; BB - TODO - consider backgrounding to threads to delete tests (work below) - (debug:print-info 2 *default-log-port* "test: " test-name " itest-state: " test-state) - (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) - (begin - (if (not (hash-table-ref/default test-retry-time test-fulln #f)) - (begin - ;; want to set to REMOVING BUT CANNOT do it here? - (hash-table-set! test-retry-time test-fulln (current-seconds)))) - (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) - ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first - ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give - ;; up and blow it away. - (begin - (debug:print 0 *default-log-port* "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing") - (mt:test-set-state-status-by-id run-id (db:test-get-id test) "FAILEDKILL" "n/a" #f) - (thread-sleep! 1)) - (begin - (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f) - (thread-sleep! 1))) - ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... - (if (null? tal) - (loop new-test-dat tal) - (loop (car tal)(append tal (list new-test-dat))))) - (begin - (let ((rundir (db:test-get-rundir new-test-dat))) - (if (and (not (string= rundir "/tmp/badname")) - (file-exists? rundir) - (substring-index run-name rundir) - (tests:glob-like-match (conc "%/" target "/%") rundir) - ) - (begin - (set! lasttpath (db:test-get-rundir new-test-dat)) ;; remember this path for run removal - (set! lastrealpath (remove-last-path-directory (resolve-pathname lasttpath))) - (hash-table-set! run-paths-hash lastrealpath 1) - (runs:remove-test-directory new-test-dat mode) ;; 'remove-all) - ) - (begin - (debug:print 2 *default-log-port* "Not removing directory " rundir " because either it doesn't exist or has a bad name") - (debug:print 2 *default-log-port* "Is /tmp/badname: " (string= rundir "/tmp/badname")) - (debug:print 2 *default-log-port* "Exists: " (file-exists? rundir)) - (debug:print 2 *default-log-port* "Has run-name: " (substring-index run-name rundir)) - (debug:print 2 *default-log-port* "Has target: " (tests:glob-like-match (conc "%/" target "/%") rundir)) - (debug:print 2 *default-log-port* "Target: " target) - - ;;PJH remove record from db no need to cleanup directory - (case mode - ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) (db:test-get-state test)(db:test-get-status test) #f)) - ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f)) - (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test)))) - - ) - ) - ) - - (if (not (null? tal)) - (loop (car tal)(cdr tal))))))) - (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) - ((kill-runs) - ;; RUNNING -> KILLREQ - ;; LAUNCHED,RUNNING,REMOTEHOSTSTART -> NOT STARTED - (cond - ((and has-subrun (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))) - (common:send-thunk-to-background-thread - (lambda () - (let* ((subrun-remove-succeeded - (subrun:kill-subrun run-dir keep-records))) - #t))) - (if (not (null? tal)) - (loop (car tal)(cdr tal))) - ) - ((member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) - (debug:print 1 *default-log-port* "INFO: issuing killreq to test "test-fulln) - (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f) - (if (not (null? tal)) - (loop (car tal)(cdr tal)))) - ((and (member test-status '("PREQ_FAIL" "PREQ_DISCARDED" "BLOCKED" "ZERO_ITEMS" "KEEP_TRYING" "TEN_STRIKES" "TIMED_OUT"))) - (rmt:set-state-status-and-roll-up-items run-id (db:test-get-id test) 'foo "NOT_STARTED" "n/a" (conc "kill-run moved from "test-state":"test-status" to NOT_STARTED:n/a")) - ;;(mt:test-set-state-status-by-id run-id (db:test-get-id test) "NOT_STARTED" "n/a" (conc "kill-run moved from "test-state":"test-status" to NOT_STARTED:n/a")) - (if (not (null? tal)) - (loop (car tal)(cdr tal))) - ) - (else - (if (not (null? tal)) - (loop (car tal)(cdr tal))) - ))) - ((set-state-status) - (let* ((new-state (car state-status)) - (new-status (cadr state-status)) - (test-id (db:test-get-id test)) - (test-run-dir (db:test-get-rundir new-test-dat)) - (has-subrun (and (subrun:subrun-test-initialized? test-run-dir) - (not (subrun:subrun-removed? test-run-dir))))) - (when has-subrun - (common:send-thunk-to-background-thread - (lambda () - (subrun:set-state-status test-run-dir state status new-state-status) - ) - ) - ) - (debug:print-info 2 *default-log-port* "new state " new-state ", new status " new-status ) - (mt:test-set-state-status-by-id run-id test-id new-state new-status #f)) - (if (not (null? tal)) - (loop (car tal)(cdr tal)))) - ((run-wait) - ;; BB TODO - manage has-subrun case - (debug:print-info 2 *default-log-port* "still waiting, " (length tests) " tests still running") - (thread-sleep! 5) - (let ((new-tests (proc-get-tests run-id))) - (if (null? new-tests) - (debug:print-info 1 *default-log-port* "Run completed according to zero tests matching provided criteria.") - (loop (car new-tests)(cdr new-tests))))) - ((archive) - ;; BB TODO - manage has-subrun case - (if (and run-dir (not toplevel-with-children)) - (let ((ddir (conc run-dir "/"))) - (case (string->symbol (args:get-arg "-archive")) - ((save save-remove keep-html) - (if (common:file-exists? ddir) - (debug:print-info 0 *default-log-port* "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir))))))) - (if (not (null? tal)) - (loop (car tal)(cdr tal)))) - ))) - ) - (if worker-thread (thread-join! worker-thread))) - (common:join-backgrounded-threads)))) - - ;; remove the run if zero tests remain - (if (eq? action 'remove-runs) - (let* ((run-id (db:get-value-by-header run header "id")) ;; NB// masks run-id from above? - (remtests (mt:get-tests-for-run run-id #f '("DELETED") '("n/a") not-in: #t))) - (if (null? remtests) ;; no more tests remaining - (let* ((linkspath (remove-last-path-directory lasttpath)) - (runpaths (hash-table-keys run-paths-hash)) - ) - - (debug:print 2 *default-log-port* "run-paths-hash: " (hash-table-keys run-paths-hash)) - - (debug:print 1 *default-log-port* "Removing target " target "run: " run-name) - (if (not keep-records) - (begin - (debug:print 1 *default-log-port* "Removing DB records for the run.") - (rmt:delete-run run-id) - (rmt:delete-old-deleted-test-records run-id)) - ) - (if (not (equal? linkspath "/does/not/exist/I")) - (begin - (debug:print 1 *default-log-port* "Recursively removing links dir " linkspath) - (runs:recursive-delete-with-error-msg linkspath))) - - (for-each (lambda(runpath) - (debug:print 1 *default-log-port* "Recursively removing runs dir " runpath) - (runs:recursive-delete-with-error-msg runpath) - ) - runpaths - ) - ))))) - )) - runs) - ;; special case - archive get - (if (equal? (args:get-arg "-archive") "get") - (archive:bup-get-data "get" #f #f test-records rp-mutex bup-mutex)) - (if (or (equal? (args:get-arg "-archive") "save") (equal? (args:get-arg "-archive") "save-remove")) - (begin - (debug:print 0 *default-log-port* "db archive started") - (archive:megatest-db target runnamepatt) - (debug:print 0 *default-log-port* "db archived"))) - ) - #t - ) - -(define (runs:remove-test-directory test mode) ;; remove-data-only) - (let* ((run-dir (db:test-get-rundir test)) ;; run dir is from the link tree - (real-dir (if (common:file-exists? run-dir) - ;; (resolve-pathname run-dir) - (common:nice-path run-dir) - #f)) - (clean-mode (or mode 'remove-all)) - (test-id (db:test-get-id test)) - ;; (lock-key (conc "test-" test-id)) - ;; (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) - ;; (expire-time (+ (current-seconds) 30))) ;; give up on getting the lock and steal it after 15 seconds - ;; (if (car lock) - ;; #t - ;; (if (> (current-seconds) expire-time) - ;; (begin - ;; (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to clean test with id " test-id) - ;; (rmt:no-sync-del! lock-key) ;; destroy the lock - ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; - ;; (begin - ;; (thread-sleep! 1) - ;; (loop (rmt:no-sync-get-lock lock-key) expire-time))))))) - ) - (case clean-mode - ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f)) - ((remove-all) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f)) - ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f))) - (debug:print-info 2 *default-log-port* "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) - (if (and real-dir - (> (string-length real-dir) 5) - (common:file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. - (let* ((realpath (resolve-pathname run-dir))) - (debug:print-info 1 *default-log-port* "Recursively removing " realpath) - (if (common:file-exists? realpath) - (runs:safe-delete-test-dir realpath) - (debug:print 0 *default-log-port* "WARNING: test dir " realpath " appears to not exist or is not readable"))) - (if real-dir - (debug:print 0 *default-log-port* "WARNING: directory " real-dir " does not exist") - (debug:print 0 *default-log-port* "WARNING: no real directory corrosponding to link " run-dir ", nothing done"))) - (if (symbolic-link? run-dir) - (begin - (debug:print-info 1 *default-log-port* "Removing symlink " run-dir) - (handle-exceptions - exn - (debug:print-error 0 *default-log-port* " Failed to remove symlink " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue, exn=" exn) - (delete-file run-dir))) - (if (directory? run-dir) - (if (> (directory-fold (lambda (f x)(+ 1 x)) 0 run-dir) 0) - (debug:print 0 *default-log-port* "WARNING: refusing to remove " run-dir " as it is not empty") - (handle-exceptions - exn - (debug:print-error 0 *default-log-port* " Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue, exn=" exn) - (delete-directory run-dir))) - (if (and run-dir - (not (member run-dir (list "n/a" "/tmp/badname")))) - (debug:print 0 *default-log-port* "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink") - (debug:print 0 *default-log-port* "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) - )) - ;; Only delete the records *after* removing the directory. If things fail we have a record - (case clean-mode - ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) (db:test-get-state test)(db:test-get-status test) #f)) - ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f)) - (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test)))) - ;; (rmt:no-sync-del! lock-key) - )) - -;;====================================================================== -;; Routines for manipulating runs -;;====================================================================== - -;; Since many calls to a run require pretty much the same setup -;; this wrapper is used to reduce the replication of code -(define (general-run-call switchname action-desc proc) - (let ((runname (common:args-get-runname)) - (target (common:args-get-target))) - (cond - ((not target) - (debug:print-error 0 *default-log-port* "Missing required parameter for " switchname ", you must specify the target with -target") - (exit 3)) - ((not runname) - (debug:print-error 0 *default-log-port* "Missing required parameter for " switchname ", you must specify the run name with -runname runname") - (exit 3)) - (else - (let (;; (db #f) - (keys #f)) - (if (launch:setup) - (begin - (full-runconfigs-read) ;; cache the run config - ;; (launch:cache-config) ;; there are two independent config cache locations, turning this one off for now. MRW. - ) ;; do not cache here - need to be sure runconfigs is processed - (begin - (debug:print 0 *default-log-port* "Failed to setup, exiting") - (exit 1))) - - - (set! keys (keys:config-get-fields *configdat*)) - ;; have enough to process -target or -reqtarg here - (if (args:get-arg "-reqtarg") - (let* ((runconfigf (conc *toppath* "/runconfigs.config")) ;; DO NOT EVALUATE ALL - (runconfig (read-config runconfigf #f #t environ-patt: #f))) - (if (hash-table-ref/default runconfig (args:get-arg "-reqtarg") #f) - (keys:target-set-args keys (args:get-arg "-reqtarg") args:arg-hash) - - (begin - (debug:print-error 0 *default-log-port* "[" (args:get-arg "-reqtarg") "] not found in " runconfigf) - ;; (if db (sqlite3:finalize! db)) - (exit 1) - ))) - (if (args:get-arg "-target") - (keys:target-set-args keys (args:get-arg "-target" args:arg-hash) args:arg-hash))) - (if (not (car *configinfo*)) - (begin - (debug:print-error 0 *default-log-port* "Attempted to " action-desc " but run area config file not found") - (exit 1)) - ;; Extract out stuff needed in most or many calls - ;; here then call proc - (let* ((keyvals (keys:target->keyval keys target))) - (proc target runname keys keyvals))) - ;; (if db (sqlite3:finalize! db)) - (set! *didsomething* #t)))))) - -;;====================================================================== -;; Lock/unlock runs -;;====================================================================== - -(define (runs:handle-locking target keys runname lock unlock user) - (let* ((db #f) - (rundat (mt:get-runs-by-patt keys runname target)) - (header (vector-ref rundat 0)) - (runs (vector-ref rundat 1))) - (for-each (lambda (run) - (let ((run-id (db:get-value-by-header run header "id")) - (str (if lock - "lock" - "unlock"))) - (if (or lock - (and unlock - (or (args:get-arg "-force") - (begin - (print "Do you really wish to unlock run " run-id "?\n y/n: ") - (equal? "y" (read-line)))))) - (begin - (rmt:lock/unlock-run run-id lock unlock user) - (debug:print-info 0 *default-log-port* "Done " str " on run id " run-id)) - (debug:print-info 0 *default-log-port* "Skipping lock/unlock on " run-id)))) - runs))) -;;====================================================================== -;; Rollup runs -;;====================================================================== - -;; Update the test_meta table for this test -(define (runs:update-test_meta test-name test-conf) - (let ((currrecord (rmt:testmeta-get-record test-name))) - (if (not currrecord) - (begin - (set! currrecord (make-vector 11 #f)) - (rmt:testmeta-add-record test-name))) - (for-each - (lambda (key) - (let* ((idx (cadr key)) - (fld (car key)) - (val (configf:lookup test-conf "test_meta" fld))) - ;; (debug:print 5 *default-log-port* "idx: " idx " fld: " fld " val: " val) - (if (and val (not (equal? (vector-ref currrecord idx) val))) - (begin - (debug:print 0 *default-log-port* "Updating " test-name " " fld " to " val) - (rmt:testmeta-update-field test-name fld val))))) - '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10))))) - -;; find tests with matching tags, tagpatt is a string "tagpatt1,tagpatt2%, ..." -;; -(define (runs:get-tests-matching-tags tagpatt) - (let* ((tagdata (rmt:get-tests-tags)) - (res '())) ;; list of tests that match one or more tags - (for-each - (lambda (row) - (let* ((tag (car row)) - (tests (cdr row))) - (if (patt-list-match tag tagpatt) - (set! res (append tests res))))) - tagdata) - res)) - - -;; Update test_meta for all tests -(define (runs:update-all-test_meta db) - (let ((test-names (tests:get-all))) ;; (tests:get-valid-tests))) - (for-each - (lambda (test-name) - (let* ((test-conf (mt:lazy-read-test-config test-name))) - (if test-conf (runs:update-test_meta test-name test-conf)))) - (hash-table-keys test-names)))) - -;; This could probably be refactored into one complex query ... -;; NOT PORTED - DO NOT USE YET -;; -#;(define (runs:rollup-run keys runname user keyvals) - (debug:print 4 *default-log-port* "runs:rollup-run, keys: " keys " -runname " runname " user: " user) - (let* ((db #f) - ;; register run operates on the main db - (new-run-id (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour"))) - (prev-tests (rmt:get-matching-previous-test-run-records new-run-id "%" "%")) - (curr-tests (mt:get-tests-for-run new-run-id "%/%" '() '())) - (curr-tests-hash (make-hash-table))) - (rmt:update-run-event_time new-run-id) - ;; index the already saved tests by testname and itemdat in curr-tests-hash - (for-each - (lambda (testdat) - (let* ((testname (db:test-get-testname testdat)) - (item-path (db:test-get-item-path testdat)) - (full-name (conc testname "/" item-path))) - (hash-table-set! curr-tests-hash full-name testdat))) - curr-tests) - ;; NOPE: Non-optimal approach. Try this instead. - ;; 1. tests are received in a list, most recent first - ;; 2. replace the rollup test with the new *always* - (for-each - (lambda (testdat) - (let* ((testname (db:test-get-testname testdat)) - (item-path (db:test-get-item-path testdat)) - (full-name (conc testname "/" item-path)) - (prev-test-dat (hash-table-ref/default curr-tests-hash full-name #f)) - (test-steps (rmt:get-steps-for-test (db:test-get-id testdat))) - (new-test-record #f)) - ;; replace these with insert ... select - (apply sqlite3:execute - db - (conc "INSERT OR REPLACE INTO tests (run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment) " - "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?);") - new-run-id (cddr (vector->list testdat))) - (set! new-testdat (car (mt:get-tests-for-run new-run-id (conc testname "/" item-path) '() '()))) - (hash-table-set! curr-tests-hash full-name new-testdat) ;; this could be confusing, which record should go into the lookup table? - ;; Now duplicate the test steps - (debug:print 4 *default-log-port* "Copying records in test_steps from test_id=" (db:test-get-id testdat) " to " (db:test-get-id new-testdat)) - (cdb:remote-run ;; to be replaced, note: this routine is not used currently - (lambda () - (sqlite3:execute - db - (conc "INSERT OR REPLACE INTO test_steps (test_id,stepname,state,status,event_time,comment) " - "SELECT " (db:test-get-id new-testdat) ",stepname,state,status,event_time,comment FROM test_steps WHERE test_id=?;") - (db:test-get-id testdat)) - ;; Now duplicate the test data - (debug:print 4 *default-log-port* "Copying records in test_data from test_id=" (db:test-get-id testdat) " to " (db:test-get-id new-testdat)) - (sqlite3:execute - db - (conc "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment) " - "SELECT " (db:test-get-id new-testdat) ",category,variable,value,expected,tol,units,comment FROM test_data WHERE test_id=?;") - (db:test-get-id testdat)))) - )) - prev-tests))) - -(define doc-template - '(*TOP* - (*PI* xml "version='1.0'") - (testsuite))) - -(define (runs:update-junit-test-reporter-xml run-id) - (let* ( - (junit-test-reporter (configf:lookup *configdat* "runs" "junit-test-reporter-xml")) - (junit-test-report-dir (configf:lookup *configdat* "runs" "junit-test-report-dir")) - (xml-dir (if (and junit-test-reporter (equal? junit-test-reporter "yes" )) - (if junit-test-report-dir - junit-test-report-dir - (conc (getenv "MT_LINKTREE") "/" (getenv "MT_TARGET") "/" (getenv "MT_RUNNAME"))) - #f)) - (xml-ts-name (if xml-dir - (conc (getenv "MT_TESTSUITENAME")"."(string-translate (getenv "MT_TARGET") "/" ".") "." (getenv "MT_RUNNAME")) - #f)) - (keyname (if xml-ts-name (common:get-signature xml-ts-name) #f)) - (xml-path (if xml-dir - (conc xml-dir "/" keyname ".xml") - #f)) - - (test-data (if xml-dir - (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses - #f #f ;; offset limit - #f ;; not-in - #f ;; sort-by - #f ;; sort-order - #f ;; get full data (not 'shortlist) - 0 ;; (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time - #f) - '())) - (tests-count (if xml-dir (length test-data) #f))) - (if (and junit-test-reporter (equal? junit-test-reporter "yes" )) - (begin - ;((sxml-modify! `("testsuite" insert-into (@ (name ,xml-ts-name) (tests ,tests-count)))) doc) - - (let loop ((test (car test-data)) - (tail (cdr test-data)) - (doc doc-template) - (fail-cnt 0) - (error-cnt 0)) - (let* ((test-name (vector-ref test 2)) - (test-itempath (vector-ref test 11)) - (tc-name (conc test-name (if (and test-itempath (not (equal? test-itempath ""))) (conc "." (string-translate test-itempath "/" "." )) ""))) - (test-state (vector-ref test 3)) - (comment (vector-ref test 14)) - (test-status (vector-ref test 4)) - (exc-msg (conc "No bucket for State " test-state " Status " test-status)) - (new-doc (cond - ((member test-state (list "RUNNING" )) - ((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (inProgress)))) doc)) - ((member test-state (list "LAUNCHED" "REMOTEHOSTSTART" "NOT_STARTED")) - ((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (inQueue)))) doc)) - ((member test-status (list "PASS" "WARN" "WAIVED")) - ((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name))))) doc)) - ((member test-status (list "FAIL" "CHECK")) - ((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (failure (@ (message ,comment) (type "failure")))))) doc)) - ((member test-status (list "DEAD" "KILLED" "ABORT" "PREQ_FAIL" "PREQ_DISCARDED")) - ((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (failure (@ (message ,comment) (type "error")))))) doc)) - ((member test-status (list "SKIP")) - ((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (skipped (@ (type "skipped")))))) doc)) - (else - (debug:print 0 *default-log-port* (conc "What do I do with State " test-state " Status " test-status)) - ((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (failure (@ (message ,exc-msg) (type "error")))))) doc)))) - (new-error-cnt (if (member test-status (list "DEAD" "KILLED" "ABORT" "PREQ_FAIL" "PREQ_DISCARDED")) - (+ error-cnt 1) - error-cnt)) - (new-fail-cnt (if (member test-status (list "FAIL" "CHECK")) - (+ fail-cnt 1) - fail-cnt))) - (if (null? tail) - (let* ((final-doc ((sxml-modify `("testsuite" insert-into (@ (name ,xml-ts-name) (tests ,tests-count) (errors ,error-cnt) (failures ,fail-cnt)))) new-doc))) - (debug:print 0 *default-log-port* "modify attrib error=" error-cnt " fail= " fail-cnt) - (handle-exceptions - exn - (let* ((msg ((condition-property-accessor 'exn 'message) exn))) - (debug:print 0 *default-log-port* (conc "WARNING: Failed to update file" xml-path". Message:" msg ", exn=" exn))) - - (if (not (file-exists? xml-dir)) - (create-directory xml-dir #t)) - (if (not (rmt:no-sync-get/default keyname #f)) - (begin - (rmt:no-sync-set keyname "on") - (debug:print 0 *default-log-port* "creating xml at " xml-path) - (with-output-to-file xml-path - (lambda () - (print (sxml-serializer#serialize-sxml final-doc ns-prefixes: (list (cons 'gnm "http://foo")))))) - (rmt:no-sync-del! keyname)) - (debug:print 0 *default-log-port* "Could not get the lock. Skip writing the xml file.")))) - (loop (car tail) (cdr tail) new-doc new-fail-cnt new-error-cnt)))))))) - - -;; clean cache files -(define (runs:clean-cache target runname toppath) - (if target - (if runname - (let* ((linktree (common:get-linktree)) ;; (if toppath (configf:lookup *configdat* "setup" "linktree"))) - (runtop (conc linktree "/" target "/" runname)) - (files (if (common:file-exists? runtop) - (append (glob (conc runtop "/.megatest*")) - (glob (conc runtop "/.runconfig*"))) - '()))) - (if (null? files) - (debug:print-info 2 *default-log-port* "No cached megatest or runconfigs files found. None removed.") - (begin - (debug:print-info 2 *default-log-port* "Removing cached files:\n " (string-intersperse files "\n ")) - (for-each - (lambda (f) - (handle-exceptions - exn - (debug:print 0 *default-log-port* "WARNING: Failed to remove file " f ", exn=" exn) - (delete-file f))) - files)))) - (debug:print-error 0 *default-log-port* "-clean-cache requires -runname.")) - (debug:print-error 0 *default-log-port* "-clean-cache requires -target or -reqtarg"))) + dbmod + megatestmod + (prefix mtargs args:) + mtmod + tasksmod + servermod + ) + ADDED runsmod.scm Index: runsmod.scm ================================================================== --- /dev/null +++ runsmod.scm @@ -0,0 +1,4546 @@ +;;====================================================================== +;; Copyright 2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +;;====================================================================== +;; Cpumod: +;; +;; Put things here don't fit anywhere else +;;====================================================================== + +(declare (unit runsmod)) +(declare (uses debugprint)) +(declare (uses mtargs)) +(declare (uses commonmod)) +(declare (uses configfmod)) +(declare (uses dbfile)) +(declare (uses dbmod)) +(declare (uses rmtmod)) +(declare (uses servermod)) +(declare (uses processmod)) +(declare (uses pgdb)) +(declare (uses mtmod)) +(declare (uses megatestmod)) +(declare (uses tasksmod)) +(declare (uses testsmod)) +(declare (uses subrunmod)) +(declare (uses archivemod)) +(declare (uses fsmod)) + +(use srfi-69) + +(module runsmod + * + +(import scheme) +(cond-expand + (chicken-4 + + (import chicken + ports + data-structures + extras + files + matchable + pathname-expand + posix + posix-extras + regex + regex-case + sparse-vectors + + ) + (use srfi-69)) + (chicken-5 + (import (prefix sqlite3 sqlite3:) + chicken.base + chicken.condition + chicken.file + chicken.file.posix + chicken.io + chicken.pathname + chicken.port + chicken.process + chicken.process-context + chicken.process-context.posix + chicken.sort + chicken.string + chicken.time + chicken.time.posix + + matchable + md5 + message-digest + pathname-expand + regex + regex-case + system-information + + ))) + +;; imports common to ck4 and ck5 +(import srfi-1 + srfi-13 + srfi-18 + srfi-69 + typed-records + (prefix base64 base64:) + (prefix sqlite3 sqlite3:) + md5 + message-digest + z3 + directory-utils + sxml-serializer + sxml-modifications + + debugprint + commonmod + configfmod + (prefix mtargs args:) + dbmod + dbfile + rmtmod + servermod + processmod + pgdb + mtmod + megatestmod + tasksmod + testsmod + subrunmod + archivemod + fsmod + ) + +(include "common_records.scm") +(include "key_records.scm") +(include "db_records.scm") +(include "run_records.scm") +(include "test_records.scm") + +;; use this struct to facilitate refactoring +;; + +(defstruct runs:dat + reglen regfull + runname max-concurrent-jobs run-id + test-patts required-tests test-registry + registry-mutex flags keyvals run-info all-tests-registry + ;; stores results from last runs:can-run-more-tests + (can-run-more-tests #f) ;; (list can-run-more-flag num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit) + ((can-run-more-tests-count 0) : fixnum) + (last-fuel-check 0) ;; time when we last checked fuel + (beginning-of-time (current-seconds)) + (load-mgmt-function #f) + (wait-for-jobs-function #f) + (last-load-check-time 0) + (last-jobs-check-time 0) + ) + +(defstruct runs:testdat + hed tal reg reruns test-record + test-name item-path jobgroup + waitons testmode newtal itemmaps prereqs-not-met) + +;; look in the $MT_RUN_AREA_HOME/.softlocks directory for key-host-pid.softlock files +;; - remove any that are over 3600 seconds old +;; - if there are any that are younger than 10 seconds +;; * sleep 10 seconds +;; * touch my key-host-pid.softlock file +;; * return +;; - if there are no files younger than 10 seconds +;; * touch my key-host-pid.softlock file +;; * return +;; +(define (runs:wait-on-softlock rdat key) + (if (not (and *toppath* (file-exists? *toppath*))) ;; don't seem to have toppath yet + (debug:print-info 0 *default-log-port* "Can't create softlocks - don't see MTRAH yet.") + (let* ((softlocks-dir (conc *toppath* "/.softlocks"))) + (if (not (file-exists? softlocks-dir)) + (create-directory softlocks-dir #t)) + (let* ((my-lock-file (conc softlocks-dir "/" key "-" (get-host-name) "-" (current-process-id) ".softlock")) + (lock-files (filter (lambda (x) + (not (equal? x my-lock-file))) + (glob (conc softlocks-dir "/" key "*.softlock")))) + (fresh-locks (any (lambda (x) ;; do we have any locks younger than 10 seconds + (let* ((mod-time (file-modification-time x)) + (age (- (current-seconds) mod-time))) + (cond + ((> age 3600) ;; too old to keep, remove it + (delete-file* x) #f) + ((< age 10) #t) + (else #f)))) + lock-files))) + (if fresh-locks + (begin + (if (runs:lownoise "runners-softlock-wait" 360) + (debug:print-info 0 *default-log-port* "Other runners in flight, giving up some time...")) + (thread-sleep! 2)) + (begin + (if (runs:lownoise "runners-softlock-nowait" 360) + (debug:print-info 0 *default-log-port* "No runners in flight, updating softlock")) + (let* ((ouf (open-output-file my-lock-file))) + (with-output-to-port ouf (lambda ()(print (current-seconds)))) + (close-output-port ouf)))) + (runs:dat-last-fuel-check-set! rdat (current-seconds)))))) + +;; Fourth try, do accounting through time.... +;; +(define (runs:parallel-runners-mgmt rdat) + (let ((time-to-check (configf:lookup-number *configdat* "runners" "time-to-check" default: 10)) ;; 28 + (time-to-wait (configf:lookup-number *configdat* "runners" "time-to-wait" default: 30)) + (now-time (current-seconds))) + (if (> (- now-time (runs:dat-last-fuel-check rdat)) time-to-check) ;; time to check + (runs:wait-on-softlock rdat "runners")))) + +;; To test parallel-runners management start a repl: +;; megatest -repl +;; then run: +;; (runs:test-parallel-runners 60) +;; +(define (runs:test-parallel-runners duration #!optional (proc #f)) + (let* ((rdat (make-runs:dat)) + (rtime 0) + (startt (current-seconds)) + (endt (+ startt duration))) + ((or proc runs:parallel-runners-mgmt) rdat) + (let loop () + (let* ((wstart (current-seconds))) + (if (< wstart endt) + (let* ((work-time (random 10))) + #;(debug:print-info 0 *default-log-port* "working for " work-time + " seconds. Total work: " rtime ", elapsed time: " (- wstart startt)) + (thread-sleep! work-time) + (set! rtime (+ rtime work-time)) + ((or proc runs:parallel-runners-mgmt) rdat) + (loop))))) + (let* ((done-time (current-seconds))) + (debug:print-info 0 *default-log-port* "DONE: rtime=" rtime ", elapsed time=" (- done-time startt) + ", ratio=" (/ rtime (- done-time startt)))))) + +(define (runs:get-mt-env-alist run-id runname target testname itempath) + ;;(bb-check-path msg: "runs:set-megatest-env-vars entry") + `(("MT_TEST_NAME" . ,testname) + + ("MT_ITEMPATH" . ,itempath) + + ("MT_TARGET" . ,target) + + ("MT_RUNNAME" . ,runname) + + ("MT_RUN_AREA_HOME" . ,*toppath*) + + ,@(let* ((link-tree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree"))) + (if link-tree + (list (cons "MT_LINKTREE" link-tree) + + (cons "MT_TEST_RUN_DIR" + (conc link-tree "/" target "/" runname "/" testname + (if (and (string? itempath) (not (equal? itempath ""))) + (conc "/" itempath) + ""))) + ) + '())) + + ,@(map + (lambda (key) + (cons (car key) (cadr key))) + (keys:target->keyval (rmt:get-keys) target)) + + ,@(map (lambda (var) + (let ((val (configf:lookup *configdat* "env-override" var))) + (cons var val))) + (configf:section-vars *configdat* "env-override")))) + + +;; Every time can-run-more-tests is called increment the delay +;; +;; NOTE: We run this server-side!! Do not use this global except in the runs:can-run-more-tests routine +;; +(define *last-num-running-tests* 0) +;; (define *runs:can-run-more-tests-count* 0) +(define (runs:shrink-can-run-more-tests-count runsdat) + (runs:dat-can-run-more-tests-count-set! runsdat 0)) + +(define (runs:inc-can-run-more-tests-count runsdat) + (runs:dat-can-run-more-tests-count-set! + runsdat + (+ (runs:dat-can-run-more-tests-count runsdat) 1))) + +;; (set! *runs:can-run-more-tests-count* 0)) ;; (/ *runs:can-run-more-tests-count* 2))) + +;; Temporary globals. Move these into the logic or into common +;; +(define *seen-cant-run-tests* (make-hash-table)) ;; use to track tests that we suspect cannot be run +(define (runs:inc-cant-run-tests testname) + (hash-table-set! *seen-cant-run-tests* testname + (+ (hash-table-ref/default *seen-cant-run-tests* testname 0) 1))) + +(define (runs:can-keep-running? testname n) + (< (hash-table-ref/default *seen-cant-run-tests* testname 0) n)) + +(define *runs:denoise* (make-hash-table)) ;; key => last-time-ran + +;; mechanism to limit printing info to the screen that is repetitive. +;; +;; Example: +;; (if (runs:lownoise "waiting on tasks" 60) +;; (debug:print-info 2 *default-log-port* "waiting for tasks to complete, sleeping briefly ...")) +;; +(define (runs:lownoise key waitval) + (let ((lasttime (hash-table-ref/default *runs:denoise* key 0)) + (currtime (current-seconds))) + (if (> (- currtime lasttime) waitval) + (begin + (hash-table-set! *runs:denoise* key currtime) + #t) + #f))) + +(define *last-test-launch* 0) +(define *too-soon-delays* (make-hash-table)) + +;; to-soon delay, when matching event happened in less than dseconds delay wseconds +;; +(define (runs:too-soon-delay key dseconds wseconds) + (let* ((last-time (hash-table-ref/default *too-soon-delays* key #f))) + (if (and last-time + (< (- (current-seconds) last-time) dseconds)) + (begin + (if (runs:lownoise (conc "too-soon-delay"key) 60) + (debug:print-info 2 *default-log-port* "Polling throttle for "key)) + (thread-sleep! wseconds))) + (hash-table-set! *too-soon-delays* key (current-seconds)))) + +(define (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs) + + ;; Take advantage of a good place to exit if running the one-pass methodology + (if (and (> (runs:dat-can-run-more-tests-count runsdat) 20) + (args:get-arg "-one-pass")) + (exit 0)) + + (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat))) + + (let* ((num-running (rmt:get-count-tests-running-for-run-id run-id)) + (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)) + (job-group-limit (let ((jobg-count (configf:lookup *configdat* "jobgroups" jobgroup))) + (if (string? jobg-count) + (string->number jobg-count) + jobg-count)))) + (if (> (+ num-running num-running-in-jobgroup) 0) + (runs:inc-can-run-more-tests-count runsdat)) ;; (set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1))) + (if (not (eq? *last-num-running-tests* num-running)) + (begin + (debug:print 2 *default-log-port* "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running) + (set! *last-num-running-tests* num-running))) + (if (not (eq? 0 *globalexitstatus*)) + (list #f num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit) + (let* ((can-not-run-more (cond + ;; if max-concurrent-jobs is set and the number running is greater + ;; than it then cannot run more jobs + ((and max-concurrent-jobs (>= num-running max-concurrent-jobs)) + (if (runs:lownoise "mcj msg" 60) + (debug:print 0 *default-log-port* "WARNING: Max running jobs exceeded, current number running: " num-running + ", max_concurrent_jobs: " max-concurrent-jobs)) + #t) + ;; if job-group-limit is set and number of jobs in the group is greater + ;; than the limit then cannot run more jobs of this kind + ((and job-group-limit + (>= num-running-in-jobgroup job-group-limit)) + (if (runs:lownoise (conc "maxjobgroup " jobgroup) 60) + (debug:print 1 *default-log-port* "WARNING: number of jobs " num-running-in-jobgroup + " in jobgroup \"" jobgroup "\" exceeds limit of " job-group-limit)) + #t) + (else #f)))) + (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit))))) + +(define (runs:run-pre-hook run-id) + (let* ((run-pre-hook (configf:lookup *configdat* "runs" "pre-hook")) + (existing-tests (if run-pre-hook + (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses + #f #f ;; offset limit + #f ;; not-in + #f ;; sort-by + #f ;; sort-order + #f ;; get full data (not 'shortlist) + 0 ;; (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time + 'dashboard) + '())) + (log-dir (conc *toppath* "/logs")) + (log-file (conc "pre-hook-" (string-translate (getenv "MT_TARGET") "/" "-") "-" (getenv "MT_RUNNAME") ".log")) + (full-log-fname (conc log-dir "/" log-file))) + (if run-pre-hook + (if (null? existing-tests) + (let* ((use-log-dir (if (not (directory-exists? log-dir)) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir ", exn=" exn) + #f) + (create-directory log-dir #t) + #t) + #t)) + (start-time (current-seconds)) + (actual-logf (if use-log-dir full-log-fname log-file))) + (handle-exceptions + exn + (begin + (print-call-chain *default-log-port*) + (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) + (debug:print 0 *default-log-port* "ERROR: failed to run pre-hook " run-pre-hook ", check the log " log-file)) + (debug:print-info 0 *default-log-port* "running run-pre-hook: \"" run-pre-hook "\", log is " actual-logf) + (system (conc run-pre-hook " >> " actual-logf " 2>&1")) + (debug:print-info 0 *default-log-port* "pre-hook \"" run-pre-hook "\" took " (- (current-seconds) start-time) " seconds to run."))) + (debug:print 0 *default-log-port* "Skipping pre-hook call \"" run-pre-hook "\" as there are existing tests for this run."))))) + + + +(define (runs:run-post-hook run-id) + (let* ((run-post-hook (configf:lookup *configdat* "runs" "post-hook")) + (existing-tests (if run-post-hook + (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses + #f #f ;; offset limit + #f ;; not-in + #f ;; sort-by + #f ;; sort-order + #f ;; get full data (not 'shortlist) + 0 ;; (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time + 'dashboard) + '())) + (log-dir (conc *toppath* "/logs")) + (log-file (conc "post-hook-" (string-translate (getenv "MT_TARGET") "/" "-") "-" (getenv "MT_RUNNAME") ".log")) + (full-log-fname (conc log-dir "/" log-file))) + (if run-post-hook + ;; (if (null? existing-tests) + ;; (debug:print 0 *default-log-port* "Skipping post-hook call \"" run-post-hook "\" as there are existing tests for this run."))))) + (let* ((use-log-dir (if (not (directory-exists? log-dir)) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir ", exn=" exn) + #f) + (create-directory log-dir #t) + #t) + #t)) + (start-time (current-seconds)) + (actual-logf (if use-log-dir full-log-fname log-file))) + (handle-exceptions + exn + (begin + (print-call-chain *default-log-port*) + (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) + (debug:print 0 *default-log-port* "ERROR: failed to run post-hook " run-post-hook ", check the log " log-file)) + (debug:print-info 0 *default-log-port* "running run-post-hook: \"" run-post-hook "\", log is " actual-logf) + (system (conc run-post-hook " >> " actual-logf " 2>&1")) + (debug:print-info 0 *default-log-port* "post-hook \"" run-post-hook "\" took " (- (current-seconds) start-time) " seconds to run.")))))) + + +(define (runs:rerun-hook test-id new-test-path testdat rerunlst) + (let* ((rerun-hook (configf:lookup *configdat* "runs" "rerun-hook")) + (log-dir (conc *toppath* "/reruns/logs")) + (target (getenv "MT_TARGET")) + (runname (common:args-get-runname)) + (rundir (db:test-get-rundir testdat)) + (tarfiledir (conc *toppath* "/reruns")) + (status (db:test-get-status testdat)) + (comment (conc "\"" (db:test-get-comment testdat) "\"" )) + (testname (db:test-get-testname testdat)) + (itempath (db:test-get-item-path testdat)) + (file-body (conc status "-" (string-translate target "/" "-") "-" runname "." testname (if (not (string=? itempath "")) (conc "." (string-translate itempath "/" "-")) ""))) + (log-file (conc file-body ".log")) + ;; (log-file (conc status "-" (string-translate target "/" "-") "-" runname "." testname (if (not (string=? itempath "")) (conc "." (string-translate itempath "/" "-")) "") ".log")) + (full-log-fname (conc log-dir "/" log-file)) + (tarfilename (conc file-body ".tar")) + ;; (tarfilename (conc status "." (string-translate target "/" "-") "." runname "." testname (if (not (string=? itempath "")) (conc "." (string-translate itempath "/" "-")) "") ".tar")) + ) + (if rerun-hook + (let* ((use-log-dir (if (not (directory-exists? log-dir)) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir ", exn=" exn) + #f) + (create-directory log-dir #t) + #t) + #t)) + (start-time (current-seconds)) + (actual-logf (if use-log-dir full-log-fname log-file)) + (sys-call-text (conc rerun-hook " " tarfilename " " rundir " " actual-logf " " runname " " tarfiledir " " status " " target " " comment " " testname " " itempath " >> " actual-logf " 2>&1")) + ) + (debug:print 2 *default-log-port* "Found rerun-hook in config:" rerun-hook) + (handle-exceptions + exn + (begin + (print-call-chain *default-log-port*) + (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) + (debug:print 0 *default-log-port* "ERROR: failed to run rerun-hook " rerun-hook ", check the log " log-file)) + (debug:print-info 0 *default-log-port* "running rerun-hook: \"" rerun-hook "\", log is " actual-logf) + ;; call the hook + (debug:print-info 0 *default-log-port* "Calling rerun-hook for " test-id new-test-path testdat rerunlst) + (debug:print-info 0 *default-log-port* "rerun hook: " rerun-hook) + (debug:print-info 0 *default-log-port* "tarfilename: " tarfilename) + (debug:print-info 0 *default-log-port* "rundir: " rundir) + (debug:print-info 0 *default-log-port* "actual-logf: " actual-logf) + (debug:print-info 0 *default-log-port* "runname: " runname) + (debug:print-info 0 *default-log-port* "sys-call-text: " sys-call-text) + (system sys-call-text) + (debug:print-info 0 *default-log-port* "rerun-hook \"" rerun-hook "\" took " (- (current-seconds) start-time) " seconds to run.")))))) + + + + +;; return #t when all items in waitors-upon list are represented in test-patt, #f otherwise. +(define (runs:testpatts-mention-waitors-upon? test-patt waitors-upon) + (null? (tests:filter-test-names-not-matched waitors-upon test-patt))) + +;;====================================================================== +;; runs:run-tests is called from megatest.scm and itself +;;====================================================================== +;; +;; test-names: Comma separated patterns same as test-patts but used in selection +;; of tests to run. The item portions are not respected. +;; FIXME: error out if /patt specified +;; +;; run-count is passed from megatest.scm as configf:lookup *configdat* "setup" "reruns", or defaults to 1. +(define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names + (let* ((keys (keys:config-get-fields *configdat*)) + (keyvals (keys:target->keyval keys target)) + (run-id (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour"))) ;; test-name))) + ;; (deferred '()) ;; delay running these since they have a waiton clause + (runconfigf (conc *toppath* "/runconfigs.config")) + (mtconfig (conc *toppath* "/megatest.config")) + (readonly-mode (not (file-write-access? mtconfig))) + (test-records (make-hash-table)) + ;; need to process runconfigs before generating these lists + (all-tests-registry #f) ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names + (all-test-names #f) ;; (hash-table-keys all-tests-registry)) + (test-names #f) ;; Generated by a call to (tests:filter-test-names all-test-names test-patts)) + (required-tests #f) ;; Put fully qualified test/testpath names in this list to be done + (waitors-upon (make-hash-table)) ;; given a test, return list of tests waiting upon this test. + (task-key (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id))) + ;; (tdbdat (tasks:open-db)) + (config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns"))) + (if x (string->number x) #f))) + (allowed-tests #f) + (runconf #f)) + + ;; check if readonly + (when readonly-mode + (debug:print-error 0 *default-log-port* "Megatest database is readonly. Cannot proceed.") + (exit 1)) + + ;; per user request. If less than 100Meg space on dbdir partition, bail out with error + ;; this will reduce issues in database corruption + (common:check-db-dir-and-exit-if-insufficient) + + ;; override the number of reruns from the configs + ;; this needs to be done at the place where is first runs:run-tests called + ;(if (and config-reruns + ; (> run-count config-reruns)) + ;(set! run-count config-reruns)) + + ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) + + (let ((sighand (lambda (signum) + ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting + (set! *time-to-exit* #t) + (debug:print 0 *default-log-port* "Received signal " signum ", cleaning up before exit. Please wait...") + (let ((th1 (make-thread (lambda () + ;; (let ((tdbdat (tasks:open-db))) + (rmt:tasks-set-state-given-param-key task-key "killed") ;; ) + (debug:print 0 *default-log-port* "Killed by signal " signum ". Exiting") + (thread-sleep! 3) + (exit)))) + (th2 (make-thread (lambda () + (thread-sleep! 5) + (debug:print 0 *default-log-port* "Done") + (exit 4))))) + (thread-start! th2) + (thread-start! th1) + (thread-join! th2))))) + (set-signal-handler! signal/int sighand) + (set-signal-handler! signal/term sighand)) + + ;; force the starting of a server -- removed BB 17ww28 - no longer needed. + ;;(debug:print 0 *default-log-port* "waiting on server...") + ;;(server:start-and-wait *toppath*) + + (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process + (set! runconf (if (common:file-exists? runconfigf) + (setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target) + (begin + (debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf) + #f))) + + (if (not test-patts) ;; first time in - adjust testpatt + (set! test-patts (common:args-get-testpatt runconf))) + ;; if test-patts is #f at this point there is something wrong and we need to bail out + (if (not test-patts) + (begin + (debug:print 0 *default-log-port* "WARNING: there is no test pattern for this run. Exiting now.") + (exit 0))) + + (if (args:get-arg "-tagexpr") + (begin + (set! allowed-tests (string-join (runs:get-tests-matching-tags (args:get-arg "-tagexpr")) ",")) + (debug:print-info 0 *default-log-port* "filtering initial test list with tagexpr: " (args:get-arg "-tagexpr") " => " allowed-tests) + ));; tests will be ANDed with this list + + ;; register this run in monitor.db + (rmt:tasks-add "run-tests" user target runname test-patts task-key) ;; params) + (rmt:tasks-set-state-given-param-key task-key "running") + + #;(common:telemetry-log "run-tests" + payload: + `( (target . ,target) + (run-name . ,runname) + (test-patts . ,test-patts) ) ) + + + ;; Now generate all the tests lists + (set! all-tests-registry (tests:get-all)) ;; hash of testname => path-to-test + (set! all-test-names (hash-table-keys all-tests-registry)) + ;; filter first for allowed-tests (from -tagexpr) then for test-patts. + (set! test-names (tests:filter-test-names + (if allowed-tests + (tests:filter-test-names all-test-names allowed-tests) + all-test-names) + test-patts)) + + ;; I think seeding required-tests with all test-names makes sense but lack analysis to back that up. + + ;; NEW STRATEGY HERE: + ;; 1. fill required tests with test-patts + ;; 2. scan testconfigs and if waitons, itemwait, itempatt calc prior test test-patt + ;; 3. repeat until all deps propagated + + ;; any tests with direct mention in test-patts can be added to required + ;;(set! required-tests (lset-intersection equal? (string-split test-patts ",") all-test-names)) + (set! required-tests (tests:filter-test-names all-test-names test-patts)) + ;; + ;; (set! required-tests (lset-intersection equal? test-names all-test-names)) + + ;; look up all tests matching the comma separated list of globs in + ;; test-patts (using % as wildcard) + + ;; (set! test-names (delete-duplicates (tests:get-valid-tests *toppath* test-patts))) + (debug:print-info 0 *default-log-port* "tests search path: " (string-intersperse (tests:get-tests-search-path *configdat*) " ")) + (debug:print-info 2 *default-log-port* "all tests: " (string-intersperse (sort all-test-names string<) " ")) + (debug:print-info 0 *default-log-port* "test names: " (string-intersperse (sort test-names string<) " ")) + (debug:print-info 0 *default-log-port* "required tests: " (string-intersperse (sort required-tests string<) " ")) + + ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if + ;; -keepgoing is specified + (if (eq? *passnum* 0) + (begin + ;; Is this still necessary? I think not. Unreachable tests are marked as such and + ;; should not cause problems here. + ;; + ;; have to delete test records where NOT_STARTED since they can cause -keepgoing to + ;; get stuck due to becoming inaccessible from a failed test. I.e. if test B depends + ;; on test A but test B reached the point on being registered as NOT_STARTED and test + ;; A failed for some reason then on re-run using -keepgoing the run can never complete. + ;; + ;; (rmt:general-call 'delete-tests-in-state run-id "NOT_STARTED") + + ;; Now convert anything in allow-auto-rerun to NOT_STARTED + ;; + (for-each + (lambda (state-status) + (let* ((ss-lst (string-split-fields "/" state-status #:infix)) + (state (if (> (length ss-lst) 0)(car ss-lst) #f)) + (status (if (> (length ss-lst) 1)(cadr ss-lst) #f))) + (rmt:set-tests-state-status run-id test-names state status "NOT_STARTED" status))) + ;; list of state/status pairs separated by spaces + (string-split (or (configf:lookup *configdat* "setup" "allow-auto-rerun") ""))))) + + ;; Ensure all tests are registered in the test_meta table + (runs:update-all-test_meta #f) + + ;; run the run prehook if there are no tests yet run for this run: + ;; + (runs:run-pre-hook run-id) + ;; mark all test launched flag as false in the meta table + (rmt:set-var (conc "lunch-complete-" run-id) "no") + (debug:print-info 1 *default-log-port* "Setting end-of-run to no") + (let* ((config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns"))) + (if x (string->number x) #f))) + (config-rerun-cnt (if config-reruns + config-reruns + 1))) + (if (eq? config-rerun-cnt run-count) + (rmt:set-var (conc "end-of-run-" run-id) "no"))) + + (rmt:set-run-state-status run-id "new" "n/a") + ;; now add non-directly referenced dependencies (i.e. waiton) + ;;====================================================================== + ;; refactoring this block into tests:get-full-data + ;; + ;; What happended, this code is now duplicated in tests!? + ;; + ;;====================================================================== + + (if (not (null? test-names)) ;; BEGIN test-names loop + (let loop ((hed (car test-names)) ;; NOTE: This is the main loop that iterates over the test-names + (tal (cdr test-names))) ;; 'return-procs tells the config reader to prep running system but return a proc + (debug:print-info 4 *default-log-port* "\n\ntestpatt elaboration loop => hed="hed " tal="tal" test-patts="test-patts" test-names="test-names) + (change-directory *toppath*) ;; PLEASE OPTIMIZE ME!!! I think this should be a no-op but there are several places where change-directories could be happening. + (setenv "MT_TEST_NAME" hed) ;; + (let*-values (((waitons waitors config) (tests:get-waitons hed all-tests-registry (tests:get-global-waitons *runconfigdat*))) + + ;; NOTE: Have the config - can extract [waitons] section + + ((hed-mode) + (let ((m (configf:lookup config "requirements" "mode"))) + (if m (map string->symbol (string-split m)) '(normal)))) + ((hed-itemized-waiton) ;; are items in hed waiting on items of waiton? + (not (null? (lset-intersection eq? hed-mode '(itemmatch itemwait))))) + ) + (debug:print-info 8 *default-log-port* "waitons: " waitons) + ;; check for hed in waitons => this would be circular, remove it and issue an + ;; error + (if (or (member hed waitons) + (member hed waitors)) + (begin + (debug:print-error 0 *default-log-port* "test " hed " has listed itself as a waiton or waitor, please correct this!") + (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons)) + (set! waitors (filter (lambda (x)(not (equal? x hed))) waitors)))) + + ;; (items (items:get-items-from-config config))) + (if (not (hash-table-ref/default test-records hed #f)) ;; waiton-tconfig below will be #f until that test is visted here at least once + (hash-table-set! test-records ;; BB: we are doing a manual make-tests:testqueue + hed (vector hed ;; 0 ;; testname + config ;; 1 + waitons ;; 2 + (configf:lookup config "requirements" "priority") ;; priority 3 + (tests:get-items config) ;; 4 ;; expand the [items] and or [itemstable] into explict items + #f ;; itemsdat 5 + #f ;; spare - used for item-path + waitors ;; + ))) + ;; update waitors-upon here + (for-each + (lambda (waiton) + (let* ((current-waitors-upon (hash-table-ref/default waitors-upon waiton '()))) + (debug:print-info 8 *default-log-port* " current-waiters-upon["waiton"] is "current-waitors-upon ) + (when (not (member hed current-waitors-upon)) + (debug:print-info 8 *default-log-port* " current-waiters-upon["waiton"] << "hed ) + (hash-table-set! waitors-upon waiton (cons hed current-waitors-upon))))) + (if (list? waitons) waitons '())) + (debug:print-info 8 *default-log-port* " process waitons&waitors of "hed": "(delete-duplicates (append waitons waitors))) + (for-each + (lambda (waiton) + (if (and waiton (not (member waiton test-names))) + (let* ((waitors-in-testpatt (runs:testpatts-mention-waitors-upon? test-patts (hash-table-ref/default waitors-upon waiton '()))) + (waiton-record (hash-table-ref/default test-records waiton #f)) + (waiton-tconfig (if waiton-record (vector-ref waiton-record 1) #f)) + (waiton-itemized (and waiton-tconfig + (or (hash-table-ref/default waiton-tconfig "items" #f) + (hash-table-ref/default waiton-tconfig "itemstable" #f)))) + (itemmaps (tests:get-itemmaps config)) ;; (configf:lookup config "requirements" "itemmap")) + (new-test-patts (tests:extend-test-patts test-patts hed waiton itemmaps hed-itemized-waiton))) + (debug:print-info 2 *default-log-port* "Test " waiton " has " (if waiton-record "a" "no") " waiton-record and" (if waiton-itemized " " " no ") "items") + ;; need to account for test-patt here, if I am test "a", selected with a test-patt of "hed/b%" + ;; and we are waiting on "waiton" we need to add "waiton/,waiton/b%" to test-patt + ;; is this satisfied by merely appending "/" to the waiton name added to the list? + ;; + ;; This approach causes all of the items in an upstream test to be run + ;; if we have this waiton already processed once we can analzye it for extending + ;; tests to be run, since we can't properly process waitons unless they have been + ;; initially added we add them again to be processed on second round AND add the hed + ;; back in to also be processed on second round + (if waiton-tconfig ;; BB: waiter should be in test-patts as well as the waiton have a tconfig. + (if waiton-itemized + (if waitors-in-testpatt + (begin + (debug:print-info 0 *default-log-port* "New test patts: " new-test-patts ", prev test patts: " test-patts) + (set! test-names (cons waiton test-names)) ;; need to process this one, only add once the waiton tconfig read + (set! required-tests (cons (conc waiton "/") required-tests)) + (set! test-patts new-test-patts)) + (begin + (debug:print-info 0 *default-log-port* "Waitor(s) not yet on testpatt for " waiton ", setting up to re-process it") + (set! tal (append (cons waiton tal)(list hed))))) + (begin + (debug:print-info 2 *default-log-port* "Adding non-itemized test " waiton " to required-tests") + (set! required-tests (cons waiton required-tests)) + (set! test-patts new-test-patts))) + (begin + (debug:print-info 0 *default-log-port* "No testconfig info yet for " waiton ", setting up to re-process it") + (set! tal (append (cons waiton tal)(list hed))))) ;; (cons (conc waiton "/") required-tests)) + ;; NOPE: didn't work. required needs to be plain test names. Try tacking on to test-patts + ;; - doesn't work + ;; (set! test-patts (conc test-patts "," waiton "/")) + ;; (set! test-names (cons waiton test-names))))) ;; was an append, now a cons + ))) + (delete-duplicates (append waitons waitors))) + (let ((remtests (delete-duplicates (append waitons tal)))) + (debug:print-info 8 *default-log-port* " remtests are "remtests) + (if (not (null? remtests)) + (begin + ;; (debug:print-info 0 *default-log-port* "Preprocessing continues for " (string-intersperse remtests ", ")) + (loop (car remtests)(cdr remtests)))))))) ;; END test-names loop + + (if (not (null? required-tests)) + (debug:print-info 1 *default-log-port* "Adding \"" (string-intersperse required-tests " ") "\" to the run queue")) + ;; NOTE: these are all parent tests, items are not expanded yet. + (debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records)) + (let ((reglen (configf:lookup *configdat* "setup" "runqueue"))) + (if (> (length (hash-table-keys test-records)) 0) + (let* ((keep-going #t) + (run-queue-retries 5) + (run-ids (rmt:get-all-run-ids))) + #;(for-each (lambda (run-id) + (if keep-going + (handle-exceptions + exn + (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn) + (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27) + run-ids) + (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests + (any->number reglen) all-tests-registry) + (set! keep-going #f) + (if (> run-count 0) ;; handle reruns + (begin + (if (not (hash-table-ref/default flags "-preclean" #f)) + (hash-table-set! flags "-preclean" #t)) + (if (not (hash-table-ref/default flags "-rerun" #f)) + (hash-table-set! flags "-rerun" "ABORT,STUCK/DEAD,n/a,ZERO_ITEMS")) + (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))) + (launch:end-of-run-check run-id))) + (debug:print-info 0 *default-log-port* "No tests to run"))) + (debug:print-info 4 *default-log-port* "All done by here") + ;; TODO: try putting post hook call here + + ; (debug:print-info 2 *default-log-port* " run-count " run-count) + ; (runs:run-post-hook run-id)) + ; (debug:print-info 2 *default-log-port* "Not calling post hook runcount = " run-count )) + (rmt:tasks-set-state-given-param-key task-key "done") + + ;; (sqlite3:finalize! tasks-db) + )) + + +;; loop logic. These are used in runs:run-tests-queue to make it a bit more readable. +;; +;; If reg not full and have items in tal then loop with (car tal)(cdr tal) reg reruns +;; If reg is full (i.e. length >= n +;; loop with (car reg) tal (cdr reg) reruns +;; If tal is empty +;; but have items in reg; loop with (car reg)(cdr reg) '() reruns +;; If reg is empty => all done + +(define (runs:queue-next-hed tal reg n regfull) + (if regfull + (if (null? reg) #f (car reg)) + (if (null? tal) ;; tal is used up, pop from reg + (if (null? reg) #f (car reg)) + (car tal)))) + +(define (runs:queue-next-tal tal reg n regfull) + (if regfull + tal + (if (null? tal) ;; must transfer from reg + (if (null? reg) '() (cdr reg)) + (cdr tal)))) + +(define (runs:queue-next-reg tal reg n regfull) + (if regfull + (if (null? reg) '() (cdr reg)) ;; EXPLORE: reorder (cdr reg) such that looping is more efficient + (if (null? tal) ;; if tal is null and reg not full then '() as reg contents moved to tal + '() + reg))) + +;; this is the list of parameters to the named loop "loop" near the top of runs:run-tests-queue, look around line 1216 +;; +(define (runs:loop-values tal reg reglen regfull reruns) + (list (runs:queue-next-hed tal reg reglen regfull) ;; hed + (runs:queue-next-tal tal reg reglen regfull) ;; tal + (runs:queue-next-reg tal reg reglen regfull) ;; reg + reruns)) ;; reruns + +;; objective - iterate thru tests +;; => want to prioritize tests we haven't seen before +;; => sometimes need to squeeze things in (added to reg) +;; => review of a previously seen test is higher priority of never visited test +;; reg - list of previously visited tests +;; tal - list of never visited tests +;; prefer next hed to be from reg than tal. + +(define runs:nothing-left-in-queue-count 0) + +;;====================================================================== +;; runs:expand-items is called by runs:run-tests-queue +;;====================================================================== +;; +;; return value of runs:expand-items is passed back to runs-tests-queue and is fed to named loop with this signature: +;; (let loop ((hed (car sorted-test-names)) +;; (tal (cdr sorted-test-names)) +;; (reg '()) ;; registered, put these at the head of tal +;; (reruns '())) +(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps) + (let* ((loop-list (list hed tal reg reruns)) + (junk (debug:print-info 4 *default-log-port* "expand-items calling rmt:get-prereqs-not-met")) + (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))) + (if (list? res) + res + (begin + (debug:print 0 *default-log-port* + "ERROR: rmt:get-prereqs-not-met returned non-list!\n" + " res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" testmode " itemmaps=" itemmaps) + '())))) + (have-itemized (not (null? (lset-intersection eq? testmode '(itemmatch itemwait))))) + ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) + (fails (runs:calc-fails prereqs-not-met)) + (prereq-fails (runs:calc-prereq-fail prereqs-not-met)) + (non-completed (runs:calc-not-completed prereqs-not-met)) + (runnables (runs:calc-runnable prereqs-not-met)) + (unexpanded-prereqs + (filter (lambda (testname) + (let* ((test-rec (hash-table-ref test-records testname)) + (items (tests:testqueue-get-items test-rec))) + ;;(BB> "HEY " testname "=>"items) + (or (procedure? items)(eq? items 'have-procedure)))) + waitons)) + + + ) + (debug:print-info 4 *default-log-port* "START OF INNER COND #2 " + "\n can-run-more: " can-run-more + "\n testname: " hed + "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met) + "\n non-completed: " (runs:pretty-string non-completed) + "\n prereq-fails: " (runs:pretty-string prereq-fails) + "\n fails: " (runs:pretty-string fails) + "\n testmode: " testmode + "\n (member 'toplevel testmode): " (member 'toplevel testmode) + "\n (null? non-completed): " (null? non-completed) + "\n reruns: " reruns + "\n items: " items + "\n can-run-more: " can-run-more) + + (cond + ;; all prereqs met, fire off the test + ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch + + ((and (not (member 'toplevel testmode)) + (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a) + '(DONOTRUN removed CANNOTRUN))) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here + (debug:print-info 4 *default-log-port* "cond branch - " "ei-1") + (debug:print-info 1 *default-log-port* "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue") + (if (or (not (null? tal)) + (not (null? reg))) + (runs:loop-values tal reg reglen regfull reruns) + (begin + (debug:print-info 0 *default-log-port* "Nothing left in the queue!") + ;; If get here twice then we know we've tried to expand all items + ;; since there must be a logic issue with the handling of loops in the + ;; items expand phase we will brute force an exit here. + (if (> runs:nothing-left-in-queue-count 2) + (begin + (debug:print 0 *default-log-port* "WARNING: this condition is triggered when there were no items to expand and nothing to run. Please check your run for completeness") + (exit 0)) + (set! runs:nothing-left-in-queue-count (+ runs:nothing-left-in-queue-count 1))) + #f))) + + ;; desired result of below cond branch: + ;; we want to expand items in our test of interest (hed) in the following cases: + ;; case 1 - mode is itemmatch or itemwait: + ;; - all prereq tests have been expanded + ;; - at least one prereq's items have completed + ;; case 2 - mode is toplevel + ;; - prereqs are completed. + ;; - or no prereqs can complete + ;; case 3 - mode not specified + ;; - prereqs are completed and passed (we could consider removing "and passed" -- it would change behavior from current) + ((or (null? prereqs-not-met) + (and (member 'toplevel testmode) + (null? non-completed))) + (debug:print-info 4 *default-log-port* "cond branch - " "ei-2") + (debug:print-info 4 *default-log-port* "runs:expand-items: (or (null? prereqs-not-met) (and (member 'toplevel testmode)(null? non-completed)))") + (let ((test-name (tests:testqueue-get-testname test-record))) + (setenv "MT_TEST_NAME" test-name) ;; + (setenv "MT_RUNNAME" runname) + (runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process + (let ((items-list (items:get-items-from-config tconfig))) + (if (list? items-list) + (begin + (if (null? items-list) + (let ((test-id (rmt:get-test-id run-id test-name "")) + (num-items (rmt:test-toplevel-num-items run-id test-name))) + (if (and test-id + (not (> num-items 0))) + (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "Failed to run due to failed prerequisites")))) + (tests:testqueue-set-items! test-record items-list) + (list hed tal reg reruns)) + (begin + (debug:print-error 0 *default-log-port* "The proc from reading the items table did not yield a list - please report this") + (exit 1)))))) + + ((and (null? fails) + (null? prereq-fails) + (not (null? non-completed))) + (debug:print-info 4 *default-log-port* "cond branch - " "ei-3") + (let* ((allinqueue (map (lambda (x)(if (string? x) x (db:test-get-testname x))) + (append newtal reruns))) + ;; prereqstrs is a list of test names as strings that are prereqs for hed + (prereqstrs (delete-duplicates (map (lambda (x)(if (string? x) x (db:test-get-testname x))) + prereqs-not-met))) + ;; a prereq that is not found in allinqueue will be put in the notinqueue list + ;; + ;; (notinqueue (filter (lambda (x) + ;; (not (member x allinqueue))) + ;; prereqstrs)) + (give-up #f)) + + ;; We can get here when a prereq has not been run due to *it* having a prereq that failed. + ;; We need to use this to dequeue this item as CANNOTRUN + ;; + (if (member 'toplevel testmode) ;; '(toplevel)) ;; NOTE: this probably should be (member 'toplevel testmode) + (for-each (lambda (prereq) + (if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN) + (set! give-up #t))) + prereqstrs)) + + (if (and give-up + (not (and (null? tal)(null? reg)))) + (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records)) + (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records))) + (debug:print 1 *default-log-port* "WARNING: test " hed " has discarded prerequisites, removing it from the queue") + + (let ((test-id (rmt:get-test-id run-id hed ""))) + (if test-id (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites"))) + + (if (and (null? trimmed-tal) + (null? trimmed-reg)) + #f + (runs:loop-values trimmed-tal trimmed-reg reglen regfull reruns) + )) + (list (car newtal)(append (cdr newtal) reg) '() reruns)))) + + ((and (null? fails) ;; have not-started tests, but unable to run them. everything looks completed with no prospect of unsticking something that is stuck. we should mark hed as moribund and exit or continue if there are more tests to consider + (null? prereq-fails) + (null? non-completed)) + (debug:print-info 4 *default-log-port* "cond branch - " "ei-4") + (if (runs:can-keep-running? hed 20) + (begin + (runs:inc-cant-run-tests hed) + (debug:print-info 0 *default-log-port* "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0) ", going to wait 60 sec.") ;; + ;; getting here likely means the system is way overloaded, kill a full minute before continuing + ;; (thread-sleep! 60) ;; TODO: gate by normalized server load > 1.0 (maxload config thing) CHECKTHIS!!! + ;; No runsdat, can't do this yet + ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat))) + ;; + (thread-sleep! 5) ;; TODO: gate by normalized server load > 1.0 (maxload config thing) + ;; num-retries code was here + ;; we use this opportunity to move contents of reg to tal + (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met? + (begin + (debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue") + (let ((test-id (rmt:get-test-id run-id hed ""))) + (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while."))) + (runs:loop-values tal reg reglen regfull reruns) + ))) + + ((and + (or (not (null? fails)) + (not (null? prereq-fails))) + (member 'normal testmode)) + (debug:print-info 4 *default-log-port* "cond branch - " "ei-5") + (debug:print-info 1 *default-log-port* "test " hed " (mode=" testmode ") has failed prerequisite(s); " + (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") + ", removing it from to-do list") + (let ((test-id (rmt:get-test-id run-id hed ""))) + (if test-id + (if (not (null? prereq-fails)) + (mt:test-set-state-status-by-id-unless-completed run-id test-id "COMPLETED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites") + (mt:test-set-state-status-by-id-unless-completed run-id test-id "COMPLETED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))) + ;; (debug:print 4 *default-log-port*"BB> set PREQ_FAIL on "hed) + ;; (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))) ;; BB: this works, btu equivalent for itemwait mode does not work. + (if (or (not (null? reg))(not (null? tal))) + (begin + (hash-table-set! test-registry hed 'CANNOTRUN) + (runs:loop-values tal reg reglen regfull (cons hed reruns)) + ) + #f)) ;; #f flags do not loop + + ((and (not (null? fails))(member 'toplevel testmode)) + (debug:print-info 4 *default-log-port* "cond branch - " "ei-6") + (if (or (not (null? reg))(not (null? tal))) + (list (car newtal)(append (cdr newtal) reg) '() reruns) + #f)) + ((null? runnables) + (debug:print-info 4 *default-log-port* "cond branch - " "ei-7") + #f) ;; if we get here and non-completed is null then it is all over. + (else + (debug:print-info 4 *default-log-port* "cond branch - " "ei-8") + (debug:print 2 *default-log-port* "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now") + (list (car newtal)(cdr newtal) reg reruns))))) + +(define (runs:mixed-list-testname-and-testrec->list-of-strings inlst) + (if (null? inlst) + '() + (map (lambda (t) + (cond + ((vector? t) + (let ((test-name (db:test-get-testname t)) + (item-path (db:test-get-item-path t)) + (test-state (db:test-get-state t)) + (test-status (db:test-get-status t))) + (conc test-name (if (equal? item-path "") "" "/") item-path ":" test-state "/" test-status))) + ((string? t) + t) + (else + (conc t)))) + inlst))) + + +;; hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmaps) +(define (runs:process-expanded-tests runsdat testdat) + ;; unroll the contents of runsdat and testdat (due to ongoing refactoring). + (debug:print 2 *default-log-port* "runs:process-expanded-tests; testdat:" ) + (debug:print 2 *default-log-port* (with-output-to-string + (lambda () (pp (runs:testdat->alist testdat) )))) + (let* ((hed (runs:testdat-hed testdat)) + (tal (runs:testdat-tal testdat)) + (reg (runs:testdat-reg testdat)) + (reruns (runs:testdat-reruns testdat)) + (test-name (runs:testdat-test-name testdat)) + (item-path (runs:testdat-item-path testdat)) + (jobgroup (runs:testdat-jobgroup testdat)) + (waitons (runs:testdat-waitons testdat)) + (item-path (runs:testdat-item-path testdat)) + (testmode (runs:testdat-testmode testdat)) + (newtal (runs:testdat-newtal testdat)) + (itemmaps (runs:testdat-itemmaps testdat)) + (test-record (runs:testdat-test-record testdat)) + (prereqs-not-met (runs:testdat-prereqs-not-met testdat)) + + (reglen (runs:dat-reglen runsdat)) + (regfull (runs:dat-regfull runsdat)) + (runname (runs:dat-runname runsdat)) + (max-concurrent-jobs (runs:dat-max-concurrent-jobs runsdat)) + (run-id (runs:dat-run-id runsdat)) + (test-patts (runs:dat-test-patts runsdat)) + (required-tests (runs:dat-required-tests runsdat)) + (test-registry (runs:dat-test-registry runsdat)) + (registry-mutex (runs:dat-registry-mutex runsdat)) + (flags (runs:dat-flags runsdat)) + (keyvals (runs:dat-keyvals runsdat)) + (run-info (runs:dat-run-info runsdat)) + (all-tests-registry (runs:dat-all-tests-registry runsdat)) + (run-limits-info (runs:dat-can-run-more-tests runsdat)) + ;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running + (have-resources (car run-limits-info)) + (num-running (list-ref run-limits-info 1)) + (num-running-in-jobgroup(list-ref run-limits-info 2)) + (max-concurrent-jobs (list-ref run-limits-info 3)) + (job-group-limit (list-ref run-limits-info 4)) + ;; (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) + ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) + (fails (if (list? prereqs-not-met) ;; TODO: rename fails to failed-prereqs + (runs:calc-fails prereqs-not-met) + (begin + (debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met) + '()))) + (non-completed (filter (lambda (x) ;; remove hed from not completed list, duh, of course it is not completed! + (not (equal? x hed))) + (runs:calc-not-completed prereqs-not-met))) + (loop-list (list hed tal reg reruns)) + ;; configure the load runner + (maxload (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3.0"))) ;; use a non-number string to disable + (maxhomehostload (string->number (or (configf:lookup *configdat* "jobtools" "maxhomehostload") "2.0"))) ;; use a non-number string to disable + (waitdelay (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60")))) + (debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: (" + (string-intersperse + (map (lambda (t) + (if (vector? t) + (conc (db:test-get-state t) "/" (db:test-get-status t)) + (conc " WARNING: t is not a vector=" t ))) + prereqs-not-met) + ", ") ") fails: " fails + "\nregistered? " (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) + + ;; well, first lets see if cpu load throttling is enabled. If so wait around until the + ;; average cpu load is under the threshold before continuing + ;; + (if (runs:dat-load-mgmt-function runsdat) + ((runs:dat-load-mgmt-function runsdat)) + (runs:dat-load-mgmt-function-set! + runsdat + (lambda () + ;; jobtools maxload is useful for where the full Megatest run is done on one machine + (if (and (not (rmt:on-homehost?)) + maxload) ;; only gate if maxload is specified, NOTE: maxload is normalized, i.e. load=1 means all cpus fully utilized + (common:wait-for-normalized-load maxload "Waiting for load to drop before starting more tests" #f)) + + ;; jobtools maxhomehostload is intended to prevent overloading on the homehost which can cause database corruption issues + (if maxhomehostload + (common:wait-for-homehost-load maxhomehostload + (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload)))))) + + + + (if (and (not (null? prereqs-not-met)) + (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) + (debug:print-info 2 *default-log-port* "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", "))) + + ;; Don't know at this time if the test have been launched at some time in the past + ;; i.e. is this a re-launch? + (debug:print-info 4 *default-log-port* "run-limits-info = " run-limits-info) + + (cond ; cond 894- 1067 + + ;; Check item path against item-patts, + ;; + ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run + ;; else the run is stuck, temporarily or permanently + ;; but should check if it is due to lack of resources vs. prerequisites + (debug:print-info 1 *default-log-port* "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts) + (if (or (not (null? tal))(not (null? reg))) + (runs:loop-values tal reg reglen regfull reruns) + #f)) + + ;; Register tests + ;; + ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) + (debug:print-info 4 *default-log-port* "Pre-registering test " test-name "/" item-path " to create placeholder" ) + ;; always do firm registration now in v1.60 and greater ;; (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs + (let register-loop ((numtries 15)) + (rmt:register-test run-id test-name item-path) + (if (rmt:get-test-id run-id test-name item-path) + (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'done) + (if (> numtries 0) + (begin + (thread-sleep! 0.5) + (register-loop (- numtries 1))) + (debug:print-error 0 *default-log-port* "failed to register test " (db:test-make-full-name test-name item-path))))) + (if (not (eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f) 'done)) + (begin + (rmt:register-test run-id test-name "") + (if (rmt:get-test-id run-id test-name "") + (hash-table-set! test-registry (db:test-make-full-name test-name "") 'done)))) + (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) + (if (and (null? tal)(null? reg)) + (list hed tal (append reg (list hed)) reruns) + (list (runs:queue-next-hed tal reg reglen regfull) ;; cannot replace with a call to runs:loop-values as the logic is different for reg + (runs:queue-next-tal tal reg reglen regfull) + ;; NB// Here we are building reg as we register tests + ;; if regfull we must pop the front item off reg + (if regfull + (append (cdr reg) (list hed)) + (append reg (list hed))) + reruns))) + + ;; At this point hed test registration must be completed. + ;; + ((eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f) + 'start) + (debug:print-info 0 *default-log-port* "Waiting on test registration(s): " + (string-intersperse + (filter (lambda (x) + (eq? (hash-table-ref/default test-registry x #f) 'start)) + (hash-table-keys test-registry)) + ", ")) + (thread-sleep! 0.051) + (list hed tal reg reruns)) + + ;; If no resources are available just kill time and loop again + ;; + ((not have-resources) ;; simply try again after waiting a second + (if (runs:lownoise "no resources" 600) + (debug:print-info 1 *default-log-port* "no resources to run new tests, waiting ...")) + + ;; Have gone back and forth on this but db starvation is an issue. + ;; wait one second before looking again to run jobs. + ;; (thread-sleep! 0.25) + + ;; new logic. + ;; If it has been more than 10 seconds since we were last here don't wait at all + ;; otherwise sleep 2 seconds to give db a rest and let dashboard read data + (if (runs:lownoise "frequent-no-resources" 10) + (thread-sleep! 0.25) ;; no significant delay + (thread-sleep! 2)) + ;; could have done hed tal here but doing car/cdr of newtal to rotate tests + (list (car newtal)(cdr newtal) reg reruns)) + + ;; This is the final stage, everything is in place so launch the test + ;; + ((and have-resources + (or (null? prereqs-not-met) + (and (member 'toplevel testmode) ;; 'toplevel) + (null? non-completed) + (not (member 'exclusive testmode))))) + ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path)) + ;; we are going to reset all the counters for test retries by setting a new hash table + ;; this means they will increment only when nothing can be run + (set! *max-tries-hash* (make-hash-table)) + + (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry runsdat testdat) + (set! *last-test-launch* (current-seconds)) + (runs:incremental-print-results run-id) + (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running) + (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) + ;; (thread-sleep! *global-delta*) + (if (or (not (null? tal))(not (null? reg))) + (runs:loop-values tal reg reglen regfull reruns) ;; hed should be dropped at this time + #f)) + + ;; must be we have unmet prerequisites + ;; + (else + (debug:print 4 *default-log-port* "FAILS: " fails) + ;; If one or more of the prereqs-not-met are FAIL then we can issue + ;; a message and drop hed from the items to be processed. + ;; (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) + (if (and (not (null? prereqs-not-met)) + (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) + (debug:print-info 1 *default-log-port* "waiting on tests; " (string-intersperse + (runs:mixed-list-testname-and-testrec->list-of-strings + prereqs-not-met) ", "))) + (if (or (null? fails) + (member 'toplevel testmode)) + (begin + ;; couldn't run, take a breather + (if (runs:lownoise "Waiting for more work to do..." 60) + (debug:print-info 0 *default-log-port* "Waiting for more work to do...")) + + ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat))) + (thread-sleep! 5) + (list (car newtal)(cdr newtal) reg reruns)) + ;; the waiton is FAIL so no point in trying to run hed ever again + (begin + (let ((my-test-id (rmt:get-test-id run-id test-name item-path))) + (mt:test-set-state-status-by-id-unless-completed run-id my-test-id "COMPLETED" "PREQ_FAIL" "Failed to run due to failed prerequisites2")) + (if (or (not (null? reg))(not (null? tal))) + (if (vector? hed) + (begin + (debug:print 1 *default-log-port* "WARNING: Dropping test " test-name "/" item-path + " from the launch list as it has prerequistes that are FAIL") + (let ((test-id (rmt:get-test-id run-id hed ""))) + (if test-id (mt:test-set-state-status-by-id-unless-completed run-id test-id "COMPLETED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))) + (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) + ;; (thread-sleep! *global-delta*) + ;; This next is for the items + + (if (not (null? fails)) + ;;(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "PREQ_FAIL" #f) + (rmt:set-state-status-and-roll-up-items run-id test-name item-path "NOT_STARTED" "PREQ_FAIL" #f) + ;;(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) + (rmt:set-state-status-and-roll-up-items run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) ) + (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed) + (runs:loop-values tal reg reglen regfull reruns)) + (let ((nth-try (hash-table-ref/default test-registry hed 0))) ;; hed not a vector... + (debug:print 2 *default-log-port* "nth-try("hed")="nth-try) + (cond + ((member "RUNNING" (map db:test-get-state prereqs-not-met)) + (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60) + (debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet.")) + (thread-sleep! 0.1) + (runs:loop-values tal reg reglen regfull reruns)) + ((or (not nth-try) ;; BB: condition on subsequent tries, condition below fires on first try + (and (number? nth-try) + (< nth-try 2))) + (hash-table-set! test-registry hed (if (number? nth-try) + (+ nth-try 1) + 0)) + (if (runs:lownoise (conc "not removing test " hed) 60) + (debug:print 1 *default-log-port* "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites")) + ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") + (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) + (runs:loop-values newtal reg reglen regfull reruns)) + ((symbol? nth-try) ;; BB: 'done matches here in one case where prereq itemwait failed. This is first "try" + (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW + (if (null? tal) + #f ;; yes, really + (list (car tal)(cdr tal) reg reruns)) + (begin + (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60) + (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state >" nth-try "< will be overridden and we'll retry.")) + ;; was: (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f) + (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path "COMPLETED" "PREQ_FAIL" #f) + (hash-table-set! test-registry hed 'removed) ;; was 0 + (if (not (and (null? reg) (null? tal))) + (runs:loop-values tal reg reglen regfull reruns) + #f)))) + (else + (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60) + (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now.")) + ;; (debug:print 0 *default-log-port* " prereqs: " prereqs-not-met) + (hash-table-set! test-registry hed 'removed) + (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f) + ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug. + (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL + (list (if (null? tal)(car newtal)(car tal)) + tal + reg + reruns))))) + ;; ELSE: can't drop this - maybe running? Just keep trying + + ;;(if (not (or (not (null? reg))(not (null? tal)))) ;; old experiment + (let ((runable-tests (runs:runable-tests prereqs-not-met))) ;; SUSPICIOUS: Should look at more than just prereqs-not-met? + (if (null? runable-tests) + #f ;; I think we are truly done here + (runs:loop-values newtal reg reglen regfull reruns))) + ;;) ;;from old experiment + ) ;; end if (or (not (null? reg))(not (null? tal))) + )))))) + +;; scan a list of tests looking to see if any are potentially runnable +;; +(define (runs:runable-tests tests) + (filter (lambda (t) + (if (not (vector? t)) + t + (let ((state (db:test-get-state t)) + (status (db:test-get-status t))) + (case (string->symbol state) + ((COMPLETED INCOMPLETE) #f) + ((NOT_STARTED) + (if (member status '("TEN_STRIKES" "BLOCKED" "PREQ_FAIL" "ZERO_ITEMS" "PREQ_DISCARDED" "TIMED_OUT" )) + #f + t)) + ((DELETED) #f) + (else t))))) + tests)) + +;; move all the miscellanea into this struct +;; +(defstruct runs:gendat inc-results inc-results-last-update inc-results-fmt run-info runname target) + +(define *runs:general-data* + (make-runs:gendat + inc-results: (make-hash-table) + inc-results-last-update: 0 + inc-results-fmt: "~12a~12a~20a~12a~40a\n" ;; state status time duration test-name item-path + run-info: #f + runname: #f + target: #f + ) + ) + +(define (runs:incremental-print-results run-id) + (let ((curr-sec (current-seconds)) + (last-update (runs:gendat-inc-results-last-update *runs:general-data*))) + (if (> (- curr-sec last-update) 5) ;; at least five seconds since last update + (let* ((run-dat (or (runs:gendat-run-info *runs:general-data*)(rmt:get-run-info run-id))) + (runname (or (runs:gendat-runname *runs:general-data*) + (db:get-value-by-header (db:get-rows run-dat) + (db:get-header run-dat) "runname"))) + (target (or (runs:gendat-target *runs:general-data*)(rmt:get-target run-id))) + (testsdat (let ((res (rmt:get-tests-for-run + run-id "%" '() '() ;; run-id testpatt states statuses + #f #f ;; offset limit + #f ;; not-in + #f ;; sort-by + #f ;; sort-order + #f ;; get full data (not 'shortlist) + last-update + 'dashboard))) + (if (list? res) + res + (begin + (debug:print-error + 0 *default-log-port* + "FAILED TO GET DATA using rmt:get-tests-for-run. Notify developers if you see this. result: " res) + '()))))) + (runs:gendat-inc-results-last-update-set! *runs:general-data* (- curr-sec 1)) + (if (not (runs:gendat-run-info *runs:general-data*)) + (runs:gendat-run-info-set! *runs:general-data* run-dat)) + (if (not (runs:gendat-runname *runs:general-data*)) + (runs:gendat-runname-set! *runs:general-data* runname)) + (if (not (runs:gendat-target *runs:general-data*)) + (runs:gendat-target-set! *runs:general-data* target)) + (for-each + (lambda (testdat) + (let* ((test-id (db:test-get-id testdat)) + (prevdat (hash-table-ref/default (runs:gendat-inc-results *runs:general-data*) + (conc run-id "," test-id) #f)) + (test-name (db:test-get-testname testdat)) + (item-path (db:test-get-item-path testdat)) + (state (db:test-get-state testdat)) + (status (db:test-get-status testdat)) + (event-time (db:test-get-event_time testdat)) + (duration (db:test-get-run_duration testdat))) + (if (and (not (member state '("DELETED" "REMOTEHOSTSTART" "RUNNING" "LAUNCHED""NOT_STARTED"))) + (not (and prevdat + (equal? state (db:test-get-state prevdat)) + (equal? status (db:test-get-status prevdat))))) + (let ((fmt (runs:gendat-inc-results-fmt *runs:general-data*)) + (dtime (seconds->year-work-week/day-time event-time))) + (if (runs:lownoise "inc-print" 600) + (format #t fmt "State" "Status" "Start Time" "Duration" "Test path")) + ;; (debug:print 0 *default-log-port* "fmt: " fmt " state: " state " status: " status " test-name: " test-name " item-path: " item-path " dtime: " dtime) + ;; (debug:print 0 #f "event-time: " event-time " duration: " duration) + (format #t fmt + state + status + dtime + (seconds->hr-min-sec duration) + (conc "lt/" target "/" runname "/" test-name (if (string-null? item-path) "" (conc "/" item-path)))) + (hash-table-set! (runs:gendat-inc-results *runs:general-data*) (conc run-id "," test-id) testdat))))) + testsdat))) + + ;; I don't think this should be here? -- Matt + #;(runs:gendat-inc-results-last-update-set! *runs:general-data* (- curr-sec 10)) + + )) + +;; every time though the loop increment the test/itempatt val. +;; when the min is > max-allowed and none running then force exit +;; +(define *max-tries-hash* (make-hash-table)) + +(define (runs:pretty-long-list lst) + (if (> (length lst) 8)(append (take lst 3)(list "...")) lst)) + +(define *last-loop-time-ms* 0) + +;;====================================================================== +;; runs:run-tests-queue is called by runs:run-tests +;;====================================================================== +;; +;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... > +(define (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests reglen-in all-tests-registry) + ;; At this point the list of parent tests is expanded + ;; NB// Should expand items here and then insert into the run queue. + (debug:print 5 *default-log-port* "test-records: " test-records ", flags: " (hash-table->alist flags)) + + ;; Do mark-and-find clean up of db before starting runing of quue + ;; + ;; (rmt:find-and-mark-incomplete) + + (let* ((run-info (rmt:get-run-info run-id)) + (tests-info (mt:get-tests-for-run run-id #f '() '())) ;; qryvals: "id,testname,item_path")) + (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) + (test-registry (make-hash-table)) + (registry-mutex (make-mutex)) + (num-retries 0) + (max-retries (configf:lookup *configdat* "setup" "maxretries")) + (max-concurrent-jobs (configf:lookup-number *configdat* "setup" "max_concurrent_jobs" default: 50)) + (reglen (if (number? reglen-in) reglen-in 1)) + (last-time-incomplete (- (current-seconds) 900)) ;; force at least one clean up cycle + (last-time-some-running (current-seconds)) + (incoming-tests '()) ;; queue up incoming tests here to tack on to tal when it gets low + ;; (tdbdat (tasks:open-db)) + (runsdat (make-runs:dat + ;; hed: hed + ;; tal: tal + ;; reg: reg + ;; reruns: reruns + reglen: reglen + regfull: #f ;; regfull + ;; test-record: test-record + runname: runname + ;; test-name: test-name + ;; item-path: item-path + ;; jobgroup: jobgroup + max-concurrent-jobs: max-concurrent-jobs + run-id: run-id + ;; waitons: waitons + ;; testmode: testmode + test-patts: test-patts + required-tests: required-tests + test-registry: test-registry + registry-mutex: registry-mutex + flags: flags + keyvals: keyvals + run-info: run-info + ;; newtal: newtal + all-tests-registry: all-tests-registry + ;; itemmaps: itemmaps + ;; prereqs-not-met: (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps) + ;; can-run-more-tests: (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) ;; look at the test jobgroup and tot jobs running + ))) + + ;; Initialize the test-registery hash with tests that already have a record + ;; convert state to symbol and use that as the hash value + (for-each (lambda (trec) + (let ((id (db:test-get-id trec)) + (tn (db:test-get-testname trec)) + (ip (db:test-get-item-path trec)) + (st (db:test-get-state trec))) + (if (not (equal? st "DELETED")) + (hash-table-set! test-registry (db:test-make-full-name tn ip) (string->symbol st))))) + tests-info) + (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) + + (let loop ((hed (car sorted-test-names)) + (tal (cdr sorted-test-names)) + (reg '()) ;; registered, put these at the head of tal + (reruns '())) + + (runs:incremental-print-results run-id) + + (if (not (null? reruns))(debug:print-info 4 *default-log-port* "reruns=" reruns)) + + ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes + ;; moving this to a parallel thread and just run it once. + ;; + (if (> (current-seconds)(+ last-time-incomplete 900)) + (begin + (set! last-time-incomplete (current-seconds)) + ;; (rmt:find-and-mark-incomplete-all-runs) + )) + + ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) + (let* ((test-record (hash-table-ref test-records hed)) + (test-name (tests:testqueue-get-testname test-record)) + (tconfig (tests:testqueue-get-testconfig test-record)) + (jobgroup (configf:lookup tconfig "test_meta" "jobgroup")) + (testmode (let ((m (configf:lookup tconfig "requirements" "mode"))) + (if m (map string->symbol (string-split m)) '(normal)))) + (itemmaps (tests:get-itemmaps tconfig)) ;; (configf:lookup tconfig "requirements" "itemmap")) + (priority (tests:testqueue-get-priority test-record)) + (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f + (items (tests:testqueue-get-items test-record)) + (item-path (item-list->path itemdat)) + (tfullname (db:test-make-full-name test-name item-path)) + ;; these are hard coded item-item waits test/item-path => test/item-path2 ... + (extra-waits (let* ((section (configf:get-section (tests:testqueue-get-testconfig test-record) "waitons")) + (myextra (alist-ref tfullname section equal?))) + (if myextra + (let ((extras (string-split (car myextra)))) + (if (runs:lownoise (conc tfullname "extra-waitons" tfullname) 60) + (debug:print-info 0 *default-log-port* "HAVE EXTRA WAITONS for test " tfullname ": " myextra)) + (for-each + (lambda (extra) + ;; (debug:print 0 *default-log-port* "FYI: extra = " extra " reruns = " reruns) + (let ((basetestname (car (string-split extra "/")))) + #;(if (not (member extra tal)) + (set! reruns (append tal (list extra)))) + (if (not (member basetestname tal)) + (set! reruns (append tal (list basetestname)))) + )) + extras) + extras) + '()))) + (waitons (delete-duplicates (append (tests:testqueue-get-waitons test-record) extra-waits) equal?)) + (newtal (append tal (list hed))) + (regfull (>= (length reg) reglen)) + (num-running (rmt:get-count-tests-running-for-run-id run-id)) + (testdat (make-runs:testdat + hed: hed + tal: tal + reg: reg + reruns: reruns + test-record: test-record + test-name: test-name + item-path: item-path + jobgroup: jobgroup + waitons: waitons + testmode: testmode + newtal: newtal + itemmaps: itemmaps + prereqs-not-met: '() + ))) + + ;; too-tight loop detection and delay, this might hide issues + ;; that occur in long run times. Consider commenting when debugging + ;; + (if (and (>= num-running max-concurrent-jobs) + (< (- (current-milliseconds) *last-loop-time-ms*) 500)) + (begin + (if (runs:lownoise "too-tight-loop" 5) + (debug:print-info 2 *default-log-port* "Excessively fast loop, delaying 1/2 second")) + (thread-sleep! 0.5))) + (set! *last-loop-time-ms* (current-milliseconds)) + + (runs:dat-regfull-set! runsdat regfull) + + + (if (> (- (current-seconds) *last-test-launch*) 5) ;; be pretty aggressive for five seconds after + (runs:too-soon-delay (conc "loop delay " hed) 1 0.6) ;; starting a test then apply more delay + (runs:too-soon-delay (conc "loop delay " hed) 1 0.1)) + + (if (> num-running 0) + (set! last-time-some-running (current-seconds))) + + (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000))) + (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1))) + ;; (debug:print 0 *default-log-port* "max-tries-hash: " (hash-table->alist *max-tries-hash*)) + + ;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard + ;; and it is clear they *should* have run but did not. + (if (not (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f)) + (begin + (rmt:register-test run-id test-name "") + (hash-table-set! test-registry (db:test-make-full-name test-name "") 'done))) + + ;; Fast skip of tests that are already "COMPLETED" - NO! Cannot do that as the items may not have been expanded yet :( + ;; + (if (member (hash-table-ref/default test-registry tfullname #f) + '(DONOTRUN removed)) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) + (begin + (if (runs:lownoise (conc "been marked do not run " tfullname) 60) + (debug:print-info 0 *default-log-port* "Skipping test " tfullname " as it has been marked do not run due to being completed or not runnable")) + (if (or (not (null? tal))(not (null? reg))) + (loop (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + reruns)))) + ;; (loop (car tal)(cdr tal) reg reruns)))) + (runs:incremental-print-results run-id) + (debug:print 4 *default-log-port* "TOP OF LOOP => " + "test-name: " test-name + "\n hed: " hed + "\n tal: " (runs:pretty-long-list tal) + "\n reg: " reg + "\n test-record " test-record + "\n itemdat: " itemdat + "\n items: " items + "\n item-path: " item-path + "\n waitons: " waitons + "\n num-retries: " num-retries + "\n reruns: " reruns + "\n regfull: " regfull + "\n reglen: " reglen + "\n length reg: " (length reg) + ) + + ;; (runs:parallel-runners-mgmt runsdat) + + ;; check for hed in waitons => this would be circular, remove it and issue an + ;; error + (if (member test-name waitons) + (begin + (debug:print-error 0 *default-log-port* "test " test-name " has listed itself as a waiton, please correct this!") + (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) + + (cond + + ;; We want to catch tests that have waitons that are NOT in the queue and discard them IFF + ;; they have been through the wringer 10 or more times + ((and (list? waitons) + (not (null? waitons)) + (> (hash-table-ref/default *max-tries-hash* tfullname 0) 10) + (not (null? (filter + number? + (map (lambda (waiton) + (if (and (not (member waiton tal)) ;; this waiton is not in the list to be tried to run + (not (member waiton reruns))) + 1 + #f)) + waitons))))) ;; could do this more elegantly with a marker.... + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-1") + (debug:print 0 *default-log-port* "WARNING: Marking test " tfullname " as not runnable. It is waiting on tests that cannot be run. Giving up now.") + (hash-table-set! test-registry tfullname 'removed)) + + ;; items is #f then the test is ok to be handed off to launch (but not before) + ;; + ((not items) + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-2") + (debug:print-info 4 *default-log-port* "OUTER COND: (not items)") + (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) + (not (null? tal))) + (loop (car tal)(cdr tal) reg reruns)) + + ;; gonna try a strategy change here. + ;; + ;; check if can run more tests. if yes, continue, if no, rest until can run more + ;; look at the test jobgroup and tot jobs running + ;; + ;; NOTE: This does NOT actually gate here, only captures the proc to be called later + ;; + (if (not (runs:dat-wait-for-jobs-function runsdat)) + (runs:dat-wait-for-jobs-function-set! + runsdat + (lambda (testdat-in) + (let* ((jobgroup (runs:testdat-jobgroup testdat-in)) + (can-run-more-tests (runs:dat-can-run-more-tests runsdat)) + (last-jobs-check-time (runs:dat-last-jobs-check-time runsdat)) + (should-check-jobs (match can-run-more-tests + ((can-run-more-flag num-running nr-in-jobgroup max-concurrent-jobs . params) + (if (< (- max-concurrent-jobs num-running) 25) + (begin + (debug:print-info 2 *default-log-port* + "less than 20 jobs headroom, ("max-concurrent-jobs + "-"num-running")>20. Forcing prelaunch check.") + #t) + #f)) + (else #f)))) ;; no record yet + (if should-check-jobs + (let loop-can-run-more + ((res (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)) + (remtries 1440)) ;; we can wait for up to two hours for jobs to get done + (match res + ((run-more num-running . rem) + (if (or run-more + (< remtries 1)) + (begin + (if (runs:lownoise "num-running" 30) + (debug:print-info 0 *default-log-port* "Have "num-running" tests of max " max-concurrent-jobs)) + (runs:dat-can-run-more-tests-set! runsdat res)) ;; capture the result and then drop through + (begin + (if (runs:lownoise "num-running" 10) + (debug:print-info 0 *default-log-port* "Can't run more tests, have "num-running" tests of " + max-concurrent-jobs " allowed.")) + (thread-sleep! 5) ;; if we've hit max concurrent jobs take a breather, nb// make this configurable + + ;; wait for load here + (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat))) + (loop-can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs) + (- remtries 1))))))) + ))))) + + ;; I'm not clear on why prereqs are gathered here TODO: verfiy this is needed + (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) + + ;; I'm not clear on why we'd capture running job counts here TODO: verify this is needed + (runs:dat-can-run-more-tests-set! runsdat (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)) + + (let ((loop-list (runs:process-expanded-tests runsdat testdat))) ;; in process-expanded-tests ultimately run:test -> launch-test -> test actually running + (if loop-list (apply loop loop-list)))) + + ;; items processed into a list but not came in as a list been processed + ;; + ((and (list? items) ;; thus we know our items are already calculated + (not itemdat)) ;; and not yet expanded into the list of things to be done + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-3") + (debug:print-info 4 *default-log-port* "OUTER COND: (and (list? items)(not itemdat))") + ;; Must determine if the items list is valid. Discard the test if it is not. + (if (and (list? items) + (> (length items) 0) + (and (list? (car items)) + (> (length (car items)) 0)) + (debug:debug-mode 1)) + (debug:print 2 *default-log-port* (map (lambda (row) + (conc (string-intersperse + (map (lambda (varval) + (string-intersperse varval "=")) + row) + " ") + "\n")) + items))) + + (let* ((items-in-testpatt + (filter + (lambda (my-itemdat) + (tests:match test-patts hed (item-list->path my-itemdat) )) + ;; was: (tests:match test-patts hed (item-list->path my-itemdat) required: required-tests)) + items) )) + (if (null? items-in-testpatt) + (debug:print-error 0 *default-log-port* "Test " (tests:testqueue-get-testname test-record) " is itemized but has no items matching the test pattern") + + (for-each (lambda (my-itemdat) + (let* ((new-test-record (let ((newrec (make-tests:testqueue))) + (vector-copy! test-record newrec) + newrec)) + (my-item-path (item-list->path my-itemdat)) + + (newtestname (db:test-make-full-name hed my-item-path))) ;; test names are unique on testname/item-path + (tests:testqueue-set-items! new-test-record #f) + (tests:testqueue-set-itemdat! new-test-record my-itemdat) + (tests:testqueue-set-item_path! new-test-record my-item-path) + (hash-table-set! test-records newtestname new-test-record) + ;; BUG: This next line sucks up a lot of horsepower + ;; (set! tal (append tal (list newtestname))) + ;; (set! tal (cons newtestname tal)) ;; 4/6/2023 - try using cons, does it matter if the test gets added at the beginning? + (set! incoming-tests (cons newtestname incoming-tests)) + )) ;; since these are itemized create new test names testname/itempath + items-in-testpatt))) + + (if (and (< (length tal) 20) + (not (null? incoming-tests))) + (begin + (set! tal (append tal (reverse incoming-tests))) + (set! incoming-tests '()))) + + ;; At this point we have possibly added items to tal but all must be handed off to + ;; INNER COND logic. I think loop without rotating the queue + ;; (loop hed tal reg reruns)) + ;; (let ((newtal (append tal (list hed)))) ;; We should discard hed as it has been expanded into it's items? Yes, but only if this *is* an itemized test + ;; (loop (car newtal)(cdr newtal) reg reruns) + (if (null? tal) + #f + (loop (car tal)(cdr tal) reg reruns))) + + ;; if items is a proc then need to run items:get-items-from-config, get the list and loop + ;; - but only do that if resources exist to kick off the job + ;; EXPAND ITEMS + ((or (procedure? items)(eq? items 'have-procedure)) + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-4") + (let ((can-run-more #f)) ;; (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) + (if (not can-run-more) #;(and (list? can-run-more) + (car can-run-more)) + (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) ;; itemized test expanded here + (if loop-list + (apply loop loop-list) + (debug:print-info 4 *default-log-port* " -- Can't expand hed="hed) + ) + ) + ;; if can't run more just loop with next possible test + (loop (car newtal)(cdr newtal) reg reruns)))) + + + ;; this case should not happen, added to help catch any bugs + ((and (list? items) itemdat) + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-5") + (debug:print-error 0 *default-log-port* "Should not have a list of items in a test and the itemspath set - please report this") + (exit 1)) + + ((not (null? reruns)) + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-6") + (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED, + (junked (lset-difference equal? tal newlst))) + (debug:print-info 4 *default-log-port* "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal) + (if (< num-retries max-retries) + (set! newlst (append reruns newlst))) + (set! num-retries (+ num-retries 1)) + ;; (thread-sleep! (+ 1 *global-delta*)) + (if (not (null? newlst)) + ;; since reruns have been tacked on to newlst create new reruns from junked + (loop (car newlst)(cdr newlst) reg (delete-duplicates junked))))) + + ((not (null? tal)) + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-7") + (debug:print-info 4 *default-log-port* "I'm pretty sure I shouldn't get here.")) + ((not (null? reg)) ;; could we get here with leftovers? + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-8") + (debug:print-info 0 *default-log-port* "Have leftovers!") + (loop (car reg)(cdr reg) '() reruns)) + (else + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-9") + (debug:print-info 4 *default-log-port* "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) + ))) ;; end loop on sorted test names + ;; this is the point where everything is launched and now you can mark the run in metadata table as all launched + (rmt:set-var (conc "lunch-complete-" run-id) "yes") + + ;; now *if* -run-wait we wait for all tests to be done + ;; Now wait for any RUNNING tests to complete (if in run-wait mode) + ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat))) + (thread-sleep! 0.1) ;; I think there is a race condition here. Let states/statuses settle + + (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id)) + (prev-num-running 0)) + ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running) + (if (and (or (args:get-arg "-run-wait") + (equal? (configf:lookup *configdat* "setup" "run-wait") "yes")) + (> num-running 0)) + (begin + ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes + ;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0)) + (if (> (current-seconds)(+ last-time-incomplete 900)) + (let ((actual-num-running (rmt:get-count-tests-running-for-run-id run-id))) + (debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id + ". Running as pid " (current-process-id) " on " (get-host-name)) + (set! last-time-incomplete (current-seconds)) ;; FIXME, this might be causing slow down - use of set! + (rmt:find-and-mark-incomplete run-id #f) + (debug:print-info 0 *default-log-port* "run-wait specified, waiting on " actual-num-running + " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " + (time->string (seconds->local-time (current-seconds)))))) + ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat))) + (thread-sleep! 5) ;; (if (>= num-running max-concurrent-jobs) 5 1)) + (wait-loop (rmt:get-count-tests-running-for-run-id run-id) + num-running)))) + ;; LET* ((test-record + ;; we get here on "drop through". All done! + ;; this is moved to runs:run-testes since this function is getting called twice to ensure everthing is completed. + ;; (debug:print-info 0 *default-log-port* "Calling Post Hook") + ;; (runs:run-post-hook run-id) + (debug:print-info 1 *default-log-port* "All tests launched"))) + +(define (runs:calc-fails prereqs-not-met) + (filter (lambda (test) + (and (vector? test) ;; not (string? test)) + (member (db:test-get-state test) '("INCOMPLETE" "COMPLETED")) ;; TODO: pull from *common:stuff...* + (not (member (db:test-get-status test) + '("PASS" "WARN" "WAIVED" "SKIP"))))) + prereqs-not-met)) + +(define (runs:calc-prereq-fail prereqs-not-met) ;; REMOVEME since NOT_STARTED/PREQ_FAIL is now COMPLETED/PREQ_FAIL + (filter (lambda (test) + (and (vector? test) ;; not (string? test)) + (equal? (db:test-get-state test) "NOT_STARTED") + (not (member (db:test-get-status test) + '("n/a" "KEEP_TRYING"))))) + prereqs-not-met)) + +(define (runs:calc-not-completed prereqs-not-met) + (filter + (lambda (t) + (or (not (vector? t)) + (not (member (db:test-get-state t) '("INCOMPLETE" "COMPLETED"))))) + prereqs-not-met)) + +;; (define (runs:calc-not-completed prereqs-not-met) +;; (filter +;; (lambda (t) +;; (or (not (vector? t)) +;; (not (equal? "COMPLETED" (db:test-get-state t))))) +;; prereqs-not-met)) + +(define (runs:calc-runnable prereqs-not-met) + (filter + (lambda (t) + (or (not (vector? t)) + (and (equal? "NOT_STARTED" (db:test-get-state t)) + (member (db:test-get-status t) + '("n/a" "KEEP_TRYING"))) + (and (equal? "RUNNING" (db:test-get-state t))))) ;; account for a test that is running + prereqs-not-met)) + +(define (runs:pretty-string lst) + (map (lambda (t) + (if (not (vector? t)) + (conc t) + (conc (db:test-get-testname t) ":" (db:test-get-state t) "/" (db:test-get-status t)))) + lst)) + +;; parent-test is there as a placeholder for when parent-tests can be run as a setup step +;; +(define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry runsdat testdat-rec) + ;; All these vars might be referenced by the testconfig file reader + ;; + ;; NEED to reprocess testconfig here, ensuring that item variables are available. + ;; This is for Tal's issue with item-specific env vars not being set for use in skip. + ;; HSD https://hsdes.intel.com/appstore/icf/index.html#/article?articleId=1408763273 + ;; Also later HSD https://hsdes.intel.com/appstore/article/#/14012138487 + ;; + (let* ((test-name (tests:testqueue-get-testname test-record)) + (test-waitons (tests:testqueue-get-waitons test-record)) + (itemdat (tests:testqueue-get-itemdat test-record)) + (item-path "") + (db #f) + (full-test-name #f)) + + ;; setting itemdat to a list if it is #f + (if (not itemdat)(set! itemdat '())) + (set! item-path (item-list->path itemdat)) + (set-item-env-vars itemdat) + (set! full-test-name (db:test-make-full-name test-name item-path)) + (runs:set-megatest-env-vars run-id inrunname: runname testname: test-name itempath: item-path) ;; these may be needed by the launching process + + (let* ((test-conf ;; re-instated the tests:get-testconfig to fix HSD https://hsdes.intel.com/appstore/article/#/14012138487, need to be able to skip using [items], [itemstable] variables. + ;; (tests:testqueue-get-testconfig test-record )) ;; vector-ref test-record 3 + (tests:get-testconfig test-name item-path all-tests-registry #t force-create: #t)) + (test-path (hash-table-ref all-tests-registry test-name)) ;; (conc *toppath* "/tests/" test-name)) ;; could use tests:get-testconfig here ... + + (force (hash-table-ref/default flags "-force" #f)) + (rerun (hash-table-ref/default flags "-rerun" #f)) + (keepgoing (hash-table-ref/default flags "-keepgoing" #f)) + (incomplete-timeout (string->number (or (configf:lookup *configdat* "setup" "incomplete-timeout") "x"))) + ) + + (debug:print-info 4 *default-log-port* + "\nTESTNAME: " full-test-name + "\n test-config: " (hash-table->alist test-conf) + "\n itemdat: " itemdat + ) + (debug:print 2 *default-log-port* "Attempting to launch test " full-test-name) + ;; (setenv "MT_TEST_NAME" test-name) ;; + ;; (setenv "MT_ITEMPATH" item-path) + ;; (setenv "MT_RUNNAME" runname) + (change-directory *toppath*) + + ;; Here is where the test_meta table is best updated + ;; Yes, another use of a global for caching. Need a better way? + ;; + ;; There is now a single call to runs:update-all-test_meta and this + ;; per-test call is not needed. Given the delicacy of the move to + ;; v1.55 this code is being left in place for the time being. + ;; + (if (not (hash-table-exists? *test-meta-updated* test-name)) + (begin + (hash-table-set! *test-meta-updated* test-name #t) + (runs:update-test_meta test-name test-conf))) + + ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer")) + (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/")) + (test-id (rmt:get-test-id run-id test-name item-path)) + (testdat (if test-id (rmt:get-test-info-by-id run-id test-id) #f))) + (if (not testdat) + (let loop () + ;; ensure that the path exists before registering the test + ;; NOPE: Cannot! Don't know yet which disk area will be assigned.... + ;; (system (conc "mkdir -p " new-test-path)) + ;; + ;; (open-run-close tests:register-test db run-id test-name item-path) + ;; + ;; NB// for the above line. I want the test to be registered long before this routine gets called! + ;; + (if (not test-id)(set! test-id (rmt:get-test-id run-id test-name item-path))) + (if (not test-id) + (begin + (debug:print 2 *default-log-port* "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id) + (rmt:register-test run-id test-name item-path) + (set! test-id (rmt:get-test-id run-id test-name item-path)))) + (debug:print-info 4 *default-log-port* "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"") + (set! testdat (rmt:get-test-info-by-id run-id test-id)) + (if (not testdat) + (begin + (debug:print-info 0 *default-log-port* "WARNING: server is overloaded, trying again in two seconds") + ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat))) + (thread-sleep! 2) + (loop))))) + (if (not testdat) ;; should NOT happen + (debug:print-error 0 *default-log-port* "failed to get test record for test-id " test-id)) + (set! test-id (db:test-get-id testdat)) + (if (common:file-exists? test-path) + (change-directory test-path) + (begin + (debug:print-error 0 *default-log-port* "test run path not created before attempting to run the test. Perhaps you are running -remove-runs at the same time?") + (change-directory *toppath*))) + (case (if force ;; (args:get-arg "-force") + 'NOT_STARTED + (if testdat + (string->symbol (test:get-state testdat)) + 'failed-to-insert)) + ((failed-to-insert) + (debug:print-error 0 *default-log-port* "Failed to insert the record into the db")) + ((NOT_STARTED COMPLETED DELETED INCOMPLETE) + (let ((runflag #f)) + (cond + ;; -force, run no matter what + (force (set! runflag #t)) + ;; NOT_STARTED, run no matter what + ((member (test:get-state testdat) '("DELETED" "NOT_STARTED" "INCOMPLETE"))(set! runflag #t)) + ;; not -rerun and PASS, WARN or CHECK, do no run + ((and (or (not rerun) + keepgoing) + ;; Require to force re-run for COMPLETED or *anything* + PASS,WARN or CHECK + (or (member (test:get-status testdat) '("PASS" "WARN" "CHECK" "SKIP" "WAIVED")) + (member (test:get-state testdat) '("COMPLETED")))) + (debug:print-info 2 *default-log-port* "running test " test-name "/" item-path " suppressed as it is " (test:get-state testdat) " and " (test:get-status testdat)) + (hash-table-set! test-registry full-test-name 'DONOTRUN) ;; COMPLETED) + (set! runflag #f)) + + ;; -rerun and status is one of the specifed, run it + ((and rerun + (let* ((rerunlst (string-split rerun ",")) + (must-rerun (member (test:get-status testdat) rerunlst))) + (debug:print-info 3 *default-log-port* "-rerun list: " rerun ", test-status: " (test:get-status testdat)", must-rerun: " must-rerun) + must-rerun)) + (debug:print-info 2 *default-log-port* "Rerun forced for test " test-name "/" item-path) + (set! runflag #t) + (debug:print-info 2 *default-log-port* "Calling rerun hook") + (runs:rerun-hook test-id new-test-path testdat rerun) + ) + + + + ;; -keepgoing, do not rerun FAIL + ((and keepgoing + (member (test:get-status testdat) '("FAIL"))) + (set! runflag #f)) + + ((and (not rerun) + (member (test:get-status testdat) '("FAIL" "n/a"))) + (set! runflag #t)) + + (else (set! runflag #f))) + (debug:print 4 *default-log-port* "RUNNING => runflag: " runflag " STATE: " (test:get-state testdat) " STATUS: " (test:get-status testdat)) + (if (not runflag) + (if (not parent-test) + (if (runs:lownoise (conc "not starting test" full-test-name) 60) + (debug:print 3 *default-log-port* "NOTE: Not starting test " full-test-name " as it is state \"" (test:get-state testdat) + "\" and status \"" (test:get-status testdat) "\", use -rerun \"" (test:get-status testdat) + "\" or -force to override"))) + ;; NOTE: No longer be checking prerequisites here! Will never get here unless prereqs are + ;; already met. + ;; This would be a great place to do the process-fork + ;; + (let ((skip-test #f) + (skip-check (configf:get-section test-conf "skip"))) + (cond + ;; Have to check for skip conditions. This one skips if there are same-named tests + ;; currently running + ((and skip-check + (configf:lookup test-conf "skip" "prevrunning")) + ;; run-ids = #f means *all* runs + (let ((running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f))) + (if (not (null? running-tests)) ;; have to skip + (set! skip-test "Skipping due to previous tests running")))) + + ;; split the string and OR of file-exists? + ((and skip-check + (configf:lookup test-conf "skip" "fileexists")) + (let* ((files (string-split (configf:lookup test-conf "skip" "fileexists"))) + (existing (filter common:file-exists? files))) + (if (not (null? existing)) ;; (common:file-exists? (configf:lookup test-conf "skip" "fileexists")) + (set! skip-test (conc "Skipping due to existance of file(s) " (string-intersperse existing ", ")))))) ;; (configf:lookup test-conf "skip" "fileexists"))))) + + ((and skip-check + (configf:lookup test-conf "skip" "filenotexists")) + (let* ((files (string-split (configf:lookup test-conf "skip" "filenotexists"))) + (existing (filter common:file-exists? files))) + (if (null? existing) ;; (common:file-exists? (configf:lookup test-conf "skip" "filenotexists"))) + (set! skip-test (conc "Skipping due to non existance of files " (string-intersperse files ", ")))))) ;; (configf:lookup test-conf "skip" "filenotexists"))))) + + ((and skip-check + (configf:lookup test-conf "skip" "script")) + (if (= (system (configf:lookup test-conf "skip" "script")) 0) + (set! skip-test (conc "Skipping due to zero return value of script " (configf:lookup test-conf "skip" "script"))))) + + ((and skip-check + (configf:lookup test-conf "skip" "rundelay")) + ;; run-ids = #f means *all* runs + (let* ((numseconds (common:hms-string->seconds (configf:lookup test-conf "skip" "rundelay"))) + (running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f)) + (completed-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("COMPLETED" "INCOMPLETE") '("PASS" "FAIL" "ABORT") #f)) ;; ironically INCOMPLETE is same as COMPLETED in this contex + (last-run-times (map db:mintest-get-event_time completed-tests)) + (time-since-last (- (current-seconds) (if (null? last-run-times) 0 (common:max last-run-times))))) + (if (or (not (null? running-tests)) ;; have to skip if test is running + (> numseconds time-since-last)) + (set! skip-test (conc "Skipping due to previous test run less than " (configf:lookup test-conf "skip" "rundelay") " ago")))))) + + (if skip-test + (begin + (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test) + (debug:print-info 1 *default-log-port* "SKIPPING Test " full-test-name " due to " skip-test)) + ;; + ;; Here the test is handed off to launch.scm for launch-test to complete the launch process + ;; + (begin + ;; wait for less than max jobs here + (if (runs:dat-wait-for-jobs-function runsdat) + ((runs:dat-wait-for-jobs-function runsdat) testdat-rec)) + + (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags)) + (begin + (debug:print 0 *default-log-port* "ERROR: Failed to launch the test. Exiting as soon as possible") + (set! *globalexitstatus* 1) ;; + (process-signal (current-process-id) signal/kill)) + ) + ;; wait again here? + )))))) + ((KILLED) + (debug:print 1 *default-log-port* "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.") + (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)) ;; KILLED)) + ((LAUNCHED REMOTEHOSTSTART RUNNING) + (debug:print 2 *default-log-port* "NOTE: " test-name " is already running")) + ;; (if (> (- (current-seconds)(+ (db:test-get-event_time testdat) + ;; (db:test-get-run_duration testdat))) + ;; (or incomplete-timeout + ;; 6000)) ;; i.e. no update for more than 6000 seconds + ;; (begin + ;; (debug:print 0 *default-log-port* "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD") + ;; (tests:test-set-status! run-id test-id "INCOMPLETE" "STUCK/DEAD" "" #f)) + ;; ;; (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "" #f)) + ;; (debug:print 2 *default-log-port* "NOTE: " test-name " is already running"))) + (else + (debug:print-error 0 *default-log-port* "Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat)) + (case (string->symbol (test:get-state testdat)) + ((COMPLETED INCOMPLETE) + (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)) + (else + (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN))))))))) + +;;====================================================================== +;; END OF NEW STUFF +;;====================================================================== + +(define (get-dir-up-n dir . params) + (let ((dparts (string-split dir "/")) + (count (if (null? params) 1 (car params)))) + (conc "/" (string-intersperse + (take dparts (- (length dparts) count)) + "/")))) + +(define (runs:recursive-delete-with-error-msg real-dir) + (if (> (system (conc "rm -rf " real-dir)) 0) + (begin + ;; FAILED, possibly due to permissions, do chmod a+rwx then try one more time + (system (conc "chmod -R a+rwx " real-dir)) + (if (> (system (conc "rm -rf " real-dir)) 0) + (debug:print-error 0 *default-log-port* "There was a problem removing " real-dir " with rm -f"))))) + +(define (runs:safe-delete-test-dir real-dir) + ;; first delete all sub-directories + (directory-fold + (lambda (f x) + (let ((fullname (conc real-dir "/" f))) + (if (directory? fullname)(runs:recursive-delete-with-error-msg fullname))) + (+ 1 x)) + 0 real-dir) + ;; then files other than *testdat.db* + (directory-fold + (lambda (f x) + (let ((fullname (conc real-dir "/" f))) + (if (not (string-search (regexp "testdat.db") f)) + (runs:recursive-delete-with-error-msg fullname))) + (+ 1 x)) + 0 real-dir #t) + ;; then the entire directory + (runs:recursive-delete-with-error-msg real-dir)) + +;; cleanup often needs to remove all but the last N runs per target +;; +;; target-patts a1/b1/c1,a2/b2/c2 ... +;; +;; This will fail if called with empty target or a bad target (i.e. missing or extra fields) +;; +(define (runs:get-hash-by-target target-patts runpatt) + (let* ((targets (string-split target-patts ",")) + (keys (rmt:get-keys)) + (res-ht (make-hash-table))) ;; target -> ( runrecord1 runrecord2 ... ) + (for-each + (lambda (target-patt) + (let ((runs (rmt:simple-get-runs runpatt #f #f target-patt #f))) + (for-each + (lambda (run) + (let ((target (simple-run-target run))) + (hash-table-set! res-ht target (cons run (hash-table-ref/default res-ht target '()))))) + runs))) + targets) + res-ht)) + +;; delete runs older than X (weeks, days, months years etc.) +;; delete redundant runs within a target - N is the input +;; delete redundant runs within a target IFF older than given date/time AND keep at least N +;; +(define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep #!key (actions '(print))) + (let* ((runs-ht (runs:get-hash-by-target target-patts runpatt)) + (age (if (args:get-arg "-age")(common:hms-string->seconds (args:get-arg "-age")) #f)) + (age-mark (if age (- (current-seconds) age) (+ (current-seconds) 86400))) + (precmd (or (args:get-arg "-precmd") "")) + (action-chk (member (string->symbol "remove-runs") actions))) + ;; check the sequence of actions archive must comme before remove-runs + (if (and action-chk (member (string->symbol "archive") action-chk)) + (begin + (debug:print-error 0 *default-log-port* "action remove-runs must come after archive") + (exit 1))) + (print "Actions: " actions " age: " age) + (for-each + (lambda (action) + (for-each + (lambda (target) + (let* ((runs (hash-table-ref runs-ht target)) + (sorted (sort runs (lambda (a b)(< (simple-run-event_time a)(simple-run-event_time b))))) + (to-remove (let* ((len (length sorted)) + (trim-amt (- len num-to-keep))) + (if (> trim-amt 0) + (take sorted trim-amt) + '())))) + (hash-table-set! runs-ht target to-remove) + (print target ":") + (for-each + (lambda (run) + (let ((remove (member run to-remove (lambda (a b) + (eq? (simple-run-id a) + (simple-run-id b)))))) + (if (and age (> (simple-run-event_time run) age-mark)) + (print "Skipping handling of " target "/" (simple-run-runname run) " as it is younger than " (args:get-arg "-age")) + (case action + ((print) + (print " " (simple-run-runname run) + " " (time->string (seconds->local-time (simple-run-event_time run)) "WW%V.%u %H:%M:%S") + " " (if remove "REMOVE" ""))) + ((remove-runs) + (if remove (system (conc precmd " megatest -remove-runs -target " target " -runname " (simple-run-runname run) " -testpatt %" + (if (member 'kill-runs actions) ;; if kill-runs is specified then set -kill-wait to 0 + " -kill-wait 0" + ""))))) + ((archive) + (if remove (system (conc precmd " megatest -archive save-remove -target " target " -runname " (simple-run-runname run) " -testpatt %")))) + ((kill-runs) + (if remove (system (conc precmd " megatest -kill-runs -target " target " -runname " (simple-run-runname run) " -testpatt %")))))))) + sorted))) + (hash-table-keys runs-ht))) + actions) + runs-ht)) + +(define (remove-last-path-directory path-in) + (let* ((dparts (string-split path-in "/")) + (path-out (conc "/" (string-intersperse (take dparts (- (length dparts) 1)) "/"))) + ) + path-out + ) +) + + +(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(mode #f)(options '())) + (common:clear-caches) ;; clear all caches + (let* ((db #f) + ;; (tdbdat (tasks:open-db)) + (keys (rmt:get-keys)) + (rundat (mt:get-runs-by-patt keys runnamepatt target)) + (header (vector-ref rundat 0)) + (runs (vector-ref rundat 1)) + (states (if state (string-split state ",") '())) + (statuses (if status (string-split status ",") '())) + (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f))) + (rp-mutex (make-mutex)) + (bup-mutex (make-mutex)) + (keep-records (args:get-arg "-keep-records")) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode". + (test-records '())) ;; for tasks that we wish to operate on all tests in one fell swoop + + (let* ((write-access-actions '(remove-runs set-state-status archive run-wait kill-runs)) + (dbfile (conc *toppath* "/.mtdb/main.db")) + (readonly-mode (not (file-write-access? dbfile)))) + (when (and readonly-mode + (member action write-access-actions)) + (debug:print-error 0 *default-log-port* dbfile " is readonly. Cannot proceed with action ["action"] in which write-access isrequired .") + (exit 1))) + + (debug:print-info 4 *default-log-port* "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status) + (if (> 2 (length state-status)) + (begin + (debug:print-error 0 *default-log-port* "the parameter to -set-state-status is a comma delimited string. E.g. COMPLETED,FAIL") + (exit))) + (for-each + (lambda (run) + (let ((runkey (string-intersperse (map (lambda (k) + (db:get-value-by-header run header k)) keys) "/")) + (dirs-to-remove (make-hash-table)) + (proc-get-tests (lambda (run-id) + (mt:get-tests-for-run run-id + testpatt states statuses + not-in: #f + sort-by: (case action + ((remove-runs) 'rundir) + (else 'event_time)))))) + (let* ((run-id (db:get-value-by-header run header "id")) + (run-state (db:get-value-by-header run header "state")) + (run-name (db:get-value-by-header run header "runname")) + (tests (if (not (equal? run-state "locked")) + (proc-get-tests run-id) + '())) + (lasttpath "/does/not/exist/I/hope") + (lastrealpath "/does/not/exist/I/hope") + ;; there may be a number of different disks used in the same run. + (run-paths-hash (make-hash-table)) + (worker-thread #f)) + (debug:print-info 4 *default-log-port* "runs:operate-on run=" run ", header=" header) + (if (not (null? tests)) + (begin + (case action + ((kill-runs) + (tasks:kill-runner target run-name "%") + (debug:print 1 *default-log-port* "Killing tests for run: " runkey " " (db:get-value-by-header run header "runname")) + ) + ((remove-runs) + ;; use this location to cleanup old DELETED records? No. See below for same call + ;; (rmt:delete-old-deleted-test-records run-id) + ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) + ;; seek and kill in flight -runtests with % as testpatt here + ;; (if (equal? testpatt "%") + (tasks:kill-runner target run-name testpatt) + ;; (debug:print 0 *default-log-port* "not attempting to kill any run launcher processes as testpatt is " testpatt)) + (debug:print 1 *default-log-port* "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname"))) + ((set-state-status) + ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) + (debug:print 2 *default-log-port* "Modifying state and status for tests for run: " runkey " " (db:get-value-by-header run header "runname"))) + ((print-run) + (debug:print 1 *default-log-port* "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header) + action) + ((run-wait) + (debug:print 1 *default-log-port* "Waiting for run " runkey ", run=" runnamepatt " to complete")) + ((archive) + (debug:print 1 *default-log-port* "Archiving/restoring (" (args:get-arg "-archive") ") data for run: " runkey " " (db:get-value-by-header run header "runname")) + (let ((op (string->symbol (args:get-arg "-archive")))) + (set! worker-thread + (make-thread + (lambda () + (case op + ((save save-remove keep-html) + (archive:run-bup op run-id run-name tests rp-mutex bup-mutex)) + ((restore) + (archive:bup-restore op run-id run-name tests rp-mutex bup-mutex)) + ((get) ;;; NOTE: This is a special case. We wish to operate on ALL tests in one go + (set! test-records (append tests test-records))) + (else + (debug:print-error 0 *default-log-port* "unrecognised sub command " op " for -archive. Run \"megatest\" to see help") + (exit)))) + "archive-bup-thread")) + (thread-start! worker-thread) + (if (eq? op 'get) + (thread-join! worker-thread)) ;; we need the test-records set to not overlap + )) + (else + (debug:print-info 0 *default-log-port* "action not recognised " action))) + + ;; actions that operate on one test at a time can be handled below + ;; + (let ((sorted-tests (filter + vector? + (sort tests (lambda (a b)(let ((dira ;; (rmt:sdb-qry 'getstr + (db:test-get-rundir a)) ;; ) ;; (filedb:get-path *fdb* (db:test-get-rundir a))) + (dirb ;; (rmt:sdb-qry 'getstr + (db:test-get-rundir b))) ;; ) ;; ((filedb:get-path *fdb* (db:test-get-rundir b)))) + (if (and (string? dira)(string? dirb)) + (> (string-length dira)(string-length dirb)) + #f)))))) + (toplevel-retries (make-hash-table)) ;; try three times to loop through and remove top level tests + (test-retry-time (make-hash-table)) + (backgrounded-remove-status (make-hash-table)) + (backgrounded-remove-last-visit (make-hash-table)) + (backgrounded-remove-result (make-hash-table)) + (allow-run-time (string->number (or (args:get-arg "-kill-wait") "10")))) ;; seconds to allow for killing tests before just brutally killing 'em + (let loop ((test (car sorted-tests)) + (tal (cdr sorted-tests))) + (let* ((test-id (db:test-get-id test)) + (new-test-dat (rmt:get-test-info-by-id run-id test-id))) + (if (not new-test-dat) + (begin + (debug:print-error 0 *default-log-port* "We have a test-id of " test-id " but no record was found. NOTE: No locking of records is done between processes, do not simultaneously remove the same run from two processes!") + (if (not (null? tal)) + (loop (car tal)(cdr tal)))) + (let* ((item-path (db:test-get-item-path new-test-dat)) + (test-name (db:test-get-testname new-test-dat)) + (run-dir ;;(filedb:get-path *fdb* + ;; (rmt:sdb-qry 'getid + (db:test-get-rundir new-test-dat)) ;; ) ;; run dir is from the link tree + (has-subrun (and (subrun:subrun-test-initialized? run-dir) + (not (subrun:subrun-removed? run-dir)))) + (test-state (db:test-get-state new-test-dat)) + (test-status (db:test-get-status new-test-dat)) + (test-fulln (db:test-get-fullname new-test-dat)) + (uname (db:test-get-uname new-test-dat)) + (toplevel-with-children (and (db:test-get-is-toplevel test) + (> (rmt:test-toplevel-num-items run-id test-name) 0)))) + + (case action + ((remove-runs) + ;; if the test is a toplevel-with-children issue an error and do not remove + (cond + (toplevel-with-children + (debug:print 0 *default-log-port* "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests") + (hash-table-set! toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1)) + (if (> (hash-table-ref toplevel-retries test-fulln) 3) + (if (not (null? tal)) + (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries + (let ((newtal (append tal (list test)))) + (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue + (has-subrun + ;; + (let ((last-visit (hash-table-ref/default backgrounded-remove-last-visit test-fulln 0)) + (now (current-seconds)) + (rem-status (hash-table-ref/default backgrounded-remove-status test-fulln 'not-started))) + (case rem-status + ((not-started) + (debug:print 0 *default-log-port* "WARNING: postponing removal of " test-fulln " with run-id " run-id " as it has a subrun") + (hash-table-set! backgrounded-remove-status test-fulln 'started) + (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) + (common:send-thunk-to-background-thread + (lambda () + (let* ((subrun-remove-succeeded + (subrun:remove-subrun run-dir keep-records))) + (hash-table-set! backgrounded-remove-result test-fulln subrun-remove-succeeded) + (hash-table-set! backgrounded-remove-status test-fulln 'done))) + name: (conc "remove-subrun:"test-fulln)) + + ;; send to back of line, loop + (let ((newtal (append tal (list test)))) + (loop (car newtal)(cdr newtal))) + ) + ((started) + ;; if last visit was within last second, sleep 1 second + (if (< (- now last-visit) 1.0) + (thread-sleep! 1.0)) + (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) + ;; send to back of line, loop + (let ((newtal (append tal (list test)))) + (loop (car newtal)(cdr newtal))) + ) + ((done) + ;; drop this one; if remaining, loop, else finish + (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) + (let ((subrun-remove-succeeded (hash-table-ref/default backgrounded-remove-result test-fulln 'exception))) + (cond + ((eq? subrun-remove-succeeded 'exception) + (let* ((logfile (subrun:get-log-path run-dir "remove"))) + (debug:print 0 *default-log-port* "ERROR: removing subrun of of " test-fulln " with run-id " run-id " ; see logfile @ "logfile)) + (if (not (null? tal)) + (loop (car tal)(cdr tal)))) + (subrun-remove-succeeded + (debug:print 0 *default-log-port* "Now removing of " test-fulln " with run-id " run-id " since subrun was removed.") + ;;(runs:remove-test-directory new-test-dat mode) ;; let normal case handle this. it will go thru loop again as non-subrun + (let ((newtal (append tal (list test)))) + (loop (car newtal)(cdr newtal)))) + (else + (let* ((logfile (subrun:get-log-path run-dir "remove"))) + (debug:print 0 *default-log-port* "WARNING: removal of subrun failed. Please check "logfile" for details.")) + ;; send to back of line, loop (will not match has-subrun next time through) + (if (not (null? tal)) + (loop (car tal)(cdr tal)))))) + ) + ) ; end case rem-status + ) ; end let + ); end cond has-subrun + + (else + ;; BB - TODO - consider backgrounding to threads to delete tests (work below) + (debug:print-info 2 *default-log-port* "test: " test-name " itest-state: " test-state) + (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) + (begin + (if (not (hash-table-ref/default test-retry-time test-fulln #f)) + (begin + ;; want to set to REMOVING BUT CANNOT do it here? + (hash-table-set! test-retry-time test-fulln (current-seconds)))) + (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) + ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first + ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give + ;; up and blow it away. + (begin + (debug:print 0 *default-log-port* "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing") + (mt:test-set-state-status-by-id run-id (db:test-get-id test) "FAILEDKILL" "n/a" #f) + (thread-sleep! 1)) + (begin + (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f) + (thread-sleep! 1))) + ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... + (if (null? tal) + (loop new-test-dat tal) + (loop (car tal)(append tal (list new-test-dat))))) + (begin + (let ((rundir (db:test-get-rundir new-test-dat))) + (if (and (not (string= rundir "/tmp/badname")) + (file-exists? rundir) + (substring-index run-name rundir) + (tests:glob-like-match (conc "%/" target "/%") rundir) + ) + (begin + (set! lasttpath (db:test-get-rundir new-test-dat)) ;; remember this path for run removal + (set! lastrealpath (remove-last-path-directory (resolve-pathname lasttpath))) + (hash-table-set! run-paths-hash lastrealpath 1) + (runs:remove-test-directory new-test-dat mode) ;; 'remove-all) + ) + (begin + (debug:print 2 *default-log-port* "Not removing directory " rundir " because either it doesn't exist or has a bad name") + (debug:print 2 *default-log-port* "Is /tmp/badname: " (string= rundir "/tmp/badname")) + (debug:print 2 *default-log-port* "Exists: " (file-exists? rundir)) + (debug:print 2 *default-log-port* "Has run-name: " (substring-index run-name rundir)) + (debug:print 2 *default-log-port* "Has target: " (tests:glob-like-match (conc "%/" target "/%") rundir)) + (debug:print 2 *default-log-port* "Target: " target) + + ;;PJH remove record from db no need to cleanup directory + (case mode + ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) (db:test-get-state test)(db:test-get-status test) #f)) + ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f)) + (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test)))) + + ) + ) + ) + + (if (not (null? tal)) + (loop (car tal)(cdr tal))))))) + (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) + ((kill-runs) + ;; RUNNING -> KILLREQ + ;; LAUNCHED,RUNNING,REMOTEHOSTSTART -> NOT STARTED + (cond + ((and has-subrun (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))) + (common:send-thunk-to-background-thread + (lambda () + (let* ((subrun-remove-succeeded + (subrun:kill-subrun run-dir keep-records))) + #t))) + (if (not (null? tal)) + (loop (car tal)(cdr tal))) + ) + ((member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) + (debug:print 1 *default-log-port* "INFO: issuing killreq to test "test-fulln) + (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f) + (if (not (null? tal)) + (loop (car tal)(cdr tal)))) + ((and (member test-status '("PREQ_FAIL" "PREQ_DISCARDED" "BLOCKED" "ZERO_ITEMS" "KEEP_TRYING" "TEN_STRIKES" "TIMED_OUT"))) + (rmt:set-state-status-and-roll-up-items run-id (db:test-get-id test) 'foo "NOT_STARTED" "n/a" (conc "kill-run moved from "test-state":"test-status" to NOT_STARTED:n/a")) + ;;(mt:test-set-state-status-by-id run-id (db:test-get-id test) "NOT_STARTED" "n/a" (conc "kill-run moved from "test-state":"test-status" to NOT_STARTED:n/a")) + (if (not (null? tal)) + (loop (car tal)(cdr tal))) + ) + (else + (if (not (null? tal)) + (loop (car tal)(cdr tal))) + ))) + ((set-state-status) + (let* ((new-state (car state-status)) + (new-status (cadr state-status)) + (test-id (db:test-get-id test)) + (test-run-dir (db:test-get-rundir new-test-dat)) + (has-subrun (and (subrun:subrun-test-initialized? test-run-dir) + (not (subrun:subrun-removed? test-run-dir))))) + (when has-subrun + (common:send-thunk-to-background-thread + (lambda () + (subrun:set-state-status test-run-dir state status new-state-status) + ) + ) + ) + (debug:print-info 2 *default-log-port* "new state " new-state ", new status " new-status ) + (mt:test-set-state-status-by-id run-id test-id new-state new-status #f)) + (if (not (null? tal)) + (loop (car tal)(cdr tal)))) + ((run-wait) + ;; BB TODO - manage has-subrun case + (debug:print-info 2 *default-log-port* "still waiting, " (length tests) " tests still running") + (thread-sleep! 5) + (let ((new-tests (proc-get-tests run-id))) + (if (null? new-tests) + (debug:print-info 1 *default-log-port* "Run completed according to zero tests matching provided criteria.") + (loop (car new-tests)(cdr new-tests))))) + ((archive) + ;; BB TODO - manage has-subrun case + (if (and run-dir (not toplevel-with-children)) + (let ((ddir (conc run-dir "/"))) + (case (string->symbol (args:get-arg "-archive")) + ((save save-remove keep-html) + (if (common:file-exists? ddir) + (debug:print-info 0 *default-log-port* "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir))))))) + (if (not (null? tal)) + (loop (car tal)(cdr tal)))) + ))) + ) + (if worker-thread (thread-join! worker-thread))) + (common:join-backgrounded-threads)))) + + ;; remove the run if zero tests remain + (if (eq? action 'remove-runs) + (let* ((run-id (db:get-value-by-header run header "id")) ;; NB// masks run-id from above? + (remtests (mt:get-tests-for-run run-id #f '("DELETED") '("n/a") not-in: #t))) + (if (null? remtests) ;; no more tests remaining + (let* ((linkspath (remove-last-path-directory lasttpath)) + (runpaths (hash-table-keys run-paths-hash)) + ) + + (debug:print 2 *default-log-port* "run-paths-hash: " (hash-table-keys run-paths-hash)) + + (debug:print 1 *default-log-port* "Removing target " target "run: " run-name) + (if (not keep-records) + (begin + (debug:print 1 *default-log-port* "Removing DB records for the run.") + (rmt:delete-run run-id) + (rmt:delete-old-deleted-test-records run-id)) + ) + (if (not (equal? linkspath "/does/not/exist/I")) + (begin + (debug:print 1 *default-log-port* "Recursively removing links dir " linkspath) + (runs:recursive-delete-with-error-msg linkspath))) + + (for-each (lambda(runpath) + (debug:print 1 *default-log-port* "Recursively removing runs dir " runpath) + (runs:recursive-delete-with-error-msg runpath) + ) + runpaths + ) + ))))) + )) + runs) + ;; special case - archive get + (if (equal? (args:get-arg "-archive") "get") + (archive:bup-get-data "get" #f #f test-records rp-mutex bup-mutex)) + (if (or (equal? (args:get-arg "-archive") "save") (equal? (args:get-arg "-archive") "save-remove")) + (begin + (debug:print 0 *default-log-port* "db archive started") + (archive:megatest-db target runnamepatt) + (debug:print 0 *default-log-port* "db archived"))) + ) + #t + ) + +(define (runs:remove-test-directory test mode) ;; remove-data-only) + (let* ((run-dir (db:test-get-rundir test)) ;; run dir is from the link tree + (real-dir (if (common:file-exists? run-dir) + ;; (resolve-pathname run-dir) + (common:nice-path run-dir) + #f)) + (clean-mode (or mode 'remove-all)) + (test-id (db:test-get-id test)) + ;; (lock-key (conc "test-" test-id)) + ;; (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) + ;; (expire-time (+ (current-seconds) 30))) ;; give up on getting the lock and steal it after 15 seconds + ;; (if (car lock) + ;; #t + ;; (if (> (current-seconds) expire-time) + ;; (begin + ;; (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to clean test with id " test-id) + ;; (rmt:no-sync-del! lock-key) ;; destroy the lock + ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; + ;; (begin + ;; (thread-sleep! 1) + ;; (loop (rmt:no-sync-get-lock lock-key) expire-time))))))) + ) + (case clean-mode + ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f)) + ((remove-all) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f)) + ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f))) + (debug:print-info 2 *default-log-port* "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) + (if (and real-dir + (> (string-length real-dir) 5) + (common:file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. + (let* ((realpath (resolve-pathname run-dir))) + (debug:print-info 1 *default-log-port* "Recursively removing " realpath) + (if (common:file-exists? realpath) + (runs:safe-delete-test-dir realpath) + (debug:print 0 *default-log-port* "WARNING: test dir " realpath " appears to not exist or is not readable"))) + (if real-dir + (debug:print 0 *default-log-port* "WARNING: directory " real-dir " does not exist") + (debug:print 0 *default-log-port* "WARNING: no real directory corrosponding to link " run-dir ", nothing done"))) + (if (symbolic-link? run-dir) + (begin + (debug:print-info 1 *default-log-port* "Removing symlink " run-dir) + (handle-exceptions + exn + (debug:print-error 0 *default-log-port* " Failed to remove symlink " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue, exn=" exn) + (delete-file run-dir))) + (if (directory? run-dir) + (if (> (directory-fold (lambda (f x)(+ 1 x)) 0 run-dir) 0) + (debug:print 0 *default-log-port* "WARNING: refusing to remove " run-dir " as it is not empty") + (handle-exceptions + exn + (debug:print-error 0 *default-log-port* " Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue, exn=" exn) + (delete-directory run-dir))) + (if (and run-dir + (not (member run-dir (list "n/a" "/tmp/badname")))) + (debug:print 0 *default-log-port* "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink") + (debug:print 0 *default-log-port* "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) + )) + ;; Only delete the records *after* removing the directory. If things fail we have a record + (case clean-mode + ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) (db:test-get-state test)(db:test-get-status test) #f)) + ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f)) + (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test)))) + ;; (rmt:no-sync-del! lock-key) + )) + +;;====================================================================== +;; Routines for manipulating runs +;;====================================================================== + +;; Since many calls to a run require pretty much the same setup +;; this wrapper is used to reduce the replication of code +(define (general-run-call switchname action-desc proc) + (let ((runname (common:args-get-runname)) + (target (common:args-get-target))) + (cond + ((not target) + (debug:print-error 0 *default-log-port* "Missing required parameter for " switchname ", you must specify the target with -target") + (exit 3)) + ((not runname) + (debug:print-error 0 *default-log-port* "Missing required parameter for " switchname ", you must specify the run name with -runname runname") + (exit 3)) + (else + (let (;; (db #f) + (keys #f)) + (if (launch:setup) + (begin + (full-runconfigs-read) ;; cache the run config + ;; (launch:cache-config) ;; there are two independent config cache locations, turning this one off for now. MRW. + ) ;; do not cache here - need to be sure runconfigs is processed + (begin + (debug:print 0 *default-log-port* "Failed to setup, exiting") + (exit 1))) + + + (set! keys (keys:config-get-fields *configdat*)) + ;; have enough to process -target or -reqtarg here + (if (args:get-arg "-reqtarg") + (let* ((runconfigf (conc *toppath* "/runconfigs.config")) ;; DO NOT EVALUATE ALL + (runconfig (read-config runconfigf #f #t environ-patt: #f))) + (if (hash-table-ref/default runconfig (args:get-arg "-reqtarg") #f) + (keys:target-set-args keys (args:get-arg "-reqtarg") args:arg-hash) + + (begin + (debug:print-error 0 *default-log-port* "[" (args:get-arg "-reqtarg") "] not found in " runconfigf) + ;; (if db (sqlite3:finalize! db)) + (exit 1) + ))) + (if (args:get-arg "-target") + (keys:target-set-args keys (args:get-arg "-target" args:arg-hash) args:arg-hash))) + (if (not (car *configinfo*)) + (begin + (debug:print-error 0 *default-log-port* "Attempted to " action-desc " but run area config file not found") + (exit 1)) + ;; Extract out stuff needed in most or many calls + ;; here then call proc + (let* ((keyvals (keys:target->keyval keys target))) + (proc target runname keys keyvals))) + ;; (if db (sqlite3:finalize! db)) + (set! *didsomething* #t)))))) + +;;====================================================================== +;; Lock/unlock runs +;;====================================================================== + +(define (runs:handle-locking target keys runname lock unlock user) + (let* ((db #f) + (rundat (mt:get-runs-by-patt keys runname target)) + (header (vector-ref rundat 0)) + (runs (vector-ref rundat 1))) + (for-each (lambda (run) + (let ((run-id (db:get-value-by-header run header "id")) + (str (if lock + "lock" + "unlock"))) + (if (or lock + (and unlock + (or (args:get-arg "-force") + (begin + (print "Do you really wish to unlock run " run-id "?\n y/n: ") + (equal? "y" (read-line)))))) + (begin + (rmt:lock/unlock-run run-id lock unlock user) + (debug:print-info 0 *default-log-port* "Done " str " on run id " run-id)) + (debug:print-info 0 *default-log-port* "Skipping lock/unlock on " run-id)))) + runs))) +;;====================================================================== +;; Rollup runs +;;====================================================================== + +;; Update the test_meta table for this test +(define (runs:update-test_meta test-name test-conf) + (let ((currrecord (rmt:testmeta-get-record test-name))) + (if (not currrecord) + (begin + (set! currrecord (make-vector 11 #f)) + (rmt:testmeta-add-record test-name))) + (for-each + (lambda (key) + (let* ((idx (cadr key)) + (fld (car key)) + (val (configf:lookup test-conf "test_meta" fld))) + ;; (debug:print 5 *default-log-port* "idx: " idx " fld: " fld " val: " val) + (if (and val (not (equal? (vector-ref currrecord idx) val))) + (begin + (debug:print 0 *default-log-port* "Updating " test-name " " fld " to " val) + (rmt:testmeta-update-field test-name fld val))))) + '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10))))) + +;; find tests with matching tags, tagpatt is a string "tagpatt1,tagpatt2%, ..." +;; +(define (runs:get-tests-matching-tags tagpatt) + (let* ((tagdata (rmt:get-tests-tags)) + (res '())) ;; list of tests that match one or more tags + (for-each + (lambda (row) + (let* ((tag (car row)) + (tests (cdr row))) + (if (patt-list-match tag tagpatt) + (set! res (append tests res))))) + tagdata) + res)) + + +;; Update test_meta for all tests +(define (runs:update-all-test_meta db) + (let ((test-names (tests:get-all))) ;; (tests:get-valid-tests))) + (for-each + (lambda (test-name) + (let* ((test-conf (mt:lazy-read-test-config test-name))) + (if test-conf (runs:update-test_meta test-name test-conf)))) + (hash-table-keys test-names)))) + +;; This could probably be refactored into one complex query ... +;; NOT PORTED - DO NOT USE YET +;; +#;(define (runs:rollup-run keys runname user keyvals) + (debug:print 4 *default-log-port* "runs:rollup-run, keys: " keys " -runname " runname " user: " user) + (let* ((db #f) + ;; register run operates on the main db + (new-run-id (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour"))) + (prev-tests (rmt:get-matching-previous-test-run-records new-run-id "%" "%")) + (curr-tests (mt:get-tests-for-run new-run-id "%/%" '() '())) + (curr-tests-hash (make-hash-table))) + (rmt:update-run-event_time new-run-id) + ;; index the already saved tests by testname and itemdat in curr-tests-hash + (for-each + (lambda (testdat) + (let* ((testname (db:test-get-testname testdat)) + (item-path (db:test-get-item-path testdat)) + (full-name (conc testname "/" item-path))) + (hash-table-set! curr-tests-hash full-name testdat))) + curr-tests) + ;; NOPE: Non-optimal approach. Try this instead. + ;; 1. tests are received in a list, most recent first + ;; 2. replace the rollup test with the new *always* + (for-each + (lambda (testdat) + (let* ((testname (db:test-get-testname testdat)) + (item-path (db:test-get-item-path testdat)) + (full-name (conc testname "/" item-path)) + (prev-test-dat (hash-table-ref/default curr-tests-hash full-name #f)) + (test-steps (rmt:get-steps-for-test (db:test-get-id testdat))) + (new-test-record #f)) + ;; replace these with insert ... select + (apply sqlite3:execute + db + (conc "INSERT OR REPLACE INTO tests (run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment) " + "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?);") + new-run-id (cddr (vector->list testdat))) + (set! new-testdat (car (mt:get-tests-for-run new-run-id (conc testname "/" item-path) '() '()))) + (hash-table-set! curr-tests-hash full-name new-testdat) ;; this could be confusing, which record should go into the lookup table? + ;; Now duplicate the test steps + (debug:print 4 *default-log-port* "Copying records in test_steps from test_id=" (db:test-get-id testdat) " to " (db:test-get-id new-testdat)) + (cdb:remote-run ;; to be replaced, note: this routine is not used currently + (lambda () + (sqlite3:execute + db + (conc "INSERT OR REPLACE INTO test_steps (test_id,stepname,state,status,event_time,comment) " + "SELECT " (db:test-get-id new-testdat) ",stepname,state,status,event_time,comment FROM test_steps WHERE test_id=?;") + (db:test-get-id testdat)) + ;; Now duplicate the test data + (debug:print 4 *default-log-port* "Copying records in test_data from test_id=" (db:test-get-id testdat) " to " (db:test-get-id new-testdat)) + (sqlite3:execute + db + (conc "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment) " + "SELECT " (db:test-get-id new-testdat) ",category,variable,value,expected,tol,units,comment FROM test_data WHERE test_id=?;") + (db:test-get-id testdat)))) + )) + prev-tests))) + +(define doc-template + '(*TOP* + (*PI* xml "version='1.0'") + (testsuite))) + +(define (runs:update-junit-test-reporter-xml run-id) + (let* ( + (junit-test-reporter (configf:lookup *configdat* "runs" "junit-test-reporter-xml")) + (junit-test-report-dir (configf:lookup *configdat* "runs" "junit-test-report-dir")) + (xml-dir (if (and junit-test-reporter (equal? junit-test-reporter "yes" )) + (if junit-test-report-dir + junit-test-report-dir + (conc (getenv "MT_LINKTREE") "/" (getenv "MT_TARGET") "/" (getenv "MT_RUNNAME"))) + #f)) + (xml-ts-name (if xml-dir + (conc (getenv "MT_TESTSUITENAME")"."(string-translate (getenv "MT_TARGET") "/" ".") "." (getenv "MT_RUNNAME")) + #f)) + (keyname (if xml-ts-name (common:get-signature xml-ts-name) #f)) + (xml-path (if xml-dir + (conc xml-dir "/" keyname ".xml") + #f)) + + (test-data (if xml-dir + (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses + #f #f ;; offset limit + #f ;; not-in + #f ;; sort-by + #f ;; sort-order + #f ;; get full data (not 'shortlist) + 0 ;; (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time + #f) + '())) + (tests-count (if xml-dir (length test-data) #f))) + (if (and junit-test-reporter (equal? junit-test-reporter "yes" )) + (begin + ;((sxml-modify! `("testsuite" insert-into (@ (name ,xml-ts-name) (tests ,tests-count)))) doc) + + (let loop ((test (car test-data)) + (tail (cdr test-data)) + (doc doc-template) + (fail-cnt 0) + (error-cnt 0)) + (let* ((test-name (vector-ref test 2)) + (test-itempath (vector-ref test 11)) + (tc-name (conc test-name (if (and test-itempath (not (equal? test-itempath ""))) (conc "." (string-translate test-itempath "/" "." )) ""))) + (test-state (vector-ref test 3)) + (comment (vector-ref test 14)) + (test-status (vector-ref test 4)) + (exc-msg (conc "No bucket for State " test-state " Status " test-status)) + (new-doc (cond + ((member test-state (list "RUNNING" )) + ((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (inProgress)))) doc)) + ((member test-state (list "LAUNCHED" "REMOTEHOSTSTART" "NOT_STARTED")) + ((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (inQueue)))) doc)) + ((member test-status (list "PASS" "WARN" "WAIVED")) + ((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name))))) doc)) + ((member test-status (list "FAIL" "CHECK")) + ((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (failure (@ (message ,comment) (type "failure")))))) doc)) + ((member test-status (list "DEAD" "KILLED" "ABORT" "PREQ_FAIL" "PREQ_DISCARDED")) + ((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (failure (@ (message ,comment) (type "error")))))) doc)) + ((member test-status (list "SKIP")) + ((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (skipped (@ (type "skipped")))))) doc)) + (else + (debug:print 0 *default-log-port* (conc "What do I do with State " test-state " Status " test-status)) + ((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (failure (@ (message ,exc-msg) (type "error")))))) doc)))) + (new-error-cnt (if (member test-status (list "DEAD" "KILLED" "ABORT" "PREQ_FAIL" "PREQ_DISCARDED")) + (+ error-cnt 1) + error-cnt)) + (new-fail-cnt (if (member test-status (list "FAIL" "CHECK")) + (+ fail-cnt 1) + fail-cnt))) + (if (null? tail) + (let* ((final-doc ((sxml-modify `("testsuite" insert-into (@ (name ,xml-ts-name) (tests ,tests-count) (errors ,error-cnt) (failures ,fail-cnt)))) new-doc))) + (debug:print 0 *default-log-port* "modify attrib error=" error-cnt " fail= " fail-cnt) + (handle-exceptions + exn + (let* ((msg ((condition-property-accessor 'exn 'message) exn))) + (debug:print 0 *default-log-port* (conc "WARNING: Failed to update file" xml-path". Message:" msg ", exn=" exn))) + + (if (not (file-exists? xml-dir)) + (create-directory xml-dir #t)) + (if (not (rmt:no-sync-get/default keyname #f)) + (begin + (rmt:no-sync-set keyname "on") + (debug:print 0 *default-log-port* "creating xml at " xml-path) + (with-output-to-file xml-path + (lambda () + (print (sxml-serializer#serialize-sxml final-doc ns-prefixes: (list (cons 'gnm "http://foo")))))) + (rmt:no-sync-del! keyname)) + (debug:print 0 *default-log-port* "Could not get the lock. Skip writing the xml file.")))) + (loop (car tail) (cdr tail) new-doc new-fail-cnt new-error-cnt)))))))) + + +;; clean cache files +(define (runs:clean-cache target runname toppath) + (if target + (if runname + (let* ((linktree (common:get-linktree)) ;; (if toppath (configf:lookup *configdat* "setup" "linktree"))) + (runtop (conc linktree "/" target "/" runname)) + (files (if (common:file-exists? runtop) + (append (glob (conc runtop "/.megatest*")) + (glob (conc runtop "/.runconfig*"))) + '()))) + (if (null? files) + (debug:print-info 2 *default-log-port* "No cached megatest or runconfigs files found. None removed.") + (begin + (debug:print-info 2 *default-log-port* "Removing cached files:\n " (string-intersperse files "\n ")) + (for-each + (lambda (f) + (handle-exceptions + exn + (debug:print 0 *default-log-port* "WARNING: Failed to remove file " f ", exn=" exn) + (delete-file f))) + files)))) + (debug:print-error 0 *default-log-port* "-clean-cache requires -runname.")) + (debug:print-error 0 *default-log-port* "-clean-cache requires -target or -reqtarg"))) + +;; set up needed environment variables given a run-id and optionally a target, itempath etc. +;; +(define (runs:set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)(intarget #f)(testname #f)(itempath #f)) + ;;(bb-check-path msg: "runs:set-megatest-env-vars entry") + (let* ((target (or intarget + (common:args-get-target) + (get-environment-variable "MT_TARGET"))) + (keys (if inkeys inkeys (rmt:get-keys))) + (keyvals (if inkeyvals inkeyvals (keys:target->keyval keys target))) + (vals (hash-table-ref/default *env-vars-by-run-id* run-id #f)) + (link-tree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree"))) + (if testname (setenv "MT_TEST_NAME" testname)) + (if itempath (setenv "MT_ITEMPATH" itempath)) + + ;; get the info from the db and put it in the cache + (if link-tree + (setenv "MT_LINKTREE" link-tree) + (debug:print-error 0 *default-log-port* "linktree not set, should be set in megatest.config in [setup] section.")) + (if (not vals) + (let ((ht (make-hash-table))) + (hash-table-set! *env-vars-by-run-id* run-id ht) + (set! vals ht) + (for-each + (lambda (key) + (hash-table-set! vals (car key) (cadr key))) + keyvals))) + ;; from the cached data set the vars + + (hash-table-for-each + vals + (lambda (key val) + (debug:print 2 *default-log-port* "setenv " key " " val) + (safe-setenv key val))) + ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1") + ;;(BB> "*env-vars-by-run-id*/runid("run-id" vals="(hash-table->alist vals)) + + (if (not (get-environment-variable "MT_TARGET"))(setenv "MT_TARGET" target)) + ;; we had a case where there was an exception generated by the hash-table-ref + ;; due to *configdat* being #f Adding a handle and exit + (let fatal-loop ((count 0)) + (handle-exceptions + exn + (let ((call-chain (get-call-chain)) + (msg ((condition-property-accessor 'exn 'message) exn))) + (if (< count 5) + (begin ;; this call is colliding, do some crude stuff to fix it. + (debug:print 0 *default-log-port* "ERROR: *configdat* was inaccessible! This should never happen. Retry #" count + ", exn=" exn) + (launch:setup force-reread: #t) + (fatal-loop (+ count 1))) + (begin + (debug:print 0 *default-log-port* "FATAL: *configdat* was inaccessible! This should never happen. Retried " count + " times. Message: " msg) + (debug:print 0 *default-log-port* "Call chain:") + (with-output-to-port *default-log-port* + (lambda () + (print "*configdat* is >>"*configdat*"<<") + (pp *configdat*) + (pp call-chain))) + + (exit 1)))) + ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1.5") + (when (or (not *configdat*) (not (hash-table? *configdat*))) + (debug:print 0 *default-log-port* "WARNING: *configdat* was inaccessible! This should never happen. Brute force reread.") + ;;(BB> "ERROR: *configdat* was inaccessible! This should never happen. Brute force reread.") + (thread-sleep! 2) ;; assuming nfs lag. + (launch:setup force-reread: #t)) + (alist->env-vars (hash-table-ref/default *configdat* "env-override" '())))) ;;;; environment is tainted HERE in this let block. + ;;(bb-check-path msg: "runs:set-megatest-env-vars block 2") + ;; Lets use this as an opportunity to put MT_RUNNAME in the environment + (let ((runname (if inrunname inrunname (rmt:get-run-name-from-id run-id)))) + (if runname + (setenv "MT_RUNNAME" runname) + (debug:print-error 0 *default-log-port* "no value for runname for id " run-id))) + (setenv "MT_RUN_AREA_HOME" *toppath*) + ;; if a testname and itempath are available set the remaining appropriate variables + (if testname (setenv "MT_TEST_NAME" testname)) + (if itempath (setenv "MT_ITEMPATH" itempath)) + ;;(bb-check-path msg: "runs:set-megatest-env-vars block 3") + (if (and testname link-tree) + (setenv "MT_TEST_RUN_DIR" (conc (getenv "MT_LINKTREE") "/" + (getenv "MT_TARGET") "/" + (getenv "MT_RUNNAME") "/" + (getenv "MT_TEST_NAME") + (if (and itempath + (not (equal? itempath ""))) + (conc "/" itempath) + "")))))) +(define (set-item-env-vars itemdat) + (for-each (lambda (item) + (debug:print 2 *default-log-port* "setenv " (car item) " " (cadr item)) + (setenv (car item) (cadr item))) + itemdat)) + +(define (runconfig:read fname target environ-patt) + (let ((ht (make-hash-table))) + (if target (hash-table-set! ht target '())) + (read-config fname ht #t environ-patt: environ-patt sections: (if target (list "default" target) #f)))) + +;; NB// to process a runconfig ensure to use environ-patt with target! +;; +(define (setup-env-defaults fname run-id already-seen keyvals #!key (environ-patt #f)(change-env #t)) + (let* ((keys (map car keyvals)) + (thekey (if keyvals + (string-intersperse (map (lambda (x)(if x x "-na-")) (map cadr keyvals)) "/") + (or (common:args-get-target) + (get-environment-variable "MT_TARGET") + (begin + (debug:print-error 0 *default-log-port* "setup-env-defaults called with no run-id or -target or -reqtarg") + "nothing matches this I hope")))) + ;; Why was system disallowed in the reading of the runconfigs file? + ;; NOTE: Should be setting env vars based on (target|default) + (confdat (runconfig:read fname thekey environ-patt)) + (whatfound (make-hash-table)) + (finaldat (make-hash-table)) + (sections (list "default" thekey))) + (if (not *target*)(set! *target* thekey)) ;; may save a db access or two but repeats db:get-target code + (debug:print 4 *default-log-port* "Using key=\"" thekey "\"") + + (if change-env + (for-each ;; NB// This can be simplified with new content of keyvals having all that is needed. + (lambda (keyval) + (safe-setenv (car keyval)(cadr keyval))) + keyvals)) + + (for-each + (lambda (section) + (let ((section-dat (hash-table-ref/default confdat section #f))) + (if section-dat + (for-each + (lambda (envvar) + (let ((val (cadr (assoc envvar section-dat)))) + (hash-table-set! whatfound section (+ (hash-table-ref/default whatfound section 0) 1)) + (if (and (string? envvar) + (string? val) + change-env) + (safe-setenv envvar val)) + (hash-table-set! finaldat envvar val))) + (map car section-dat))))) + sections) + (if already-seen + (begin + (debug:print 2 *default-log-port* "Key settings found in runconfigs.config:") + (for-each (lambda (fullkey) + (debug:print 2 *default-log-port* (format #f "~20a ~a\n" fullkey (hash-table-ref/default whatfound fullkey 0)))) + sections) + (debug:print 2 *default-log-port* "---") + (set! *already-seen-runconfig-info* #t))) + ;; finaldat ;; was returning this "finaldat" which would be good but conflicts with other uses + confdat + )) + +(define (set-run-config-vars run-id keyvals targ-from-db) + (push-directory *toppath*) ;; the push/pop doesn't appear to do anything ... + (let ((runconfigf (conc *toppath* "/runconfigs.config")) + (targ (or (common:args-get-target) + targ-from-db + (get-environment-variable "MT_TARGET")))) + (pop-directory) + (if (common:file-exists? runconfigf) + (setup-env-defaults runconfigf run-id #t keyvals + environ-patt: (conc "(default" + (if targ + (conc "|" targ ")") + ")"))) + (debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf)))) + +;; given (a (b c) d) return ((a b d)(a c d)) +;; NOTE: this feels like it has been done before - perhaps with items handling? +;; +(define (runconfig:combinations inlst) + (let loop ((hed (car inlst)) + (tal (cdr inlst)) + (res '())) + ;; (print "res: " res " hed: " hed) + (if (list? hed) + (let ((newres (if (null? res) ;; first time through convert incoming items to list of items + (map list hed) + (apply append + (map (lambda (r) ;; iterate over items in res + (map (lambda (h) ;; iterate over items in hed + (append r (list h))) + hed)) + res))))) + ;; (print "newres1: " newres) + (if (null? tal) + newres + (loop (car tal)(cdr tal) newres))) + (let ((newres (if (null? res) + (list (list hed)) + (map (lambda (r) + (append r (list hed))) + res)))) + ;; (print "newres2: " newres) + (if (null? tal) + newres + (loop (car tal)(cdr tal) newres)))))) + +;; multi-part expand +;; Given a/b,c,d/e,f return a/b/e a/b/f a/c/e a/c/f a/d/e a/d/f +;; +(define (runconfig:expand target) + (let* ((parts (map (lambda (x) + (string-split x ",")) + (string-split target "/")))) + (map (lambda (x) + (string-intersperse x "/")) + (runconfig:combinations parts)))) + +;; multi-target expansion +;; a/b/c/x,y,z a/b/d/x,y => a/b/c/x a/b/c/y a/b/c/z a/b/d/x a/b/d/y +;; +(define (runconfig:expand-target target-strs) + (delete-duplicates + (apply append (map runconfig:expand (string-split target-strs " "))))) + +#| + (if (null? target-strs) + '() + (let loop ((hed (car target-strs)) + (tal (cdr target-strs)) + (res '())) + ;; first break all parts into individual target patterns + (if (string-index hed " ") ;; this is a multi-target target + (let ((newres (append (string-split hed " ") res))) + (runconfig:expand-target newres)) + (if (string-index hed ",") ;; this is a multi-target where one or more parts are comma separated + +|# + +;; Spec for End of test +;; At end of each test call, after marking self as COMPLETED do run-state-status-rollup +;; At transition to run COMPLETED/X do hooks +;; Definition: test_dead if event_time + duration + 1 minute? < current_time AND +;; we can prove the process is not alive (ssh host pstree -A pid) +;; if dead safe to mark the test as killed in the db +;; State/status table +;; new +;; 100% COMPLETED/ (PASS,FAIL,ABORT etc.) ==> COMPLETED / X where X is same as itemized rollup +;; > 3 RUNNING with not test_dead do nothing (run should already be RUNNING/ na +;; > 0 RUNNING and test_dead then send KILLREQ ==> COMPLETED +;; 0 RUNNING ==> this is actually the first condition, should not get here +(define *last-rollup* 0) +(define (launch:end-of-run-check run-id ) + (let* ((not-completed-cnt (rmt:get-not-completed-cnt run-id)) + (running-cnt (rmt:get-count-tests-running-for-run-id run-id)) + (all-test-launched (rmt:get-var (conc "lunch-complete-" run-id))) + (current-state-status (rmt:get-run-state-status run-id)) + (current-state (car current-state-status)) ;; (rmt:get-run-state run-id)) + (current-status (cdr current-state-status))) ;; (rmt:get-run-status run-id))) + ;;get-vars run-id to query metadata table to check if all completed. if all-test-launched = yes then only not-completed-cnt = 0 means everyting is completed if no entry found in the table do nothing + (debug:print 0 *default-log-port* "Running test cnt :" running-cnt) + ;; + ;; TODO: add a final rollup when run is done (if there isn't one already) + ;; + (if (or (< running-cnt 3) ;; have only few running + (> (- (current-seconds) *last-rollup*) 10)) ;; or haven't rolled up in past ten seconds + (begin + (rmt:set-state-status-and-roll-up-run run-id current-state current-status) + (set! *last-rollup* (current-seconds)))) + (runs:update-junit-test-reporter-xml run-id) + (cond + ((and all-test-launched (eq? not-completed-cnt 0) (equal? all-test-launched "yes" )) + (if (and (equal? (rmt:get-var (conc "end-of-run-" run-id)) "no") (common:simple-lock (conc "endOfRun" run-id))) + (begin + (debug:print 4 *default-log-port* "look for post hook. currseconds: " (current-seconds) " EOR " (rmt:get-var (conc "end-of-run-" run-id))) + (debug:print 0 *default-log-port* "End of Run Detected.") + (rmt:set-var (conc "end-of-run-" run-id) "yes") + ;(thread-sleep! 10) + (runs:run-post-hook run-id) + (debug:print 4 *default-log-port* "currseconds: " (current-seconds)" eor: " (rmt:get-var (conc "end-of-run-" run-id))) + (common:simple-unlock (conc "endOfRun" run-id))) + (debug:print 0 *default-log-port* "End of Run Detected but not running post hook. This should happen when eor is set to yes. This will happen only when 2 tests exit at smae time. eor= " (rmt:get-var (conc "end-of-run-" run-id))))) + ((> running-cnt 3) + (debug:print 0 *default-log-port* "There are " running-cnt " tests running." )) + ((> running-cnt 0) + (debug:print 0 *default-log-port* "running cnt > 0 but <= 3 kill-running-tests-if-dead" ) + (let ((kill-cnt (launch:kill-tests-if-dead run-id))) + (if (and all-test-launched (equal? all-test-launched "yes") (eq? kill-cnt running-cnt)) + (launch:end-of-run-check run-id)))) ;;todo + (else (debug:print 0 *default-log-port* "Should it get here?? May be everything is not launched yet. Running test cnt:" running-cnt " Not completed test cnt:" not-completed-cnt) + (let* ((not-completed-tests (rmt:get-tests-for-run run-id "%" `("NOT_STARTED" "RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f))) + (if (> (length not-completed-tests) 0) + (let loop ((running-test (car not-completed-tests)) + (tal (cdr not-completed-tests))) + (let* ((test-name (vector-ref running-test 2)) + (item-path (vector-ref running-test 11))) + (debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed") + (if (not (null? tal)) + (loop (car tal) (cdr tal))))))))))) + +(define (launch:kill-tests-if-dead run-id) + (let* ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f))) + (let loop ((running-test (car running-tests)) + (tal (cdr running-tests)) + (kill-cnt 0)) + (let* ((test-name (vector-ref running-test 2)) + (item-path (vector-ref running-test 11)) + (test-id (vector-ref running-test 0)) + (host (vector-ref running-test 6)) + (pid (rmt:test-get-top-process-pid run-id test-id)) + (event-time (vector-ref running-test 5)) + (duration (vector-ref running-test 12)) + (flag 0) + (curr-time (current-seconds))) + (if (and (< (+ event-time duration 600) curr-time) (not (commonmod:is-test-alive host pid))) ;;test has not updated duration in last 10 min then likely its not running but confirm before marking it as killed + (begin + (debug:print 0 *default-log-port* "test " test-name "/" item-path " needs to be killed") + (set! flag 1) + (rmt:set-state-status-and-roll-up-items run-id test-name item-path "KILLREQ" "n/a" #f))) + (if (not (null? tal)) + (loop (car tal) (cdr tal) (+ kill-cnt flag)) + (+ kill-cnt flag)))))) + +;; 1. look though disks list for disk with most space +;; 2. create run dir on disk, path name is meaningful +;; 3. create link from run dir to megatest runs area +;; 4. remotely run the test on allocated host +;; - could be ssh to host from hosts table (update regularly with load) +;; - could be netbatch +;; (launch-test db (cadr status) test-conf)) +(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params) + (assert runname "FATAL: launch-test called with no runname") + (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex + (let* ( ;; (lock-key (conc "test-" test-id)) + ;; (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) + ;; (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds + ;; (if (car lock) + ;; #t + ;; (if (> (current-seconds) expire-time) + ;; (begin + ;; (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path) + ;; (rmt:no-sync-del! lock-key) ;; destroy the lock + ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; + ;; (begin + ;; (thread-sleep! 1) + ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)))))) + (item-path (item-list->path itemdat)) + (contour #f)) ;; NOT READY FOR THIS (args:get-arg "-contour"))) + (let loop ((delta (- (current-seconds) *last-launch*)) + (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 0))) + (if (> launch-delay delta) + (begin + ;; (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay. + ;; (debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds")) + (thread-sleep! (- launch-delay delta)) + (loop (- (current-seconds) *last-launch*) launch-delay)))) + (change-directory *toppath*) + (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars) + (append + (list + (list "MT_RUN_AREA_HOME" *toppath*) + (list "MT_TEST_NAME" test-name) + (list "MT_RUNNAME" runname) + (list "MT_ITEMPATH" item-path) + (list "MT_CONTOUR" contour) + ) + itemdat)) + (let* ((tregistry (tests:get-all)) ;; third param (below) is system-allowed + ;; for tconfig, why do we allow fallback to test-conf? + (tconfig (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t) + (begin + (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.") + test-conf))) ;; force re-read now that all vars are set + (useshell (let ((ush (configf:lookup *configdat* "jobtools" "useshell"))) + (if ush + (if (equal? ush "no") ;; must use "no" to NOT use shell + #f + ush) + #t))) ;; default is yes + (runscript (configf:lookup tconfig "setup" "runscript")) + (ezsteps (> (length (hash-table-ref/default tconfig "ezsteps" '())) 0)) ;; don't send all the steps, could be big, just send a flag + (subrun (> (length (hash-table-ref/default tconfig "subrun" '())) 0)) ;; send a flag to process a subrun + ;; (diskspace (configf:lookup tconfig "requirements" "diskspace")) + ;; (memory (configf:lookup tconfig "requirements" "memory")) + ;; (hosts (configf:lookup *configdat* "jobtools" "workhosts")) ;; I'm pretty sure this was never completed + (remote-megatest (configf:lookup *configdat* "setup" "executable")) + (run-time-limit (or (configf:lookup tconfig "requirements" "runtimelim") + (configf:lookup *configdat* "setup" "runtimelim"))) + ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to + ;; allow running from dashboard. Extract the path + ;; from the called megatest and convert dashboard + ;; or dboard to megatest + (local-megatest (common:find-local-megatest)) + #;(local-megatest (let* ((lm (car (argv))) + (dir (pathname-directory lm)) + (exe (pathname-strip-directory lm))) + (conc (if dir (conc dir "/") "") + (case (string->symbol exe) + ((dboard) "../megatest") + ((mtest) "../megatest") + ((dashboard) "megatest") + (else exe))))) + (launcher (common:get-launcher *configdat* test-name item-path)) ;; (configf:lookup *configdat* "jobtools" "launcher")) + (test-sig (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path + (work-area #f) + (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all + (diskpath #f) + (cmdparms #f) + (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) + (mt-bindir-path #f) + (testinfo (rmt:get-test-info-by-id run-id test-id)) + (mt_target (string-intersperse (map cadr keyvals) "/")) + (debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '()) + (if (args:get-arg "-logging")(list "-logging") '()) + (if (configf:lookup *configdat* "misc" "profilesw") + (list (configf:lookup *configdat* "misc" "profilesw")) + '())))) + ;; (if hosts (set! hosts (string-split hosts))) + ;; set the megatest to be called on the remote host + (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest")) + (set! mt-bindir-path (pathname-directory remote-megatest)) + (if launcher (set! launcher (string-split launcher))) + ;; set up the run work area for this test + (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run + (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir + (begin + (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path) + (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record + + ;; prevent overlapping actions - set to LAUNCHED as early as possible + ;; + ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail + (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) + (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f) + ;; (pp (hash-table->alist tconfig)) + (set! diskpath (get-best-disk *configdat* tconfig)) + (debug:print 2 *default-log-port* "best disk path = " diskpath) + (if diskpath + (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat))) + (set! work-area (car dat)) + (set! toptest-work-area (cadr dat)) + (debug:print-info 2 *default-log-port* "Using work area " work-area)) + (begin + (set! work-area (conc test-path "/tmp_run")) + (create-directory work-area #t) + (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run"))) + (set! cmdparms (base64:base64-encode + (z3:encode-buffer + (with-output-to-string + (lambda () ;; (list 'hosts hosts) + (write (list (list 'testpath test-path) + ;; (list 'transport (conc *transport-type*)) + ;; (list 'serverinf *server-info*) + #;(list 'homehost (let* ((hhdat (server:get-homehost))) + (if hhdat + (car hhdat) + #f))) + #;(list 'serverurl (if *runremote* ;; would like to add this back ... WORK NEEDED + (remote-server-url *runremote*) + #f)) ;; + (list 'areaname (common:get-testsuite-name)) + (list 'toppath *toppath*) + (list 'work-area work-area) + (list 'test-name test-name) + (list 'runscript runscript) + (list 'run-id run-id ) + (list 'test-id test-id ) + ;; (list 'item-path item-path ) + (list 'itemdat itemdat ) + (list 'megatest remote-megatest) + (list 'ezsteps ezsteps) + (list 'subrun subrun) + (list 'target mt_target) + (list 'contour contour) + (list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f)) + (list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '())) + (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) + (list 'runname runname) + (list 'mt-bindir-path mt-bindir-path)))))))) + + (setenv "MT_CMDINFO" cmdparms) ;; setting this for use in nblauncher + + ;; clean out step records from previous run if they exist + ;; (rmt:delete-test-step-records run-id test-id) + ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway + (if (common:file-exists? work-area) + (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir + (cond + ;; ((and launcher hosts) ;; must be using ssh hostname + ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) + ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms)))) + (launcher + (set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) + ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms)))) + (else + (if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section")) + (set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" "")))))) + ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" ""))))) + (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm")))) + (debug:print 1 *default-log-port* "Launching " work-area) + ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done + (debug:print 4 *default-log-port* "fullcmd: " fullcmd) + (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible. + (let* ((commonprevvals (alist->env-vars + (hash-table-ref/default *configdat* "env-override" '()))) + (miscprevvals (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute" + (append (list (list "MT_TEST_RUN_DIR" work-area) + (list "MT_TEST_NAME" test-name) + (list "MT_ITEM_INFO" (conc itemdat)) + (list "MT_RUNNAME" runname) + (list "MT_TARGET" mt_target) + (list "MT_ITEMPATH" item-path) + ) + itemdat))) + (testprevvals (alist->env-vars + (hash-table-ref/default tconfig "pre-launch-env-overrides" '()))) + ;; Launchwait defaults to true, must override it to turn off wait + (launchwait (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t)) + (launch-results-prev (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed. + process:cmd-run-with-stderr-and-exitcode->list + process-run) + (if useshell + (let ((cmdstr (string-intersperse fullcmd " "))) + (if launchwait + cmdstr + (conc cmdstr " >> mt_launch.log 2>&1 &"))) + (car fullcmd)) + (if useshell + '() + (cdr fullcmd)))) + (success (if launchwait (equal? 0 (cadr launch-results-prev)) #t)) + (launch-results (if launchwait (car launch-results-prev) launch-results-prev))) + (if (not success) + (tests:test-set-status! run-id test-id "COMPLETED" "DEAD" "launcher failed; exited non-zero; check mt_launch.log" #f)) ;; (if launch-results launch-results "FAILED")) + (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork. + ;; (rmt:no-sync-del! lock-key) ;; release the lock for starting this test + (if (not launchwait) ;; give the OS a little time to allow the process to start + (thread-sleep! 0.01)) + (with-output-to-file "mt_launch.log" + (lambda () + (print "LAUNCHCMD: " (string-intersperse fullcmd " ")) + (if (list? launch-results) + (apply print launch-results) + (print "NOTE: launched \"" fullcmd "\"\n but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n if you have problems with this")) + #:append)) + (debug:print 2 *default-log-port* "Launching completed, updating db") + (debug:print 2 *default-log-port* "Launch results: " launch-results) + (if (not launch-results) + (begin + (debug:print 0 *default-log-port* "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now") + ;; (sqlite3:finalize! db) + ;; good ole "exit" seems not to work + ;; (_exit 9) + ;; but this hack will work! Thanks go to Alan Post of the Chicken email list + ;; NB// Is this still needed? Should be safe to go back to "exit" now? + (process-signal (current-process-id) signal/kill) + )) + (alist->env-vars miscprevvals) + (alist->env-vars testprevvals) + (alist->env-vars commonprevvals) + launch-results)) + (change-directory *toppath*) + (thread-sleep! (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.0)))) + + +;; gather available information, if legit read configs in this order: +;; +;; if have cache; +;; read it a return it +;; else +;; megatest.config (do not cache) +;; runconfigs.config (cache if all vars avail) +;; megatest.config (cache if all vars avail) +;; returns: +;; *toppath* +;; side effects: +;; sets; *configdat* (megatest.config info) +;; *runconfigdat* (runconfigs.config info) +;; *configstatus* (status of the read data) +;; +(define (launch:setup #!key (force-reread #f) (areapath #f)) + (mutex-lock! *launch-setup-mutex*) + ;; this stops the train quickly for new processes + (if (and *toppath* + (file-exists? (conc *toppath*"/stop-the-train"))) + (begin + (debug:print 0 *default-log-port* "ERROR: found file "*toppath*"/stop-the-train, exiting immediately") + (exit 1))) + (if (and *toppath* + (eq? *configstatus* 'fulldata) (not force-reread)) ;; got it all + (begin + (debug:print 2 *default-log-port* "NOTE: skipping launch:setup-body call since we have fulldata") + (mutex-unlock! *launch-setup-mutex*) + *toppath*) + (let ((res (launch:setup-body force-reread: force-reread areapath: areapath))) + (mutex-unlock! *launch-setup-mutex*) + res))) + +;; cache the runconfigs in $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME/.runconfig +;; +(define (full-runconfigs-read) +;; in the envprocessing branch the below code replaces the further below code +;; (if (eq? *configstatus* 'fulldata) +;; *runconfigdat* +;; (begin +;; (launch:setup) +;; *runconfigdat*))) + + (let* ((rundir (if (and (getenv "MT_LINKTREE")(getenv "MT_TARGET")(getenv "MT_RUNNAME")) + (conc (getenv "MT_LINKTREE") "/" (getenv "MT_TARGET") "/" (getenv "MT_RUNNAME")) + #f)) + (cfgf (if rundir (conc rundir "/.runconfig." megatest-version "-" megatest-fossil-hash) #f))) + (if (and cfgf + (common:file-exists? cfgf) + (file-write-access? cfgf) + (common:use-cache?)) + (configf:read-alist cfgf) + (let* ((keys (rmt:get-keys)) + (target (common:args-get-target)) + (key-vals (if target (keys:target->keyval keys target) #f)) + (sections (if target (list "default" target) #f)) + (data (begin + (setenv "MT_RUN_AREA_HOME" *toppath*) + (if key-vals + (for-each (lambda (kt) + (setenv (car kt) (cadr kt))) + key-vals)) + ;; (read-config (conc *toppath* "/runconfigs.config") #f #t sections: sections)))) + (runconfig:read (conc *toppath* "/runconfigs.config") target #f)))) + (if (and rundir ;; have all needed variabless + (directory-exists? rundir) + (file-write-access? rundir)) + (begin + (if (not (common:in-running-test?)) + (configf:write-alist data cfgf)) + ;; force re-read of megatest.config - this resolves circular references between megatest.config + (launch:setup force-reread: #t) + ;; (launch:cache-config) ;; there are two independent config cache locations, turning this one off for now. MRW. + )) ;; we can safely cache megatest.config since we have a valid runconfig + data)))) + + +(define (get-best-disk confdat testconfig) + (let* ((disks (or (and testconfig (hash-table-ref/default testconfig "disks" #f)) + (hash-table-ref/default confdat "disks" #f))) + (minspace (let ((m (configf:lookup confdat "setup" "minspace"))) + (string->number (or m "10000"))))) + (if disks + (let ((res (common:get-disk-with-most-free-space disks minspace))) + (if res + (cdr res) + ;; else if no valid disks... + (begin + (debug:print 0 *default-log-port* "WARNING: No valid disks or no disk with enough space found from " disks) + (if (null? disks) + (cons 1 (conc *toppath* "/runs")) + + ;; else try to create the directories anyway. + (let ((paths (sort disks (lambda (x y) (> (string-length (cadr x)) (string-length (cadr y))))))) + (let loop ((head (car paths)) (tail (cdr paths))) + (let ((result (handle-exceptions exn + (begin + (debug:print 0 *default-log-port* "failed to create dir " (cadr head) ", exn=" exn) + #f) + (create-directory (cadr head) #t)))) + (if result + result + (if (null? tail) + (begin + (debug:print 0 *default-log-port* "Using toppath/runs") + (conc *toppath* "/runs") + ) + (loop (car tail) (cdr tail)))))) + ) + ) ;; if null? disks + ) ;; if not res + ) + ) + ;; no disks definition - use toppath/runs, fall back to currdir/runs + (let* ((toppath (or *toppath* + (common:get-toppath *toppath*) + (begin + (debug:print-error 0 *default-log-port* "Creating runs dir in current directory, this is probably not what you wanted. Please check your setup.") + (current-directory)))) + (runsdir (conc toppath "/runs"))) + (if (not (file-exists? runsdir))(create-directory runsdir)) + runsdir) + ))) ;; the code creates the necessary directories if it does not exist and returns the path. + +;; Desired directory structure: +;; +;; - - -. +;; | +;; v +;; - - -|- +;; +;; dir stored in test is: +;; +;; - - [ - ] +;; +;; All log file links should be stored relative to the top of link path +;; +;; - [ - ] +;; +(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat #!key (remtries 2)) + (let* ((item-path (if (string? itemdat) itemdat (item-list->path itemdat))) ;; if pass in string - just use it + (runname (if (string? run-info) ;; if we pass in a string as run-info use it as run-name. + run-info + (db:get-value-by-header (db:get-rows run-info) + (db:get-header run-info) + "runname"))) + (contour #f) ;; NOT READY FOR THIS (args:get-arg "-contour")) + ;; convert back to db: from rdb: - this is always run at server end + (target (string-intersperse (map cadr keyvals) "/")) + + (not-iterated (equal? "" item-path)) + + ;; all tests are found at /test-base or /test-base + (testtop-base (conc target "/" runname "/" testname)) + (test-base (conc testtop-base (if not-iterated "" "/") item-path)) + + ;; nb// if itempath is not "" then it is prefixed with "/" + (toptest-path (conc disk-path (if contour (conc "/" contour) "") "/" testtop-base)) + (test-path (conc disk-path (if contour (conc "/" contour) "") "/" test-base)) + + ;; ensure this exists first as links to subtests must be created there + (linktree (common:get-linktree)) + ;; WAS: (let ((rd (configf:lookup *configdat* "setup" "linktree"))) + ;; (if rd rd (conc *toppath* "/runs")))) + ;; which seems wrong ... + + (lnkbase (conc linktree (if contour (conc "/" contour) "") "/" target "/" runname)) + (lnkpath (conc lnkbase "/" testname)) + (lnkpathf (conc lnkpath (if not-iterated "" "/") item-path)) + (lnktarget (conc lnkpath "/" item-path))) + + ;; Update the rundir path in the test record for all, rundir=physical, shortdir=logical + ;; rundir shortdir + (rmt:general-call 'test-set-rundir-shortdir run-id lnkpathf test-path testname item-path run-id) + + (debug:print 2 *default-log-port* "INFO:\n lnkbase=" lnkbase "\n lnkpath=" lnkpath "\n toptest-path=" toptest-path "\n test-path=" test-path) + (if (not (common:file-exists? linktree)) + (begin + (debug:print 0 *default-log-port* "WARNING: linktree did not exist! Creating it now at " linktree) + (create-directory linktree #t))) ;; (system (conc "mkdir -p " linktree)))) + ;; create the directory for the tests dir links, this is needed no matter what... try up to three times + (let loop ((done 3)) + (let ((success (if (and (not (common:directory-exists? lnkbase)) + (not (common:file-exists? lnkbase))) + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* "Problem creating linktree base at " lnkbase ", exn=" exn) + (print-error-message exn (current-error-port)) + #t) + (create-directory lnkbase #t) + #f)))) + (if (and (not success)(> done 0)) + (loop (- done 1))))) + + ;; update the toptest record with its location rundir, cache the path + ;; This wass highly inefficient, one db write for every subtest, potentially + ;; thousands of unnecessary updates, cache the fact it was set and don't set it + ;; again. + + ;; Now create the link from the test path to the link tree, however + ;; if the test is iterated it is necessary to create the parent path + ;; to the iteration. use pathname-directory to trim the path by one + ;; level + (if (not not-iterated) ;; i.e. iterated + (let ((iterated-parent (pathname-directory (conc lnkpath "/" item-path)))) + (debug:print-info 2 *default-log-port* "Creating iterated parent " iterated-parent) + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* " Failed to create directory " iterated-parent ((condition-property-accessor 'exn 'message) exn) + ", continuing but link tree may be corrupted, exn=" exn) + #;(exit 1)) + (create-directory iterated-parent #t)))) + + (if (symbolic-link? lnkpath) + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* " Failed to remove symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) + ", continuing but link tree may be corrupted. exn=" exn) + #;(exit 1)) + (delete-file lnkpath))) + + (if (not (or (common:file-exists? lnkpath) + (symbolic-link? lnkpath))) + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* " Failed to create symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) + ", continuing but link tree may be corrupted. exn=" exn) + #;(exit 1)) + (create-symbolic-link toptest-path lnkpath))) + + ;; NB - This was not working right - some top tests are not getting the path set!!! + ;; + ;; Do the setting of this record after the paths are created so that the shortdir can + ;; be set to the real directory location. This is safer for future clean up if the link + ;; tree is damaged or lost. + ;; + (if (not (hash-table-ref/default *toptest-paths* testname #f)) + (let* ((testinfo (rmt:get-test-info-by-id run-id test-id)) ;; run-id testname item-path)) + (curr-test-path (if testinfo ;; (filedb:get-path *fdb* + ;; (db:get-path dbstruct + ;; (rmt:sdb-qry 'getstr + (db:test-get-rundir testinfo) ;; ) ;; ) + #f))) + (hash-table-set! *toptest-paths* testname curr-test-path) + ;; NB// Was this for the test or for the parent in an iterated test? + (rmt:general-call 'test-set-rundir-shortdir run-id lnkpath + (if (common:file-exists? lnkpath) + ;; (resolve-pathname lnkpath) + (common:nice-path lnkpath) + lnkpath) + testname "" run-id) + ;; (rmt:general-call 'test-set-rundir run-id lnkpath testname "") ;; toptest-path) + (if (or (not curr-test-path) + (not (directory-exists? toptest-path))) + (begin + (debug:print-info 2 *default-log-port* "Creating " toptest-path " and link " lnkpath) + (handle-exceptions + exn + (if (directory-exists? toptest-path) ;; it was likely created in parallel + #t + (begin + (debug:print-info 0 *default-log-port* "failed to create directory " toptest-path ", exn=" exn) + #f)) + (create-directory toptest-path #t)) + (hash-table-set! *toptest-paths* testname toptest-path))))) + + ;; The toptest path has been created, the link to the test in the linktree has + ;; been created. Now, if this is an iterated test the real test dir must be created + (if (not not-iterated) ;; this is an iterated test + (begin ;; (let ((lnktarget (conc lnkpath "/" item-path))) + (debug:print 2 *default-log-port* "Setting up sub test run area") + (debug:print 2 *default-log-port* " - creating run area in " test-path) + (handle-exceptions + exn + (if (directory-exists? test-path) + #t + (begin + (debug:print-error 0 *default-log-port* " Failed to create directory " test-path ((condition-property-accessor 'exn 'message) exn) + ", exiting, exn=" exn) + (exit 1))) + (create-directory test-path #t)) + (debug:print 2 *default-log-port* + " - creating link from: " test-path "\n" + " to: " lnktarget) + + ;; If there is already a symlink delete it and recreate it. + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* " Failed to re-create link " lnktarget ((condition-property-accessor 'exn 'message) exn) ", exiting, exn=" exn) + (exit)) + (if (symbolic-link? lnktarget) (delete-file lnktarget)) + (if (not (common:file-exists? lnktarget)) (create-symbolic-link test-path lnktarget))))) + + (if (not (directory? test-path)) + (create-directory test-path #t)) ;; this is a hack, I don't know why out of the blue this path does not exist sometimes + + (if (and test-src-path (directory? test-path)) + (begin + (launch:test-copy test-src-path test-path) + (list lnkpathf lnkpath )) + (if (and test-src-path (> remtries 0)) + (begin + (debug:print-error 0 *default-log-port* "Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries) + ;; + (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat remtries: (- remtries 1))) + (list #f #f))))) + +(define (launch:setup-body #!key (force-reread #f) (areapath #f)) + (if (and (eq? *configstatus* 'fulldata) + *toppath* + (not force-reread)) ;; no need to reprocess + *toppath* ;; return toppath + (let* ((use-cache (common:use-cache?)) ;; BB- use-cache checks *configdat* for use-cache setting. We do not have *configdat*. Bootstrapping problem here. + (toppath (common:get-toppath areapath)) + (target (common:args-get-target)) + (sections (if target (list "default" target) #f)) ;; for runconfigs + (mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config + (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) + ;; checking for null cachefiles should not be necessary, I was seeing error car of '(), might be a chicken bug or a red herring ... + (mtcachef (if (null? cachefiles) + #f + (car cachefiles))) ;; (and cachedir (conc cachedir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash))) + (rccachef (if (null? cachefiles) + #f + (cdr cachefiles)))) ;; (and cachedir (conc cachedir "/" ".runconfigs.cfg-" megatest-version "-" megatest-fossil-hash))) + ;; (cancreate (and cachedir (common:file-exists? cachedir)(file-write-access? cachedir) (not (common:in-running-test?))))) + (set! *toppath* toppath) ;; This is needed when we are running as a test using CMDINFO as a datasource + ;;(BB> "launch:setup-body -- cachefiles="cachefiles) + (cond + ;; if mtcachef exists just read it, however we need to assume toppath is available in $MT_RUN_AREA_HOME + ((and (not force-reread) + mtcachef rccachef + use-cache + (get-environment-variable "MT_RUN_AREA_HOME") + (common:file-exists? mtcachef) + (common:file-exists? rccachef)) + ;;(BB> "launch:setup-body -- cond branch 1 - use-cache") + (set! *configdat* (configf:read-alist mtcachef)) + ;;(BB> "launch:setup-body -- 1 set! *configdat*="*configdat*) + (set! *runconfigdat* (configf:read-alist rccachef)) + (set! *configinfo* (list *configdat* (get-environment-variable "MT_RUN_AREA_HOME"))) + (set! *configstatus* 'fulldata) + (set! *toppath* (get-environment-variable "MT_RUN_AREA_HOME")) + *toppath*) + ;; there are no existing cached configs, do full reads of the configs and cache them + ;; we have all the info needed to fully process runconfigs and megatest.config + ((and ;; (not force-reread) ;; force-reread is irrelevant in the AND, could however OR it? + mtcachef + rccachef) ;; BB- why are we doing this without asking if caching is desired? + ;;(BB> "launch:setup-body -- cond branch 2") + (let* ((first-pass (find-and-read-config ;; NB// sets MT_RUN_AREA_HOME as side effect + mtconfig + environ-patt: "env-override" + given-toppath: toppath + pathenvvar: "MT_RUN_AREA_HOME")) + (first-rundat (let ((toppath (if toppath + toppath + (car first-pass)))) + (read-config ;; (conc toppath "/runconfigs.config") ;; this should be converted to runconfig:read but it is non-trivial, leaving it for now. + (conc (if (string? toppath) + toppath + (get-environment-variable "MT_RUN_AREA_HOME")) + "/runconfigs.config") + *runconfigdat* #t + sections: sections)))) + (set! *runconfigdat* first-rundat) + (if first-pass ;; + (begin + ;;(BB> "launch:setup-body -- \"first-pass\"=first-pass") + (set! *configdat* (car first-pass)) + ;;(BB> "launch:setup-body -- 2 set! *configdat*="*configdat*) + (set! *configinfo* first-pass) + (set! *toppath* (or toppath (cadr first-pass))) ;; use the gathered data unless already have it + (set! toppath *toppath*) + (if (not *toppath*) + (begin + (debug:print-error 0 *default-log-port* "you are not in a megatest area!") + (exit 1))) + (setenv "MT_RUN_AREA_HOME" *toppath*) + ;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it + (let* ((keys (common:list-or-null (rmt:get-keys) + message: "Failed to retrieve keys in launch.scm. Please report this to the developers.")) + (key-vals (keys:target->keyval keys target)) + (linktree (common:get-linktree)) ;; (or (getenv "MT_LINKTREE")(if *configdat* (configf:lookup *configdat* "setup" "linktree") #f))) + ; (if *configdat* + ; (configf:lookup *configdat* "setup" "linktree") + ; (conc *toppath* "/lt")))) + (second-pass (find-and-read-config + mtconfig + environ-patt: "env-override" + given-toppath: toppath + pathenvvar: "MT_RUN_AREA_HOME")) + (runconfigdat (begin ;; this read of the runconfigs will see any adjustments made by re-reading megatest.config + (for-each (lambda (kt) + (setenv (car kt) (cadr kt))) + key-vals) + (read-config (conc toppath "/runconfigs.config") *runconfigdat* #t ;; consider using runconfig:read some day ... + sections: sections))) + (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) + (mtcachef (car cachefiles)) + (rccachef (cdr cachefiles))) + ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "/p/fdk/gwa/lefkowit/mtTesting/qa/primbeqa/links/p1222/11/PDK_r1.1.1/prim/clean/pcell_testgen/.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342 + ;; TODO - consider 1) using simple-lock to bracket cache write + ;; 2) cache in hash on server, since need to do rmt: anyway to lock. + + (if rccachef + (common:fail-safe + (lambda () + (configf:write-alist runconfigdat rccachef)) + (conc "Could not write cache file - "rccachef))) + (if mtcachef + (common:fail-safe + (lambda () + (configf:write-alist *configdat* mtcachef)) + (conc "Could not write cache file - "mtcachef))) + (set! *runconfigdat* runconfigdat) + (if (and rccachef mtcachef) (set! *configstatus* 'fulldata)))) + ;; no configs found? should not happen but let's try to recover gracefully, return an empty hash-table + (set! *configdat* (make-hash-table)) + ))) + + ;; else read what you can and set the flag accordingly + ;; here we don't have either mtconfig or rccachef + (else + ;;(BB> "launch:setup-body -- cond branch 3 - else") + (let* ((cfgdat (find-and-read-config + (or (args:get-arg "-config") "megatest.config") + environ-patt: "env-override" + given-toppath: (get-environment-variable "MT_RUN_AREA_HOME") + pathenvvar: "MT_RUN_AREA_HOME"))) + + (if (and cfgdat (list? cfgdat) (> (length cfgdat) 0) (hash-table? (car cfgdat))) + (let* ((toppath (or (get-environment-variable "MT_RUN_AREA_HOME")(cadr cfgdat))) + (rdat (read-config (conc toppath ;; convert this to use runconfig:read! + "/runconfigs.config") *runconfigdat* #t sections: sections))) + (set! *configinfo* cfgdat) + (set! *configdat* (car cfgdat)) + (set! *runconfigdat* rdat) + (set! *toppath* toppath) + (set! *configstatus* 'partial)) + (begin + (debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.") + (exit 2)))))) + ;; COND ends here. + + ;; additional house keeping + (let* ((linktree (or (common:get-linktree) + (conc *toppath* "/lt")))) + (if linktree + (begin + (if (not (common:file-exists? linktree)) + (begin + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* "Something went wrong when trying to create linktree dir at " linktree) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) + (exit 1)) + (create-directory linktree #t)))) + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* "Something went wrong when trying to create link to linktree at " *toppath*) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) + (let ((tlink (conc *toppath* "/lt"))) + (if (not (common:file-exists? tlink)) + (create-symbolic-link linktree tlink))))) + (begin + (debug:print-error 0 *default-log-port* "linktree not defined in [setup] section of megatest.config") + ))) + (if (and *toppath* + (directory-exists? *toppath*)) + (begin + (setenv "MT_RUN_AREA_HOME" *toppath*) + (setenv "MT_TESTSUITENAME" (common:get-testsuite-name))) + (begin + (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.") + (set! *toppath* #f) ;; force it to be false so we return #f + #f)) + + ;; needed by various transport and db modules + (dbfile:testsuite-name (common:get-testsuite-name)) ;; (get-testsuite-name *toppath* *configdat*)) + + ;; one more attempt to cache the configs for future reading + (let* ((cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) + (mtcachef (car cachefiles)) + (rccachef (cdr cachefiles))) + + ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "...somepath.../.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342 + ;; TODO - consider 1) using simple-lock to bracket cache write + ;; 2) cache in hash on server, since need to do rmt: anyway to lock. + (if (and rccachef *runconfigdat* (not (common:file-exists? rccachef))) + (common:fail-safe + (lambda () + (configf:write-alist *runconfigdat* rccachef)) + (conc "Could not write cache file - "rccachef)) + ) + (if (and mtcachef *configdat* (not (common:file-exists? mtcachef))) + (common:fail-safe + (lambda () + (configf:write-alist *configdat* mtcachef)) + (conc "Could not write cache file - "mtcachef)) + ) + (if (and rccachef mtcachef *runconfigdat* *configdat*) + (set! *configstatus* 'fulldata))) + + ;; if have -append-config then read and append here + (let ((cfname (args:get-arg "-append-config"))) + (if (and cfname + (file-read-access? cfname)) + (read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special. + + ;; have config at this time, this is a good place to set params based on config file settings + (let* ((dbmode (configf:lookup *configdat* "setup" "dbcache-mode")) + (syncmode (configf:lookup *configdat* "setup" "sync-mode")) + (srvdebug (configf:lookup *configdat* "server" "debug-parameter"))) + (if dbmode + (begin + (debug:print-info 0 *default-log-port* "Overriding dbmode to "dbmode) + (dbcache-mode (string->symbol dbmode)))) + (if syncmode + (begin + (debug:print-info 0 *default-log-port* "Overriding syncmode to "syncmode) + (dbfile:sync-method (string->symbol syncmode)))) + (if srvdebug + (begin + (debug:print-info 0 *default-log-port* "Overriding server debug parameter to "srvdebug) + (tt-server-profile-string srvdebug))) + ) + + *toppath*))) + + +(define (launch:test-copy test-src-path test-path) + (let* ((ovrcmd (let ((cmd (configf:lookup *configdat* "setup" "testcopycmd"))) + (if cmd + ;; substitute the TEST_SRC_PATH and TEST_TARG_PATH + (string-substitute "TEST_TARG_PATH" test-path + (string-substitute "TEST_SRC_PATH" test-src-path cmd #t) #t) + #f))) + (cmd (if ovrcmd + ovrcmd + (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/" + " >> " test-path "/mt_launch.log 2>> " test-path "/mt_launch.log"))) + (status (system cmd))) + (if (not (eq? status 0)) + (debug:print 2 *default-log-port* "ERROR: problem with running \"" cmd "\"")))) + +;; return paths depending on what info is available. +;; +(define (launch:get-cache-file-paths areapath toppath target mtconfig) + (let* ((use-cache (common:use-cache?)) + (runname (common:args-get-runname)) + (linktree (common:get-linktree)) + (testname (common:get-full-test-name)) + (rundir (if (and runname target linktree) + (common:directory-writable? (conc linktree "/" target "/" runname)) + #f)) + (testdir (if (and rundir testname) + (common:directory-writable? (conc rundir "/" testname)) + #f)) + (cachedir (or testdir rundir)) + (mtcachef (and cachedir (conc cachedir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash))) + (rccachef (and cachedir (conc cachedir "/" ".runconfigs.cfg-" megatest-version "-" megatest-fossil-hash)))) + (debug:print-info 6 *default-log-port* + "runname=" runname + "\n linktree=" linktree + "\n testname=" testname + "\n rundir=" rundir + "\n testdir=" testdir + "\n cachedir=" cachedir + "\n mtcachef=" mtcachef + "\n rccachef=" rccachef) + (cons mtcachef rccachef))) + +(define (archive:bup-restore archive-command run-id run-name tests rp-mutex bup-mutex) ;; move the getting of archive space down into the below block so that a single run can + ;; allocate as needed should a disk fill up + ;; + (let* ((bup-exe (or (configf:lookup *configdat* "archive" "bup") "bup")) + (linktree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree"))) + + ;; from the test info bin the path to the test by stem + ;; + (for-each + (lambda (test-dat) + ;; When restoring test-dat will initially contain an old and invalid path to the test + (let* ((best-disk (get-best-disk *configdat* #f)) ;; BUG: get the testconfig and use it here. Otherwise data pulled out of archive could end up on the wrong kind of disk. + (item-path (db:test-get-item-path test-dat)) + (test-name (db:test-get-testname test-dat)) + (test-id (db:test-get-id test-dat)) + (run-id (db:test-get-run_id test-dat)) + (keyvals (rmt:get-key-val-pairs run-id)) + (target (string-intersperse (map cadr keyvals) "/")) + + (toplevel/children (and (db:test-get-is-toplevel test-dat) + (> (rmt:test-toplevel-num-items run-id test-name) 0))) + (test-partial-path (conc run-name "/" (db:test-make-full-name test-name item-path))) + ;; note the trailing slash to get the dir inspite of it being a link + (test-path (conc linktree "/" test-partial-path)) + ;; if the old path was not deleted then prev-test-physical-path will end up pointing to a real directory + (mutex-lock! rp-mutex) + (prev-test-physical-path (if (common:file-exists? test-path) + ;; (read-symbolic-link test-path #t) + (common:real-path test-path) + #f)) + (mutex-unlock! rp-mutex) + (new-test-physical-path (conc best-disk "/" test-partial-path)) + (archive-block-id (db:test-get-archived test-dat)) + (test-last-update (db:test-get-last_update test-dat)) + (archive-block-info (rmt:test-get-archive-block-info archive-block-id)) + (archive-path (if (vector? archive-block-info) + (vector-ref archive-block-info 2) ;; look in db.scm for test-get-archive-block-info for the vector record info + #f)) ;; no archive found? + (archive-timestamp-dir (if archive-path (archive:get-timestamp-dir bup-exe archive-path (common:get-testsuite-name) (string-substitute "/" "-" target " ") test-partial-path test-last-update) #f)) + (archive-internal-path (conc (common:get-testsuite-name) "-" (string-substitute "/" "-" target " ") "/" archive-timestamp-dir "/" test-partial-path)) + (include-paths (args:get-arg "-include")) + (exclude-pattern (args:get-arg "-exclude-rx")) + (exclude-file (args:get-arg "-exclude-rx-from"))) + (if (not archive-timestamp-dir) + (debug:print-error 0 *default-log-port* "Archive not found for testsuite" (common:get-testsuite-name) " run/test/itempath" test-partial-path) + (begin + ;; some sanity checks, move an existing path out of the way - iif it is not a toplevel with children + (debug:print-info 0 *default-log-port* "Archive time: " archive-timestamp-dir) + (if (and (not toplevel/children) ;; special handling needed for toplevel with children + prev-test-physical-path + (common:file-exists? prev-test-physical-path)) ;; what to do? abort or clean up or link it in? + (let* ((base (pathname-directory prev-test-physical-path)) + (dirn (pathname-file prev-test-physical-path)) + (newn (conc base "/." dirn))) + (debug:print-error 0 *default-log-port* "the old directory " prev-test-physical-path ", still exists! Moving it to " newn) + (rename-file prev-test-physical-path newn))) + + (if (and archive-path ;; no point in proceeding if there is no actual archive + (not toplevel/children)) + (begin + ;; CREATE WORK AREA + ;; test-src-path == #f ==> don't copy in data from tests directory + ;; itemdat == string ==> use directly + (create-work-area run-id run-name keyvals test-id #f best-disk test-name item-path) ;; #!key (remtries 2)) + ;; 1. Get the block id from the test info + ;; 2. Get the block data given the block id + ;; 3. Construct the paths etc. for the following command: + ;; bup -d /tmp/matt/adisk1/2015_q1/fullrun_e1a40/ restore -C /tmp/seeme fullrun-30/latest/ubuntu/nfs/none/w02.1.20.54_b/ + ;; DO BUP RESTORE + (let* ((new-test-dat (rmt:get-test-info-by-id run-id test-id)) + (new-test-path (if (vector? new-test-dat ) + (db:test-get-rundir new-test-dat) + (begin + (debug:print-error 0 *default-log-port* "unable to get data for run-id=" run-id ", test-id=" test-id) + (exit 1)))) + ;; new-test-path won't work - must use best-disk instead? Nope, new-test-path but tack on /.. + (bup-restore-params (list "-d" archive-path "restore" "-C" (conc new-test-path "/..") archive-internal-path))) + (debug:print-info 0 *default-log-port* "Restoring archived data to " new-test-physical-path " from archive in " archive-path " ... " archive-internal-path) + (debug:print-info 0 *default-log-port* bup-exe " " (string-join bup-restore-params " ")) + ;; (mutex-lock! bup-mutex) + (run-n-wait bup-exe params: bup-restore-params print-cmd: #f) + ;; (mutex-unlock! bup-mutex) + (mt:test-set-state-status-by-id run-id test-id "COMPLETED" #f #f))) + (debug:print-error 0 *default-log-port* "No archive path in the record for run-id=" run-id " test-id=" test-id)))))) + (filter vector? tests)))) + +;; archive - run bup +;; +;; 1. create the bup dir if not exists +;; 2. start the du of each directory +;; 3. gen index +;; 4. save +;; +(define (archive:run-bup archive-command run-id run-name tests rp-mutex bup-mutex) + ;; move the getting of archive space down into the below block so that a single run can + ;; allocate as needed should a disk fill up + ;; + (let* ((blockid-cache (make-hash-table)) + (tsname (common:get-testsuite-name)) + (target (string-intersperse (map cadr (rmt:get-key-val-pairs run-id)) "/")) + (min-space (string->number (or (configf:lookup *configdat* "archive" "minspace") "1000"))) + (arch-groups (make-hash-table)) ;; archive groups, each corrosponds to a bup area + (disk-groups (make-hash-table)) ;; + (test-groups (make-hash-table)) ;; these two (disk and test groups) could be combined nicely + (test-dirs (make-hash-table)) + (bup-exe (or (configf:lookup *configdat* "archive" "bup") "bup")) + (compress (or (configf:lookup *configdat* "archive" "compress") "9")) + (linktree (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))) + (archiver (let ((s (configf:lookup *configdat* "archive" "archiver"))) + (if s (string->symbol s) 'bup))) + (archiver-cmd (case archiver + ((tar) "tar cfj ARCHIVE_NAME.tar.bz2 ") + ((7z) " 7z u -t7z -m0=lzma -mx=9 -mfb=64 -md=32m -ms=on ARCHIVE_NAME.7z ") + (else #f))) + (src-archive-linktree (rmt:get-var "src-archive-linktree")) + (print-prefix "Running: ") ;; change to #f to turn off printing + (preclean-spec (configf:get-section *configdat* "archive-preclean"))) + + (if (or (not src-archive-linktree) (not (equal? src-archive-linktree linktree))) + (rmt:set-var "src-archive-linktree" linktree)) + ;; (tests:match patt testname itempath) + + ;; from the test info bin the path to the test by stem + ;; + (for-each + (lambda (test-dat) + (let* ((item-path (db:test-get-item-path test-dat)) + (test-name (db:test-get-testname test-dat)) + (test-id (db:test-get-id test-dat)) + (run-id (db:test-get-run_id test-dat)) + + (toplevel/children (and (db:test-get-is-toplevel test-dat) + (> (rmt:test-toplevel-num-items run-id test-name) 0))) + (test-partial-path (conc target "/" run-name "/" (db:test-make-full-name test-name item-path))) + ;; note the trailing slash to get the dir inspite of it being a link + (test-path (conc linktree "/" test-partial-path)) + (mutex-lock! rp-mutex) + (test-physical-path (if (common:file-exists? test-path) + (common:real-path test-path) + #f)) + (mutex-unlock! rp-mutex) + (partial-path-index (if test-physical-path (substring-index test-partial-path test-physical-path) #f)) + (test-base (if (and partial-path-index + test-physical-path ) + (substring test-physical-path + 0 + partial-path-index) + #f)) + ;; we need our archive dir checked for every test to enable folks who want to store other ways. + (archive-info (archive:allocate-new-archive-block blockid-cache *toppath* tsname min-space target run-name test-name)) + (archive-dir (if archive-info (cdr archive-info) #f)) + (archive-id (if archive-info (car archive-info) -1))) + + (if (not archive-dir) ;; no archive disk found, this is fatal + (begin + (debug:print 0 *default-log-port* "FATAL: No archive disks found. Please add disks with at least " + min-space " MB space to the [archive-disks] section of megatest.config") + (debug:print 0 *default-log-port* " use [archive] minspace to specify minimum available space") + (debug:print 0 *default-log-port* " disks: " + (string-intersperse (map cadr (archive:get-archive-disks)) "\n ")) + (exit 1)) + (debug:print-info 0 *default-log-port* "Using path " archive-dir " for archiving test " test-path)) + + ;; preclean the test directory per the spec if provided + (if (not (null? preclean-spec)) ;; we've been asked to preclean before archiving + (let loop ((spec (car preclean-spec)) + (tail (cdr preclean-spec))) + (if (> (length spec) 1) + (let ((testspec (car spec)) + (rules (cadr spec))) + (if (tests:match testspec test-name item-path) + (begin + (debug:print 0 *default-log-port* "INFO: cleanup requested for " test-physical-path) + (common:dir-clean-up test-physical-path rules remove-empty: #t)) + (if (not (null? tail)) + (loop (car tail)(cdr tail))))) + (begin + (debug:print 0 *default-log-port* "ERROR: bad spec line in [archive-preclean] section. \"" spec "\"") + (if (not (null? tail))(loop (car tail)(cdr tail))))))) + (cond + (toplevel/children + (debug:print 0 *default-log-port* "WARNING: cannot archive " test-name " with id " test-id + " as it is a toplevel test with children")) + ((not (common:file-exists? test-path)) + (debug:print 0 *default-log-port* "WARNING: Cannot archive " test-name "/" item-path + " as path " test-path " does not exist")) + (else + (debug:print 2 *default-log-port* + "From test-dat=" test-dat " derived the following:\n" + "test-partial-path = " test-partial-path "\n" + "test-path = " test-path "\n" + "test-physical-path = " test-physical-path "\n" + "partial-path-index = " partial-path-index "\n" + "test-base = " test-base) + (hash-table-set! disk-groups test-base + (cons test-physical-path (hash-table-ref/default disk-groups test-base '()))) + (hash-table-set! test-groups test-base + (cons test-dat (hash-table-ref/default test-groups test-base '()))) + (hash-table-set! arch-groups test-base + (cons archive-info (hash-table-ref/default arch-groups test-base '()))) + (hash-table-set! test-dirs test-id test-path))))) + ;; test-path)))) + tests) + (debug:print 0 *default-log-port* "INFO: DISK GROUPS=" (hash-table->alist disk-groups)) + ;; for each disk-group, initialize the bup area if needed + (for-each + (lambda (test-base) + (let* ((disk-group (hash-table-ref disk-groups test-base)) + (arch-group (hash-table-ref arch-groups test-base)) + (arch-info (car arch-group)) ;; don't know yet how this will work, can I get more than one possibility? + (archive-id (car arch-info)) + (archive-dir (cdr arch-info))) + (debug:print 0 *default-log-port* "Processing disk-group " test-base) + (let* ((test-paths-in (hash-table-ref disk-groups test-base)) + (test-paths (if (args:get-arg "-include") + (let ((subpaths (string-split (args:get-arg "-include") ","))) + (apply append + (map (lambda (p) + (map (lambda (subp) + (conc p "/" subp)) + subpaths)) + test-paths-in))) + test-paths-in))) + (if (not (common:file-exists? archive-dir)) + (create-directory archive-dir #t)) + (case archiver + ((bup) ;; Archive using bup + (let* ((bup-init-params (list "-d" archive-dir "init")) + (bup-index-params (append (list "-d" archive-dir "index") test-paths)) + (bup-save-params (append (list "-d" archive-dir "save" ;; (conc "--strip-path=" linktree) + (conc "-" compress) ;; or (conc "--compress=" compress) + "-n" (conc (common:get-testsuite-name) "-"(string-substitute "/" "-" target " ")) + (conc "--strip-path=" (conc test-base target "/" )) ;; if we push to the directory do we need this? + ) + test-paths))) + (if (not (common:file-exists? (conc archive-dir "/HEAD"))) + (begin + ;; replace this with jobrunner stuff enventually + (debug:print-info 2 *default-log-port* "Init bup in " archive-dir) + ;; (mutex-lock! bup-mutex) + (let-values (((pid-val exit-status exit-code) (run-n-wait bup-exe params: bup-init-params print-cmd: print-prefix))) + (if (not (eq? exit-code 0)) + (begin + (debug:print-error 0 *default-log-port* "There was an error initializing bup. Archive failed.") + (exit 1)))) + ;; (mutex-unlock! bup-mutex) + )) + (debug:print-info 2 *default-log-port* "Indexing data to be archived") + ;; (mutex-lock! bup-mutex) + (let-values (((pid-val exit-status exit-code) (run-n-wait bup-exe params: bup-index-params print-cmd: print-prefix))) + (if (not (eq? exit-code 0)) + (begin + (debug:print-error 0 *default-log-port* "There was an error Indexing bup. Archive failed.") + (exit 1)))) + (debug:print-info 2 *default-log-port* "Archiving data with bup") + (let-values (((pid-val exit-status exit-code) (run-n-wait bup-exe params: bup-save-params print-cmd: print-prefix))) + (if (not (eq? exit-code 0)) + (begin + (debug:print-error 0 *default-log-port* "There was an archiving data with bup. Archive failed.") + (exit 1)))))) + ((7z tar) + (for-each + (lambda (test-dat) + (let* ((test-id (db:test-get-id test-dat)) + (test-name (db:test-get-testname test-dat)) + (item-path (db:test-get-item-path test-dat)) + (test-full-name (db:test-make-full-name test-name item-path)) + (run-id (db:test-get-run_id test-dat)) + (target (string-intersperse (map cadr (rmt:get-key-val-pairs run-id)) "/")) + (run-name (rmt:get-run-name-from-id run-id)) + (source-dir (hash-table-ref test-dirs test-id)) ;; (conc test-base "/" test-name "/" item-path)) + (target-dir (string-substitute "/$" "" (conc archive-dir "/" target "/" run-name "/" test-full-name)))) + ;; create the test and item-path levels under archive-dir + (create-directory (pathname-directory target-dir) #t) + (run-n-wait + (conc + (string-substitute "ARCHIVE_NAME" target-dir archiver-cmd) " " + "." + ) + print-cmd: print-prefix + run-dir: source-dir))) + (hash-table-ref test-groups test-base)))) + ;; (mutex-unlock! bup-mutex) + (for-each + (lambda (test-dat) + (let ((test-id (db:test-get-id test-dat)) + (run-id (db:test-get-run_id test-dat))) + (rmt:test-set-archive-block-id run-id test-id archive-id) + (if (member (symbol->string archive-command) '("save-remove")) + (begin + (debug:print-info 0 *default-log-port* "remove testdat") + (runs:remove-test-directory test-dat 'archive-remove))))) + (hash-table-ref test-groups test-base))))) + (hash-table-keys disk-groups)) + #t)) + +) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -16,20 +16,18 @@ ;; along with Megatest. If not, see . ;; (declare (unit server)) -(declare (uses commonmod)) -(declare (uses configfmod)) -(declare (uses debugprint)) (declare (uses common)) (declare (uses db)) (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. -;; (declare (uses synchash)) -;;(declare (uses rpc-transport)) +(declare (uses debugprint)) +(declare (uses commonmod)) +(declare (uses configfmod)) +(declare (uses rmtmod)) (declare (uses launch)) -;; (declare (uses daemon)) (declare (uses mtargs)) (use (srfi 18) extras s11n) (use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest) (use directory-utils posix-extras matchable utils) @@ -41,727 +39,59 @@ (prefix mtargs args:)) (include "common_records.scm") (include "db_records.scm") -(define (server:make-server-url hostport) - (if (not hostport) - #f - (conc "http://" (car hostport) ":" (cadr hostport)))) - -(define *server-loop-heart-beat* (current-seconds)) - -;;====================================================================== -;; P K T S S T U F F -;;====================================================================== - -;; ??? - -;;====================================================================== -;; P K T S S T U F F -;;====================================================================== - -;; ??? - -;;====================================================================== -;; S E R V E R -;;====================================================================== - -;; Call this to start the actual server -;; - -;;====================================================================== -;; S E R V E R U T I L I T I E S -;;====================================================================== - -;; Get the transport -(define (server:get-transport) - (if *transport-type* - *transport-type* - (let ((ttype (string->symbol - (or (args:get-arg "-transport") - (configf:lookup *configdat* "server" "transport") - "rpc")))) - (set! *transport-type* ttype) - ttype))) - -;; Generate a unique signature for this server -(define (server:mk-signature) - (message-digest-string (md5-primitive) - (with-output-to-string - (lambda () - (write (list (current-directory) - (current-process-id) - (argv))))))) - -(define (server:get-client-signature) - (if *my-client-signature* *my-client-signature* - (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic - (set! *my-client-signature* sig) - *my-client-signature*))) - -(define (server:get-server-id) - (if *server-id* *server-id* - (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic - (set! *server-id* sig) - *server-id*))) - -;; ;; When using zmq this would send the message back (two step process) -;; ;; with spiffy or rpc this simply returns the return data to be returned -;; ;; -;; (define (server:reply return-addr query-sig success/fail result) -;; (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result) -;; ;; (send-message pubsock target send-more: #t) -;; ;; (send-message pubsock -;; (case (server:get-transport) -;; ((rpc) (db:obj->string (vector success/fail query-sig result))) -;; ((http) (db:obj->string (vector success/fail query-sig result))) -;; ((fs) result) -;; (else -;; (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*) -;; result))) - -;; Given an area path, start a server process ### NOTE ### > file 2>&1 -;; if the target-host is set -;; try running on that host -;; incidental: rotate logs in logs/ dir. -;; -(define (server:run areapath) ;; areapath is *toppath* for a given testsuite area - (let* ((testsuite (common:get-testsuite-name)) - (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log")) - (profile-mode (or (configf:lookup *configdat* "misc" "profilesw") - "")) - (cmdln (conc (common:get-megatest-exe) - " -server - ";; (or target-host "-") - (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") - " -daemonize " - "") - ;; " -log " logfile - " -m testsuite:" testsuite - " " profile-mode - )) ;; (conc " >> " logfile " 2>&1 &"))))) - (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) ;; why are we rotating logs here? This is a sensitive location with a lot going on!? - (load-limit (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0))) - ;; we want the remote server to start in *toppath* so push there - (push-directory areapath) - (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...") - (thread-start! log-rotate) - - ;; host.domain.tld match host? - ;; (if (and target-host - ;; ;; look at target host, is it host.domain.tld or ip address and does it - ;; ;; match current ip or hostname - ;; (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) - ;; (not (equal? curr-ip target-host))) - ;; (begin - ;; (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) - ;; (setenv "TARGETHOST" target-host))) - ;; - (setenv "TARGETHOST_LOGF" logfile) - (thread-sleep! (/ (random 3000) 1000)) ;; add a random initial delay. It seems pretty common that many running tests request a server at the same time - (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time)) - (system (conc "nbfake " cmdln)) - (unsetenv "TARGETHOST_LOGF") - ;; (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) - (thread-join! log-rotate) - (pop-directory))) - -;; given a path to a server log return: host port startseconds server-id -;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which use match let -;; example of what it's looking for in the log file: -;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 - -(define (server:logf-get-start-info logf) - (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+)")) ;; SERVER STARTED: host:port AT timesecs server id - (dbprep-rx (regexp "^SERVER: dbprep")) - (dbprep-found 0) - (bad-dat (list #f #f #f #f #f))) - (handle-exceptions - exn - (begin - ;; WARNING: this is potentially dangerous to blanket ignore the errors - (if (file-exists? logf) - (debug:print-info 2 *default-log-port* "Unable to get server info from "logf", exn=" exn)) - bad-dat) ;; no idea what went wrong, call it a bad server - (with-input-from-file - logf - (lambda () - (let loop ((inl (read-line)) - (lnum 0)) - (if (not (eof-object? inl)) - (let ((mlst (string-match server-rx inl)) - (dbprep (string-match dbprep-rx inl))) - (if dbprep (set! dbprep-found 1)) - (if (not mlst) - (if (< lnum 500) ;; give up if more than 500 lines of server log read - (loop (read-line)(+ lnum 1)) - (begin - (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf ) - bad-dat)) - (match mlst - ((_ host port start server-id pid) - (list host - (string->number port) - (string->number start) - server-id - (string->number pid))) - (else - (debug:print 0 *current-log-port* "ERROR: did not recognise SERVER line info "mlst) - bad-dat)))) - (begin - (if dbprep-found - (begin - (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time)) - (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting? - (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds)))) - bad-dat)))))))) - -;; ;; get a list of servers from the log files, with all relevant data -;; ;; ( mod-time host port start-time pid ) -;; ;; -;; (define (server:get-list areapath #!key (limit #f)) -;; (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$")) -;; (day-seconds (* 24 60 60))) -;; ;; if the directory exists continue to get the list -;; ;; otherwise attempt to create the logs dir and then -;; ;; continue -;; (if (if (directory-exists? (conc areapath "/logs")) -;; '() -;; (if (file-write-access? areapath) -;; (begin -;; (condition-case -;; (create-directory (conc areapath "/logs") #t) -;; (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs"))) -;; (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list. exn=" exn))) -;; (directory-exists? (conc areapath "/logs"))) -;; '())) -;; -;; ;; Get the list of server logs. -;; (let* ( -;; ;; For some reason, when I uncomment the below line, ext-tests sometimes starts 1000's of servers. -;; ;; (exiting-servers (system (conc "bash -c 'rm -f `grep -il exiting " areapath "/logs/server-*-*.log 2> /dev/null`'"))) -;; (server-logs (glob (conc areapath "/logs/server-*-*.log"))) -;; (num-serv-logs (length server-logs))) -;; (if (or (null? server-logs) (= num-serv-logs 0)) -;; (let () -;; (debug:print 2 *default-log-port* "There are no servers running at " (common:human-time)) -;; '() -;; ) -;; (let loop ((hed (string-chomp (car server-logs))) -;; (tal (cdr server-logs)) -;; (res '())) -;; (let* ((mod-time (handle-exceptions -;; exn -;; (begin -;; (debug:print 0 *default-log-port* "server:get-list: failed to get modification time on " hed ", exn=" exn) -;; (current-seconds)) ;; 0 -;; (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted -;; (down-time (- (current-seconds) mod-time)) -;; (serv-dat (if (or (< num-serv-logs 10) -;; (< down-time 900)) ;; day-seconds)) -;; (server:logf-get-start-info hed) -;; '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at -;; (serv-rec (cons mod-time serv-dat)) -;; (fmatch (string-match fname-rx hed)) -;; (pid (if fmatch (string->number (list-ref fmatch 2)) #f)) -;; (new-res (if (null? serv-dat) -;; res -;; (cons (append serv-rec (list pid)) res)))) ;; any changes to number of elements in new-res will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let -;; (if (null? tal) -;; (if (and limit -;; (> (length new-res) limit)) -;; new-res ;; (take new-res limit) <= need intelligent sorting before this will work -;; new-res) -;; (loop (string-chomp (car tal)) (cdr tal) new-res))))))))) - -#;(define (server:get-num-alive srvlst) - (let ((num-alive 0)) - (for-each - (lambda (server) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "Unable to get server start-time and/or mod-time from " server ", exn=" exn)) - (match-let (((mod-time host port start-time server-id pid) - server)) - (let* ((uptime (- (current-seconds) mod-time)) - (runtime (if start-time - (- mod-time start-time) - 0))) - (if (< uptime 5)(set! num-alive (+ num-alive 1))))))) - srvlst) - num-alive)) - -;; ;; given a list of servers get a list of valid servers, i.e. at least -;; ;; 10 seconds old, has started and is less than 1 hour old and is -;; ;; active (i.e. mod-time < 10 seconds -;; ;; -;; ;; mod-time host port start-time pid -;; ;; -;; ;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off -;; ;; and servers should stick around for about two hours or so. -;; ;; -;; (define (server:get-best srvlst) -;; (let* ((nums (server:get-num-servers)) -;; (now (current-seconds)) -;; (slst (sort -;; (filter (lambda (rec) -;; (if (and (list? rec) -;; (> (length rec) 2)) -;; (let ((start-time (list-ref rec 3)) -;; (mod-time (list-ref rec 0))) -;; ;; (print "start-time: " start-time " mod-time: " mod-time) -;; (and start-time mod-time -;; (> (- now start-time) 0) ;; been running at least 0 seconds -;; (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds -;; (or (not (configf:lookup *configdat* "server" "runtime")) ;; skip if not set -;; (< (- now start-time) -;; (+ (- (string->number (configf:lookup *configdat* "server" "runtime")) -;; 180) -;; (random 360)))) ;; under one hour running time +/- 180 -;; )) -;; #f)) -;; srvlst) -;; (lambda (a b) -;; (< (list-ref a 3) -;; (list-ref b 3)))))) -;; (if (> (length slst) nums) -;; (take slst nums) -;; slst))) - -;; ;; switch from server:get-list to server:get-servers-info -;; ;; -;; (define (server:get-first-best areapath) -;; (let ((srvrs (server:get-best (server:get-list areapath)))) -;; (if (and srvrs -;; (not (null? srvrs))) -;; (car srvrs) -;; #f))) -;; -;; (define (server:get-rand-best areapath) -;; (let ((srvrs (server:get-best (server:get-list areapath)))) -;; (if (and (list? srvrs) -;; (not (null? srvrs))) -;; (let* ((len (length srvrs)) -;; (idx (random len))) -;; (list-ref srvrs idx)) -;; #f))) - -(define (server:record->id servr) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "Unable to get server id from " servr ", exn=" exn) - #f) - (match-let (((host port start-time server-id pid) - servr)) - (if server-id - server-id - #f)))) - -(define (server:record->url servr) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "Unable to get server url from " servr ", exn=" exn) - #f) - (match-let (((host port start-time server-id pid) - servr)) - (if (and host port) - (conc host ":" port) - #f)))) - - -;; if server-start-last exists, and wasn't old enough, wait + 1, then call this function recursively until it is old enough. -;; if it is old enough, overwrite it and wait 0.25 seconds. -;; if it then has the wrong server key, wait + 1 and call this function recursively. -;; -#;(define (server:wait-for-server-start-last-flag areapath) - (let* ((start-flag (conc areapath "/logs/server-start-last")) - ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds) - (idletime (configf:lookup-number *configdat* "server" "idletime" default: 4)) - (server-key (conc (get-host-name) "-" (current-process-id)))) - (if (file-exists? start-flag) - (let* ((fmodtime (file-modification-time start-flag)) - (delta (- (current-seconds) fmodtime)) - (old-enough (> delta idletime)) - (new-server-key "")) - ;; write start-flag file, wait 0.25s, then if previously the start-flag file was older than seconds, and the new file still has the same server key as you just wrote, return #t. - ;; the intention is to make sure nfs can read the file we just wrote, and make sure it was written by us, and not another process. - (if (and old-enough - (begin - (debug:print-info 2 *default-log-port* "Writing " start-flag) - (with-output-to-file start-flag (lambda () (print server-key))) - (thread-sleep! 0.25) - (set! new-server-key (with-input-from-file start-flag (lambda () (read-line)))) - (equal? server-key new-server-key))) - #t - ;; If either of the above conditions is not true, print a "Gating server start" message, wait + 1, then call this function recursively. - (begin - (debug:print-info 0 *default-log-port* "Gating server start, last start: " - (seconds->time-string fmodtime) ", time since last start: " delta ", required idletime: " idletime ", gating reason:" (if old-enough "another job started a server" "too soon to start another server")) - - (thread-sleep! ( + 1 idletime)) - (server:wait-for-server-start-last-flag areapath))))))) - -;; oldest server alive determines host then choose random of youngest -;; five servers on that host -;; -(define (server:get-servers-info areapath) - ;; (assert *toppath* "FATAL: server:get-servers-info called before *toppath* has been set.") - (let* ((servinfodir (server:get-servinfo-dir areapath))) ;; (conc *toppath*"/.servinfo"))) - (if (not (file-exists? servinfodir)) - (create-directory servinfodir)) - (let* ((allfiles (glob (conc servinfodir"/*"))) - (res (make-hash-table))) - (for-each - (lambda (f) - (let* ((hostport (pathname-strip-directory f)) - (serverdat (server:logf-get-start-info f))) - (match serverdat - ((host port start server-id pid) - (if (and host port start server-id pid) - (hash-table-set! res hostport serverdat) - (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat))) - (else - (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat))))) - allfiles) - res))) - -;; check the .servinfo directory, are there other servers running on this -;; or another host? -;; -;; returns #t => ok to start another server -;; #f => not ok to start another server -;; -(define (server:minimal-check areapath) - (server:clean-up-old areapath) - (let* ((srvdir (server:get-servinfo-dir areapath)) ;; (conc areapath"/.servinfo")) - (servrs (glob (conc srvdir"/*"))) - (thishostip (server:get-best-guess-address (get-host-name))) - (thisservrs (glob (conc srvdir"/"thishostip":*"))) - (homehostinf (server:choose-server areapath 'homehost)) - (havehome (car homehostinf)) - (wearehome (cdr homehostinf))) - (debug:print-info 0 *default-log-port* thishostip", have homehost: "havehome", we are homehost: "wearehome - ", numservers: "(length thisservrs)) - (cond - ((not havehome) #t) ;; no homehost yet, go for it - ((and havehome wearehome (< (length thisservrs) 20)) #t) ;; we are home and less than 20 servers, ok to start another - ((and havehome (not wearehome)) #f) ;; we are not the home host - ((and havehome wearehome (>= (length thisservrs) 20)) #f) ;; have enough running - (else - (debug:print 0 *default-log-port* "WARNING: Unrecognised scenario, servrs="servrs", thishostip="thishostip", thisservrs="thisservrs) - #t)))) - - -(define server-last-start 0) - - -;; oldest server alive determines host then choose random of youngest -;; five servers on that host -;; -;; mode: -;; best - get best server (random of newest five) -;; home - get home host based on oldest server -;; info - print info -(define (server:choose-server areapath #!optional (mode 'best)) - ;; age is current-starttime - ;; find oldest alive - ;; 1. sort by age ascending and ping until good - ;; find alive rand from youngest - ;; 1. sort by age descending - ;; 2. take five - ;; 3. check alive, discard if not and repeat - ;; first we clean up old server files - (assert (eq? (rmt:transport-mode) 'http) "FATAL: server:run called with rmt:transport-mode="(rmt:transport-mode)) - (server:clean-up-old areapath) - (let* ((since-last (- (current-seconds) server-last-start)) - (server-start-delay 10)) - (if ( < (- (current-seconds) server-last-start) 10 ) - (begin - (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start)) - (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds") - (thread-sleep! server-start-delay) - ) - (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start)) - ) - ) - (let* ((serversdat (server:get-servers-info areapath)) - (servkeys (hash-table-keys serversdat)) - (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last - (sort servkeys ;; list of "host:port" - (lambda (a b) - (>= (list-ref (hash-table-ref serversdat a) 2) - (list-ref (hash-table-ref serversdat b) 2)))) - '()))) - (debug:print 2 *default-log-port* "server:choose-server: serversdat: " serversdat) - (debug:print 2 *default-log-port* "server:choose-server: servkeys: " servkeys) - (if (not (null? by-time-asc)) - (let* ((oldest (last by-time-asc)) - (oldest-dat (hash-table-ref serversdat oldest)) - (host (list-ref oldest-dat 0)) - (all-valid (filter (lambda (x) - (equal? host (list-ref (hash-table-ref serversdat x) 0))) - by-time-asc)) - (best-ten (lambda () - (if (> (length all-valid) 11) - (take (drop-right all-valid 1) 10) ;; remove the oldest from consideration so it can age out - (if (> (length all-valid) 8) - (drop-right all-valid 1) - all-valid)))) - (names->dats (lambda (names) - (map (lambda (x) - (hash-table-ref serversdat x)) - names))) - (am-home? (lambda () - (let* ((currhost (get-host-name)) - (bestadrs (server:get-best-guess-address currhost))) - (or (equal? host currhost) - (equal? host bestadrs)))))) - (case mode - ((info) - (debug:print 0 *default-log-port* "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid) - (debug:print 0 *default-log-port* "youngest: "(hash-table-ref serversdat (car all-valid)))) - ((home) host) - ((homehost) (cons host (am-home?))) ;; shut up old code - ((home?) (am-home?)) - ((best-ten)(names->dats (best-ten))) - ((all-valid)(names->dats all-valid)) - ((best) (let* ((best-ten (best-ten)) - (len (length best-ten))) - (hash-table-ref serversdat (list-ref best-ten (random len))))) - ((count)(length all-valid)) - (else - (debug:print 0 *default-log-port* "ERROR: invalid command "mode) - #f))) - (begin - (server:run areapath) - (set! server-last-start (current-seconds)) - ;; (thread-sleep! 3) - (case mode - ((homehost) (cons #f #f)) - (else #f)))))) - -(define (server:get-servinfo-dir areapath) - (let* ((spath (conc areapath"/.servinfo"))) - (if (not (file-exists? spath)) - (create-directory spath #t)) - spath)) - -(define (server:clean-up-old areapath) - ;; any server file that has not been touched in ten minutes is effectively dead - (let* ((sfiles (glob (conc (server:get-servinfo-dir areapath)"/*")))) - (for-each - (lambda (sfile) - (let* ((modtime (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: failed to get modification file for "sfile) - (current-seconds)) - (file-modification-time sfile)))) - (if (and (number? modtime) - (> (- (current-seconds) modtime) - 600)) - (begin - (debug:print 0 *default-log-port* "WARNING: found old server info file "sfile", removing it.") - (handle-exceptions - exn - (debug:print 0 *default-log-port* "WARNING: failed to delete old server info file "sfile) - (delete-file sfile)))))) - sfiles))) - -;; would like to eventually get rid of this -;; -(define (common:on-homehost?) - (if (eq? (rmt:transport-mode) 'http) - (server:choose-server *toppath* 'home?) - #t)) ;; there is no homehost for tcp and nfs is always on home so #t should work - -;; kind start up of server, wait before allowing another server for a given -;; area to be launched -;; -(define (server:kind-run areapath) - ;; look for $MT_RUN_AREA_HOME/logs/server-start-last - ;; and wait for it to be at least seconds old - ;; (server:wait-for-server-start-last-flag areapath) - (let loop () - (if (> (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)) 2) - (begin - (if (common:low-noise-print 30 "our-host-load") - (debug:print 0 *default-log-port* "WARNING: system load is high, waiting to start server.")) - (loop)))) - (if (< (server:choose-server areapath 'count) 20) - (server:run areapath)) - #;(if (not (server:check-if-running areapath)) ;; why try if there is already a server running? - (let* ((lock-file (conc areapath "/logs/server-start.lock"))) - (let* ((start-flag (conc areapath "/logs/server-start-last"))) - (common:simple-file-lock-and-wait lock-file expire-time: 25) - (debug:print-info 2 *default-log-port* "server:kind-run: touching " start-flag) - (system (conc "touch " start-flag)) ;; lazy but safe - (server:run areapath) - (thread-sleep! 20) ;; don't release the lock for at least a few seconds. And allow time for the server startup to get to "SERVER STARTED". - (common:simple-file-release-lock lock-file))) - (debug:print-info 0 *default-log-port* "Found server already running. NOT trying to start another."))) - -;; this one seems to be the general entry point -;; -(define (server:start-and-wait areapath #!key (timeout 60)) - (let ((give-up-time (+ (current-seconds) timeout))) - (let loop ((server-info (server:check-if-running areapath)) - (try-num 0)) - (if (or server-info - (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available. - (server:record->url server-info) - (let* ( (servers (server:choose-server areapath 'all-valid)) - (num-ok (if servers (length (server:choose-server areapath 'all-valid)) 0))) - (if (and (> try-num 0) ;; first time through simply wait a little while then try again - (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one - (server:run areapath)) - (thread-sleep! 5) - (loop (server:check-if-running areapath) - (+ try-num 1))))))) - -(define (server:get-num-servers #!key (numservers 2)) - (let ((ns (string->number - (or (configf:lookup *configdat* "server" "numservers") "notanumber")))) - (or ns numservers))) - -;; no longer care if multiple servers are started by accident. older servers will drop off in time. -;; -(define (server:check-if-running areapath) ;; #!key (numservers "2")) - (let* ((ns (server:get-num-servers)) ;; get the setting the for maximum number of servers allowed - (servers (server:choose-server areapath 'best-ten))) ;; (server:get-best (server:get-list areapath)))) - (if (or (and servers - (null? servers)) - (not servers)) - ;; (and (list? servers) - ;; (< (length servers) (+ 1 (random ns))))) ;; somewhere between 1 and numservers - #f - (let loop ((hed (car servers)) - (tal (cdr servers))) - (let ((res (server:check-server hed))) - (if res - hed - (if (null? tal) - #f - (loop (car tal)(cdr tal))))))))) - -;; ping the given server -;; -(define (server:check-server server-record) - (let* ((server-url (server:record->url server-record)) - (server-id (server:record->id server-record)) - (res (server:ping server-url server-id))) - (if res - server-url - #f))) - -(define (server:kill servr) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " servr ", exn=" exn) - #f) - (match-let (((hostname port start-time server-id pid) - servr)) - (tasks:kill-server hostname pid)))) - -;; ;; called in megatest.scm, host-port is string hostname:port -;; ;; -;; ;; NOTE: This is NOT called directly from clients as not all transports support a client running -;; ;; in the same process as the server. -;; ;; -;; (define (server:ping host:port server-id #!key (do-exit #f)) -;; (let* ((host-port (cond -;; ((string? host:port) -;; (let ((slst (string-split host:port ":"))) -;; (if (eq? (length slst) 2) -;; (list (car slst)(string->number (cadr slst))) -;; #f))) -;; (else -;; #f)))) -;; (cond -;; ((and (list? host-port) -;; (eq? (length host-port) 2)) -;; (let* ((myrunremote (make-and-init-remote *toppath*)) -;; (iface (car host-port)) -;; (port (cadr host-port)) -;; (server-dat (client:connect iface port server-id myrunremote)) -;; (login-res (rmt:login-no-auto-client-setup myrunremote))) -;; (http-transport:close-connections myrunremote) -;; (if (and (list? login-res) -;; (car login-res)) -;; (begin -;; ;; (print "LOGIN_OK") -;; (if do-exit (exit 0)) -;; #t) -;; (begin -;; ;; (print "LOGIN_FAILED") -;; (if do-exit (exit 1)) -;; #f)))) -;; (else -;; (if host:port -;; (debug:print 0 *default-log-port* "ERROR: bad host:port "host:port)) -;; (if do-exit -;; (exit 1) -;; #f))))) -;; -;; ;; run ping in separate process, safest way in some cases -;; ;; -;; (define (server:ping-server ifaceport) -;; (with-input-from-pipe -;; (conc (common:get-megatest-exe) " -ping " ifaceport) -;; (lambda () -;; (let loop ((inl (read-line)) -;; (res "NOREPLY")) -;; (if (eof-object? inl) -;; (case (string->symbol res) -;; ((NOREPLY) #f) -;; ((LOGIN_OK) #t) -;; (else #f)) -;; (loop (read-line) inl)))))) -;; -;; ;; NOT USED (well, ok, reference in rpc-transport but otherwise not used). -;; ;; -;; (define (server:login toppath) -;; (lambda (toppath) -;; (set! *db-last-access* (current-seconds)) ;; might not be needed. -;; (if (equal? *toppath* toppath) -;; #t -;; #f))) - -;; timeout is hms string: 1h 5m 3s, default is 1 minute -;; This is currently broken. Just use the number of hours with no unit. -;; Default is 600 seconds. -;; -(define (server:expiration-timeout) - (let* ((tmo (configf:lookup *configdat* "server" "timeout"))) - (if (string? tmo) - (let* ((num (string->number tmo))) - (if num - (* 3600 num) - (common:hms-string->seconds tmo))) - 600 ;; this is the default - ))) - -(define (server:get-best-guess-address hostname) - (let ((res #f)) - (for-each - (lambda (adr) - (if (not (eq? (u8vector-ref adr 0) 127)) - (set! res adr))) - ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME - (vector->list (hostinfo-addresses (hostname->hostinfo hostname)))) - (string-intersperse - (map number->string - (u8vector->list - (if res res (hostname->ip hostname)))) "."))) - -;; moving this here as it needs access to db and cannot be in common. -;; - -(define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f)) - (debug:print "WARNING: bruteforce-syncer is called but has been disabled!") - (lambda () - (debug:print "WARNING: bruteforce-syncer is called but has been disabled!"))) - +(define (db:kill-servers) + (let* ((tl (launch:setup)) ;; need this to initialize *toppath* + (servdir (conc *toppath* "/.servinfo")) + (servfiles (glob (conc servdir "/*:*.db"))) + (fmtstr "~10a~22a~10a~25a~25a~8a\n") + (dbfiles (append (glob (conc *toppath* "/.mtdb/main.db")) (glob (conc *toppath* "/.mtdb/?.db"))(glob (conc *toppath* "/.mtdb/??.db")))) + (ttdat (make-tt areapath: *toppath*)) + ) + (format #t fmtstr "DB" "host:port" "PID" "age" "last mod" "state") + (for-each + (lambda (dbfile) + (let* ( + (dbfname (conc (pathname-file dbfile) ".db")) + (sfiles (tt:find-server *toppath* dbfname)) + ) + (for-each + (lambda (sfile) + (let ( + (sinfos (tt:get-server-info-sorted ttdat dbfname)) + ) + (for-each + (lambda (sinfo) + (let* ( + (db (list-ref sinfo 5)) + (pid (list-ref sinfo 4)) + (host (list-ref sinfo 0)) + (port (list-ref sinfo 1)) + (server-id (list-ref sinfo 3)) + (age (seconds->hr-min-sec (- (current-seconds) (list-ref sinfo 2)))) + (last-mod (seconds->string (list-ref sinfo 2))) + (killed (system (conc "ssh " host " kill " pid " > /dev/null"))) + (dummy2 (sleep 1)) + (state (if (> (system (conc "ssh " host " ps " pid " > /dev/null")) 0) "dead" "alive")) + ) + (format #t fmtstr db (conc host ":" port) pid age last-mod state) + (system (conc "rm " sfile)) + ) + ) + sinfos + ) + ) + ) + sfiles + ) + ) + ) + dbfiles + ) + ;; remove this db, because otherwise metadata contains records for old servers, and this causes a problem with db:no-sync-get-lock-with-id. + (if (file-exists? (conc *toppath* "/.mtdb/no-sync.db")) + (delete-file (conc *toppath* "/.mtdb/no-sync.db")) + ) + ) +) ADDED servermod.scm Index: servermod.scm ================================================================== --- /dev/null +++ servermod.scm @@ -0,0 +1,792 @@ +;; Copyright 2006-2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . +;; + +(declare (unit servermod)) + +(declare (uses commonmod)) +(declare (uses configfmod)) +(declare (uses mtmod)) +(declare (uses debugprint)) +(declare (uses mtargs)) + +(module servermod + * + +(import scheme + chicken) + +(use (srfi 18) extras s11n) +(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest) +(use directory-utils posix-extras matchable utils) +(use spiffy uri-common intarweb http-client spiffy-request-vars) + +(import ports + data-structures + files + srfi-4 + typed-records + + commonmod + configfmod + debugprint + (prefix mtargs args:) + mtmod + ) + +(include "common_records.scm") +(include "db_records.scm") + +(define (server:make-server-url hostport) + (if (not hostport) + #f + (conc "http://" (car hostport) ":" (cadr hostport)))) + +(define *server-loop-heart-beat* (current-seconds)) + +;;====================================================================== +;; P K T S S T U F F +;;====================================================================== + +;; ??? + +;;====================================================================== +;; P K T S S T U F F +;;====================================================================== + +;; ??? + +;;====================================================================== +;; S E R V E R +;;====================================================================== + +;; Call this to start the actual server +;; + +;;====================================================================== +;; S E R V E R U T I L I T I E S +;;====================================================================== + +;; Get the transport +(define (server:get-transport) + (if *transport-type* + *transport-type* + (let ((ttype (string->symbol + (or (args:get-arg "-transport") + (configf:lookup *configdat* "server" "transport") + "rpc")))) + (set! *transport-type* ttype) + ttype))) + +;; Generate a unique signature for this server +(define (server:mk-signature) + (message-digest-string (md5-primitive) + (with-output-to-string + (lambda () + (write (list (current-directory) + (current-process-id) + (argv))))))) + +(define (server:get-client-signature) + (if *my-client-signature* *my-client-signature* + (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic + (set! *my-client-signature* sig) + *my-client-signature*))) + +(define (server:get-server-id) + (if *server-id* *server-id* + (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic + (set! *server-id* sig) + *server-id*))) + +;; ;; When using zmq this would send the message back (two step process) +;; ;; with spiffy or rpc this simply returns the return data to be returned +;; ;; +;; (define (server:reply return-addr query-sig success/fail result) +;; (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result) +;; ;; (send-message pubsock target send-more: #t) +;; ;; (send-message pubsock +;; (case (server:get-transport) +;; ((rpc) (db:obj->string (vector success/fail query-sig result))) +;; ((http) (db:obj->string (vector success/fail query-sig result))) +;; ((fs) result) +;; (else +;; (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*) +;; result))) + +;; Given an area path, start a server process ### NOTE ### > file 2>&1 +;; if the target-host is set +;; try running on that host +;; incidental: rotate logs in logs/ dir. +;; +(define (server:run areapath) ;; areapath is *toppath* for a given testsuite area + (let* ((testsuite (common:get-testsuite-name)) + (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log")) + (profile-mode (or (configf:lookup *configdat* "misc" "profilesw") + "")) + (cmdln (conc (common:get-megatest-exe) + " -server - ";; (or target-host "-") + (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") + " -daemonize " + "") + ;; " -log " logfile + " -m testsuite:" testsuite + " " profile-mode + )) ;; (conc " >> " logfile " 2>&1 &"))))) + (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) ;; why are we rotating logs here? This is a sensitive location with a lot going on!? + (load-limit (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0))) + ;; we want the remote server to start in *toppath* so push there + (push-directory areapath) + (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...") + (thread-start! log-rotate) + + ;; host.domain.tld match host? + ;; (if (and target-host + ;; ;; look at target host, is it host.domain.tld or ip address and does it + ;; ;; match current ip or hostname + ;; (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) + ;; (not (equal? curr-ip target-host))) + ;; (begin + ;; (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) + ;; (setenv "TARGETHOST" target-host))) + ;; + (setenv "TARGETHOST_LOGF" logfile) + (thread-sleep! (/ (random 3000) 1000)) ;; add a random initial delay. It seems pretty common that many running tests request a server at the same time + (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time)) + (system (conc "nbfake " cmdln)) + (unsetenv "TARGETHOST_LOGF") + ;; (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) + (thread-join! log-rotate) + (pop-directory))) + + +(define (server:logf-get-start-info logf) + (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+)")) ;; SERVER STARTED: host:port AT timesecs server id + (dbprep-rx (regexp "^SERVER: dbprep")) + (dbprep-found 0) + (bad-dat (list #f #f #f #f #f))) + (handle-exceptions + exn + (begin + ;; WARNING: this is potentially dangerous to blanket ignore the errors + (if (file-exists? logf) + (debug:print-info 2 *default-log-port* "Unable to get server info from "logf", exn=" exn)) + bad-dat) ;; no idea what went wrong, call it a bad server + (with-input-from-file + logf + (lambda () + (let loop ((inl (read-line)) + (lnum 0)) + (if (not (eof-object? inl)) + (let ((mlst (string-match server-rx inl)) + (dbprep (string-match dbprep-rx inl))) + (if dbprep (set! dbprep-found 1)) + (if (not mlst) + (if (< lnum 500) ;; give up if more than 500 lines of server log read + (loop (read-line)(+ lnum 1)) + (begin + (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf ) + bad-dat)) + (match mlst + ((_ host port start server-id pid) + (list host + (string->number port) + (string->number start) + server-id + (string->number pid))) + (else + (debug:print 0 *default-log-port* "ERROR: did not recognise SERVER line info "mlst) + bad-dat)))) + (begin + (if dbprep-found + (begin + (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time)) + (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting? + (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds)))) + bad-dat)))))))) + + +(define (server:record->id servr) + (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "Unable to get server id from " servr ", exn=" exn) + #f) + (match-let (((host port start-time server-id pid) + servr)) + (if server-id + server-id + #f)))) + +(define (server:record->url servr) + (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "Unable to get server url from " servr ", exn=" exn) + #f) + (match-let (((host port start-time server-id pid) + servr)) + (if (and host port) + (conc host ":" port) + #f)))) + +;; oldest server alive determines host then choose random of youngest +;; five servers on that host +;; +(define (server:get-servers-info areapath) + ;; (assert *toppath* "FATAL: server:get-servers-info called before *toppath* has been set.") + (let* ((servinfodir (server:get-servinfo-dir areapath))) ;; (conc *toppath*"/.servinfo"))) + (if (not (file-exists? servinfodir)) + (create-directory servinfodir)) + (let* ((allfiles (glob (conc servinfodir"/*"))) + (res (make-hash-table))) + (for-each + (lambda (f) + (let* ((hostport (pathname-strip-directory f)) + (serverdat (server:logf-get-start-info f))) + (match serverdat + ((host port start server-id pid) + (if (and host port start server-id pid) + (hash-table-set! res hostport serverdat) + (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat))) + (else + (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat))))) + allfiles) + res))) + +;; check the .servinfo directory, are there other servers running on this +;; or another host? +;; +;; returns #t => ok to start another server +;; #f => not ok to start another server +;; +(define (server:minimal-check areapath) + (server:clean-up-old areapath) + (let* ((srvdir (server:get-servinfo-dir areapath)) ;; (conc areapath"/.servinfo")) + (servrs (glob (conc srvdir"/*"))) + (thishostip (server:get-best-guess-address (get-host-name))) + (thisservrs (glob (conc srvdir"/"thishostip":*"))) + (homehostinf (server:choose-server areapath 'homehost)) + (havehome (car homehostinf)) + (wearehome (cdr homehostinf))) + (debug:print-info 0 *default-log-port* thishostip", have homehost: "havehome", we are homehost: "wearehome + ", numservers: "(length thisservrs)) + (cond + ((not havehome) #t) ;; no homehost yet, go for it + ((and havehome wearehome (< (length thisservrs) 20)) #t) ;; we are home and less than 20 servers, ok to start another + ((and havehome (not wearehome)) #f) ;; we are not the home host + ((and havehome wearehome (>= (length thisservrs) 20)) #f) ;; have enough running + (else + (debug:print 0 *default-log-port* "WARNING: Unrecognised scenario, servrs="servrs", thishostip="thishostip", thisservrs="thisservrs) + #t)))) + + +(define server-last-start 0) + + +;; oldest server alive determines host then choose random of youngest +;; five servers on that host +;; +;; mode: +;; best - get best server (random of newest five) +;; home - get home host based on oldest server +;; info - print info +(define (server:choose-server areapath #!optional (mode 'best)) + ;; age is current-starttime + ;; find oldest alive + ;; 1. sort by age ascending and ping until good + ;; find alive rand from youngest + ;; 1. sort by age descending + ;; 2. take five + ;; 3. check alive, discard if not and repeat + ;; first we clean up old server files + (assert (eq? (rmt:transport-mode) 'http) "FATAL: server:run called with rmt:transport-mode="(rmt:transport-mode)) + (server:clean-up-old areapath) + (let* ((since-last (- (current-seconds) server-last-start)) + (server-start-delay 10)) + (if ( < (- (current-seconds) server-last-start) 10 ) + (begin + (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start)) + (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds") + (thread-sleep! server-start-delay) + ) + (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start)) + ) + ) + (let* ((serversdat (server:get-servers-info areapath)) + (servkeys (hash-table-keys serversdat)) + (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last + (sort servkeys ;; list of "host:port" + (lambda (a b) + (>= (list-ref (hash-table-ref serversdat a) 2) + (list-ref (hash-table-ref serversdat b) 2)))) + '()))) + (debug:print 2 *default-log-port* "server:choose-server: serversdat: " serversdat) + (debug:print 2 *default-log-port* "server:choose-server: servkeys: " servkeys) + (if (not (null? by-time-asc)) + (let* ((oldest (last by-time-asc)) + (oldest-dat (hash-table-ref serversdat oldest)) + (host (list-ref oldest-dat 0)) + (all-valid (filter (lambda (x) + (equal? host (list-ref (hash-table-ref serversdat x) 0))) + by-time-asc)) + (best-ten (lambda () + (if (> (length all-valid) 11) + (take (drop-right all-valid 1) 10) ;; remove the oldest from consideration so it can age out + (if (> (length all-valid) 8) + (drop-right all-valid 1) + all-valid)))) + (names->dats (lambda (names) + (map (lambda (x) + (hash-table-ref serversdat x)) + names))) + (am-home? (lambda () + (let* ((currhost (get-host-name)) + (bestadrs (server:get-best-guess-address currhost))) + (or (equal? host currhost) + (equal? host bestadrs)))))) + (case mode + ((info) + (debug:print 0 *default-log-port* "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid) + (debug:print 0 *default-log-port* "youngest: "(hash-table-ref serversdat (car all-valid)))) + ((home) host) + ((homehost) (cons host (am-home?))) ;; shut up old code + ((home?) (am-home?)) + ((best-ten)(names->dats (best-ten))) + ((all-valid)(names->dats all-valid)) + ((best) (let* ((best-ten (best-ten)) + (len (length best-ten))) + (hash-table-ref serversdat (list-ref best-ten (random len))))) + ((count)(length all-valid)) + (else + (debug:print 0 *default-log-port* "ERROR: invalid command "mode) + #f))) + (begin + (server:run areapath) + (set! server-last-start (current-seconds)) + ;; (thread-sleep! 3) + (case mode + ((homehost) (cons #f #f)) + (else #f)))))) + +(define (server:get-servinfo-dir areapath) + (let* ((spath (conc areapath"/.servinfo"))) + (if (not (file-exists? spath)) + (create-directory spath #t)) + spath)) + +(define (server:clean-up-old areapath) + ;; any server file that has not been touched in ten minutes is effectively dead + (let* ((sfiles (glob (conc (server:get-servinfo-dir areapath)"/*")))) + (for-each + (lambda (sfile) + (let* ((modtime (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "WARNING: failed to get modification file for "sfile) + (current-seconds)) + (file-modification-time sfile)))) + (if (and (number? modtime) + (> (- (current-seconds) modtime) + 600)) + (begin + (debug:print 0 *default-log-port* "WARNING: found old server info file "sfile", removing it.") + (handle-exceptions + exn + (debug:print 0 *default-log-port* "WARNING: failed to delete old server info file "sfile) + (delete-file sfile)))))) + sfiles))) + +;; timeout is hms string: 1h 5m 3s, default is 1 minute +;; This is currently broken. Just use the number of hours with no unit. +;; Default is 600 seconds. +;; +(define (server:expiration-timeout) + (let* ((tmo (configf:lookup *configdat* "server" "timeout"))) + (if (string? tmo) + (let* ((num (string->number tmo))) + (if num + (* 3600 num) + (common:hms-string->seconds tmo))) + 600 ;; this is the default + ))) + +(define (server:get-best-guess-address hostname) + (let ((res #f)) + (for-each + (lambda (adr) + (if (not (eq? (u8vector-ref adr 0) 127)) + (set! res adr))) + ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME + (vector->list (hostinfo-addresses (hostname->hostinfo hostname)))) + (string-intersperse + (map number->string + (u8vector->list + (if res res (hostname->ip hostname)))) "."))) + +;; moving this here as it needs access to db and cannot be in common. +;; + +(define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f)) + (debug:print "WARNING: bruteforce-syncer is called but has been disabled!") + (lambda () + (debug:print "WARNING: bruteforce-syncer is called but has been disabled!"))) + +;; +(defstruct remote + + ;; transport to be used + ;; http - use http-transport + ;; http-read-cached - use http-transport for writes but in-mem cached for reads + (rmode 'http) + (hh-dat (let ((res (or (server:choose-server *toppath* 'homehost) + (cons #f #f)))) + (assert (pair? res)(conc "FATAL: hh-dat should be a pair, got "res)) + res)) + (server-url #f) ;; (server:check-if-running *toppath*) #f)) + (server-id #f) + (server-info #f) ;; (if *toppath* (server:check-if-running *toppath*) #f)) + (last-server-check 0) ;; last time we checked to see if the server was alive + (connect-time (current-seconds)) ;; when we first connected + (last-access (current-seconds)) ;; last time we talked to server + ;; (conndat #f) ;; iface port api-uri api-url api-req seconds server-id + (server-timeout (server:expiration-timeout)) + (force-server #f) + (ro-mode #f) + (ro-mode-checked #f) ;; flag that indicates we have checked for ro-mode + + ;; conndat stuff + (iface #f) ;; TODO: Consolidate this data with server-url and server-info above + (port #f) + (api-url #f) + (api-uri #f) + (api-req #f)) + +;;====================================================================== +;; Rotate logs, logic: +;; if > 500k and older than 1 week: +;; remove previous compressed log and compress this log +;; WARNING: This proc operates assuming that it is in the directory above the +;; logs directory you wish to log-rotate. +;; +(define (common:rotate-logs) + (let* ((all-files (make-hash-table)) + (stats (make-hash-table)) + (inc-stat (lambda (key) + (hash-table-set! stats key (+ (hash-table-ref/default stats key 0) 1)))) + (max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "600")))) ;; name -> age + (if (not (directory-exists? "logs"))(create-directory "logs")) + (directory-fold + (lambda (file rem) + (handle-exceptions + exn + (begin + (debug:print-info 2 *default-log-port* "unable to rotate log " file ", probably handled by another process, this is safe to ignore. exn=" exn) + (debug:print 2 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + ;; (print-call-chain (current-error-port)) ;; + ) + (let* ((fullname (conc "logs/" file)) + (mod-time (file-modification-time fullname)) + (file-age (- (current-seconds) mod-time)) + (file-old (> file-age (* 48 60 60))) + (file-big (> (file-size fullname) 200000))) + (hash-table-set! all-files file mod-time) + (if (or (and (string-match "^.*.log" file) + file-old + file-big) + (and (string-match "^server-.*.log" file) + file-old)) + (let ((gzfile (conc fullname ".gz"))) + (if (common:file-exists? gzfile) + (begin + (debug:print-info 0 *default-log-port* "removing " gzfile) + (delete-file* gzfile) + (hash-table-delete! all-files gzfile) ;; needed? + )) + (debug:print-info 0 *default-log-port* "compressing " file) + (system (conc "gzip " fullname)) + (inc-stat "gzipped") + (hash-table-set! all-files (conc file ".gz") file-age) ;; add the .gz file and remove the base file + (hash-table-delete! all-files file) + ) + (if (and (> file-age (* (string->number (or (configf:lookup *configdat* "setup" "log-expire-days") "30")) 24 3600)) + (file-exists? fullname)) ;; just in case it was gzipped - will get it next time + (handle-exceptions + exn + #f + (if (directory? fullname) + (begin + (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") + (inc-stat "directories")) + (begin + (delete-file* fullname) + (inc-stat "deleted"))) + (hash-table-delete! all-files file))))))) + '() + "logs") + (for-each + (lambda (category) + (let ((quant (hash-table-ref/default stats category 0))) + (if (> quant 0) + (debug:print-info 0 *default-log-port* category " log files: " quant)))) + `("deleted" "gzipped" "directories")) + (let ((num-logs (hash-table-size all-files))) + (if (> num-logs max-allowed) ;; because NFS => don't let number of logs exceed 300 + (let ((files (take (sort (hash-table-keys all-files) + (lambda (a b) + (< (hash-table-ref all-files a)(hash-table-ref all-files b)))) + (- num-logs max-allowed)))) + (for-each + (lambda (file) + (let* ((fullname (conc "logs/" file))) + (if (directory? fullname) + (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") + (handle-exceptions + exn + (debug:print-error 0 *default-log-port* "failed to remove " fullname ", exn=" exn) + (delete-file* fullname))))) + files) + (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files.")))))) + +;;====================================================================== +;; E X I T H A N D L I N G +;;====================================================================== + +(define (std-signal-handler signum) + ;; (signal-mask! signum) + (set! *time-to-exit* #t) + ;;(debug:print-info 13 *default-log-port* "got signal "signum) + (debug:print-error 0 *default-log-port* "Received signal " signum " aaa exiting promptly") + ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway + (exit)) + +(define (special-signal-handler signum) + ;; (signal-mask! signum) + (set! *time-to-exit* #t) + ;;(debug:print-info 13 *default-log-port* "got signal "signum) + (debug:print-error 0 *default-log-port* "Received signal " signum " sending email befor exiting!!") + ;;TODO send email to notify admin contact listed in the config that the lisner got killed + ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway + (exit)) + + +(set-signal-handler! signal/int std-signal-handler) ;; ^C +(set-signal-handler! signal/term std-signal-handler) +;;====================================================================== +;; calculate a delay number based on a droop curve +;; inputs are: +;; - load-in, load as from uptime, NOT normalized +;; - numcpus, number of cpus, ideally use the real cpus, not threads +;; +(define (common:get-delay load-in numcpus) + (let* ((ratio (/ load-in numcpus)) + (new-option (configf:lookup *configdat* "load" "new-load-method")) + (paramstr (or (configf:lookup *configdat* "load" "exp-params") + "15 12 1281453987.9543 0.75")) ;; 5 4 10 1")) + (paramlst (map string->number (string-split paramstr)))) + (if new-option + (begin + (cond ((and (>= ratio 0) (< ratio .5)) + 0) + ((and (>= ratio 0.5) (<= ratio .9)) + (* ratio (/ 5 .9))) + ((and (> ratio .9) (<= ratio 1.1)) + (+ 5 (* (- ratio .9) (/ 55 .2)))) + ((> ratio 1.1) + 60))) + (match paramlst + ((r1 r2 s1 s2) + (debug:print 3 *default-log-port* "Using params r1=" r1 " r2=" r2 " s1=" s1 " s2=" s2) + (min (max (/ (expt r1 (* r2 s2 ratio)) s1) 0) 30)) + (else + (debug:print 0 *default-log-port* "BAD exp-params, should be \"r1 r2 s1 s2\" but got " paramstr) + 30))))) + +;;====================================================================== +;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load +;; count - count down to zero, at some point we'd give up if the load never drops +;; num-tries - count down to zero number tries to get numcpus +;; +(define (common:wait-for-cpuload maxnormload numcpus-in + #!key (count 1000) + (msg #f)(remote-host #f)(num-tries 5)) + (let* ((loadavg (common:get-cpu-load remote-host)) + ;; not possible to have zero. If we get 1, it's possible that we got the previous default, and we should check again + (numcpus (if (<= 1 numcpus-in) + (common:get-num-cpus remote-host) numcpus-in)) + (first (car loadavg)) + (next (cadr loadavg)) + (adjmaxload (* maxnormload (max 1 numcpus))) ;; possible bug where numcpus (or could be maxload) is zero, crude + ;; fallback is to at least use 1 + ;; effective load accounts for load jumps, this should elminate all the first-next-avg, adjwait, load-jump-limit + ;; etc. + (effective-load (common:get-intercept first next)) + (recommended-delay (common:get-delay effective-load numcpus)) + (effective-host (or remote-host "localhost")) + (normalized-effective-load (/ effective-load numcpus)) + (will-wait (> normalized-effective-load maxnormload))) + (if (and will-wait (> recommended-delay 1)) + (let* ((actual-delay (min recommended-delay 30))) + (if (common:low-noise-print 30 (conc (round actual-delay) "-safe-load")) + (debug:print-info 0 *default-log-port* "Load control, delaying " + actual-delay " seconds to maintain safe load. current normalized effective load is " + normalized-effective-load". maxnormload = " maxnormload " numcpus = " numcpus " loadavg = " loadavg " effective-load = " effective-load)) + (thread-sleep! actual-delay))) + + (cond + ;; bad data, try again to get the data + ((not will-wait) + (if (common:low-noise-print 3600 (conc (round normalized-effective-load) "-load-acceptable-" effective-host)) + (debug:print 0 *default-log-port* "Effective load on " effective-host " is acceptable at " effective-load " continuing."))) + + ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable + (> num-tries 0)) + (debug:print 0 *default-log-port* "WARNING: received bad data from get-cpu-load " + first ", we'll sleep 10s and try " num-tries " more times.") + (thread-sleep! 10) + (common:wait-for-cpuload maxnormload numcpus-in + count: count remote-host: remote-host num-tries: (- num-tries 1))) + + ;; need to wait for load to drop + ((and will-wait ;; (> first adjmaxload) + (> count 0)) + (debug:print-info 0 *default-log-port* + "Delaying 15" ;; adjwait + " seconds due to normalized effective load " normalized-effective-load ;; first + " exceeding max of " adjmaxload + " on server " (or remote-host (get-host-name)) + " (normalized load-limit: " maxnormload ") " (if msg msg "")) + (thread-sleep! 15) ;; adjwait) + (common:wait-for-cpuload maxnormload numcpus count: (- count 1) msg: msg remote-host: remote-host) + ;; put the message here to indicate came out of waiting + (debug:print-info 1 *default-log-port* + "On host: " effective-host + ", effective load: " effective-load + ", numcpus: " numcpus + ", normalized effective load: " normalized-effective-load + )) + ;; overloaded and count expired (i.e. went to zero) + (else + (if (> num-tries 0) ;; should be "num-tries-left". + (if (common:low-noise-print 30 (conc (round effective-load) "-load-acceptable-" effective-host)) + (debug:print 0 *default-log-port* "Load on " effective-host " is acceptable at effective normalized load of " + normalized-effective-load " continuing.")) + (debug:print 0 *default-log-port* "Load on " effective-host ", " + first" could not be retrieved. Giving up and continuing.")))))) + +;;====================================================================== +;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load +;; +;; (define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)(force-maxload #f)(num-tries 5)) +;; (let* ((loadavg (common:get-cpu-load remote-host)) +;; (numcpus (if (<= 1 numcpus-in) ;; not possible to have zero. If we get 1, it's possible that we got the previous default, and we should check again +;; (common:get-num-cpus remote-host) +;; numcpus-in)) +;; (maxload (if force-maxload +;; maxload-in +;; (if (number? maxload-in) +;; (max maxload-in 0.5) +;; 0.5))) ;; so maxload must be greater than 0.5 for now BUG - FIXME? +;; (first (car loadavg)) +;; (next (cadr loadavg)) +;; (adjmaxload (* maxload (max 1 numcpus))) ;; possible bug where +;; ;; numcpus (or could be +;; ;; maxload) is zero, +;; ;; crude fallback is to +;; ;; at least use 1 +;; (loadjmp (- first (if (> next (* numcpus 0.7)) ;; could do something with average of first and next? +;; 0 +;; next))) ;; we will force a conservative calculation any time next is large. +;; (first-next-avg (/ (+ first next) 2)) +;; ;; add some randomness to the time to break any alignment +;; ;; where netbatch dumps many jobs to machines simultaneously +;; (adjwait (min (+ 300 (random 10)) (abs (* (+ (random 10) +;; (/ (- 1000 count) 10) +;; waitdelay) +;; (- first adjmaxload) )))) +;; (load-jump-limit (configf:lookup-number *configdat* "setup" "load-jump-limit")) +;; ;; effective load accounts for load jumps, this should elminate all the first-next-avg, adjwait, load-jump-limit +;; ;; etc. +;; (effective-load (common:get-intercept first next)) +;; (effective-host (or remote-host "localhost")) +;; (normalized-effective-load (/ effective-load numcpus)) +;; (will-wait (> normalized-effective-load maxload))) +;; +;; ;; let's let the user know once in a long while that load checking +;; ;; is happening but not constantly report it +;; #;(if (common:low-noise-print 30 (conc "cpuload" (or remote-host "localhost"))) ;; (> (random 100) 75) ;; about 25% of the time +;; (debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload +;; ", load: " first ", adjmaxload: " adjmaxload ", loadjmp: " loadjmp)) +;; +;; (debug:print-info 1 *default-log-port* +;; "On host: " effective-host +;; ", effective load: " effective-load +;; ", numcpus: " numcpus +;; ", normalized effective load: " normalized-effective-load +;; ) +;; +;; (cond +;; ;; bad data, try again to get the data +;; ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable +;; (> num-tries 0)) +;; (debug:print 0 *default-log-port* "WARNING: received bad data from get-cpu-load " first ", we'll sleep 10s and try " num-tries " more times.") +;; (thread-sleep! 10) +;; (common:wait-for-cpuload maxload-in numcpus-in waitdelay +;; count: count remote-host: remote-host force-maxload: force-maxload num-tries: (- num-tries 1))) +;; ;; need to wait for load to drop +;; ((and will-wait ;; (> first adjmaxload) +;; (> count 0)) +;; (debug:print-info 0 *default-log-port* +;; "Delaying " 15 ;; adjwait +;; " seconds due to normalized effective load " normalized-effective-load ;; first +;; " exceeding max of " adjmaxload +;; " on server " (or remote-host (get-host-name)) +;; " (normalized load-limit: " maxload ") " (if msg msg "")) +;; (thread-sleep! 15) ;; adjwait) +;; (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host)) +;; ((and (> loadjmp (cond +;; (load-jump-limit load-jump-limit) +;; ((> numcpus 8)(/ numcpus 2)) +;; ((> numcpus 4)(/ numcpus 1.2)) +;; (else 0.5))) +;; (> count 0)) +;; (debug:print-info 0 *default-log-port* "waiting " adjwait " seconds due to possible load jump " loadjmp ". " +;; (if msg msg "")) +;; (thread-sleep! adjwait) +;; (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host)) +;; (else +;; (if (> num-tries 0) +;; (if (common:low-noise-print 30 (conc (round first) "-load-acceptable-" (or remote-host "localhost"))) +;; (debug:print 0 *default-log-port* "Load on " (or remote-host "localhost") " is acceptable at " first " continuing.")) +;; (debug:print 0 *default-log-port* "Load on " (or remote-host "localhost") ", "first" could not be retrieved. Giving up and continuing.")))))) +;; + +;;====================================================================== +;; wait for normalized cpu load to drop below maxload +;; +(define (common:wait-for-normalized-load maxnormload msg remote-host #!optional (rem-tries 5)) + (let ((num-cpus (common:get-num-cpus remote-host))) + (if num-cpus + (common:wait-for-cpuload maxnormload num-cpus 15 msg: msg remote-host: remote-host) + (begin + (thread-sleep! (random 60)) ;; we failed to get num cpus. wait a bit and try again + (if (> rem-tries 0) + (common:wait-for-normalized-load maxnormload msg remote-host (- rem-tries 1)) + #f))))) + + +) Index: subrun.scm ================================================================== --- subrun.scm +++ subrun.scm @@ -17,254 +17,24 @@ ;; along with Megatest. If not, see . ;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') (declare (unit subrun)) + (declare (uses debugprint)) -(declare (uses db)) -(declare (uses common)) (declare (uses commonmod)) (declare (uses configfmod)) +(declare (uses tasksmod)) + (declare (uses mt)) +(declare (uses db)) +(declare (uses common)) (use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (srfi 18) posix-extras directory-utils pathname-expand typed-records format call-with-environment-variables) (import commonmod configfmod - debugprint) - -;(include "common_records.scm") -;;(include "key_records.scm") -(include "db_records.scm") ;; provides db:test-get-id -;;(include "run_records.scm") -;;(include "test_records.scm") - -(define (subrun:subrun-test-initialized? test-run-dir) - (if (and (common:file-exists? (conc test-run-dir "/subrun-area") ) - (common:file-exists? (conc test-run-dir "/testconfig.subrun") )) - #t - #f)) - -(define (subrun:launch-dashboard test-run-dir) - (if (subrun:subrun-test-initialized? test-run-dir) - (let* ((subarea (subrun:get-runarea test-run-dir))) - (if (and subarea (common:file-exists? subarea)) - (system (conc "cd " subarea ";env -i PATH=\"$PATH\" DISPLAY=$DISPLAY HOME=$HOME USER=$USER dashboard &")))))) - -(define (subrun:subrun-removed? test-run-dir) - (if (subrun:subrun-test-initialized? test-run-dir) - (let ((flagfile (conc test-run-dir "/subrun.removed"))) - (if (common:file-exists? flagfile) - #t - #f)) - #t)) - -(define (subrun:set-subrun-removed test-run-dir) - (let ((flagfile (conc test-run-dir "/subrun.removed"))) - (if (and (subrun:subrun-test-initialized? test-run-dir) (not (common:file-exists? flagfile))) - (with-output-to-file flagfile - (lambda () (print (current-seconds))))))) - -(define (subrun:unset-subrun-removed test-run-dir) - (let ((flagfile (conc test-run-dir "/subrun.removed"))) - (if (and (subrun:subrun-test-initialized? test-run-dir) (common:file-exists? flagfile)) - (delete-file flagfile)))) - - -(define (subrun:testconfig-defines-subrun? testconfig) - (configf:lookup testconfig "subrun" "runwait")) ;; we use runwait as the flag that a subrun is requested - -(define (subrun:initialize-toprun-test testconfig test-run-dir) - (let ((ra (configf:lookup testconfig "subrun" "run-area")) - (logpro (configf:lookup testconfig "subrun" "logpro")) - (symlink-target (conc test-run-dir "/subrun-area")) - ) - (if (not ra) ;; when runarea is not set we default to *toppath*. However - (let ((fallback-run-area (or *toppath* (conc test-run-dir "/subrun")))) - ;; we need to force the setting in the testconfig so it will - ;; be preserved in the testconfig.subrun file - (configf:set-section-var testconfig "subrun" "run-area" fallback-run-area) - (set! ra fallback-run-area))) - (configf:set-section-var testconfig "logpro" "subrun" logpro) ;; append the logpro rules to the logpro section as stepname subrun - (if (common:file-exists? symlink-target) - (delete-file symlink-target)) - (create-symbolic-link ra symlink-target) - (configf:write-alist testconfig "testconfig.subrun"))) - -(define (subrun:set-state-status test-run-dir state status new-state-status) - (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir)) - (let* ((action-switches-str - (conc "-set-state-status "new-state-status - (if state (conc " -state "state) "") - (if status (conc " -status "status) ""))) - (log-prefix - (subrun:sanitize-path - (conc "set-state-status="new-state-status - (if state (conc ":state="state) "") - (if status (conc "+status="status) "")))) - (submt-result - (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix))) - submt-result))) - -(define (subrun:remove-subrun test-run-dir keep-records ) - (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir)) - (let* ((action-switches-str - (conc "-remove-runs" - (if keep-records "-keep-records " "") - )) - (remove-result - (subrun:exec-sub-megatest test-run-dir action-switches-str "remove"))) - (if remove-result - (begin - (subrun:set-subrun-removed test-run-dir) - #t) - #f)) - #t)) - -(define (subrun:kill-subrun test-run-dir ) - (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir)) - (let* ((action-switches-str - (conc "-kill-runs" )) - (kill-result - (subrun:exec-sub-megatest test-run-dir action-switches-str "kill"))) - kill-result) - #t)) - -(define (subrun:launch-cmd test-run-dir run-mode #!optional (sub-cmd "-run")) ;; BUG: "-run" should be changed to "-rerun-clean" but current doesn't work - (if (subrun:subrun-removed? test-run-dir) - (subrun:unset-subrun-removed test-run-dir)) - - (let* ((log-prefix "run") - (switches (subrun:selector+log-switches test-run-dir log-prefix)) - (run-wait (equal? run-mode "yes")) - (cmd (conc (common:get-mtexe)" "sub-cmd" "switches" " - (if run-wait "-run-wait " "")))) - cmd)) - - -(define (subrun:sanitize-path inpath) - (let* ((insane-pattern (irregex "[^[a-zA-Z0-9_\\-]"))) - (regex#string-substitute insane-pattern "_" inpath #t))) - -(define (subrun:get-runarea test-run-dir) - (if (subrun:subrun-test-initialized? test-run-dir) - (let* ((info-alist (subrun:selector+log-alist - test-run-dir - "foo")) - (run-area (if (list? info-alist) - (alist-ref "-start-dir" info-alist equal? #f) - #f))) - run-area) - #f)) - -(define (subrun:selector+log-alist test-run-dir log-prefix) - (let* ((switch-def-alist (common:get-param-mapping flavor: 'config)) - (subrunfile (conc test-run-dir "/testconfig.subrun" )) - (subrundata (with-input-from-file subrunfile read)) - (subrunconfig (configf:alist->config subrundata)) - (run-area (configf:lookup subrunconfig "subrun" "run-area")) - (defvals `(("start-dir" . ,(or run-area ;; default values if not specified in subrun section of tconf - (get-environment-variable "MT_RUN_AREA_HOME") - "/no/rundir/found")) - ("run-name" . ,(or (get-environment-variable "MT_RUNNAME") "NO-RUNNAME")) - ("target" . ,(or (get-environment-variable "MT_TARGET") "NO-TARGET")))) - (switch-alist-pre (filter-map (lambda (item) - (let* ((config-key (car item)) - (switch (cdr item)) - (defval (alist-ref config-key defvals equal? #f)) - (val (or (configf:lookup subrunconfig "subrun" config-key) - defval))) - (if val - (cons switch val) - #f))) - switch-def-alist)) - - ;; testpatt may be modified if all three of mode-patt, tag-expr, and testpatt are null - (mode-patt (alist-ref "-modepatt" switch-alist-pre equal? #f)) - (tag-expr (alist-ref "-tagexpr" switch-alist-pre equal? #f)) - (testpatt (alist-ref "-testpatt" switch-alist-pre equal? - (if (not (or mode-patt tag-expr)) "%" #f))) ;; testpatt is % if not - ;; otherwise specified - - ;; define compact-stem for logfile - (target (alist-ref "-target" switch-alist-pre equal? #f)) ;; want data-structures alist-ref, not alist-lib alist-ref - (runname (alist-ref "-runname" switch-alist-pre equal? #f)) - - - (compact-stem (subrun:sanitize-path - (conc - target - "-" - runname - "-" (or testpatt mode-patt tag-expr "NO-TESTPATT")))) - (logfile (conc - test-run-dir "/" - (if log-prefix - (conc (subrun:sanitize-path log-prefix) "-") - "") - compact-stem - ".log")) - ;; swap out testpatt with modified test-patt and add -log - (switch-alist (cons - (cons "-log" logfile) - (map (lambda (item) - (if (equal? (car item) "-testpatt") - (cons "-testpatt" testpatt) - item)) - switch-alist-pre)))) - switch-alist)) - ;; note - get precmd from subrun section - ;; apply to submegatest commands - -(define (subrun:get-log-path test-run-dir log-prefix) - (let* ((alist (subrun:selector+log-alist test-run-dir log-prefix)) - (res (alist-ref "-log" alist equal? #f))) - res)) - -(define (subrun:selector+log-switches test-run-dir log-prefix) - (let* ((switch-alist (subrun:selector+log-alist test-run-dir log-prefix)) - (res - (string-intersperse - (apply - append - (map - (lambda (x) - (list (car x) (cdr x))) - switch-alist)) - " "))) - res)) - -;; NOTE: Here we run sub megatest but this is not intended for one version -;; of megatest to test another version. Thus we propagate the -(define (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix) - (let* ((mtpathdir (common:get-megatest-exe-dir)) - (mtexe (common:get-mtexe)) - (selector-switches (subrun:selector+log-switches test-run-dir log-prefix)) - (cmd (conc mtexe" "selector-switches" "action-switches-str )) - (pid #f) - (proc (lambda () - (debug:print-info 0 *default-log-port* "Running sub megatest command: "cmd) - ;;(set! pid (process-run "/usr/bin/xterm" (list )))))) - (set! pid (process-run "/bin/bash" (list "-c" cmd)))))) - (call-with-environment-variables - (list (cons "PATH" (common:get-megatest-exe-path))) - (lambda () - (common:without-vars proc "^MT_.*"))) - (let processloop ((i 0)) - (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) - (if (eq? pid-val 0) - (begin - (thread-sleep! 2) - (processloop (+ i 1))) - (begin - (debug:print-info 0 *default-log-port* "sub megatest " action-switches-str " completed with exit code " exit-code) - (if (eq? 0 exit-code) - (begin - #t) - (begin - #f)))))))) - - - -;; (subrun:exec-sub-megatest "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/165/megatest/ext-tests/tests/subrun-usecases/toparea/links/SYSTEM_val/RELEASE_val/go/toptest" "-foo" "foo") + debugprint + tasksmod) + ADDED subrunmod.scm Index: subrunmod.scm ================================================================== --- /dev/null +++ subrunmod.scm @@ -0,0 +1,360 @@ +;;====================================================================== +;; Copyright 2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +;;====================================================================== +;; Cpumod: +;; +;; Put things here don't fit anywhere else +;;====================================================================== + +(declare (unit subrunmod)) +(declare (uses debugprint)) +(declare (uses mtargs)) +(declare (uses commonmod)) +(declare (uses configfmod)) +(declare (uses dbfile)) +(declare (uses dbmod)) +(declare (uses rmtmod)) +(declare (uses servermod)) +(declare (uses processmod)) +(declare (uses pgdb)) +(declare (uses mtmod)) +(declare (uses megatestmod)) +(declare (uses tasksmod)) + +(use srfi-69) + +(module subrunmod + * + +(import scheme) +(cond-expand + (chicken-4 + + (import chicken + ports + data-structures + extras + files + matchable + pathname-expand + posix + posix-extras + regex + regex-case + sparse-vectors + + ) + (use srfi-69)) + (chicken-5 + (import (prefix sqlite3 sqlite3:) + chicken.base + chicken.condition + chicken.file + chicken.file.posix + chicken.io + chicken.pathname + chicken.port + chicken.process + chicken.process-context + chicken.process-context.posix + chicken.sort + chicken.string + chicken.time + chicken.time.posix + + matchable + md5 + message-digest + pathname-expand + regex + regex-case + system-information + + ))) + +;; imports common to ck4 and ck5 +(import srfi-1 + srfi-13 + srfi-18 + srfi-69 + typed-records + (prefix base64 base64:) + (prefix sqlite3 sqlite3:) + md5 + message-digest + z3 + directory-utils + call-with-environment-variables + regex + irregex + + debugprint + commonmod + configfmod + (prefix mtargs args:) + dbmod + dbfile + rmtmod + servermod + processmod + pgdb + mtmod + megatestmod + tasksmod + ) + +;(include "common_records.scm") +;;(include "key_records.scm") +(include "db_records.scm") ;; provides db:test-get-id +;;(include "run_records.scm") +;;(include "test_records.scm") + +(define (subrun:subrun-test-initialized? test-run-dir) + (if (and (common:file-exists? (conc test-run-dir "/subrun-area") ) + (common:file-exists? (conc test-run-dir "/testconfig.subrun") )) + #t + #f)) + +(define (subrun:launch-dashboard test-run-dir) + (if (subrun:subrun-test-initialized? test-run-dir) + (let* ((subarea (subrun:get-runarea test-run-dir))) + (if (and subarea (common:file-exists? subarea)) + (system (conc "cd " subarea ";env -i PATH=\"$PATH\" DISPLAY=$DISPLAY HOME=$HOME USER=$USER dashboard &")))))) + +(define (subrun:subrun-removed? test-run-dir) + (if (subrun:subrun-test-initialized? test-run-dir) + (let ((flagfile (conc test-run-dir "/subrun.removed"))) + (if (common:file-exists? flagfile) + #t + #f)) + #t)) + +(define (subrun:set-subrun-removed test-run-dir) + (let ((flagfile (conc test-run-dir "/subrun.removed"))) + (if (and (subrun:subrun-test-initialized? test-run-dir) (not (common:file-exists? flagfile))) + (with-output-to-file flagfile + (lambda () (print (current-seconds))))))) + +(define (subrun:unset-subrun-removed test-run-dir) + (let ((flagfile (conc test-run-dir "/subrun.removed"))) + (if (and (subrun:subrun-test-initialized? test-run-dir) (common:file-exists? flagfile)) + (delete-file flagfile)))) + + +(define (subrun:testconfig-defines-subrun? testconfig) + (configf:lookup testconfig "subrun" "runwait")) ;; we use runwait as the flag that a subrun is requested + +(define (subrun:initialize-toprun-test testconfig test-run-dir) + (let ((ra (configf:lookup testconfig "subrun" "run-area")) + (logpro (configf:lookup testconfig "subrun" "logpro")) + (symlink-target (conc test-run-dir "/subrun-area")) + ) + (if (not ra) ;; when runarea is not set we default to *toppath*. However + (let ((fallback-run-area (or *toppath* (conc test-run-dir "/subrun")))) + ;; we need to force the setting in the testconfig so it will + ;; be preserved in the testconfig.subrun file + (configf:set-section-var testconfig "subrun" "run-area" fallback-run-area) + (set! ra fallback-run-area))) + (configf:set-section-var testconfig "logpro" "subrun" logpro) ;; append the logpro rules to the logpro section as stepname subrun + (if (common:file-exists? symlink-target) + (delete-file symlink-target)) + (create-symbolic-link ra symlink-target) + (configf:write-alist testconfig "testconfig.subrun"))) + +(define (subrun:set-state-status test-run-dir state status new-state-status) + (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir)) + (let* ((action-switches-str + (conc "-set-state-status "new-state-status + (if state (conc " -state "state) "") + (if status (conc " -status "status) ""))) + (log-prefix + (subrun:sanitize-path + (conc "set-state-status="new-state-status + (if state (conc ":state="state) "") + (if status (conc "+status="status) "")))) + (submt-result + (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix))) + submt-result))) + +(define (subrun:remove-subrun test-run-dir keep-records ) + (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir)) + (let* ((action-switches-str + (conc "-remove-runs" + (if keep-records "-keep-records " "") + )) + (remove-result + (subrun:exec-sub-megatest test-run-dir action-switches-str "remove"))) + (if remove-result + (begin + (subrun:set-subrun-removed test-run-dir) + #t) + #f)) + #t)) + +(define (subrun:kill-subrun test-run-dir ) + (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir)) + (let* ((action-switches-str + (conc "-kill-runs" )) + (kill-result + (subrun:exec-sub-megatest test-run-dir action-switches-str "kill"))) + kill-result) + #t)) + +(define (subrun:launch-cmd test-run-dir run-mode #!optional (sub-cmd "-run")) ;; BUG: "-run" should be changed to "-rerun-clean" but current doesn't work + (if (subrun:subrun-removed? test-run-dir) + (subrun:unset-subrun-removed test-run-dir)) + + (let* ((log-prefix "run") + (switches (subrun:selector+log-switches test-run-dir log-prefix)) + (run-wait (equal? run-mode "yes")) + (cmd (conc (common:get-mtexe)" "sub-cmd" "switches" " + (if run-wait "-run-wait " "")))) + cmd)) + + +(define (subrun:sanitize-path inpath) + (let* ((insane-pattern (irregex "[^[a-zA-Z0-9_\\-]"))) + (regex#string-substitute insane-pattern "_" inpath #t))) + +(define (subrun:get-runarea test-run-dir) + (if (subrun:subrun-test-initialized? test-run-dir) + (let* ((info-alist (subrun:selector+log-alist + test-run-dir + "foo")) + (run-area (if (list? info-alist) + (alist-ref "-start-dir" info-alist equal? #f) + #f))) + run-area) + #f)) + +(define (subrun:selector+log-alist test-run-dir log-prefix) + (let* ((switch-def-alist (common:get-param-mapping flavor: 'config)) + (subrunfile (conc test-run-dir "/testconfig.subrun" )) + (subrundata (with-input-from-file subrunfile read)) + (subrunconfig (configf:alist->config subrundata)) + (run-area (configf:lookup subrunconfig "subrun" "run-area")) + (defvals `(("start-dir" . ,(or run-area ;; default values if not specified in subrun section of tconf + (get-environment-variable "MT_RUN_AREA_HOME") + "/no/rundir/found")) + ("run-name" . ,(or (get-environment-variable "MT_RUNNAME") "NO-RUNNAME")) + ("target" . ,(or (get-environment-variable "MT_TARGET") "NO-TARGET")))) + (switch-alist-pre (filter-map (lambda (item) + (let* ((config-key (car item)) + (switch (cdr item)) + (defval (alist-ref config-key defvals equal? #f)) + (val (or (configf:lookup subrunconfig "subrun" config-key) + defval))) + (if val + (cons switch val) + #f))) + switch-def-alist)) + + ;; testpatt may be modified if all three of mode-patt, tag-expr, and testpatt are null + (mode-patt (alist-ref "-modepatt" switch-alist-pre equal? #f)) + (tag-expr (alist-ref "-tagexpr" switch-alist-pre equal? #f)) + (testpatt (alist-ref "-testpatt" switch-alist-pre equal? + (if (not (or mode-patt tag-expr)) "%" #f))) ;; testpatt is % if not + ;; otherwise specified + + ;; define compact-stem for logfile + (target (alist-ref "-target" switch-alist-pre equal? #f)) ;; want data-structures alist-ref, not alist-lib alist-ref + (runname (alist-ref "-runname" switch-alist-pre equal? #f)) + + + (compact-stem (subrun:sanitize-path + (conc + target + "-" + runname + "-" (or testpatt mode-patt tag-expr "NO-TESTPATT")))) + (logfile (conc + test-run-dir "/" + (if log-prefix + (conc (subrun:sanitize-path log-prefix) "-") + "") + compact-stem + ".log")) + ;; swap out testpatt with modified test-patt and add -log + (switch-alist (cons + (cons "-log" logfile) + (map (lambda (item) + (if (equal? (car item) "-testpatt") + (cons "-testpatt" testpatt) + item)) + switch-alist-pre)))) + switch-alist)) + ;; note - get precmd from subrun section + ;; apply to submegatest commands + +(define (subrun:get-log-path test-run-dir log-prefix) + (let* ((alist (subrun:selector+log-alist test-run-dir log-prefix)) + (res (alist-ref "-log" alist equal? #f))) + res)) + +(define (subrun:selector+log-switches test-run-dir log-prefix) + (let* ((switch-alist (subrun:selector+log-alist test-run-dir log-prefix)) + (res + (string-intersperse + (apply + append + (map + (lambda (x) + (list (car x) (cdr x))) + switch-alist)) + " "))) + res)) + +;; NOTE: Here we run sub megatest but this is not intended for one version +;; of megatest to test another version. Thus we propagate the +(define (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix) + (let* ((mtpathdir (common:get-megatest-exe-dir)) + (mtexe (common:get-mtexe)) + (selector-switches (subrun:selector+log-switches test-run-dir log-prefix)) + (cmd (conc mtexe" "selector-switches" "action-switches-str )) + (pid #f) + (proc (lambda () + (debug:print-info 0 *default-log-port* "Running sub megatest command: "cmd) + ;;(set! pid (process-run "/usr/bin/xterm" (list )))))) + (set! pid (process-run "/bin/bash" (list "-c" cmd)))))) + (call-with-environment-variables + (list (cons "PATH" (common:get-megatest-exe-path))) + (lambda () + (common:without-vars proc "^MT_.*"))) + (let processloop ((i 0)) + (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) + (if (eq? pid-val 0) + (begin + (thread-sleep! 2) + (processloop (+ i 1))) + (begin + (debug:print-info 0 *default-log-port* "sub megatest " action-switches-str " completed with exit code " exit-code) + (if (eq? 0 exit-code) + (begin + #t) + (begin + #f)))))))) + + + +;; (subrun:exec-sub-megatest "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/165/megatest/ext-tests/tests/subrun-usecases/toparea/links/SYSTEM_val/RELEASE_val/go/toptest" "-foo" "foo") + + +) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -44,1057 +44,5 @@ (prefix mtargs args:)) (import dbfile) ;; (import pgdb) ;; pgdb is a module -(include "task_records.scm") -(include "db_records.scm") - -;;====================================================================== -;; Tasks db -;;====================================================================== - -(define (tasks:get-task-db-path) - (let ((dbdir (or (configf:lookup *configdat* "setup" "monitordir") - (configf:lookup *configdat* "setup" "dbdir") - (conc (common:get-linktree) "/.db")))) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "Couldn't create path to " dbdir ", exn=" exn) - (exit 1)) - (if (not (directory? dbdir))(create-directory dbdir #t))) - dbdir)) - -;; If file exists AND -;; file readable -;; ==> open it -;; If file exists AND -;; file NOT readable -;; ==> open in-mem version -;; If file NOT exists -;; ==> open in-mem version -;; -(define (tasks:open-db #!key (numretries 4)) - (if *task-db* - *task-db* - (handle-exceptions - exn - (if (> numretries 0) - (begin - (print-call-chain (current-error-port)) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* " exn=" (condition->list exn)) - (thread-sleep! 1) - (tasks:open-db numretries (- numretries 1))) - (begin - (print-call-chain (current-error-port)) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* " exn=" (condition->list exn)))) - (let* ((dbpath (common:make-tmpdir-name *toppath* "")) ;; (tasks:get-task-db-path)) - (dbfile (conc dbpath "/monitor.db")) - (avail (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away - (exists (common:file-exists? dbpath)) - (write-access (file-write-access? dbpath)) - (mdb (cond ;; what the hek is *toppath* doing here? - ((and (string? *toppath*)(file-write-access? *toppath*)) - (sqlite3:open-database dbfile)) - ((file-read-access? dbpath) (sqlite3:open-database dbfile)) - (else (sqlite3:open-database ":memory:")))) ;; (never-give-up-open-db dbpath)) - (handler (sqlite3:make-busy-timeout 36000))) - (if (and exists - (not write-access)) - (set! *db-write-access* write-access)) ;; only unset so other db's also can use this control - (sqlite3:set-busy-handler! mdb handler) - (db:set-sync mdb) ;; (sqlite3:execute mdb (conc "PRAGMA synchronous = 0;")) - ;; (if (or (and (not exists) - ;; (file-write-access? *toppath*)) - ;; (not (file-read-access? dbpath))) - ;; (begin - ;; - ;; TASKS QUEUE MOVED TO main.db - ;; - ;; (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, - ;; action TEXT DEFAULT '', - ;; owner TEXT, - ;; state TEXT DEFAULT 'new', - ;; target TEXT DEFAULT '', - ;; name TEXT DEFAULT '', - ;; testpatt TEXT DEFAULT '', - ;; keylock TEXT, - ;; params TEXT, - ;; creation_time TIMESTAMP, - ;; execution_time TIMESTAMP);") - (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS monitors (id INTEGER PRIMARY KEY, - pid INTEGER, - start_time TIMESTAMP, - last_update TIMESTAMP, - hostname TEXT, - username TEXT, - CONSTRAINT monitors_constraint UNIQUE (pid,hostname));") - (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS servers (id INTEGER PRIMARY KEY, - pid INTEGER, - interface TEXT, - hostname TEXT, - port INTEGER, - pubport INTEGER, - start_time TIMESTAMP, - priority INTEGER, - state TEXT, - mt_version TEXT, - heartbeat TIMESTAMP, - transport TEXT, - run_id INTEGER);") - ;; CONSTRAINT servers_constraint UNIQUE (pid,hostname,port));") - (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS clients (id INTEGER PRIMARY KEY, - server_id INTEGER, - pid INTEGER, - hostname TEXT, - cmdline TEXT, - login_time TIMESTAMP, - logout_time TIMESTAMP DEFAULT -1, - CONSTRAINT clients_constraint UNIQUE (pid,hostname));") - - ;)) - (set! *task-db* (cons mdb dbpath)) - *task-db*)))) - -;;====================================================================== -;; Server and client management -;;====================================================================== - -;; make-vector-record tasks hostinfo id interface port pubport transport pid hostname -(define (tasks:hostinfo-get-id vec) (vector-ref vec 0)) -(define (tasks:hostinfo-get-interface vec) (vector-ref vec 1)) -(define (tasks:hostinfo-get-port vec) (vector-ref vec 2)) -(define (tasks:hostinfo-get-pubport vec) (vector-ref vec 3)) -(define (tasks:hostinfo-get-transport vec) (vector-ref vec 4)) -(define (tasks:hostinfo-get-pid vec) (vector-ref vec 5)) -(define (tasks:hostinfo-get-hostname vec) (vector-ref vec 6)) - -(define (tasks:need-server run-id) - (equal? (configf:lookup *configdat* "server" "required") "yes")) - -;; no elegance here ... -;; -(define (tasks:kill-server hostname pid #!key (kill-switch "")) - (debug:print-info 0 *default-log-port* "Attempting to kill server process " pid " on host " hostname) - (setenv "TARGETHOST" hostname) - (let* ((logdir (if (directory-exists? "logs") - "logs/" - "")) - (logfile (if logdir (conc "logs/server-"pid"-"hostname".log") #f)) - (gzfile (if logfile (conc logfile ".gz")))) - (setenv "TARGETHOST_LOGF" (conc logdir "server-kills.log")) - - (system (conc "nbfake kill "kill-switch" "pid)) - - (when logfile - (thread-sleep! 0.5) - (if (common:file-exists? gzfile) (delete-file gzfile)) - (system (conc "gzip " logfile)) - - (unsetenv "TARGETHOST_LOGF") - (unsetenv "TARGETHOST")))) - - -;;====================================================================== -;; M O N I T O R S -;;====================================================================== - -(define (tasks:remove-monitor-record mdb) - (sqlite3:execute mdb "DELETE FROM monitors WHERE pid=? AND hostname=?;" - (current-process-id) - (get-host-name))) - -(define (tasks:get-monitors mdb) - (let ((res '())) - (sqlite3:for-each-row - (lambda (a . rem) - (set! res (cons (apply vector a rem) res))) - mdb - "SELECT id,pid,strftime('%m/%d/%Y %H:%M',datetime(start_time,'unixepoch'),'localtime'),strftime('%m/%d/%Y %H:%M:%S',datetime(last_update,'unixepoch'),'localtime'),hostname,username FROM monitors ORDER BY last_update ASC;") - (reverse res) - )) - -(define (tasks:monitors->text-table monitors) - (let ((fmtstr "~4a~8a~20a~20a~10a~10a")) - (conc (format #f fmtstr "id" "pid" "start time" "last update" "hostname" "user") "\n" - (string-intersperse - (map (lambda (monitor) - (format #f fmtstr - (tasks:monitor-get-id monitor) - (tasks:monitor-get-pid monitor) - (tasks:monitor-get-start_time monitor) - (tasks:monitor-get-last_update monitor) - (tasks:monitor-get-hostname monitor) - (tasks:monitor-get-username monitor))) - monitors) - "\n")))) - -;; update the last_update field with the current time and -;; if any monitors appear dead, remove them -(define (tasks:monitors-update mdb) - (sqlite3:execute mdb "UPDATE monitors SET last_update=strftime('%s','now') WHERE pid=? AND hostname=?;" - (current-process-id) - (get-host-name)) - (let ((deadlist '())) - (sqlite3:for-each-row - (lambda (id pid host last-update delta) - (debug:print 0 *default-log-port* "Going to delete stale record for monitor with pid " pid " on host " host " last updated " delta " seconds ago") - (set! deadlist (cons id deadlist))) - mdb - "SELECT id,pid,hostname,last_update,strftime('%s','now')-last_update AS delta FROM monitors WHERE delta > 700;") - (sqlite3:execute mdb (conc "DELETE FROM monitors WHERE id IN ('" (string-intersperse (map conc deadlist) "','") "');"))) - ) -(define (tasks:register-monitor db port) - (let* ((pid (current-process-id)) - (hostname (get-host-name)) - (userinfo (user-information (current-user-id))) - (username (car userinfo))) - (debug:print 0 *default-log-port* "Register monitor, pid: " pid ", hostname: " hostname ", port: " port ", username: " username) - (sqlite3:execute db "INSERT INTO monitors (pid,start_time,last_update,hostname,username) VALUES (?,strftime('%s','now'),strftime('%s','now'),?,?);" - pid hostname username))) - -(define (tasks:get-num-alive-monitors mdb) - (let ((res 0)) - (sqlite3:for-each-row - (lambda (count) - (set! res count)) - mdb - "SELECT count(id) FROM monitors WHERE last_update < (strftime('%s','now') - 300) AND username=?;" - (car (user-information (current-user-id)))) - res)) - -;; -#;(define (tasks:start-monitor db mdb) - (if (> (tasks:get-num-alive-monitors mdb) 2) ;; have two running, no need for more - (debug:print-info 1 *default-log-port* "Not starting monitor, already have more than two running") - (let* ((megatestdb (conc *toppath* "/megatest.db")) - (monitordbf (conc (db:dbfile-path #f) "/monitor.db")) - (last-db-update 0)) ;; (file-modification-time megatestdb))) - (task:register-monitor mdb) - (let loop ((count 0) - (next-touch 0)) ;; next-touch is the time where we need to update last_update - ;; if the db has been modified we'd best look at the task queue - (let ((modtime (file-modification-time megatestdbpath ))) - (if (> modtime last-db-update) - (tasks:process-queue db)) ;; BROKEN. mdb last-db-update megatestdb next-touch)) - ;; WARNING: Possible race conditon here!! - ;; should this update be immediately after the task-get-action call above? - (if (> (current-seconds) next-touch) - (begin - (tasks:monitors-update mdb) - (loop (+ count 1)(+ (current-seconds) 240))) - (loop (+ count 1) next-touch))))))) - -;;====================================================================== -;; T A S K S Q U E U E -;; -;; NOTE:: These operate on task_queue which is in main.db -;; -;;====================================================================== - -;; NOTE: It might be good to add one more layer of checking to ensure -;; that no task gets run in parallel. - -;; id INTEGER PRIMARY KEY, -;; action TEXT DEFAULT '', -;; owner TEXT, -;; state TEXT DEFAULT 'new', -;; target TEXT DEFAULT '', -;; name TEXT DEFAULT '', -;; testpatt TEXT DEFAULT '', -;; keylock TEXT, -;; params TEXT, -;; creation_time TIMESTAMP DEFAULT (strftime('%s','now')), -;; execution_time TIMESTAMP); - - -;; register a task -(define (tasks:add dbstruct action owner target runname testpatt params) - (db:with-db - dbstruct #f #t - (lambda (dbdat db) - (sqlite3:execute db "INSERT INTO tasks_queue (action,owner,state,target,name,testpatt,params,creation_time,execution_time) - VALUES (?,?,'new',?,?,?,?,strftime('%s','now'),0);" - action - owner - target - runname - testpatt - (if params params ""))))) - -(define (keys:key-vals-hash->target keys key-params) - (let ((tmp (hash-table-ref/default key-params (vector-ref (car keys) 0) ""))) - (if (> (length keys) 1) - (for-each (lambda (key) - (set! tmp (conc tmp "/" (hash-table-ref/default key-params (vector-ref key 0) "")))) - (cdr keys))) - tmp)) - -;; for use from the gui, not ported -;; -;; (define (tasks:add-from-params mdb action keys key-params var-params) -;; (let ((target (keys:key-vals-hash->target keys key-params)) -;; (owner (car (user-information (current-user-id)))) -;; (runname (hash-table-ref/default var-params "runname" #f)) -;; (testpatts (hash-table-ref/default var-params "testpatts" "%")) -;; (params (hash-table-ref/default var-params "params" ""))) -;; (tasks:add mdb action owner target runname testpatts params))) - -;; return one task from those who are 'new' OR 'waiting' AND more than 10sec old -;; -(define (tasks:snag-a-task dbstruct) - (let ((res #f) - (keytxt (conc (current-process-id) "-" (get-host-name) "-" (car (user-information (current-user-id)))))) - (db:with-db - dbstruct #f #t - (lambda (dat db) - ;; first randomly set a new to pid-hostname-hostname - (sqlite3:execute - db - "UPDATE tasks_queue SET keylock=? WHERE id IN - (SELECT id FROM tasks_queue - WHERE state='new' OR - (state='waiting' AND (strftime('%s','now')-execution_time) > 10) OR - state='reset' - ORDER BY RANDOM() LIMIT 1);" keytxt) - - (sqlite3:for-each-row - (lambda (id . rem) - (set! res (apply vector id rem))) - db - "SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time FROM tasks_queue WHERE keylock=? ORDER BY execution_time ASC LIMIT 1;" keytxt) - (if res ;; yep, have work to be done - (begin - (sqlite3:execute db "UPDATE tasks_queue SET state='inprogress',execution_time=strftime('%s','now') WHERE id=?;" - (tasks:task-get-id res)) - res) - #f))))) - -(define (tasks:reset-stuck-tasks dbstruct) - (let ((res '())) - (db:with-db - dbstruct #f #t - (lambda (dat db) - (sqlite3:for-each-row - (lambda (id delta) - (set! res (cons id res))) - db - "SELECT id,strftime('%s','now')-execution_time AS delta FROM tasks_queue WHERE state='inprogress' AND delta>700 ORDER BY delta DESC LIMIT 2;") - (sqlite3:execute - db - (conc "UPDATE tasks_queue SET state='reset' WHERE id IN ('" (string-intersperse (map conc res) "','") "');") - ))))) - -;; return all tasks in the tasks_queue table -;; -(define (tasks:get-tasks dbstruct types states) - (let ((res '())) - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (id . rem) - (set! res (cons (apply vector id rem) res))) - db - (conc "SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time - FROM tasks_queue " - ;; WHERE - ;; state IN " statesstr " AND - ;; action IN " actionsstr - " ORDER BY creation_time DESC;")) - res)))) - -(define (tasks:get-last dbstruct target runname) - (let ((res #f)) - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (id . rem) - (set! res (apply vector id rem))) - db - (conc "SELECT id,action,owner,state,target,name,testpatt,keylock,params,creation_time,execution_time - FROM tasks_queue - WHERE - target = ? AND name =? - ORDER BY creation_time DESC LIMIT 1;") - target runname) - res)))) - -;; remove tasks given by a string of numbers comma separated -(define (tasks:remove-queue-entries dbstruct task-ids) - (db:with-db - dbstruct #f #t - (lambda (dbdat db) - (sqlite3:execute db (conc "DELETE FROM tasks_queue WHERE id IN (" task-ids ");"))))) - -;; (define (tasks:process-queue dbstruct) -;; (let* ((task (tasks:snag-a-task dbstruct)) -;; (action (if task (tasks:task-get-action task) #f))) -;; (if action (print "tasks:process-queue task: " task)) -;; (if action -;; (case (string->symbol action) -;; ((run) (tasks:start-run dbstruct task)) -;; ((remove) (tasks:remove-runs dbstruct task)) -;; ((lock) (tasks:lock-runs dbstruct task)) -;; ;; ((monitor) (tasks:start-monitor db task)) -;; #;((rollup) (tasks:rollup-runs dbstruct task)) -;; ((updatemeta)(tasks:update-meta dbstruct task)) -;; #;((kill) (tasks:kill-monitors dbstruct task)))))) - -(define (tasks:tasks->text tasks) - (let ((fmtstr "~10a~10a~10a~12a~20a~12a~12a~10a")) - (conc (format #f fmtstr "id" "action" "owner" "state" "target" "runname" "testpatts" "params") "\n" - (string-intersperse - (map (lambda (task) - (format #f fmtstr - (tasks:task-get-id task) - (tasks:task-get-action task) - (tasks:task-get-owner task) - (tasks:task-get-state task) - (tasks:task-get-target task) - (tasks:task-get-name task) - (tasks:task-get-testpatt task) - ;; (tasks:task-get-item task) - (tasks:task-get-params task))) - tasks) "\n")))) - -(define (tasks:set-state dbstruct task-id state) - (db:with-db - dbstruct #f #t - (lambda (dbdat db) - (sqlite3:execute db "UPDATE tasks_queue SET state=? WHERE id=?;" - state - task-id)))) - -;;====================================================================== -;; Access using task key (stored in params; (hash-table->alist flags) hostname pid -;;====================================================================== - -(define (tasks:param-key->id dbstruct task-params) - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (handle-exceptions - exn - #f - (sqlite3:first-result db "SELECT id FROM tasks_queue WHERE params LIKE ?;" - task-params))))) - -(define (tasks:set-state-given-param-key dbstruct param-key new-state) - (db:with-db - dbstruct #f #t - (lambda (dbdat db) - (sqlite3:execute db "UPDATE tasks_queue SET state=? WHERE params LIKE ?;" new-state param-key)))) - -(define (tasks:get-records-given-param-key dbstruct param-key state-patt action-patt test-patt) - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (handle-exceptions - exn - '() - (sqlite3:first-row db "SELECT id,action,owner,state,target,name,testpatt,keylock,params WHERE - params LIKE ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;" - param-key state-patt action-patt test-patt))))) - -(define (tasks:find-task-queue-records dbstruct target run-name test-patt state-patt action-patt) - (db:with-db - dbstruct - #f #f - (lambda (dbdat db) - (let ((res '())) - (sqlite3:for-each-row - (lambda (a . b) - (set! res (cons (cons a b) res))) - db "SELECT id,action,owner,state,target,name,testpatt,keylock,params FROM tasks_queue - WHERE - target = ? AND name = ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;" - target run-name state-patt action-patt test-patt) - res)))) - -;; kill any runner processes (i.e. processes handling -runtests) that match target/runname -;; -;; do a remote call to get the task queue info but do the killing as self here. -;; -(define (tasks:kill-runner target run-name testpatt) - (let ((records (rmt:tasks-find-task-queue-records target run-name testpatt "running" "run-tests")) - (hostpid-rx (regexp "\\s+(\\w+)\\s+(\\d+)$"))) ;; host pid is at end of param string - (if (null? records) - (debug:print 0 *default-log-port* "No run launching processes found for " target " / " run-name " with testpatt " (or testpatt "* no testpatt specified! *")) - (debug:print 0 *default-log-port* "Found " (length records) " run(s) to kill.")) - (for-each - (lambda (record) - (let* ((param-key (list-ref record 8)) - (match-dat (string-search hostpid-rx param-key))) - (if match-dat - (let ((hostname (cadr match-dat)) - (pid (string->number (caddr match-dat)))) - (debug:print 0 *default-log-port* "Sending SIGINT to process " pid " on host " hostname) - (if (equal? (get-host-name) hostname) - (if (process:alive? pid) - (begin - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "Kill of process " pid " on host " hostname " failed.") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - #t) - (process-signal pid signal/int) - (thread-sleep! 5) - (if (process:alive? pid) - (process-signal pid signal/kill))))) - ;; (call-with-environment-variables - (let ((old-targethost (getenv "TARGETHOST"))) - (setenv "TARGETHOST" hostname) - (setenv "TARGETHOST_LOGF" "server-kills.log") - (system (conc "nbfake kill " pid)) - (if old-targethost (setenv "TARGETHOST" old-targethost)) - (unsetenv "TARGETHOST") - (unsetenv "TARGETHOST_LOGF")))) - (debug:print-error 0 *default-log-port* "no record or improper record for " target "/" run-name " in tasks_queue in main.db")))) - records))) - -;; (define (tasks:start-run dbstruct mdb task) -;; (let ((flags (make-hash-table))) -;; (hash-table-set! flags "-rerun" "NOT_STARTED") -;; (if (not (string=? (tasks:task-get-params task) "")) -;; (hash-table-set! flags "-setvars" (tasks:task-get-params task))) -;; (print "Starting run " task) -;; ;; sillyness, just call the damn routine with the task vector and be done with it. FIXME SOMEDAY -;; (runs:run-tests db -;; (tasks:task-get-target task) -;; (tasks:task-get-name task) -;; (tasks:task-get-test task) -;; (tasks:task-get-item task) -;; (tasks:task-get-owner task) -;; flags) -;; (tasks:set-state mdb (tasks:task-get-id task) "waiting"))) -;; -;; (define (tasks:rollup-runs db mdb task) -;; (let* ((flags (make-hash-table)) -;; (keys (db:get-keys db)) -;; (keyvals (keys:target-keyval keys (tasks:task-get-target task)))) -;; ;; (hash-table-set! flags "-rerun" "NOT_STARTED") -;; (print "Starting rollup " task) -;; ;; sillyness, just call the damn routine with the task vector and be done with it. FIXME SOMEDAY -;; (runs:rollup-run db -;; keys -;; keyvals -;; (tasks:task-get-name task) -;; (tasks:task-get-owner task)) -;; (tasks:set-state mdb (tasks:task-get-id task) "waiting"))) - -;;====================================================================== -;; S Y N C T O P O S T G R E S Q L -;;====================================================================== - -;; In the spirit of "dump your junk in the tasks module" I'll put the -;; sync to postgres here for now. - -;; attempt to automatically set up an area. call only if get area by path -;; returns naught of interest -;; -(define (tasks:set-area dbh configdat #!key (toppath #f)) ;; could I safely put *toppath* in for the default for toppath? when would it be evaluated? - (let loop ((area-name (or (configf:lookup configdat "setup" "area-name") - (common:get-area-name))) - (modifier 'none)) - (let ((success (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "ERROR: cannot create area entry, " ((condition-property-accessor 'exn 'message) exn)) - #f) ;; FIXME: I don't care for now but I should look at *why* there was an exception - (pgdb:add-area dbh area-name (or toppath *toppath*))))) - (or success - (case modifier - ((none)(loop (conc (current-user-name) "_" area-name) 'user)) - ((user)(loop (conc (substring (common:get-area-path-signature) 0 4) - area-name) 'areasig)) - (else #f)))))) ;; give up - -(define (task:print-runtime run-times saperator) -(for-each - (lambda (run-time-info) - (let* ((run-name (vector-ref run-time-info 0)) - (run-time (vector-ref run-time-info 1)) - (target (vector-ref run-time-info 2))) - (print target saperator run-name saperator run-time ))) - run-times)) - -(define (task:print-runtime-as-json run-times) - (let loop ((run-time-info (car run-times)) - (rema (cdr run-times)) - (str "")) - (let* ((run-name (vector-ref run-time-info 0)) - (run-time (vector-ref run-time-info 1)) - (target (vector-ref run-time-info 2))) - ;(print (not (equal? str ""))) - (if (not (equal? str "")) - (set! str (conc str ","))) - (if (null? rema) - (print "[" str "{target:" target ",run-name:" run-name ", run-time:" run-time "}]") - (loop (car rema) (cdr rema) (conc str "{target:" target ", run-name:" run-name ", run-time:" run-time "}")))))) - -(define (task:get-run-times) - (let* ( - (run-patt (if (args:get-arg "-run-patt") - (args:get-arg "-run-patt") - "%")) - (target-patt (if (args:get-arg "-target-patt") - (args:get-arg "-target-patt") - "%")) - - (run-times (rmt:get-run-times run-patt target-patt ))) - (if (eq? (length run-times) 0) - (begin - (debug:print 0 *default-log-port* "Data not found!!") - (exit))) - (if (equal? (args:get-arg "-dumpmode") "json") - (task:print-runtime-as-json run-times) - (if (equal? (args:get-arg "-dumpmode") "csv") - (task:print-runtime run-times ",") - (task:print-runtime run-times " "))))) - - -(define (task:print-testtime test-times saperator) -(for-each - (lambda (test-time-info) - (let* ((test-name (vector-ref test-time-info 0)) - (test-time (vector-ref test-time-info 2)) - (test-item (if (eq? (string-length (vector-ref test-time-info 1)) 0) - "N/A" - (vector-ref test-time-info 1)))) - (print test-name saperator test-item saperator test-time ))) - test-times)) - -(define (task:print-testtime-as-json test-times) - (let loop ((test-time-info (car test-times)) - (rema (cdr test-times)) - (str "")) - (let* ((test-name (vector-ref test-time-info 0)) - (test-time (vector-ref test-time-info 2)) - (item (vector-ref test-time-info 1))) - ;(print (not (equal? str ""))) - (if (not (equal? str "")) - (set! str (conc str ","))) - (if (null? rema) - (print "[" str "{test-name:" test-name ", item-path:" item ", test-time:" test-time "}]") - (loop (car rema) (cdr rema) (conc str "{test-name:" test-name ", item-path:" item ", test-time:" test-time "}")))))) - - - (define (task:get-test-times) - (let* ((runname (if (args:get-arg "-runname") - (args:get-arg "-runname") - #f)) - (target (if (args:get-arg "-target") - (args:get-arg "-target") - #f)) - - (test-times (rmt:get-test-times runname target ))) - (if (not runname) - (begin - (debug:print 0 *default-log-port* "Error: Missing argument -runname") - (exit))) - (if (string-contains runname "%") - (begin - (debug:print 0 *default-log-port* "Error: Invalid runname, '%' not allowed (" runname ") ") - (exit))) - (if (not target) - (begin - (debug:print 0 *default-log-port* "Error: Missing argument -target") - (exit))) - (if (string-contains target "%") - (begin - (debug:print 0 *default-log-port* "Error: Invalid target, '%' not allowed (" target ") ") - (exit))) - - (if (eq? (length test-times) 0) - (begin - (debug:print 0 *default-log-port* "Data not found!!") - (exit))) - (if (equal? (args:get-arg "-dumpmode") "json") - (task:print-testtime-as-json test-times) - (if (equal? (args:get-arg "-dumpmode") "csv") - (task:print-testtime test-times ",") - (task:print-testtime test-times " "))))) - - - -;; gets mtpg-run-id and syncs the record if different -;; -(define (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time) - (let* ((runs-ht (hash-table-ref cached-info 'runs)) - (runinf (hash-table-ref/default runs-ht run-id #f)) - (area-id (vector-ref area-info 0))) - (if runinf - runinf ;; already cached - (let* ((run-dat (rmt:get-run-info run-id)) ;; NOTE: get-run-info returns a vector < row header > - (run-name (rmt:get-run-name-from-id run-id)) - (row (db:get-rows run-dat)) ;; yes, this returns a single row - (header (db:get-header run-dat)) - (state (db:get-value-by-header row header "state")) - (status (db:get-value-by-header row header "status")) - (owner (db:get-value-by-header row header "owner")) - (event-time (db:get-value-by-header row header "event_time")) - (comment (db:get-value-by-header row header "comment")) - (fail-count (db:get-value-by-header row header "fail_count")) - (pass-count (db:get-value-by-header row header "pass_count")) - (db-contour (db:get-value-by-header row header "contour")) - (contour (if (args:get-arg "-prepend-contour") - (if (and db-contour (not (equal? db-contour "")) (string? db-contour )) - (begin - (debug:print-info 10 *default-log-port* "db-contour" db-contour) - db-contour) - (args:get-arg "-contour")))) - (run-tag (if (args:get-arg "-run-tag") - (args:get-arg "-run-tag") - "")) - (last-update (db:get-value-by-header row header "last_update")) - (keytarg (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target")) - (conc "MT_CONTOUR/MT_AREA/" (string-intersperse (rmt:get-keys) "/")) (string-intersperse (rmt:get-keys) "/"))) ;; e.g. version/iteration/platform - (base-target (rmt:get-target run-id)) - (target (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target")) - (conc (or (args:get-arg "-prefix-target") (conc contour "/" (common:get-area-name) "/")) base-target) base-target)) ;; e.g. v1.63/a3e1/ubuntu - (spec-id (pgdb:get-ttype dbh keytarg)) - (publish-time (if (args:get-arg "-cp-eventtime-to-publishtime") - event-time - (current-seconds))) - (new-run-id (if (and run-name base-target) (pgdb:get-run-id dbh spec-id target run-name area-id) #f))) - (if new-run-id - (begin ;; let ((run-record (pgdb:get-run-info dbh new-run-id)) - (hash-table-set! runs-ht run-id new-run-id) - ;; ensure key fields are up to date - ;; if last_update == pgdb_last_update do not update smallest-last-update-time - (let* ((pgdb-last-update (pgdb:get-run-last-update dbh new-run-id)) - (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))) - (if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) - (hash-table-set! smallest-last-update-time "smallest-time" last-update))) - (pgdb:refresh-run-info - dbh - new-run-id - state status owner event-time comment fail-count pass-count area-id last-update publish-time) - (debug:print-info 4 *default-log-port* (conc "Working on run-id " run-id " pgdb-id " new-run-id)) - (if (not (equal? run-tag "")) - (task:add-run-tag dbh new-run-id run-tag)) - new-run-id) - - (if (or (not state) (equal? state "deleted")) - (begin - (debug:print-info 1 *default-log-port* "Warning: Run with id " run-id " was created after previous sync and deleted before the sync") #f) - (if (handle-exceptions - exn - (begin (print-call-chain) - (debug:print 0 *default-log-port* ((condition-property-accessor 'exn 'message) exn)) - #f) - - (pgdb:insert-run - dbh - spec-id target run-name state status owner event-time comment fail-count pass-count area-id last-update publish-time)) - (let* ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))) - (if (or (not smallest-time) (< last-update smallest-time)) - (hash-table-set! smallest-last-update-time "smallest-time" last-update)) - (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)) - #f))))))) - -(define (task:add-run-tag dbh run-id tag) - (let* ((tag-info (pgdb:get-tag-info-by-name dbh tag))) - (if (not tag-info) - (begin - (if (handle-exceptions - exn - (begin - (debug:print-info 1 *default-log-port* ((condition-property-accessor 'exn 'message) exn)) - #f) - (pgdb:insert-tag dbh tag)) - (set! tag-info (pgdb:get-tag-info-by-name dbh tag)) - #f))) - ;;add to area_tags - (handle-exceptions - exn - (begin - (debug:print-info 1 *default-log-port* ((condition-property-accessor 'exn 'message) exn)) - #f) - (if (not (pgdb:is-run-taged-with-a-tag dbh (vector-ref tag-info 0) run-id)) - (pgdb:insert-run-tag dbh (vector-ref tag-info 0) run-id))))) - - -(define (tasks:sync-test-steps dbh cached-info test-step-ids smallest-last-update-time) - ; (print "Sync Steps " test-step-ids ) - (let ((test-ht (hash-table-ref cached-info 'tests)) - (step-ht (hash-table-ref cached-info 'steps)) - (run-id-in #f) - ) - (for-each - (lambda (test-step-id) - (set! run-id-in (cdr test-step-id)) - (set! test-step-id (car test-step-id)) - - - (let* ((test-step-info (rmt:get-steps-info-by-id run-id-in test-step-id)) - (step-id (tdb:step-get-id test-step-info)) - (test-id (tdb:step-get-test_id test-step-info)) - (stepname (tdb:step-get-stepname test-step-info)) - (state (tdb:step-get-state test-step-info)) - (status (tdb:step-get-status test-step-info)) - (event_time (tdb:step-get-event_time test-step-info)) - (comment (tdb:step-get-comment test-step-info)) - (logfile (tdb:step-get-logfile test-step-info)) - (last-update (tdb:step-get-last_update test-step-info)) - (pgdb-test-id (hash-table-ref/default test-ht test-id #f)) - (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)) - (pgdb-step-id (if pgdb-test-id - (pgdb:get-test-step-id dbh pgdb-test-id stepname state) - #f))) - (if step-id - (begin - (if pgdb-test-id - (begin - (if pgdb-step-id - (begin - (debug:print-info 4 *default-log-port* "Updating existing test-step with test-id: " test-id " and step-id " step-id " pgdb test id: " pgdb-test-id " pgdb step id " pgdb-step-id ) - (let* ((pgdb-last-update (pgdb:get-test-step-last-update dbh pgdb-step-id))) - (if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) - (hash-table-set! smallest-last-update-time "smallest-time" last-update))) - (pgdb:update-test-step dbh pgdb-step-id pgdb-test-id stepname state status event_time comment logfile last-update)) - (begin - (debug:print-info 4 *default-log-port* "Inserting test-step with test-id: " test-id " and step-id " step-id " pgdb test id: " pgdb-test-id) - (if (or (not smallest-time) (< last-update smallest-time)) - (hash-table-set! smallest-last-update-time "smallest-time" last-update)) - (pgdb:insert-test-step dbh pgdb-test-id stepname state status event_time comment logfile last-update ) - (set! pgdb-step-id (pgdb:get-test-step-id dbh pgdb-test-id stepname state)))) - (hash-table-set! step-ht step-id pgdb-step-id )) - (debug:print-info 1 *default-log-port* "Error: Test not cashed"))) - (debug:print-info 1 *default-log-port* "Error: Could not get test step info for step id " test-step-id )))) ;; this is a wierd senario need to debug - test-step-ids))) - -(define (tasks:sync-test-gen-data dbh cached-info test-data-ids smallest-last-update-time) - (let ((test-ht (hash-table-ref cached-info 'tests)) - (data-ht (hash-table-ref cached-info 'data)) - (run-id-in #f) - ) - (for-each - (lambda (test-data-id) - (set! run-id-in (cdr test-data-id)) - (set! test-data-id (car test-data-id)) - (let* ((test-data-info (rmt:get-data-info-by-id run-id-in test-data-id)) - (data-id (db:test-data-get-id test-data-info)) - (test-id (db:test-data-get-test_id test-data-info)) - (category (db:test-data-get-category test-data-info)) - (variable (db:test-data-get-variable test-data-info)) - (value (db:test-data-get-value test-data-info)) - (expected (db:test-data-get-expected test-data-info)) - (tol (db:test-data-get-tol test-data-info)) - (units (db:test-data-get-units test-data-info)) - (comment (db:test-data-get-comment test-data-info)) - (status (db:test-data-get-status test-data-info)) - (type (db:test-data-get-type test-data-info)) - (last-update (db:test-data-get-last_update test-data-info)) - (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)) - - (pgdb-test-id (hash-table-ref/default test-ht test-id #f)) - (pgdb-data-id (if pgdb-test-id - (pgdb:get-test-data-id dbh pgdb-test-id category variable) - #f))) - (if data-id - (begin - (if pgdb-test-id - (begin - (if pgdb-data-id - (begin - (debug:print-info 4 *default-log-port* "Updating existing test-data with test-id: " test-id " and data-id " data-id " pgdb test id: " pgdb-test-id " pgdb data id " pgdb-data-id) - (let* ((pgdb-last-update (pgdb:get-test-data-last-update dbh pgdb-data-id))) - (if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) - (hash-table-set! smallest-last-update-time "smallest-time" last-update))) - (pgdb:update-test-data dbh pgdb-data-id pgdb-test-id category variable value expected tol units comment status type last-update)) - (begin - (debug:print-info 4 *default-log-port* "Inserting test-data with test-id: " test-id " and data-id " data-id " pgdb test id: " pgdb-test-id) - (if (handle-exceptions - exn - (begin (print-call-chain) - (debug:print 0 *default-log-port* ((condition-property-accessor 'exn 'message) exn)) - #f) - - (pgdb:insert-test-data dbh pgdb-test-id category variable value expected tol units comment status type last-update)) - ;(tasks:run-id->mtpg-run-id dbh cached-info run-id area-info) - (begin - ;(pgdb:insert-test-data dbh pgdb-test-id category variable value expected tol units comment status type ) - (if (or (not smallest-time) (< last-update smallest-time)) - (hash-table-set! smallest-last-update-time "smallest-time" last-update)) - (set! pgdb-data-id (pgdb:get-test-data-id dbh pgdb-test-id category variable))) - #f))) - (hash-table-set! data-ht data-id pgdb-data-id )) - (begin - (debug:print-info 1 *default-log-port* "Error: Test not in pgdb")))) - - (debug:print-info 1 *default-log-port* "Error: Could not get test data info for data id " test-data-id )))) ;; this is a wierd senario need to debug - test-data-ids))) - - - -(define (tasks:sync-tests-data dbh cached-info test-ids area-info smallest-last-update-time main-run-id) - (let ((test-ht (hash-table-ref cached-info 'tests)) - (run-id-in main-run-id)) - (for-each - (lambda (test-id) - ; (set! run-id-in (cdr test-id)) - ; (set! test-id (car test-id)) - - (debug:print 0 *default-log-port* "test-id: " test-id " run-id: " run-id-in) - (let* ((test-info (rmt:get-test-info-by-id run-id-in test-id)) - (run-id (db:test-get-run_id test-info)) ;; look these up in db_records.scm - (test-id (db:test-get-id test-info)) - (test-name (db:test-get-testname test-info)) - (item-path (db:test-get-item-path test-info)) - (state (db:test-get-state test-info)) - (status (db:test-get-status test-info)) - (host (db:test-get-host test-info)) - (pid (db:test-get-process_id test-info)) - (cpuload (db:test-get-cpuload test-info)) - (diskfree (db:test-get-diskfree test-info)) - (uname (db:test-get-uname test-info)) - (run-dir (db:test-get-rundir test-info)) - (log-file (db:test-get-final_logf test-info)) - (run-duration (db:test-get-run_duration test-info)) - (comment (db:test-get-comment test-info)) - (event-time (db:test-get-event_time test-info)) - (archived (db:test-get-archived test-info)) - (last-update (db:test-get-last_update test-info)) - (pgdb-run-id (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)) - (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)) - (pgdb-test-id (if pgdb-run-id - (begin - ;(print pgdb-run-id) - (pgdb:get-test-id dbh pgdb-run-id test-name item-path)) - #f))) - ;; "id" "run_id" "testname" "state" "status" "event_time" - ;; "host" "cpuload" "diskfree" "uname" "rundir" "item_path" - ;; "run_duration" "final_logf" "comment" "shortdir" "attemptnum" "archived" - (if (or (not item-path) (string-null? item-path)) - (debug:print-info 0 *default-log-port* "Working on Run id : " run-id " and test name : " test-name)) - (if pgdb-run-id - (begin - (if pgdb-test-id ;; have a record - (begin ;; let ((key-name (conc run-id "/" test-name "/" item-path))) - (debug:print-info 4 *default-log-port* "Updating existing test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id " pgdb-test-id " pgdb-test-id) - (let* ((pgdb-last-update (pgdb:get-test-last-update dbh pgdb-test-id))) - (if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) ;;if last-update is same as pgdb-last-update then it is safe to assume the records are identical and we can use a larger last update time. - (hash-table-set! smallest-last-update-time "smallest-time" last-update))) - (pgdb:update-test dbh pgdb-test-id pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid)) - (begin - (debug:print-info 4 *default-log-port* "Inserting test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id) - (pgdb:insert-test dbh pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid) - (if (or (not smallest-time) (< last-update smallest-time)) - (hash-table-set! smallest-last-update-time "smallest-time" last-update)) - (set! pgdb-test-id (pgdb:get-test-id dbh pgdb-run-id test-name item-path)))) - (hash-table-set! test-ht test-id pgdb-test-id)) - (debug:print-info 1 *default-log-port* "WARNING: Skipping run with run-id:" run-id ". This run was created after privious sync and removed before this sync.")))) - test-ids))) - -(define (task:add-area-tag dbh area-info tag) - (let* ((tag-info (pgdb:get-tag-info-by-name dbh tag))) - (if (not tag-info) - (begin - (if (handle-exceptions - exn - (begin - (debug:print-info 1 *default-log-port* ((condition-property-accessor 'exn 'message) exn)) - #f) - (pgdb:insert-tag dbh tag)) - (set! tag-info (pgdb:get-tag-info-by-name dbh tag)) - #f))) - ;;add to area_tags - (handle-exceptions - exn - (begin - (debug:print-info 1 *default-log-port* ((condition-property-accessor 'exn 'message) exn)) - #f) - (if (not (pgdb:is-area-taged-with-a-tag dbh (vector-ref tag-info 0) (vector-ref area-info 0))) - (pgdb:insert-area-tag dbh (vector-ref tag-info 0) (vector-ref area-info 0)))))) - -(define (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time) - (for-each - (lambda (run-id) - (debug:print-info 4 *default-log-port* "Check if run with " run-id " needs to be synced" ) - (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)) -run-ids)) - -;; get runs changed since last sync -;; (define (tasks:sync-test-data dbh cached-info area-info) -;; (let* (( - -(define (tasks:sync-to-postgres configdat dest) - ;; (print "In sync") - (let* ((dbh (pgdb:open configdat dbname: dest)) - (area-info (pgdb:get-area-by-path dbh *toppath*)) - (cached-info (make-hash-table)) - (start (current-seconds)) - (test-patt (if (args:get-arg "-testpatt") - (args:get-arg "-testpatt") - "%")) - (target (if (args:get-arg "-target") - (args:get-arg "-target") - #f)) - (run-name (if (args:get-arg "-runname") - (args:get-arg "-runname") - #f))) - (if (and target (not run-name)) - (begin - (debug:print 0 *default-log-port* "Error: Provide runname") - (exit 1))) - (if (and (not target) run-name) - (begin - (debug:print 0 *default-log-port* "Error: Provide target") - (exit 1))) - ;(print "123") - ;(exit 1) - (for-each (lambda (dtype) - (hash-table-set! cached-info dtype (make-hash-table))) - '(runs targets tests steps data)) - (hash-table-set! cached-info 'start start) ;; when done we'll set sync times to this - (if area-info - (let* ((last-sync-time (if (and target run-name) - 0 - (if (args:get-arg "-since") - (string->number (args:get-arg "-since")) (vector-ref area-info 3)))) - (smallest-last-update-time (make-hash-table)) - (run-ids (if (and target run-name) - (rmt:get-run-record-ids target run-name (rmt:get-keys)) - (rmt:get-changed-record-run-ids last-sync-time))) - (all-run-ids (if (and target run-name) '() (rmt:get-all-runids))) - (changed-run-dbs (if (and target run-name) '() (db:get-changed-run-ids last-sync-time))) - (changed-run-ids (if (and target run-name) run-ids (filter (lambda (run) (member (modulo run (num-run-dbs)) changed-run-dbs)) all-run-ids))) - (area-tag (if (args:get-arg "-area-tag") - (args:get-arg "-area-tag") - (if (args:get-arg "-area") - (args:get-arg "-area") - "")))) - (if (and (equal? area-tag "") (not (pgdb:is-area-taged dbh (vector-ref area-info 0)))) - (set! area-tag *default-area-tag*)) - (if (not (equal? area-tag "")) - (task:add-area-tag dbh area-info area-tag)) - (if (not (null? run-ids)) - (begin - (debug:print-info 0 *default-log-port* "syncing runs: " run-ids) - (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time))) - (for-each - (lambda (run-id) - (let ((test-ids (rmt:get-changed-record-test-ids run-id last-sync-time))) - (print test-ids) - (if (not (null? test-ids)) - (begin - (debug:print-info 0 *default-log-port* "syncing tests: " test-ids) - (tasks:sync-tests-data dbh cached-info test-ids area-info smallest-last-update-time run-id))))) - changed-run-ids) - (let* ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" (current-seconds)))) - (debug:print-info 0 "smallest-time :" smallest-time " last-sync-time " last-sync-time) - (if (not (and target run-name)) - (if (or (and smallest-time (> smallest-time last-sync-time)) (and smallest-time (eq? last-sync-time 0))) - (pgdb:write-sync-time dbh area-info smallest-time))))) ;;this needs to be changed - (if (tasks:set-area dbh configdat) - (tasks:sync-to-postgres configdat dest) - (begin - (debug:print 0 *default-log-port* "ERROR: unable to create an area record") - #f))))) - ADDED tasksmod.scm Index: tasksmod.scm ================================================================== --- /dev/null +++ tasksmod.scm @@ -0,0 +1,1873 @@ +;;====================================================================== +;; Copyright 2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +;;====================================================================== +;; Cpumod: +;; +;; Put things here don't fit anywhere else +;;====================================================================== + +(declare (unit tasksmod)) +(declare (uses debugprint)) +(declare (uses mtargs)) +(declare (uses commonmod)) +(declare (uses configfmod)) +(declare (uses dbfile)) +(declare (uses dbmod)) +(declare (uses rmtmod)) +(declare (uses servermod)) +(declare (uses processmod)) +(declare (uses pgdb)) +(declare (uses mtmod)) +(declare (uses megatestmod)) + +(use srfi-69) + +(module tasksmod + * + +(import scheme) +(cond-expand + (chicken-4 + + (import chicken + ports + data-structures + extras + files + matchable + pathname-expand + posix + posix-extras + regex + regex-case + sparse-vectors + + ) + (use srfi-69)) + (chicken-5 + (import (prefix sqlite3 sqlite3:) + chicken.base + chicken.condition + chicken.file + chicken.file.posix + chicken.io + chicken.pathname + chicken.port + chicken.process + chicken.process-context + chicken.process-context.posix + chicken.sort + chicken.string + chicken.time + chicken.time.posix + + matchable + md5 + message-digest + pathname-expand + regex + regex-case + system-information + + ))) + +;; imports common to ck4 and ck5 +(import srfi-1 + srfi-13 + srfi-18 + srfi-69 + typed-records + (prefix base64 base64:) + (prefix sqlite3 sqlite3:) + md5 + message-digest + z3 + directory-utils + + debugprint + commonmod + configfmod + (prefix mtargs args:) + dbmod + dbfile + rmtmod + servermod + processmod + pgdb + mtmod + megatestmod + ) + +(include "task_records.scm") +(include "db_records.scm") + +;;====================================================================== +;; Tasks db +;;====================================================================== + +(define (tasks:get-task-db-path) + (let ((dbdir (or (configf:lookup *configdat* "setup" "monitordir") + (configf:lookup *configdat* "setup" "dbdir") + (conc (common:get-linktree) "/.db")))) + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* "Couldn't create path to " dbdir ", exn=" exn) + (exit 1)) + (if (not (directory? dbdir))(create-directory dbdir #t))) + dbdir)) + +;; If file exists AND +;; file readable +;; ==> open it +;; If file exists AND +;; file NOT readable +;; ==> open in-mem version +;; If file NOT exists +;; ==> open in-mem version +;; +(define (tasks:open-db #!key (numretries 4)) + (if *task-db* + *task-db* + (handle-exceptions + exn + (if (> numretries 0) + (begin + (print-call-chain (current-error-port)) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 5 *default-log-port* " exn=" (condition->list exn)) + (thread-sleep! 1) + (tasks:open-db numretries (- numretries 1))) + (begin + (print-call-chain (current-error-port)) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 5 *default-log-port* " exn=" (condition->list exn)))) + (let* ((dbpath (common:make-tmpdir-name *toppath* "")) ;; (tasks:get-task-db-path)) + (dbfile (conc dbpath "/monitor.db")) + (avail (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away + (exists (common:file-exists? dbpath)) + (write-access (file-write-access? dbpath)) + (mdb (cond ;; what the hek is *toppath* doing here? + ((and (string? *toppath*)(file-write-access? *toppath*)) + (sqlite3:open-database dbfile)) + ((file-read-access? dbpath) (sqlite3:open-database dbfile)) + (else (sqlite3:open-database ":memory:")))) ;; (never-give-up-open-db dbpath)) + (handler (sqlite3:make-busy-timeout 36000))) + (if (and exists + (not write-access)) + (set! *db-write-access* write-access)) ;; only unset so other db's also can use this control + (sqlite3:set-busy-handler! mdb handler) + (db:set-sync mdb) ;; (sqlite3:execute mdb (conc "PRAGMA synchronous = 0;")) + ;; (if (or (and (not exists) + ;; (file-write-access? *toppath*)) + ;; (not (file-read-access? dbpath))) + ;; (begin + ;; + ;; TASKS QUEUE MOVED TO main.db + ;; + ;; (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, + ;; action TEXT DEFAULT '', + ;; owner TEXT, + ;; state TEXT DEFAULT 'new', + ;; target TEXT DEFAULT '', + ;; name TEXT DEFAULT '', + ;; testpatt TEXT DEFAULT '', + ;; keylock TEXT, + ;; params TEXT, + ;; creation_time TIMESTAMP, + ;; execution_time TIMESTAMP);") + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS monitors (id INTEGER PRIMARY KEY, + pid INTEGER, + start_time TIMESTAMP, + last_update TIMESTAMP, + hostname TEXT, + username TEXT, + CONSTRAINT monitors_constraint UNIQUE (pid,hostname));") + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS servers (id INTEGER PRIMARY KEY, + pid INTEGER, + interface TEXT, + hostname TEXT, + port INTEGER, + pubport INTEGER, + start_time TIMESTAMP, + priority INTEGER, + state TEXT, + mt_version TEXT, + heartbeat TIMESTAMP, + transport TEXT, + run_id INTEGER);") + ;; CONSTRAINT servers_constraint UNIQUE (pid,hostname,port));") + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS clients (id INTEGER PRIMARY KEY, + server_id INTEGER, + pid INTEGER, + hostname TEXT, + cmdline TEXT, + login_time TIMESTAMP, + logout_time TIMESTAMP DEFAULT -1, + CONSTRAINT clients_constraint UNIQUE (pid,hostname));") + + ;)) + (set! *task-db* (cons mdb dbpath)) + *task-db*)))) + +;;====================================================================== +;; Server and client management +;;====================================================================== + +;; make-vector-record tasks hostinfo id interface port pubport transport pid hostname +(define (tasks:hostinfo-get-id vec) (vector-ref vec 0)) +(define (tasks:hostinfo-get-interface vec) (vector-ref vec 1)) +(define (tasks:hostinfo-get-port vec) (vector-ref vec 2)) +(define (tasks:hostinfo-get-pubport vec) (vector-ref vec 3)) +(define (tasks:hostinfo-get-transport vec) (vector-ref vec 4)) +(define (tasks:hostinfo-get-pid vec) (vector-ref vec 5)) +(define (tasks:hostinfo-get-hostname vec) (vector-ref vec 6)) + +(define (tasks:need-server run-id) + (equal? (configf:lookup *configdat* "server" "required") "yes")) + +;; no elegance here ... +;; +(define (tasks:kill-server hostname pid #!key (kill-switch "")) + (debug:print-info 0 *default-log-port* "Attempting to kill server process " pid " on host " hostname) + (setenv "TARGETHOST" hostname) + (let* ((logdir (if (directory-exists? "logs") + "logs/" + "")) + (logfile (if logdir (conc "logs/server-"pid"-"hostname".log") #f)) + (gzfile (if logfile (conc logfile ".gz")))) + (setenv "TARGETHOST_LOGF" (conc logdir "server-kills.log")) + + (system (conc "nbfake kill "kill-switch" "pid)) + + (when logfile + (thread-sleep! 0.5) + (if (common:file-exists? gzfile) (delete-file gzfile)) + (system (conc "gzip " logfile)) + + (unsetenv "TARGETHOST_LOGF") + (unsetenv "TARGETHOST")))) + + +;;====================================================================== +;; M O N I T O R S +;;====================================================================== + +(define (tasks:remove-monitor-record mdb) + (sqlite3:execute mdb "DELETE FROM monitors WHERE pid=? AND hostname=?;" + (current-process-id) + (get-host-name))) + +(define (tasks:get-monitors mdb) + (let ((res '())) + (sqlite3:for-each-row + (lambda (a . rem) + (set! res (cons (apply vector a rem) res))) + mdb + "SELECT id,pid,strftime('%m/%d/%Y %H:%M',datetime(start_time,'unixepoch'),'localtime'),strftime('%m/%d/%Y %H:%M:%S',datetime(last_update,'unixepoch'),'localtime'),hostname,username FROM monitors ORDER BY last_update ASC;") + (reverse res) + )) + +(define (tasks:monitors->text-table monitors) + (let ((fmtstr "~4a~8a~20a~20a~10a~10a")) + (conc (format #f fmtstr "id" "pid" "start time" "last update" "hostname" "user") "\n" + (string-intersperse + (map (lambda (monitor) + (format #f fmtstr + (tasks:monitor-get-id monitor) + (tasks:monitor-get-pid monitor) + (tasks:monitor-get-start_time monitor) + (tasks:monitor-get-last_update monitor) + (tasks:monitor-get-hostname monitor) + (tasks:monitor-get-username monitor))) + monitors) + "\n")))) + +;; update the last_update field with the current time and +;; if any monitors appear dead, remove them +(define (tasks:monitors-update mdb) + (sqlite3:execute mdb "UPDATE monitors SET last_update=strftime('%s','now') WHERE pid=? AND hostname=?;" + (current-process-id) + (get-host-name)) + (let ((deadlist '())) + (sqlite3:for-each-row + (lambda (id pid host last-update delta) + (debug:print 0 *default-log-port* "Going to delete stale record for monitor with pid " pid " on host " host " last updated " delta " seconds ago") + (set! deadlist (cons id deadlist))) + mdb + "SELECT id,pid,hostname,last_update,strftime('%s','now')-last_update AS delta FROM monitors WHERE delta > 700;") + (sqlite3:execute mdb (conc "DELETE FROM monitors WHERE id IN ('" (string-intersperse (map conc deadlist) "','") "');"))) + ) +(define (tasks:register-monitor db port) + (let* ((pid (current-process-id)) + (hostname (get-host-name)) + (userinfo (user-information (current-user-id))) + (username (car userinfo))) + (debug:print 0 *default-log-port* "Register monitor, pid: " pid ", hostname: " hostname ", port: " port ", username: " username) + (sqlite3:execute db "INSERT INTO monitors (pid,start_time,last_update,hostname,username) VALUES (?,strftime('%s','now'),strftime('%s','now'),?,?);" + pid hostname username))) + +(define (tasks:get-num-alive-monitors mdb) + (let ((res 0)) + (sqlite3:for-each-row + (lambda (count) + (set! res count)) + mdb + "SELECT count(id) FROM monitors WHERE last_update < (strftime('%s','now') - 300) AND username=?;" + (car (user-information (current-user-id)))) + res)) + +;; +#;(define (tasks:start-monitor db mdb) + (if (> (tasks:get-num-alive-monitors mdb) 2) ;; have two running, no need for more + (debug:print-info 1 *default-log-port* "Not starting monitor, already have more than two running") + (let* ((megatestdb (conc *toppath* "/megatest.db")) + (monitordbf (conc (db:dbfile-path #f) "/monitor.db")) + (last-db-update 0)) ;; (file-modification-time megatestdb))) + (task:register-monitor mdb) + (let loop ((count 0) + (next-touch 0)) ;; next-touch is the time where we need to update last_update + ;; if the db has been modified we'd best look at the task queue + (let ((modtime (file-modification-time megatestdbpath ))) + (if (> modtime last-db-update) + (tasks:process-queue db)) ;; BROKEN. mdb last-db-update megatestdb next-touch)) + ;; WARNING: Possible race conditon here!! + ;; should this update be immediately after the task-get-action call above? + (if (> (current-seconds) next-touch) + (begin + (tasks:monitors-update mdb) + (loop (+ count 1)(+ (current-seconds) 240))) + (loop (+ count 1) next-touch))))))) + +;;====================================================================== +;; T A S K S Q U E U E +;; +;; NOTE:: These operate on task_queue which is in main.db +;; +;;====================================================================== + +;; NOTE: It might be good to add one more layer of checking to ensure +;; that no task gets run in parallel. + +;; id INTEGER PRIMARY KEY, +;; action TEXT DEFAULT '', +;; owner TEXT, +;; state TEXT DEFAULT 'new', +;; target TEXT DEFAULT '', +;; name TEXT DEFAULT '', +;; testpatt TEXT DEFAULT '', +;; keylock TEXT, +;; params TEXT, +;; creation_time TIMESTAMP DEFAULT (strftime('%s','now')), +;; execution_time TIMESTAMP); + +(define (keys:key-vals-hash->target keys key-params) + (let ((tmp (hash-table-ref/default key-params (vector-ref (car keys) 0) ""))) + (if (> (length keys) 1) + (for-each (lambda (key) + (set! tmp (conc tmp "/" (hash-table-ref/default key-params (vector-ref key 0) "")))) + (cdr keys))) + tmp)) + +;; for use from the gui, not ported +;; +;; (define (tasks:add-from-params mdb action keys key-params var-params) +;; (let ((target (keys:key-vals-hash->target keys key-params)) +;; (owner (car (user-information (current-user-id)))) +;; (runname (hash-table-ref/default var-params "runname" #f)) +;; (testpatts (hash-table-ref/default var-params "testpatts" "%")) +;; (params (hash-table-ref/default var-params "params" ""))) +;; (tasks:add mdb action owner target runname testpatts params))) + +;; return one task from those who are 'new' OR 'waiting' AND more than 10sec old +;; +(define (tasks:snag-a-task dbstruct) + (let ((res #f) + (keytxt (conc (current-process-id) "-" (get-host-name) "-" (car (user-information (current-user-id)))))) + (db:with-db + dbstruct #f #t + (lambda (dat db) + ;; first randomly set a new to pid-hostname-hostname + (sqlite3:execute + db + "UPDATE tasks_queue SET keylock=? WHERE id IN + (SELECT id FROM tasks_queue + WHERE state='new' OR + (state='waiting' AND (strftime('%s','now')-execution_time) > 10) OR + state='reset' + ORDER BY RANDOM() LIMIT 1);" keytxt) + + (sqlite3:for-each-row + (lambda (id . rem) + (set! res (apply vector id rem))) + db + "SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time FROM tasks_queue WHERE keylock=? ORDER BY execution_time ASC LIMIT 1;" keytxt) + (if res ;; yep, have work to be done + (begin + (sqlite3:execute db "UPDATE tasks_queue SET state='inprogress',execution_time=strftime('%s','now') WHERE id=?;" + (tasks:task-get-id res)) + res) + #f))))) + +(define (tasks:reset-stuck-tasks dbstruct) + (let ((res '())) + (db:with-db + dbstruct #f #t + (lambda (dat db) + (sqlite3:for-each-row + (lambda (id delta) + (set! res (cons id res))) + db + "SELECT id,strftime('%s','now')-execution_time AS delta FROM tasks_queue WHERE state='inprogress' AND delta>700 ORDER BY delta DESC LIMIT 2;") + (sqlite3:execute + db + (conc "UPDATE tasks_queue SET state='reset' WHERE id IN ('" (string-intersperse (map conc res) "','") "');") + ))))) + +;; return all tasks in the tasks_queue table +;; +(define (tasks:get-tasks dbstruct types states) + (let ((res '())) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (id . rem) + (set! res (cons (apply vector id rem) res))) + db + (conc "SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time + FROM tasks_queue " + ;; WHERE + ;; state IN " statesstr " AND + ;; action IN " actionsstr + " ORDER BY creation_time DESC;")) + res)))) + +;; remove tasks given by a string of numbers comma separated +(define (tasks:remove-queue-entries dbstruct task-ids) + (db:with-db + dbstruct #f #t + (lambda (dbdat db) + (sqlite3:execute db (conc "DELETE FROM tasks_queue WHERE id IN (" task-ids ");"))))) + +;; (define (tasks:process-queue dbstruct) +;; (let* ((task (tasks:snag-a-task dbstruct)) +;; (action (if task (tasks:task-get-action task) #f))) +;; (if action (print "tasks:process-queue task: " task)) +;; (if action +;; (case (string->symbol action) +;; ((run) (tasks:start-run dbstruct task)) +;; ((remove) (tasks:remove-runs dbstruct task)) +;; ((lock) (tasks:lock-runs dbstruct task)) +;; ;; ((monitor) (tasks:start-monitor db task)) +;; #;((rollup) (tasks:rollup-runs dbstruct task)) +;; ((updatemeta)(tasks:update-meta dbstruct task)) +;; #;((kill) (tasks:kill-monitors dbstruct task)))))) + +(define (tasks:tasks->text tasks) + (let ((fmtstr "~10a~10a~10a~12a~20a~12a~12a~10a")) + (conc (format #f fmtstr "id" "action" "owner" "state" "target" "runname" "testpatts" "params") "\n" + (string-intersperse + (map (lambda (task) + (format #f fmtstr + (tasks:task-get-id task) + (tasks:task-get-action task) + (tasks:task-get-owner task) + (tasks:task-get-state task) + (tasks:task-get-target task) + (tasks:task-get-name task) + (tasks:task-get-testpatt task) + ;; (tasks:task-get-item task) + (tasks:task-get-params task))) + tasks) "\n")))) + +(define (tasks:set-state dbstruct task-id state) + (db:with-db + dbstruct #f #t + (lambda (dbdat db) + (sqlite3:execute db "UPDATE tasks_queue SET state=? WHERE id=?;" + state + task-id)))) + +;;====================================================================== +;; Access using task key (stored in params; (hash-table->alist flags) hostname pid +;;====================================================================== + +(define (tasks:param-key->id dbstruct task-params) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (handle-exceptions + exn + #f + (sqlite3:first-result db "SELECT id FROM tasks_queue WHERE params LIKE ?;" + task-params))))) + +(define (tasks:get-records-given-param-key dbstruct param-key state-patt action-patt test-patt) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (handle-exceptions + exn + '() + (sqlite3:first-row db "SELECT id,action,owner,state,target,name,testpatt,keylock,params WHERE + params LIKE ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;" + param-key state-patt action-patt test-patt))))) + +;; kill any runner processes (i.e. processes handling -runtests) that match target/runname +;; +;; do a remote call to get the task queue info but do the killing as self here. +;; +(define (tasks:kill-runner target run-name testpatt) + (let ((records (rmt:tasks-find-task-queue-records target run-name testpatt "running" "run-tests")) + (hostpid-rx (regexp "\\s+(\\w+)\\s+(\\d+)$"))) ;; host pid is at end of param string + (if (null? records) + (debug:print 0 *default-log-port* "No run launching processes found for " target " / " run-name " with testpatt " (or testpatt "* no testpatt specified! *")) + (debug:print 0 *default-log-port* "Found " (length records) " run(s) to kill.")) + (for-each + (lambda (record) + (let* ((param-key (list-ref record 8)) + (match-dat (string-search hostpid-rx param-key))) + (if match-dat + (let ((hostname (cadr match-dat)) + (pid (string->number (caddr match-dat)))) + (debug:print 0 *default-log-port* "Sending SIGINT to process " pid " on host " hostname) + (if (equal? (get-host-name) hostname) + (if (process:alive? pid) + (begin + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "Kill of process " pid " on host " hostname " failed.") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + #t) + (process-signal pid signal/int) + (thread-sleep! 5) + (if (process:alive? pid) + (process-signal pid signal/kill))))) + ;; (call-with-environment-variables + (let ((old-targethost (getenv "TARGETHOST"))) + (setenv "TARGETHOST" hostname) + (setenv "TARGETHOST_LOGF" "server-kills.log") + (system (conc "nbfake kill " pid)) + (if old-targethost (setenv "TARGETHOST" old-targethost)) + (unsetenv "TARGETHOST") + (unsetenv "TARGETHOST_LOGF")))) + (debug:print-error 0 *default-log-port* "no record or improper record for " target "/" run-name " in tasks_queue in main.db")))) + records))) + +;; (define (tasks:start-run dbstruct mdb task) +;; (let ((flags (make-hash-table))) +;; (hash-table-set! flags "-rerun" "NOT_STARTED") +;; (if (not (string=? (tasks:task-get-params task) "")) +;; (hash-table-set! flags "-setvars" (tasks:task-get-params task))) +;; (print "Starting run " task) +;; ;; sillyness, just call the damn routine with the task vector and be done with it. FIXME SOMEDAY +;; (runs:run-tests db +;; (tasks:task-get-target task) +;; (tasks:task-get-name task) +;; (tasks:task-get-test task) +;; (tasks:task-get-item task) +;; (tasks:task-get-owner task) +;; flags) +;; (tasks:set-state mdb (tasks:task-get-id task) "waiting"))) +;; +;; (define (tasks:rollup-runs db mdb task) +;; (let* ((flags (make-hash-table)) +;; (keys (db:get-keys db)) +;; (keyvals (keys:target-keyval keys (tasks:task-get-target task)))) +;; ;; (hash-table-set! flags "-rerun" "NOT_STARTED") +;; (print "Starting rollup " task) +;; ;; sillyness, just call the damn routine with the task vector and be done with it. FIXME SOMEDAY +;; (runs:rollup-run db +;; keys +;; keyvals +;; (tasks:task-get-name task) +;; (tasks:task-get-owner task)) +;; (tasks:set-state mdb (tasks:task-get-id task) "waiting"))) + +;;====================================================================== +;; S Y N C T O P O S T G R E S Q L +;;====================================================================== + +;; In the spirit of "dump your junk in the tasks module" I'll put the +;; sync to postgres here for now. + +;; attempt to automatically set up an area. call only if get area by path +;; returns naught of interest +;; +(define (tasks:set-area dbh configdat #!key (toppath #f)) ;; could I safely put *toppath* in for the default for toppath? when would it be evaluated? + (let loop ((area-name (or (configf:lookup configdat "setup" "area-name") + (common:get-area-name))) + (modifier 'none)) + (let ((success (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "ERROR: cannot create area entry, " ((condition-property-accessor 'exn 'message) exn)) + #f) ;; FIXME: I don't care for now but I should look at *why* there was an exception + (pgdb:add-area dbh area-name (or toppath *toppath*))))) + (or success + (case modifier + ((none)(loop (conc (current-user-name) "_" area-name) 'user)) + ((user)(loop (conc (substring (common:get-area-path-signature) 0 4) + area-name) 'areasig)) + (else #f)))))) ;; give up + +(define (task:print-runtime run-times saperator) +(for-each + (lambda (run-time-info) + (let* ((run-name (vector-ref run-time-info 0)) + (run-time (vector-ref run-time-info 1)) + (target (vector-ref run-time-info 2))) + (print target saperator run-name saperator run-time ))) + run-times)) + +(define (task:print-runtime-as-json run-times) + (let loop ((run-time-info (car run-times)) + (rema (cdr run-times)) + (str "")) + (let* ((run-name (vector-ref run-time-info 0)) + (run-time (vector-ref run-time-info 1)) + (target (vector-ref run-time-info 2))) + ;(print (not (equal? str ""))) + (if (not (equal? str "")) + (set! str (conc str ","))) + (if (null? rema) + (print "[" str "{target:" target ",run-name:" run-name ", run-time:" run-time "}]") + (loop (car rema) (cdr rema) (conc str "{target:" target ", run-name:" run-name ", run-time:" run-time "}")))))) + +(define (task:get-run-times) + (let* ( + (run-patt (if (args:get-arg "-run-patt") + (args:get-arg "-run-patt") + "%")) + (target-patt (if (args:get-arg "-target-patt") + (args:get-arg "-target-patt") + "%")) + + (run-times (rmt:get-run-times run-patt target-patt ))) + (if (eq? (length run-times) 0) + (begin + (debug:print 0 *default-log-port* "Data not found!!") + (exit))) + (if (equal? (args:get-arg "-dumpmode") "json") + (task:print-runtime-as-json run-times) + (if (equal? (args:get-arg "-dumpmode") "csv") + (task:print-runtime run-times ",") + (task:print-runtime run-times " "))))) + + +(define (task:print-testtime test-times saperator) +(for-each + (lambda (test-time-info) + (let* ((test-name (vector-ref test-time-info 0)) + (test-time (vector-ref test-time-info 2)) + (test-item (if (eq? (string-length (vector-ref test-time-info 1)) 0) + "N/A" + (vector-ref test-time-info 1)))) + (print test-name saperator test-item saperator test-time ))) + test-times)) + +(define (task:print-testtime-as-json test-times) + (let loop ((test-time-info (car test-times)) + (rema (cdr test-times)) + (str "")) + (let* ((test-name (vector-ref test-time-info 0)) + (test-time (vector-ref test-time-info 2)) + (item (vector-ref test-time-info 1))) + ;(print (not (equal? str ""))) + (if (not (equal? str "")) + (set! str (conc str ","))) + (if (null? rema) + (print "[" str "{test-name:" test-name ", item-path:" item ", test-time:" test-time "}]") + (loop (car rema) (cdr rema) (conc str "{test-name:" test-name ", item-path:" item ", test-time:" test-time "}")))))) + + + (define (task:get-test-times) + (let* ((runname (if (args:get-arg "-runname") + (args:get-arg "-runname") + #f)) + (target (if (args:get-arg "-target") + (args:get-arg "-target") + #f)) + + (test-times (rmt:get-test-times runname target ))) + (if (not runname) + (begin + (debug:print 0 *default-log-port* "Error: Missing argument -runname") + (exit))) + (if (string-contains runname "%") + (begin + (debug:print 0 *default-log-port* "Error: Invalid runname, '%' not allowed (" runname ") ") + (exit))) + (if (not target) + (begin + (debug:print 0 *default-log-port* "Error: Missing argument -target") + (exit))) + (if (string-contains target "%") + (begin + (debug:print 0 *default-log-port* "Error: Invalid target, '%' not allowed (" target ") ") + (exit))) + + (if (eq? (length test-times) 0) + (begin + (debug:print 0 *default-log-port* "Data not found!!") + (exit))) + (if (equal? (args:get-arg "-dumpmode") "json") + (task:print-testtime-as-json test-times) + (if (equal? (args:get-arg "-dumpmode") "csv") + (task:print-testtime test-times ",") + (task:print-testtime test-times " "))))) + + + +;; gets mtpg-run-id and syncs the record if different +;; +(define (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time) + (let* ((runs-ht (hash-table-ref cached-info 'runs)) + (runinf (hash-table-ref/default runs-ht run-id #f)) + (area-id (vector-ref area-info 0))) + (if runinf + runinf ;; already cached + (let* ((run-dat (rmt:get-run-info run-id)) ;; NOTE: get-run-info returns a vector < row header > + (run-name (rmt:get-run-name-from-id run-id)) + (row (db:get-rows run-dat)) ;; yes, this returns a single row + (header (db:get-header run-dat)) + (state (db:get-value-by-header row header "state")) + (status (db:get-value-by-header row header "status")) + (owner (db:get-value-by-header row header "owner")) + (event-time (db:get-value-by-header row header "event_time")) + (comment (db:get-value-by-header row header "comment")) + (fail-count (db:get-value-by-header row header "fail_count")) + (pass-count (db:get-value-by-header row header "pass_count")) + (db-contour (db:get-value-by-header row header "contour")) + (contour (if (args:get-arg "-prepend-contour") + (if (and db-contour (not (equal? db-contour "")) (string? db-contour )) + (begin + (debug:print-info 10 *default-log-port* "db-contour" db-contour) + db-contour) + (args:get-arg "-contour")))) + (run-tag (if (args:get-arg "-run-tag") + (args:get-arg "-run-tag") + "")) + (last-update (db:get-value-by-header row header "last_update")) + (keytarg (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target")) + (conc "MT_CONTOUR/MT_AREA/" (string-intersperse (rmt:get-keys) "/")) (string-intersperse (rmt:get-keys) "/"))) ;; e.g. version/iteration/platform + (base-target (rmt:get-target run-id)) + (target (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target")) + (conc (or (args:get-arg "-prefix-target") (conc contour "/" (common:get-area-name) "/")) base-target) base-target)) ;; e.g. v1.63/a3e1/ubuntu + (spec-id (pgdb:get-ttype dbh keytarg)) + (publish-time (if (args:get-arg "-cp-eventtime-to-publishtime") + event-time + (current-seconds))) + (new-run-id (if (and run-name base-target) (pgdb:get-run-id dbh spec-id target run-name area-id) #f))) + (if new-run-id + (begin ;; let ((run-record (pgdb:get-run-info dbh new-run-id)) + (hash-table-set! runs-ht run-id new-run-id) + ;; ensure key fields are up to date + ;; if last_update == pgdb_last_update do not update smallest-last-update-time + (let* ((pgdb-last-update (pgdb:get-run-last-update dbh new-run-id)) + (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))) + (if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) + (hash-table-set! smallest-last-update-time "smallest-time" last-update))) + (pgdb:refresh-run-info + dbh + new-run-id + state status owner event-time comment fail-count pass-count area-id last-update publish-time) + (debug:print-info 4 *default-log-port* (conc "Working on run-id " run-id " pgdb-id " new-run-id)) + (if (not (equal? run-tag "")) + (task:add-run-tag dbh new-run-id run-tag)) + new-run-id) + + (if (or (not state) (equal? state "deleted")) + (begin + (debug:print-info 1 *default-log-port* "Warning: Run with id " run-id " was created after previous sync and deleted before the sync") #f) + (if (handle-exceptions + exn + (begin (print-call-chain) + (debug:print 0 *default-log-port* ((condition-property-accessor 'exn 'message) exn)) + #f) + + (pgdb:insert-run + dbh + spec-id target run-name state status owner event-time comment fail-count pass-count area-id last-update publish-time)) + (let* ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))) + (if (or (not smallest-time) (< last-update smallest-time)) + (hash-table-set! smallest-last-update-time "smallest-time" last-update)) + (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)) + #f))))))) + +(define (task:add-run-tag dbh run-id tag) + (let* ((tag-info (pgdb:get-tag-info-by-name dbh tag))) + (if (not tag-info) + (begin + (if (handle-exceptions + exn + (begin + (debug:print-info 1 *default-log-port* ((condition-property-accessor 'exn 'message) exn)) + #f) + (pgdb:insert-tag dbh tag)) + (set! tag-info (pgdb:get-tag-info-by-name dbh tag)) + #f))) + ;;add to area_tags + (handle-exceptions + exn + (begin + (debug:print-info 1 *default-log-port* ((condition-property-accessor 'exn 'message) exn)) + #f) + (if (not (pgdb:is-run-taged-with-a-tag dbh (vector-ref tag-info 0) run-id)) + (pgdb:insert-run-tag dbh (vector-ref tag-info 0) run-id))))) + + +(define (tasks:sync-test-steps dbh cached-info test-step-ids smallest-last-update-time) + ; (print "Sync Steps " test-step-ids ) + (let ((test-ht (hash-table-ref cached-info 'tests)) + (step-ht (hash-table-ref cached-info 'steps)) + (run-id-in #f) + ) + (for-each + (lambda (test-step-id) + (set! run-id-in (cdr test-step-id)) + (set! test-step-id (car test-step-id)) + + + (let* ((test-step-info (rmt:get-steps-info-by-id run-id-in test-step-id)) + (step-id (tdb:step-get-id test-step-info)) + (test-id (tdb:step-get-test_id test-step-info)) + (stepname (tdb:step-get-stepname test-step-info)) + (state (tdb:step-get-state test-step-info)) + (status (tdb:step-get-status test-step-info)) + (event_time (tdb:step-get-event_time test-step-info)) + (comment (tdb:step-get-comment test-step-info)) + (logfile (tdb:step-get-logfile test-step-info)) + (last-update (tdb:step-get-last_update test-step-info)) + (pgdb-test-id (hash-table-ref/default test-ht test-id #f)) + (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)) + (pgdb-step-id (if pgdb-test-id + (pgdb:get-test-step-id dbh pgdb-test-id stepname state) + #f))) + (if step-id + (begin + (if pgdb-test-id + (begin + (if pgdb-step-id + (begin + (debug:print-info 4 *default-log-port* "Updating existing test-step with test-id: " test-id " and step-id " step-id " pgdb test id: " pgdb-test-id " pgdb step id " pgdb-step-id ) + (let* ((pgdb-last-update (pgdb:get-test-step-last-update dbh pgdb-step-id))) + (if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) + (hash-table-set! smallest-last-update-time "smallest-time" last-update))) + (pgdb:update-test-step dbh pgdb-step-id pgdb-test-id stepname state status event_time comment logfile last-update)) + (begin + (debug:print-info 4 *default-log-port* "Inserting test-step with test-id: " test-id " and step-id " step-id " pgdb test id: " pgdb-test-id) + (if (or (not smallest-time) (< last-update smallest-time)) + (hash-table-set! smallest-last-update-time "smallest-time" last-update)) + (pgdb:insert-test-step dbh pgdb-test-id stepname state status event_time comment logfile last-update ) + (set! pgdb-step-id (pgdb:get-test-step-id dbh pgdb-test-id stepname state)))) + (hash-table-set! step-ht step-id pgdb-step-id )) + (debug:print-info 1 *default-log-port* "Error: Test not cashed"))) + (debug:print-info 1 *default-log-port* "Error: Could not get test step info for step id " test-step-id )))) ;; this is a wierd senario need to debug + test-step-ids))) + +(define (tasks:sync-test-gen-data dbh cached-info test-data-ids smallest-last-update-time) + (let ((test-ht (hash-table-ref cached-info 'tests)) + (data-ht (hash-table-ref cached-info 'data)) + (run-id-in #f) + ) + (for-each + (lambda (test-data-id) + (set! run-id-in (cdr test-data-id)) + (set! test-data-id (car test-data-id)) + (let* ((test-data-info (rmt:get-data-info-by-id run-id-in test-data-id)) + (data-id (db:test-data-get-id test-data-info)) + (test-id (db:test-data-get-test_id test-data-info)) + (category (db:test-data-get-category test-data-info)) + (variable (db:test-data-get-variable test-data-info)) + (value (db:test-data-get-value test-data-info)) + (expected (db:test-data-get-expected test-data-info)) + (tol (db:test-data-get-tol test-data-info)) + (units (db:test-data-get-units test-data-info)) + (comment (db:test-data-get-comment test-data-info)) + (status (db:test-data-get-status test-data-info)) + (type (db:test-data-get-type test-data-info)) + (last-update (db:test-data-get-last_update test-data-info)) + (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)) + + (pgdb-test-id (hash-table-ref/default test-ht test-id #f)) + (pgdb-data-id (if pgdb-test-id + (pgdb:get-test-data-id dbh pgdb-test-id category variable) + #f))) + (if data-id + (begin + (if pgdb-test-id + (begin + (if pgdb-data-id + (begin + (debug:print-info 4 *default-log-port* "Updating existing test-data with test-id: " test-id " and data-id " data-id " pgdb test id: " pgdb-test-id " pgdb data id " pgdb-data-id) + (let* ((pgdb-last-update (pgdb:get-test-data-last-update dbh pgdb-data-id))) + (if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) + (hash-table-set! smallest-last-update-time "smallest-time" last-update))) + (pgdb:update-test-data dbh pgdb-data-id pgdb-test-id category variable value expected tol units comment status type last-update)) + (begin + (debug:print-info 4 *default-log-port* "Inserting test-data with test-id: " test-id " and data-id " data-id " pgdb test id: " pgdb-test-id) + (if (handle-exceptions + exn + (begin (print-call-chain) + (debug:print 0 *default-log-port* ((condition-property-accessor 'exn 'message) exn)) + #f) + + (pgdb:insert-test-data dbh pgdb-test-id category variable value expected tol units comment status type last-update)) + ;(tasks:run-id->mtpg-run-id dbh cached-info run-id area-info) + (begin + ;(pgdb:insert-test-data dbh pgdb-test-id category variable value expected tol units comment status type ) + (if (or (not smallest-time) (< last-update smallest-time)) + (hash-table-set! smallest-last-update-time "smallest-time" last-update)) + (set! pgdb-data-id (pgdb:get-test-data-id dbh pgdb-test-id category variable))) + #f))) + (hash-table-set! data-ht data-id pgdb-data-id )) + (begin + (debug:print-info 1 *default-log-port* "Error: Test not in pgdb")))) + + (debug:print-info 1 *default-log-port* "Error: Could not get test data info for data id " test-data-id )))) ;; this is a wierd senario need to debug + test-data-ids))) + + + +(define (tasks:sync-tests-data dbh cached-info test-ids area-info smallest-last-update-time main-run-id) + (let ((test-ht (hash-table-ref cached-info 'tests)) + (run-id-in main-run-id)) + (for-each + (lambda (test-id) + ; (set! run-id-in (cdr test-id)) + ; (set! test-id (car test-id)) + + (debug:print 0 *default-log-port* "test-id: " test-id " run-id: " run-id-in) + (let* ((test-info (rmt:get-test-info-by-id run-id-in test-id)) + (run-id (db:test-get-run_id test-info)) ;; look these up in db_records.scm + (test-id (db:test-get-id test-info)) + (test-name (db:test-get-testname test-info)) + (item-path (db:test-get-item-path test-info)) + (state (db:test-get-state test-info)) + (status (db:test-get-status test-info)) + (host (db:test-get-host test-info)) + (pid (db:test-get-process_id test-info)) + (cpuload (db:test-get-cpuload test-info)) + (diskfree (db:test-get-diskfree test-info)) + (uname (db:test-get-uname test-info)) + (run-dir (db:test-get-rundir test-info)) + (log-file (db:test-get-final_logf test-info)) + (run-duration (db:test-get-run_duration test-info)) + (comment (db:test-get-comment test-info)) + (event-time (db:test-get-event_time test-info)) + (archived (db:test-get-archived test-info)) + (last-update (db:test-get-last_update test-info)) + (pgdb-run-id (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)) + (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)) + (pgdb-test-id (if pgdb-run-id + (begin + ;(print pgdb-run-id) + (pgdb:get-test-id dbh pgdb-run-id test-name item-path)) + #f))) + ;; "id" "run_id" "testname" "state" "status" "event_time" + ;; "host" "cpuload" "diskfree" "uname" "rundir" "item_path" + ;; "run_duration" "final_logf" "comment" "shortdir" "attemptnum" "archived" + (if (or (not item-path) (string-null? item-path)) + (debug:print-info 0 *default-log-port* "Working on Run id : " run-id " and test name : " test-name)) + (if pgdb-run-id + (begin + (if pgdb-test-id ;; have a record + (begin ;; let ((key-name (conc run-id "/" test-name "/" item-path))) + (debug:print-info 4 *default-log-port* "Updating existing test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id " pgdb-test-id " pgdb-test-id) + (let* ((pgdb-last-update (pgdb:get-test-last-update dbh pgdb-test-id))) + (if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) ;;if last-update is same as pgdb-last-update then it is safe to assume the records are identical and we can use a larger last update time. + (hash-table-set! smallest-last-update-time "smallest-time" last-update))) + (pgdb:update-test dbh pgdb-test-id pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid)) + (begin + (debug:print-info 4 *default-log-port* "Inserting test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id) + (pgdb:insert-test dbh pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid) + (if (or (not smallest-time) (< last-update smallest-time)) + (hash-table-set! smallest-last-update-time "smallest-time" last-update)) + (set! pgdb-test-id (pgdb:get-test-id dbh pgdb-run-id test-name item-path)))) + (hash-table-set! test-ht test-id pgdb-test-id)) + (debug:print-info 1 *default-log-port* "WARNING: Skipping run with run-id:" run-id ". This run was created after privious sync and removed before this sync.")))) + test-ids))) + +(define (task:add-area-tag dbh area-info tag) + (let* ((tag-info (pgdb:get-tag-info-by-name dbh tag))) + (if (not tag-info) + (begin + (if (handle-exceptions + exn + (begin + (debug:print-info 1 *default-log-port* ((condition-property-accessor 'exn 'message) exn)) + #f) + (pgdb:insert-tag dbh tag)) + (set! tag-info (pgdb:get-tag-info-by-name dbh tag)) + #f))) + ;;add to area_tags + (handle-exceptions + exn + (begin + (debug:print-info 1 *default-log-port* ((condition-property-accessor 'exn 'message) exn)) + #f) + (if (not (pgdb:is-area-taged-with-a-tag dbh (vector-ref tag-info 0) (vector-ref area-info 0))) + (pgdb:insert-area-tag dbh (vector-ref tag-info 0) (vector-ref area-info 0)))))) + +(define (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time) + (for-each + (lambda (run-id) + (debug:print-info 4 *default-log-port* "Check if run with " run-id " needs to be synced" ) + (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)) +run-ids)) + +;; get runs changed since last sync +;; (define (tasks:sync-test-data dbh cached-info area-info) +;; (let* (( + +(define (tasks:sync-to-postgres configdat dest) + ;; (print "In sync") + (let* ((dbh (pgdb:open configdat dbname: dest)) + (area-info (pgdb:get-area-by-path dbh *toppath*)) + (cached-info (make-hash-table)) + (start (current-seconds)) + (test-patt (if (args:get-arg "-testpatt") + (args:get-arg "-testpatt") + "%")) + (target (if (args:get-arg "-target") + (args:get-arg "-target") + #f)) + (run-name (if (args:get-arg "-runname") + (args:get-arg "-runname") + #f))) + (if (and target (not run-name)) + (begin + (debug:print 0 *default-log-port* "Error: Provide runname") + (exit 1))) + (if (and (not target) run-name) + (begin + (debug:print 0 *default-log-port* "Error: Provide target") + (exit 1))) + ;(print "123") + ;(exit 1) + (for-each (lambda (dtype) + (hash-table-set! cached-info dtype (make-hash-table))) + '(runs targets tests steps data)) + (hash-table-set! cached-info 'start start) ;; when done we'll set sync times to this + (if area-info + (let* ((last-sync-time (if (and target run-name) + 0 + (if (args:get-arg "-since") + (string->number (args:get-arg "-since")) (vector-ref area-info 3)))) + (smallest-last-update-time (make-hash-table)) + (run-ids (if (and target run-name) + (rmt:get-run-record-ids target run-name (rmt:get-keys)) + (rmt:get-changed-record-run-ids last-sync-time))) + (all-run-ids (if (and target run-name) '() (rmt:get-all-runids))) + (changed-run-dbs (if (and target run-name) '() (db:get-changed-run-ids last-sync-time))) + (changed-run-ids (if (and target run-name) run-ids (filter (lambda (run) (member (modulo run (num-run-dbs)) changed-run-dbs)) all-run-ids))) + (area-tag (if (args:get-arg "-area-tag") + (args:get-arg "-area-tag") + (if (args:get-arg "-area") + (args:get-arg "-area") + "")))) + (if (and (equal? area-tag "") (not (pgdb:is-area-taged dbh (vector-ref area-info 0)))) + (set! area-tag *default-area-tag*)) + (if (not (equal? area-tag "")) + (task:add-area-tag dbh area-info area-tag)) + (if (not (null? run-ids)) + (begin + (debug:print-info 0 *default-log-port* "syncing runs: " run-ids) + (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time))) + (for-each + (lambda (run-id) + (let ((test-ids (rmt:get-changed-record-test-ids run-id last-sync-time))) + (print test-ids) + (if (not (null? test-ids)) + (begin + (debug:print-info 0 *default-log-port* "syncing tests: " test-ids) + (tasks:sync-tests-data dbh cached-info test-ids area-info smallest-last-update-time run-id))))) + changed-run-ids) + (let* ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" (current-seconds)))) + (debug:print-info 0 "smallest-time :" smallest-time " last-sync-time " last-sync-time) + (if (not (and target run-name)) + (if (or (and smallest-time (> smallest-time last-sync-time)) (and smallest-time (eq? last-sync-time 0))) + (pgdb:write-sync-time dbh area-info smallest-time))))) ;;this needs to be changed + (if (tasks:set-area dbh configdat) + (tasks:sync-to-postgres configdat dest) + (begin + (debug:print 0 *default-log-port* "ERROR: unable to create an area record") + #f))))) + + +;;====================================================================== +;; see defstruct host at top of file. +;; host: reachable last-update last-used last-cpuload +;; +(define (common:update-host-loads-table hosts-raw) + (let* ((hosts (filter (lambda (x) + (string-match (regexp "^\\S+$") x)) + hosts-raw))) + (for-each + (lambda (hostname) + (let* ((rec (let ((h (hash-table-ref/default *host-loads* hostname #f))) + (if h + h + (let ((h (make-host))) + (hash-table-set! *host-loads* hostname h) + h)))) + (host-info (common:get-host-info hostname)) + (is-reachable (car host-info)) + (last-reached-time (cadr host-info)) + (load (caddr host-info))) + (host-reachable-set! rec is-reachable) + (host-last-update-set! rec last-reached-time) + (host-last-cpuload-set! rec load))) + hosts))) + +;;====================================================================== +;; ideally put all this info into the db, no need to preserve it across moving homehost +;; +;; return list of +;; ( reachable? cpuload update-time ) +(define (common:get-host-info hostname) + (let* ((loadinfo (rmt:get-latest-host-load hostname)) ;; if this host happens to have been recently used by a test reuse the load data + (load (car loadinfo)) + (load-sample-time (cdr loadinfo)) + (load-sample-age (- (current-seconds) load-sample-time)) + (loadinfo-timeout-seconds 6) ;; this was 20 seconds, seems way too lax. Switch to 6 seconds + (host-last-update-timeout-seconds 4) + (host-rec (hash-table-ref/default *host-loads* hostname #f)) + ) + (cond + ((< load-sample-age loadinfo-timeout-seconds) + (list #t + load-sample-time + load)) + ((and host-rec + (< (current-seconds) (+ (host-last-update host-rec) host-last-update-timeout-seconds))) + (list #t + (host-last-update host-rec) + (host-last-cpuload host-rec ))) + ((common:unix-ping hostname) + (list #t + (current-seconds) + (alist-ref 'adj-core-load (common:get-normalized-cpu-load hostname)))) ;; this is cheaper than you might think. get-normalized-cpu-load is cached for up to 5 seconds + (else + (list #f 0 -1) ;; bad host, don't use! + )))) + +;;====================================================================== +;; Force a megatest cleanup-db if version is changed and skip-version-check not specified +;; Do NOT check if not on homehost! +;; +(define (common:exit-on-version-changed) + (if (and *toppath* ;; do nothing if *toppath* not yet provided + (rmt:on-homehost?)) + (if (common:api-changed?) + (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config")) + (dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") ".mtdb/main.db")) + (read-only (not (file-write-access? dbfile))) + (dbstruct (db:setup))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;; #t))) + (debug:print 0 *default-log-port* + "WARNING: Version mismatch!\n" + " expected: " (common:version-signature) "\n" + " got: " (common:get-last-run-version)) + (cond + ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t) + ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only) + (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db + (debug:print 0 *default-log-port* " I see you are the owner of megatest.config, attempting to cleanup and reset to new version") + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "Failed to switch versions. exn=" exn) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (print-call-chain (current-error-port)) + (exit 1)) + (common:cleanup-db dbstruct))) + ((not (common:file-exists? mtconf)) + (debug:print 0 *default-log-port* " megatest.config does not exist in this area. Cannot proceed with megatest version migration.") + (exit 1)) + ((not (common:file-exists? dbfile)) + (debug:print 0 *default-log-port* " .mtdb/main.db does not exist in this area. Cannot proceed with megatest version migration.") + (exit 1)) + ((not (eq? (current-user-id)(file-owner mtconf))) + (debug:print 0 *default-log-port* " You do not own .mtdb/main.db in this area. Cannot proceed with megatest version migration.") + (exit 1)) + (read-only + (debug:print 0 *default-log-port* " You have read-only access to this area. Cannot proceed with megatest version migration.") + (exit 1)) + (else + (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"") + (exit 1))))))) +;;====================================================================== +;; (begin +;; (debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.") +;; (exit 1)))) + +(define (common:wait-for-homehost-load maxnormload msg) + (let loop ((start-time (current-seconds))) ;; we saw some instances of this being called before *toppath* was set. This might be an early setup race. This delay should help but it is impossible to test... + (if (not *toppath*) + (begin + (debug:print 0 *default-log-port* "ERROR: common:wait-for-homehost-load called before *toppath* set.") + (thread-sleep! 30) + (if (< (- (current-seconds) start-time) 300) + (loop start-time))))) + (case (rmt:transport-mode) + ((http) + (let* ((hh-dat (if (rmt:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. + #f + (server:choose-server *toppath* 'homehost))) + (hh (if hh-dat (car hh-dat) #f))) + (common:wait-for-normalized-load maxnormload msg hh))) + (else + (common:wait-for-normalized-load maxnormload msg (get-host-name))))) + + +(define (configf:write-alist cdat fname) + (if (not (common:faux-lock fname)) + (debug:print 0 *default-log-port* "INFO: Could not get lock on " fname)) + (let* ((dat (configf:config->alist cdat)) + (res + (begin + (with-output-to-file fname ;; first write out the file + (lambda () + (pp dat))) + + (if (common:file-exists? fname) ;; now verify it is readable + (if (configf:read-alist fname) + #t ;; data is good. + (begin + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "deleting " fname " failed, exn=" exn) + #f) + (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.") + (delete-file fname)) + #f)) + #f)))) + (common:faux-unlock fname) + res)) + +;;====================================================================== +;; faux-lock is deprecated. Please use simple-lock below +;; +(define (common:faux-lock keyname #!key (wait-time 8)(allow-lock-steal #t)) + (if (rmt:no-sync-get/default keyname #f) ;; do not be tempted to compare to pid. locking is a one-shot action, if already locked for this pid it doesn't actually count + (if (> wait-time 0) + (begin + (thread-sleep! 1) + (if (eq? wait-time 1) ;; only one second left, steal the lock + (begin + (debug:print-info 0 *default-log-port* "stealing lock for " keyname) + (common:faux-unlock keyname force: #t))) + (common:faux-lock keyname wait-time: (- wait-time 1))) + #f) + (begin + (rmt:no-sync-set keyname (conc (current-process-id))) + (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f)))))) + +(define (common:faux-unlock keyname #!key (force #f)) + (if (or force (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f)))) + (begin + (if (rmt:no-sync-get/default keyname #f) (rmt:no-sync-del! keyname)) + #t) + #f)) + +;;====================================================================== +;; simple lock. improve and converge on this one. +;; +(define (common:simple-lock keyname) + (rmt:no-sync-get-lock keyname)) + +(define (common:simple-unlock keyname #!key (force #f)) + (rmt:no-sync-del! keyname)) + +;; returns waitons waitors tconfigdat +;; +(define (tests:get-waitons test-name all-tests-registry global-waitons) + (let* ((config (tests:get-testconfig test-name #f all-tests-registry 'return-procs))) ;; assuming no problems with immediate evaluation, this could be simplified ('return-procs -> #t) + (let ((instr (if config + (configf:lookup config "requirements" "waiton") + (begin ;; No config means this is a non-existant test + (debug:print-error 0 *default-log-port* "non-existent required test \"" test-name "\"") + (exit 1)))) + (instr2 (if config + (configf:lookup config "requirements" "waitor") + ""))) + (debug:print-info 8 *default-log-port* "waitons string is " instr ", waitors string is " instr2) + (let* ((newwaitons-tmp + (string-split (cond + ((procedure? instr) ;; here + (let ((res (instr))) + (debug:print-info 8 *default-log-port* "waiton procedure results in string " res " for test " test-name) + res)) + ((string? instr) instr) + (else + ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " test-name) + "")))) + (newwaitors + (string-split (cond + ((procedure? instr2) + (let ((res (instr2))) + (debug:print-info 8 *default-log-port* "waitor procedure results in string " res " for test " test-name) + res)) + ((string? instr2) instr2) + (else + ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " test-name) + "")))) + (newwaitons (if (and (list? global-waitons) + (not (null? global-waitons))) + (begin + (debug:print 0 *default-log-port* "Adding global waitons " global-waitons) + (append newwaitons-tmp (filter (lambda (x) ;; remove self from global waitons + (not (equal? x test-name))) + global-waitons))) + newwaitons-tmp))) + (values + ;; the waitons + (filter (lambda (x) + (if (hash-table-ref/default all-tests-registry x #f) + #t + (begin + (debug:print-error 0 *default-log-port* "test " test-name " has unrecognised waiton testname " x) + #f))) + newwaitons) + (filter (lambda (x) + (if (hash-table-ref/default all-tests-registry x #f) + #t + (begin + (debug:print-error 0 *default-log-port* "test " test-name " has unrecognised waiton testname " x) + #f))) + newwaitors) + config))))) + +;; Check for waiver eligibility +;; +(define (tests:check-waiver-eligibility testdat prev-testdat) + (let* ((test-registry (make-hash-table)) + (testconfig (tests:get-testconfig (db:test-get-testname testdat) (db:test-get-item-path testdat) test-registry #f)) + (test-rundir ;; (sdb:qry 'passstr + (db:test-get-rundir testdat)) ;; ) + (prev-rundir ;; (sdb:qry 'passstr + (db:test-get-rundir prev-testdat)) ;; ) + (waivers (if testconfig (configf:section-vars testconfig "waivers") '())) + (waiver-rx (regexp "^(\\S+)\\s+(.*)$")) + (diff-rule "diff %file1% %file2%") + (logpro-rule "diff %file1% %file2% | logpro %waivername%.logpro %waivername%.html")) + (if (not (common:file-exists? test-rundir)) + (begin + (debug:print-error 0 *default-log-port* "test run directory is gone, cannot propagate waiver") + #f) + (begin + (push-directory test-rundir) + (let ((result (if (null? waivers) + #f + (let loop ((hed (car waivers)) + (tal (cdr waivers))) + (debug:print 0 *default-log-port* "INFO: Applying waiver rule \"" hed "\"") + (let* ((waiver (configf:lookup testconfig "waivers" hed)) + (wparts (if waiver (string-match waiver-rx waiver) #f)) + (waiver-rule (if wparts (cadr wparts) #f)) + (waiver-glob (if wparts (caddr wparts) #f)) + (logpro-file (if waiver + (let ((fname (conc hed ".logpro"))) + (if (common:file-exists? fname) + fname + (begin + (debug:print 0 *default-log-port* "INFO: No logpro file " fname " falling back to diff") + #f))) + #f)) + ;; if rule by name of waiver-rule is found in testconfig - use it + ;; else if waivername.logpro exists use logpro-rule + ;; else default to diff-rule + (rule-string (let ((rule (configf:lookup testconfig "waiver_rules" waiver-rule))) + (if rule + rule + (if logpro-file + logpro-rule + (begin + (debug:print 0 *default-log-port* "INFO: No logpro file " logpro-file " found, using diff rule") + diff-rule))))) + ;; (string-substitute "%file1%" "foofoo.txt" "This is %file1% and so is this %file1%." #t) + (processed-cmd (string-substitute + "%file1%" (conc test-rundir "/" waiver-glob) + (string-substitute + "%file2%" (conc prev-rundir "/" waiver-glob) + (string-substitute + "%waivername%" hed rule-string #t) #t) #t)) + (res #f)) + (debug:print 0 *default-log-port* "INFO: waiver command is \"" processed-cmd "\"") + (if (eq? (system processed-cmd) 0) + (if (null? tal) + #t + (loop (car tal)(cdr tal))) + #f)))))) + (pop-directory) + result))))) + + +(define (tests:get-test-path-from-environment) + (if (and (getenv "MT_LINKTREE") + (getenv "MT_TARGET") + (getenv "MT_RUNNAME") + (getenv "MT_TEST_NAME") + (getenv "MT_ITEMPATH")) + (conc (getenv "MT_LINKTREE") "/" + (getenv "MT_TARGET") "/" + (getenv "MT_RUNNAME") "/" + (getenv "MT_TEST_NAME") + (if (and (getenv "MT_ITEMPATH") + (not (string=? "" (getenv "MT_ITEMPATH")))) + (conc "/" (getenv "MT_ITEMPATH")) + "")) + #f)) + + +;; if .testconfig exists in test directory read and return it +;; else if have cached copy in *testconfigs* return it IFF there is a section "have fulldata" +;; else read the testconfig file +;; if have path to test directory save the config as .testconfig and return it +;; +(define (tests:get-testconfig test-name item-path test-registry system-allowed #!key (force-create #f)(allow-write-cache #t)(wait-a-minute #f)) + (let* ((use-cache (common:use-cache?)) + (cache-path (tests:get-test-path-from-environment)) + (cache-file (and cache-path (conc cache-path "/.testconfig"))) + (cache-exists (and cache-file + (not force-create) ;; if force-create then pretend there is no cache to read + (common:file-exists? cache-file))) + (cached-dat (if (and (not force-create) + cache-exists + use-cache) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "failed to read " cache-file ", exn=" exn) + #f) ;; any issues, just give up with the cached version and re-read + (configf:read-alist cache-file)) + #f)) + (test-full-name (if (and item-path (not (string-null? item-path))) + (conc test-name "/" item-path) + test-name))) + (if cached-dat + cached-dat + (let ((dat (hash-table-ref/default *testconfigs* test-full-name #f))) + (if (and dat ;; have a locally cached version + (hash-table-ref/default dat "have fulldata" #f)) ;; marked as good data? + dat + ;; no cached data available + (let* ((treg (or test-registry + (tests:get-all))) + (test-path (or (hash-table-ref/default treg test-name #f) + (let* ((local-tcdir (conc (getenv "MT_LINKTREE") "/" + (getenv "MT_TARGET") "/" + (getenv "MT_RUNNAME") "/" + test-name "/" item-path)) + (local-tcfg (conc local-tcdir "/testconfig"))) + (if (common:file-exists? local-tcfg) + local-tcdir + #f)) + (conc *toppath* "/tests/" test-name))) + (test-configf (conc test-path "/testconfig")) + (testexists (let loopa ((tries-left 30)) + (cond + ( + (and (common:file-exists? test-configf)(file-read-access? test-configf)) + #t) + ( + (common:file-exists? test-configf) + (debug:print 0 *default-log-port* "WARNING: Cannot read testconfig file: "test-configf) + #f) + ( + (and wait-a-minute (> tries-left 0)) + (thread-sleep! 10) + (debug:print 0 *default-log-port* "WARNING: testconfig file does not exist: "test-configf" will retry in 10 seconds. Tries left: "tries-left) ;; BB: this fires + (loopa (sub1 tries-left))) + (else + (debug:print 2 *default-log-port* "WARNING: testconfig file does not exist: "test-configf) ;; BB: this fires + #f)))) + (tcfg (if testexists + (read-config test-configf #f system-allowed + environ-patt: (if system-allowed + "pre-launch-env-vars" + #f)) + #f))) + (if (and tcfg cache-file) (hash-table-set! tcfg "have fulldata" #t)) ;; mark this as fully read data + (if tcfg (hash-table-set! *testconfigs* test-full-name tcfg)) + (if (and testexists + cache-file + (file-write-access? cache-path) + allow-write-cache) + (let ((tpath (conc cache-path "/.testconfig"))) + (debug:print-info 1 *default-log-port* "Caching testconfig for " test-name " in " tpath) + (if (and tcfg (not (common:in-running-test?))) + (configf:write-alist tcfg tpath)))) + tcfg)))))) + + +;;====================================================================== +;; go through the hosts from least recently used to most recently used, pick the first that meets the load criteral from the +;; [host-rules] section. +;; +(define (common:get-least-loaded-host hosts-raw host-type configdat) + (let* ((rdat (configf:lookup configdat "host-rules" host-type)) + (rules (common:val->alist (or rdat "") convert: #t)) ;; maxnload, maxnjobs, maxjobrate + (maxnload (common:alist-ref/default 'maxnload rules 1.5)) ;; max normalized load + (maxnjobs (common:alist-ref/default 'maxnjobs rules 1.5)) ;; max normalized number of jobs + (maxjobrate (common:alist-ref/default 'maxjobrate rules (/ 1 6))) ;; max rate of submitting jobs to a given host in jobs/second + (hosts (filter (lambda (x) + (string-match (regexp "^\\S+$") x)) + hosts-raw)) + ;; (best-host #f) + (get-rec (lambda (hostname) + ;; (print "get-rec hostname=" hostname) + (let ((h (hash-table-ref/default *host-loads* hostname #f))) + (if h + h + (let ((h (make-host))) + (hash-table-set! *host-loads* hostname h) + h))))) + (best-load 99999) + (curr-time (current-seconds)) + (get-hosts-sorted (lambda (hosts) + (sort hosts (lambda (a b) + (let ((a-rec (get-rec a)) + (b-rec (get-rec b))) + ;; (print "a=" a " a-rec=" a-rec " host-last-used=" (host-last-used a-rec)) + ;; (print "b=" b " b-rec=" b-rec " host-last-used=" (host-last-used b-rec)) + (< (host-last-used a-rec) + (host-last-used b-rec)))))))) + (debug:print 0 *default-log-port* "INFO: hosts-sorted=" (get-hosts-sorted hosts)) + (if (null? hosts) + #f ;; no hosts to select from. All done and giving up now. + (let ((hosts-sorted (get-hosts-sorted hosts))) + (common:update-host-loads-table hosts) + (let loop ((hostname (car hosts-sorted)) + (tal (cdr hosts-sorted)) + (best-host #f)) + (let* ((rec (get-rec hostname)) + (reachable (host-reachable rec)) + (load (host-last-cpuload rec)) + (last-used (host-last-used rec)) + (delta (- curr-time last-used)) + (job-rate (if (> delta 0) + (/ 1 delta) + 999)) ;; jobs per second + (new-best + (cond + ((not reachable) + (debug:print 0 *default-log-port* "Skipping host " hostname " as it cannot be reached.") + best-host) + ((and (< load maxnload) ;; load is acceptable + (< job-rate maxjobrate)) ;; job rate is acceptable + (set! best-load load) + hostname) + (else best-host)))) + (debug:print 0 *default-log-port* "INFO: Trying host " hostname " with load " load ", last used " delta " seconds ago, with job-rate " job-rate " for running a test." ) + (if new-best + (begin ;; found a host, return it + (debug:print 0 *default-log-port* "INFO: Found host: " new-best " load: " load " last-used: " delta " seconds ago, with job-rate: " job-rate) + (host-last-used-set! rec curr-time) + new-best) + (if (null? tal) #f (loop (car tal)(cdr tal) best-host))))))))) + +;;====================================================================== +;; T E S T L A U N C H I N G P E R I T E M W I T H H O S T T Y P E S +;;====================================================================== +;; +;; [hosts] +;; arm cubie01 cubie02 +;; x86_64 zeus xena myth01 +;; allhosts #{g hosts arm} #{g hosts x86_64} +;; +;; [host-types] +;; general #MTLOWESTLOAD #{g hosts allhosts} +;; arm #MTLOWESTLOAD #{g hosts arm} +;; nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo +;; +;; [host-rules] +;; # maxnload => max normalized load +;; # maxnjobs => max jobs per cpu +;; # maxjobrate => max jobs per second +;; general maxnload=1.1; maxnjobs=1.2; maxjobrate=0.1 +;; +;; [launchers] +;; envsetup general +;; xor/%/n 4C16G +;; % nbgeneral +;; +;; [jobtools] +;; # if defined and not "no" flexi-launcher will bypass "launcher" unless no match. +;; flexi-launcher yes +;; launcher nbfake +;; +(define (common:get-launcher configdat testname itempath) + (let ((fallback-launcher (configf:lookup configdat "jobtools" "launcher"))) + (if (and (configf:lookup configdat "jobtools" "flexi-launcher") ;; overrides launcher + (not (equal? (configf:lookup configdat "jobtools" "flexi-launcher") "no"))) + (let* ((launchers (hash-table-ref/default configdat "launchers" '()))) + (if (null? launchers) + fallback-launcher + (let loop ((hed (car launchers)) + (tal (cdr launchers))) + (let ((patt (car hed)) + (host-type (cadr hed))) + (if (tests:match patt testname itempath) + (begin + (debug:print-info 2 *default-log-port* "Have flexi-launcher match for " testname "/" itempath " = " host-type) + (let ((launcher (configf:lookup configdat "host-types" host-type))) + (if launcher + (let* ((launcher-parts (string-split launcher)) + (launcher-exe (car launcher-parts))) + (if (equal? launcher-exe "#MTLOWESTLOAD") ;; this is our special case, we will find the lowest load and craft a nbfake commandline + (let host-loop ((targ-host (common:get-least-loaded-host (cdr launcher-parts) host-type configdat)) + (count 100)) + (if targ-host + (conc "remrun " targ-host) + (if (> count 0) + (begin + (debug:print 0 *default-log-port* "INFO: Waiting for a host for host-type " host-type) + (thread-sleep! (- 101 count)) + (host-loop (common:get-least-loaded-host (cdr launcher-parts) host-type configdat) + (- count 1))) + (begin + (debug:print 0 *default-log-port* "FATAL: Failed to find a host from #MTLOWESTLOAD for host-type " host-type) + (exit))))) + launcher)) + (begin + (debug:print-info 0 *default-log-port* "WARNING: no launcher found for host-type " host-type) + (if (null? tal) + fallback-launcher + (loop (car tal)(cdr tal))))))) + ;; no match, try again + (if (null? tal) + fallback-launcher + (loop (car tal)(cdr tal)))))))) + fallback-launcher))) + +;; Do not rpc this one, do the underlying calls!!! +(define (tests:test-set-status! run-id test-id state status comment dat #!key (work-area #f)) + (let* ((real-status status) + (otherdat (if dat dat (make-hash-table))) + (testdat (rmt:get-test-info-by-id run-id test-id)) + (test-name (db:test-get-testname testdat)) + (item-path (db:test-get-item-path testdat)) + ;; before proceeding we must find out if the previous test (where all keys matched except runname) + ;; was WAIVED if this test is FAIL + + ;; NOTES: + ;; 1. Is the call to test:get-previous-run-record remotified? + ;; 2. Add test for testconfig waiver propagation control here + ;; + (prev-test (if (equal? status "FAIL") + (rmt:get-previous-test-run-record run-id test-name item-path) + #f)) + (waived (if prev-test + (if prev-test ;; true if we found a previous test in this run series + (let ((prev-status (db:test-get-status prev-test)) + (prev-state (db:test-get-state prev-test)) + (prev-comment (db:test-get-comment prev-test))) + (debug:print 4 *default-log-port* "prev-status " prev-status ", prev-state " prev-state ", prev-comment " prev-comment) + (if (and (equal? prev-state "COMPLETED") + (equal? prev-status "WAIVED")) + (if comment + comment + prev-comment) ;; waived is either the comment or #f + #f)) + #f) + #f))) + (if (and waived + (tests:check-waiver-eligibility testdat prev-test)) + (set! real-status "WAIVED")) + + (debug:print 4 *default-log-port* "real-status " real-status ", waived " waived ", status " status) + + ;; update the primary record IF state AND status are defined + (if (and state status) + (begin + (rmt:set-state-status-and-roll-up-items run-id test-id item-path state real-status (if waived waived comment)) + ;; (mt:process-triggers run-id test-id state real-status) ;; triggers are called in test-set-state-status + )) + + ;; if status is "AUTO" then call rollup (note, this one modifies data in test + ;; run area, it does remote calls under the hood. + ;; (if (and test-id state status (equal? status "AUTO")) + ;; (rmt:test-data-rollup run-id test-id status)) + + ;; add metadata (need to do this way to avoid SQL injection issues) + + ;; :first_err + ;; (let ((val (hash-table-ref/default otherdat ":first_err" #f))) + ;; (if val + ;; (sqlite3:execute db "UPDATE tests SET first_err=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path))) + ;; + ;; ;; :first_warn + ;; (let ((val (hash-table-ref/default otherdat ":first_warn" #f))) + ;; (if val + ;; (sqlite3:execute db "UPDATE tests SET first_warn=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path))) + + (let ((category (hash-table-ref/default otherdat ":category" "")) + (variable (hash-table-ref/default otherdat ":variable" "")) + (value (hash-table-ref/default otherdat ":value" #f)) + (expected (hash-table-ref/default otherdat ":expected" "n/a")) + (tol (hash-table-ref/default otherdat ":tol" "n/a")) + (units (hash-table-ref/default otherdat ":units" "")) + (type (hash-table-ref/default otherdat ":type" "")) + (dcomment (hash-table-ref/default otherdat ":comment" ""))) + (debug:print 4 *default-log-port* + "category: " category ", variable: " variable ", value: " value + ", expected: " expected ", tol: " tol ", units: " units) + (if (and value) ;; require only value; BB was- all three required + (let ((dat (conc category "," + variable "," + value "," + expected "," + tol "," + units "," + dcomment ",," ;; extra comma for status + type ))) + ;; This was run remote, don't think that makes sense. Perhaps not, but that is the easiest path for the moment. + (rmt:csv->test-data run-id test-id + dat) + ;; This was added in check-in a5adfa3f9a. Message was: "...added delay in set-values to allow for delayed write on server start" + ;; I'm inserting an arbitrary rmt: call to force/ensure that the server is available to (hopefully) prevent a communication issue. + (rmt:get-var "MEGATEST_VERSION") ;; this does NOTHING but ensure the server is reachable. This is almost certainly NOT needed :) + ;; BB - commentiong out arbitrary 10 second wait (thread-sleep! 10) ;; add 10 second delay before quit incase rmt needs time to start a server. + ))) + + ;; need to update the top test record if PASS or FAIL and this is a subtest + ;;;;;; (if (not (equal? item-path "")) + ;;;;;; (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status #f) ;;;;;) + + (if (or (and (string? comment) + (string-match (regexp "\\S+") comment)) + waived) + (let ((cmt (if waived waived comment))) + (rmt:general-call 'set-test-comment run-id cmt test-id))))) + +;;====================================================================== +;; refactoring this block into tests:get-full-data from line 263 of runs.scm +;;====================================================================== +;; hed is the test name +;; test-records is a hash of test-name => test record +(define (tests:get-full-data test-names test-records required-tests all-tests-registry) + (let ((missing-waitons (make-hash-table))) + (if (not (null? test-names)) + (let loop ((hed (car test-names)) + (tal (cdr test-names))) ;; 'return-procs tells the config reader to prep running system but return a proc + (debug:print-info 4 *default-log-port* "hed=" hed " at top of loop") + ;; don't know item-path at this time, let the testconfig get the top level testconfig + (let* ((config (tests:get-testconfig hed #f all-tests-registry 'return-procs)) + (waitons (let ((instr (if config + (configf:lookup config "requirements" "waiton") + (begin ;; No config means this is a non-existent test + (let ((waiters '())) + ;; find the waiter(s) for this waiton. + (for-each + (lambda(waiter) + ;; (print "test-record = " (hash-table-ref test-records waiter)) + ;; (print "waitons = " (vector-ref (hash-table-ref test-records waiter) 2)) + (if (member hed (vector-ref (hash-table-ref test-records waiter) 2)) + (set! waiters (cons waiter waiters)) + ) + ) + (hash-table-keys test-records)) + (hash-table-set! missing-waitons hed waiters) + ) + "")))) + (debug:print-info 8 *default-log-port* "waitons string is " instr) + (string-split (cond + ((procedure? instr) + (let ((res (instr))) + (debug:print-info 8 *default-log-port* "waiton procedure results in string " res " for test " hed) + res)) + ((string? instr) instr) + (else + ;; NOTE: This is actually the case of *no* waitons! ;; + "")))))) + (if (not config) ;; this is a non-existant test called in a waiton. + (if (null? tal) + test-records + (loop (car tal)(cdr tal))) + (begin + (debug:print-info 8 *default-log-port* "waitons: " waitons) + ;; check for hed in waitons => this would be circular, remove it and issue an + ;; error + (if (member hed waitons) + (begin + (debug:print-error 0 *default-log-port* "test " hed " has listed itself as a waiton, please correct this!") + (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons)))) + + ;; (items (items:get-items-from-config config))) + (if (not (hash-table-ref/default test-records hed #f)) + (hash-table-set! test-records + hed (vector hed ;; 0 + config ;; 1 + waitons ;; 2 + (configf:lookup config "requirements" "priority") ;; priority 3 + (let ((items (hash-table-ref/default config "items" #f)) ;; items 4 + (itemstable (hash-table-ref/default config "itemstable" #f))) + ;; if either items or items table is a proc return it so test running + ;; process can know to call items:get-items-from-config + ;; if either is a list and none is a proc go ahead and call get-items + ;; otherwise return #f - this is not an iterated test + (cond + ((procedure? items) + (debug:print-info 4 *default-log-port* "items is a procedure, will calc later") + items) ;; calc later + ((procedure? itemstable) + (debug:print-info 4 *default-log-port* "itemstable is a procedure, will calc later") + itemstable) ;; calc later + ((filter (lambda (x) + (let ((val (car x))) + (if (procedure? val) val #f))) + (append (if (list? items) items '()) + (if (list? itemstable) itemstable '()))) + 'have-procedure) + ((or (list? items)(list? itemstable)) ;; calc now + (debug:print-info 4 *default-log-port* "items and itemstable are lists, calc now\n" + " items: " items " itemstable: " itemstable) + (items:get-items-from-config config)) + (else #f))) ;; not iterated + #f ;; itemsdat 5 + #f ;; spare - used for item-path + ))) + (for-each + (lambda (waiton) + (if (and waiton (not (string= "#f" waiton)) (not (member waiton test-names))) + (begin + (set! required-tests (cons waiton required-tests)) + (set! test-names (cons waiton test-names))))) ;; was an append, now a cons + waitons) + (let ((remtests (delete-duplicates (append waitons tal)))) + (if (not (null? remtests)) + (loop (car remtests)(cdr remtests)) + test-records))))))) + (for-each + (lambda (missing-waiton) + (debug:print-error 0 *default-log-port* "non-existent test \"" missing-waiton "\" is a waiton for tests " (hash-table-ref missing-waitons missing-waiton)) + ) + (hash-table-keys missing-waitons) + ) +)) + + +) Index: tcp-transportmod.scm ================================================================== --- tcp-transportmod.scm +++ tcp-transportmod.scm @@ -22,10 +22,11 @@ (declare (uses debugprint)) (declare (uses commonmod)) (declare (uses dbfile)) (declare (uses dbmod)) (declare (uses portlogger)) +(declare (uses mtmod)) (use address-info tcp) (module tcp-transportmod * @@ -85,10 +86,11 @@ debugprint commonmod dbfile dbmod + mtmod portlogger ) ;;====================================================================== ;; client Index: tdb.scm ================================================================== --- tdb.scm +++ tdb.scm @@ -56,155 +56,155 @@ ;;====================================================================== ;; T E S T S P E C I F I C D B ;;====================================================================== -;; Create the sqlite db for the individual test(s) -;; -;; Moved these tables into .db -;; THIS CODE TO BE REMOVED -;; -(define (open-test-db work-area) - (debug:print-info 11 *default-log-port* "open-test-db " work-area) - (if (and work-area - (directory? work-area) - (file-read-access? work-area)) - (let* ((dbpath (conc work-area "/testdat.db")) - (dbexists (common:file-exists? dbpath)) - (work-area-writeable (file-write-access? work-area)) - (db (handle-exceptions ;; open the db if area writeable or db pre-existing. open in-mem otherwise. if exception, open in-mem - exn - (begin - (print-call-chain (current-error-port)) - (debug:print 2 *default-log-port* "ERROR: problem accessing test db " work-area ", you probably should clean and re-run this test" - ((condition-property-accessor 'exn 'message) exn)) - (set! dbexists #f) ;; must force re-creation of tables, more tom-foolery - (sqlite3:open-database ":memory:")) ;; open an in-memory db to allow readonly access - (if (or work-area-writeable - dbexists) - (sqlite3:open-database dbpath) - (sqlite3:open-database ":memory:")))) - (tdb-writeable (and (file-write-access? work-area) - (file-write-access? dbpath))) - (handler (sqlite3:make-busy-timeout (if (args:get-arg "-override-timeout") - (string->number (args:get-arg "-override-timeout")) - 136000)))) - - (if (and tdb-writeable - *db-write-access*) - (sqlite3:set-busy-handler! db handler)) - (if (not dbexists) - (begin - (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = FULL;") - (debug:print-info 11 *default-log-port* "Initialized test database " dbpath) - (tdb:testdb-initialize db))) - ;; (sqlite3:execute db "PRAGMA synchronous = 0;") - (debug:print-info 11 *default-log-port* "open-test-db END (sucessful)" work-area) - ;; now let's test that everything is correct - (handle-exceptions - exn - (begin - (print-call-chain (current-error-port)) - (debug:print-error 0 *default-log-port* "problem accessing test db " work-area ", you probably should clean and re-run this test or remove the file " - dbpath ".\n " - ((condition-property-accessor 'exn 'message) exn)) - #f) - ;; Is there a cheaper single line operation that will check for existance of a table - ;; and raise an exception ? - (sqlite3:execute db "SELECT id FROM test_data LIMIT 1;")) - db) - ;; no work-area or not readable - create a placeholder to fake rest of world out - (let ((baddb (sqlite3:open-database ":memory:"))) - (debug:print-info 11 *default-log-port* "open-test-db END (unsucessful)" work-area) - ;; provide an in-mem db (this is dangerous!) - (tdb:testdb-initialize baddb) - baddb))) - -;; find and open the testdat.db file for an existing test -(define (tdb:open-test-db-by-test-id test-id #!key (work-area #f)) - (let* ((test-path (if work-area - work-area - (rmt:test-get-rundir-from-test-id test-id)))) - (debug:print 3 *default-log-port* "TEST PATH: " test-path) - (open-test-db test-path))) - -;; find and open the testdat.db file for an existing test -(define (tdb:open-test-db-by-test-id-local dbstruct run-id test-id #!key (work-area #f)) - (let* ((test-path (if work-area - work-area - (db:test-get-rundir-from-test-id dbstruct run-id test-id)))) - (debug:print 3 *default-log-port* "TEST PATH: " test-path) - (open-test-db test-path))) - -;; find and open the testdat.db file for an existing test -(define (tdb:open-run-close-db-by-test-id-local dbstruct run-id test-id work-area proc . params) - (let* ((test-path (if work-area - work-area - (db:test-get-rundir-from-test-id dbstruct run-id test-id))) - (tdb (open-test-db test-path))) - (apply proc tdb params))) - -(define (tdb:testdb-initialize db) - (debug:print 11 *default-log-port* "db:testdb-initialize START") - (sqlite3:with-transaction - db - (lambda () - (for-each - (lambda (sqlcmd) - (sqlite3:execute db sqlcmd)) - (list "CREATE TABLE IF NOT EXISTS test_rundat ( - id INTEGER PRIMARY KEY, - update_time TIMESTAMP, - cpuload INTEGER DEFAULT -1, - diskfree INTEGER DEFAULT -1, - diskusage INTGER DEFAULT -1, - run_duration INTEGER DEFAULT 0);" - "CREATE TABLE IF NOT EXISTS test_data ( - id INTEGER PRIMARY KEY, - test_id INTEGER, - category TEXT DEFAULT '', - variable TEXT, - value REAL, - expected REAL, - tol REAL, - units TEXT, - comment TEXT DEFAULT '', - status TEXT DEFAULT 'n/a', - type TEXT DEFAULT '', - CONSTRAINT test_data_constraint UNIQUE (test_id,category,variable));" - "CREATE TABLE IF NOT EXISTS test_steps ( - id INTEGER PRIMARY KEY, - test_id INTEGER, - stepname TEXT, - state TEXT DEFAULT 'NOT_STARTED', - status TEXT DEFAULT 'n/a', - event_time TIMESTAMP, - comment TEXT DEFAULT '', - logfile TEXT DEFAULT '', - CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state));" - ;; test_meta can be used for handing commands to the test - ;; e.g. KILLREQ - ;; the ackstate is set to 1 once the command has been completed - "CREATE TABLE IF NOT EXISTS test_meta ( - id INTEGER PRIMARY KEY, - var TEXT, - val TEXT, - ackstate INTEGER DEFAULT 0, - CONSTRAINT metadat_constraint UNIQUE (var));")))) - (debug:print 11 *default-log-port* "db:testdb-initialize END")) - -;; This routine moved to db:read-test-data -;; -(define (tdb:read-test-data tdb test-id categorypatt) - (let ((res '())) - (sqlite3:for-each-row - (lambda (id test_id category variable value expected tol units comment status type) - (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) - tdb - "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt) - (sqlite3:finalize! tdb) - (reverse res))) +;; =not-used= ;; Create the sqlite db for the individual test(s) +;; =not-used= ;; +;; =not-used= ;; Moved these tables into .db +;; =not-used= ;; THIS CODE TO BE REMOVED +;; =not-used= ;; +;; =not-used= (define (open-test-db work-area) +;; =not-used= (debug:print-info 11 *default-log-port* "open-test-db " work-area) +;; =not-used= (if (and work-area +;; =not-used= (directory? work-area) +;; =not-used= (file-read-access? work-area)) +;; =not-used= (let* ((dbpath (conc work-area "/testdat.db")) +;; =not-used= (dbexists (common:file-exists? dbpath)) +;; =not-used= (work-area-writeable (file-write-access? work-area)) +;; =not-used= (db (handle-exceptions ;; open the db if area writeable or db pre-existing. open in-mem otherwise. if exception, open in-mem +;; =not-used= exn +;; =not-used= (begin +;; =not-used= (print-call-chain (current-error-port)) +;; =not-used= (debug:print 2 *default-log-port* "ERROR: problem accessing test db " work-area ", you probably should clean and re-run this test" +;; =not-used= ((condition-property-accessor 'exn 'message) exn)) +;; =not-used= (set! dbexists #f) ;; must force re-creation of tables, more tom-foolery +;; =not-used= (sqlite3:open-database ":memory:")) ;; open an in-memory db to allow readonly access +;; =not-used= (if (or work-area-writeable +;; =not-used= dbexists) +;; =not-used= (sqlite3:open-database dbpath) +;; =not-used= (sqlite3:open-database ":memory:")))) +;; =not-used= (tdb-writeable (and (file-write-access? work-area) +;; =not-used= (file-write-access? dbpath))) +;; =not-used= (handler (sqlite3:make-busy-timeout (if (args:get-arg "-override-timeout") +;; =not-used= (string->number (args:get-arg "-override-timeout")) +;; =not-used= 136000)))) +;; =not-used= +;; =not-used= (if (and tdb-writeable +;; =not-used= *db-write-access*) +;; =not-used= (sqlite3:set-busy-handler! db handler)) +;; =not-used= (if (not dbexists) +;; =not-used= (begin +;; =not-used= (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = FULL;") +;; =not-used= (debug:print-info 11 *default-log-port* "Initialized test database " dbpath) +;; =not-used= (tdb:testdb-initialize db))) +;; =not-used= ;; (sqlite3:execute db "PRAGMA synchronous = 0;") +;; =not-used= (debug:print-info 11 *default-log-port* "open-test-db END (sucessful)" work-area) +;; =not-used= ;; now let's test that everything is correct +;; =not-used= (handle-exceptions +;; =not-used= exn +;; =not-used= (begin +;; =not-used= (print-call-chain (current-error-port)) +;; =not-used= (debug:print-error 0 *default-log-port* "problem accessing test db " work-area ", you probably should clean and re-run this test or remove the file " +;; =not-used= dbpath ".\n " +;; =not-used= ((condition-property-accessor 'exn 'message) exn)) +;; =not-used= #f) +;; =not-used= ;; Is there a cheaper single line operation that will check for existance of a table +;; =not-used= ;; and raise an exception ? +;; =not-used= (sqlite3:execute db "SELECT id FROM test_data LIMIT 1;")) +;; =not-used= db) +;; =not-used= ;; no work-area or not readable - create a placeholder to fake rest of world out +;; =not-used= (let ((baddb (sqlite3:open-database ":memory:"))) +;; =not-used= (debug:print-info 11 *default-log-port* "open-test-db END (unsucessful)" work-area) +;; =not-used= ;; provide an in-mem db (this is dangerous!) +;; =not-used= (tdb:testdb-initialize baddb) +;; =not-used= baddb))) +;; =not-used= +;; =not-used= ;; find and open the testdat.db file for an existing test +;; =not-used= (define (tdb:open-test-db-by-test-id test-id #!key (work-area #f)) +;; =not-used= (let* ((test-path (if work-area +;; =not-used= work-area +;; =not-used= (rmt:test-get-rundir-from-test-id test-id)))) +;; =not-used= (debug:print 3 *default-log-port* "TEST PATH: " test-path) +;; =not-used= (open-test-db test-path))) +;; =not-used= +;; =not-used= ;; find and open the testdat.db file for an existing test +;; =not-used= (define (tdb:open-test-db-by-test-id-local dbstruct run-id test-id #!key (work-area #f)) +;; =not-used= (let* ((test-path (if work-area +;; =not-used= work-area +;; =not-used= (db:test-get-rundir-from-test-id dbstruct run-id test-id)))) +;; =not-used= (debug:print 3 *default-log-port* "TEST PATH: " test-path) +;; =not-used= (open-test-db test-path))) +;; =not-used= +;; =not-used= ;; find and open the testdat.db file for an existing test +;; =not-used= (define (tdb:open-run-close-db-by-test-id-local dbstruct run-id test-id work-area proc . params) +;; =not-used= (let* ((test-path (if work-area +;; =not-used= work-area +;; =not-used= (db:test-get-rundir-from-test-id dbstruct run-id test-id))) +;; =not-used= (tdb (open-test-db test-path))) +;; =not-used= (apply proc tdb params))) +;; =not-used= +;; =not-used= (define (tdb:testdb-initialize db) +;; =not-used= (debug:print 11 *default-log-port* "db:testdb-initialize START") +;; =not-used= (sqlite3:with-transaction +;; =not-used= db +;; =not-used= (lambda () +;; =not-used= (for-each +;; =not-used= (lambda (sqlcmd) +;; =not-used= (sqlite3:execute db sqlcmd)) +;; =not-used= (list "CREATE TABLE IF NOT EXISTS test_rundat ( +;; =not-used= id INTEGER PRIMARY KEY, +;; =not-used= update_time TIMESTAMP, +;; =not-used= cpuload INTEGER DEFAULT -1, +;; =not-used= diskfree INTEGER DEFAULT -1, +;; =not-used= diskusage INTGER DEFAULT -1, +;; =not-used= run_duration INTEGER DEFAULT 0);" +;; =not-used= "CREATE TABLE IF NOT EXISTS test_data ( +;; =not-used= id INTEGER PRIMARY KEY, +;; =not-used= test_id INTEGER, +;; =not-used= category TEXT DEFAULT '', +;; =not-used= variable TEXT, +;; =not-used= value REAL, +;; =not-used= expected REAL, +;; =not-used= tol REAL, +;; =not-used= units TEXT, +;; =not-used= comment TEXT DEFAULT '', +;; =not-used= status TEXT DEFAULT 'n/a', +;; =not-used= type TEXT DEFAULT '', +;; =not-used= CONSTRAINT test_data_constraint UNIQUE (test_id,category,variable));" +;; =not-used= "CREATE TABLE IF NOT EXISTS test_steps ( +;; =not-used= id INTEGER PRIMARY KEY, +;; =not-used= test_id INTEGER, +;; =not-used= stepname TEXT, +;; =not-used= state TEXT DEFAULT 'NOT_STARTED', +;; =not-used= status TEXT DEFAULT 'n/a', +;; =not-used= event_time TIMESTAMP, +;; =not-used= comment TEXT DEFAULT '', +;; =not-used= logfile TEXT DEFAULT '', +;; =not-used= CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state));" +;; =not-used= ;; test_meta can be used for handing commands to the test +;; =not-used= ;; e.g. KILLREQ +;; =not-used= ;; the ackstate is set to 1 once the command has been completed +;; =not-used= "CREATE TABLE IF NOT EXISTS test_meta ( +;; =not-used= id INTEGER PRIMARY KEY, +;; =not-used= var TEXT, +;; =not-used= val TEXT, +;; =not-used= ackstate INTEGER DEFAULT 0, +;; =not-used= CONSTRAINT metadat_constraint UNIQUE (var));")))) +;; =not-used= (debug:print 11 *default-log-port* "db:testdb-initialize END")) +;; =not-used= +;; =not-used= ;; This routine moved to db:read-test-data +;; =not-used= ;; +;; =not-used= (define (tdb:read-test-data tdb test-id categorypatt) +;; =not-used= (let ((res '())) +;; =not-used= (sqlite3:for-each-row +;; =not-used= (lambda (id test_id category variable value expected tol units comment status type) +;; =not-used= (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) +;; =not-used= tdb +;; =not-used= "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt) +;; =not-used= (sqlite3:finalize! tdb) +;; =not-used= (reverse res))) ;;====================================================================== ;; T E S T D A T A ;;====================================================================== @@ -248,14 +248,10 @@ (loop (read-line))))) ;; roll up the current results. ;; FIXME: Add the status too (rmt:test-data-rollup run-id test-id #f)) -(define (tdb:get-prev-tol-for-test tdb test-id category variable) - ;; Finish me? - (values #f #f #f)) - ;;====================================================================== ;; S T E P S ;;====================================================================== (define (tdb:step-get-time-as-string vec) Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -33,2012 +33,20 @@ (declare (uses items)) (declare (uses runconfig)) (declare (uses server)) (declare (uses mtargs)) (declare (uses rmtmod)) +(declare (uses megatestmod)) +(declare (uses tasksmod)) (use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking directory-utils) (import (prefix sqlite3 sqlite3:)) (import commonmod configfmod (prefix mtargs args:) debugprint - rmtmod) -(require-library stml) - -(include "common_records.scm") -(include "key_records.scm") -(include "db_records.scm") -(include "run_records.scm") -(include "test_records.scm") -(include "js-path.scm") - -(define (init-java-script-lib) - (set! *java-script-lib* (conc (common:get-install-area) "/share/js/jquery-3.1.0.slim.min.js")) - ) - -;; Call this one to do all the work and get a standardized list of tests -;; gets paths from configs and finds valid tests -;; returns hash of testname --> fullpath -;; -(define (tests:get-all) - (let* ((test-search-path (tests:get-tests-search-path *configdat*))) - (debug:print 8 *default-log-port* "test-search-path: " test-search-path) - (tests:get-valid-tests (make-hash-table) test-search-path))) - -(define (tests:get-tests-search-path cfgdat) - (let ((paths (let ((section (if cfgdat - (configf:get-section cfgdat "tests-paths") - #f))) - (if section - (map cadr section) - '())))) - (filter (lambda (d) - (if (directory-exists? d) - d - (begin - ;; (if (common:low-noise-print 60 "tests:get-tests-search-path" d) - ;; (debug:print 0 *default-log-port* "WARNING: problem with directory " d ", dropping it from tests path")) - #f))) - (append paths (list (conc *toppath* "/tests")))))) - -(define (tests:get-valid-tests test-registry tests-paths) - (if (null? tests-paths) - test-registry - (let loop ((hed (car tests-paths)) - (tal (cdr tests-paths))) - (if (common:file-exists? hed) - (for-each (lambda (test-path) - (let* ((tname (last (string-split test-path "/"))) - (tconfig (conc test-path "/testconfig"))) - (if (and (not (hash-table-ref/default test-registry tname #f)) - (common:file-exists? tconfig)) - (hash-table-set! test-registry tname test-path)))) - (glob (conc hed "/*")))) - (if (null? tal) - test-registry - (loop (car tal)(cdr tal)))))) - -(define (tests:filter-test-names-not-matched test-names test-patts) - (delete-duplicates - (filter (lambda (testname) - (not (tests:match test-patts testname #f))) - test-names))) - - -(define (tests:filter-test-names test-names test-patts) - (delete-duplicates - (filter (lambda (testname) - (tests:match test-patts testname #f)) - test-names))) - -;; itemmap is a list of testname patterns to maps -;; test1 .*/bar/(\d+) foo/\1 -;; % foo/([^/]+) \1/bar -;; -;; # NOTE: the line with the single % could be the result of -;; # itemmap entry in requirements (legacy). The itemmap -;; # requirements entry is deprecated -;; -(define (tests:get-itemmaps tconfig) - (let ((base-itemmap (configf:lookup tconfig "requirements" "itemmap")) - (itemmap-table (configf:get-section tconfig "itemmap"))) - (append (if base-itemmap - (list (list "%" base-itemmap)) - '()) - (if itemmap-table - itemmap-table - '())))) - -;; given a list of itemmaps (testname . map), return the first match -;; -(define (tests:lookup-itemmap itemmaps testname) - (let ((best-matches (filter (lambda (itemmap) - (tests:match (car itemmap) testname #f)) - itemmaps))) - (if (null? best-matches) - #f - (let ((res (car best-matches))) - ;; (debug:print 0 *default-log-port* "res=" res) - (cond - ((string? res) res) ;;; FIX THE ROOT CAUSE HERE .... - ((null? res) #f) - ((string? (cdr res)) (cdr res)) ;; it is a pair - ((string? (cadr res))(cadr res)) ;; it is a list - (else cadr res)))))) - -(define (tests:get-global-waitons rconfig) - (let* ((global-waitons (runconfigs-get rconfig "!GLOBAL_WAITONS"))) - (if (string? global-waitons) - (string-split global-waitons) - '()))) - -;; return items given config -;; -(define (tests:get-items tconfig) - (let ((items (hash-table-ref/default tconfig "items" #f)) ;; items 4 - (itemstable (hash-table-ref/default tconfig "itemstable" #f))) - ;; if either items or items table is a proc return it so test running - ;; process can know to call items:get-items-from-config - ;; if either is a list and none is a proc go ahead and call get-items - ;; otherwise return #f - this is not an iterated test - (cond - ((procedure? items) - (debug:print-info 4 *default-log-port* "items is a procedure, will calc later") - items) ;; calc later - ((procedure? itemstable) - (debug:print-info 4 *default-log-port* "itemstable is a procedure, will calc later") - itemstable) ;; calc later - ((filter (lambda (x) - (let ((val (car x))) - (if (procedure? val) val #f))) - (append (if (list? items) items '()) - (if (list? itemstable) itemstable '()))) - 'have-procedure) - ((or (list? items)(list? itemstable)) ;; calc now - (debug:print-info 4 *default-log-port* "items and itemstable are lists, calc now\n" - " items: " items " itemstable: " itemstable) - (items:get-items-from-config tconfig)) - (else #f)))) ;; not iterated - - -;; returns waitons waitors tconfigdat -;; -(define (tests:get-waitons test-name all-tests-registry global-waitons) - (let* ((config (tests:get-testconfig test-name #f all-tests-registry 'return-procs))) ;; assuming no problems with immediate evaluation, this could be simplified ('return-procs -> #t) - (let ((instr (if config - (configf:lookup config "requirements" "waiton") - (begin ;; No config means this is a non-existant test - (debug:print-error 0 *default-log-port* "non-existent required test \"" test-name "\"") - (exit 1)))) - (instr2 (if config - (configf:lookup config "requirements" "waitor") - ""))) - (debug:print-info 8 *default-log-port* "waitons string is " instr ", waitors string is " instr2) - (let* ((newwaitons-tmp - (string-split (cond - ((procedure? instr) ;; here - (let ((res (instr))) - (debug:print-info 8 *default-log-port* "waiton procedure results in string " res " for test " test-name) - res)) - ((string? instr) instr) - (else - ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " test-name) - "")))) - (newwaitors - (string-split (cond - ((procedure? instr2) - (let ((res (instr2))) - (debug:print-info 8 *default-log-port* "waitor procedure results in string " res " for test " test-name) - res)) - ((string? instr2) instr2) - (else - ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " test-name) - "")))) - (newwaitons (if (and (list? global-waitons) - (not (null? global-waitons))) - (begin - (debug:print 0 *default-log-port* "Adding global waitons " global-waitons) - (append newwaitons-tmp (filter (lambda (x) ;; remove self from global waitons - (not (equal? x test-name))) - global-waitons))) - newwaitons-tmp))) - (values - ;; the waitons - (filter (lambda (x) - (if (hash-table-ref/default all-tests-registry x #f) - #t - (begin - (debug:print-error 0 *default-log-port* "test " test-name " has unrecognised waiton testname " x) - #f))) - newwaitons) - (filter (lambda (x) - (if (hash-table-ref/default all-tests-registry x #f) - #t - (begin - (debug:print-error 0 *default-log-port* "test " test-name " has unrecognised waiton testname " x) - #f))) - newwaitors) - config))))) - -;; given waiting-test that is waiting on waiton-test extend test-patt appropriately -;; -;; genlib/testconfig sim/testconfig -;; genlib/sch sim/sch/cell1 -;; -;; [requirements] [requirements] -;; mode itemwait -;; # trim off the cell to determine what to run for genlib -;; itemmap /.* -;; -;; waiting-test is waiting on waiton-test so we need to create a pattern for waiton-test given waiting-test and itemmap -;; BB> (tests:extend-test-patts "normal-second/2" "normal-second" "normal-first" '()) -;; observed -> "normal-first/2,normal-first/,normal-second/2,normal-second/" -;; expected -> "normal-first,normal-second/2,normal-second/" -;; testpatt = normal-second/2 -;; waiting-test = normal-second -;; waiton-test = normal-first -;; itemmaps = () - -(define (tests:extend-test-patts test-patt waiting-test waiton-test itemmaps itemized-waiton) - (cond - (itemized-waiton - (let* ((itemmap (tests:lookup-itemmap itemmaps waiton-test)) - (patts (string-split test-patt ",")) - (waiting-test-len (+ (string-length waiting-test) 1)) - (patts-waiton (map (lambda (x) ;; for each incoming patt that matches the waiting test - (let* ((modpatt (if itemmap (db:convert-test-itempath x itemmap) x)) - (newpatt (conc waiton-test "/" (substring modpatt waiting-test-len (string-length modpatt))))) - ;; (conc waiting-test "/," waiting-test "/" (substring modpatt waiton-test-len (string-length modpatt))))) - ;; (print "in map, x=" x ", newpatt=" newpatt) - newpatt)) - (filter (lambda (x) - (eq? (substring-index (conc waiting-test "/") x) 0)) ;; is this patt pertinent to the waiting test - patts))) - (extended-test-patt (append patts (if (null? patts-waiton) - (list (conc waiton-test "/%")) ;; really shouldn't add the waiton forcefully like this - patts-waiton))) - (extended-test-patt-with-toplevels - (fold (lambda (testpatt-item accum ) - (let ((my-match (string-match "^([^%\\/]+)\\/.+$" testpatt-item))) - (cons testpatt-item - (if my-match - (cons - (conc (cadr my-match) "/") - accum) - accum)))) - '() - extended-test-patt))) - (string-intersperse (delete-duplicates extended-test-patt-with-toplevels) ","))) - (else ;; not waiting on items, waiting on entire waiton test. - (let* ((patts (string-split test-patt ",")) - (new-patts (if (member waiton-test patts) - patts - (cons waiton-test patts)))) - (string-intersperse (delete-duplicates new-patts) ","))))) - -(define *glob-like-match-cache* (make-hash-table)) -(define (tests:cache-regexp str-in flag) - (let* ((key (conc str-in flag))) - (or (hash-table-ref/default *glob-like-match-cache* key #f) - (let* ((newrx (regexp str-in flag))) - (hash-table-set! *glob-like-match-cache* key newrx) - newrx)))) - -;; tests:glob-like-match -(define (tests:glob-like-match patt str) - (let* ((like (substring-index "%" patt)) - (notpatt (equal? (substring-index "~" patt) 0)) - (newpatt (if notpatt (substring patt 1) patt)) - (finpatt (if like - (string-substitute (regexp "%") ".*" newpatt #f) - (string-substitute (regexp "\\*") ".*" newpatt #f))) - (rx (tests:cache-regexp finpatt (if like #t #f))) - (res (string-match rx str))) - (if notpatt (not res) res))) - -;; if itempath is #f then look only at the testname part -;; -(define (tests:match patterns testname itempath #!key (required '())) - (if (string? patterns) - (let ((patts (append (string-split patterns ",") required))) - (if (null? patts) ;;; no pattern(s) means no match - #f - (let loop ((patt (car patts)) - (tal (cdr patts))) - ;; (print "loop: patt: " patt ", tal " tal) - (if (string=? patt "") - #f ;; nothing ever matches empty string - policy - (let* ((patt-parts (string-match (regexp "^([^\\/]*)(\\/(.*)|)$") patt)) - (test-patt (cadr patt-parts)) - (item-patt (cadddr patt-parts))) - ;; special case: test vs. test/ - ;; test => "test" "%" - ;; test/ => "test" "" - (if (and (not (substring-index "/" patt)) ;; no slash in the original - (or (not item-patt) - (equal? item-patt ""))) ;; should always be true that item-patt is "" - (set! item-patt "%")) - ;; (print "tests:match => patt-parts: " patt-parts ", test-patt: " test-patt ", item-patt: " item-patt) - (if (and (tests:glob-like-match test-patt testname) - (or (not itempath) - (tests:glob-like-match (if item-patt item-patt "") itempath))) - #t - (if (null? tal) - #f - (loop (car tal)(cdr tal))))))))))) - -;; if itempath is #f then look only at the testname part -;; -(define (tests:match->sqlqry patterns) - (if (string? patterns) - (let ((patts (string-split patterns ","))) - (if (null? patts) ;;; no pattern(s) means no match, we will do no query - #f - (let loop ((patt (car patts)) - (tal (cdr patts)) - (res '())) - ;; (print "loop: patt: " patt ", tal " tal) - (let* ((patt-parts (string-match (regexp "^([^\\/]*)(\\/(.*)|)$") patt)) - (test-patt (cadr patt-parts)) - (item-patt (cadddr patt-parts)) - (test-qry (db:patt->like "testname" test-patt)) - (item-qry (db:patt->like "item_path" item-patt)) - (qry (conc "(" test-qry " AND " item-qry ")"))) - ;; (print "tests:match => patt-parts: " patt-parts ", test-patt: " test-patt ", item-patt: " item-patt) - (if (null? tal) - (string-intersperse (append (reverse res)(list qry)) " OR ") - (loop (car tal)(cdr tal)(cons qry res))))))) - #f)) - -;; Check for waiver eligibility -;; -(define (tests:check-waiver-eligibility testdat prev-testdat) - (let* ((test-registry (make-hash-table)) - (testconfig (tests:get-testconfig (db:test-get-testname testdat) (db:test-get-item-path testdat) test-registry #f)) - (test-rundir ;; (sdb:qry 'passstr - (db:test-get-rundir testdat)) ;; ) - (prev-rundir ;; (sdb:qry 'passstr - (db:test-get-rundir prev-testdat)) ;; ) - (waivers (if testconfig (configf:section-vars testconfig "waivers") '())) - (waiver-rx (regexp "^(\\S+)\\s+(.*)$")) - (diff-rule "diff %file1% %file2%") - (logpro-rule "diff %file1% %file2% | logpro %waivername%.logpro %waivername%.html")) - (if (not (common:file-exists? test-rundir)) - (begin - (debug:print-error 0 *default-log-port* "test run directory is gone, cannot propagate waiver") - #f) - (begin - (push-directory test-rundir) - (let ((result (if (null? waivers) - #f - (let loop ((hed (car waivers)) - (tal (cdr waivers))) - (debug:print 0 *default-log-port* "INFO: Applying waiver rule \"" hed "\"") - (let* ((waiver (configf:lookup testconfig "waivers" hed)) - (wparts (if waiver (string-match waiver-rx waiver) #f)) - (waiver-rule (if wparts (cadr wparts) #f)) - (waiver-glob (if wparts (caddr wparts) #f)) - (logpro-file (if waiver - (let ((fname (conc hed ".logpro"))) - (if (common:file-exists? fname) - fname - (begin - (debug:print 0 *default-log-port* "INFO: No logpro file " fname " falling back to diff") - #f))) - #f)) - ;; if rule by name of waiver-rule is found in testconfig - use it - ;; else if waivername.logpro exists use logpro-rule - ;; else default to diff-rule - (rule-string (let ((rule (configf:lookup testconfig "waiver_rules" waiver-rule))) - (if rule - rule - (if logpro-file - logpro-rule - (begin - (debug:print 0 *default-log-port* "INFO: No logpro file " logpro-file " found, using diff rule") - diff-rule))))) - ;; (string-substitute "%file1%" "foofoo.txt" "This is %file1% and so is this %file1%." #t) - (processed-cmd (string-substitute - "%file1%" (conc test-rundir "/" waiver-glob) - (string-substitute - "%file2%" (conc prev-rundir "/" waiver-glob) - (string-substitute - "%waivername%" hed rule-string #t) #t) #t)) - (res #f)) - (debug:print 0 *default-log-port* "INFO: waiver command is \"" processed-cmd "\"") - (if (eq? (system processed-cmd) 0) - (if (null? tal) - #t - (loop (car tal)(cdr tal))) - #f)))))) - (pop-directory) - result))))) - -;; Do not rpc this one, do the underlying calls!!! -(define (tests:test-set-status! run-id test-id state status comment dat #!key (work-area #f)) - (let* ((real-status status) - (otherdat (if dat dat (make-hash-table))) - (testdat (rmt:get-test-info-by-id run-id test-id)) - (test-name (db:test-get-testname testdat)) - (item-path (db:test-get-item-path testdat)) - ;; before proceeding we must find out if the previous test (where all keys matched except runname) - ;; was WAIVED if this test is FAIL - - ;; NOTES: - ;; 1. Is the call to test:get-previous-run-record remotified? - ;; 2. Add test for testconfig waiver propagation control here - ;; - (prev-test (if (equal? status "FAIL") - (rmt:get-previous-test-run-record run-id test-name item-path) - #f)) - (waived (if prev-test - (if prev-test ;; true if we found a previous test in this run series - (let ((prev-status (db:test-get-status prev-test)) - (prev-state (db:test-get-state prev-test)) - (prev-comment (db:test-get-comment prev-test))) - (debug:print 4 *default-log-port* "prev-status " prev-status ", prev-state " prev-state ", prev-comment " prev-comment) - (if (and (equal? prev-state "COMPLETED") - (equal? prev-status "WAIVED")) - (if comment - comment - prev-comment) ;; waived is either the comment or #f - #f)) - #f) - #f))) - (if (and waived - (tests:check-waiver-eligibility testdat prev-test)) - (set! real-status "WAIVED")) - - (debug:print 4 *default-log-port* "real-status " real-status ", waived " waived ", status " status) - - ;; update the primary record IF state AND status are defined - (if (and state status) - (begin - (rmt:set-state-status-and-roll-up-items run-id test-id item-path state real-status (if waived waived comment)) - ;; (mt:process-triggers run-id test-id state real-status) ;; triggers are called in test-set-state-status - )) - - ;; if status is "AUTO" then call rollup (note, this one modifies data in test - ;; run area, it does remote calls under the hood. - ;; (if (and test-id state status (equal? status "AUTO")) - ;; (rmt:test-data-rollup run-id test-id status)) - - ;; add metadata (need to do this way to avoid SQL injection issues) - - ;; :first_err - ;; (let ((val (hash-table-ref/default otherdat ":first_err" #f))) - ;; (if val - ;; (sqlite3:execute db "UPDATE tests SET first_err=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path))) - ;; - ;; ;; :first_warn - ;; (let ((val (hash-table-ref/default otherdat ":first_warn" #f))) - ;; (if val - ;; (sqlite3:execute db "UPDATE tests SET first_warn=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path))) - - (let ((category (hash-table-ref/default otherdat ":category" "")) - (variable (hash-table-ref/default otherdat ":variable" "")) - (value (hash-table-ref/default otherdat ":value" #f)) - (expected (hash-table-ref/default otherdat ":expected" "n/a")) - (tol (hash-table-ref/default otherdat ":tol" "n/a")) - (units (hash-table-ref/default otherdat ":units" "")) - (type (hash-table-ref/default otherdat ":type" "")) - (dcomment (hash-table-ref/default otherdat ":comment" ""))) - (debug:print 4 *default-log-port* - "category: " category ", variable: " variable ", value: " value - ", expected: " expected ", tol: " tol ", units: " units) - (if (and value) ;; require only value; BB was- all three required - (let ((dat (conc category "," - variable "," - value "," - expected "," - tol "," - units "," - dcomment ",," ;; extra comma for status - type ))) - ;; This was run remote, don't think that makes sense. Perhaps not, but that is the easiest path for the moment. - (rmt:csv->test-data run-id test-id - dat) - ;; This was added in check-in a5adfa3f9a. Message was: "...added delay in set-values to allow for delayed write on server start" - ;; I'm inserting an arbitrary rmt: call to force/ensure that the server is available to (hopefully) prevent a communication issue. - (rmt:get-var "MEGATEST_VERSION") ;; this does NOTHING but ensure the server is reachable. This is almost certainly NOT needed :) - ;; BB - commentiong out arbitrary 10 second wait (thread-sleep! 10) ;; add 10 second delay before quit incase rmt needs time to start a server. - ))) - - ;; need to update the top test record if PASS or FAIL and this is a subtest - ;;;;;; (if (not (equal? item-path "")) - ;;;;;; (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status #f) ;;;;;) - - (if (or (and (string? comment) - (string-match (regexp "\\S+") comment)) - waived) - (let ((cmt (if waived waived comment))) - (rmt:general-call 'set-test-comment run-id cmt test-id))))) - -(define (tests:test-set-toplog! run-id test-name logf) - (rmt:general-call 'tests:test-set-toplog run-id logf run-id test-name)) - -(define (tests:summarize-items run-id test-id test-name force) - ;; if not force then only update the record if one of these is true: - ;; 1. logf is "log/final.log - ;; 2. logf is same as outputfilename - (let* ((outputfilename (conc "megatest-rollup-" test-name ".html")) - (orig-dir (current-directory)) - (logf-info (rmt:test-get-logfile-info run-id test-name)) - (logf (if logf-info (cadr logf-info) #f)) - (path (if logf-info (car logf-info) #f))) - ;; This query finds the path and changes the directory to it for the test - (if (and (string? path) - (directory? path)) ;; can get #f here under some wierd conditions. why, unknown ... - (begin - (debug:print 4 *default-log-port* "Found path: " path) - (change-directory path)) - ;; (set! outputfilename (conc path "/" outputfilename))) - (debug:print-error 0 *default-log-port* "summarize-items for run-id=" run-id ", test-name=" test-name ", no such path: " path)) - (debug:print 4 *default-log-port* "summarize-items with logf " logf ", outputfilename " outputfilename " and force " force) - (if (or (equal? logf "logs/final.log") - (equal? logf outputfilename) - force) - (let ((my-start-time (current-seconds)) - (lockf (conc outputfilename ".lock"))) - (let loop ((have-lock (common:simple-file-lock lockf))) - (if have-lock - (let ((script (configf:lookup *configdat* "testrollup" test-name))) - (debug:print 0 *default-log-port* "Obtained lock for " outputfilename) - (rmt:set-state-status-and-roll-up-items run-id test-name "" #f #f #f) - (if script - (system (conc script " > " outputfilename " & ")) - (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename)) - (common:simple-file-release-lock lockf) - (change-directory orig-dir) - ;; NB// tests:test-set-toplog! is remote internal... - (tests:test-set-toplog! run-id test-name outputfilename)) - ;; didn't get the lock, check to see if current update started later than this - ;; update, if so we can exit without doing any work - (if (> my-start-time (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "failed to get mod time on " lockf ", exn=" exn) - 0) - (file-modification-time lockf))) - ;; we started since current re-gen in flight, delay a little and try again - (begin - (debug:print-info 1 *default-log-port* "Waiting to update " outputfilename ", another test currently updating it") - (thread-sleep! (+ 5 (random 5))) ;; delay between 5 and 10 seconds - (loop (common:simple-file-lock lockf)))))))))) - -(define (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename) - (let ((counts (make-hash-table)) - (statecounts (make-hash-table)) - (outtxt "") - (tot 0) - (testdat (rmt:test-get-records-for-index-file run-id test-name))) - (with-output-to-file outputfilename - (lambda () - (set! outtxt (conc outtxt "Summary: " test-name - "

Summary for " test-name "

")) - (for-each - (lambda (testrecord) - (let ((id (vector-ref testrecord 0)) - (itempath (vector-ref testrecord 1)) - (state (vector-ref testrecord 2)) - (status (vector-ref testrecord 3)) - (run_duration (vector-ref testrecord 4)) - (logf (vector-ref testrecord 5)) - (comment (vector-ref testrecord 6))) - (hash-table-set! counts status (+ 1 (hash-table-ref/default counts status 0))) - (hash-table-set! statecounts state (+ 1 (hash-table-ref/default statecounts state 0))) - (set! outtxt (conc outtxt "" - ;; " " itempath "" - " " itempath "" - "" state "" - "" status "" - "" (if (equal? comment "") - " " - comment) "" - "")))) - (if (list? testdat) - testdat - (begin - (debug:print 0 *default-log-port* "ERROR: failed to get records with rmt:test-get-records-for-index-file run-id=" run-id "test-name=" test-name) - '()))) - - (print "
") - ;; Print out stats for status - (set! tot 0) - (print "") - (for-each (lambda (state) - (set! tot (+ tot (hash-table-ref statecounts state))) - (print "")) - (hash-table-keys statecounts)) - (print "

State stats

" state "" (hash-table-ref statecounts state) "
Total" tot "
") - (print "
") - ;; Print out stats for state - (set! tot 0) - (print "") - (for-each (lambda (status) - (set! tot (+ tot (hash-table-ref counts status))) - (print "")) - (hash-table-keys counts)) - (print "

Status stats

" status - "" (hash-table-ref counts status) "
Total" tot "
") - (print "
") - - (print "" - "" - outtxt "
ItemStateStatusComment
") - ;; (release-dot-lock outputfilename) - ;;(rmt:update-run-stats - ;; run-id - ;; (hash-table-map - ;; state-status-counts - ;; (lambda (key val) - ;; (append key (list val))))) - )))) - -(define tests:css-jscript-block -#< -ul.LinkedList { display: block; } -/* ul.LinkedList ul { display: none; } */ -.HandCursorStyle { cursor: pointer; cursor: hand; } /* For IE */ -th {background-color: #8c8c8c;} -td.test {background-color: #d9dbdd;} -td.PASS {background-color: #347533;} -td.FAIL {background-color: #cc2812;} -td.SKIP{background-color: #FFD733;} -td.WARN {background-color: #EA8724;} -td.WAIVED {background-color: #838A12;} -td.ABORT{background-color: #EA24B7;} -.PASS .link, .SKIP .link, .WARN .link,.WAIVED .link,.ABORT .link, .FAIL .link{color: #FFFFFF;} - - - - - - -EOF -) - -(define tests:css-jscript-block-dynamic -#< -EOF -) - -(define (test:js-block javascript-lib) - (conc "" )) - - -(define tests:css-jscript-block-static (test:js-block *java-script-lib*)) - -(define (tests:css-jscript-block-cond dynamic) - (if (equal? dynamic #t) - tests:css-jscript-block-dynamic - tests:css-jscript-block-static)) - - -(define (tests:run-record->test-path run numkeys) - (append (take (vector->list run) numkeys) - (list (vector-ref run (+ 1 numkeys))))) - - -(define (tests:get-rest-data runs header numkeys) - (let ((resh (make-hash-table))) - (for-each - (lambda (run) - (let* ((run-id (db:get-value-by-header run header "id")) - (run-dir (tests:run-record->test-path run numkeys)) - (test-data (rmt:get-tests-for-run - run-id - "%" ;; testnamepatt - '() ;; states - '() ;; statuses - #f ;; offset - #f ;; num-to-get - #f ;; hide/not-hide - #f ;; sort-by - #f ;; sort-order - #f ;; 'shortlist ;; qrytype - 0 ;; last update - #f))) - - (map (lambda (test) - (let* ((test-name (vector-ref test 2)) - (test-html-path (conc (vector-ref test 10) "/" (vector-ref test 13))) - (test-item (conc test-name ":" (vector-ref test 11))) - (test-status (vector-ref test 4))) - - (if (not (hash-table-ref/default resh test-name #f)) - (hash-table-set! resh test-name (make-hash-table))) - (if (not (hash-table-ref/default (hash-table-ref/default resh test-name #f) test-item #f)) - (hash-table-set! (hash-table-ref/default resh test-name #f) test-item (make-hash-table))) - (hash-table-set! (hash-table-ref/default (hash-table-ref/default resh test-name #f) test-item #f) run-id (list test-status test-html-path)))) - test-data))) - runs) - resh)) - - -;; tests:genrate dashboard body -;; - -(define (tests:dashboard-body page pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links flag run-patt target-patt) - (let* ((start (* page pg-size)) - ;(runsdat (rmt:get-runs "%" pg-size start (map (lambda (x)(list x "%")) keys))) - (runsdat (rmt:get-runs-by-patt keys run-patt target-patt start pg-size #f 0 sort-order: "desc")) - ; db:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-update - (header (vector-ref runsdat 0)) - (runs (vector-ref runsdat 1)) - (ctr 0) - (test-runs-hash (tests:get-rest-data runs header numkeys)) - (test-list (hash-table-keys test-runs-hash))) - - (s:html tests:css-jscript-block (tests:css-jscript-block-cond flag) - (s:title "Summary for " area-name) - (s:body 'onload "addEvents();" - (get-prev-links page linktree) - (get-next-links page linktree total-runs) - - (s:h1 "Summary for " area-name) - (s:h3 "Filter" ) - (s:input 'type "text" 'name "testname" 'id "testname" 'length "30" 'onkeyup "filtersome()") - ;; top list - - (s:table 'id "LinkedList1" 'border "1" 'cellspacing 0 - (map (lambda (key) - (let* ((res (s:tr 'class "something" - (s:th key ) - (map (lambda (run) - (s:th (vector-ref run ctr))) - runs)))) - (set! ctr (+ ctr 1)) - res)) - keys) - (s:tr - (s:th "Run Name") - (map (lambda (run) - (s:th (db:get-value-by-header run header "runname"))) - runs)) - - (map (lambda (test-name) - (let* ((item-hash (hash-table-ref/default test-runs-hash test-name #f)) - (item-keys (sort (hash-table-keys item-hash) string<=?))) - (map (lambda (item-name) - (let* ((res (s:tr 'class item-name - (s:td item-name 'class "test" ) - (map (lambda (run) - (let* ((run-test (hash-table-ref/default item-hash item-name #f)) - (run-id (db:get-value-by-header run header "id")) - (result (hash-table-ref/default run-test run-id "n/a")) - ;(relative-path (get-relative-path)) - (status (if (string? result) - result - (car result))) - (link (if (string? result) - result - (if (equal? flag #t) - (s:a (car result) 'href (conc "./test_log?runid=" run-id "&testname=" item-name )) - (s:a (car result) 'href (string-substitute (conc linktree "/") "" (cadr result) "-")))))) - (s:td link 'class status))) - runs)))) - res)) - item-keys))) - test-list)))))) - -;; (tests:create-html-tree "test-index.html") -;; -(define (tests:create-html-tree outf) - (let* ((lockfile (conc outf ".lock")) - (runs-to-process '()) - (linktree (common:get-linktree)) - (area-name (common:get-testsuite-name)) - (keys (rmt:get-keys)) - (numkeys (length keys)) - (run-patt (or (args:get-arg "-run-patt") - (args:get-arg "-runname") - "%")) - (target (or (args:get-arg "-target-patt") - (args:get-arg "-target") - "%")) - (targlist (string-split target "/")) - (numtarg (length targlist)) - (targtweaked (if (> numkeys numtarg) - (append targlist (make-list (- numkeys numtarg) "%")) - targlist)) - (target-patt (string-join targtweaked "/")) - ;(total-runs (rmt:get-num-runs "%")) ;;this needs to be changed to filter by target - (total-runs (rmt:get-runs-cnt-by-patt run-patt target-patt keys )) - (pg-size 10)) - (if (common:simple-file-lock lockfile) - (begin - ;(print total-runs) - (let loop ((page 0)) - (let* ((oup (open-output-file (or outf (conc linktree "/page" page ".html")))) - (get-prev-links (lambda (page linktree ) - (let* ((link (if (not (eq? page 0)) - (s:a "<<prev" 'href (conc "page" (- page 1) ".html")) - (s:a "" 'href (conc "page" page ".html"))))) - link))) - (get-next-links (lambda (page linktree total-runs) - (let* ((link (if (> total-runs (+ 10 (* page pg-size))) - (s:a "next>>" 'href (conc "page" (+ page 1) ".html")) - (s:a "" 'href (conc "page" page ".html"))))) - link))) ) - (print "total runs: " total-runs) - (s:output-new - oup - (tests:dashboard-body page pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links #f run-patt target-patt)) ;; update this function - (close-output-port oup) - ; (set! page (+ 1 page)) - (if (> total-runs (* (+ 1 page) pg-size)) - (loop (+ 1 page))))) - (common:simple-file-release-lock lockfile)) - (begin - (debug:print 0 *default-log-port* "Failed to get lock on file outf, lockfile: " lockfile) #f)))) - - -(define (tests:readlines filename) - (call-with-input-file filename - (lambda (p) - (let loop ((line (read-line p)) - (result '())) - (if (eof-object? line) - (reverse result) - (loop (read-line p) (cons line result))))))) - -(define (tests:get-test-log run-id test-name item-name) - (let* ((test-data (rmt:get-tests-for-run - (string->number run-id) - test-name ;; testnamepatt - '() ;; states - '() ;; statuses - #f ;; offset - #f ;; num-to-get - #f ;; hide/not-hide - #f ;; sort-by - #f ;; sort-order - #f ;; 'shortlist ;; qrytype - 0 ;; last update - #f)) - (path "") - (found 0)) - (debug:print-info 0 *default-log-port* "found: " found ) - - (let loop ((hed (car test-data)) - (tal (cdr test-data))) - (debug:print-info 0 *default-log-port* "item: " (vector-ref hed 11) (vector-ref hed 10) "/" (vector-ref hed 13)) - - (if (equal? (vector-ref hed 11) item-name) - (begin - (set! found 1) - (set! path (conc (vector-ref hed 10) "/" (vector-ref hed 13))))) - (if (and (not (null? tal)) (equal? found 0)) - (loop (car tal)(cdr tal)))) - (if (equal? path "") - "

Data not found

" - (string-join (tests:readlines path) "\n")))) - - -(define (tests:dynamic-dboard page) -;(define (tests:create-html-tree o) - (let* ( -;(page "1") - (linktree (common:get-linktree)) - (area-name (common:get-testsuite-name)) - (keys (rmt:get-keys)) - (numkeys (length keys)) - (targtweaked (make-list numkeys "%")) - (target-patt (string-join targtweaked "/")) - (total-runs (rmt:get-num-runs "%")) - (pg-size 10) - (pg (if (equal? page #f) - 0 - (- (string->number page) 1))) - (get-prev-links (lambda (pg linktree) - (debug:print-info 0 *default-log-port* "val: " (- 1 pg)) - (let* ((link (if (not (eq? pg 0)) - (s:a "<<prev " 'href (conc "dashboard?page=" pg )) - (s:a "" 'href (conc "dashboard?page=" pg))))) - link))) - (get-next-links (lambda (pg linktree total-runs) - (debug:print-info 0 *default-log-port* "val: " pg) - (debug:print-info 0 *default-log-port* "val: " total-runs " size" pg-size) - - (let* ((link (if (> total-runs (+ 10 (* pg pg-size))) - (s:a "next>> " 'href (conc "dashboard?page=" (+ pg 2) )) - (s:a "" 'href (conc "dashboard?page=" pg ))))) - link))) - (html-body (tests:dashboard-body pg pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links #t "%" target-patt))) ;; update tis function - html-body)) - -(define (tests:create-html-summary outf) - (let* ((lockfile (conc outf ".lock")) - (linktree (common:get-linktree)) - (keys (rmt:get-keys)) - (area-name (common:get-testsuite-name)) - (run-patt (or (args:get-arg "-run-patt") - (args:get-arg "-runname") - "%")) - (target (or (args:get-arg "-target-patt") - (args:get-arg "-target") - "%")) - (targlist (string-split target "/")) - (numkeys (length keys)) - (numtarg (length targlist)) - (targtweaked (if (> numkeys numtarg) - (append targlist (make-list (- numkeys numtarg) "%")) - targlist)) - (target-patt (string-join targtweaked "/"))) - (if (common:simple-file-lock lockfile) - (begin - (let* (;(runsdat1 (rmt:get-runs run-patt #f #f (map (lambda (x)(list x "%")) keys))) - (runsdat (rmt:get-runs-by-patt keys run-patt target-patt #f #f #f 0)) - (runs (vector-ref runsdat 1)) - (header (vector-ref runsdat 0)) - (oup (open-output-file (or outf (conc linktree "/targets.html")))) - (target-hash (test:create-target-hash runs header (length keys)))) - (test:create-target-html target-hash oup area-name linktree) - (test:create-run-html runs area-name linktree (length keys) header)) - (common:simple-file-release-lock lockfile)) - #f))) - -(define (test:get-test-hash test-data) - (let ((resh (make-hash-table))) - (map (lambda (test) - (let* ((test-name (vector-ref test 2)) - (test-html-path (if (file-exists? (conc (vector-ref test 10) "/test-summary.html")) - (conc (vector-ref test 10) "/test-summary.html" ) - (conc (vector-ref test 10) "/" (vector-ref test 13)))) - (test-item (vector-ref test 11)) - (test-status (vector-ref test 4))) - (if (not (hash-table-ref/default resh test-item #f)) - (hash-table-set! resh test-item (make-hash-table))) - (hash-table-set! (hash-table-ref/default resh test-item #f) test-name (list test-status test-html-path)))) - test-data) -resh)) - -(define (test:get-data->b-keys ordered-data a-keys) - (delete-duplicates - (sort (apply - append - (map (lambda (sub-key) - (let ((subdat (hash-table-ref ordered-data sub-key))) - (hash-table-keys subdat))) - a-keys)) - string>=?))) - - -(define (test:create-run-html runs area-name linktree numkeys header) - (map (lambda (run) - (let* ((target (string-join (take (vector->list run) numkeys) "/")) - (run-name (db:get-value-by-header run header "runname")) - (run-time (seconds->work-week/day-time (db:get-value-by-header run header "event_time"))) - (oup (if (file-exists? (conc linktree "/" target "/" run-name)) - (open-output-file (conc linktree "/" target "/" run-name "/run.html")) - #f)) - (run-id (db:get-value-by-header run header "id")) - (test-data (rmt:get-tests-for-run - run-id - "%" ;; testnamepatt - '() ;; states - '() ;; statuses - #f ;; offset - #f ;; num-to-get - #f ;; hide/not-hide - #f ;; sort-by - #f ;; sort-order - #f ;; 'shortlist ;; qrytype - 0 ;; last update - #f)) - (item-test-hash (test:get-test-hash test-data)) - (items (hash-table-keys item-test-hash)) - (test-names (test:get-data->b-keys item-test-hash items))) - (if oup - (begin - (s:output-new - oup - (s:html tests:css-jscript-block (tests:css-jscript-block-cond #f) - (s:title "Runs View " run-name) - (s:body - (s:h1 "Runs View " ) - (s:h3 "Target" target) - (s:p - (s:b "Run name" ) run-name) - (s:p - (s:b "Run Date" ) run-time) - (s:table 'border 1 'cellspacing 0 - (s:tr - (s:th "Items") - (map (lambda (test) - (s:th test)) - test-names)) - (map (lambda (item) - (let* ((test-hash (hash-table-ref/default item-test-hash item #f))) - (if test-hash - (begin - (s:tr - (s:td 'class "test" item) - (map (lambda (test) - (let* ((test-details (hash-table-ref/default test-hash test #f)) - (status (if test-details - (car test-details))) - (link (if test-details - (string-substitute (conc linktree "/" target "/" run-name "/") "" (cadr test-details) "-")))) - (if test-details - (s:td 'class status - (s:a 'class "link" 'href link status )) - (s:td "")))) - test-names)))))) - (sort items string<=?)))))) - (close-output-port oup)) - (debug:print-info 0 "Skip: Dirctory structure " linktree "/" target "/" run-name " does not exist. Megatest will not create run.html")))) -runs)) - -(define (test:create-target-hash runs header numkeys) - (let ((resh (make-hash-table))) - (for-each - (lambda (run) - (let* ((run-name (db:get-value-by-header run header "runname")) - (target (string-join (take (vector->list run) numkeys) "/")) - (run-list (hash-table-ref/default resh target #f))) - - (if (not run-list) - (hash-table-set! resh target (list run-name)) - (hash-table-set! resh target (cons run-name run-list))))) - runs) - resh)) - -(define (test:get-max-run-cnt target-hash targets) - (let* ((cnt 0 )) - (map (lambda (target) - (let* ((runs (hash-table-ref/default target-hash target #f)) - (run-length (if runs - (length runs) - 0))) - - (if (< cnt run-length) - (set! cnt run-length)))) - targets) -cnt)) - -(define (test:pad-runs target-hash targets max-row-length) - (map (lambda (target) - (let loop ((run-list (hash-table-ref/default target-hash target #f))) - (if (< (length run-list) max-row-length) - (begin - (hash-table-set! target-hash target (cons "" run-list)) - (loop (hash-table-ref/default target-hash target #f) ))))) - targets) - target-hash) - -(define (test:create-target-html target-hash oup area-name linktree) - (let* ((targets (hash-table-keys target-hash)) - (max-row-length (test:get-max-run-cnt target-hash targets)) - (pad-runs-hash (test:pad-runs target-hash targets max-row-length))) - (s:output-new - oup - (s:html tests:css-jscript-block (tests:css-jscript-block-cond #f) - - (s:title "Target View " area-name) - (s:body - (s:h1 "Target View " area-name) - (s:table 'id "LinkedList1" 'border "1" 'cellspacing 0 - (s:tr 'class "something" - (s:th "Target") - (s:th 'colspan max-row-length "Runs")) - (let* ((tbl (map (lambda (target) - (s:tr - (s:td 'class "test" target) - (let* ((runs (hash-table-ref/default target-hash target #f)) - (rest-row (map (lambda (run) - (if (equal? run "") - (s:td run) - (if (file-exists?(conc linktree "/" target "/" run )) - (begin - (s:td - (s:a 'href (conc target "/" run "/run.html") run)))))) - (reverse runs)))) - rest-row))) - targets))) - tbl))))) - (close-output-port oup))) - - -(define (tests:create-html-tree-old outf) - (let* ((lockfile (conc outf ".lock")) - (runs-to-process '())) - (if (common:simple-file-lock lockfile) - (let* ((linktree (common:get-linktree)) - (oup (open-output-file (or outf (conc linktree "/runs-index.html")))) - (area-name (common:get-testsuite-name)) - (keys (rmt:get-keys)) - (numkeys (length keys)) - (runsdat (rmt:get-runs "%" #f #f (map (lambda (x)(list x "%")) keys))) - (header (vector-ref runsdat 0)) - (runs (vector-ref runsdat 1)) - (runtreedat (map (lambda (x) - (tests:run-record->test-path x numkeys)) - runs)) - (runs-htree (common:list->htree runtreedat))) - (set! runs-to-process runs) - (s:output-new - oup - (s:html tests:css-jscript-block - (s:title "Summary for " area-name) - (s:body 'onload "addEvents();" - (s:h1 "Summary for " area-name) - ;; top list - (s:ul 'id "LinkedList1" 'class "LinkedList" - (s:li - "Runs" - (common:htree->html runs-htree - '() - (lambda (x p) - (let* ((targ-path (string-intersperse p "/")) - (full-path (conc linktree "/" targ-path)) - (run-name (car (reverse p)))) - (if (and (common:file-exists? full-path) - (directory? full-path) - (file-write-access? full-path)) - (s:a run-name 'href (conc targ-path "/run-summary.html")) - (begin - (debug:print 0 *default-log-port* "INFO: Can't create " targ-path "/run-summary.html") - (conc run-name " (Not able to create summary at " targ-path ")"))))))))))) - (close-output-port oup) - (common:simple-file-release-lock lockfile) - - (for-each - (lambda (run) - (let* ((test-subpath (tests:run-record->test-path run numkeys)) - (run-id (db:get-value-by-header run header "id")) - (run-dir (tests:run-record->test-path run numkeys)) - (test-dats (rmt:get-tests-for-run - run-id - "%/" ;; testnamepatt - '() ;; states - '() ;; statuses - #f ;; offset - #f ;; num-to-get - #f ;; hide/not-hide - #f ;; sort-by - #f ;; sort-order - #f ;; 'shortlist ;; qrytype - 0 ;; last update - #f)) - (tests-tree-dat (map (lambda (test-dat) - ;; (tests:run-record->test-path x numkeys)) - (let* ((test-name (db:test-get-testname test-dat)) - (item-path (db:test-get-item-path test-dat)) - (full-name (db:test-make-full-name test-name item-path)) - (path-parts (string-split full-name))) - path-parts)) - test-dats)) - (tests-htree (common:list->htree tests-tree-dat)) - (html-dir (conc linktree "/" (string-intersperse run-dir "/"))) - (html-path (conc html-dir "/run-summary.html")) - (oup (if (and (common:file-exists? html-dir) - (directory? html-dir) - (file-write-access? html-dir)) - (open-output-file html-path) - #f))) - ;; (print "run-dir: " run-dir ", tests-tree-dat: " tests-tree-dat) - (if oup - (begin - (s:output-new - oup - (s:html tests:css-jscript-block - (s:title "Summary for " area-name) - (s:body 'onload "addEvents();" - (s:h1 "Summary for " (string-intersperse run-dir "/")) - ;; top list - (s:ul 'id "LinkedList1" 'class "LinkedList" - (s:li - "Tests" - (common:htree->html tests-htree - '() - (lambda (x p) - (let* ((targ-path (string-intersperse p "/")) - (test-name (car p)) - (item-path ;; (if (> (length p) 2) ;; test-name + run-name - (string-intersperse p "/")) - (full-targ (conc html-dir "/" targ-path)) - (std-file (conc full-targ "/test-summary.html")) - (alt-file (conc full-targ "/megatest-rollup-" test-name ".html")) - (html-file (if (common:file-exists? alt-file) - alt-file - std-file)) - (run-name (car (reverse p)))) - (if (and (not (common:file-exists? full-targ)) - (directory? full-targ) - (file-write-access? full-targ)) - (tests:summarize-test - run-id - (rmt:get-test-id run-id test-name item-path))) - (if (common:file-exists? full-targ) - (s:a run-name 'href html-file) - (begin - (debug:print 0 *default-log-port* "ERROR: can't access " full-targ) - (conc "No summary for " run-name))))) - )))))) - (close-output-port oup))))) - runs) - #t) - #f))) - - - - - - - -;; CHECK - WAS THIS ADDED OR REMOVED? MANUAL MERGE WITH API STUFF!!! -;; -;; get a pretty table to summarize steps -;; -;; (define (dcommon:process-steps-table steps);; db test-id #!key (work-area #f)) -(define (tests:process-steps-table steps);; db test-id #!key (work-area #f)) -;; (let ((steps (db:get-steps-for-test db test-id work-area: work-area))) - ;; organise the steps for better readability - (let ((res (make-hash-table))) - (for-each - (lambda (step) - (debug:print 6 *default-log-port* "step=" step) - (let ((record (hash-table-ref/default - res - (tdb:step-get-stepname step) - ;; 0 1 2 3 4 5 6 7 - ;; stepname start end status Duration Logfile Comment first-id - (vector (tdb:step-get-stepname step) "" "" "" "" "" "" #f)))) - (debug:print 6 *default-log-port* "record(before) = " record - "\nid: " (tdb:step-get-id step) - "\nstepname: " (tdb:step-get-stepname step) - "\nstate: " (tdb:step-get-state step) - "\nstatus: " (tdb:step-get-status step) - "\ntime: " (tdb:step-get-event_time step)) - (if (not (vector-ref record 7))(vector-set! record 7 (tdb:step-get-id step))) ;; do not clobber the id if previously set - (case (string->symbol (tdb:step-get-state step)) - ((start)(vector-set! record 1 (tdb:step-get-event_time step)) - (vector-set! record 3 (if (equal? (vector-ref record 3) "") - (tdb:step-get-status step))) - (if (> (string-length (tdb:step-get-logfile step)) - 0) - (vector-set! record 5 (tdb:step-get-logfile step)))) - ((end) - (vector-set! record 2 (any->number (tdb:step-get-event_time step))) - (vector-set! record 3 (tdb:step-get-status step)) - (vector-set! record 4 (let ((startt (any->number (vector-ref record 1))) - (endt (any->number (vector-ref record 2)))) - (debug:print 4 *default-log-port* "record[1]=" (vector-ref record 1) - ", startt=" startt ", endt=" endt - ", get-status: " (tdb:step-get-status step)) - (if (and (number? startt)(number? endt)) - (seconds->hr-min-sec (- endt startt)) "-1"))) - (if (> (string-length (tdb:step-get-logfile step)) - 0) - (vector-set! record 5 (tdb:step-get-logfile step))) - (if (> (string-length (tdb:step-get-comment step)) - 0) - (vector-set! record 6 (tdb:step-get-comment step)))) - (else - (vector-set! record 2 (tdb:step-get-state step)) - (vector-set! record 3 (tdb:step-get-status step)) - (vector-set! record 4 (tdb:step-get-event_time step)) - (vector-set! record 6 (tdb:step-get-comment step)))) - (hash-table-set! res (tdb:step-get-stepname step) record) - (debug:print 6 *default-log-port* "record(after) = " record - "\nid: " (tdb:step-get-id step) - "\nstepname: " (tdb:step-get-stepname step) - "\nstate: " (tdb:step-get-state step) - "\nstatus: " (tdb:step-get-status step) - "\ntime: " (tdb:step-get-event_time step)))) - ;; (else (vector-set! record 1 (tdb:step-get-event_time step))) - (sort steps (lambda (a b) - (cond - ((< (tdb:step-get-event_time a)(tdb:step-get-event_time b)) #t) - ((eq? (tdb:step-get-event_time a)(tdb:step-get-event_time b)) - (< (tdb:step-get-id a) (tdb:step-get-id b))) - (else #f))))) - res)) - -;; -;; -(define (tests:get-compressed-steps run-id test-id) - (let* ((steps-data (rmt:get-steps-for-test run-id test-id)) ;; 0 1 2 3 4 5 6 7 - (comprsteps (tests:process-steps-table steps-data))) ;; # - (map (lambda (x) - ;; take advantage of the \n on time->string - (vector ;; we are constructing basically the original vector but collapsing start end records - (vector-ref x 0) ;; id 0 - (let ((s (vector-ref x 1))) - (if (number? s)(seconds->time-string s) s)) ;; starttime 1 - (let ((s (vector-ref x 2))) - (if (number? s)(seconds->time-string s) s)) ;; endtime 2 - (vector-ref x 3) ;; status 3 - (vector-ref x 4) ;; duration 4 - (vector-ref x 5) ;; logfile 5 - (vector-ref x 6) ;; comment 6 - (vector-ref x 7))) ;; id 7 - (sort (hash-table-values comprsteps) - (lambda (a b) - (let ((time-a (vector-ref a 1)) - (time-b (vector-ref b 1)) - (id-a (vector-ref a 7)) - (id-b (vector-ref b 7))) - (if (and (number? time-a)(number? time-b)) - (if (< time-a time-b) - #t - (if (eq? time-a time-b) - (< id-a id-b) - ;; (stringwork-week/day-time - (db:test-get-event_time test-dat))) - (s:td "Duration") (s:td (seconds->hr-min-sec (db:test-get-run_duration test-dat))))) - (s:h3 "Log files") - (s:table - 'cellspacing "0" 'border "1" - (s:tr (s:td "Final log")(s:td (s:a 'href logf logf)))) - (s:table - 'cellspacing "0" 'border "1" - (s:tr (s:td "Step Name")(s:td "Start")(s:td "End")(s:td "Status")(s:td "Duration")(s:td "Log File")) - (map (lambda (step-dat) - (s:tr (s:td (tdb:steps-table-get-stepname step-dat)) - (s:td (tdb:steps-table-get-start step-dat)) - (s:td (tdb:steps-table-get-end step-dat)) - (s:td (tdb:steps-table-get-status step-dat)) - (s:td (tdb:steps-table-get-runtime step-dat)) - (s:td (let ((step-log (tdb:steps-table-get-log-file step-dat))) - (s:a 'href step-log step-log))))) - steps-dat)) - ))) - (close-output-port oup))))) - - -;; MUST BE CALLED local! -;; -(define (tests:test-get-paths-matching keynames target fnamepatt #!key (res '())) - ;; BUG: Move the values derived from args to parameters and push to megatest.scm - (let* ((testpatt (or (args:get-arg "-testpatt")(args:get-arg "-testpatt") "%")) - (statepatt (or (args:get-arg "-state") (args:get-arg ":state") "%")) - (statuspatt (or (args:get-arg "-status") (args:get-arg ":status") "%")) - (runname (or (args:get-arg "-runname") (args:get-arg ":runname") "%")) - (paths-from-db (rmt:test-get-paths-matching-keynames-target-new keynames target res - testpatt - statepatt - statuspatt - runname))) - (if fnamepatt - (apply append - (map (lambda (p) - (if (directory-exists? p) - (let ((glob-query (conc p "/" fnamepatt))) - (handle-exceptions - exn - (begin - (print "built-in glob on " glob-query ", failed, try using the shell. exn=" exn) - (with-input-from-pipe - (conc "echo " glob-query) - read-lines)) ;; we aren't going to try too hard. If glob breaks it is likely because someone tried to do */*/*.log or similar - (glob glob-query))) - '())) - paths-from-db)) - paths-from-db))) - - -;;====================================================================== -;; Gather data from test/task specifications -;;====================================================================== - -;; (define (tests:get-valid-tests testsdir test-patts) ;; #!key (test-names '())) -;; (let ((tests (glob (conc testsdir "/tests/*")))) ;; " (string-translate patt "%" "*"))))) -;; (set! tests (filter (lambda (test)(common:file-exists? (conc test "/testconfig"))) tests)) -;; (delete-duplicates -;; (filter (lambda (testname) -;; (tests:match test-patts testname #f)) -;; (map (lambda (testp) -;; (last (string-split testp "/"))) -;; tests))))) - -(define (tests:get-test-path-from-environment) - (if (and (getenv "MT_LINKTREE") - (getenv "MT_TARGET") - (getenv "MT_RUNNAME") - (getenv "MT_TEST_NAME") - (getenv "MT_ITEMPATH")) - (conc (getenv "MT_LINKTREE") "/" - (getenv "MT_TARGET") "/" - (getenv "MT_RUNNAME") "/" - (getenv "MT_TEST_NAME") - (if (and (getenv "MT_ITEMPATH") - (not (string=? "" (getenv "MT_ITEMPATH")))) - (conc "/" (getenv "MT_ITEMPATH")) - "")) - #f)) - -;; if .testconfig exists in test directory read and return it -;; else if have cached copy in *testconfigs* return it IFF there is a section "have fulldata" -;; else read the testconfig file -;; if have path to test directory save the config as .testconfig and return it -;; -(define (tests:get-testconfig test-name item-path test-registry system-allowed #!key (force-create #f)(allow-write-cache #t)(wait-a-minute #f)) - (let* ((use-cache (common:use-cache?)) - (cache-path (tests:get-test-path-from-environment)) - (cache-file (and cache-path (conc cache-path "/.testconfig"))) - (cache-exists (and cache-file - (not force-create) ;; if force-create then pretend there is no cache to read - (common:file-exists? cache-file))) - (cached-dat (if (and (not force-create) - cache-exists - use-cache) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "failed to read " cache-file ", exn=" exn) - #f) ;; any issues, just give up with the cached version and re-read - (configf:read-alist cache-file)) - #f)) - (test-full-name (if (and item-path (not (string-null? item-path))) - (conc test-name "/" item-path) - test-name))) - (if cached-dat - cached-dat - (let ((dat (hash-table-ref/default *testconfigs* test-full-name #f))) - (if (and dat ;; have a locally cached version - (hash-table-ref/default dat "have fulldata" #f)) ;; marked as good data? - dat - ;; no cached data available - (let* ((treg (or test-registry - (tests:get-all))) - (test-path (or (hash-table-ref/default treg test-name #f) - (let* ((local-tcdir (conc (getenv "MT_LINKTREE") "/" - (getenv "MT_TARGET") "/" - (getenv "MT_RUNNAME") "/" - test-name "/" item-path)) - (local-tcfg (conc local-tcdir "/testconfig"))) - (if (common:file-exists? local-tcfg) - local-tcdir - #f)) - (conc *toppath* "/tests/" test-name))) - (test-configf (conc test-path "/testconfig")) - (testexists (let loopa ((tries-left 30)) - (cond - ( - (and (common:file-exists? test-configf)(file-read-access? test-configf)) - #t) - ( - (common:file-exists? test-configf) - (debug:print 0 *default-log-port* "WARNING: Cannot read testconfig file: "test-configf) - #f) - ( - (and wait-a-minute (> tries-left 0)) - (thread-sleep! 10) - (debug:print 0 *default-log-port* "WARNING: testconfig file does not exist: "test-configf" will retry in 10 seconds. Tries left: "tries-left) ;; BB: this fires - (loopa (sub1 tries-left))) - (else - (debug:print 2 *default-log-port* "WARNING: testconfig file does not exist: "test-configf) ;; BB: this fires - #f)))) - (tcfg (if testexists - (read-config test-configf #f system-allowed - environ-patt: (if system-allowed - "pre-launch-env-vars" - #f)) - #f))) - (if (and tcfg cache-file) (hash-table-set! tcfg "have fulldata" #t)) ;; mark this as fully read data - (if tcfg (hash-table-set! *testconfigs* test-full-name tcfg)) - (if (and testexists - cache-file - (file-write-access? cache-path) - allow-write-cache) - (let ((tpath (conc cache-path "/.testconfig"))) - (debug:print-info 1 *default-log-port* "Caching testconfig for " test-name " in " tpath) - (if (and tcfg (not (common:in-running-test?))) - (configf:write-alist tcfg tpath)))) - tcfg)))))) - -;; sort tests by priority and waiton -;; Move test specific stuff to a test unit FIXME one of these days -(define (tests:sort-by-priority-and-waiton test-records) - (if (eq? (hash-table-size test-records) 0) - '() - (let* ((mungepriority (lambda (priority) - (if priority - (let ((tmp (any->number priority))) - (if tmp tmp (begin (debug:print-error 0 *default-log-port* "bad priority value " priority ", using 0") 0))) - 0))) - (all-tests (hash-table-keys test-records)) - (all-waited-on (let loop ((hed (car all-tests)) - (tal (cdr all-tests)) - (res '())) - (let* ((trec (hash-table-ref test-records hed)) - (waitons (or (tests:testqueue-get-waitons trec) '()))) - (if (null? tal) - (append res waitons) - (loop (car tal)(cdr tal)(append res waitons)))))) - (sort-fn1 - (lambda (a b) - (let* ((a-record (hash-table-ref test-records a)) - (b-record (hash-table-ref test-records b)) - (a-waitons (or (tests:testqueue-get-waitons a-record) '())) - (b-waitons (or (tests:testqueue-get-waitons b-record) '())) - (a-config (tests:testqueue-get-testconfig a-record)) - (b-config (tests:testqueue-get-testconfig b-record)) - (a-raw-pri (configf:lookup a-config "requirements" "priority")) - (b-raw-pri (configf:lookup b-config "requirements" "priority")) - (a-priority (mungepriority a-raw-pri)) - (b-priority (mungepriority b-raw-pri))) - (tests:testqueue-set-priority! a-record a-priority) - (tests:testqueue-set-priority! b-record b-priority) - ;; (debug:print 0 *default-log-port* "a=" a ", b=" b ", a-waitons=" a-waitons ", b-waitons=" b-waitons) - (cond - ;; is - ((member a b-waitons) ;; is b waiting on a? - ;; (debug:print 0 *default-log-port* "case1") - #t) - ((member b a-waitons) ;; is a waiting on b? - ;; (debug:print 0 *default-log-port* "case2") - #f) - ((and (not (null? a-waitons)) ;; both have waitons - do not disturb - (not (null? b-waitons))) - ;; (debug:print 0 *default-log-port* "case2.1") - #t) - ((and (null? a-waitons) ;; no waitons for a but b has waitons - (not (null? b-waitons))) - ;; (debug:print 0 *default-log-port* "case3") - #f) - ((and (not (null? a-waitons)) ;; a has waitons but b does not - (null? b-waitons)) - ;; (debug:print 0 *default-log-port* "case4") - #t) - ((not (eq? a-priority b-priority)) ;; use - (> a-priority b-priority)) - (else - ;; (debug:print 0 *default-log-port* "case5") - (string>? a b)))))) - - (sort-fn2 - (lambda (a b) - (> (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records a))) - (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records b))))))) - ;; (let ((dot-res (tests:run-dot (tests:tests->dot test-records) "plain"))) - ;; (debug:print "dot-res=" dot-res)) - ;; (let ((data (map cdr (filter - ;; (lambda (x)(equal? "node" (car x))) - ;; (map string-split (tests:easy-dot test-records "plain")))))) - ;; (map car (sort data (lambda (a b) - ;; (> (string->number (caddr a))(string->number (caddr b))))))) - ;; )) - (sort all-tests sort-fn1)))) ;; avoid dealing with deleted tests, look at the hash table - -(define (tests:easy-dot test-records outtype) - (let-values (((fd temp-path) (file-mkstemp (conc "/tmp/" (current-user-name) ".XXXXXX")))) - (let ((all-testnames (hash-table-keys test-records)) - (temp-port (open-output-file* fd))) - ;; (format temp-port "This file is ~A.~%" temp-path) - (format temp-port "digraph tests {\n") - (format temp-port " size=4,8\n") - ;; (format temp-port " splines=none\n") - (for-each - (lambda (testname) - (let* ((testrec (hash-table-ref test-records testname)) - (waitons (or (tests:testqueue-get-waitons testrec) '()))) - (for-each - (lambda (waiton) - (format temp-port (conc " " waiton " -> " testname " [splines=ortho]\n"))) - waitons))) - all-testnames) - (format temp-port "}\n") - (close-output-port temp-port) - (with-input-from-pipe - (conc "env -i PATH=$PATH dot -T" outtype " < " temp-path) - (lambda () - (let ((res (read-lines))) - ;; (delete-file temp-path) - res)))))) - -(define (tests:write-dot-file test-records fname sizex sizey) - (if (file-write-access? (pathname-directory fname)) - (with-output-to-file fname - (lambda () - (map print (tests:tests->dot test-records sizex sizey)))))) - -(define (tests:tests->dot test-records sizex sizey) - (let ((all-testnames (hash-table-keys test-records))) - (if (null? all-testnames) - '() - (let loop ((hed (car all-testnames)) - (tal (cdr all-testnames)) - (res (list "digraph tests {" - (conc " size=\"" (or sizex 11) "," (or sizey 11) "\";") - " ratio=0.95;" - ))) - (let* ((testrec (hash-table-ref test-records hed)) - (waitons (or (tests:testqueue-get-waitons testrec) '())) - (newres (append res - (if (null? waitons) - (list (conc " \"" hed "\" [shape=box];")) - (map (lambda (waiton) - (conc " \"" waiton "\" -> \"" hed "\" [shape=box];")) - waitons) - )))) - (if (null? tal) - (append newres (list "}")) - (loop (car tal)(cdr tal) newres) - )))))) - -;; (tests:run-dot (list "digraph tests {" "a -> b" "}") "plain") - -(define (tests:run-dot indat outtype) ;; outtype is plain, fig, dot, etc. http://www.graphviz.org/content/output-formats - (let-values (((inp oup pid)(process "env -i PATH=\"$PATH\" dot" (list "-T" outtype)))) - (with-output-to-port oup - (lambda () - (map print indat))) - (close-output-port oup) - (let ((res (with-input-from-port inp - (lambda () - (read-lines))))) - (close-input-port inp) - res))) - -;; read data from tmp file or create if not exists -;; if exists regen in background -;; -(define (tests:lazy-dot testrecords outtype sizex sizey) - (let ((dfile (conc "/tmp/." (current-user-name) "-" (server:mk-signature) ".dot")) - (fname (conc "/tmp/." (current-user-name) "-" (server:mk-signature) ".dotdat"))) - (tests:write-dot-file testrecords dfile sizex sizey) - (if (common:file-exists? fname) - (let ((res (with-input-from-file fname - (lambda () - (read-lines))))) - (system (conc "env -i PATH=\"$PATH\" dot -T " outtype " < " dfile " > " fname "&")) - res) - (begin - (system (conc "env -i PATH=\"$PATH\" dot -T " outtype " < " dfile " > " fname)) - (with-input-from-file fname - (lambda () - (read-lines))))))) - - -;; for each test: -;; -(define (tests:filter-non-runnable run-id testkeynames testrecordshash) - (let ((runnables '())) - (for-each - (lambda (testkeyname) - (let* ((test-record (hash-table-ref testrecordshash testkeyname)) - (test-name (tests:testqueue-get-testname test-record)) - (itemdat (tests:testqueue-get-itemdat test-record)) - (item-path (tests:testqueue-get-item_path test-record)) - (waitons (tests:testqueue-get-waitons test-record)) - (keep-test #t) - (test-id (rmt:get-test-id run-id test-name item-path)) - (tdat (rmt:get-testinfo-state-status run-id test-id))) ;; (cdb:get-test-info-by-id *runremote* test-id))) - (if tdat - (begin - ;; Look at the test state and status - (if (or (and (member (db:test-get-status tdat) - '("PASS" "WARN" "WAIVED" "CHECK" "SKIP")) - (equal? (db:test-get-state tdat) "COMPLETED")) - (member (db:test-get-state tdat) - '("INCOMPLETE" "KILLED"))) - (set! keep-test #f)) - - ;; examine waitons for any fails. If it is FAIL or INCOMPLETE then eliminate this test - ;; from the runnable list - (if keep-test - (for-each (lambda (waiton) - ;; for now we are waiting only on the parent test - (let* ((parent-test-id (rmt:get-test-id run-id waiton "")) - (wtdat (rmt:get-testinfo-state-status run-id test-id))) ;; (cdb:get-test-info-by-id *runremote* test-id))) - (if (or (and (equal? (db:test-get-state wtdat) "COMPLETED") - (member (db:test-get-status wtdat) '("FAIL" "ABORT"))) - (member (db:test-get-status wtdat) '("KILLED")) - (member (db:test-get-state wtdat) '("INCOMPETE"))) - ;; (if (or (member (db:test-get-status wtdat) - ;; '("FAIL" "KILLED")) - ;; (member (db:test-get-state wtdat) - ;; '("INCOMPETE"))) - (set! keep-test #f)))) ;; no point in running this one again - waitons)))) - (if keep-test (set! runnables (cons testkeyname runnables))))) - testkeynames) - runnables)) - -;;====================================================================== -;; refactoring this block into tests:get-full-data from line 263 of runs.scm -;;====================================================================== -;; hed is the test name -;; test-records is a hash of test-name => test record -(define (tests:get-full-data test-names test-records required-tests all-tests-registry) - (let ((missing-waitons (make-hash-table))) - (if (not (null? test-names)) - (let loop ((hed (car test-names)) - (tal (cdr test-names))) ;; 'return-procs tells the config reader to prep running system but return a proc - (debug:print-info 4 *default-log-port* "hed=" hed " at top of loop") - ;; don't know item-path at this time, let the testconfig get the top level testconfig - (let* ((config (tests:get-testconfig hed #f all-tests-registry 'return-procs)) - (waitons (let ((instr (if config - (configf:lookup config "requirements" "waiton") - (begin ;; No config means this is a non-existent test - (let ((waiters '())) - ;; find the waiter(s) for this waiton. - (for-each - (lambda(waiter) - ;; (print "test-record = " (hash-table-ref test-records waiter)) - ;; (print "waitons = " (vector-ref (hash-table-ref test-records waiter) 2)) - (if (member hed (vector-ref (hash-table-ref test-records waiter) 2)) - (set! waiters (cons waiter waiters)) - ) - ) - (hash-table-keys test-records)) - (hash-table-set! missing-waitons hed waiters) - ) - "")))) - (debug:print-info 8 *default-log-port* "waitons string is " instr) - (string-split (cond - ((procedure? instr) - (let ((res (instr))) - (debug:print-info 8 *default-log-port* "waiton procedure results in string " res " for test " hed) - res)) - ((string? instr) instr) - (else - ;; NOTE: This is actually the case of *no* waitons! ;; - "")))))) - (if (not config) ;; this is a non-existant test called in a waiton. - (if (null? tal) - test-records - (loop (car tal)(cdr tal))) - (begin - (debug:print-info 8 *default-log-port* "waitons: " waitons) - ;; check for hed in waitons => this would be circular, remove it and issue an - ;; error - (if (member hed waitons) - (begin - (debug:print-error 0 *default-log-port* "test " hed " has listed itself as a waiton, please correct this!") - (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons)))) - - ;; (items (items:get-items-from-config config))) - (if (not (hash-table-ref/default test-records hed #f)) - (hash-table-set! test-records - hed (vector hed ;; 0 - config ;; 1 - waitons ;; 2 - (configf:lookup config "requirements" "priority") ;; priority 3 - (let ((items (hash-table-ref/default config "items" #f)) ;; items 4 - (itemstable (hash-table-ref/default config "itemstable" #f))) - ;; if either items or items table is a proc return it so test running - ;; process can know to call items:get-items-from-config - ;; if either is a list and none is a proc go ahead and call get-items - ;; otherwise return #f - this is not an iterated test - (cond - ((procedure? items) - (debug:print-info 4 *default-log-port* "items is a procedure, will calc later") - items) ;; calc later - ((procedure? itemstable) - (debug:print-info 4 *default-log-port* "itemstable is a procedure, will calc later") - itemstable) ;; calc later - ((filter (lambda (x) - (let ((val (car x))) - (if (procedure? val) val #f))) - (append (if (list? items) items '()) - (if (list? itemstable) itemstable '()))) - 'have-procedure) - ((or (list? items)(list? itemstable)) ;; calc now - (debug:print-info 4 *default-log-port* "items and itemstable are lists, calc now\n" - " items: " items " itemstable: " itemstable) - (items:get-items-from-config config)) - (else #f))) ;; not iterated - #f ;; itemsdat 5 - #f ;; spare - used for item-path - ))) - (for-each - (lambda (waiton) - (if (and waiton (not (string= "#f" waiton)) (not (member waiton test-names))) - (begin - (set! required-tests (cons waiton required-tests)) - (set! test-names (cons waiton test-names))))) ;; was an append, now a cons - waitons) - (let ((remtests (delete-duplicates (append waitons tal)))) - (if (not (null? remtests)) - (loop (car remtests)(cdr remtests)) - test-records))))))) - (for-each - (lambda (missing-waiton) - (debug:print-error 0 *default-log-port* "non-existent test \"" missing-waiton "\" is a waiton for tests " (hash-table-ref missing-waitons missing-waiton)) - ) - (hash-table-keys missing-waitons) - ) -)) - -;;====================================================================== -;; test steps -;;====================================================================== - -;; teststep-set-status! used to be here - -;; NOT NEEDED -#;(define (test-get-kill-request run-id test-id) ;; run-id test-name itemdat) - (let* ((testdat (rmt:get-test-state-status-by-id run-id test-id))) - (and testdat - (equal? (car testdat) "KILLREQ")))) - -(define (test:tdb-get-rundat-count tdb) - (if tdb - (let ((res 0)) - (sqlite3:for-each-row - (lambda (count) - (set! res count)) - tdb - "SELECT count(id) FROM test_rundat;") - res)) - 0) - -(define (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname) - (rmt:general-call 'update-test-rundat run-id test-id (current-seconds) (or cpuload -1)(or diskfree -1) -1 (or minutes -1)) - (if (and cpuload diskfree) - (rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id)) - (if minutes - (rmt:general-call 'update-run-duration run-id minutes test-id)) - (if (and uname hostname) - (rmt:general-call 'update-uname-host run-id uname hostname test-id))) - -;; This one is for running with no db access (i.e. via rmt: internally) -(define (tests:set-full-meta-info db test-id run-id minutes work-area remtries) -;; (define (tests:set-full-meta-info test-id run-id minutes work-area) -;; (let ((remtries 10)) - (let* ((cpuload (commonmod:get-cpu-load)) - (diskfree (get-df (current-directory))) - (uname (get-uname "-srvpio")) - (hostname (get-host-name))) - (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname))) - -;; (define (tests:set-partial-meta-info test-id run-id minutes work-area) -#;(define (tests:set-partial-meta-info test-id run-id minutes work-area remtries) - (let* ((cpuload (get-cpu-load)) - (diskfree (get-df (current-directory))) - (remtries 10)) - (handle-exceptions - exn - (if (> remtries 0) - (begin - (print-call-chain (current-error-port)) - (debug:print-info 0 *default-log-port* "WARNING: failed to set meta info. Will try " remtries " more times") - (set! remtries (- remtries 1)) - (thread-sleep! 10) - (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1))) - (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) - (debug:print-error 0 *default-log-port* "tried for over a minute to update meta info and failed. Giving up") - (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) - (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status) exn)) - (print-call-chain (current-error-port)))) - (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes) - ))) - -;;====================================================================== -;; A R C H I V I N G -;;====================================================================== - -(define (test:archive db test-id) - #f) - -(define (test:archive-tests db keynames target) - #f) + rmtmod + megatestmod + tasksmod + ) +(require-library stml) ADDED testsmod.scm Index: testsmod.scm ================================================================== --- /dev/null +++ testsmod.scm @@ -0,0 +1,1441 @@ +;;====================================================================== +;; Copyright 2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +;;====================================================================== +;; Cpumod: +;; +;; Put things here don't fit anywhere else +;;====================================================================== + +(declare (unit testsmod)) +(declare (uses debugprint)) +(declare (uses mtargs)) +(declare (uses commonmod)) +(declare (uses configfmod)) +(declare (uses dbmod)) +(declare (uses dbfile)) +(declare (uses megatestmod)) +(declare (uses rmtmod)) +(declare (uses stml2)) +(declare (uses mtmod)) +(declare (uses servermod)) + +(use srfi-69) + +(module testsmod + * + +(import scheme) +(cond-expand + (chicken-4 + + (import chicken + ports + (prefix base64 base64:) + + (prefix sqlite3 sqlite3:) + data-structures + extras + files + matchable + md5 + message-digest + pathname-expand + posix + posix-extras + regex + regex-case + sparse-vectors + srfi-1 + srfi-13 + srfi-18 + srfi-69 + typed-records + z3 + + debugprint + (prefix mtargs args:) + ) + (use srfi-69)) + (chicken-5 + (import (prefix sqlite3 sqlite3:) + ;; data-structures + ;; extras + ;; files + ;; posix + ;; posix-extras + chicken.base + chicken.condition + chicken.file + chicken.file.posix + chicken.io + chicken.pathname + chicken.port + chicken.process + chicken.process-context + chicken.process-context.posix + chicken.sort + chicken.string + chicken.time + chicken.time.posix + + matchable + md5 + message-digest + pathname-expand + regex + regex-case + srfi-1 + srfi-18 + srfi-69 + typed-records + system-information + + debugprint + ))) + +(import directory-utils + + debugprint + commonmod + configfmod + dbmod + dbfile + megatestmod + rmtmod + stml2 + mtmod + servermod + ) + +(include "common_records.scm") +(include "key_records.scm") +(include "db_records.scm") +(include "run_records.scm") +(include "test_records.scm") +(include "js-path.scm") + +(define (init-java-script-lib) + (set! *java-script-lib* (conc (common:get-install-area) "/share/js/jquery-3.1.0.slim.min.js")) + ) +(define (tests:summarize-items run-id test-id test-name force) + ;; if not force then only update the record if one of these is true: + ;; 1. logf is "log/final.log + ;; 2. logf is same as outputfilename + (let* ((outputfilename (conc "megatest-rollup-" test-name ".html")) + (orig-dir (current-directory)) + (logf-info (rmt:test-get-logfile-info run-id test-name)) + (logf (if logf-info (cadr logf-info) #f)) + (path (if logf-info (car logf-info) #f))) + ;; This query finds the path and changes the directory to it for the test + (if (and (string? path) + (directory? path)) ;; can get #f here under some wierd conditions. why, unknown ... + (begin + (debug:print 4 *default-log-port* "Found path: " path) + (change-directory path)) + ;; (set! outputfilename (conc path "/" outputfilename))) + (debug:print-error 0 *default-log-port* "summarize-items for run-id=" run-id ", test-name=" test-name ", no such path: " path)) + (debug:print 4 *default-log-port* "summarize-items with logf " logf ", outputfilename " outputfilename " and force " force) + (if (or (equal? logf "logs/final.log") + (equal? logf outputfilename) + force) + (let ((my-start-time (current-seconds)) + (lockf (conc outputfilename ".lock"))) + (let loop ((have-lock (common:simple-file-lock lockf))) + (if have-lock + (let ((script (configf:lookup *configdat* "testrollup" test-name))) + (debug:print 0 *default-log-port* "Obtained lock for " outputfilename) + (rmt:set-state-status-and-roll-up-items run-id test-name "" #f #f #f) + (if script + (system (conc script " > " outputfilename " & ")) + (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename)) + (common:simple-file-release-lock lockf) + (change-directory orig-dir) + ;; NB// tests:test-set-toplog! is remote internal... + (tests:test-set-toplog! run-id test-name outputfilename)) + ;; didn't get the lock, check to see if current update started later than this + ;; update, if so we can exit without doing any work + (if (> my-start-time (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "failed to get mod time on " lockf ", exn=" exn) + 0) + (file-modification-time lockf))) + ;; we started since current re-gen in flight, delay a little and try again + (begin + (debug:print-info 1 *default-log-port* "Waiting to update " outputfilename ", another test currently updating it") + (thread-sleep! (+ 5 (random 5))) ;; delay between 5 and 10 seconds + (loop (common:simple-file-lock lockf)))))))))) + +(define (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename) + (let ((counts (make-hash-table)) + (statecounts (make-hash-table)) + (outtxt "") + (tot 0) + (testdat (rmt:test-get-records-for-index-file run-id test-name))) + (with-output-to-file outputfilename + (lambda () + (set! outtxt (conc outtxt "Summary: " test-name + "

Summary for " test-name "

")) + (for-each + (lambda (testrecord) + (let ((id (vector-ref testrecord 0)) + (itempath (vector-ref testrecord 1)) + (state (vector-ref testrecord 2)) + (status (vector-ref testrecord 3)) + (run_duration (vector-ref testrecord 4)) + (logf (vector-ref testrecord 5)) + (comment (vector-ref testrecord 6))) + (hash-table-set! counts status (+ 1 (hash-table-ref/default counts status 0))) + (hash-table-set! statecounts state (+ 1 (hash-table-ref/default statecounts state 0))) + (set! outtxt (conc outtxt "" + ;; " " itempath "" + " " itempath "" + "" state "" + "" status "" + "" (if (equal? comment "") + " " + comment) "" + "")))) + (if (list? testdat) + testdat + (begin + (debug:print 0 *default-log-port* "ERROR: failed to get records with rmt:test-get-records-for-index-file run-id=" run-id "test-name=" test-name) + '()))) + + (print "
") + ;; Print out stats for status + (set! tot 0) + (print "") + (for-each (lambda (state) + (set! tot (+ tot (hash-table-ref statecounts state))) + (print "")) + (hash-table-keys statecounts)) + (print "

State stats

" state "" (hash-table-ref statecounts state) "
Total" tot "
") + (print "
") + ;; Print out stats for state + (set! tot 0) + (print "") + (for-each (lambda (status) + (set! tot (+ tot (hash-table-ref counts status))) + (print "")) + (hash-table-keys counts)) + (print "

Status stats

" status + "" (hash-table-ref counts status) "
Total" tot "
") + (print "
") + + (print "" + "" + outtxt "
ItemStateStatusComment
") + ;; (release-dot-lock outputfilename) + ;;(rmt:update-run-stats + ;; run-id + ;; (hash-table-map + ;; state-status-counts + ;; (lambda (key val) + ;; (append key (list val))))) + )))) + +(define tests:css-jscript-block +#< +ul.LinkedList { display: block; } +/* ul.LinkedList ul { display: none; } */ +.HandCursorStyle { cursor: pointer; cursor: hand; } /* For IE */ +th {background-color: #8c8c8c;} +td.test {background-color: #d9dbdd;} +td.PASS {background-color: #347533;} +td.FAIL {background-color: #cc2812;} +td.SKIP{background-color: #FFD733;} +td.WARN {background-color: #EA8724;} +td.WAIVED {background-color: #838A12;} +td.ABORT{background-color: #EA24B7;} +.PASS .link, .SKIP .link, .WARN .link,.WAIVED .link,.ABORT .link, .FAIL .link{color: #FFFFFF;} + + + + + + +EOF +) + +(define tests:css-jscript-block-dynamic +#< +EOF +) + +(define (test:js-block javascript-lib) + (conc "" )) + + +(define tests:css-jscript-block-static (test:js-block *java-script-lib*)) + +(define (tests:css-jscript-block-cond dynamic) + (if (equal? dynamic #t) + tests:css-jscript-block-dynamic + tests:css-jscript-block-static)) + + +(define (tests:run-record->test-path run numkeys) + (append (take (vector->list run) numkeys) + (list (vector-ref run (+ 1 numkeys))))) + + +(define (tests:get-rest-data runs header numkeys) + (let ((resh (make-hash-table))) + (for-each + (lambda (run) + (let* ((run-id (db:get-value-by-header run header "id")) + (run-dir (tests:run-record->test-path run numkeys)) + (test-data (rmt:get-tests-for-run + run-id + "%" ;; testnamepatt + '() ;; states + '() ;; statuses + #f ;; offset + #f ;; num-to-get + #f ;; hide/not-hide + #f ;; sort-by + #f ;; sort-order + #f ;; 'shortlist ;; qrytype + 0 ;; last update + #f))) + + (map (lambda (test) + (let* ((test-name (vector-ref test 2)) + (test-html-path (conc (vector-ref test 10) "/" (vector-ref test 13))) + (test-item (conc test-name ":" (vector-ref test 11))) + (test-status (vector-ref test 4))) + + (if (not (hash-table-ref/default resh test-name #f)) + (hash-table-set! resh test-name (make-hash-table))) + (if (not (hash-table-ref/default (hash-table-ref/default resh test-name #f) test-item #f)) + (hash-table-set! (hash-table-ref/default resh test-name #f) test-item (make-hash-table))) + (hash-table-set! (hash-table-ref/default (hash-table-ref/default resh test-name #f) test-item #f) run-id (list test-status test-html-path)))) + test-data))) + runs) + resh)) + + +;; tests:genrate dashboard body +;; + +(define (tests:dashboard-body page pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links flag run-patt target-patt) + (let* ((start (* page pg-size)) + ;(runsdat (rmt:get-runs "%" pg-size start (map (lambda (x)(list x "%")) keys))) + (runsdat (rmt:get-runs-by-patt keys run-patt target-patt start pg-size #f 0 sort-order: "desc")) + ; db:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-update + (header (vector-ref runsdat 0)) + (runs (vector-ref runsdat 1)) + (ctr 0) + (test-runs-hash (tests:get-rest-data runs header numkeys)) + (test-list (hash-table-keys test-runs-hash))) + + (s:html tests:css-jscript-block (tests:css-jscript-block-cond flag) + (s:title "Summary for " area-name) + (s:body 'onload "addEvents();" + (get-prev-links page linktree) + (get-next-links page linktree total-runs) + + (s:h1 "Summary for " area-name) + (s:h3 "Filter" ) + (s:input 'type "text" 'name "testname" 'id "testname" 'length "30" 'onkeyup "filtersome()") + ;; top list + + (s:table 'id "LinkedList1" 'border "1" 'cellspacing 0 + (map (lambda (key) + (let* ((res (s:tr 'class "something" + (s:th key ) + (map (lambda (run) + (s:th (vector-ref run ctr))) + runs)))) + (set! ctr (+ ctr 1)) + res)) + keys) + (s:tr + (s:th "Run Name") + (map (lambda (run) + (s:th (db:get-value-by-header run header "runname"))) + runs)) + + (map (lambda (test-name) + (let* ((item-hash (hash-table-ref/default test-runs-hash test-name #f)) + (item-keys (sort (hash-table-keys item-hash) string<=?))) + (map (lambda (item-name) + (let* ((res (s:tr 'class item-name + (s:td item-name 'class "test" ) + (map (lambda (run) + (let* ((run-test (hash-table-ref/default item-hash item-name #f)) + (run-id (db:get-value-by-header run header "id")) + (result (hash-table-ref/default run-test run-id "n/a")) + ;(relative-path (get-relative-path)) + (status (if (string? result) + result + (car result))) + (link (if (string? result) + result + (if (equal? flag #t) + (s:a (car result) 'href (conc "./test_log?runid=" run-id "&testname=" item-name )) + (s:a (car result) 'href (string-substitute (conc linktree "/") "" (cadr result) "-")))))) + (s:td link 'class status))) + runs)))) + res)) + item-keys))) + test-list)))))) + +;; (tests:create-html-tree "test-index.html") +;; +(define (tests:create-html-tree outf) + (let* ((lockfile (conc outf ".lock")) + (runs-to-process '()) + (linktree (common:get-linktree)) + (area-name (common:get-testsuite-name)) + (keys (rmt:get-keys)) + (numkeys (length keys)) + (run-patt (or (args:get-arg "-run-patt") + (args:get-arg "-runname") + "%")) + (target (or (args:get-arg "-target-patt") + (args:get-arg "-target") + "%")) + (targlist (string-split target "/")) + (numtarg (length targlist)) + (targtweaked (if (> numkeys numtarg) + (append targlist (make-list (- numkeys numtarg) "%")) + targlist)) + (target-patt (string-join targtweaked "/")) + ;(total-runs (rmt:get-num-runs "%")) ;;this needs to be changed to filter by target + (total-runs (rmt:get-runs-cnt-by-patt run-patt target-patt keys )) + (pg-size 10)) + (if (common:simple-file-lock lockfile) + (begin + ;(print total-runs) + (let loop ((page 0)) + (let* ((oup (open-output-file (or outf (conc linktree "/page" page ".html")))) + (get-prev-links (lambda (page linktree ) + (let* ((link (if (not (eq? page 0)) + (s:a "<<prev" 'href (conc "page" (- page 1) ".html")) + (s:a "" 'href (conc "page" page ".html"))))) + link))) + (get-next-links (lambda (page linktree total-runs) + (let* ((link (if (> total-runs (+ 10 (* page pg-size))) + (s:a "next>>" 'href (conc "page" (+ page 1) ".html")) + (s:a "" 'href (conc "page" page ".html"))))) + link))) ) + (print "total runs: " total-runs) + (s:output-new + oup + (tests:dashboard-body page pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links #f run-patt target-patt)) ;; update this function + (close-output-port oup) + ; (set! page (+ 1 page)) + (if (> total-runs (* (+ 1 page) pg-size)) + (loop (+ 1 page))))) + (common:simple-file-release-lock lockfile)) + (begin + (debug:print 0 *default-log-port* "Failed to get lock on file outf, lockfile: " lockfile) #f)))) + + +(define (tests:readlines filename) + (call-with-input-file filename + (lambda (p) + (let loop ((line (read-line p)) + (result '())) + (if (eof-object? line) + (reverse result) + (loop (read-line p) (cons line result))))))) + +(define (tests:get-test-log run-id test-name item-name) + (let* ((test-data (rmt:get-tests-for-run + (string->number run-id) + test-name ;; testnamepatt + '() ;; states + '() ;; statuses + #f ;; offset + #f ;; num-to-get + #f ;; hide/not-hide + #f ;; sort-by + #f ;; sort-order + #f ;; 'shortlist ;; qrytype + 0 ;; last update + #f)) + (path "") + (found 0)) + (debug:print-info 0 *default-log-port* "found: " found ) + + (let loop ((hed (car test-data)) + (tal (cdr test-data))) + (debug:print-info 0 *default-log-port* "item: " (vector-ref hed 11) (vector-ref hed 10) "/" (vector-ref hed 13)) + + (if (equal? (vector-ref hed 11) item-name) + (begin + (set! found 1) + (set! path (conc (vector-ref hed 10) "/" (vector-ref hed 13))))) + (if (and (not (null? tal)) (equal? found 0)) + (loop (car tal)(cdr tal)))) + (if (equal? path "") + "

Data not found

" + (string-join (tests:readlines path) "\n")))) + + +(define (tests:dynamic-dboard page) +;(define (tests:create-html-tree o) + (let* ( +;(page "1") + (linktree (common:get-linktree)) + (area-name (common:get-testsuite-name)) + (keys (rmt:get-keys)) + (numkeys (length keys)) + (targtweaked (make-list numkeys "%")) + (target-patt (string-join targtweaked "/")) + (total-runs (rmt:get-num-runs "%")) + (pg-size 10) + (pg (if (equal? page #f) + 0 + (- (string->number page) 1))) + (get-prev-links (lambda (pg linktree) + (debug:print-info 0 *default-log-port* "val: " (- 1 pg)) + (let* ((link (if (not (eq? pg 0)) + (s:a "<<prev " 'href (conc "dashboard?page=" pg )) + (s:a "" 'href (conc "dashboard?page=" pg))))) + link))) + (get-next-links (lambda (pg linktree total-runs) + (debug:print-info 0 *default-log-port* "val: " pg) + (debug:print-info 0 *default-log-port* "val: " total-runs " size" pg-size) + + (let* ((link (if (> total-runs (+ 10 (* pg pg-size))) + (s:a "next>> " 'href (conc "dashboard?page=" (+ pg 2) )) + (s:a "" 'href (conc "dashboard?page=" pg ))))) + link))) + (html-body (tests:dashboard-body pg pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links #t "%" target-patt))) ;; update tis function + html-body)) + +(define (tests:create-html-summary outf) + (let* ((lockfile (conc outf ".lock")) + (linktree (common:get-linktree)) + (keys (rmt:get-keys)) + (area-name (common:get-testsuite-name)) + (run-patt (or (args:get-arg "-run-patt") + (args:get-arg "-runname") + "%")) + (target (or (args:get-arg "-target-patt") + (args:get-arg "-target") + "%")) + (targlist (string-split target "/")) + (numkeys (length keys)) + (numtarg (length targlist)) + (targtweaked (if (> numkeys numtarg) + (append targlist (make-list (- numkeys numtarg) "%")) + targlist)) + (target-patt (string-join targtweaked "/"))) + (if (common:simple-file-lock lockfile) + (begin + (let* (;(runsdat1 (rmt:get-runs run-patt #f #f (map (lambda (x)(list x "%")) keys))) + (runsdat (rmt:get-runs-by-patt keys run-patt target-patt #f #f #f 0)) + (runs (vector-ref runsdat 1)) + (header (vector-ref runsdat 0)) + (oup (open-output-file (or outf (conc linktree "/targets.html")))) + (target-hash (test:create-target-hash runs header (length keys)))) + (test:create-target-html target-hash oup area-name linktree) + (test:create-run-html runs area-name linktree (length keys) header)) + (common:simple-file-release-lock lockfile)) + #f))) + +(define (test:get-test-hash test-data) + (let ((resh (make-hash-table))) + (map (lambda (test) + (let* ((test-name (vector-ref test 2)) + (test-html-path (if (file-exists? (conc (vector-ref test 10) "/test-summary.html")) + (conc (vector-ref test 10) "/test-summary.html" ) + (conc (vector-ref test 10) "/" (vector-ref test 13)))) + (test-item (vector-ref test 11)) + (test-status (vector-ref test 4))) + (if (not (hash-table-ref/default resh test-item #f)) + (hash-table-set! resh test-item (make-hash-table))) + (hash-table-set! (hash-table-ref/default resh test-item #f) test-name (list test-status test-html-path)))) + test-data) +resh)) + +(define (test:get-data->b-keys ordered-data a-keys) + (delete-duplicates + (sort (apply + append + (map (lambda (sub-key) + (let ((subdat (hash-table-ref ordered-data sub-key))) + (hash-table-keys subdat))) + a-keys)) + string>=?))) + + +(define (test:create-run-html runs area-name linktree numkeys header) + (map (lambda (run) + (let* ((target (string-join (take (vector->list run) numkeys) "/")) + (run-name (db:get-value-by-header run header "runname")) + (run-time (seconds->work-week/day-time (db:get-value-by-header run header "event_time"))) + (oup (if (file-exists? (conc linktree "/" target "/" run-name)) + (open-output-file (conc linktree "/" target "/" run-name "/run.html")) + #f)) + (run-id (db:get-value-by-header run header "id")) + (test-data (rmt:get-tests-for-run + run-id + "%" ;; testnamepatt + '() ;; states + '() ;; statuses + #f ;; offset + #f ;; num-to-get + #f ;; hide/not-hide + #f ;; sort-by + #f ;; sort-order + #f ;; 'shortlist ;; qrytype + 0 ;; last update + #f)) + (item-test-hash (test:get-test-hash test-data)) + (items (hash-table-keys item-test-hash)) + (test-names (test:get-data->b-keys item-test-hash items))) + (if oup + (begin + (s:output-new + oup + (s:html tests:css-jscript-block (tests:css-jscript-block-cond #f) + (s:title "Runs View " run-name) + (s:body + (s:h1 "Runs View " ) + (s:h3 "Target" target) + (s:p + (s:b "Run name" ) run-name) + (s:p + (s:b "Run Date" ) run-time) + (s:table 'border 1 'cellspacing 0 + (s:tr + (s:th "Items") + (map (lambda (test) + (s:th test)) + test-names)) + (map (lambda (item) + (let* ((test-hash (hash-table-ref/default item-test-hash item #f))) + (if test-hash + (begin + (s:tr + (s:td 'class "test" item) + (map (lambda (test) + (let* ((test-details (hash-table-ref/default test-hash test #f)) + (status (if test-details + (car test-details))) + (link (if test-details + (string-substitute (conc linktree "/" target "/" run-name "/") "" (cadr test-details) "-")))) + (if test-details + (s:td 'class status + (s:a 'class "link" 'href link status )) + (s:td "")))) + test-names)))))) + (sort items string<=?)))))) + (close-output-port oup)) + (debug:print-info 0 "Skip: Dirctory structure " linktree "/" target "/" run-name " does not exist. Megatest will not create run.html")))) +runs)) + +(define (test:create-target-hash runs header numkeys) + (let ((resh (make-hash-table))) + (for-each + (lambda (run) + (let* ((run-name (db:get-value-by-header run header "runname")) + (target (string-join (take (vector->list run) numkeys) "/")) + (run-list (hash-table-ref/default resh target #f))) + + (if (not run-list) + (hash-table-set! resh target (list run-name)) + (hash-table-set! resh target (cons run-name run-list))))) + runs) + resh)) + +(define (test:get-max-run-cnt target-hash targets) + (let* ((cnt 0 )) + (map (lambda (target) + (let* ((runs (hash-table-ref/default target-hash target #f)) + (run-length (if runs + (length runs) + 0))) + + (if (< cnt run-length) + (set! cnt run-length)))) + targets) +cnt)) + +(define (test:pad-runs target-hash targets max-row-length) + (map (lambda (target) + (let loop ((run-list (hash-table-ref/default target-hash target #f))) + (if (< (length run-list) max-row-length) + (begin + (hash-table-set! target-hash target (cons "" run-list)) + (loop (hash-table-ref/default target-hash target #f) ))))) + targets) + target-hash) + +(define (test:create-target-html target-hash oup area-name linktree) + (let* ((targets (hash-table-keys target-hash)) + (max-row-length (test:get-max-run-cnt target-hash targets)) + (pad-runs-hash (test:pad-runs target-hash targets max-row-length))) + (s:output-new + oup + (s:html tests:css-jscript-block (tests:css-jscript-block-cond #f) + + (s:title "Target View " area-name) + (s:body + (s:h1 "Target View " area-name) + (s:table 'id "LinkedList1" 'border "1" 'cellspacing 0 + (s:tr 'class "something" + (s:th "Target") + (s:th 'colspan max-row-length "Runs")) + (let* ((tbl (map (lambda (target) + (s:tr + (s:td 'class "test" target) + (let* ((runs (hash-table-ref/default target-hash target #f)) + (rest-row (map (lambda (run) + (if (equal? run "") + (s:td run) + (if (file-exists?(conc linktree "/" target "/" run )) + (begin + (s:td + (s:a 'href (conc target "/" run "/run.html") run)))))) + (reverse runs)))) + rest-row))) + targets))) + tbl))))) + (close-output-port oup))) + + +(define (tests:create-html-tree-old outf) + (let* ((lockfile (conc outf ".lock")) + (runs-to-process '())) + (if (common:simple-file-lock lockfile) + (let* ((linktree (common:get-linktree)) + (oup (open-output-file (or outf (conc linktree "/runs-index.html")))) + (area-name (common:get-testsuite-name)) + (keys (rmt:get-keys)) + (numkeys (length keys)) + (runsdat (rmt:get-runs "%" #f #f (map (lambda (x)(list x "%")) keys))) + (header (vector-ref runsdat 0)) + (runs (vector-ref runsdat 1)) + (runtreedat (map (lambda (x) + (tests:run-record->test-path x numkeys)) + runs)) + (runs-htree (common:list->htree runtreedat))) + (set! runs-to-process runs) + (s:output-new + oup + (s:html tests:css-jscript-block + (s:title "Summary for " area-name) + (s:body 'onload "addEvents();" + (s:h1 "Summary for " area-name) + ;; top list + (s:ul 'id "LinkedList1" 'class "LinkedList" + (s:li + "Runs" + (common:htree->html runs-htree + '() + (lambda (x p) + (let* ((targ-path (string-intersperse p "/")) + (full-path (conc linktree "/" targ-path)) + (run-name (car (reverse p)))) + (if (and (common:file-exists? full-path) + (directory? full-path) + (file-write-access? full-path)) + (s:a run-name 'href (conc targ-path "/run-summary.html")) + (begin + (debug:print 0 *default-log-port* "INFO: Can't create " targ-path "/run-summary.html") + (conc run-name " (Not able to create summary at " targ-path ")"))))))))))) + (close-output-port oup) + (common:simple-file-release-lock lockfile) + + (for-each + (lambda (run) + (let* ((test-subpath (tests:run-record->test-path run numkeys)) + (run-id (db:get-value-by-header run header "id")) + (run-dir (tests:run-record->test-path run numkeys)) + (test-dats (rmt:get-tests-for-run + run-id + "%/" ;; testnamepatt + '() ;; states + '() ;; statuses + #f ;; offset + #f ;; num-to-get + #f ;; hide/not-hide + #f ;; sort-by + #f ;; sort-order + #f ;; 'shortlist ;; qrytype + 0 ;; last update + #f)) + (tests-tree-dat (map (lambda (test-dat) + ;; (tests:run-record->test-path x numkeys)) + (let* ((test-name (db:test-get-testname test-dat)) + (item-path (db:test-get-item-path test-dat)) + (full-name (db:test-make-full-name test-name item-path)) + (path-parts (string-split full-name))) + path-parts)) + test-dats)) + (tests-htree (common:list->htree tests-tree-dat)) + (html-dir (conc linktree "/" (string-intersperse run-dir "/"))) + (html-path (conc html-dir "/run-summary.html")) + (oup (if (and (common:file-exists? html-dir) + (directory? html-dir) + (file-write-access? html-dir)) + (open-output-file html-path) + #f))) + ;; (print "run-dir: " run-dir ", tests-tree-dat: " tests-tree-dat) + (if oup + (begin + (s:output-new + oup + (s:html tests:css-jscript-block + (s:title "Summary for " area-name) + (s:body 'onload "addEvents();" + (s:h1 "Summary for " (string-intersperse run-dir "/")) + ;; top list + (s:ul 'id "LinkedList1" 'class "LinkedList" + (s:li + "Tests" + (common:htree->html tests-htree + '() + (lambda (x p) + (let* ((targ-path (string-intersperse p "/")) + (test-name (car p)) + (item-path ;; (if (> (length p) 2) ;; test-name + run-name + (string-intersperse p "/")) + (full-targ (conc html-dir "/" targ-path)) + (std-file (conc full-targ "/test-summary.html")) + (alt-file (conc full-targ "/megatest-rollup-" test-name ".html")) + (html-file (if (common:file-exists? alt-file) + alt-file + std-file)) + (run-name (car (reverse p)))) + (if (and (not (common:file-exists? full-targ)) + (directory? full-targ) + (file-write-access? full-targ)) + (tests:summarize-test + run-id + (rmt:get-test-id run-id test-name item-path))) + (if (common:file-exists? full-targ) + (s:a run-name 'href html-file) + (begin + (debug:print 0 *default-log-port* "ERROR: can't access " full-targ) + (conc "No summary for " run-name))))) + )))))) + (close-output-port oup))))) + runs) + #t) + #f))) + + + + + + + +;; CHECK - WAS THIS ADDED OR REMOVED? MANUAL MERGE WITH API STUFF!!! +;; +;; get a pretty table to summarize steps +;; +;; (define (dcommon:process-steps-table steps);; db test-id #!key (work-area #f)) +(define (tests:process-steps-table steps);; db test-id #!key (work-area #f)) +;; (let ((steps (db:get-steps-for-test db test-id work-area: work-area))) + ;; organise the steps for better readability + (let ((res (make-hash-table))) + (for-each + (lambda (step) + (debug:print 6 *default-log-port* "step=" step) + (let ((record (hash-table-ref/default + res + (tdb:step-get-stepname step) + ;; 0 1 2 3 4 5 6 7 + ;; stepname start end status Duration Logfile Comment first-id + (vector (tdb:step-get-stepname step) "" "" "" "" "" "" #f)))) + (debug:print 6 *default-log-port* "record(before) = " record + "\nid: " (tdb:step-get-id step) + "\nstepname: " (tdb:step-get-stepname step) + "\nstate: " (tdb:step-get-state step) + "\nstatus: " (tdb:step-get-status step) + "\ntime: " (tdb:step-get-event_time step)) + (if (not (vector-ref record 7))(vector-set! record 7 (tdb:step-get-id step))) ;; do not clobber the id if previously set + (case (string->symbol (tdb:step-get-state step)) + ((start)(vector-set! record 1 (tdb:step-get-event_time step)) + (vector-set! record 3 (if (equal? (vector-ref record 3) "") + (tdb:step-get-status step))) + (if (> (string-length (tdb:step-get-logfile step)) + 0) + (vector-set! record 5 (tdb:step-get-logfile step)))) + ((end) + (vector-set! record 2 (any->number (tdb:step-get-event_time step))) + (vector-set! record 3 (tdb:step-get-status step)) + (vector-set! record 4 (let ((startt (any->number (vector-ref record 1))) + (endt (any->number (vector-ref record 2)))) + (debug:print 4 *default-log-port* "record[1]=" (vector-ref record 1) + ", startt=" startt ", endt=" endt + ", get-status: " (tdb:step-get-status step)) + (if (and (number? startt)(number? endt)) + (seconds->hr-min-sec (- endt startt)) "-1"))) + (if (> (string-length (tdb:step-get-logfile step)) + 0) + (vector-set! record 5 (tdb:step-get-logfile step))) + (if (> (string-length (tdb:step-get-comment step)) + 0) + (vector-set! record 6 (tdb:step-get-comment step)))) + (else + (vector-set! record 2 (tdb:step-get-state step)) + (vector-set! record 3 (tdb:step-get-status step)) + (vector-set! record 4 (tdb:step-get-event_time step)) + (vector-set! record 6 (tdb:step-get-comment step)))) + (hash-table-set! res (tdb:step-get-stepname step) record) + (debug:print 6 *default-log-port* "record(after) = " record + "\nid: " (tdb:step-get-id step) + "\nstepname: " (tdb:step-get-stepname step) + "\nstate: " (tdb:step-get-state step) + "\nstatus: " (tdb:step-get-status step) + "\ntime: " (tdb:step-get-event_time step)))) + ;; (else (vector-set! record 1 (tdb:step-get-event_time step))) + (sort steps (lambda (a b) + (cond + ((< (tdb:step-get-event_time a)(tdb:step-get-event_time b)) #t) + ((eq? (tdb:step-get-event_time a)(tdb:step-get-event_time b)) + (< (tdb:step-get-id a) (tdb:step-get-id b))) + (else #f))))) + res)) + +;; +;; +(define (tests:get-compressed-steps run-id test-id) + (let* ((steps-data (rmt:get-steps-for-test run-id test-id)) ;; 0 1 2 3 4 5 6 7 + (comprsteps (tests:process-steps-table steps-data))) ;; # + (map (lambda (x) + ;; take advantage of the \n on time->string + (vector ;; we are constructing basically the original vector but collapsing start end records + (vector-ref x 0) ;; id 0 + (let ((s (vector-ref x 1))) + (if (number? s)(seconds->time-string s) s)) ;; starttime 1 + (let ((s (vector-ref x 2))) + (if (number? s)(seconds->time-string s) s)) ;; endtime 2 + (vector-ref x 3) ;; status 3 + (vector-ref x 4) ;; duration 4 + (vector-ref x 5) ;; logfile 5 + (vector-ref x 6) ;; comment 6 + (vector-ref x 7))) ;; id 7 + (sort (hash-table-values comprsteps) + (lambda (a b) + (let ((time-a (vector-ref a 1)) + (time-b (vector-ref b 1)) + (id-a (vector-ref a 7)) + (id-b (vector-ref b 7))) + (if (and (number? time-a)(number? time-b)) + (if (< time-a time-b) + #t + (if (eq? time-a time-b) + (< id-a id-b) + ;; (stringwork-week/day-time + (db:test-get-event_time test-dat))) + (s:td "Duration") (s:td (seconds->hr-min-sec (db:test-get-run_duration test-dat))))) + (s:h3 "Log files") + (s:table + 'cellspacing "0" 'border "1" + (s:tr (s:td "Final log")(s:td (s:a 'href logf logf)))) + (s:table + 'cellspacing "0" 'border "1" + (s:tr (s:td "Step Name")(s:td "Start")(s:td "End")(s:td "Status")(s:td "Duration")(s:td "Log File")) + (map (lambda (step-dat) + (s:tr (s:td (tdb:steps-table-get-stepname step-dat)) + (s:td (tdb:steps-table-get-start step-dat)) + (s:td (tdb:steps-table-get-end step-dat)) + (s:td (tdb:steps-table-get-status step-dat)) + (s:td (tdb:steps-table-get-runtime step-dat)) + (s:td (let ((step-log (tdb:steps-table-get-log-file step-dat))) + (s:a 'href step-log step-log))))) + steps-dat)) + ))) + (close-output-port oup))))) + + +;; MUST BE CALLED local! +;; +(define (tests:test-get-paths-matching keynames target fnamepatt #!key (res '())) + ;; BUG: Move the values derived from args to parameters and push to megatest.scm + (let* ((testpatt (or (args:get-arg "-testpatt")(args:get-arg "-testpatt") "%")) + (statepatt (or (args:get-arg "-state") (args:get-arg ":state") "%")) + (statuspatt (or (args:get-arg "-status") (args:get-arg ":status") "%")) + (runname (or (args:get-arg "-runname") (args:get-arg ":runname") "%")) + (paths-from-db (rmt:test-get-paths-matching-keynames-target-new keynames target res + testpatt + statepatt + statuspatt + runname))) + (if fnamepatt + (apply append + (map (lambda (p) + (if (directory-exists? p) + (let ((glob-query (conc p "/" fnamepatt))) + (handle-exceptions + exn + (begin + (print "built-in glob on " glob-query ", failed, try using the shell. exn=" exn) + (with-input-from-pipe + (conc "echo " glob-query) + read-lines)) ;; we aren't going to try too hard. If glob breaks it is likely because someone tried to do */*/*.log or similar + (glob glob-query))) + '())) + paths-from-db)) + paths-from-db))) + + +;;====================================================================== +;; Gather data from test/task specifications +;;====================================================================== + +;; (define (tests:get-valid-tests testsdir test-patts) ;; #!key (test-names '())) +;; (let ((tests (glob (conc testsdir "/tests/*")))) ;; " (string-translate patt "%" "*"))))) +;; (set! tests (filter (lambda (test)(common:file-exists? (conc test "/testconfig"))) tests)) +;; (delete-duplicates +;; (filter (lambda (testname) +;; (tests:match test-patts testname #f)) +;; (map (lambda (testp) +;; (last (string-split testp "/"))) +;; tests))))) +;; sort tests by priority and waiton +;; Move test specific stuff to a test unit FIXME one of these days +(define (tests:sort-by-priority-and-waiton test-records) + (if (eq? (hash-table-size test-records) 0) + '() + (let* ((mungepriority (lambda (priority) + (if priority + (let ((tmp (any->number priority))) + (if tmp tmp (begin (debug:print-error 0 *default-log-port* "bad priority value " priority ", using 0") 0))) + 0))) + (all-tests (hash-table-keys test-records)) + (all-waited-on (let loop ((hed (car all-tests)) + (tal (cdr all-tests)) + (res '())) + (let* ((trec (hash-table-ref test-records hed)) + (waitons (or (tests:testqueue-get-waitons trec) '()))) + (if (null? tal) + (append res waitons) + (loop (car tal)(cdr tal)(append res waitons)))))) + (sort-fn1 + (lambda (a b) + (let* ((a-record (hash-table-ref test-records a)) + (b-record (hash-table-ref test-records b)) + (a-waitons (or (tests:testqueue-get-waitons a-record) '())) + (b-waitons (or (tests:testqueue-get-waitons b-record) '())) + (a-config (tests:testqueue-get-testconfig a-record)) + (b-config (tests:testqueue-get-testconfig b-record)) + (a-raw-pri (configf:lookup a-config "requirements" "priority")) + (b-raw-pri (configf:lookup b-config "requirements" "priority")) + (a-priority (mungepriority a-raw-pri)) + (b-priority (mungepriority b-raw-pri))) + (tests:testqueue-set-priority! a-record a-priority) + (tests:testqueue-set-priority! b-record b-priority) + ;; (debug:print 0 *default-log-port* "a=" a ", b=" b ", a-waitons=" a-waitons ", b-waitons=" b-waitons) + (cond + ;; is + ((member a b-waitons) ;; is b waiting on a? + ;; (debug:print 0 *default-log-port* "case1") + #t) + ((member b a-waitons) ;; is a waiting on b? + ;; (debug:print 0 *default-log-port* "case2") + #f) + ((and (not (null? a-waitons)) ;; both have waitons - do not disturb + (not (null? b-waitons))) + ;; (debug:print 0 *default-log-port* "case2.1") + #t) + ((and (null? a-waitons) ;; no waitons for a but b has waitons + (not (null? b-waitons))) + ;; (debug:print 0 *default-log-port* "case3") + #f) + ((and (not (null? a-waitons)) ;; a has waitons but b does not + (null? b-waitons)) + ;; (debug:print 0 *default-log-port* "case4") + #t) + ((not (eq? a-priority b-priority)) ;; use + (> a-priority b-priority)) + (else + ;; (debug:print 0 *default-log-port* "case5") + (string>? a b)))))) + + (sort-fn2 + (lambda (a b) + (> (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records a))) + (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records b))))))) + ;; (let ((dot-res (tests:run-dot (tests:tests->dot test-records) "plain"))) + ;; (debug:print "dot-res=" dot-res)) + ;; (let ((data (map cdr (filter + ;; (lambda (x)(equal? "node" (car x))) + ;; (map string-split (tests:easy-dot test-records "plain")))))) + ;; (map car (sort data (lambda (a b) + ;; (> (string->number (caddr a))(string->number (caddr b))))))) + ;; )) + (sort all-tests sort-fn1)))) ;; avoid dealing with deleted tests, look at the hash table + +(define (tests:easy-dot test-records outtype) + (let-values (((fd temp-path) (file-mkstemp (conc "/tmp/" (current-user-name) ".XXXXXX")))) + (let ((all-testnames (hash-table-keys test-records)) + (temp-port (open-output-file* fd))) + ;; (format temp-port "This file is ~A.~%" temp-path) + (format temp-port "digraph tests {\n") + (format temp-port " size=4,8\n") + ;; (format temp-port " splines=none\n") + (for-each + (lambda (testname) + (let* ((testrec (hash-table-ref test-records testname)) + (waitons (or (tests:testqueue-get-waitons testrec) '()))) + (for-each + (lambda (waiton) + (format temp-port (conc " " waiton " -> " testname " [splines=ortho]\n"))) + waitons))) + all-testnames) + (format temp-port "}\n") + (close-output-port temp-port) + (with-input-from-pipe + (conc "env -i PATH=$PATH dot -T" outtype " < " temp-path) + (lambda () + (let ((res (read-lines))) + ;; (delete-file temp-path) + res)))))) + +(define (tests:write-dot-file test-records fname sizex sizey) + (if (file-write-access? (pathname-directory fname)) + (with-output-to-file fname + (lambda () + (map print (tests:tests->dot test-records sizex sizey)))))) + +(define (tests:tests->dot test-records sizex sizey) + (let ((all-testnames (hash-table-keys test-records))) + (if (null? all-testnames) + '() + (let loop ((hed (car all-testnames)) + (tal (cdr all-testnames)) + (res (list "digraph tests {" + (conc " size=\"" (or sizex 11) "," (or sizey 11) "\";") + " ratio=0.95;" + ))) + (let* ((testrec (hash-table-ref test-records hed)) + (waitons (or (tests:testqueue-get-waitons testrec) '())) + (newres (append res + (if (null? waitons) + (list (conc " \"" hed "\" [shape=box];")) + (map (lambda (waiton) + (conc " \"" waiton "\" -> \"" hed "\" [shape=box];")) + waitons) + )))) + (if (null? tal) + (append newres (list "}")) + (loop (car tal)(cdr tal) newres) + )))))) + +;; (tests:run-dot (list "digraph tests {" "a -> b" "}") "plain") + +(define (tests:run-dot indat outtype) ;; outtype is plain, fig, dot, etc. http://www.graphviz.org/content/output-formats + (let-values (((inp oup pid)(process "env -i PATH=\"$PATH\" dot" (list "-T" outtype)))) + (with-output-to-port oup + (lambda () + (map print indat))) + (close-output-port oup) + (let ((res (with-input-from-port inp + (lambda () + (read-lines))))) + (close-input-port inp) + res))) + +;; read data from tmp file or create if not exists +;; if exists regen in background +;; +(define (tests:lazy-dot testrecords outtype sizex sizey) + (let ((dfile (conc "/tmp/." (current-user-name) "-" (server:mk-signature) ".dot")) + (fname (conc "/tmp/." (current-user-name) "-" (server:mk-signature) ".dotdat"))) + (tests:write-dot-file testrecords dfile sizex sizey) + (if (common:file-exists? fname) + (let ((res (with-input-from-file fname + (lambda () + (read-lines))))) + (system (conc "env -i PATH=\"$PATH\" dot -T " outtype " < " dfile " > " fname "&")) + res) + (begin + (system (conc "env -i PATH=\"$PATH\" dot -T " outtype " < " dfile " > " fname)) + (with-input-from-file fname + (lambda () + (read-lines))))))) + + +;; for each test: +;; +(define (tests:filter-non-runnable run-id testkeynames testrecordshash) + (let ((runnables '())) + (for-each + (lambda (testkeyname) + (let* ((test-record (hash-table-ref testrecordshash testkeyname)) + (test-name (tests:testqueue-get-testname test-record)) + (itemdat (tests:testqueue-get-itemdat test-record)) + (item-path (tests:testqueue-get-item_path test-record)) + (waitons (tests:testqueue-get-waitons test-record)) + (keep-test #t) + (test-id (rmt:get-test-id run-id test-name item-path)) + (tdat (rmt:get-testinfo-state-status run-id test-id))) ;; (cdb:get-test-info-by-id *runremote* test-id))) + (if tdat + (begin + ;; Look at the test state and status + (if (or (and (member (db:test-get-status tdat) + '("PASS" "WARN" "WAIVED" "CHECK" "SKIP")) + (equal? (db:test-get-state tdat) "COMPLETED")) + (member (db:test-get-state tdat) + '("INCOMPLETE" "KILLED"))) + (set! keep-test #f)) + + ;; examine waitons for any fails. If it is FAIL or INCOMPLETE then eliminate this test + ;; from the runnable list + (if keep-test + (for-each (lambda (waiton) + ;; for now we are waiting only on the parent test + (let* ((parent-test-id (rmt:get-test-id run-id waiton "")) + (wtdat (rmt:get-testinfo-state-status run-id test-id))) ;; (cdb:get-test-info-by-id *runremote* test-id))) + (if (or (and (equal? (db:test-get-state wtdat) "COMPLETED") + (member (db:test-get-status wtdat) '("FAIL" "ABORT"))) + (member (db:test-get-status wtdat) '("KILLED")) + (member (db:test-get-state wtdat) '("INCOMPETE"))) + ;; (if (or (member (db:test-get-status wtdat) + ;; '("FAIL" "KILLED")) + ;; (member (db:test-get-state wtdat) + ;; '("INCOMPETE"))) + (set! keep-test #f)))) ;; no point in running this one again + waitons)))) + (if keep-test (set! runnables (cons testkeyname runnables))))) + testkeynames) + runnables)) + +;;====================================================================== +;; test steps +;;====================================================================== + +;; teststep-set-status! used to be here + +;; NOT NEEDED +#;(define (test-get-kill-request run-id test-id) ;; run-id test-name itemdat) + (let* ((testdat (rmt:get-test-state-status-by-id run-id test-id))) + (and testdat + (equal? (car testdat) "KILLREQ")))) + +(define (test:tdb-get-rundat-count tdb) + (if tdb + (let ((res 0)) + (sqlite3:for-each-row + (lambda (count) + (set! res count)) + tdb + "SELECT count(id) FROM test_rundat;") + res)) + 0) + +(define (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname) + (rmt:general-call 'update-test-rundat run-id test-id (current-seconds) (or cpuload -1)(or diskfree -1) -1 (or minutes -1)) + (if (and cpuload diskfree) + (rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id)) + (if minutes + (rmt:general-call 'update-run-duration run-id minutes test-id)) + (if (and uname hostname) + (rmt:general-call 'update-uname-host run-id uname hostname test-id))) + +;; This one is for running with no db access (i.e. via rmt: internally) +(define (tests:set-full-meta-info db test-id run-id minutes work-area remtries) +;; (define (tests:set-full-meta-info test-id run-id minutes work-area) +;; (let ((remtries 10)) + (let* ((cpuload (commonmod:get-cpu-load)) + (diskfree (get-df (current-directory))) + (uname (get-uname "-srvpio")) + (hostname (get-host-name))) + (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname))) + +;; (define (tests:set-partial-meta-info test-id run-id minutes work-area) +#;(define (tests:set-partial-meta-info test-id run-id minutes work-area remtries) + (let* ((cpuload (get-cpu-load)) + (diskfree (get-df (current-directory))) + (remtries 10)) + (handle-exceptions + exn + (if (> remtries 0) + (begin + (print-call-chain (current-error-port)) + (debug:print-info 0 *default-log-port* "WARNING: failed to set meta info. Will try " remtries " more times") + (set! remtries (- remtries 1)) + (thread-sleep! 10) + (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1))) + (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) + (debug:print-error 0 *default-log-port* "tried for over a minute to update meta info and failed. Giving up") + (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) + (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status) exn)) + (print-call-chain (current-error-port)))) + (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes) + ))) + +;;====================================================================== +;; A R C H I V I N G +;;====================================================================== + +(define (test:archive db test-id) + #f) + +(define (test:archive-tests db keynames target) + #f) + + +) Index: utils/plot-uses.scm ================================================================== --- utils/plot-uses.scm +++ utils/plot-uses.scm @@ -59,10 +59,18 @@ (print " \""unitname"\" -> \""modname"\";")) (print-err "ERROR: bad declare line \""inl"\"")) (loop modname)))) (else (loop modname))))))))) + +;; ./utils/plot-uses todot portlogger,stml2,debugprint,mtargs +;; apimod.scm commonmod.scm configfmod.scm dbmod.scm megatestmod.scm +;; mtmod.scm processmod.scm rmtmod.scm servermod.scm +;; tcp-transportmod.scm > uses.dot + +;; dot uses.dot -Tpdf -o uses.pdf + (define (main) (match (command-line-arguments) (("todot" ignoreunits . files) (let* ((ignores (string-split ignoreunits ","))) @@ -73,11 +81,11 @@ (print "// Filename: "fname) (process-file ignores fname)) files) (print "}"))) (else - (print-err "Usage: plot-uses u1,u2... file1.scm ...") + (print-err "Usage: plot-uses todot u1,u2... file1.scm ...") (print-err " where u1,u2... are units to ignore and file1.scm... are the files to process.")))) (main) ) ADDED utils/run-plot.sh Index: utils/run-plot.sh ================================================================== --- /dev/null +++ utils/run-plot.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +IGNORE_UNITS=mtargs,ods + +FILES=$(ls *mod.scm|grep -v import) + +if [[ utils/plot-uses.scm -nt utils/plot-uses ]];then + oldcsc csc utils/plot-uses.scm +fi + +./utils/plot-uses todot $IGNORE_UNITS $FILES > unitdeps.dot +dot unitdeps.dot -Tpdf -o unitdeps.pdf