@@ -37,11 +37,11 @@ ;; This is the *new* methodology. One record to inform them and in the chaos, organise them. ;; (define (runs:create-run-record) (let* ((mconfig (if *configdat* *configdat* - (if (setup-for-run) + (if (launch:setup-for-run) *configdat* (begin (debug:print 0 "ERROR: Called setup in a non-megatest area, exiting") (exit 1))))) (runrec (runs:runrec-make-record)) @@ -88,11 +88,11 @@ (safe-setenv (car varval)(cadr varval))) (configf:get-section runconfig section))) (list "default" target)) (vector target runname testpatt keys keyvals envdat mconfig runconfig serverdat transport db toppath run-id))) -(define (set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)) +(define (runs:set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)) (let* ((target (or (common:args-get-target) (get-environment-variable "MT_TARGET"))) (keys (if inkeys inkeys (rmt:get-keys))) (keyvals (if inkeyvals inkeyvals (keys:target->keyval keys target))) (vals (hash-table-ref/default *env-vars-by-run-id* run-id #f)) @@ -206,25 +206,34 @@ (keyvals (keys:target->keyval keys target)) (run-id (rmt:register-run keyvals runname "new" "n/a" user)) ;; test-name))) (deferred '()) ;; delay running these since they have a waiton clause (runconfigf (conc *toppath* "/runconfigs.config")) (test-records (make-hash-table)) - (all-tests-registry (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names - (all-test-names (hash-table-keys all-tests-registry)) - (test-names (tests:filter-test-names all-test-names test-patts)) - (required-tests (lset-intersection equal? (string-split test-patts ",") test-names))) ;; test-names)) ;; Added test-names as initial for required-tests but that failed to work + ;; need to process runconfigs before generating these lists + (all-tests-registry #f) ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names + (all-test-names #f) ;; (hash-table-keys all-tests-registry)) + (test-names #f) ;; (tests:filter-test-names all-test-names test-patts)) + (required-tests #f)) ;;(lset-intersection equal? (string-split test-patts ",") test-names))) ;; test-names)) ;; Added test-names as initial for required-tests but that failed to work - (set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process + (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process (if (file-exists? runconfigf) - (setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals "pre-launch-env-vars") + (setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target) (debug:print 0 "WARNING: You do not have a run config file: " runconfigf)) + + ;; Now generate all the tests lists + (set! all-tests-registry (tests:get-all)) + (set! all-test-names (hash-table-keys all-tests-registry)) + (set! test-names (tests:filter-test-names all-test-names test-patts)) + (set! required-tests (lset-intersection equal? (string-split test-patts ",") test-names)) ;; look up all tests matching the comma separated list of globs in ;; test-patts (using % as wildcard) ;; (set! test-names (delete-duplicates (tests:get-valid-tests *toppath* test-patts))) - (debug:print-info 0 "test names " test-names) + (debug:print-info 0 "tests search path: " (tests:get-tests-search-path *configdat*)) + (debug:print-info 0 "all tests: " (string-intersperse (sort all-test-names string<) " ")) + (debug:print-info 0 "test names: " (string-intersperse (sort test-names string<) " ")) ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if ;; -keepgoing is specified (if (eq? *passnum* 0) (begin @@ -430,14 +439,17 @@ (null? non-completed))) (debug:print-info 4 "runs:expand-items: (or (null? prereqs-not-met) (and (member 'toplevel testmode)(null? non-completed)))") (let ((test-name (tests:testqueue-get-testname test-record))) (setenv "MT_TEST_NAME" test-name) ;; (setenv "MT_RUNNAME" runname) - (set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process + (runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process (let ((items-list (items:get-items-from-config tconfig))) (if (list? items-list) (begin + (if (null? items-list) + (let ((test-id (rmt:get-test-id run-id test-name ""))) + (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "Failed to run due to failed prerequisites"))) (tests:testqueue-set-items! test-record items-list) (list hed tal reg reruns)) (begin (debug:print 0 "ERROR: The proc from reading the items table did not yield a list - please report this") (exit 1)))))) @@ -470,11 +482,11 @@ (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records)) (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records))) (debug:print 1 "WARNING: test " hed " has discarded prerequisites, removing it from the queue") (let ((test-id (rmt:get-test-id run-id hed ""))) - (mt:test-set-state-status-by-id test-id "DEQUEUED" "PREQ_FAIL" "Failed to run due to failed prerequisites")) + (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites")) (if (and (null? trimmed-tal) (null? trimmed-reg)) #f (list (runs:queue-next-hed trimmed-tal trimmed-reg reglen regfull) @@ -546,19 +558,23 @@ ;; num-retries code was here ;; we use this opportunity to move contents of reg to tal (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met? (begin (debug:print-info 1 "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue") + (let ((test-id (rmt:get-test-id run-id hed ""))) + (mt:test-set-state-status-by-id run-id test-id "DEQUEDED" "TIMED_OUT" "Nothing seen running in a while.")) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns)))) ((and (not (null? fails))(member 'normal testmode)) (debug:print-info 1 "test " hed " (mode=" testmode ") has failed prerequisite(s); " (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") ", removing it from to-do list") + (let ((test-id (rmt:get-test-id run-id hed ""))) + (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")) (if (or (not (null? reg))(not (null? tal))) (begin (hash-table-set! test-registry hed 'CANNOTRUN) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) @@ -739,12 +755,15 @@ (if (or (not (null? reg))(not (null? tal))) (if (vector? hed) (begin (debug:print 1 "WARNING: Dropping test " test-name "/" item-path " from the launch list as it has prerequistes that are FAIL") + (let ((test-id (rmt:get-test-id run-id hed ""))) + (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")) (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! *global-delta*) + ;; This next is for the items (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'removed) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) @@ -902,11 +921,11 @@ (num-running (rmt:get-count-tests-running-for-run-id run-id))) (if (> num-running 0) (set! last-time-some-running (current-seconds))) - (if (> (current-seconds)(+ last-time-some-running 60)) + (if (> (current-seconds)(+ last-time-some-running 240)) (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1))) ;; (debug:print 0 "max-tries-hash: " (hash-table->alist *max-tries-hash*)) ;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard ;; and it is clear they *should* have run but did not. @@ -1116,11 +1135,11 @@ ) (debug:print 2 "Attempting to launch test " full-test-name) (setenv "MT_TEST_NAME" test-name) ;; (setenv "MT_ITEMPATH" item-path) (setenv "MT_RUNNAME" runname) - (set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process + (runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process (change-directory *toppath*) ;; Here is where the test_meta table is best updated ;; Yes, another use of a global for caching. Need a better way? ;; @@ -1360,10 +1379,11 @@ (dirb ;; (rmt:sdb-qry 'getstr (db:test-get-rundir b))) ;; ) ;; ((filedb:get-path *fdb* (db:test-get-rundir b)))) (if (and (string? dira)(string? dirb)) (> (string-length dira)(string-length dirb)) #f))))) + (toplevel-retries (make-hash-table)) ;; try three times to loop through and remove top level tests (test-retry-time (make-hash-table)) (allow-run-time 10)) ;; seconds to allow for killing tests before just brutally killing 'em (let loop ((test (car sorted-tests)) (tal (cdr sorted-tests))) (let* ((test-id (db:test-get-id test)) @@ -1385,11 +1405,18 @@ (> (cdb:remote-run db:test-toplevel-num-items db run-id test-name) 0)))) (case action ((remove-runs) ;; if the test is a toplevel-with-children issue an error and do not remove (if toplevel-with-children - (debug:print 0 "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests") + (begin + (debug:print 0 "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests") + (hash-table-set! toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1)) + (if (> (hash-table-ref toplevel-retries test-fulln) 3) + (if (not (null? tal)) + (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries + (let ((newtal (append tal (list test)))) + (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue (begin (debug:print-info 0 "test: " test-name " itest-state: " test-state) (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) (begin (if (not (hash-table-ref/default test-retry-time test-fulln #f)) @@ -1454,11 +1481,11 @@ (let* ((run-dir (db:test-get-rundir test)) ;; run dir is from the link tree (real-dir (if (file-exists? run-dir) (resolve-pathname run-dir) #f))) (if (not remove-data-only) - (mt:test-set-state-status-by-id (db:test-get-id test) "REMOVING" "LOCKED" #f)) + (mt:test-set-state-status-by-id (db:test-get-run-id test)(db:test-get-id test) "REMOVING" "LOCKED" #f)) (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) (if (and real-dir (> (string-length real-dir) 5) (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. (begin ;; let* ((realpath (resolve-pathname run-dir))) @@ -1508,11 +1535,11 @@ (debug:print 0 "ERROR: Missing required parameter for " switchname ", you must specify the run name with -runname runname") (exit 3)) (else (let ((db #f) (keys #f)) - (if (not (setup-for-run)) + (if (not (launch:setup-for-run)) (begin (debug:print 0 "Failed to setup, exiting") (exit 1))) ;; (if (args:get-arg "-server") ;; (cdb:remote-run server:start db (args:get-arg "-server")))