@@ -271,41 +271,71 @@ ;; NB// Should expand items here and then insert into the run queue. (debug:print 5 "test-records: " test-records ", keyvallst: " keyvallst " flags: " (hash-table->alist flags)) (let ((sorted-test-names (tests:sort-by-priority-and-waiton test-records)) (item-patts (hash-table-ref/default flags "-itempatt" #f))) (if (not (null? sorted-test-names)) - (let loop (; (numtimes 0) ;; shouldn't need this - (hed (car sorted-test-names)) + (let loop ((hed (car sorted-test-names)) (tal (cdr sorted-test-names))) (let* ((test-record (hash-table-ref test-records hed)) (tconfig (tests:testqueue-get-testconfig test-record)) (waitons (tests:testqueue-get-waitons test-record)) (priority (tests:testqueue-get-priority test-record)) (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f (items (tests:testqueue-get-items test-record)) - (item-path (item-list->path itemdat))) + (item-path (item-list->path itemdat)) + (newtal (append tal (list hed))) + (calc-fails (lambda (prereqs-not-met) + (filter (lambda (test) + (debug:print 9 "test: " test) + (and (vector? test) ;; not (string? test)) + (not (member (db:test-get-status test) + '("PASS" "WARN" "CHECK" "WAIVED"))))) + prereqs-not-met)))) (debug:print 6 "itemdat: " itemdat "\n items: " items "\n item-path: " item-path) (cond ((not items) ;; when false the test is ok to be handed off to launch (but not before) - (let ((have-resources (runs:can-run-more-tests db test-record)) ;; look at the test jobgroup and tot jobs running - (prereqs-not-met (db:get-prereqs-not-met db run-id waitons item-path))) + (let* ((have-resources (runs:can-run-more-tests db test-record)) ;; look at the test jobgroup and tot jobs running + (prereqs-not-met (db:get-prereqs-not-met db run-id waitons item-path)) + (fails (calc-fails prereqs-not-met))) + (debug:print 8 "INFO: have-resources: " have-resources " prereqs-not-met: " prereqs-not-met " fails: " fails) ;; Don't know at this time if the test have been launched at some time in the past ;; i.e. is this a re-launch? - (if (and have-resources - (null? prereqs-not-met)) - ;; no loop - drop though and use the loop at the bottom - (if (patt-list-match item-path item-patts) - (run:test db run-id runname keyvallst test-record flags #f) - (debug:print 1 "INFO: Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " item-patts)) - ;; else the run is stuck, temporarily or permanently - (let ((newtal (append tal (list hed)))) - ;; couldn't run, take a breather - (thread-sleep! 0.1) ;; long sleep here - no resources, may as well be patient - (loop (car newtal)(cdr newtal)))))) + (cond + ((and have-resources + (null? prereqs-not-met)) + ;; no loop here, just drop though and use the loop at the bottom + (if (patt-list-match item-path item-patts) + (run:test db run-id runname keyvallst test-record flags #f) + (debug:print 1 "INFO: Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " item-patts)) + ;; else the run is stuck, temporarily or permanently + ;; but should check if it is due to lack of resources vs. prerequisites + ) + ((not have-resources) ;; simply try again after waiting a second + (thread-sleep! 1.0) + (debug:print 1 "INFO: no resources to run new tests, waiting ...") + ;; could have done hed tal here but doing car/cdr of newtal to rotate tests + (loop (car newtal)(cdr newtal))) + (else ;; must be we have unmet prerequisites + (debug:print 4 "FAILS: " fails) + ;; If one or more of the prereqs-not-met are FAIL then we can issue + ;; a message and drop hed from the items to be processed. + (if (null? fails) + (begin + ;; couldn't run, take a breather + (debug:print 4 "INFO: Shouldn't really get here, race condition? Unable to launch more tests at this moment, killing time ...") + (thread-sleep! 0.1) ;; long sleep here - no resources, may as well be patient + ;; we made new tal by sticking hed at the back of the list + (loop (car newtal)(cdr newtal))) + ;; the waiton is FAIL so no point in trying to run hed ever again + (if (not (null? tal)) + (begin + (debug:print 1 "WARN: Dropping test " (db:test-get-test-name hed) "/" (db:test-get-item-path hed) + " from the launch list as it has prerequistes that are FAIL") + (loop (car tal)(cdr tal))))))))) ;; case where an items came in as a list been processed ((and (list? items) ;; thus we know our items are already calculated (not itemdat)) ;; and not yet expanded into the list of things to be done (if (and (>= *verbosity* 1) @@ -334,12 +364,18 @@ (loop (car tal)(cdr tal)))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ((or (procedure? items)(eq? items 'have-procedure)) - (if (and (runs:can-run-more-tests db test-record) - (null? (db:get-prereqs-not-met db run-id waitons item-path))) + (let* ((can-run-more (runs:can-run-more-tests db test-record)) + (prereqs-not-met (db:get-prereqs-not-met db run-id waitons item-path)) + (fails (calc-fails prereqs-not-met))) + (debug:print 8 "INFO: can-run-more: " can-run-more + " prereqs-not-met:\n " (intersperse prereqs-not-met "\n") + " fails:\n " (intersperse fails "\n")) + (cond + ((and can-run-more (null? prereqs-not-met)) (let ((test-name (tests:testqueue-get-testname test-record))) (setenv "MT_TEST_NAME" test-name) ;; (setenv "MT_RUNNAME" runname) (set-megatest-env-vars db run-id) ;; these may be needed by the launching process (let ((items-list (items:get-items-from-config tconfig))) @@ -347,16 +383,22 @@ (begin (tests:testqueue-set-items! test-record items-list) (loop hed tal)) (begin (debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this") - (exit 1))))) - (let ((newtal (append tal (list hed)))) - ;; if can't run more tests, lets take a breather - (thread-sleep! 0.1) ;; may as well wait a while for resources to free up - (loop (car newtal)(cdr newtal))))) - + (exit 1)))))) + ((and can-run-more (null? fails)) + (debug:print 4 "INFO: Can't run more right now, killing a little time ...") + (thread-sleep! 3.0) + (loop (car newtal)(cdr newtal))) ;; an issue with prereqs not yet met? + (else + (debug:print 1 "INFO: test " hed " has failed prerequisite(s); " + (string-intersperse (map db:test-get-testname fails) ", ") + ", removing it from to-do list") + (if (not (null? tal)) + (loop (car tal)(cdr tal))))))) + ;; this case should not happen, added to help catch any bugs ((and (list? items) itemdat) (debug:print 0 "ERROR: Should not have a list of items in a test and the itemspath set - please report this") (exit 1)))) @@ -428,11 +470,10 @@ (string->symbol (test:get-state testdat)) 'failed-to-insert)) ((failed-to-insert) (debug:print 0 "ERROR: Failed to insert the record into the db")) ((NOT_STARTED COMPLETED) - (debug:print 6 "Got here, " (test:get-state testdat)) (let ((runflag #f)) (cond ;; -force, run no matter what (force (set! runflag #t)) ;; NOT_STARTED, run no matter what @@ -441,11 +482,11 @@ ((and (or (not rerun) keepgoing) ;; Require to force re-run for COMPLETED or *anything* + PASS,WARN or CHECK (or (member (test:get-status testdat) '("PASS" "WARN" "CHECK")) (member (test:get-state testdat) '("COMPLETED")))) - (debug:print 2 "INFO: running test " test-name "/" item-path " suppressed as it is COMPLETED and " (test:get-state testdat)) + (debug:print 2 "INFO: running test " test-name "/" item-path " suppressed as it is " (test:get-state testdat) " and " (test:get-status testdat)) (set! runflag #f)) ;; -rerun and status is one of the specifed, run it ((and rerun (let* ((rerunlst (string-split rerun ",")) (must-rerun (member (test:get-status testdat) rerunlst)))