Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -1581,11 +1581,11 @@ ;; select end_time-now from ;; (select testname,item_path,event_time+run_duration as ;; end_time,strftime('%s','now') as now from tests where state in -;; ('RUNNING','REMOTEHOSTSTART','LAUNCED')); +;; ('RUNNING','REMOTEHOSTSTART','LAUNCHED')); (define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime) (let* ((incompleted '()) (oldlaunched '()) (toplevels '()) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -1297,10 +1297,11 @@ ;; - could be ssh to host from hosts table (update regularly with load) ;; - could be netbatch ;; (launch-test db (cadr status) test-conf)) (define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params) (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex + (BB> "entered launch-test") (let* ( ;; (lock-key (conc "test-" test-id)) ;; (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) ;; (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds ;; (if (car lock) ;; #t @@ -1331,10 +1332,11 @@ (list "MT_RUNNAME" runname) (list "MT_ITEMPATH" item-path) (list "MT_CONTOUR" contour) ) itemdat)) + (BB> "set env vars") (let* ((tregistry (tests:get-all)) ;; third param (below) is system-allowed ;; for tconfig, why do we allow fallback to test-conf? (tconfig (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t) (begin (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.") @@ -1376,10 +1378,11 @@ (mt-bindir-path #f) (testinfo (rmt:get-test-info-by-id run-id test-id)) (mt_target (string-intersperse (map cadr keyvals) "/")) (debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '()) (if (args:get-arg "-logging")(list "-logging") '())))) + (BB> "entered let 1") ;; (if hosts (set! hosts (string-split hosts))) ;; set the megatest to be called on the remote host (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest")) (set! mt-bindir-path (pathname-directory remote-megatest)) (if launcher (set! launcher (string-split launcher))) @@ -1387,27 +1390,30 @@ (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir (begin (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path) (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record - + + (BB> "after launcher set") ;; prevent overlapping actions - set to LAUNCHED as early as possible ;; ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f) ;; (pp (hash-table->alist tconfig)) (set! diskpath (get-best-disk *configdat* tconfig)) + (BB> "after launch state set") (if diskpath (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat))) (set! work-area (car dat)) (set! toptest-work-area (cadr dat)) (debug:print-info 2 *default-log-port* "Using work area " work-area)) (begin (set! work-area (conc test-path "/tmp_run")) (create-directory work-area #t) (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run"))) + (BB> "after disk path set") (set! cmdparms (base64:base64-encode (z3:encode-buffer (with-output-to-string (lambda () ;; (list 'hosts hosts) (write (list (list 'testpath test-path) @@ -1436,11 +1442,11 @@ (list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f)) (list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '())) (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) (list 'runname runname) (list 'mt-bindir-path mt-bindir-path)))))))) - + (BB> "after cmdparams set") ;; clean out step records from previous run if they exist ;; (rmt:delete-test-step-records run-id test-id) ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway (if (common:file-exists? work-area) (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir @@ -1458,11 +1464,12 @@ (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm")))) (debug:print 1 *default-log-port* "Launching " work-area) ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done (debug:print 4 *default-log-port* "fullcmd: " fullcmd) (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible. - (let* ((commonprevvals (alist->env-vars + (BB> "after set *last-launch*") + (let* ((commonprevvals (alist->env-vars ;; observed this let can be very slow. (>5 sec) (hash-table-ref/default *configdat* "env-override" '()))) (miscprevvals (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute" (append (list (list "MT_TEST_RUN_DIR" work-area) (list "MT_TEST_NAME" test-name) (list "MT_ITEM_INFO" (conc itemdat)) @@ -1485,10 +1492,11 @@ (conc cmdstr " >> mt_launch.log 2>&1 &"))) (car fullcmd)) (if useshell '() (cdr fullcmd))))) + (BB> "let depth 2 entered") (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork. ;; (rmt:no-sync-del! lock-key) ;; release the lock for starting this test (if (not launchwait) ;; give the OS a little time to allow the process to start (thread-sleep! 0.01)) (with-output-to-file "mt_launch.log" Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -204,11 +204,11 @@ jobg-count)))) (if (> (+ num-running num-running-in-jobgroup) 0) (runs:inc-can-run-more-tests-count runsdat)) ;; (set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1))) (if (not (eq? *last-num-running-tests* num-running)) (begin - (debug:print 2 *default-log-port* "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running) + (debug:print 1 *default-log-port* "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running) (set! *last-num-running-tests* num-running))) (if (not (eq? 0 *globalexitstatus*)) (list #f num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit) (let ((can-not-run-more (cond ;; if max-concurrent-jobs is set and the number running is greater @@ -713,10 +713,11 @@ (completed-prereq-items (let ((foo (begin (BB> "hello prereqs: "prereqs) #t)) (res (filter (lambda (test) (BB> "foo - "test) (and (vector? test) + (equal? "COMPLETED" (db:test-get-state test)) (equal? "COMPLETED" (db:test-get-state test)) (not (equal? "" (db:test-get-item-path test))))) prereqs))) res)) @@ -767,18 +768,19 @@ (set! runs:nothing-left-in-queue-count (+ runs:nothing-left-in-queue-count 1))) #f))) ;;; desired result of below cond branch: ;; we want to expand items in our test of interest (hed) in the following cases: - ;; case 1 - mode is itemmatch or itemwait: (TODO) + ;; case 1 - mode is itemmatch or itemwait: (DONE) ;; - all prereq tests have been expanded ;; - at least one prereq's items have completed - ;; case 2 - mode is toplevel (DONE) + ;; case 2 - mode is toplevel (PARTIAL) ;; - prereqs are completed. + ;; - or no prereqs can complete (TODO) ;; case 3 - mode not specified (DONE) - ;; - prereqs are completed and passed (we could consider removing "and passed" -- it would change behavior from current) - + ;; - prereqs are completed and passed (we could consider removing "and passed" -- it would change behavior from current) + ;; runs:expand-items case: toplevel or else no dangling prerequeistes -- expand items now. ((or (and have-itemized (null? unexpanded-prereqs) (not (null? completed-prereq-items))) (null? prereqs) ;; nothing is in our way to proceed (need to expand this to an item level check.) (and (member 'toplevel testmode) ;; for toplevel test - proceed (nothing in our way) @@ -1425,11 +1427,11 @@ (runs:queue-next-reg tal reg reglen regfull) reruns)))) ;; (loop (car tal)(cdr tal) reg reruns)))) (runs:incremental-print-results run-id) - (debug:print 4 *default-log-port* "TOP OF LOOP => " + (debug:print 1 *default-log-port* "TOP OF LOOP => " "test-name: " test-name "\n test-record " test-record "\n hed: " hed "\n itemdat: " itemdat "\n items: " items @@ -1466,10 +1468,17 @@ #f)) waitons))))) ;; could do this more elegantly with a marker.... (debug:print 0 *default-log-port* "WARNING: Marking test " tfullname " as not runnable. It is waiting on tests that cannot be run. Giving up now.") (hash-table-set! test-registry tfullname 'removed)) + + ; BB - a possibility for preqfail propagation + ;; ((and (not items) (string? item-path) (not (equal? item-path "")) + ;; (lset-intersection testmode '(itemmatch itemwait))) + + ;; ) + ;; items is #f then the test is ok to be handed off to launch (but not before) ;; ((not items) (debug:print-info 4 *default-log-port* "OUTER COND: (not items)") (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) @@ -1589,31 +1598,34 @@ ;; LET* ((test-record ;; we get here on "drop through". All done! (runs:run-post-hook run-id) (debug:print-info 1 *default-log-port* "All tests launched"))) -(define (runs:calc-fails prereqs-not-met) +(define (runs:calc-fails prereqs-not-met) ;; BB is this redundant with runs:runable-tests ? (filter (lambda (test) - (and (vector? test) ;; not (string? test)) - (member (db:test-get-state test) '("INCOMPLETE" "COMPLETED")) - (not (member (db:test-get-status test) - '("PASS" "WARN" "CHECK" "WAIVED" "SKIP"))))) + (or + (and (vector? test) ;; not (string? test)) + (member (db:test-get-status test) '("TEN_STRIKES" "BLOCKED" "PREQ_FAIL" "ZERO_ITEMS" "PREQ_DISCARDED" "TIMED_OUT" "KILLED"))) + (and (vector? test) ;; not (string? test)) + (member (db:test-get-state test) '("INCOMPLETE" "COMPLETED")) + (not (member (db:test-get-status test) + '("PASS" "WARN" "CHECK" "WAIVED" "SKIP")))))) prereqs-not-met)) (define (runs:calc-prereq-fail prereqs-not-met) (filter (lambda (test) (and (vector? test) ;; not (string? test)) (equal? (db:test-get-state test) "NOT_STARTED") - (not (member (db:test-get-status test) + (not (member (db:test-get-status test) '("n/a" "KEEP_TRYING"))))) prereqs-not-met)) (define (runs:calc-not-completed prereqs-not-met) ;; filter out tests which have reached a ground state -- they are done one way or another. (filter (lambda (t) (or (not (vector? t)) - (not (and (equal? (db:test-get-state t) "NOT_STARTED") (equal? (db:test-get-status t) "PREQ_FAIL"))) + (not (member (db:test-get-status t) '("TEN_STRIKES" "BLOCKED" "PREQ_FAIL" "ZERO_ITEMS" "PREQ_DISCARDED" "TIMED_OUT" "KILLED"))) (not (member (db:test-get-state t) '("INCOMPLETE" "COMPLETED"))))) prereqs-not-met)) ;; (define (runs:calc-not-completed prereqs-not-met) ;; (filter @@ -1627,11 +1639,11 @@ (lambda (t) (or (not (vector? t)) (and (equal? "NOT_STARTED" (db:test-get-state t)) (member (db:test-get-status t) '("n/a" "KEEP_TRYING"))) - (and (equal? "RUNNING" (db:test-get-state t))))) ;; account for a test that is running + (and (member (db:test-get-state t) '("RUNNING" "LAUNCHED" "REMOTEHOSTSTART" ))))) ;; account for a test that is running prereqs-not-met)) (define (runs:pretty-string lst) (map (lambda (t) (if (not (vector? t))