Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -1802,10 +1802,13 @@ ;; clear vars matching pattern, run proc, set vars back ;; if proc is a string run that string as a command with ;; system. ;; + + + (define (common:without-vars proc . var-patts) (let ((vars (make-hash-table))) (for-each (lambda (vardat) ;; each env var (for-each @@ -1823,10 +1826,61 @@ (hash-table-for-each vars (lambda (var val) (setenv var val))) vars)) + +(define (common:get-param-mapping #!key (flavor #f)) + "returns alist mapping string keys in testconfig/subrun to megatest command line switches; if flavor is switch-symbol, maps tcmt symbolic switches to megatest switches" + (let ((default '(("tag-expr" . "-tagexpr") + ("mode-patt" . "-modepatt") + ("run-name" . "-runname") + ("contour" . "-contour") + ("mode-patt" . "-mode-patt") + + ("test-patt" . "-testpatt") + ("msg" . "-m") + ("new" . "-set-state-status")))) + (if (eq? flavor 'switch) + (map (lambda (x) + (cons (string->symbol (conc "-" (car x)) (cdr x)))) + default) + default))) + +(define (common:sub-megatest-selector-switches test-run-dir) + (let* ((switch-def-alist (common:get-param-mapping flavor: 'config))) + (subrunfile (conc test-run-dir "/testconfig.subrun" )) + (subrundata (with-input-from-file subrunfile read)) + (subrunconfig (configf:alist->config subrundata))) + ;; note - get precmd from subrun section + ;; apply to submegatest commands + + (apply append + (filter-map (lambda (item) + (let ((config-key (car item)) + (switch (cdr item)) + (val (configf:lookup subrunconfig switch))) + (if val + (list switch val) + #f))) + switch-def-alist)))) + +(define (common:sub-megatest-run test-run-dir switches #!key (logfile #f)) + (let* ((real-logfile (or logfile (conc (test-run-dir) "/subrun-" + (string-substitute "[/*]" "_" (string-intersperse switches "^"))"-" + (number->string (current-seconds)) ".log"))) + (selector-switches (common:sub-megatest-selector-switches test-run-dir)) + (cmd-list `("megatest" ,@selector-switches ,@switches "-log" ,real-logfile)) + ) + (call-with-environment-variables + (list (cons "PATH" (conc (get-environment-variable "PATH") ":."))) + (lambda () + (common:without-vars proc "^MT_.*") + + )))) + + (define (common:run-a-command cmd #!key (with-vars #f)) (let* ((pre-cmd (dtests:get-pre-command)) (post-cmd (dtests:get-post-command)) (fullcmd (if (or pre-cmd post-cmd) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -321,21 +321,23 @@ ;; 4. fix runname ;; 5. fix testpatt or calculate it from contour ;; 6. launch the run ;; 7. roll up the run result and or roll up the logpro processed result (if (configf:lookup testconfig "subrun" "runwait") ;; we use runwait as the flag that a subrun is requested - (let* ((runarea (let ((ra (configf:lookup testconfig "subrun" "runarea"))) + (configf:write-alist testconfig "testconfig.subrun") ;; BB: created here + (let* ((runarea (let ((ra (configf:lookup testconfig "subrun" "run-area"))) (if ra ;; when runarea is not set we default to *toppath*. However ra ;; we need to force the setting in the testconfig so it will (begin ;; be preserved in the testconfig.subrun file (configf:set-section-var testconfig "subrun" "runarea" *toppath*) *toppath*)))) + ;;; BB: TODO - use common:param (passfail (configf:lookup testconfig "subrun" "passfail")) (target (or (configf:lookup testconfig "subrun" "target") (get-environment-variable "MT_TARGET"))) (runname (or (configf:lookup testconfig "subrun" "runname")(get-environment-variable "MT_RUNNAME"))) (contour (configf:lookup testconfig "subrun" "contour")) - (testpatt (configf:lookup testconfig "subrun" "testpatt")) + (testpatt (configf:lookup testconfig "subrun" "test-patt")) (mode-patt (configf:lookup testconfig "subrun" "mode-patt")) (tag-expr (configf:lookup testconfig "subrun" "tag-expr")) (run-wait (configf:lookup testconfig "subrun" "runwait")) (logpro (configf:lookup testconfig "subrun" "logpro")) (compact-stem (string-substitute "[/*]" "_" (conc target "-" runname "-" (or testpatt mode-patt tag-expr)))) @@ -366,11 +368,11 @@ (set! ezsteps #t) ;; set the needed flag (set! ezstepslst (append (or ezstepslst '()) (list (list "subrun" (conc "{subrun=true} " mt-cmd))))) (configf:set-section-var testconfig "logpro" "subrun" logpro) ;; append the logpro rules to the logpro section as stepname subrun (if runarea (configf:set-section-var testconfig "setup" "submegatest" runarea)) - (configf:write-alist testconfig "testconfig.subrun") + )) ;; process the ezsteps (if ezsteps (begin Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -1150,19 +1150,19 @@ (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL (list (if (null? tal)(car newtal)(car tal)) tal reg reruns))))) - ;; ELSE: can't drop this - maybe running? Just keep trying + ;; ELSE: can't drop this - maybe running? Just keep trying ;;(if (not (or (not (null? reg))(not (null? tal)))) ;; old experiment (let ((runable-tests (runs:runable-tests prereqs-not-met))) ;; SUSPICIOUS: Should look at more than just prereqs-not-met? (if (null? runable-tests) #f ;; I think we are truly done here (runs:loop-values newtal reg reglen regfull reruns))) ;;) ;;from old experiment - ) ;; end if (or (not (null? reg))(not (null? tal))) + ) ;; end if (or (not (null? reg))(not (null? tal))) )))))) ;; scan a list of tests looking to see if any are potentially runnable ;; (define (runs:runable-tests tests) @@ -1192,11 +1192,11 @@ inc-results-fmt: "~12a~12a~20a~12a~40a\n" ;; state status time duration test-name item-path run-info: #f runname: #f target: #f ) -) + ) (define (runs:incremental-print-results run-id) (let ((curr-sec (current-seconds))) (if (> (- curr-sec (runs:gendat-inc-results-last-update *runs:general-data*)) 5) ;; at least five seconds since last update (let* ((run-dat (or (runs:gendat-run-info *runs:general-data*)(rmt:get-run-info run-id))) @@ -1380,11 +1380,11 @@ ;; (server:ping (remote-server-url *runremote*))) ;; (server:check-if-running *toppath*)))) ;; (server:kind-run *toppath*)) (if (> num-running 0) - (set! last-time-some-running (current-seconds))) + (set! last-time-some-running (current-seconds))) (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000))) (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1))) ;; (debug:print 0 *default-log-port* "max-tries-hash: " (hash-table->alist *max-tries-hash*)) @@ -1405,11 +1405,11 @@ (if (or (not (null? tal))(not (null? reg))) (loop (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns)))) - ;; (loop (car tal)(cdr tal) reg reruns)))) + ;; (loop (car tal)(cdr tal) reg reruns)))) (runs:incremental-print-results run-id) (debug:print 4 *default-log-port* "TOP OF LOOP => " "test-name: " test-name "\n hed: " hed @@ -1462,11 +1462,11 @@ (not (null? tal))) (loop (car tal)(cdr tal) reg reruns)) (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) (runs:dat-can-run-more-tests-set! runsdat (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running (let ((loop-list (runs:process-expanded-tests runsdat testdat))) - (if loop-list (apply loop loop-list)))) + (if loop-list (apply loop loop-list)))) ;; items processed into a list but not came in as a list been processed ;; ((and (list? items) ;; thus we know our items are already calculated (not itemdat)) ;; and not yet expanded into the list of things to be done @@ -1477,17 +1477,17 @@ (> (length items) 0) (and (list? (car items)) (> (length (car items)) 0)) (debug:debug-mode 1)) (debug:print 2 *default-log-port* (map (lambda (row) - (conc (string-intersperse - (map (lambda (varval) - (string-intersperse varval "=")) - row) - " ") - "\n")) - items))) + (conc (string-intersperse + (map (lambda (varval) + (string-intersperse varval "=")) + row) + " ") + "\n")) + items))) (let* ((items-in-testpatt (filter (lambda (my-itemdat) (tests:match test-patts hed (item-list->path my-itemdat) )) @@ -1505,27 +1505,27 @@ newrec)) (my-item-path (item-list->path my-itemdat)) (newtestname (db:test-make-full-name hed my-item-path))) ;; test names are unique on testname/item-path (tests:testqueue-set-items! new-test-record #f) - (tests:testqueue-set-itemdat! new-test-record my-itemdat) - (tests:testqueue-set-item_path! new-test-record my-item-path) - (hash-table-set! test-records newtestname new-test-record) - (set! tal (append tal (list newtestname))))) ;; since these are itemized create new test names testname/itempath + (tests:testqueue-set-itemdat! new-test-record my-itemdat) + (tests:testqueue-set-item_path! new-test-record my-item-path) + (hash-table-set! test-records newtestname new-test-record) + (set! tal (append tal (list newtestname))))) ;; since these are itemized create new test names testname/itempath items-in-testpatt))) - - + + ;; At this point we have possibly added items to tal but all must be handed off to ;; INNER COND logic. I think loop without rotating the queue ;; (loop hed tal reg reruns)) ;; (let ((newtal (append tal (list hed)))) ;; We should discard hed as it has been expanded into it's items? Yes, but only if this *is* an itemized test ;; (loop (car newtal)(cdr newtal) reg reruns) (if (null? tal) #f (loop (car tal)(cdr tal) reg reruns))) - + ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ;; EXPAND ITEMS ((or (procedure? items)(eq? items 'have-procedure)) (debug:print-info 4 *default-log-port* "cond branch - " "rtq-4") @@ -1538,11 +1538,11 @@ (debug:print-info 4 *default-log-port* " -- Can't expand hed="hed) ) ) ;; if can't run more just loop with next possible test (loop (car newtal)(cdr newtal) reg reruns)))) - + ;; this case should not happen, added to help catch any bugs ((and (list? items) itemdat) (debug:print-info 4 *default-log-port* "cond branch - " "rtq-5") (debug:print-error 0 *default-log-port* "Should not have a list of items in a test and the itemspath set - please report this") (exit 1)) @@ -1684,12 +1684,12 @@ ;; per-test call is not needed. Given the delicacy of the move to ;; v1.55 this code is being left in place for the time being. ;; (if (not (hash-table-ref/default *test-meta-updated* test-name #f)) (begin - (hash-table-set! *test-meta-updated* test-name #t) - (runs:update-test_meta test-name test-conf))) + (hash-table-set! *test-meta-updated* test-name #t) + (runs:update-test_meta test-name test-conf))) ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer")) (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/")) (test-id (rmt:get-test-id run-id test-name item-path)) (testdat (if test-id (rmt:get-test-info-by-id run-id test-id) #f))) @@ -1929,11 +1929,11 @@ ((remove-runs) (if remove (system (conc precmd " megatest -remove-runs -target " target " -runname " (simple-run-runname run) " -testpatt %")))) ((archive) (if remove (system (conc precmd " megatest -archive save-remove -target " target " -runname " (simple-run-runname run) " -testpatt %")))))) actions)))) - sorted))) + sorted))) ;; (print "Sorted: " (map simple-run-event_time sorted)) ;; (print "Remove: " (map simple-run-event_time to-remove)))) (hash-table-keys runs-ht)) runs-ht)) @@ -1988,16 +1988,16 @@ (lambda (run) (let ((runkey (string-intersperse (map (lambda (k) (db:get-value-by-header run header k)) keys) "/")) (dirs-to-remove (make-hash-table)) (proc-get-tests (lambda (run-id) - (mt:get-tests-for-run run-id - testpatt states statuses - not-in: #f - sort-by: (case action - ((remove-runs) 'rundir) - (else 'event_time)))))) + (mt:get-tests-for-run run-id + testpatt states statuses + not-in: #f + sort-by: (case action + ((remove-runs) 'rundir) + (else 'event_time)))))) (let* ((run-id (db:get-value-by-header run header "id")) (run-state (db:get-value-by-header run header "state")) (run-name (db:get-value-by-header run header "runname")) (tests (if (not (equal? run-state "locked")) (proc-get-tests run-id) @@ -2072,46 +2072,46 @@ (toplevel-with-children (and (db:test-get-is-toplevel test) (> (rmt:test-toplevel-num-items run-id test-name) 0)))) (case action ((remove-runs) ;; if the test is a toplevel-with-children issue an error and do not remove - (if toplevel-with-children - (begin - (debug:print 0 *default-log-port* "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests") - (hash-table-set! toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1)) - (if (> (hash-table-ref toplevel-retries test-fulln) 3) - (if (not (null? tal)) - (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries - (let ((newtal (append tal (list test)))) - (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue - (begin - (debug:print-info 0 *default-log-port* "test: " test-name " itest-state: " test-state) - (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) - (begin - (if (not (hash-table-ref/default test-retry-time test-fulln #f)) - (begin - ;; want to set to REMOVING BUT CANNOT do it here? - (hash-table-set! test-retry-time test-fulln (current-seconds)))) - (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) - ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first - ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give - ;; up and blow it away. - (begin - (debug:print 0 *default-log-port* "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing") - (mt:test-set-state-status-by-id run-id (db:test-get-id test) "FAILEDKILL" "n/a" #f) - (thread-sleep! 1)) - (begin - (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f) - (thread-sleep! 1))) - ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... - (if (null? tal) - (loop new-test-dat tal) - (loop (car tal)(append tal (list new-test-dat))))) - (begin - (runs:remove-test-directory new-test-dat mode) ;; 'remove-all) - (if (not (null? tal)) - (loop (car tal)(cdr tal))))))) + (cond + (toplevel-with-children + (debug:print 0 *default-log-port* "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests") + (hash-table-set! toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1)) + (if (> (hash-table-ref toplevel-retries test-fulln) 3) + (if (not (null? tal)) + (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries + (let ((newtal (append tal (list test)))) + (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue + (else + (debug:print-info 0 *default-log-port* "test: " test-name " itest-state: " test-state) + (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) + (begin + (if (not (hash-table-ref/default test-retry-time test-fulln #f)) + (begin + ;; want to set to REMOVING BUT CANNOT do it here? + (hash-table-set! test-retry-time test-fulln (current-seconds)))) + (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) + ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first + ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give + ;; up and blow it away. + (begin + (debug:print 0 *default-log-port* "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing") + (mt:test-set-state-status-by-id run-id (db:test-get-id test) "FAILEDKILL" "n/a" #f) + (thread-sleep! 1)) + (begin + (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f) + (thread-sleep! 1))) + ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... + (if (null? tal) + (loop new-test-dat tal) + (loop (car tal)(append tal (list new-test-dat))))) + (begin + (runs:remove-test-directory new-test-dat mode) ;; 'remove-all) + (if (not (null? tal)) + (loop (car tal)(cdr tal))))))) (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) ((set-state-status) (debug:print-info 2 *default-log-port* "new state " (car state-status) ", new status " (cadr state-status)) (mt:test-set-state-status-by-id run-id (db:test-get-id test) (car state-status)(cadr state-status) #f) (if (not (null? tal))