Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -1332,11 +1332,16 @@ fail_count INTEGER DEFAULT 0, pass_count INTEGER DEFAULT 0, archived INTEGER DEFAULT 0, -- 0=no, > 1=archive block id where test data can be found last_update INTEGER DEFAULT (strftime('%s','now')), CONSTRAINT testsconstraint UNIQUE (run_id, testname, item_path));") - (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_index ON tests (run_id, testname, item_path, uname);") + ;; deprecated -- (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_index ON tests (run_id, testname, item_path, uname);") + + (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_run_id_index ON tests (run_id);") ;; new + (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_testname_index ON tests (testname,item_path);") ;; new + (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_state_status_index ON tests (state, status); ") ;; new + (sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_tests_trigger AFTER UPDATE ON tests FOR EACH ROW BEGIN UPDATE tests SET last_update=(strftime('%s','now')) WHERE id=old.id; Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -610,13 +610,11 @@ (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) ) ;; prime it for running ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART")) (if (process:alive-on-host? test-host test-pid) (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed") - ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") - (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) - )) + (exit))) ((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))) ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) ) (else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")) Index: megatest-version.scm ================================================================== --- megatest-version.scm +++ megatest-version.scm @@ -18,6 +18,6 @@ ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00.. (declare (unit megatest-version)) -(define megatest-version 1.6521) +(define megatest-version 1.6523) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -112,10 +112,12 @@ Launching and managing runs -run : run all tests or as specified by -testpatt -remove-runs : remove the data for a run, requires -runname and -testpatt Optionally use :state and :status, use -keep-records to remove only the run data. + -kill-runs : kill existing run(s) (all incomplete tests killed) + -kill-rerun : kill an existing run (all incomplete tests killed and run is rerun) -set-state-status X,Y : set state to X and status to Y, requires controls per -remove-runs -rerun FAIL,WARN... : force re-run for tests with specificed status(s) -rerun-clean : set all tests not COMPLETED+PASS,WARN,WAIVED to NOT_STARTED,n/a and then run the specified testpatt with -preclean -rerun-all : set all tests to NOT_STARTED,n/a and run with -preclean @@ -293,10 +295,11 @@ "-set-toplog" "-runstep" "-logpro" "-m" "-rerun" + "-days" "-rename-run" "-to" ;; values and messages ":category" @@ -405,10 +408,12 @@ "-test-paths" ;; get path(s) to a test, ordered by youngest first "-runall" ;; run all tests, respects -testpatt, defaults to % "-run" ;; alias for -runall "-remove-runs" + "-kill-runs" + "-kill-rerun" "-keep-records" ;; use with -remove-runs to remove only the run data "-rebuild-db" "-cleanup-db" "-rollup" "-update-meta" @@ -573,11 +578,11 @@ (process:children #f)) (original-exit exit-code))))) ;; for some switches always print the command to stderr ;; -(if (args:any? "-run" "-runall" "-remove-runs" "-set-state-status") +(if (args:any? "-run" "-runall" "-remove-runs" "-set-state-status" "-kill-runs" "-kill-rerun") (debug:print 0 *default-log-port* (string-intersperse (argv) " "))) ;; some switches imply homehost. Exit here if not on homehost ;; (let ((homehost-required (list "-cleanup-db" "-server"))) @@ -1040,11 +1045,11 @@ (exit 1)) ((not (or (args:get-arg ":runname") (args:get-arg "-runname"))) (debug:print-error 0 *default-log-port* "Missing required parameter for " action ", you must specify the run name pattern with -runname patt") (exit 2)) - ((not (args:get-arg "-testpatt")) + ((not (or (args:get-arg "-testpatt") (eq? action 'kill-runs))) (debug:print-error 0 *default-log-port* "Missing required parameter for " action ", you must specify the test pattern with -testpatt") (exit 3)) (else (if (not (car *configinfo*)) (begin @@ -1061,10 +1066,46 @@ state: (common:args-get-state) status: (common:args-get-status) new-state-status: (args:get-arg "-set-state-status") mode: mode))) (set! *didsomething* #t))))) + +(if (args:get-arg "-kill-runs") + (general-run-call + "-kill-runs" + "kill runs" + (lambda (target runname keys keyvals) + (operate-on 'kill-runs mode: #f) + ))) + +(if (args:get-arg "-kill-rerun") + (let* ((target-patt (args:get-arg "-target")) + (runname-patt (args:get-arg "-runname"))) + (cond ((not target-patt) + (debug:print-error 0 *default-log-port* "Missing target, must specify target for -kill-rerun with -target ") + (exit 1)) + ((not runname-patt) + (debug:print-error 0 *default-log-port* "Missing runname, must specify runname for -kill-rerun with -runname ") + (exit 1)) + ((string-search "[ ,%]" target-patt) + (debug:print-error 0 *default-log-port* "Invalid target ["target-patt"], must specify exact target (no wildcards) for -kill-rerun with -target ") + (exit 1)) + ((string-search "[ ,%]" runname-patt) + (debug:print-error 0 *default-log-port* "Invalid runname ["runname-patt"], must specify exact runname (no wildcards) for -kill-rerun with -runname ") + (exit 1)) + (else + (general-run-call + "-kill-runs" + "kill runs" + (lambda (target runname keys keyvals) + (operate-on 'kill-runs mode: #f) + )) + + (thread-sleep! 15)) + ;; fall thru and let "-run" loop fire + ))) + (if (args:get-arg "-remove-runs") (general-run-call "-remove-runs" "remove runs" @@ -1624,18 +1665,19 @@ ;; run all tests are are Not COMPLETED and PASS or CHECK (if (or (args:get-arg "-runall") (args:get-arg "-run") (args:get-arg "-rerun-clean") (args:get-arg "-rerun-all") - (args:get-arg "-runtests")) + (args:get-arg "-runtests") + (args:get-arg "-kill-rerun")) (let ((need-clean (or (args:get-arg "-rerun-clean") (args:get-arg "-rerun-all")))) (general-run-call "-runall" "run all tests" (lambda (target runname keys keyvals) - (if (args:get-arg "-rerun-clean") ;; first set states/statuses correct + (if (or (args:get-arg "-kill-rerun") (args:get-arg "-rerun-clean")) ;; first set states/statuses correct ;; For rerun-clean do we or do we not support the testpatt? (let ((states (or (configf:lookup *configdat* "validvalues" "cleanrerun-states") "KILLREQ,KILLED,UNKNOWN,INCOMPLETE,STUCK,NOT_STARTED")) (statuses (or (configf:lookup *configdat* "validvalues" "cleanrerun-statuses") "FAIL,INCOMPLETE,ABORT,CHECK,DEAD,PREQ_FAIL,PREQ_DISCARDED"))) Index: mtut.scm ================================================================== --- mtut.scm +++ mtut.scm @@ -140,10 +140,12 @@ Run management: run : initiate or resume a run, already completed and in-progress tests are not affected. rerun-clean : clean and rerun all not completed pass/fail tests rerun-all : clean and rerun entire run + kill-run : kill all tests in run + kill-rerun : kill all tests in run and restart non-completed tests remove : remove runs set-ss : set state/status archive : compress and move test data to archive disk kill : stop tests or entire runs db : database utilities @@ -247,10 +249,12 @@ ;; alist to map actions to old megatest commands (define *action-keys* '((run . "-run") (rerun-clean . "-rerun-clean") (rerun-all . "-rerun-all") + (kill-run . "-kill-runs") + (kill-rerun . "-kill-rerun") (sync . "") (archive . "-archive") (set-ss . "-set-state-status") (remove . "-remove-runs"))) @@ -1445,11 +1449,11 @@ (set! *default-log-port* oup) ))) (if *action* (case (string->symbol *action*) - ((run remove rerun rerun-clean rerun-all set-ss archive kill list) + ((run remove rerun rerun-clean rerun-all set-ss archive kill list kill-run kill-rerun) (let* ((mtconfdat (simple-setup (args:get-arg "-start-dir"))) (mtconf (car mtconfdat)) (area (args:get-arg "-area")) ;; look up the area to dispatch to from [areas] section (areasec (if area (configf:lookup mtconf "areas" area) #f)) (areadat (if areasec (common:val->alist areasec) #f)) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -2044,11 +2044,11 @@ (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f))) (rp-mutex (make-mutex)) (bup-mutex (make-mutex)) (keep-records (args:get-arg "-keep-records"))) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode". - (let* ((write-access-actions '(remove-runs set-state-status archive run-wait)) + (let* ((write-access-actions '(remove-runs set-state-status archive run-wait kill-runs)) (dbfile (conc *toppath* "/megatest.db")) (readonly-mode (not (file-write-access? dbfile)))) (when (and readonly-mode (member action write-access-actions)) (debug:print-error 0 *default-log-port* "megatest.db is readonly. Cannot proceed with action ["action"] in which write-access isrequired .") @@ -2081,16 +2081,19 @@ (worker-thread #f)) (debug:print-info 4 *default-log-port* "runs:operate-on run=" run ", header=" header) (if (not (null? tests)) (begin (case action + ((kill-runs) + (tasks:kill-runner target run-name "%") + (debug:print 1 *default-log-port* "Killing tests for run: " runkey " " (db:get-value-by-header run header "runname")) + ) ((remove-runs) ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) ;; seek and kill in flight -runtests with % as testpatt here ;; (if (equal? testpatt "%") (tasks:kill-runner target run-name testpatt) - ;; (debug:print 0 *default-log-port* "not attempting to kill any run launcher processes as testpatt is " testpatt)) (debug:print 1 *default-log-port* "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname"))) ((set-state-status) ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) (debug:print 1 *default-log-port* "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname"))) @@ -2194,11 +2197,11 @@ (if (< (- now last-visit) 1.0) (thread-sleep! 1.0)) (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) ;; send to back of line, loop (let ((newtal (append tal (list test)))) - (loop (car newtal)(cdr newtal))) + (loop (car newtal)(cdr newtal))) ) ((done) ;; drop this one; if remaining, loop, else finish (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) (let ((subrun-remove-succeeded (hash-table-ref/default backgrounded-remove-result test-fulln 'exception))) @@ -2251,10 +2254,32 @@ (begin (runs:remove-test-directory new-test-dat mode) ;; 'remove-all) (if (not (null? tal)) (loop (car tal)(cdr tal))))))) (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) + ((kill-runs) + ;; RUNNING -> KILLREQ + ;; LAUNCHED,RUNNING,REMOTEHOSTSTART -> NOT STARTED + (cond + ((and has-subrun (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))) + (common:send-thunk-to-background-thread + (lambda () + (let* ((subrun-remove-succeeded + (subrun:kill-subrun run-dir keep-records))) + #t))) + (if (not (null? tal)) + (loop (car tal)(cdr tal))) + ) + ((member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) + (debug:print 1 *default-log-port* "INFO: issuing killreq to test "test-fulln) + (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f) + (if (not (null? tal)) + (loop (car tal)(cdr tal)))) + (else + (if (not (null? tal)) + (loop (car tal)(cdr tal))) + ))) ((set-state-status) (let* ((new-state (car state-status)) (new-status (cadr state-status)) (test-id (db:test-get-id test)) (test-run-dir (db:test-get-rundir new-test-dat)) Index: subrun.scm ================================================================== --- subrun.scm +++ subrun.scm @@ -118,10 +118,19 @@ (begin (subrun:set-subrun-removed test-run-dir) #t) #f)) #t)) + +(define (subrun:kill-subrun test-run-dir ) + (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir)) + (let* ((action-switches-str + (conc "-kill-runs" )) + (kill-result + (subrun:exec-sub-megatest test-run-dir action-switches-str "kill"))) + kill-result) + #t)) (define (subrun:launch-cmd test-run-dir) (if (subrun:subrun-removed? test-run-dir) (subrun:unset-subrun-removed test-run-dir))