Index: commonmod.scm ================================================================== --- commonmod.scm +++ commonmod.scm @@ -278,11 +278,22 @@ *test-meta-updated* tests:testqueue-set-item_path! tests:testqueue-set-itemdat! make-tests:testqueue -) + megatest-fossil-hash + + common:steps-can-proceed-given-status-sym + status-sym->string + common:worse-status-sym + common:logpro-exit-code->status-sym + + save-environment-as-files + assoc/default + common:read-encoded-string + + ) (import scheme) (cond-expand (chicken-4 Index: configfmod.scm ================================================================== --- configfmod.scm +++ configfmod.scm @@ -41,10 +41,12 @@ configf:alist->config configf:set-section-var find-and-read-config common:args-get-target + configf:eval-string-in-environment + ) (import scheme chicken extras Index: dbmod.scm ================================================================== --- dbmod.scm +++ dbmod.scm @@ -167,10 +167,11 @@ db:get-changed-run-ids db:set-sync db:setup + db:logpro-dat->csv ) (import scheme) (cond-expand Index: launchmod.scm ================================================================== --- launchmod.scm +++ launchmod.scm @@ -45,11 +45,10 @@ (use srfi-69) (module launchmod ( - rmt:find-and-mark-incomplete ) (import scheme) (cond-expand (chicken-4 @@ -981,96 +980,10 @@ ;; periodically update the db with runtime ;; when the process exits look at the db, if still RUNNING after 10 seconds set ;; state/status appropriately (process-wait pid))) -;;====================================================================== -;; Maintenance -;;====================================================================== - -(define (rmt:find-and-mark-incomplete run-id ovr-deadtime) - (let* ((cfg-deadtime (configf:lookup-number *configdat* "setup" "deadtime")) - (test-stats-update-period (configf:lookup-number *configdat* "setup" "test-stats-update-period"))) - (rmt:find-and-mark-incomplete-engine run-id ovr-deadtime cfg-deadtime 
test-stats-update-period) - ;;call end of eud of run detection for posthook - (launch:end-of-run-check run-id))) - -;; select end_time-now from -;; (select testname,item_path,event_time+run_duration as -;; end_time,strftime('%s','now') as now from tests where state in -;; ('RUNNING','REMOTEHOSTSTART','LAUNCHED')); -;; -;; NOT EASY TO MIGRATE TO db{file,mod} -;; -(define (rmt:find-and-mark-incomplete-engine run-id ovr-deadtime cfg-deadtime test-stats-update-period) - (let* ((incompleted '()) - (oldlaunched '()) - (toplevels '()) - ;; The default running-deadtime is 720 seconds = 12 minutes. - ;; "(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))" = 200 + (2 * (200 + 30 + 30)) - (deadtime-trim (or ovr-deadtime cfg-deadtime)) - (server-start-allowance 200) - (server-overloaded-budget 200) - (launch-monitor-off-time (or test-stats-update-period 30)) - (launch-monitor-on-time-budget 30) - (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget)) - (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30)) - (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default)) - (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period))) - (running-deadtime (or deadtime-trim running-deadtime-default))) ;; two minutes (30 seconds between updates, this leaves 3x grace period) - - (debug:print-info 4 *default-log-port* "running-deadtime = " running-deadtime) - (debug:print-info 4 *default-log-port* "deadtime-trim = " deadtime-trim) - - (let* ((dat (rmt:get-toplevels-and-incompletes run-id running-deadtime remotehoststart-deadtime))) - (set! oldlaunched (list-ref dat 1)) - (set! toplevels (list-ref dat 2)) - (set! 
incompleted (list-ref dat 0))) - - (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " - (length toplevels) " old LAUNCHED toplevel tests and " - (length incompleted) " tests marked RUNNING but apparently dead.") - - ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE. - ;; - ;; (db:delay-if-busy dbdat) - (let* ((min-incompleted-ids (map car incompleted)) ;; do 'em all - (all-ids (append min-incompleted-ids (map car oldlaunched)))) - (if (> (length all-ids) 0) - (begin - ;; (launch:is-test-alive "localhost" 435) - (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") - " as DEAD") - (for-each - (lambda (test-id) - (let* ((tinfo (rmt:get-test-info-by-id run-id test-id)) - (run-dir (db:test-get-rundir tinfo)) - (host (db:test-get-host tinfo)) - (pid (db:test-get-process_id tinfo)) - (result (rmt:get-status-from-final-status-file run-dir))) - (if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result))) - (begin - (debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD") - (rmt:set-state-status-and-roll-up-items - run-id test-id 'foo "COMPLETED" "PASS" - "Test stopped responding but it has PASSED; marking it PASS in the DB.")) - (let ((is-alive (and (not (eq? pid 0)) ;; 0 is default in re-used field "attemptnum" where pid stored. - (commonmod:is-test-alive host pid)))) - (if is-alive - (debug:print 0 *default-log-port* "INFO: test " test-id " on host " host - " has a process on pid " pid ", NOT setting to DEAD.") - (begin - (debug:print 0 *default-log-port* "INFO: test " test-id - " final state/status is not COMPLETED/PASS. 
It is " result) - (rmt:set-state-status-and-roll-up-items - run-id test-id 'foo "COMPLETED" "DEAD" - "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead."))))))) - ;; call end of eud of run detection for posthook - from merge, is it needed? - ;; (launch:end-of-run-check run-id) - all-ids) - ))))) - ;; TODO: deprecate me in favor of ezsteps.scm ;; (define (ezsteps:runstep ezstep run-id test-id exit-info m tal testconfig all-steps-dat) (let* ((stepname (car ezstep)) ;; do stuff to run the step Index: processmod.scm ================================================================== --- processmod.scm +++ processmod.scm @@ -29,10 +29,12 @@ process:cmd-run->list process:alive? run-n-wait process:cmd-run-with-stderr-and-exitcode->list + process:alive-on-host? + process:get-sub-pids ) (import scheme) (cond-expand (chicken-4 Index: rmtmod.scm ================================================================== --- rmtmod.scm +++ rmtmod.scm @@ -127,10 +127,22 @@ rmt:get-count-tests-running-in-jobgroup rmt:get-count-tests-running-for-run-id rmt:test-set-state-status-by-id mt:test-set-state-status-by-id + + rmt:get-status-from-final-status-file + rmt:get-toplevels-and-incompletes + + rmt:test-set-log! + rmt:teststep-set-status! + + rmt:delete-steps-for-test! 
+ rmt:test-set-state-status + rmt:get-test-state-status-by-id + rmt:test-set-top-process-pid + ) (import scheme chicken Index: runsmod.scm ================================================================== --- runsmod.scm +++ runsmod.scm @@ -45,10 +45,18 @@ (use srfi-69) (module runsmod ( + rmt:find-and-mark-incomplete + launch:setup + launch:end-of-run-check + launch:test-copy + + set-item-env-vars + runs:set-megatest-env-vars + full-runconfigs-read ) (import scheme) (cond-expand (chicken-4 @@ -4541,7 +4549,95 @@ (debug:print-info 0 *default-log-port* "remove testdat") (runs:remove-test-directory test-dat 'archive-remove))))) (hash-table-ref test-groups test-base))))) (hash-table-keys disk-groups)) #t)) + +;;====================================================================== +;; Maintenance +;;====================================================================== + +(define (rmt:find-and-mark-incomplete run-id ovr-deadtime) + (let* ((cfg-deadtime (configf:lookup-number *configdat* "setup" "deadtime")) + (test-stats-update-period (configf:lookup-number *configdat* "setup" "test-stats-update-period"))) + (rmt:find-and-mark-incomplete-engine run-id ovr-deadtime cfg-deadtime test-stats-update-period) + ;; call end-of-run detection for posthook + (launch:end-of-run-check run-id))) + +;; select end_time-now from +;; (select testname,item_path,event_time+run_duration as +;; end_time,strftime('%s','now') as now from tests where state in +;; ('RUNNING','REMOTEHOSTSTART','LAUNCHED')); +;; +;; NOT EASY TO MIGRATE TO db{file,mod} +;; +(define (rmt:find-and-mark-incomplete-engine run-id ovr-deadtime cfg-deadtime test-stats-update-period) + (let* ((incompleted '()) + (oldlaunched '()) + (toplevels '()) + ;; The default running-deadtime is 720 seconds = 12 minutes when test-stats-update-period is unset (it then falls back to 30).
+ ;; "(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))" = 200 + (2 * (200 + 30 + 30)) + (deadtime-trim (or ovr-deadtime cfg-deadtime)) + (server-start-allowance 200) + (server-overloaded-budget 200) + (launch-monitor-off-time (or test-stats-update-period 30)) + (launch-monitor-on-time-budget 30) + (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget)) + (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30)) + (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default)) + (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period))) + (running-deadtime (or deadtime-trim running-deadtime-default))) ;; an explicit deadtime (ovr-deadtime, else cfg-deadtime) replaces the computed default + + (debug:print-info 4 *default-log-port* "running-deadtime = " running-deadtime) + (debug:print-info 4 *default-log-port* "deadtime-trim = " deadtime-trim) + + (let* ((dat (rmt:get-toplevels-and-incompletes run-id running-deadtime remotehoststart-deadtime))) + (set! oldlaunched (list-ref dat 1)) + (set! toplevels (list-ref dat 2)) + (set! incompleted (list-ref dat 0))) + + (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " + (length toplevels) " old LAUNCHED toplevel tests and " + (length incompleted) " tests marked RUNNING but apparently dead.") + + ;; These look like defunct tests. Each one is set to COMPLETED/PASS or COMPLETED/DEAD below, or left alone if its process is still alive.
+ ;; + ;; (db:delay-if-busy dbdat) + (let* ((min-incompleted-ids (map car incompleted)) ;; do 'em all + (all-ids (append min-incompleted-ids (map car oldlaunched)))) + (if (> (length all-ids) 0) + (begin + ;; (launch:is-test-alive "localhost" 435) + (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") + " as DEAD") + (for-each + (lambda (test-id) + (let* ((tinfo (rmt:get-test-info-by-id run-id test-id)) + (run-dir (db:test-get-rundir tinfo)) + (host (db:test-get-host tinfo)) + (pid (db:test-get-process_id tinfo)) + (result (rmt:get-status-from-final-status-file run-dir))) + (if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result))) + (begin + (debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD") + (rmt:set-state-status-and-roll-up-items + run-id test-id 'foo "COMPLETED" "PASS" + "Test stopped responding but it has PASSED; marking it PASS in the DB.")) + (let ((is-alive (and (not (eq? pid 0)) ;; 0 is the default in the re-used field "attemptnum" where the pid is stored. + (commonmod:is-test-alive host pid)))) + (if is-alive + (debug:print 0 *default-log-port* "INFO: test " test-id " on host " host + " has a process on pid " pid ", NOT setting to DEAD.") + (begin + (debug:print 0 *default-log-port* "INFO: test " test-id + " final state/status is not COMPLETED/PASS. It is " result) + (rmt:set-state-status-and-roll-up-items + run-id test-id 'foo "COMPLETED" "DEAD" + "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead."))))))) + ;; call end-of-run detection for posthook - from merge, is it needed? + ;; (launch:end-of-run-check run-id) + all-ids) + ))))) + + ) Index: subrunmod.scm ================================================================== --- subrunmod.scm +++ subrunmod.scm @@ -47,10 +47,12 @@ subrun:kill-subrun subrun:get-log-path subrun:remove-subrun subrun:subrun-removed? 
subrun:subrun-test-initialized? + subrun:launch-cmd + subrun:initialize-toprun-test ) (import scheme) (cond-expand (chicken-4 Index: tasksmod.scm ================================================================== --- tasksmod.scm +++ tasksmod.scm @@ -48,10 +48,12 @@ tests:test-set-status! common:get-launcher tasks:kill-runner tests:get-testconfig tests:get-waitons + + tests:get-test-path-from-environment ) (import scheme) (cond-expand Index: testsmod.scm ================================================================== --- testsmod.scm +++ testsmod.scm @@ -44,10 +44,14 @@ ( tests:summarize-items tests:filter-non-runnable tests:sort-by-priority-and-waiton + tests:summarize-test + tests:save-final-status + tests:update-central-meta-info + tests:set-full-meta-info ) (import scheme) (cond-expand (chicken-4