Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -49,10 +49,11 @@ test-get-paths-matching-keynames-target-new get-prereqs-not-met get-count-tests-running-for-run-id get-run-info get-run-status + get-run-state get-run-stats get-run-times get-targets get-target ;; register-run @@ -80,11 +81,11 @@ testmeta-get-record have-incompletes? synchash-get get-changed-record-ids get-run-record-ids - )) + get-not-completed-cnt)) (define api:write-queries '( get-keys-write ;; dummy "write" query to force server start @@ -97,10 +98,11 @@ delete-test-records delete-old-deleted-test-records test-set-state-status test-set-top-process-pid set-state-status-and-roll-up-items + update-pass-fail-counts top-test-set-per-pf-counts ;; (db:top-test-set-per-pf-counts (db:get-db *db* 5) 5 "runfirst") ;; RUNS register-run @@ -107,11 +109,11 @@ set-tests-state-status delete-run lock/unlock-run update-run-event_time mark-incomplete - + set-state-status-and-roll-up-run ;; STEPS teststep-set-status! ;; TEST DATA test-data-rollup @@ -198,10 +200,11 @@ ((delete-test-records) (apply db:delete-test-records dbstruct params)) ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params)) ((test-set-state-status) (apply db:test-set-state-status dbstruct params)) ((test-set-top-process-pid) (apply db:test-set-top-process-pid dbstruct params)) ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params)) + ((set-state-status-and-roll-up-run) (apply db:set-state-status-and-roll-up-run dbstruct params)) ((top-test-set-per-pf-counts) (apply db:top-test-set-per-pf-counts dbstruct params)) ((test-set-archive-block-id) (apply db:test-set-archive-block-id dbstruct params)) ;; RUNS ((register-run) (apply db:register-run dbstruct params)) @@ -276,18 +279,21 @@ ((get-testinfo-state-status) (apply db:get-testinfo-state-status dbstruct params)) ((test-get-top-process-pid) (apply db:test-get-top-process-pid dbstruct params)) ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params)) ((get-prereqs-not-met) (apply db:get-prereqs-not-met dbstruct params)) ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params)) + ((get-not-completed-cnt) (apply db:get-not-completed-cnt dbstruct params)) ((synchash-get) (apply synchash:server-get dbstruct params)) ((get-raw-run-stats) (apply db:get-raw-run-stats dbstruct params)) ((get-test-times) (apply db:get-test-times dbstruct params)) ;; RUNS ((get-run-info) (apply db:get-run-info dbstruct params)) ((get-run-status) (apply db:get-run-status dbstruct params)) + ((get-run-state) (apply db:get-run-state dbstruct params)) ((set-run-status) (apply db:set-run-status dbstruct params)) + ((set-run-state-status) (apply db:set-run-state-status dbstruct params)) ((get-tests-for-run) (apply db:get-tests-for-run dbstruct params)) ((get-test-id) (apply db:get-test-id dbstruct params)) ((get-tests-for-run-mindata) (apply db:get-tests-for-run-mindata dbstruct params)) ((get-tests-for-runs-mindata) (apply db:get-tests-for-runs-mindata dbstruct params)) ((get-runs) (apply db:get-runs dbstruct params)) Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -157,10 +157,11 @@ ;; RA => Might require revert for filters ;; create a watch dog to move changes from lt/.db/*.db to megatest.db ;; ;;;(if (file-write-access? (conc *toppath* "/megatest.db")) ;;(debug:print-info 13 *default-log-port* "Before common:watchdog spawn") + (thread-start! (make-thread common:watchdog "Watchdog thread")) ;;(debug:print-info 13 *default-log-port* "After common:watchdog spawn") ;; (if (not (args:get-arg "-use-db-cache")) ;; (begin ;; (debug:print-info 0 *default-log-port* "Forcing db-cache mode due to read-only access to megatest.db") @@ -2636,14 +2637,16 @@ #:tabchangepos-cb (lambda (obj curr prev) (debug:catch-and-dump (lambda () (let* ((tab-num (dboard:commondat-curr-tab-num commondat)) (tabdat (dboard:common-get-tabdat commondat tab-num: tab-num))) + (dboard:tabdat-layout-update-ok-set! tabdat #f)) (dboard:commondat-curr-tab-num-set! commondat curr) (let* ((tab-num (dboard:commondat-curr-tab-num commondat)) (tabdat (dboard:common-get-tabdat commondat tab-num: tab-num))) + (dboard:commondat-please-update-set! commondat #t) (dboard:tabdat-layout-update-ok-set! tabdat #t))) "tabchangepos")) (dashboard:summary commondat stats-dat tab-num: 0) runs-view Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -2628,13 +2628,21 @@ (define (db:set-run-status dbstruct run-id status msg) (db:with-db dbstruct #f #f (lambda (db) - (if msg + (if msg (sqlite3:execute db "UPDATE runs SET status=?,comment=? WHERE id=?;" status msg run-id) (sqlite3:execute db "UPDATE runs SET status=? WHERE id=?;" status run-id))))) + +(define (db:set-run-state-status dbstruct run-id state status ) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:execute db "UPDATE runs SET status=?,state=? WHERE id=?;" status state run-id)))) + + (define (db:get-run-status dbstruct run-id) (let ((res "n/a")) (db:with-db dbstruct #f #f @@ -2644,10 +2652,24 @@ (set! res status)) db "SELECT status FROM runs WHERE id=?;" run-id) res)))) + +(define (db:get-run-state dbstruct run-id) + (let ((res "n/a")) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (status) + (set! res status)) + db + "SELECT state FROM runs WHERE id=?;" + run-id) + res)))) + ;;====================================================================== ;; K E Y S ;;====================================================================== @@ -3020,10 +3042,22 @@ (lambda (db) (sqlite3:first-result db "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? AND NOT (uname = 'n/a' AND item_path = '') AND testname=?;" run-id testname)))) + +(define (db:get-not-completed-cnt dbstruct run-id) +(db:with-db + dbstruct + run-id + #f + (lambda (db) + ;(print "SELECT count(id) FROM tests WHERE state not in ('COMPLETED', 'DELETED') AND run_id=" run-id) + (sqlite3:first-result + db + "SELECT count(id) FROM tests WHERE state not in ('COMPLETED', 'DELETED') AND run_id=?;" run-id)))) + (define (db:get-count-tests-running-in-jobgroup dbstruct run-id jobgroup) (if (not jobgroup) 0 ;; (let ((testnames '())) ;; get the testnames @@ -3694,11 +3728,10 @@ ;; if test-name is an integer work off that instead of test-name test-path ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) ;; establish info on incoming test followed by info on top level test ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met - (let* ((testdat (if (number? test-name) (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id (db:get-test-info dbstruct run-id test-name item-path))) (test-id (db:test-get-id testdat)) (test-name (if (number? test-name) @@ -3721,63 +3754,69 @@ (lambda () ;; NB// Pass the db so it is part fo the transaction (db:test-set-state-status db run-id test-id state status comment) ;; this call sets the item state/status (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item (let* ((state-status-counts (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test - (running (length (filter (lambda (x) - (member (dbr:counts-state x) *common:running-states*)) - state-status-counts))) - (bad-not-started (length (filter (lambda (x) - (and (equal? (dbr:counts-state x) "NOT_STARTED") - (not (member (dbr:counts-status x) - *common:not-started-ok-statuses*)))) - state-status-counts))) - ;; (non-completes (filter (lambda (x) - ;; (not (equal? (dbr:counts-state x) "COMPLETED"))) - ;; state-status-counts)) - (all-curr-states (common:special-sort ;; worst -> best (sort of) - (delete-duplicates - (if (not (member state *common:dont-roll-up-states*)) - (cons state (map dbr:counts-state state-status-counts)) - (map dbr:counts-state state-status-counts))) - *common:std-states* >)) - (all-curr-statuses (common:special-sort ;; worst -> best - (delete-duplicates - (if (not (member state *common:dont-roll-up-states*)) - (cons status (map dbr:counts-status state-status-counts)) - (map dbr:counts-status state-status-counts))) + (state-stauses (db:roll-up-rules state-status-counts state status)) + (newstate (car state-stauses)) + (newstatus (cadr state-stauses))) + (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path" newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts) " state-status-counts: " + (apply conc + (map (lambda (x) + (conc + (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | ")) + state-status-counts))); end debug:print + + (if tl-test-id + (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct + )))))) + (mutex-unlock! *db-transaction-mutex*) + (if (and test-id state status (equal? status "AUTO")) + (db:test-data-rollup dbstruct run-id test-id status)) + tr-res))))) + +(define (db:roll-up-rules state-status-counts state status) + (let* ((running (length (filter (lambda (x) + (member (dbr:counts-state x) *common:running-states*)) + state-status-counts))) + (bad-not-started (length (filter (lambda (x) + (and (equal? (dbr:counts-state x) "NOT_STARTED") + (not (member (dbr:counts-status x) *common:not-started-ok-statuses*)))) + state-status-counts))) + (all-curr-states (common:special-sort ;; worst -> best (sort of) + (delete-duplicates + (if (and state (not (member state *common:dont-roll-up-states*))) + (cons state (map dbr:counts-state state-status-counts)) + (map dbr:counts-state state-status-counts))) + *common:std-states* >)) + (all-curr-statuses (common:special-sort ;; worst -> best + (delete-duplicates + (if (and state status (not (member state *common:dont-roll-up-states*))) + (cons status (map dbr:counts-status state-status-counts)) + (map dbr:counts-status state-status-counts))) *common:std-statuses* >)) - (non-completes (filter (lambda (x) - (not (member x (cons "COMPLETED" *common:dont-roll-up-states*)))) - all-curr-states)) - (preq-fails (filter (lambda (x) - (equal? x "PREQ_FAIL")) - all-curr-statuses)) - (num-non-completes (length non-completes)) - (newstate (cond - ((> running 0) "RUNNING") ;; anything running, call the situation running - ((> (length preq-fails) 0) - "NOT_STARTED") - ((> bad-not-started 0) "COMPLETED") ;; we have an ugly situation, it is completed in the sense we cannot do more. - ((> num-non-completes 0) (car non-completes)) ;; (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED - (else (car all-curr-states)))) - ;; (if (> running 0) - ;; "RUNNING" - ;; (if (> bad-not-started 0) - ;; "COMPLETED" - ;; (car all-curr-states)))) - (newstatus (cond - ((> (length preq-fails) 0) - "PREQ_FAIL") - ((or (> bad-not-started 0) - (and (equal? newstate "NOT_STARTED") - (> num-non-completes 0))) - "STARTED") - (else - (car all-curr-statuses))))) - - (debug:print-info 2 *default-log-port* + (non-completes (filter (lambda (x) + (not (member x (cons "COMPLETED" *common:dont-roll-up-states*)))) + all-curr-states)) + (preq-fails (filter (lambda (x) + (equal? x "PREQ_FAIL")) + all-curr-statuses)) + (num-non-completes (length non-completes)) + (newstate (cond + ((> running 0) "RUNNING") ;; anything running, call the situation running + ((> (length preq-fails) 0) "NOT_STARTED") + ((> bad-not-started 0) "COMPLETED") ;; we have an ugly situation, it is completed in the sense we cannot do more. + ((> num-non-completes 0) (car non-completes)) ;; (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED + (else (car all-curr-states)))) + (newstatus (cond + ((> (length preq-fails) 0) "PREQ_FAIL") + ((or (> bad-not-started 0) + (and (equal? newstate "NOT_STARTED") + (> num-non-completes 0))) + "STARTED") + (else (car all-curr-statuses))))) + (debug:print-info 2 *default-log-port* "\n--> probe db:set-state-status-and-roll-up-items: " "\n--> state-status-counts: "(map dbr:counts->alist state-status-counts) "\n--> running: "running "\n--> bad-not-started: "bad-not-started "\n--> non-non-completes: "num-non-completes @@ -3786,33 +3825,47 @@ "\n--> all-curr-statuses: "all-curr-statuses "\n--> newstate "newstate "\n--> newstatus "newstatus "\n\n") - ;; (print "bad-not-supported: " bad-not-support " all-curr-states: " all-curr-states " all-curr-statuses: " all-curr-states) - ;; " newstate: " newstate " newstatus: " newstatus) - ;; NB// Pass the db so it is part of the transaction - (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path"> bad-not-started="bad-not-started" newstate="newstate" newstatus="newstatus" num-non-completes="num-non-completes" non-completes="non-completes "len(sscs)="(length state-status-counts) " state-status-counts: " - (apply conc - (map (lambda (x) - (conc - (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | ")) - state-status-counts)) - - ); end debug:print - (if tl-test-id - (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct - )))))) - + ;; NB// Pass the db so it is part of the transaction + (list newstate newstatus))) + +(define (db:set-state-status-and-roll-up-run dbstruct run-id curr-state curr-status) + (mutex-lock! *db-transaction-mutex*) + (db:with-db + dbstruct #f #f + (lambda (db) + (let ((tr-res + (sqlite3:with-transaction + db + (lambda () + (let* ((state-status-counts (db:get-all-state-status-counts-for-run dbstruct run-id)) + (state-stauses (db:roll-up-rules state-status-counts #f #f )) + (newstate (car state-stauses)) + (newstatus (cadr state-stauses))) + (if (or (not (eq? newstate curr-state)) (not (eq? newstatus curr-status))) + (db:set-run-state-status dbstruct run-id newstate newstatus ))))))) (mutex-unlock! *db-transaction-mutex*) - (if (and test-id state status (equal? status "AUTO")) - (db:test-data-rollup dbstruct run-id test-id status)) - tr-res))))) + tr-res)))) + + +(define (db:get-all-state-status-counts-for-run dbstruct run-id) + (let* ((test-count-recs (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:map-row + (lambda (state status count) + (make-dbr:counts state: state status: status count: count)) + db + "SELECT state,status,count(id) FROM tests WHERE run_id=? GROUP BY state,status;" + run-id ))))) + test-count-recs)) + + ;; BBnote: db:get-all-state-status-counts-for-test returns dbr:counts object aggregating state and status of items of a given test, *not including rollup state/status* (define (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path item-state-in item-status-in) - - (let* ((test-info (db:get-test-info dbstruct run-id test-name item-path)) (item-state (or item-state-in (db:test-get-state test-info))) (item-status (or item-status-in (db:test-get-status test-info))) (other-items-count-recs (db:with-db dbstruct #f #f @@ -3891,11 +3944,11 @@ ;; TESTS '(register-test "INSERT OR IGNORE INTO tests (run_id,testname,event_time,item_path,state,status) VALUES (?,?,strftime('%s','now'),?,'NOT_STARTED','n/a');") ;; Test state and status '(set-test-state "UPDATE tests SET state=? WHERE id=?;") '(set-test-status "UPDATE tests SET state=? WHERE id=?;") - '(state-status "UPDATE tests SET state=?,status=? WHERE id=?;") ;; DONE + '(state-status "UPDATE tests SET state=?,status=? WHERE id=?;") ;; D/ONE '(state-status-msg "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;") ;; DONE ;; Test comment '(set-test-comment "UPDATE tests SET comment=? WHERE id=?;") '(set-test-start-time "UPDATE tests SET event_time=strftime('%s','now') WHERE id=?;") ;; DONE '(pass-fail-counts "UPDATE tests SET pass_count=?,fail_count=? WHERE id=?;") Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -470,11 +470,12 @@ (begin (debug:print-error 0 *default-log-port* "Nothing to kill, pid1=" pid1 ", pid2=" pid2) (tests:test-set-status! run-id test-id "KILLED" "FAILED TO KILL" (conc (args:get-arg "-m")" "kill-reason) #f) ;; BB ADDED kill-reason -- confirm OK with Matt ))) (mutex-unlock! m) - ;; no point in sticking around. Exit now. + ;; no point in sticking around. Exit now. But run end of run before exiting? + (launch:end-of-run-check run-id) (exit))) (if (hash-table-ref/default misc-flags 'keep-going #f) (begin (thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses (if (hash-table-ref/default misc-flags 'keep-going #f) ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta @@ -807,12 +808,11 @@ ;; only state and status needed - use lazy routine (testinfo (rmt:get-testinfo-state-status run-id test-id))) ;; Am I completed? (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED")) (let ((new-state (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status - ;; "COMPLETED" - ;; (db:test-get-state testinfo))) ;; else preseve the state as set within the test + ;; "COMPLETED" ;; (db:test-get-state testinfo))) ;; else preseve the state as set within the test ) (new-status (cond ((not (launch:einf-exit-status exit-info)) "FAIL") ;; job failed to run ... (vector-ref exit-info 1) ((eq? (launch:einf-rollup-status exit-info) 0) ;; (vector-ref exit-info 3) ;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO) @@ -839,16 +839,95 @@ (if (not (equal? item-path "")) (tests:summarize-items run-id test-id test-name #f)) (tests:summarize-test run-id test-id) ;; don't force - just update if no (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) (mutex-unlock! m) + (launch:end-of-run-check run-id ) (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n") (if (not (launch:einf-exit-status exit-info)) (exit 4)))) ))) +;; Spec for End of test +;; At end of each test call, after marking self as COMPLETED do run-state-status-rollup +;; At transition to run COMPLETED/X do hooks +;; Definition: test_dead if event_time + duration + 1 minute? < current_time AND +;; we can prove the process is not alive (ssh host pstree -A pid) +;; if dead safe to mark the test as killed in the db +;; State/status table +;; new +;; 100% COMPLETED/ (PASS,FAIL,ABORT etc.) ==> COMPLETED / X where X is same as itemized rollup +;; > 3 RUNNING with not test_dead do nothing (run should already be RUNNING/ na +;; > 0 RUNNING and test_dead then send KILLREQ ==> COMPLETED +;; 0 RUNNING ==> this is actually the first condition, should not get here + +(define (launch:end-of-run-check run-id ) + (let* ((not-completed-cnt (rmt:get-not-completed-cnt run-id)) + (running-cnt (rmt:get-count-tests-running-for-run-id run-id)) + (all-test-launched (rmt:get-var (conc "lunch-complete-" run-id))) + (current-state (rmt:get-run-state run-id)) + (current-status (rmt:get-run-status run-id))) + ;;get-vars run-id to query metadata table to check if all completed. if all-test-launched = yes then only not-completed-cnt = 0 means everyting is completed if no entry found in the table do nothing + (debug:print 0 *default-log-port* "rollup run state/status") + (rmt:set-state-status-and-roll-up-run run-id current-state current-status) + + (cond + ((and all-test-launched (eq? not-completed-cnt 0) (equal? all-test-launched "yes" )) + (debug:print 0 *default-log-port* "look for post hook.") + (runs:run-post-hook run-id)) + ((> running-cnt 3) + (debug:print 0 *default-log-port* "There are " running-cnt " tests running." )) + ((> running-cnt 0) + (debug:print 0 *default-log-port* "running cnt > 0 but <= 3 kill-running-tests-if-dead" ) + (let ((kill-cnt (launch:kill-tests-if-dead run-id))) + (if (and all-test-launched (equal? all-test-launched "yes") (eq? kill-cnt running-cnt)) + (launch:end-of-run-check run-id)))) ;;todo + (else (debug:print 0 *default-log-port* "Should it get here?? May be everything is not launched yet. Running test cnt:" running-cnt " Not completed test cnt:" not-completed-cnt) + (let* ((not-completed-tests (rmt:get-tests-for-run run-id "%" `("NOT_STARTED" "RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f))) + (if (> (length not-completed-tests) 0) + (let loop ((running-test (car not-completed-tests)) + (tal (cdr not-completed-tests))) + (let* ((test-name (vector-ref running-test 2)) + (item-path (vector-ref running-test 11))) + (debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed") + (if (not (null? tal)) + (loop (car tal) (cdr tal))))))))))) + +(define (launch:is-test-alive host pid) +(if (and host pid (not (equal? host "n/a"))) +(let* ((cmd (conc "ssh " host " pstree -A " pid)) + (output (with-input-from-pipe cmd read-lines))) + (print "cmd: " cmd "\n op: " output ) + (if(eq? (length output) 0) + #f + #t)) +#t)) + +(define (launch:kill-tests-if-dead run-id) + (let* ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f))) + (let loop ((running-test (car running-tests)) + (tal (cdr running-tests)) + (kill-cnt 0)) + (let* ((test-name (vector-ref running-test 2)) + (item-path (vector-ref running-test 11)) + (test-id (vector-ref running-test 0)) + (host (vector-ref running-test 6)) + (pid (rmt:test-get-top-process-pid run-id test-id)) + (event-time (vector-ref running-test 5)) + (duration (vector-ref running-test 12)) + (flag 0) + (curr-time (current-seconds))) + (if (and (< (+ event-time duration 600) curr-time) (not (launch:is-test-alive host pid))) ;;test has not updated duration in last 10 min then likely its not running but confirm before marking it as killed + (begin + (debug:print 0 *default-log-port* "test " test-name "/" item-path " needs to be killed") + (set! flag 1) + (rmt:set-state-status-and-roll-up-items run-id test-name item-path "KILLREQ" "n/a" #f))) + (if (not (null? tal)) + (loop (car tal) (cdr tal) (+ kill-cnt flag)) + (+ kill-cnt flag)))))) + ;; DO NOT USE - caching of configs is handled in launch:setup now. ;; (define (launch:cache-config) ;; if we have a linktree and -runtests and -target and the directory exists dump the config ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg Index: mtut.scm ================================================================== --- mtut.scm +++ mtut.scm @@ -217,10 +217,12 @@ ("-run-name" . n) ("-mode-patt" . o) ("-test-patt" . p) ;; idea, enhance margs ("-test-patt" "-testpatt") => yields one value in "-test-patt" ("-status" . s) ("-target" . t) + ("-reqtarg" . R) + ("-tag-expr" . x) ;; misc ("-debug" . #f) ;; for *verbosity* > 2 ("-load" . #f) ;; load and exectute a scheme file ("-log" . #f) @@ -241,10 +243,11 @@ ;; misc ("-repl" . #f) ("-immediate" . I) ("-preclean" . r) ("-prepend-contour" . w) + ("-force" . F) ("-list-pkt-keys" . #f) )) ;; alist to map actions to old megatest commands (define *action-keys* @@ -251,10 +254,12 @@ '((run . "-run") (rerun-clean . "-rerun-clean") (rerun-all . "-rerun-all") (kill-run . "-kill-runs") (kill-rerun . "-kill-rerun") + (lock . "-lock") + (unlock . "-unlock") (sync . "") (archive . "-archive") (set-ss . "-set-state-status") (remove . "-remove-runs"))) @@ -1463,11 +1468,12 @@ (set! *default-log-port* oup) ))) (if *action* (case (string->symbol *action*) - ((run remove rerun rerun-clean rerun-all set-ss archive kill list kill-run kill-rerun) + ((run remove rerun rerun-clean rerun-all set-ss archive kill list kill-run kill-rerun lock unlock) + (let* ((mtconfdat (simple-setup (args:get-arg "-start-dir"))) (mtconf (car mtconfdat)) (area (args:get-arg "-area")) ;; look up the area to dispatch to from [areas] section (areasec (if area (configf:lookup mtconf "areas" area) #f)) (areadat (if areasec (common:val->alist areasec) #f)) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -676,10 +676,14 @@ (define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f)) (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps))) (define (rmt:get-count-tests-running-for-run-id run-id) (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id))) + +(define (rmt:get-not-completed-cnt run-id) + (rmt:send-receive 'get-not-completed-cnt run-id (list run-id))) + ;; Statistical queries (define (rmt:get-count-tests-running run-id) (rmt:send-receive 'get-count-tests-running run-id (list run-id))) @@ -692,10 +696,14 @@ ;; state and status are extra hints not usually used in the calculation ;; (define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment) (rmt:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment))) + +(define (rmt:set-state-status-and-roll-up-run run-id state status) + (rmt:send-receive 'set-state-status-and-roll-up-run run-id (list run-id state status))) + (define (rmt:update-pass-fail-counts run-id test-name) (rmt:general-call 'update-pass-fail-counts run-id test-name test-name test-name)) (define (rmt:top-test-set-per-pf-counts run-id test-name) @@ -752,13 +760,21 @@ (rmt:send-receive 'lock/unlock-run #f (list run-id lock unlock user))) ;; set/get status (define (rmt:get-run-status run-id) (rmt:send-receive 'get-run-status #f (list run-id))) + +(define (rmt:get-run-state run-id) + (rmt:send-receive 'get-run-state #f (list run-id))) + (define (rmt:set-run-status run-id run-status #!key (msg #f)) (rmt:send-receive 'set-run-status #f (list run-id run-status msg))) + +(define (rmt:set-run-state-status run-id state status ) + (rmt:send-receive 'set-run-state-status #f (list run-id state status))) + (define (rmt:update-run-event_time run-id) (rmt:send-receive 'update-run-event_time #f (list run-id))) (define (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-runs-update #!key (sort-order "asc")) ;; fields of #f uses default Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -520,11 +520,13 @@ (runs:update-all-test_meta #f) ;; run the run prehook if there are no tests yet run for this run: ;; (runs:run-pre-hook run-id) - + ;; mark all test launced flag as false in the meta table + (rmt:set-var (conc "lunch-complete-" run-id) "no") + (rmt:set-run-state-status run-id "new" "n/a") ;; now add non-directly referenced dependencies (i.e. waiton) ;;====================================================================== ;; refactoring this block into tests:get-full-data ;; ;; What happended, this code is now duplicated in tests!? @@ -674,16 +676,16 @@ ;; recursive call to self (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))))) (debug:print-info 0 *default-log-port* "No tests to run"))) (debug:print-info 4 *default-log-port* "All done by here") ;; TODO: try putting post hook call here - (if (eq? run-count 0) - (begin - (debug:print-info 0 *default-log-port* "Calling Post Hook") - (debug:print-info 2 *default-log-port* " run-count " run-count) - (runs:run-post-hook run-id)) - (debug:print-info 2 *default-log-port* "Not calling post hook runcount = " run-count )) + ;(if (eq? run-count 0) + ; (begin + ; (debug:print-info 0 *default-log-port* "Calling Post Hook") + ; (debug:print-info 2 *default-log-port* " run-count " run-count) + ; (runs:run-post-hook run-id)) + ; (debug:print-info 2 *default-log-port* "Not calling post hook runcount = " run-count )) (rmt:tasks-set-state-given-param-key task-key "done") ;; (sqlite3:finalize! tasks-db) )) @@ -1647,11 +1649,13 @@ (loop (car reg)(cdr reg) '() reruns)) (else (debug:print-info 4 *default-log-port* "cond branch - " "rtq-9") (debug:print-info 4 *default-log-port* "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) ))) ;; end loop on sorted test names - + ;; this is the point where everything is launced and now you can mark the run in metadata table as all launced + (rmt:set-var (conc "lunch-complete-" run-id) "yes") + ;; now *if* -run-wait we wait for all tests to be done ;; Now wait for any RUNNING tests to complete (if in run-wait mode) (thread-sleep! 10) ;; I think there is a race condition here. Let states/statuses settle (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id)) (prev-num-running 0)) @@ -2494,17 +2498,23 @@ (let* ((db #f) (rundat (mt:get-runs-by-patt keys runname target)) (header (vector-ref rundat 0)) (runs (vector-ref rundat 1))) (for-each (lambda (run) - (let ((run-id (db:get-value-by-header run header "id"))) + (let ((run-id (db:get-value-by-header run header "id")) + (str (if lock + "lock" + "unlock"))) (if (or lock (and unlock - (begin + (or (args:get-arg "-force") + (begin (print "Do you really wish to unlock run " run-id "?\n y/n: ") - (equal? "y" (read-line))))) + (equal? "y" (read-line)))))) + (begin (rmt:lock/unlock-run run-id lock unlock user) + (debug:print-info 0 *default-log-port* "Done " str " on run id " run-id)) (debug:print-info 0 *default-log-port* "Skipping lock/unlock on " run-id)))) runs))) ;;====================================================================== ;; Rollup runs ;;====================================================================== Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -993,10 +993,11 @@ ;; get runs changed since last sync ;; (define (tasks:sync-test-data dbh cached-info area-info) ;; (let* (( (define (tasks:sync-to-postgres configdat dest) + (print "In sync") (let* ((dbh (pgdb:open configdat dbname: dest)) (area-info (pgdb:get-area-by-path dbh *toppath*)) (cached-info (make-hash-table)) (start (current-seconds)) (test-patt (if (args:get-arg "-testpatt") @@ -1014,15 +1015,17 @@ (exit 1))) (if (and (not target) run-name) (begin (print "Error: Provide target") (exit 1))) - + (print "123") + (exit 1) (for-each (lambda (dtype) (hash-table-set! cached-info dtype (make-hash-table))) '(runs targets tests steps data)) (hash-table-set! cached-info 'start start) ;; when done we'll set sync times to this + (print "here") (if area-info (let* ((last-sync-time (vector-ref area-info 3)) (smallest-last-update-time (make-hash-table)) (changed (if (and target run-name) (rmt:get-run-record-ids target run-name (rmt:get-keys) test-patt) @@ -1035,10 +1038,11 @@ (area-tag (if (args:get-arg "-area-tag") (args:get-arg "-area-tag") (if (args:get-arg "-area") (args:get-arg "-area") "")))) + (print "here2") (if (and (equal? area-tag "") (not (pgdb:is-area-taged dbh (vector-ref area-info 0)))) (set! area-tag *default-area-tag*)) (if (not (equal? area-tag "")) (task:add-area-tag dbh area-info area-tag)) (if (or (not (null? test-ids)) (not (null? run-ids)))