Index: TODO ================================================================== --- TODO +++ TODO @@ -56,6 +56,26 @@ .. Move main.db to global? .. [ run-id.db inmemdb last-mod last-read last-sync inuse ] . Re-work all queries to use run-id to dereference server . Open main.db directly in calls to -runtests etc. No need to talk remote? . remove common:faux-lock + +db:get-test-info-by-id +db:get-test-state-status-by-id +db:get-test-info - do a get id by name/item-path + cache the id- + use test id plus run id to get from cache + +need to do db:get-test-info-db +look at html gen for items - rollup needs deduplication nonoverlap + +;; cache write these with transaction +db:teststep-set-status! +db:test-set-top-process-id + +;; called a lot, maybe from rollup? +db:get-all-state-status-counts-for-test + +;; load to move from server to client +tests:summarize-items ;; appears to be on client +tests:summarize-tests Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -2315,20 +2315,18 @@ ;; ;; NB// Ultimately this will be deprecated in deference to mt:test-set-state-status-by-id ;; ;; NOTE: run-id is not used ;; ;; (define (db:test-set-state-status dbstruct run-id test-id newstate newstatus newcomment) - (let* ((hash-key (cons run-id test-id))) - (hash-table-delete! *db:get-test-info-by-id-cache* hash-key) - (hash-table-delete! *db:get-test-state-status-by-id-cache*hash-key)) (db:with-db dbstruct run-id #t (lambda (dbdat db) (db:test-set-state-status-db db run-id test-id newstate newstatus newcomment)))) (define (db:test-set-state-status-db db run-id test-id newstate newstatus newcomment) + ;; clear cache after this, I think that makes sense (cond ((and newstate newstatus newcomment) (sqlite3:execute db "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;" newstate newstatus newcomment ;; (sdb:qry 'getid newcomment) test-id)) ((and newstate newstatus) @@ -2337,10 +2335,13 @@ (if newstate (sqlite3:execute db "UPDATE tests SET state=? WHERE id=?;" newstate test-id)) (if newstatus (sqlite3:execute db "UPDATE tests SET status=? WHERE id=?;" newstatus test-id)) (if newcomment (sqlite3:execute db "UPDATE tests SET comment=? WHERE id=?;" newcomment ;; (sdb:qry 'getid newcomment) test-id)))) ;; (mt:process-triggers dbstruct run-id test-id newstate newstatus)) ;; NOTE: Moved into calling function + (let* ((hash-key (cons run-id test-id))) + (hash-table-delete! *db:get-test-info-by-id-cache* hash-key) + (hash-table-delete! *db:get-test-state-status-by-id-cache* hash-key)) ) ;; NEW BEHAVIOR: Count tests running in all runs! ;; (define (db:get-count-tests-running dbstruct run-id) ;; fastmode) @@ -3126,25 +3127,10 @@ (print-call-chain (current-error-port)) msg))) ;; crude reply for when things go awry ((zmq nmsg)(with-input-from-string msg (lambda ()(deserialize)))) (else msg))) ;; rpc -;; ; This is to be the big daddy call NOPE: Replaced by db:set-state-status-and-roll-up-items -;; ; -;; define (db:test-set-state-status dbstruct run-id test-id state status msg) -;; (let ((dbdat (db:get-subdb dbstruct run-id))) -;; (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) -;; (db:general-call dbdat 'set-test-start-time (list test-id))) -;; ;; (if msg -;; ;; (db:general-call dbdat 'state-status-msg (list state status msg test-id)) -;; ;; (db:general-call dbdat 'state-status (list state status test-id))) -;; (db:set-state-status-and-roll-up-items dbstruct run-id test-id #f state status msg) -;; ;; process the test_data table -;; (if (and test-id state status (equal? status "AUTO")) -;; (db:test-data-rollup dbstruct run-id test-id status)) -;; (mt:process-triggers dbstruct run-id test-id state status))) - ;; state is the priority rollup of all states ;; status is the priority rollup of all completed statesfu ;; ;; if test-name is an integer work off that as test-id instead of test-name test-path ;; @@ -3185,11 +3171,12 @@ (state-statuses (db:roll-up-rules state-status-counts state status)) (newstate (car state-statuses)) (newstatus (cadr state-statuses))) (set! new-state-eh newstate) (set! new-status-eh newstatus) - (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path" newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts) " state-status-counts: " + (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path + " newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts) " state-status-counts: " (apply conc (map (lambda (x) (conc (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | ")) state-status-counts))); end debug:print Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -741,11 +741,12 @@ ) ;; for automated creation of the rollup html file this is a good place... (if (not (equal? item-path "")) - (tests:summarize-items run-id test-id test-name #f)) + (tests:summarize-items run-id test-id test-name #f)) + ;; BUG was this meant to be the antecnt of the if above? (tests:summarize-test run-id test-id) ;; don't force - just update if no ;; Leave a .final-status file for the top level test (tests:save-final-status run-id test-id) (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) ;; end of let* Index: mt.scm ================================================================== --- mt.scm +++ mt.scm @@ -190,13 +190,13 @@ (if prev-nbfake-log (setenv "NBFAKE_LOG" prev-nbfake-log) (unsetenv "NBFAKE_LOG")) )) -(define (mt:process-triggers dbstruct run-id test-id newstate newstatus) +(define (mt:process-triggers run-id test-id newstate newstatus) (if test-id - (let* ((test-dat (db:get-test-info-by-id dbstruct run-id test-id))) + (let* ((test-dat (rmt:get-test-info-by-id run-id test-id))) (if test-dat (let* ((test-rundir (db:test-get-rundir test-dat)) ;; ) ;; ) (test-name (db:test-get-testname test-dat)) (item-path (db:test-get-item-path test-dat)) (duration (db:test-get-run_duration test-dat)) @@ -262,21 +262,11 @@ (begin (debug:print-error 0 *default-log-port* "bad data handed to mt:test-set-state-status-by-id, run-id=" run-id ", test-id=" test-id ", newstate=" newstate) (print-call-chain (current-error-port)) #f) (begin - ;; cond - ;; ((and newstate newstatus newcomment) - ;; (rmt:general-call 'state-status-msg run-id newstate newstatus newcomment test-id)) - ;; ((and newstate newstatus) - ;; (rmt:general-call 'state-status run-id newstate newstatus test-id)) - ;; (else - ;; (if newstate (rmt:general-call 'set-test-state run-id newstate test-id)) - ;; (if newstatus (rmt:general-call 'set-test-status run-id newstatus test-id)) - ;; (if newcomment (rmt:general-call 'set-test-comment run-id newcomment test-id)))) (rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment) - ;; (mt:process-triggers run-id test-id newstate newstatus) #t))) (define (mt:test-set-state-status-by-id-unless-completed run-id test-id newstate newstatus newcomment) (let* ((test-vec (rmt:get-testinfo-state-status run-id test-id)) @@ -294,11 +284,146 @@ ;;(mt:test-set-state-status-by-id run-id test-id new-state new-status new-comment))) (define (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path new-state new-status new-comment) (let ((test-id (rmt:get-test-id run-id test-name item-path))) (mt:test-set-state-status-by-id-unless-completed run-id test-id new-state new-status new-comment))) - + +;; state and status are extra hints not usually used in the calculation +;; +(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:client-side-set-state-status-and-roll-up run-id test-name item-path state status comment) + ;; (rmtmod:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment)) + ) + +(define (rmt:client-side-set-state-status-and-roll-up run-id test-name item-path state status comment) + ;; establish info on incoming test followed by info on top level test + ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met + (let* ((testdat (if (number? test-name) + (rmt:get-test-info-by-id run-id test-name) ;; test-name is actually a test-id + (db:keep-trying-until-true ;; in our threaded stuff this call could happen before the test is registered (maybe?) + rmt:get-test-info + (list run-id test-name item-path) + 10))) + (test-id (db:test-get-id testdat)) + (test-name (if (number? test-name) + (db:test-get-testname testdat) + test-name)) + (item-path (db:test-get-item-path testdat)) + (tl-testdat (rmt:get-test-info run-id test-name "")) + (tl-test-id (if tl-testdat + (db:test-get-id tl-testdat) + #f)) + (new-state-eh #f) + (new-status-eh #f)) + (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) + (rmt:general-call run-id 'set-test-start-time (list test-id))) + (let* ((res (begin + (rmt:test-set-state-status-db run-id test-id state status comment) ;; this call sets the item state/status + (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item + (let* ((state-status-counts (rmt:get-all-state-status-counts-for-test run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test + (state-statuses (db:roll-up-rules state-status-counts state status)) + (newstate (car state-statuses)) + (newstatus (cadr state-statuses))) + (set! new-state-eh newstate) + (set! new-status-eh newstatus) + (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path + " newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts) " state-status-counts: " + (apply conc + (map (lambda (x) + (conc + (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | ")) + state-status-counts))); end debug:print + (if tl-test-id + (rmt:test-set-state-status run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct + ))))) + (mutex-unlock! *db-transaction-mutex*) + (if (and test-id state status (equal? status "AUTO")) + (rmt:test-data-rollup run-id test-id status)) + (if new-state-eh ;; moved from db:test-set-state-status + (mt:process-triggers run-id test-id new-state-eh new-status-eh)) + res))) + +;; select end_time-now from +;; (select testname,item_path,event_time+run_duration as +;; end_time,strftime('%s','now') as now from tests where state in +;; ('RUNNING','REMOTEHOSTSTART','LAUNCHED')); +;; +;; NOT EASY TO MIGRATE TO db{file,mod} +;; +(define (rmt:find-and-mark-incomplete-engine run-id ovr-deadtime cfg-deadtime test-stats-update-period) + (let* ((incompleted '()) + (oldlaunched '()) + (toplevels '()) + ;; The default running-deadtime is 720 seconds = 12 minutes. + ;; "(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))" = 200 + (2 * (200 + 30 + 30)) + (deadtime-trim (or ovr-deadtime cfg-deadtime)) + (server-start-allowance 200) + (server-overloaded-budget 200) + (launch-monitor-off-time (or test-stats-update-period 30)) + (launch-monitor-on-time-budget 30) + (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget)) + (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30)) + (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default)) + (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period))) + (running-deadtime (or deadtime-trim running-deadtime-default))) ;; two minutes (30 seconds between updates, this leaves 3x grace period) + + (debug:print-info 4 *default-log-port* "running-deadtime = " running-deadtime) + (debug:print-info 4 *default-log-port* "deadtime-trim = " deadtime-trim) + + (let* ((dat (rmt:get-toplevels-and-incompletes run-id running-deadtime remotehoststart-deadtime))) + (set! oldlaunched (list-ref dat 1)) + (set! toplevels (list-ref dat 2)) + (set! incompleted (list-ref dat 0))) + + (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " + (length toplevels) " old LAUNCHED toplevel tests and " + (length incompleted) " tests marked RUNNING but apparently dead.") + + ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE. + ;; + ;; (db:delay-if-busy dbdat) + (let* ((min-incompleted-ids (map car incompleted)) ;; do 'em all + (all-ids (append min-incompleted-ids (map car oldlaunched)))) + (if (> (length all-ids) 0) + (begin + ;; (launch:is-test-alive "localhost" 435) + (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") + " as DEAD") + (for-each + (lambda (test-id) + (let* ((tinfo (rmt:get-test-info-by-id run-id test-id)) + (run-dir (db:test-get-rundir tinfo)) + (host (db:test-get-host tinfo)) + (pid (db:test-get-process_id tinfo)) + (result (rmt:get-status-from-final-status-file run-dir))) + (if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result))) + (begin + (debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD") + (rmt:set-state-status-and-roll-up-items + run-id test-id 'foo "COMPLETED" "PASS" + "Test stopped responding but it has PASSED; marking it PASS in the DB.")) + (let ((is-alive (and (not (eq? pid 0)) ;; 0 is default in re-used field "attemptnum" where pid stored. + (commonmod:is-test-alive host pid)))) + (if is-alive + (debug:print 0 *default-log-port* "INFO: test " test-id " on host " host + " has a process on pid " pid ", NOT setting to DEAD.") + (begin + (debug:print 0 *default-log-port* "INFO: test " test-id + " final state/status is not COMPLETED/PASS. It is " result) + (rmt:set-state-status-and-roll-up-items + run-id test-id 'foo "COMPLETED" "DEAD" + "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead."))))))) + ;; call end of eud of run detection for posthook - from merge, is it needed? + ;; (launch:end-of-run-check run-id) + all-ids) + ))))) + + + + + (define (mt:lazy-read-test-config test-name) (let ((tconf (hash-table-ref/default *testconfigs* test-name #f))) (if tconf tconf (let ((test-dirs (tests:get-tests-search-path *configdat*))) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -443,10 +443,11 @@ (define (rmt:set-state-status-and-roll-up-run run-id state status) (assert (number? run-id) "FATAL: Run id required.") (rmt:send-receive 'set-state-status-and-roll-up-run run-id (list run-id state status))) +;; run on client version of set-state-status-and-roll-up-run (define (rmt:update-pass-fail-counts run-id test-name) (assert (number? run-id) "FATAL: Run id required.") (rmt:general-call 'update-pass-fail-counts run-id test-name test-name test-name)) Index: rmtmod.scm ================================================================== --- rmtmod.scm +++ rmtmod.scm @@ -177,17 +177,10 @@ (define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in) (assert (number? run-id) "FATAL: Run id required.") (rmtmod:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in))) -;; state and status are extra hints not usually used in the calculation -;; -(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment) - (assert (number? run-id) "FATAL: Run id required.") - (rmtmod:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment))) - - ;;====================================================================== ;; Maintenance ;;====================================================================== @@ -204,82 +197,7 @@ #f ) (with-input-from-file infile read-lines) ))) -;; select end_time-now from -;; (select testname,item_path,event_time+run_duration as -;; end_time,strftime('%s','now') as now from tests where state in -;; ('RUNNING','REMOTEHOSTSTART','LAUNCHED')); -;; -;; NOT EASY TO MIGRATE TO db{file,mod} -;; -(define (rmt:find-and-mark-incomplete-engine run-id ovr-deadtime cfg-deadtime test-stats-update-period) - (let* ((incompleted '()) - (oldlaunched '()) - (toplevels '()) - ;; The default running-deadtime is 720 seconds = 12 minutes. - ;; "(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))" = 200 + (2 * (200 + 30 + 30)) - (deadtime-trim (or ovr-deadtime cfg-deadtime)) - (server-start-allowance 200) - (server-overloaded-budget 200) - (launch-monitor-off-time (or test-stats-update-period 30)) - (launch-monitor-on-time-budget 30) - (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget)) - (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30)) - (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default)) - (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period))) - (running-deadtime (or deadtime-trim running-deadtime-default))) ;; two minutes (30 seconds between updates, this leaves 3x grace period) - - (debug:print-info 4 *default-log-port* "running-deadtime = " running-deadtime) - (debug:print-info 4 *default-log-port* "deadtime-trim = " deadtime-trim) - - (let* ((dat (rmt:get-toplevels-and-incompletes run-id running-deadtime remotehoststart-deadtime))) - (set! oldlaunched (list-ref dat 1)) - (set! toplevels (list-ref dat 2)) - (set! incompleted (list-ref dat 0))) - - (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " - (length toplevels) " old LAUNCHED toplevel tests and " - (length incompleted) " tests marked RUNNING but apparently dead.") - - ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE. - ;; - ;; (db:delay-if-busy dbdat) - (let* ((min-incompleted-ids (map car incompleted)) ;; do 'em all - (all-ids (append min-incompleted-ids (map car oldlaunched)))) - (if (> (length all-ids) 0) - (begin - ;; (launch:is-test-alive "localhost" 435) - (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") - " as DEAD") - (for-each - (lambda (test-id) - (let* ((tinfo (rmt:get-test-info-by-id run-id test-id)) - (run-dir (db:test-get-rundir tinfo)) - (host (db:test-get-host tinfo)) - (pid (db:test-get-process_id tinfo)) - (result (rmt:get-status-from-final-status-file run-dir))) - (if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result))) - (begin - (debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD") - (rmt:set-state-status-and-roll-up-items - run-id test-id 'foo "COMPLETED" "PASS" - "Test stopped responding but it has PASSED; marking it PASS in the DB.")) - (let ((is-alive (and (not (eq? pid 0)) ;; 0 is default in re-used field "attemptnum" where pid stored. - (commonmod:is-test-alive host pid)))) - (if is-alive - (debug:print 0 *default-log-port* "INFO: test " test-id " on host " host - " has a process on pid " pid ", NOT setting to DEAD.") - (begin - (debug:print 0 *default-log-port* "INFO: test " test-id - " final state/status is not COMPLETED/PASS. It is " result) - (rmt:set-state-status-and-roll-up-items - run-id test-id 'foo "COMPLETED" "DEAD" - "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead."))))))) - ;; call end of eud of run detection for posthook - from merge, is it needed? - ;; (launch:end-of-run-check run-id) - all-ids) - ))))) - )