Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -76,10 +76,11 @@ ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params)) ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params)) ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params)) ((lock/unlock-run) (apply db:lock/unlock-run dbstruct params)) ((update-run-event_time) (apply db:update-run-event_time dbstruct params)) + ((find-and-mark-incompete (apply db:find-and-mark-incomplete dbstruct (car params) ovr-deadtime: (cadr params)))) ;; STEPS ((teststep-set-status!) (apply db:teststep-set-status! dbstruct params)) ;; TEST DATA Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -617,13 +617,13 @@ units TEXT, comment TEXT DEFAULT '', status TEXT DEFAULT 'n/a', type TEXT DEFAULT '', CONSTRAINT test_data_constraint UNIQUE (test_id,category,variable));") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_rundat ( - ;; Why use FULL here? This data is not that critical - ;; (sqlite3:execute db "PRAGMA synchronous = FULL;") + ;; Why use FULL here? This data is not that critical + ;; (sqlite3:execute db "PRAGMA synchronous = FULL;") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_rundat ( id INTEGER PRIMARY KEY, test_id INTEGER, update_time TIMESTAMP, cpuload INTEGER DEFAULT -1, diskfree INTEGER DEFAULT -1, @@ -674,74 +674,70 @@ ;; select end_time-now from ;; (select testname,item_path,event_time+run_duration as ;; end_time,strftime('%s','now') as now from tests where state in ;; ('RUNNING','REMOTEHOSTSTART','LAUNCED')); -(define (db:find-and-mark-incomplete db #!key (ovr-deadtime #f)) +(define (db:find-and-mark-incomplete db run-id #!key (ovr-deadtime #f)) (let* ((incompleted '()) (oldlaunched '()) (toplevels '()) (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) (deadtime (if (and deadtime-str (string->number deadtime-str)) (string->number deadtime-str) - 7200)) ;; two hours - (run-ids (db:get-all-run-ids db))) ;; iterate over runs to divy up the calls + 7200))) ;; two hours (if (number? ovr-deadtime)(set! deadtime ovr-deadtime)) - (for-each - (lambda (run-id) - - ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes - ;; - ;; THIS CANNOT WORK. The run_duration is not updated in the central db due to performance concerns. - ;; The testdat.db file must be consulted. - ;; - ;; HOWEVER: this code in run:test seems to work fine - ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) - ;; (db:test-get-run_duration testdat))) - ;; 600) - (db:delay-if-busy) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (begin - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (debug:print-info 0 "Found old toplevel test in RUNNING state, test-id=" test-id)) - (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) - db - "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 600 AND state IN ('RUNNING','REMOTEHOSTSTART');" - run-id) - - ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config - ;; - (db:delay-if-busy) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) - db - "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');" - run-id)) - run-ids) + + ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes + ;; + ;; THIS CANNOT WORK. The run_duration is not updated in the central db due to performance concerns. + ;; The testdat.db file must be consulted. + ;; + ;; HOWEVER: this code in run:test seems to work fine + ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) + ;; (db:test-get-run_duration testdat))) + ;; 600) + ;; (db:delay-if-busy) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (begin + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (debug:print-info 0 "Found old toplevel test in RUNNING state, test-id=" test-id)) + (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) + db + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 600 AND state IN ('RUNNING','REMOTEHOSTSTART');" + run-id) + + ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config + ;; + ;; (db:delay-if-busy) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) + db + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');" + run-id) ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE. ;; - (db:delay-if-busy) + ;; (db:delay-if-busy) (let* ((min-incompleted (filter (lambda (x) - (let* ((testpath (cadr x)) - (tdatpath (conc testpath "/testdat.db")) - (dbexists (file-exists? tdatpath))) - (or (not dbexists) ;; if no file then something wrong - mark as incomplete - (> (- (current-seconds)(file-modification-time tdatpath)) 600)))) ;; no change in 10 minutes to testdat.db - she's dead Jim - incompleted)) - (min-incompleted-ids (map car min-incompleted)) - (all-ids (append min-incompleted-ids (map car oldlaunched)))) + (let* ((testpath (cadr x)) + (tdatpath (conc testpath "/testdat.db")) + (dbexists (file-exists? tdatpath))) + (or (not dbexists) ;; if no file then something wrong - mark as incomplete + (> (- (current-seconds)(file-modification-time tdatpath)) 600)))) ;; no change in 10 minutes to testdat.db - she's dead Jim + incompleted)) + (min-incompleted-ids (map car min-incompleted)) + (all-ids (append min-incompleted-ids (map car oldlaunched)))) (if (> (length all-ids) 0) (begin (debug:print 0 "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as INCOMPLETE") (sqlite3:execute db @@ -837,15 +833,15 @@ (debug:print-info 4 "launch throttle factor=" *global-delta*) (set! *last-global-delta-printed* *global-delta*))) res)) (define (db:set-var dbstruct var val) - (db:delay-if-busy) + ;; (db:delay-if-busy) (sqlite3:execute (db:get-db dbstruct #f) "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" var val)) (define (db:del-var dbstruct var) - (db:delay-if-busy) + ;; (db:delay-if-busy) (sqlite3:execute (db:get-db dbstruct #f) "DELETE FROM metadat WHERE var=?;" var)) ;; use a global for some primitive caching, it is just silly to ;; re-read the db over and over again for the keys since they never ;; change @@ -941,11 +937,11 @@ (key=?str (string-intersperse (map (lambda (k)(conc k "=?")) keys) " AND "))) (debug:print 3 "keys: " keys " allvals: " allvals " keyvals: " keyvals " key=?str is " key=?str) (debug:print 2 "NOTE: using target " (string-intersperse (map cadr keyvals) "/") " for this run") (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and" (let ((res #f)) - (db:delay-if-busy) + ;; (db:delay-if-busy) (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time" comma keystr ") VALUES (?,?,?,?,strftime('%s','now')" comma valslots ");") allvals) (apply sqlite3:for-each-row (lambda (id) (set! res id)) @@ -952,11 +948,11 @@ db (let ((qry (conc "SELECT id FROM runs WHERE (runname=? " andstr key=?str ");"))) ;(debug:print 4 "qry: " qry) qry) qryvals) - (db:delay-if-busy) + ;; (db:delay-if-busy) (sqlite3:execute db "UPDATE runs SET state=?,status=?,event_time=strftime('%s','now') WHERE id=? AND state='deleted';" state status res) res) (begin (debug:print 0 "ERROR: Called without all necessary keys") #f)))) @@ -1195,41 +1191,41 @@ (let ((finalres (vector header res))) ;; (hash-table-set! *run-info-cache* run-id finalres) finalres))) (define (db:set-comment-for-run dbstruct run-id comment) - (db:delay-if-busy) + ;; (db:delay-if-busy) (sqlite3:execute (db:get-db dbstruct #f) "UPDATE runs SET comment=? WHERE id=?;" comment ;; (sdb:qry 'getid comment) run-id)) ;; does not (obviously!) removed dependent data. But why not!!? (define (db:delete-run dbstruct run-id) ;; First set any related tests to DELETED (let ((db (db:get-db dbstruct run-id))) - (db:delay-if-busy) + ;; (db:delay-if-busy) (sqlite3:execute db "UPDATE tests SET state='DELETED',comment='';") (sqlite3:execute db "DELETE FROM test_steps;") (sqlite3:execute db "DELETE FROM test_data;") (sqlite3:execute (db:get-db dbstruct #f) "UPDATE runs SET state='deleted',comment='' WHERE id=?;" run-id))) (define (db:update-run-event_time dbstruct run-id) - (db:delay-if-busy) + ;; (db:delay-if-busy) (sqlite3:execute (db:get-db dbstruct #f) "UPDATE runs SET event_time=strftime('%s','now') WHERE id=?;" run-id)) (define (db:lock/unlock-run dbstruct run-id lock unlock user) (let ((newlockval (if lock "locked" (if unlock "unlocked" "locked")))) ;; semi-failsafe (sqlite3:execute (db:get-db dbstruct #f) "UPDATE runs SET state=? WHERE id=?;" newlockval run-id) - (db:delay-if-busy) + ;; (db:delay-if-busy) (sqlite3:execute (db:get-db dbstruct #f) "INSERT INTO access_log (user,accessed,args) VALUES(?,strftime('%s','now'),?);" user (conc newlockval " " run-id)) (debug:print-info 1 "" newlockval " run number " run-id))) (define (db:set-run-status db run-id status #!key (msg #f)) - (db:delay-if-busy) + ;; (db:delay-if-busy) (if msg (sqlite3:execute db "UPDATE runs SET status=?,comment=? WHERE id=?;" status msg run-id) (sqlite3:execute db "UPDATE runs SET status=? WHERE id=?;" status run-id))) (define (db:get-run-status db run-id) @@ -1426,11 +1422,11 @@ ;; use db:mintests-get-{id ,run_id,testname ...} (define (db:get-tests-for-runs-mindata dbstruct run-ids testpatt states statuses not-in) (db:get-tests-for-runs dbstruct run-ids testpatt states statuses not-in: not-in qryvals: "id,run_id,testname,state,status,event_time,item_path")) (define (db:get-tests-for-runs dbstruct run-ids testpatt states statuses #!key (not-in #f)(qryvals #f)) - (db:delay-if-busy) + ;; (db:delay-if-busy) (let ((res '())) (for-each (lambda (run-id) (set! res (append res @@ -1444,11 +1440,11 @@ ;; (define (db:delete-test-records dbstruct run-id test-id) (let ((db (db:get-db dbstruct run-id))) (db:general-call db 'delete-test-step-records (list test-id)) - (db:delay-if-busy) + ;; (db:delay-if-busy) (db:general-call db 'delete-test-data-records (list test-id)) (sqlite3:execute db "UPDATE tests SET state='DELETED',status='n/a',comment='' WHERE id=?;" test-id))) (define (db:delete-tests-for-run dbdbstruct run-id) (let ((db (db:get-db dbstruct run-id))) @@ -1471,19 +1467,19 @@ (let ((qry (conc "UPDATE tests SET state=?,status=? WHERE " (if currstate (conc "state='" currstate "' AND ") "") (if currstatus (conc "status='" currstatus "' AND ") "") " run_id=? AND testname=? AND NOT (item_path='' AND testname in (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));"))) ;;(debug:print 0 "QRY: " qry) - (db:delay-if-busy) + ;; (db:delay-if-busy) (sqlite3:execute (db:get-db dbstruct run-id) qry run-id newstate newstatus testname testname))) testnames)) ;; speed up for common cases with a little logic ;; NB// Ultimately this will be deprecated in deference to mt:test-set-state-status-by-id ;; (define (db:test-set-state-status-by-id dbstruct run-id test-id newstate newstatus newcomment) - (db:delay-if-busy) + ;; (db:delay-if-busy) (let ((db (db:get-db dbstruct run-id))) (cond ((and newstate newstatus newcomment) (sqlite3:execute db "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;" newstate newstatus newcomment ;; (sdb:qry 'getid newcomment) test-id)) @@ -1496,11 +1492,11 @@ test-id)))) (mt:process-triggers run-id test-id newstate newstatus))) ;; Never used, but should be? (define (db:test-set-state-status-by-run-id-testname db run-id test-name item-path status state) - (db:delay-if-busy) + ;; (db:delay-if-busy) (sqlite3:execute db "UPDATE tests SET state=?,status=?,event_time=strftime('%s','now') WHERE run_id=? AND testname=? AND item_path=?;" state status run-id test-name item-path)) ;; NEW BEHAVIOR: Count tests running in only one run! ;; @@ -1652,11 +1648,11 @@ (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE testname=? AND item_path=?;") test-name item-path) res)) (define (db:test-get-rundir-from-test-id dbstruct run-id test-id) - (db:delay-if-busy) + ;; (db:delay-if-busy) (let ((db (db:get-db dbstruct run-id)) (res #f)) (sqlite3:for-each-row (lambda (tpath) (set! res tpath)) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -283,13 +283,23 @@ (rmt:send-receive 'update-run-event_time #f (list run-id))) (define (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit) (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit))) +(define (rmt:find-and-mark-incomplete run-id #!key (ovr-deadtime #f)) + (rmt:send-receive 'find-and-mark-incomplete #f (list run-id ovr-deadtime))) + ;;====================================================================== ;; M U L T I R U N Q U E R I E S ;;====================================================================== + +;; Need to move this to multi-run section and make associated changes +(define (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f)) + (let ((run-ids (rmt:get-all-run-ids))) + (for-each (lambda (run-id) + (rmt:find-and-mark-incomplete run-id ovr-deadtime: ovr-deadtime)) + run-ids))) ;; get the previous record for when this test was run where all keys match but runname ;; returns #f if no such test found, returns a single test record if found ;; ;; Run this at the client end since we have to connect to multiple run-id dbs Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -879,11 +879,11 @@ ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes (if (> (current-seconds)(+ last-time-incomplete 900)) (begin (set! last-time-incomplete (current-seconds)) - (cdb:remote-run db:find-and-mark-incomplete #f))) + (rmt:find-and-mark-incomplete-all-runs))) ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) (let* ((test-record (hash-table-ref test-records hed)) (test-name (tests:testqueue-get-testname test-record)) (tconfig (tests:testqueue-get-testconfig test-record))