Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -50,28 +50,12 @@ ;; These are called by the server on recipt of /api calls (define (api:execute-requests dbstruct cmd params) (case (string->symbol cmd) ;; SERVERS - ((start-server) (apply server:kind-run params)) - ;; ((kill-server) - ;; (db:sync-tables (db:tbls *inmemdb*) *inmemdb* *db*) ;; (db:sync-to *inmemdb* *db*) - ;; (let ((hostname (car *runremote*)) - ;; (port (cadr *runremote*)) - ;; (pid (if (null? params) #f (car params))) - ;; (th1 (make-thread (lambda ()(thread-sleep! 3)(debug:print 0 "Server exiting!")(exit 0)) "Server exit thread"))) - ;; (debug:print 0 "WARNING: Server on " hostname ":" port " going down by user request!") - ;; (debug:print-info 1 "current pid=" (current-process-id)) - ;; (open-run-close tasks:server-deregister tasks:open-db - ;; hostname - ;; port: port) - ;; (set! *server-run* #f) - ;; (thread-sleep! 3) - ;; (if pid - ;; (process-signal pid signal/kill) - ;; (thread-start! th1)) - ;; '(#t "exit process started"))) + ((start-server) (apply server:kind-run params)) + ((kill-server) (set! *server-run* #f)) ;; KEYS ((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params)) ((get-keys) (db:get-keys dbstruct)) @@ -113,11 +97,11 @@ ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params)) ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params)) ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params)) ((lock/unlock-run) (apply db:lock/unlock-run dbstruct params)) ((update-run-event_time) (apply db:update-run-event_time dbstruct params)) - ((find-and-mark-incompete (apply db:find-and-mark-incomplete dbstruct (car params) ovr-deadtime: (cadr params)))) + ((find-and-mark-incomplete) (apply db:find-and-mark-incomplete dbstruct params)) ;; STEPS ((teststep-set-status!) (apply db:teststep-set-status! dbstruct params)) ;; TEST DATA Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -421,11 +421,11 @@ (lambda (a . b) (set! fromdat (cons (apply vector a b) fromdat))) fromdb full-sel) - (debug:print 0 "INFO: found " (length fromdat) " records to sync") + (debug:print-info 2 "found " (length fromdat) " records to sync") ;; read the target table (sqlite3:for-each-row (lambda (a . b) (hash-table-set! todat a (apply vector a b))) @@ -750,12 +750,13 @@ ;; select end_time-now from ;; (select testname,item_path,event_time+run_duration as ;; end_time,strftime('%s','now') as now from tests where state in ;; ('RUNNING','REMOTEHOSTSTART','LAUNCED')); -(define (db:find-and-mark-incomplete db run-id #!key (ovr-deadtime #f)) - (let* ((incompleted '()) +(define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime) + (let* ((db (db:get-db dbstruct run-id)) + (incompleted '()) (oldlaunched '()) (toplevels '()) (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) (deadtime (if (and deadtime-str (string->number deadtime-str)) @@ -763,13 +764,10 @@ 7200))) ;; two hours (if (number? ovr-deadtime)(set! deadtime ovr-deadtime)) ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes ;; - ;; THIS CANNOT WORK. The run_duration is not updated in the central db due to performance concerns. - ;; The testdat.db file must be consulted. - ;; ;; HOWEVER: this code in run:test seems to work fine ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) ;; (db:test-get-run_duration testdat))) ;; 600) ;; (db:delay-if-busy) @@ -781,13 +779,13 @@ (begin (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) (debug:print-info 0 "Found old toplevel test in RUNNING state, test-id=" test-id)) (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) db - "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 600 AND state IN ('RUNNING','REMOTEHOSTSTART');" - run-id) - + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');" + run-id deadtime) + ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config ;; ;; (db:delay-if-busy) (sqlite3:for-each-row (lambda (test-id run-dir uname testname item-path) @@ -798,21 +796,23 @@ (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) db "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');" run-id) + (debug:print-info 18 "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") + ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE. ;; ;; (db:delay-if-busy) - (let* ((min-incompleted (filter (lambda (x) - (let* ((testpath (cadr x)) - (tdatpath (conc testpath "/testdat.db")) - (dbexists (file-exists? tdatpath))) - (or (not dbexists) ;; if no file then something wrong - mark as incomplete - (> (- (current-seconds)(file-modification-time tdatpath)) 600)))) ;; no change in 10 minutes to testdat.db - she's dead Jim - incompleted)) - (min-incompleted-ids (map car min-incompleted)) + (let* (;; (min-incompleted (filter (lambda (x) + ;; (let* ((testpath (cadr x)) + ;; (tdatpath (conc testpath "/testdat.db")) + ;; (dbexists (file-exists? tdatpath))) + ;; (or (not dbexists) ;; if no file then something wrong - mark as incomplete + ;; (> (- (current-seconds)(file-modification-time tdatpath)) 600)))) ;; no change in 10 minutes to testdat.db - she's dead Jim + ;; incompleted)) + (min-incompleted-ids (map car incompleted)) ;; do 'em all (all-ids (append min-incompleted-ids (map car oldlaunched)))) (if (> (length all-ids) 0) (begin (debug:print 0 "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as INCOMPLETE") (sqlite3:execute @@ -823,13 +823,13 @@ ;; Now do rollups for the toplevel tests ;; (for-each (lambda (toptest) - (let ((test-name (list-ref toptest 3)) - (run-id (list-ref toptest 5))) - (cdb:top-test-set-per-pf-counts *runremote* run-id test-name))) + (let ((test-name (list-ref toptest 3))) +;; (run-id (list-ref toptest 5))) + (db:general-call db 'top-test-set-per-pf-counts (list test-name run-id test-name test-name test-name)))) ;; (list run-id test-name)))) toplevels))) ;; Clean out old junk and vacuum the database ;; ;; Ultimately do something like this: Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -375,11 +375,11 @@ (rem-time #f)) (if *inmemdb* (db:sync-touched *inmemdb* *run-id* force-sync: #t)) (set! sync-time (- (current-milliseconds) start-time)) (set! rem-time (quotient (- 4000 sync-time) 1000)) - (debug:print 0 "SYNC: time= " sync-time ", rem-time=" rem-time) + (debug:print 2 "SYNC: time= " sync-time ", rem-time=" rem-time) ;; ;; set_running after our first pass through and start the db ;; (if (eq? server-state 'available) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -83,11 +83,11 @@ (debug:print-info 4 "no server and read-only query, bypassing normal channel") ;; (if (rmt:write-frequency-over-limit? cmd run-id)(server:kind-run run-id)) (let ((curr-max (rmt:get-max-query-average))) (if (> (cdr curr-max) max-avg-qry) (begin - (debug:print-info 0 "Max average query, " (inexact->exact (round (cdr curr-max))) "ms (" (car curr-max) ") exceeds " max-avg-qry ", try starting server ...") + (debug:print-info 3 "Max average query, " (inexact->exact (round (cdr curr-max))) "ms (" (car curr-max) ") exceeds " max-avg-qry ", try starting server ...") (server:kind-run run-id)))) (rmt:open-qry-close-locally cmd run-id params))))) (define (rmt:update-db-stats rawcmd params duration) (mutex-lock! *db-stats-mutex*) @@ -399,22 +399,22 @@ (rmt:send-receive 'update-run-event_time #f (list run-id))) (define (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit) (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit))) -(define (rmt:find-and-mark-incomplete run-id #!key (ovr-deadtime #f)) - (rmt:send-receive 'find-and-mark-incomplete #f (list run-id ovr-deadtime))) +(define (rmt:find-and-mark-incomplete run-id ovr-deadtime) + (rmt:send-receive 'find-and-mark-incomplete run-id (list run-id ovr-deadtime))) ;;====================================================================== ;; M U L T I R U N Q U E R I E S ;;====================================================================== ;; Need to move this to multi-run section and make associated changes (define (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f)) (let ((run-ids (rmt:get-all-run-ids))) (for-each (lambda (run-id) - (rmt:find-and-mark-incomplete run-id ovr-deadtime: ovr-deadtime)) + (rmt:find-and-mark-incomplete run-id ovr-deadtime)) run-ids))) ;; get the previous record for when this test was run where all keys match but runname ;; returns #f if no such test found, returns a single test record if found ;;