Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -482,13 +482,13 @@
 ;;  select end_time-now from
 ;;      (select testname,item_path,event_time+run_duration as
 ;;                          end_time,strftime('%s','now') as now from tests where state in
 ;;      ('RUNNING','REMOTEHOSTSTART','LAUNCED'));
 
-
 (define (db:find-and-mark-incomplete db #!key (ovr-deadtime #f))
   (let* ((incompleted '())
+	 (toplevels   '())
 	 (deadtime-str (configf:lookup *configdat* "setup" "deadtime"))
 	 (deadtime     (if (and deadtime-str
 				(string->number deadtime-str))
 			   (string->number deadtime-str)
 			   7200)) ;; two hours
@@ -504,37 +504,63 @@
        ;;
        ;; HOWEVER: this code in run:test seems to work fine
        ;;              (> (- (current-seconds)(+ (db:test-get-event_time testdat)
        ;;                     (db:test-get-run_duration testdat)))
        ;;                    600) 
+       (db:delay-if-busy)
        (sqlite3:for-each-row 
-	(lambda (test-id)
-	  (set! incompleted (cons test-id incompleted)))
+	(lambda (test-id run-dir uname testname item-path)
+	  (if (and (equal? uname "n/a")
+		   (equal? item-path "")) ;; this is a toplevel test
+	      ;; what to do with toplevel? call rollup?
+	      (begin
+		(set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
+		(debug:print-info 0 "Found old toplevel test in RUNNING state, test-id=" test-id))
+	      (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))
 	db
-	"SELECT id FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time - run_duration) > ? AND state IN ('RUNNING','REMOTEHOSTSTART');"
-	run-id deadtime)
+	"SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 600 AND state IN ('RUNNING','REMOTEHOSTSTART');"
+	run-id)
 
        ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
        ;;
+       (db:delay-if-busy)
        (sqlite3:for-each-row
-	(lambda (test-id)
-	  (set! incompleted (cons test-id incompleted)))
+	(lambda (test-id run-dir uname testname item-path)
+	  (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))
 	db
-	"SELECT id FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time - run_duration) > ? AND state IN ('LAUNCHED');"
-	run-id (* 60 60 24)))
+	"SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');"
+	run-id))
      run-ids)
-       
+    
     ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE.
     ;;
-    (if (> (length incompleted) 0)
-	(begin
-	  (debug:print 0 "WARNING: Marking test(s); " (string-intersperse (map conc incompleted) ", ") " as INCOMPLETE")
-	  (sqlite3:execute 
-	   db
-	   (conc "UPDATE tests SET state='INCOMPLETE' WHERE id IN (" 
-		 (string-intersperse (map conc incompleted) ",")
-		 ");"))))))
+    (db:delay-if-busy)
+    (let* ((min-incompleted (filter (lambda (x)
+				     (let* ((testpath (cadr x))
+					    (tdatpath (conc testpath "/testdat.db"))
+					    (dbexists (file-exists? tdatpath)))
+				       (or (not dbexists) ;; if no file then something wrong - mark as incomplete
+					   (> (- (current-seconds)(file-modification-time tdatpath)) 600)))) ;; no change in 10 minutes to testdat.db - she's dead Jim
+				   incompleted))
+	  (min-incompleted-ids (map car min-incompleted)))
+      (if (> (length min-incompleted-ids) 0)
+	  (begin
+	    (debug:print 0 "WARNING: Marking test(s); " (string-intersperse (map conc min-incompleted-ids) ", ") " as INCOMPLETE")
+	    (sqlite3:execute 
+	     db
+	     (conc "UPDATE tests SET state='INCOMPLETE' WHERE id IN (" 
+		   (string-intersperse (map conc min-incompleted-ids) ",")
+		   ");")))))
+
+    ;; Now do rollups for the toplevel tests
+    ;;
+    (for-each
+     (lambda (toptest)
+       (let ((test-name (list-ref toptest 3))
+	     (run-id    (list-ref toptest 5)))
+	 (cdb:top-test-set-per-pf-counts *runremote* run-id test-name)))
+     toplevels)))
 		     
 ;; Clean out old junk and vacuum the database
 ;;
 ;; Ultimately do something like this:
 ;;

Index: runs.scm
==================================================================
--- runs.scm
+++ runs.scm
@@ -859,11 +859,11 @@
 	(max-concurrent-jobs   (let ((mcj (config-lookup *configdat* "setup"     "max_concurrent_jobs")))
 				 (if (and mcj (string->number mcj))
 				     (string->number mcj)
 				     1))) ;; length of the register queue ahead
 	(reglen                (if (number? reglen-in) reglen-in 1))
-	(last-time-incomplete  (current-seconds))
+	(last-time-incomplete  (- (current-seconds) 900)) ;; force at least one clean up cycle
 	(last-time-some-running (current-seconds)))
 
     ;; Initialize the test-registery hash with tests that already have a record
     ;; convert state to symbol and use that as the hash value
     (for-each (lambda (trec)
@@ -878,17 +878,18 @@
 
     (let loop ((hed         (car sorted-test-names))
 	       (tal         (cdr sorted-test-names))
 	       (reg         '()) ;; registered, put these at the head of tal 
 	       (reruns      '()))
+
       (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns))
 
       ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
-      ;; (if (> (current-seconds)(+ last-time-incomplete 900))
-      ;;     (begin
-      ;;       (set! last-time-incomplete (current-seconds))
-      ;;       (cdb:remote-run db:find-and-mark-incomplete #f)))
+      (if (> (current-seconds)(+ last-time-incomplete 900))
+          (begin
+            (set! last-time-incomplete (current-seconds))
+            (cdb:remote-run db:find-and-mark-incomplete #f)))
 
       ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns)
       (let* ((test-record (hash-table-ref test-records hed))
 	     (test-name   (tests:testqueue-get-testname test-record))
 	     (tconfig     (tests:testqueue-get-testconfig test-record))