Index: TODO
==================================================================
--- TODO
+++ TODO
@@ -56,6 +56,26 @@
 .. Move main.db to global?
 .. [ run-id.db inmemdb last-mod last-read last-sync inuse ]
 . Re-work all queries to use run-id to dereference server
 . Open main.db directly in calls to -runtests etc. No need to talk remote?
 . remove common:faux-lock
+
+db:get-test-info-by-id
+db:get-test-state-status-by-id
+db:get-test-info - do a get id by name/item-path
+    cache the id-
+    use test id plus run id to get from cache
+ 
+need to do db:get-test-info-db
+look at html gen for items - rollup needs deduplication nonoverlap
+
+;; cache write these with transaction
+db:teststep-set-status!
+db:test-set-top-process-id
+
+;; called a lot, maybe from rollup?
+db:get-all-state-status-counts-for-test
+
+;; load to move from server to client
+tests:summarize-items ;; appears to be on client
+tests:summarize-tests
 

Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -2315,20 +2315,18 @@
 ;; ;; NB// Ultimately this will be deprecated in deference to mt:test-set-state-status-by-id
 ;;
 ;;      NOTE: run-id is not used
 ;; ;;
 (define (db:test-set-state-status dbstruct run-id test-id newstate newstatus newcomment)
-  (let* ((hash-key (cons run-id test-id)))
-    (hash-table-delete! *db:get-test-info-by-id-cache* hash-key)
-    (hash-table-delete! *db:get-test-state-status-by-id-cache*hash-key))
   (db:with-db
    dbstruct
    run-id #t
    (lambda (dbdat db)
      (db:test-set-state-status-db db run-id test-id newstate newstatus newcomment))))
 
 (define (db:test-set-state-status-db db run-id test-id newstate newstatus newcomment)
+  ;; the test-info caches for (run-id . test-id) are cleared at the end of this function, after the UPDATEs
   (cond
    ((and newstate newstatus newcomment)
     (sqlite3:execute db "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;" newstate newstatus newcomment ;; (sdb:qry 'getid newcomment)
 		     test-id))
    ((and newstate newstatus)
@@ -2337,10 +2335,13 @@
     (if newstate   (sqlite3:execute db "UPDATE tests SET state=?   WHERE id=?;" newstate   test-id))
     (if newstatus  (sqlite3:execute db "UPDATE tests SET status=?  WHERE id=?;" newstatus  test-id))
     (if newcomment (sqlite3:execute db "UPDATE tests SET comment=? WHERE id=?;" newcomment ;; (sdb:qry 'getid newcomment)
 				    test-id))))
   ;; (mt:process-triggers dbstruct run-id test-id newstate newstatus)) ;; NOTE: Moved into calling function
+  (let* ((hash-key (cons run-id test-id)))
+    (hash-table-delete! *db:get-test-info-by-id-cache* hash-key)
+    (hash-table-delete! *db:get-test-state-status-by-id-cache* hash-key))
   )
 
 ;; NEW BEHAVIOR: Count tests running in all runs!
 ;;
 (define (db:get-count-tests-running dbstruct run-id) ;; fastmode)
@@ -3126,25 +3127,10 @@
            (print-call-chain (current-error-port))
 	   msg))) ;; crude reply for when things go awry
     ((zmq nmsg)(with-input-from-string msg (lambda ()(deserialize))))
     (else msg))) ;; rpc
 
-;; ; This is to be the big daddy call NOPE: Replaced by db:set-state-status-and-roll-up-items
-;; ;
-;; define (db:test-set-state-status dbstruct run-id test-id state status msg)
-;;  (let ((dbdat  (db:get-subdb dbstruct run-id)))
-;;    (if (member state '("LAUNCHED" "REMOTEHOSTSTART"))
-;; 	(db:general-call dbdat 'set-test-start-time (list test-id)))
-;;    ;; (if msg
-;;    ;; 	(db:general-call dbdat 'state-status-msg (list state status msg test-id))
-;;    ;; 	(db:general-call dbdat 'state-status     (list state status test-id)))
-;;    (db:set-state-status-and-roll-up-items dbstruct run-id test-id #f state status msg)
-;;    ;; process the test_data table
-;;    (if (and test-id state status (equal? status "AUTO")) 
-;; 	(db:test-data-rollup dbstruct run-id test-id status))
-;;    (mt:process-triggers dbstruct run-id test-id state status)))
-
 ;; state is the priority rollup of all states
 ;; status is the priority rollup of all completed statesfu
 ;;
 ;; if test-name is an integer work off that as test-id instead of test-name test-path
 ;;
@@ -3185,11 +3171,12 @@
 			    (state-statuses      (db:roll-up-rules state-status-counts state status))
                             (newstate            (car state-statuses))
                             (newstatus           (cadr state-statuses)))
 		       (set! new-state-eh newstate)
 		       (set! new-status-eh newstatus)
-                       (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path" newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts)  " state-status-counts: "
+                       (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path
+				    " newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts)  " state-status-counts: "
 				    (apply conc
 					   (map (lambda (x)
 						  (conc
                      				   (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | "))
 						state-status-counts))); end debug:print

Index: launch.scm
==================================================================
--- launch.scm
+++ launch.scm
@@ -741,11 +741,12 @@
               )
 
 
 	      ;; for automated creation of the rollup html file this is a good place...
 	      (if (not (equal? item-path ""))
-		      (tests:summarize-items run-id test-id test-name #f))
+		  (tests:summarize-items run-id test-id test-name #f))
+	      ;; BUG? was this meant to be part of the if above (i.e. its else branch)?
 	      (tests:summarize-test run-id test-id)  ;; don't force - just update if no
               ;; Leave a .final-status file for the top level test
               (tests:save-final-status run-id test-id)
 	      (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) ;; end of let*
 

Index: mt.scm
==================================================================
--- mt.scm
+++ mt.scm
@@ -190,13 +190,13 @@
     (if prev-nbfake-log
 	(setenv "NBFAKE_LOG" prev-nbfake-log)
 	(unsetenv "NBFAKE_LOG"))
     ))
 
-(define (mt:process-triggers dbstruct run-id test-id newstate newstatus)
+(define (mt:process-triggers run-id test-id newstate newstatus)
   (if test-id 
-      (let* ((test-dat      (db:get-test-info-by-id dbstruct run-id test-id)))
+      (let* ((test-dat      (rmt:get-test-info-by-id run-id test-id)))
 	(if test-dat
 	    (let* ((test-rundir   (db:test-get-rundir       test-dat)) ;; ) ;; )
 		   (test-name     (db:test-get-testname     test-dat))
 		   (item-path     (db:test-get-item-path    test-dat))
 		   (duration      (db:test-get-run_duration test-dat))
@@ -262,21 +262,11 @@
       (begin
 	(debug:print-error 0 *default-log-port* "bad data handed to mt:test-set-state-status-by-id, run-id=" run-id ", test-id=" test-id ", newstate=" newstate)
 	(print-call-chain (current-error-port))
 	#f)
       (begin
-	;; cond
-	;; ((and newstate newstatus newcomment)
-	;;  (rmt:general-call 'state-status-msg run-id newstate newstatus newcomment test-id))
-	;; ((and newstate newstatus)
-	;;  (rmt:general-call 'state-status run-id newstate newstatus test-id))
-	;; (else
-	;;  (if newstate   (rmt:general-call 'set-test-state   run-id newstate   test-id))
-	;;  (if newstatus  (rmt:general-call 'set-test-status  run-id newstatus  test-id))
-	;;  (if newcomment (rmt:general-call 'set-test-comment run-id newcomment test-id))))
 	(rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment)
-	;; (mt:process-triggers run-id test-id newstate newstatus)
 	#t)))
 
 
 (define (mt:test-set-state-status-by-id-unless-completed run-id test-id newstate newstatus newcomment)
   (let* ((test-vec   (rmt:get-testinfo-state-status run-id test-id))
@@ -294,11 +284,146 @@
 	;;(mt:test-set-state-status-by-id run-id test-id new-state new-status new-comment)))
 
 (define (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path new-state new-status new-comment)
   (let ((test-id (rmt:get-test-id run-id test-name item-path)))
     (mt:test-set-state-status-by-id-unless-completed run-id test-id new-state new-status new-comment)))
-    
+
+;; state and status are extra hints not usually used in the calculation
+;; asserts run-id is a number, then delegates to rmt:client-side-set-state-status-and-roll-up (the former single server request is kept commented out below)
+(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment)
+  (assert (number? run-id) "FATAL: Run id required.")
+  (rmt:client-side-set-state-status-and-roll-up  run-id test-name item-path state status comment)
+  ;; (rmtmod:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment))
+  )
+
+(define (rmt:client-side-set-state-status-and-roll-up  run-id test-name item-path state status comment)
+  ;; Set state/status/comment on one test and, when that test is an item (item-path not ""), roll the result up onto its top level test.
+  ;; test-name may be a test name or a numeric test-id. BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met
+  (let* ((testdat      (if (number? test-name)
+			   (rmt:get-test-info-by-id run-id test-name) ;; test-name is actually a test-id
+			   (db:keep-trying-until-true ;; in our threaded stuff this call could happen before the test is registered (maybe?)
+			    rmt:get-test-info
+			    (list run-id test-name item-path)
+			    10)))
+	 (test-id      (db:test-get-id testdat))
+	 (test-name    (if (number? test-name)
+			   (db:test-get-testname testdat)
+			   test-name))
+	 (item-path    (db:test-get-item-path testdat)) ;; always taken from the test record, not from the argument
+         (tl-testdat   (rmt:get-test-info run-id test-name "")) ;; top level test: same test-name, empty item-path
+         (tl-test-id   (if tl-testdat
+			   (db:test-get-id tl-testdat)
+			   #f))
+	 (new-state-eh #f)   ;; set only when a roll-up happens; gates the trigger call below
+	 (new-status-eh #f))
+    (if (member state '("LAUNCHED" "REMOTEHOSTSTART"))
+	(rmt:general-call 'set-test-start-time run-id test-id)) ;; rmt:general-call takes the statement name first, then run-id, then the params
+    (let* ((res (begin
+		  (rmt:test-set-state-status-db run-id test-id state status comment) ;; this call sets the item state/status ;; NOTE(review): confirm this rmt: entry point exists
+		  (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item
+		      (let* ((state-status-counts (rmt:get-all-state-status-counts-for-test run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test
+			     (state-statuses      (db:roll-up-rules state-status-counts state status))
+			     (newstate            (car state-statuses))
+			     (newstatus           (cadr state-statuses)))
+			(set! new-state-eh newstate)
+			(set! new-status-eh newstatus)
+			(debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path
+				     " newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts)  " state-status-counts: "
+				     (apply conc
+					    (map (lambda (x)
+						   (conc
+						    (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | "))
+						 state-status-counts))); end debug:print
+			(if tl-test-id
+			    (rmt:test-set-state-status run-id tl-test-id newstate newstatus #f)) ;; push the rolled-up state/status onto the top level test
+		       )))))
+      ;; no mutex-unlock! here: this function never locks *db-transaction-mutex*, so unlocking it could release a lock held by another thread
+      (if (and test-id state status (equal? status "AUTO")) 
+	  (rmt:test-data-rollup run-id test-id status))
+      (if new-state-eh ;; moved from db:test-set-state-status
+	  (mt:process-triggers run-id test-id new-state-eh new-status-eh))
+      res)))
+
+;;  select end_time-now from
+;;      (select testname,item_path,event_time+run_duration as
+;;                          end_time,strftime('%s','now') as now from tests where state in
+;;      ('RUNNING','REMOTEHOSTSTART','LAUNCHED'));
+;; Gets the candidate tests from rmt:get-toplevels-and-incompletes and resolves each one to COMPLETED/PASS, still-alive (untouched) or COMPLETED/DEAD.
+;; NOT EASY TO MIGRATE TO db{file,mod}
+;;
+(define (rmt:find-and-mark-incomplete-engine run-id ovr-deadtime cfg-deadtime test-stats-update-period)
+  (let* ((incompleted '())
+	 (oldlaunched '())
+	 (toplevels   '())
+          ;; The default running-deadtime is 720 seconds = 12 minutes.
+          ;; "(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))" = 200 + (2 * (200 + 30 + 30))
+         (deadtime-trim (or ovr-deadtime cfg-deadtime))
+         (server-start-allowance 200)
+         (server-overloaded-budget 200)
+         (launch-monitor-off-time (or test-stats-update-period 30))
+         (launch-monitor-on-time-budget 30)
+         (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget))
+         (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30))
+         (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default))
+         (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))
+         (running-deadtime (or deadtime-trim running-deadtime-default))) ;; override/config deadtime wins; otherwise the computed default (720 s when test-stats-update-period is #f)
+
+    (debug:print-info 4  *default-log-port* "running-deadtime = " running-deadtime)
+    (debug:print-info 4  *default-log-port* "deadtime-trim = " deadtime-trim)
+
+    (let* ((dat (rmt:get-toplevels-and-incompletes run-id running-deadtime remotehoststart-deadtime)))
+      (set! oldlaunched (list-ref dat 1))
+      (set! toplevels   (list-ref dat 2))
+      (set! incompleted (list-ref dat 0)))
+
+    (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, "
+		      (length toplevels) " old LAUNCHED toplevel tests and "
+		      (length incompleted) " tests marked RUNNING but apparently dead.")
+  
+    ;; These tests look defunct. Each is set to COMPLETED/PASS if its final-status file reports that, left alone if its process is still alive, else set to COMPLETED/DEAD.
+    ;;
+    ;; (db:delay-if-busy dbdat)
+    (let* ((min-incompleted-ids (map car incompleted)) ;; do 'em all
+	   (all-ids             (append min-incompleted-ids (map car oldlaunched))))
+      (if (> (length all-ids) 0)
+	  (begin
+	    ;; (launch:is-test-alive "localhost" 435)
+	    (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ")
+			 " as DEAD")
+	    (for-each
+             (lambda (test-id)
+               (let* ((tinfo   (rmt:get-test-info-by-id run-id test-id))
+		      (run-dir (db:test-get-rundir     tinfo))
+		      (host    (db:test-get-host       tinfo))
+		      (pid     (db:test-get-process_id tinfo))
+		      (result (rmt:get-status-from-final-status-file run-dir)))
+		 (if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result))) 
+		     (begin
+		       (debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD")
+		       (rmt:set-state-status-and-roll-up-items
+			run-id test-id 'foo "COMPLETED" "PASS" ;; test-id goes in the test-name slot; 'foo fills the item-path slot
+			"Test stopped responding but it has PASSED; marking it PASS in the DB."))
+		     (let ((is-alive (and (not (eq? pid 0))  ;; 0 is default in re-used field "attemptnum" where pid stored.
+					  (commonmod:is-test-alive host pid))))
+		       (if is-alive
+			   (debug:print 0 *default-log-port* "INFO: test " test-id " on host " host
+					" has a process on pid " pid ", NOT setting to DEAD.")
+			   (begin
+			     (debug:print 0 *default-log-port* "INFO: test " test-id
+					  " final state/status is not COMPLETED/PASS. It is " result)
+			     (rmt:set-state-status-and-roll-up-items
+			      run-id test-id 'foo "COMPLETED" "DEAD"
+			      "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead.")))))))
+	     ;; call end of run detection for posthook - from merge, is it needed?
+	     ;; (launch:end-of-run-check run-id)
+	     all-ids)
+	    )))))
+
+
+
+
+
 (define (mt:lazy-read-test-config test-name)
   (let ((tconf (hash-table-ref/default *testconfigs* test-name #f)))
     (if tconf
 	tconf
 	(let ((test-dirs (tests:get-tests-search-path *configdat*)))

Index: rmt.scm
==================================================================
--- rmt.scm
+++ rmt.scm
@@ -443,10 +443,11 @@
 
 (define (rmt:set-state-status-and-roll-up-run run-id state status)
   (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'set-state-status-and-roll-up-run run-id (list run-id state status)))
 
+;; run on client version of set-state-status-and-roll-up-run
 
 (define (rmt:update-pass-fail-counts run-id test-name)
   (assert (number? run-id) "FATAL: Run id required.")
   (rmt:general-call 'update-pass-fail-counts run-id test-name test-name test-name))
 

Index: rmtmod.scm
==================================================================
--- rmtmod.scm
+++ rmtmod.scm
@@ -177,17 +177,10 @@
 
 (define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in)
   (assert (number? run-id) "FATAL: Run id required.")
   (rmtmod:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in)))
   
-;; state and status are extra hints not usually used in the calculation
-;;
-(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment)
-  (assert (number? run-id) "FATAL: Run id required.")
-  (rmtmod:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment)))
-
-
 ;;======================================================================
 ;; Maintenance
 ;;======================================================================
 
 
@@ -204,82 +197,7 @@
           #f
           )
         (with-input-from-file infile read-lines)
 	)))
   
-;;  select end_time-now from
-;;      (select testname,item_path,event_time+run_duration as
-;;                          end_time,strftime('%s','now') as now from tests where state in
-;;      ('RUNNING','REMOTEHOSTSTART','LAUNCHED'));
-;;
-;; NOT EASY TO MIGRATE TO db{file,mod}
-;;
-(define (rmt:find-and-mark-incomplete-engine run-id ovr-deadtime cfg-deadtime test-stats-update-period)
-  (let* ((incompleted '())
-	 (oldlaunched '())
-	 (toplevels   '())
-          ;; The default running-deadtime is 720 seconds = 12 minutes.
-          ;; "(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))" = 200 + (2 * (200 + 30 + 30))
-         (deadtime-trim (or ovr-deadtime cfg-deadtime))
-         (server-start-allowance 200)
-         (server-overloaded-budget 200)
-         (launch-monitor-off-time (or test-stats-update-period 30))
-         (launch-monitor-on-time-budget 30)
-         (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget))
-         (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30))
-         (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default))
-         (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))
-         (running-deadtime (or deadtime-trim running-deadtime-default))) ;; two minutes (30 seconds between updates, this leaves 3x grace period)
-
-    (debug:print-info 4  *default-log-port* "running-deadtime = " running-deadtime)
-    (debug:print-info 4  *default-log-port* "deadtime-trim = " deadtime-trim)
-
-    (let* ((dat (rmt:get-toplevels-and-incompletes run-id running-deadtime remotehoststart-deadtime)))
-      (set! oldlaunched (list-ref dat 1))
-      (set! toplevels   (list-ref dat 2))
-      (set! incompleted (list-ref dat 0)))
-
-    (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, "
-		      (length toplevels) " old LAUNCHED toplevel tests and "
-		      (length incompleted) " tests marked RUNNING but apparently dead.")
-  
-    ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE.
-    ;;
-    ;; (db:delay-if-busy dbdat)
-    (let* ((min-incompleted-ids (map car incompleted)) ;; do 'em all
-	   (all-ids             (append min-incompleted-ids (map car oldlaunched))))
-      (if (> (length all-ids) 0)
-	  (begin
-	    ;; (launch:is-test-alive "localhost" 435)
-	    (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ")
-			 " as DEAD")
-	    (for-each
-             (lambda (test-id)
-               (let* ((tinfo   (rmt:get-test-info-by-id run-id test-id))
-		      (run-dir (db:test-get-rundir     tinfo))
-		      (host    (db:test-get-host       tinfo))
-		      (pid     (db:test-get-process_id tinfo))
-		      (result (rmt:get-status-from-final-status-file run-dir)))
-		 (if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result))) 
-		     (begin
-		       (debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD")
-		       (rmt:set-state-status-and-roll-up-items
-			run-id test-id 'foo "COMPLETED" "PASS"
-			"Test stopped responding but it has PASSED; marking it PASS in the DB."))
-		     (let ((is-alive (and (not (eq? pid 0))  ;; 0 is default in re-used field "attemptnum" where pid stored.
-					  (commonmod:is-test-alive host pid))))
-		       (if is-alive
-			   (debug:print 0 *default-log-port* "INFO: test " test-id " on host " host
-					" has a process on pid " pid ", NOT setting to DEAD.")
-			   (begin
-			     (debug:print 0 *default-log-port* "INFO: test " test-id
-					  " final state/status is not COMPLETED/PASS. It is " result)
-			     (rmt:set-state-status-and-roll-up-items
-			      run-id test-id 'foo "COMPLETED" "DEAD"
-			      "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead.")))))))
-	     ;; call end of eud of run detection for posthook - from merge, is it needed?
-	     ;; (launch:end-of-run-check run-id)
-	     all-ids)
-	    )))))
-
 
 )