Index: Makefile
==================================================================
--- Makefile
+++ Makefile
@@ -6,12 +6,12 @@
            ods.scm runconfig.scm server.scm configf.scm \
            db.scm keys.scm margs.scm megatest-version.scm \
            process.scm runs.scm tasks.scm tests.scm genexample.scm \
 	   fs-transport.scm http-transport.scm \
            client.scm gutils.scm synchash.scm daemon.scm mt.scm dcommon.scm \
-	   tree.scm rmt.scm api.scm tdb.scm  \
-	   ezsteps.scm 
+	   tree.scm rmt.scm api.scm tdb.scm \
+	   ezsteps.scm lock-queue.scm
 
 GUISRCF  = dashboard-tests.scm dashboard-guimonitor.scm 
 
 OFILES   = $(SRCFILES:%.scm=%.o)
 GOFILES  = $(GUISRCF:%.scm=%.o)

Index: dashboard-tests.scm
==================================================================
--- dashboard-tests.scm
+++ dashboard-tests.scm
@@ -387,11 +387,11 @@
 	 (last-update   0) ;; (current-seconds))
 	 (request-update #t)
 	 (db             #f))
     (if (not testdat)
 	(begin
-	  (debug:print 0 "ERROR: No test data found for test " test-id ", exiting")
+	  (debug:print 2 "ERROR: No test data found for test " test-id ", exiting")
 	  (exit 1))
 	(let* ((run-id        (if testdat (db:test-get-run_id testdat) #f))
 	       (keydat        (if testdat (rmt:get-key-val-pairs run-id) #f))
 	       (rundat        (if testdat (rmt:get-run-info run-id) #f))
 	       (runname       (if testdat (db:get-value-by-header (db:get-row rundat)

Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -360,13 +360,13 @@
 	  (sqlite3:execute db (conc "PRAGMA synchronous = 0;"))))
     db))
 
 (define (db:log-local-event . loglst)
   (let ((logline (apply conc loglst)))
-	;; (pwd     (current-directory))
-	;; (cmdline (string-intersperse (argv) " "))
-	;; (pid     (current-process-id)))
+    ;; (pwd     (current-directory))
+    ;; (cmdline (string-intersperse (argv) " "))
+    ;; (pid     (current-process-id)))
     (db:log-event logline)))
 
 (define (db:log-event logline)
   (let ((db (open-logging-db)))
     (sqlite3:execute db "INSERT INTO log (logline,pwd,cmdline,pid) VALUES (?,?,?,?);"
@@ -503,11 +503,11 @@
     (map sqlite3:finalize! statements)
     (sqlite3:finalize! count-stmt)
     (sqlite3:execute db "VACUUM;")))
 
 ;; (define (db:report-junk-records db)
-  
+
 
 ;;======================================================================
 ;; meta get and set vars
 ;;======================================================================
 
@@ -638,17 +638,17 @@
     (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and"
 	(let ((res #f))
 	  (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time" comma keystr ") VALUES (?,?,?,?,strftime('%s','now')" comma valslots ");")
 		 allvals)
 	  (apply sqlite3:for-each-row 
-	   (lambda (id)
-	     (set! res id))
-	   db
-	   (let ((qry (conc "SELECT id FROM runs WHERE (runname=? " andstr key=?str ");")))
-	     ;(debug:print 4 "qry: " qry) 
-	     qry)
-	   qryvals)
+		 (lambda (id)
+		   (set! res id))
+		 db
+		 (let ((qry (conc "SELECT id FROM runs WHERE (runname=? " andstr key=?str ");")))
+					;(debug:print 4 "qry: " qry) 
+		   qry)
+		 qryvals)
 	  (sqlite3:execute db "UPDATE runs SET state=?,status=?,event_time=strftime('%s','now') WHERE id=?;" state status res)
 	  res) 
 	(begin
 	  (debug:print 0 "ERROR: Called without all necessary keys")
 	  #f))))
@@ -740,11 +740,11 @@
        (let* ((stateparts (string-split state "|"))
 	      (newstate   (conc (car stateparts) "\n" (cadr stateparts))))
 	 (hash-table-set! totals newstate (+ (hash-table-ref/default totals newstate 0) count))
 	 (set! res (cons (list runname newstate count) res))))
      db
-    "SELECT runname,t.state||'|'||t.status AS s,count(t.id) FROM runs AS r INNER JOIN tests AS t ON r.id=t.run_id GROUP BY s,runname ORDER BY r.event_time,s DESC;" )
+     "SELECT runname,t.state||'|'||t.status AS s,count(t.id) FROM runs AS r INNER JOIN tests AS t ON r.id=t.run_id GROUP BY s,runname ORDER BY r.event_time,s DESC;" )
     ;; (set! res (reverse res))
     (for-each (lambda (state)
 		(set! res (cons (list "Totals" state (hash-table-ref totals state)) res)))
 	      (sort (hash-table-keys totals) string>=))
     res))
@@ -791,27 +791,27 @@
 
 ;; use (get-value-by-header (db:get-header runinfo)(db:get-row runinfo))
 (define (db:get-run-info db run-id)
   ;;(if (hash-table-ref/default *run-info-cache* run-id #f)
   ;;    (hash-table-ref *run-info-cache* run-id)
-      (let* ((res      #f)
-	     (keys      (db:get-keys db))
-	     (remfields (list "id" "runname" "state" "status" "owner" "event_time"))
-	     (header    (append keys remfields))
-	     (keystr    (conc (keys->keystr keys) ","
-			      (string-intersperse remfields ","))))
-	(debug:print-info 11 "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr)
-	(sqlite3:for-each-row
-	 (lambda (a . x)
-	   (set! res (apply vector a x)))
-	 db
-	 (conc "SELECT " keystr " FROM runs WHERE id=? AND state != 'deleted';")
-	 run-id)
-	(debug:print-info 11 "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr)
-	(let ((finalres (vector header res)))
-	  ;; (hash-table-set! *run-info-cache* run-id finalres)
-	  finalres)))
+  (let* ((res      #f)
+	 (keys      (db:get-keys db))
+	 (remfields (list "id" "runname" "state" "status" "owner" "event_time"))
+	 (header    (append keys remfields))
+	 (keystr    (conc (keys->keystr keys) ","
+			  (string-intersperse remfields ","))))
+    (debug:print-info 11 "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr)
+    (sqlite3:for-each-row
+     (lambda (a . x)
+       (set! res (apply vector a x)))
+     db
+     (conc "SELECT " keystr " FROM runs WHERE id=? AND state != 'deleted';")
+     run-id)
+    (debug:print-info 11 "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr)
+    (let ((finalres (vector header res)))
+      ;; (hash-table-set! *run-info-cache* run-id finalres)
+      finalres)))
 
 (define (db:set-comment-for-run db run-id comment)
   (debug:print-info 11 "db:set-comment-for-run START run-id: " run-id " comment: " comment)
   (sqlite3:execute db "UPDATE runs SET comment=? WHERE id=?;" comment run-id)
   (debug:print-info 11 "db:set-comment-for-run END run-id: " run-id " comment: " comment))
@@ -867,37 +867,37 @@
     (debug:print-info 11 "db:get-key-val-pairs END keys: " keys " run-id: " run-id)
     (reverse res)))
 
 ;; get key vals for a given run-id
 (define (db:get-key-vals db run-id)
-   (let ((mykeyvals (hash-table-ref/default *keyvals* run-id #f)))
+  (let ((mykeyvals (hash-table-ref/default *keyvals* run-id #f)))
      (if mykeyvals 
 	 mykeyvals
-	 (let* ((keys (db:get-keys db))
-		(res  '()))
-	   (debug:print-info 11 "db:get-key-vals START keys: " keys " run-id: " run-id)
-	   (for-each 
-	    (lambda (key)
-	      (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;")))
-		;; (debug:print 0 "qry: " qry)
-		(sqlite3:for-each-row 
-		 (lambda (key-val)
-		   (set! res (cons key-val res)))
-		 db qry run-id)))
-	    keys)
-	   (debug:print-info 11 "db:get-key-vals END keys: " keys " run-id: " run-id)
+	(let* ((keys (db:get-keys db))
+	       (res  '()))
+	  (debug:print-info 11 "db:get-key-vals START keys: " keys " run-id: " run-id)
+	  (for-each 
+	   (lambda (key)
+	     (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;")))
+	       ;; (debug:print 0 "qry: " qry)
+	       (sqlite3:for-each-row 
+		(lambda (key-val)
+		  (set! res (cons key-val res)))
+		db qry run-id)))
+	   keys)
+	  (debug:print-info 11 "db:get-key-vals END keys: " keys " run-id: " run-id)
 	   (let ((final-res (reverse res)))
 	     (hash-table-set! *keyvals* run-id final-res)
 	     final-res)))))
 
 ;; The target is keyval1/keyval2..., cached in *target* as it is used often
 (define (db:get-target db run-id)
   (let ((mytarg (hash-table-ref/default *target* run-id #f)))
     (if mytarg
 	mytarg
-  (let* ((keyvals (db:get-key-vals db run-id))
-	 (thekey  (string-intersperse (map (lambda (x)(if x x "-na-")) keyvals) "/")))
+	(let* ((keyvals (db:get-key-vals db run-id))
+	       (thekey  (string-intersperse (map (lambda (x)(if x x "-na-")) keyvals) "/")))
 	  (hash-table-set! *target* run-id thekey)
 	  thekey))))
 
 ;;======================================================================
 ;;  T E S T S
@@ -930,18 +930,18 @@
 					" NOT IN ('"
 					" IN ('") 
 				    (string-intersperse statuses "','")
 				    "')")))
 	 (states-statuses-qry 
-	                  (cond 
-			   ((and states-qry statuses-qry)
-			    (conc " AND ( " states-qry " AND " statuses-qry " ) "))
-			   (states-qry  
-			    (conc " AND " states-qry))
-			   (statuses-qry 
-			    (conc " AND " statuses-qry))
-			   (else "")))
+	  (cond 
+	   ((and states-qry statuses-qry)
+	    (conc " AND ( " states-qry " AND " statuses-qry " ) "))
+	   (states-qry  
+	    (conc " AND " states-qry))
+	   (statuses-qry 
+	    (conc " AND " statuses-qry))
+	   (else "")))
 	 (tests-match-qry (tests:match->sqlqry testpatt))
 	 (qry             (conc "SELECT " qryvals
 				" FROM tests WHERE run_id=? AND state != 'DELETED' "
 				states-statuses-qry
 				(if tests-match-qry (conc " AND (" tests-match-qry ") ") "")
@@ -952,11 +952,11 @@
 						    (conc " ORDER BY " sort-by) 
 						    "")))
 				(if limit  (conc " LIMIT " limit)   "")
 				(if offset (conc " OFFSET " offset) "")
 				";"
-			 )))
+				)))
     (debug:print-info 8 "db:get-tests-for-run qry=" qry)
     (sqlite3:for-each-row 
      (lambda (a . b) ;; id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment)
        (set! res (cons (apply vector a b) res))) ;; id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment) res)))
      db 
@@ -977,13 +977,13 @@
 ;; i.e. these lists define what to NOT show.
 ;; states and statuses are required to be lists, empty is ok
 ;; not-in #t = above behaviour, #f = must match
 ;; run-ids is a list of run-ids or a single number or #f for all runs
 (define (db:get-tests-for-runs db run-ids testpatt states statuses 
-			      #!key (not-in #t)
-			      (sort-by #f)
-			      (qryvals "id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment")) ;; 'rundir 'event_time
+			       #!key (not-in #t)
+			       (sort-by #f)
+			       (qryvals "id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment")) ;; 'rundir 'event_time
   (debug:print-info 11 "db:get-tests-for-run START run-ids=" run-ids ", testpatt=" testpatt ", states=" states ", statuses=" statuses ", not-in=" not-in ", sort-by=" sort-by)
   (let* ((res '())
 	 ;; if states or statuses are null then assume match all when not-in is false
 	 (states-qry      (if (null? states) 
 			      #f
@@ -1012,11 +1012,11 @@
 				(if tests-match-qry (conc " AND (" tests-match-qry ") ") "")
 				(case sort-by
 				  ((rundir)     " ORDER BY length(rundir) DESC;")
 				  ((event_time) " ORDER BY event_time ASC;")
 				  (else         ";"))
-			 )))
+				)))
     (debug:print-info 8 "db:get-tests-for-run qry=" qry)
     (sqlite3:for-each-row 
      (lambda (a . b) ;; id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment)
        (set! res (cons (apply vector a b) res))) ;; id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment) res)))
      db 
@@ -1105,29 +1105,35 @@
 
 (define (cdb:tests-update-run-duration serverdat test-id minutes)
   (cdb:client-call serverdat 'update-run-duration #t *default-numtries* minutes test-id))
 
 (define (cdb:tests-update-uname-host serverdat test-id uname hostname)
-  (cdb:client-call serverdat 'update-uname-host #t *default-numtries* test-id uname hostname))
+  (cdb:client-call serverdat 'update-uname-host #t *default-numtries* uname hostname test-id))
+
+(define (db:process-triggers test-id newstate newstatus)
+  #t) ;; placeholder hook: no trigger processing is implemented yet, always returns #t
 
 ;; speed up for common cases with a little logic
+;; NB// Ultimately this will be deprecated in favor of mt:test-set-state-status-by-id
+;;
 (define (db:test-set-state-status-by-id db test-id newstate newstatus newcomment)
   (cond
    ((and newstate newstatus newcomment)
     (sqlite3:execute db "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;" newstate newstatus newcomment test-id))
    ((and newstate newstatus)
     (sqlite3:execute db "UPDATE tests SET state=?,status=? WHERE id=?;" newstate newstatus test-id))
    (else
     (if newstate   (sqlite3:execute db "UPDATE tests SET state=?   WHERE id=?;" newstate   test-id))
     (if newstatus  (sqlite3:execute db "UPDATE tests SET status=?  WHERE id=?;" newstatus  test-id))
-    (if newcomment (sqlite3:execute db "UPDATE tests SET comment=? WHERE id=?;" newcomment test-id))
-    ))
+    (if newcomment (sqlite3:execute db "UPDATE tests SET comment=? WHERE id=?;" newcomment test-id))))
+  (db:process-triggers test-id newstate newstatus)
   #t) ;; retrun something to keep the remote calls happy
 
-(define (db:test-set-state-status-by-run-id-testname db run-id test-name item-path status state)
-  (sqlite3:execute db "UPDATE tests SET state=?,status=?,event_time=strftime('%s','now') WHERE run_id=? AND testname=? AND item_path=?;" 
-		   state status run-id test-name item-path))
+;; Never used
+;; (define (db:test-set-state-status-by-run-id-testname db run-id test-name item-path status state)
+;;   (sqlite3:execute db "UPDATE tests SET state=?,status=?,event_time=strftime('%s','now') WHERE run_id=? AND testname=? AND item_path=?;" 
+;; 		   state status run-id test-name item-path))
 
 (define (db:get-count-tests-running db)
   (let ((res 0))
     (sqlite3:for-each-row
      (lambda (count)
@@ -1281,11 +1287,11 @@
 (define (db:get-test-info db run-id testname item-path)
   (db:get-test-info-by-id db (db:get-test-id db run-id testname item-path)))
 
 (define (db:test-set-comment db test-id comment)
   (sqlite3:execute 
-   db 
+   db
    "UPDATE tests SET comment=? WHERE id=?;"
    comment test-id))
 
 (define (cdb:test-set-rundir! serverdat run-id test-name item-path rundir)
   (cdb:client-call serverdat 'test-set-rundir #t *default-numtries* rundir run-id test-name item-path))
@@ -1315,11 +1321,11 @@
   ;; BUG: Move the values derived from args to parameters and push to megatest.scm
   (let* ((testpatt   (if (args:get-arg "-testpatt")(args:get-arg "-testpatt") "%"))
 	 (statepatt  (if (args:get-arg ":state")   (args:get-arg ":state")    "%"))
 	 (statuspatt (if (args:get-arg ":status")  (args:get-arg ":status")   "%"))
 	 (runname    (if (args:get-arg ":runname") (args:get-arg ":runname")  "%"))
-	 (paths-from-db (cdb:remote-run db:test-get-paths-matching-keynames-target db keynames target res
+	 (paths-from-db (cdb:remote-run db:test-get-paths-matching-keynames-target-new db keynames target res
 					testpatt:   testpatt
 					statepatt:  statepatt
 					statuspatt: statuspatt
 					runname:    runname)))
     (if fnamepatt
@@ -1346,17 +1352,46 @@
 	 (testqry (tests:match->sqlqry testpatt))
 	 (qrystr (conc "SELECT t.rundir FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id WHERE "
 		       keystr " AND r.runname LIKE '" runname "' AND " testqry
 		       " AND t.state LIKE '" statepatt "' AND t.status LIKE '" statuspatt 
 		       "' ORDER BY t.event_time ASC;")))
-    (debug:print 3 "qrystr: " qrystr)
     (sqlite3:for-each-row 
      (lambda (p)
        (set! res (cons p res)))
      db 
      qrystr)
     res))
+
+;; Collect rundir values for tests in runs matching target (keyval1/keyval2/...)
+;; and runname; each path is consed onto res and the extended list is returned.
+;; runname, statepatt and statuspatt are bound as SQL parameters so quotes in
+;; them cannot break the query. NOTE(review): key values are still spliced in.
+(define (db:test-get-paths-matching-keynames-target-new db keynames target res
+							#!key (testpatt   "%") (statepatt  "%")
+							(statuspatt "%") (runname    "%"))
+  (let* ((row-ids '())
+	 (keystr  (string-intersperse
+		   (map (lambda (key val) (conc key " like '" val "'"))
+			keynames (string-split target "/"))
+		   " AND "))
+	 (testqry (tests:match->sqlqry testpatt))
+	 ;; parenthesize the test match and skip it when #f, as the db:get-tests-for-run(s) queries do
+	 (tclause (if testqry (conc " AND (" testqry ")") ""))
+	 (runsqry (sqlite3:prepare db (conc "SELECT id FROM runs WHERE " keystr " AND runname LIKE ?;")))
+	 (tstsqry (sqlite3:prepare db (conc "SELECT rundir FROM tests WHERE run_id=?" tclause " AND state LIKE ? AND status LIKE ? ORDER BY event_time ASC;"))))
+    ;; gather all matching run ids first, then query the tests of each run
+    (sqlite3:for-each-row
+     (lambda (rid) (set! row-ids (cons rid row-ids)))
+     runsqry runname)
+    (for-each (lambda (rid)
+		(sqlite3:for-each-row
+		 (lambda (p) (set! res (cons p res)))
+		 tstsqry rid statepatt statuspatt))
+	      row-ids)
+    (sqlite3:finalize! tstsqry)
+    (sqlite3:finalize! runsqry)
+    res))
 
 ;; look through tests from matching runs for a file
 (define (db:test-get-first-path-matching db keynames target fname)
   ;; [refpaths] is the section where references to other megatest databases are stored
   (let ((mt-paths (configf:get-section "refpaths"))
@@ -1392,28 +1427,28 @@
     ((zmq)(with-output-to-string (lambda ()(serialize obj))))
     (else obj)))
 
 (define (db:string->obj msg)
   (case *transport-type*
-   ((fs) msg)
-   ((http)
-    (if (string? msg)
-	(with-input-from-string 
-	    (base64:base64-decode
-	     (string-substitute 
-	      (regexp "_") "=" msg #t))
-	  (lambda ()(deserialize)))
-	(vector #f #f #f))) ;; crude reply for when things go awry
-   ((zmq)(with-input-from-string msg (lambda ()(deserialize))))
-   (else msg)))
+    ((fs) msg)
+    ((http)
+     (if (string? msg)
+	 (with-input-from-string 
+	     (base64:base64-decode
+	      (string-substitute 
+	       (regexp "_") "=" msg #t))
+	   (lambda ()(deserialize)))
+	 (vector #f #f #f))) ;; crude reply for when things go awry
+    ((zmq)(with-input-from-string msg (lambda ()(deserialize))))
+    (else msg)))
 
 (define (cdb:use-non-blocking-mode proc)
   (set! *client-non-blocking-mode* #t)
   (let ((res (proc)))
     (set! *client-non-blocking-mode* #f)
     res))
-  
+
 ;; params = 'target cached remparams
 ;;
 ;; make-vector-record cdb packet client-sig qtype immediate query-sig params qtime
 ;;
 ;; cdb:client-call is the unified interface to all the transports. It dispatches the
@@ -1469,35 +1504,35 @@
 			       ;; now get the actual message
 			       (let ((myres (db:string->obj (receive-message* sub-socket))))
 				 (if (equal? query-sig (vector-ref myres 1))
 				     (set! res (vector-ref myres 2))
 				     (loop)))))))
-	    ;; (timeout (lambda ()
-	    ;;     	(let loop ((n numretries))
-	    ;;     	  (thread-sleep! 15)
-	    ;;     	  (if (not res)
-	    ;;     	      (if (> numretries 0)
-	    ;;     		  (begin
-	    ;;     		    (debug:print 2 "WARNING: no reply to query " params ", trying resend")
-	    ;;     		    (debug:print-info 11 "re-sending message")
-	    ;;     		    (send-message push-socket zdat)
-	    ;;     		    (debug:print-info 11 "message re-sent")
-	    ;;     		    (loop (- n 1)))
-	    ;;     		  ;; (apply cdb:client-call *runremote* qtype immediate (- numretries 1) params))
-	    ;;     		  (begin
-	    ;;     		    (debug:print 0 "ERROR: cdb:client-call timed out " params ", exiting.")
-	    ;;     		    (exit 5))))))))
+	;; (timeout (lambda ()
+	;;     	(let loop ((n numretries))
+	;;     	  (thread-sleep! 15)
+	;;     	  (if (not res)
+	;;     	      (if (> numretries 0)
+	;;     		  (begin
+	;;     		    (debug:print 2 "WARNING: no reply to query " params ", trying resend")
+	;;     		    (debug:print-info 11 "re-sending message")
+	;;     		    (send-message push-socket zdat)
+	;;     		    (debug:print-info 11 "message re-sent")
+	;;     		    (loop (- n 1)))
+	;;     		  ;; (apply cdb:client-call *runremote* qtype immediate (- numretries 1) params))
+	;;     		  (begin
+	;;     		    (debug:print 0 "ERROR: cdb:client-call timed out " params ", exiting.")
+	;;     		    (exit 5))))))))
 	(debug:print-info 11 "Starting threads")
 	(let ((th1 (make-thread send-receive "send receive"))
 	      ;; (th2 (make-thread timeout      "timeout"))
 	      )
 	  (thread-start! th1)
 	  ;; (thread-start! th2)
 	  (thread-join!  th1)
 	  (debug:print-info 11 "cdb:client-call returning res=" res)
 	  res))))))
-  
+
 (define (cdb:set-verbosity serverdat val)
   (cdb:client-call serverdat 'set-verbosity #f *default-numtries* val))
 
 (define (cdb:login serverdat keyval signature)
   (cdb:client-call serverdat 'login #t *default-numtries* keyval megatest-version signature))
@@ -1506,10 +1541,11 @@
   (cdb:client-call serverdat 'logout #t *default-numtries* keyval signature))
 
 (define (cdb:num-clients serverdat)
   (cdb:client-call serverdat 'numclients #t *default-numtries*))
 
+;; I think this would be more efficient if executed on client side FIXME???
 (define (cdb:test-set-status-state serverdat test-id status state msg)
   (if (member state '("LAUNCHED" "REMOTEHOSTSTART"))
       (cdb:client-call serverdat 'set-test-start-time #t *default-numtries* test-id))
   (if msg
       (cdb:client-call serverdat 'state-status-msg #t *default-numtries* state status msg test-id)
@@ -1525,10 +1561,22 @@
 (define (cdb:pass-fail-counts serverdat test-id fail-count pass-count)
   (cdb:client-call serverdat 'pass-fail-counts #t *default-numtries* fail-count pass-count test-id))
 
 (define (cdb:tests-register-test serverdat run-id test-name item-path)
   (cdb:client-call serverdat 'register-test #t *default-numtries* run-id test-name item-path))
+
+;; Transactioned calls for the roll-up-pass-fail work; each names an entry in db:queries. Note the key is 'update-fail-pass-counts (fail before pass).
+(define (cdb:update-pass-fail-counts serverdat run-id test-name)
+  (cdb:client-call serverdat 'update-fail-pass-counts #t *default-numtries* run-id test-name run-id test-name run-id test-name)) ;; args repeated: one run-id/test-name pair per run_id=?/testname=? pair in the query
+;; Request the top-test-set-running query: sets state='RUNNING' on the item_path='' record of test-name in run-id.
+(define (cdb:top-test-set-running serverdat run-id test-name)
+  (cdb:client-call serverdat 'top-test-set-running #t *default-numtries* run-id test-name))
+;; Request the top-test-set-per-pf-counts query: derives state/status of the item_path='' record; args repeated for its three run_id=?/testname=? pairs.
+(define (cdb:top-test-set-per-pf-counts serverdat run-id test-name)
+  (cdb:client-call serverdat 'top-test-set-per-pf-counts #t *default-numtries* run-id test-name run-id test-name run-id test-name))
+
+;;=
 
 (define (cdb:flush-queue serverdat)
   (cdb:client-call serverdat 'flush #f *default-numtries*))
 
 (define (cdb:kill-server serverdat pid)
@@ -1565,13 +1613,18 @@
 ;; A G R E G A T E D   T R A N S A C T I O N   D B   W R I T E S 
 ;;======================================================================
 
 (define db:queries 
   (list '(register-test          "INSERT OR IGNORE INTO tests (run_id,testname,event_time,item_path,state,status) VALUES (?,?,strftime('%s','now'),?,'NOT_STARTED','n/a');")
+	;; Test state and status; the single-column updates take params (new-value test-id)
+	'(set-test-state         "UPDATE tests SET state=?   WHERE id=?;")
+	'(set-test-status        "UPDATE tests SET status=?  WHERE id=?;")
 	'(state-status           "UPDATE tests SET state=?,status=? WHERE id=?;")
-	'(set-test-start-time    "UPDATE tests SET event_time=strftime('%s','now') WHERE id=?;")
 	'(state-status-msg       "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;")
+	;; Test comment
+	'(set-test-comment       "UPDATE tests SET comment=? WHERE id=?;")
+	'(set-test-start-time    "UPDATE tests SET event_time=strftime('%s','now') WHERE id=?;")
 	'(pass-fail-counts       "UPDATE tests SET fail_count=?,pass_count=? WHERE id=?;")
 	;; test_data-pf-rollup is used to set a tests PASS/FAIL based on the pass/fail info from the steps
 	'(test_data-pf-rollup    "UPDATE tests
                                     SET status=CASE WHEN (SELECT fail_count FROM tests WHERE id=?) > 0 
                                       THEN 'FAIL'
@@ -1588,17 +1641,39 @@
 	'(update-cpuload-diskfree "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;")
 	'(update-run-duration     "UPDATE tests SET run_duration=? WHERE id=?;")
 	'(update-uname-host       "UPDATE tests SET uname=?,host=? WHERE id=?;")
 	'(update-test-state       "UPDATE tests SET state=? WHERE state=? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));")
 	'(update-test-status      "UPDATE tests SET status=? WHERE status like ? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));")
-    ))
+	;; stuff for roll-up-pass-fail-counts
+	'(update-fail-pass-counts "UPDATE tests 
+             SET fail_count=(SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND status IN ('FAIL','CHECK')),
+                 pass_count=(SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND status IN ('PASS','WARN','WAIVED'))
+             WHERE run_id=? AND testname=? AND item_path='';")
+	'(top-test-set-running  "UPDATE tests SET state='RUNNING' WHERE run_id=? AND testname=? AND item_path='';")
+	'(top-test-set-per-pf-counts "UPDATE tests
+                       SET state=CASE 
+                                   WHEN (SELECT count(id) FROM tests 
+                                                WHERE run_id=? AND testname=?
+                                                     AND item_path != '' 
+                                                     AND state in ('RUNNING','NOT_STARTED')) > 0 THEN 'RUNNING'
+                                   ELSE 'COMPLETED' END,
+                            status=CASE 
+                                  WHEN fail_count > 0 THEN 'FAIL' 
+                                  WHEN pass_count > 0 AND fail_count=0 THEN 'PASS' 
+                                  WHEN (SELECT count(id) FROM tests
+                                         WHERE run_id=? AND testname=?
+                                              AND item_path != ''
+                                              AND status = 'SKIP') > 0 THEN 'SKIP'
+                                  ELSE 'UNKNOWN' END
+                       WHERE run_id=? AND testname=? AND item_path='';")
+	))
 
 ;; do not run these as part of the transaction
 (define db:special-queries   '(rollup-tests-pass-fail
-			       db:roll-up-pass-fail-counts
-                               login
-                               immediate
+			       ;; db:roll-up-pass-fail-counts  ;; WHY NOT!?
+			       login
+			       immediate
 			       flush
 			       sync
 			       set-verbosity
 			       killserver
 			       ))
@@ -1617,11 +1692,11 @@
     (mutex-unlock! *incoming-mutex*)
     (if (> (length data) 0)
 	;; Process if we have data
 	(begin
 	  (debug:print-info 7 "Writing cached data " data)
-    
+	  
 	  ;; Prepare the needed sql statements
 	  ;;
 	  (for-each (lambda (request-item)
 		      (let ((stmt-key (vector-ref request-item 0))
 			    (query    (vector-ref request-item 1)))
@@ -1709,11 +1784,11 @@
 	    (thread-sleep! 0.01)
 	    (loop))))
     (set! *number-of-writes*   (+ *number-of-writes*   1))
     (set! *writes-total-delay* (+ *writes-total-delay* (- (current-milliseconds) start-time)))
     got-it))
-	  
+
 (define (db:process-queue-item db item)
   (let* ((stmt-key       (cdb:packet-get-qtype item))
 	 (qry-sig        (cdb:packet-get-query-sig item))
 	 (return-address (cdb:packet-get-client-sig item))
 	 (params         (cdb:packet-get-params item))
@@ -1799,49 +1874,10 @@
      db
      "SELECT id,item_path,state,status,run_duration,final_logf,comment FROM tests WHERE run_id=? AND testname=? AND item_path != '';"
      run-id test-name)
     res))
 
-;; Rollup the pass/fail counts from itemized tests into fail_count and pass_count
-;; NOTE: Is this duplicating (db:test-data-rollup db test-id status) ????
-(define (db:roll-up-pass-fail-counts db run-id test-name item-path status)
-  ;; (cdb:flush-queue *runremote*)
-  (if (and (not (equal? item-path ""))
-	   (member status '("PASS" "WARN" "FAIL" "WAIVED" "RUNNING" "CHECK" "SKIP")))
-      (begin
-	(sqlite3:execute 
-	 db
-	 "UPDATE tests 
-             SET fail_count=(SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND status IN ('FAIL','CHECK')),
-                 pass_count=(SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND status IN ('PASS','WARN','WAIVED'))
-             WHERE run_id=? AND testname=? AND item_path='';"
-	 run-id test-name run-id test-name run-id test-name)
-        ;; (thread-sleep! 0.1) ;; give other processes a chance here, no, better to be done ASAP?
-	(if (equal? status "RUNNING") ;; running takes priority over all other states, force the test state to RUNNING
-	    (sqlite3:execute db "UPDATE tests SET state=? WHERE run_id=? AND testname=? AND item_path='';" "RUNNING" run-id test-name)
-	    (sqlite3:execute
-	     db
-	     "UPDATE tests
-                       SET state=CASE 
-                                   WHEN (SELECT count(id) FROM tests 
-                                                WHERE run_id=? AND testname=?
-                                                     AND item_path != '' 
-                                                     AND state in ('RUNNING','NOT_STARTED')) > 0 THEN 'RUNNING'
-                                   ELSE 'COMPLETED' END,
-                            status=CASE 
-                                  WHEN fail_count > 0 THEN 'FAIL' 
-                                  WHEN pass_count > 0 AND fail_count=0 THEN 'PASS' 
-                                  WHEN (SELECT count(id) FROM tests
-                                         WHERE run_id=? AND testname=?
-                                              AND item_path != ''
-                                              AND status = 'SKIP') > 0 THEN 'SKIP'
-                                  ELSE 'UNKNOWN' END
-                       WHERE run_id=? AND testname=? AND item_path='';"
-	     run-id test-name run-id test-name run-id test-name))
-	#f)
-      #f))
-
 ;;======================================================================
 ;; Tests meta data
 ;;======================================================================
 
 ;; read the record given a testname
@@ -1973,17 +2009,17 @@
 	  ;; if the test is not FAIL then set status based on the fail and pass counts.
 	  (cdb:test-rollup-test_data-pass-fail *runremote* test-id)
 	  ;; (sqlite3:execute
 	  ;;  db   ;;; NOTE: Should this be WARN,FAIL? A WARN is not a FAIL????? BUG FIXME
 	  ;;  "UPDATE tests
-          ;;             SET status=CASE WHEN (SELECT fail_count FROM tests WHERE id=?) > 0 
-          ;;                THEN 'FAIL'
-          ;;             WHEN (SELECT pass_count FROM tests WHERE id=?) > 0 AND 
-          ;;                  (SELECT status FROM tests WHERE id=?) NOT IN ('WARN','FAIL')
-          ;;             THEN 'PASS'
-          ;;             ELSE status
-          ;;         END WHERE id=?;"
+	  ;;             SET status=CASE WHEN (SELECT fail_count FROM tests WHERE id=?) > 0 
+	  ;;                THEN 'FAIL'
+	  ;;             WHEN (SELECT pass_count FROM tests WHERE id=?) > 0 AND 
+	  ;;                  (SELECT status FROM tests WHERE id=?) NOT IN ('WARN','FAIL')
+	  ;;             THEN 'PASS'
+	  ;;             ELSE status
+	  ;;         END WHERE id=?;"
 	  ;;  test-id test-id test-id test-id)
 	  ))))
 
 (define (db:get-prev-tol-for-test db test-id category variable)
   ;; Finish me?
@@ -2118,15 +2154,15 @@
 			(set! parent-waiton-met #t)))
 		   ;; normal checking of parent items, any parent or parent item not ok blocks running
 		   ((and is-completed
 			 (or is-ok 
 			     (eq? mode 'toplevel))              ;; toplevel does not block on FAIL
-			     (and is-ok (eq? mode 'itemmatch))) ;; itemmatch blocks on not ok
+			 (and is-ok (eq? mode 'itemmatch))) ;; itemmatch blocks on not ok
 		    (set! item-waiton-met #t)))))
 	      tests)
-             ;; both requirements, parent and item-waiton must be met to NOT add item to
-             ;; prereq's not met list
+	     ;; both requirements, parent and item-waiton must be met to NOT add item to
+	     ;; prereq's not met list
 	     (if (not (or parent-waiton-met item-waiton-met))
 		 (set! result (append (if (null? tests) (list waitontest-name) tests) result)))
 	     ;; if the test is not found then clearly the waiton is not met...
 	     ;; (if (not ever-seen)(set! result (cons waitontest-name result)))))
 	     (if (not ever-seen)

ADDED   docs/manual/howto.txt
Index: docs/manual/howto.txt
==================================================================
--- /dev/null
+++ docs/manual/howto.txt
@@ -0,0 +1,48 @@
+
+How To Do Things
+================
+
+Tricks
+------
+
+This section is a compendium of various useful tricks for debugging,
+configuring and generally getting the most out of Megatest.
+
+Debugging Tricks
+----------------
+
+Examining The Environment
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+During Config File Processing
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Organising Your Tests and Tasks
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+/nfs/ch/disks/ch_unienv_disk005/qa_mrwellan/interim/src/megatest/tests/fdktestqa/testqa
+----------------------------
+[tests-paths]
+1 #{get misc parent}/simplerun/tests
+----------------------------
+
+-------------------
+[setup]
+-------------------
+
+The runscript method is a brute force way to run scripts where the
+user is responsible for setting STATE and STATUS
+
+-------------------
+runscript main.csh
+-------------------
+
+
+ww30.2
+cellname/LVS/cellname.LAYOUT_ERRORS
+
+Error: text open
+
+ww31.3
+cellname/LVS/cellname.LAYOUT_ERRORS
+
+Error: text open

Index: docs/manual/megatest_manual.txt
==================================================================
--- docs/manual/megatest_manual.txt
+++ docs/manual/megatest_manual.txt
@@ -46,10 +46,11 @@
 sqlite3 database.
 
 include::../plan.txt[]
 include::getting_started.txt[]
 include::writing_tests.txt[]
+include::howto.txt[]
 include::reference.txt[]
 
 [appendix]
 Example Appendix
 ================

Index: ezsteps.scm
==================================================================
--- ezsteps.scm
+++ ezsteps.scm
@@ -165,9 +165,9 @@
 		  ;; need to update the top test record if PASS or FAIL and this is a subtest
 		  (if (not (equal? item-path ""))
 		      (cdb:roll-up-pass-fail-counts *runremote* run-id test-name item-path new-status))))
 	    ;; for automated creation of the rollup html file this is a good place...
 	    (if (not (equal? item-path ""))
-		(tests:summarize-items #f run-id test-name #f)) ;; don't force - just update if no
+		(tests:summarize-items #f run-id test-id test-name #f)) ;; don't force - just update if no
 	    )))
     (pop-directory)
     rollup-status))

Index: http-transport.scm
==================================================================
--- http-transport.scm
+++ http-transport.scm
@@ -353,11 +353,12 @@
 					   fullurl 
 					   (list (cons 'key "thekey")
 						 (cons 'cmd cmd)
 						 (cons 'params params))
 					   read-string))
-				(close-all-connections)
+				 ;; Shouldn't this be a call to the managed call-all-connections stuff above?
+				(close-all-connections!)
 				(mutex-unlock! *http-mutex*)
 				))
 	                          ;; (if cleanup
 				  ;;   ;; mutex already set
 				  ;;   (begin

Index: launch.scm
==================================================================
--- launch.scm
+++ launch.scm
@@ -1,7 +1,7 @@
 
-;; Copyright 2006-2012, Matthew Welland.
+;; Copyright 2006-2013, Matthew Welland.
 ;; 
 ;;  This program is made available under the GNU GPL version 2.0 or
 ;;  greater. See the accompanying file COPYING for details.
 ;; 
 ;;  This program is distributed WITHOUT ANY WARRANTY; without even the
@@ -135,11 +135,11 @@
 	  (alist->env-vars env-ovrd)
 	  (set-megatest-env-vars run-id)
 	  (set-item-env-vars itemdat)
 	  (save-environment-as-files "megatest")
 	  ;; open-run-close not needed for test-set-meta-info
-	  (tests:set-meta-info #f test-id run-id test-name itemdat 0 work-area)
+	  (tests:set-full-meta-info #f test-id run-id 0 work-area)
 	  (tests:test-set-status! test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f)
 	  ;; (cdb:set-test-start-time!  *runremote* test-id)
 	  (if (args:get-arg "-xterm")
 	      (set! fullrunscript "xterm")
 	      (if (and fullrunscript (not (file-execute-access? fullrunscript)))
@@ -151,10 +151,11 @@
 	  
 	  (let* ((m            (make-mutex))
 		 (kill-job?    #f)
 		 (exit-info    (vector #t #t #t))
 		 (job-thread   #f)
+		 (keep-going   #t)
 		 (runit        (lambda ()
 				 ;; (let-values
 				 ;;  (((pid exit-status exit-code)
 				 ;;    (run-n-wait fullrunscript)))
 				 (tests:test-set-status! test-id "RUNNING" "n/a" #f #f)
@@ -170,11 +171,11 @@
 					  (vector-set! exit-info 2 exit-code)
 					  (set! rollup-status exit-code) 
 					  (mutex-unlock! m)
 					  (if (eq? pid-val 0)
 					      (begin
-						(thread-sleep! 2)
+						(thread-sleep! 1)
 						(loop (+ i 1)))
 					      )))))
 				 ;; then, if runscript ran ok (or did not get called)
 				 ;; do all the ezsteps (if any)
 				 (if ezsteps
@@ -223,11 +224,11 @@
 								   (vector-set! exit-info 1 exit-status)
 								   (vector-set! exit-info 2 exit-code)
 								   (mutex-unlock! m)
 								   (if (eq? pid-val 0)
 								       (begin
-									 (thread-sleep! 2)
+									 (thread-sleep! 1)
 									 (processloop (+ i 1))))
 								   ))
                                                      (let ((exinfo (vector-ref exit-info 2))
                                                            (logfna (if logpro-used (conc stepname ".html") "")))
 						       ;; testing if procedures called in a remote call cause problems (ans: no or so I suspect)
@@ -275,10 +276,11 @@
 							  (round 
 							   (- 
 							    (current-seconds) 
 							    start-seconds)))))
 					(kill-tries 0))
+				   (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area)
 				   (let loop ((minutes   (calc-minutes)))
 				     (begin
 				       (set! kill-job? (or (test-get-kill-request test-id) ;; run-id test-name itemdat))
 							   (and runtlim (let* ((run-seconds   (- (current-seconds) start-seconds))
 									       (time-exceeded (> run-seconds runtlim)))
@@ -286,14 +288,17 @@
 									      (begin
 										(debug:print-info 0 "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" run-seconds " seconds, limit=" runtlim)
 										#t)
 									      #f)))))
 				       ;; open-run-close not needed for test-set-meta-info
-				       (tests:set-meta-info #f test-id run-id test-name itemdat minutes work-area)
+				       (tests:set-partial-meta-info #f test-id run-id minutes work-area)
 				       (if kill-job? 
 					   (begin
 					     (mutex-lock! m)
+					     ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
+					     ;;       section and the runit section? Or add a loop that tries three times with a 1/4 second
+					     ;;       between tries?
 					     (let* ((pid (vector-ref exit-info 0)))
 					       (if (number? pid)
 						   (process-signal pid signal/kill)
 						   ;; (begin
 						   ;;   (debug:print 0 "WARNING: Request received to kill job (attempt # " kill-tries ")")
@@ -317,19 +322,25 @@
 						     (sqlite3:finalize! tdb)
 						     (exit 1))))
 					     (set! kill-tries (+ 1 kill-tries))
 					     (mutex-unlock! m)))
 				       ;; (sqlite3:finalize! db)
-				       (thread-sleep! (+ 10 (random 10))) ;; add some jitter to the call home time to spread out the db accesses
-				       (loop (calc-minutes)))))))
-		 (th1          (make-thread monitorjob))
-		 (th2          (make-thread runit)))
+				       (if keep-going
+					   (begin
+					     (thread-sleep! (+ 10 (random 10))) ;; add some jitter to the call home time to spread out the db accesses
+					     (if keep-going
+						 (loop (calc-minutes)))))))))) ;; NOTE: Checking twice for keep-going is intentional
+		 (th1          (make-thread monitorjob "monitor job"))
+		 (th2          (make-thread runit "run job")))
 	    (set! job-thread th2)
 	    (thread-start! th1)
 	    (thread-start! th2)
 	    (thread-join! th2)
-	    (thread-sleep! 0.1) ;; give thread th1 a chance to be done TODO: Verify this is needed.
+	    (set! keep-going #f)
+	    (thread-sleep! 1)
+	    (thread-terminate! th1) ;; Not sure if this is a good idea
+	    (thread-sleep! 0.1) ;; give thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec.
 	    (mutex-lock! m)
 	    (let* ((item-path (item-list->path itemdat))
 		   (testinfo  (cdb:get-test-info-by-id *runremote* test-id))) ;; )) ;; run-id test-name item-path)))
 	      ;; Am I completed?
 	      (if (equal? (db:test-get-state testinfo) "RUNNING") ;; (not (equal? (db:test-get-state testinfo) "COMPLETED"))
@@ -338,38 +349,35 @@
 							                ;; (db:test-get-state testinfo)))   ;; else preseve the state as set within the test
 				    )
 			(new-status (cond
 				     ((not (vector-ref exit-info 1)) "FAIL") ;; job failed to run
 				     ((eq? rollup-status 0)
-				      ;; if the current status is AUTO the defer to the calculated value (i.e. leave this AUTO)
+				      ;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO)
 				      (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS"))
 				     ((eq? rollup-status 1) "FAIL")
 				     ((eq? rollup-status 2)
 				      ;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN)
 				      (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN"))
 				     (else "FAIL")))) ;; (db:test-get-status testinfo)))
-		    (debug:print-info 2 "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result, rollup-status is " rollup-status)
+		    (debug:print-info 1 "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (vector-ref exit-info 1) " and rollup-status of " rollup-status)
 		    (tests:test-set-status! test-id 
 					    new-state
 					    new-status
 					    (args:get-arg "-m") #f)
 		    ;; need to update the top test record if PASS or FAIL and this is a subtest
-		    (if (not (equal? item-path ""))
-			(cdb:roll-up-pass-fail-counts *runremote* run-id test-name item-path new-status))
-		    
+		    ;; NO NEED TO CALL roll-up-pass-fail-counts HERE, THIS IS DONE IN roll-up-pass-fail-counts called by tests:test-set-status!
+		    ;; (if (not (equal? item-path ""))
+		    ;;     (begin
+		    ;;       (thread-sleep! 0.1) ;; give other processes an opportunity to access the db as rollup is lower priority
+		    ;;       (cdb:roll-up-pass-fail-counts *runremote* run-id test-name item-path new-status)))
 		    ))
 	      ;; for automated creation of the rollup html file this is a good place...
 	      (if (not (equal? item-path ""))
-		  (tests:summarize-items #f run-id test-name #f)) ;; don't force - just update if no
-	      )
+		  (tests:summarize-items #f run-id test-id test-name #f))) ;; don't force - just update if no
 	    (mutex-unlock! m)
-	    ;; (exec-results (cmd-run->list fullrunscript)) ;;  (list ">" (conc test-name "-run.log"))))
-	    ;; (success      exec-results)) ;; (eq? (cadr exec-results) 0)))
 	    (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " 
 			 work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n")
-	    ;; (sqlite3:finalize! db)
-	    ;; (sqlite3:finalize! tdb)
 	    (if (not (vector-ref exit-info 1))
 		(exit 4)))))))
 
 ;; set up the very basics needed for doing anything here.
 (define (setup-for-run)

ADDED   lock-queue.scm
Index: lock-queue.scm
==================================================================
--- /dev/null
+++ lock-queue.scm
@@ -0,0 +1,134 @@
+;; Copyright 2006-2013, Matthew Welland.
+;; 
+;;  This program is made available under the GNU GPL version 2.0 or
+;;  greater. See the accompanying file COPYING for details.
+;; 
+;;  This program is distributed WITHOUT ANY WARRANTY; without even the
+;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+;;  PURPOSE.
+
+;;======================================================================
+;; lock queue - sqlite3 based queue and lock used to serialize rollup file updates
+;;
+;;======================================================================
+
+(use sqlite3 srfi-18)
+(import (prefix sqlite3 sqlite3:))
+
+(declare (unit lock-queue))
+(declare (uses common))
+
+;;======================================================================
+;; attempt to prevent overlapping updates of rollup files by queueing
+;; update requests in an sqlite db
+;;======================================================================
+
+(define (lock-queue:open-db fname)
+  ;; Open "<fname>.lockdb", creating the queue and runlocks tables if the file is new; returns the db handle.
+  (let* ((actualfname (conc fname ".lockdb"))
+	 (dbexists (file-exists? actualfname))
+	 (db       (sqlite3:open-database actualfname))
+	 (handler  (make-busy-timeout 3600)))
+    (sqlite3:set-busy-handler! db handler) ;; install before schema creation so the CREATEs also retry on a locked db
+    (if (not dbexists)
+	(begin
+	  (sqlite3:execute 
+	   db
+	   "CREATE TABLE IF NOT EXISTS queue (
+  	      id         INTEGER PRIMARY KEY,
+              test_id    INTEGER,
+              start_time INTEGER,
+              state      TEXT,
+              CONSTRAINT queue_constraint UNIQUE (test_id));")
+	  (sqlite3:execute
+	   db
+	   "CREATE TABLE IF NOT EXISTS runlocks (
+              id         INTEGER PRIMARY KEY,
+              test_id    INTEGER,
+              run_lock   TEXT,
+              CONSTRAINT runlock_constraint UNIQUE (run_lock));")))
+    db))
+
+(define (lock-queue:set-state db test-id newstate) ;; set the state column of test-id's row in the queue table
+  (sqlite3:execute db "UPDATE queue SET state=? WHERE test_id=?;"
+		   newstate
+		   test-id))
+
+(define (lock-queue:any-younger? db mystart test-id) ;; test_id of another queue row with start_time > mystart (last one seen), or #f if none
+  (let ((res #f))
+    (sqlite3:for-each-row
+     (lambda (tid)
+       ;; Ignore our own row. Likely redundant: our row should not have both start_time > mystart and our test-id (TODO confirm)
+       (if (not (equal? tid test-id)) 
+	   (set! res tid)))
+     db
+     "SELECT test_id FROM queue WHERE start_time > ?;" mystart)
+    res))
+
+(define (lock-queue:get-lock db test-id) ;; #t if test-id already holds or just took the lock, #f otherwise
+  (let ((res       #f)
+	(lckqry    (sqlite3:prepare db "SELECT test_id,run_lock FROM runlocks WHERE run_lock='locked';"))
+	(mklckqry  (sqlite3:prepare db "INSERT INTO runlocks (test_id,run_lock) VALUES (?,'locked');")))
+    (let ((result 
+	   (handle-exceptions
+	    exn
+	    #f ;; any exception raised inside the transaction is reported as "lock not obtained"
+	    (sqlite3:with-transaction
+	     db
+	     (lambda ()
+	       (sqlite3:for-each-row (lambda (tid lockstate)
+				       (set! res (list tid lockstate)))
+				     lckqry)
+	       (if res
+		   (if (equal? (car res) test-id)
+		       #t ;; already have the lock
+		       #f) ;; lock row exists but belongs to a different test-id
+		   (begin
+		     (sqlite3:execute mklckqry test-id)
+		     ;; if no error handled then return #t for got the lock
+		     #t)))))))
+      (sqlite3:finalize! lckqry) ;; both prepared statements are finalized on every path, including the exception path
+      (sqlite3:finalize! mklckqry)
+      result)))
+
+(define (lock-queue:release-lock fname test-id) ;; takes the file name (not a db handle): opens "<fname>.lockdb" itself
+  (let ((db (lock-queue:open-db fname)))
+    (sqlite3:execute db "DELETE FROM runlocks WHERE test_id=?;" test-id)
+    (sqlite3:finalize! db)))
+
+(define (lock-queue:steal-lock db test-id) ;; drop whatever lock row is currently held, then try to take the lock for test-id
+  (sqlite3:execute db "DELETE FROM runlocks WHERE run_lock='locked';")
+  (lock-queue:get-lock db test-id)) ;; returns get-lock's result; previously passed the unbound name `test-it`
+
+;; returns #f if ok to skip the task
+;; returns #t if ok to proceed with task
+;; otherwise waits
+;;
+(define (lock-queue:wait-turn fname test-id)
+  (let ((db      (lock-queue:open-db fname))
+	(mystart (current-seconds)))
+    (sqlite3:execute ;; register (or re-register) this test-id in the queue as 'waiting' with our start time
+     db
+     "INSERT OR REPLACE INTO queue (test_id,start_time,state) VALUES (?,?,'waiting');"
+     test-id mystart)
+    (thread-sleep! 1) ;; give other tests a chance to register
+    (let ((result 
+	   (let loop ((younger-waiting (lock-queue:any-younger? db mystart test-id)))
+	     (if younger-waiting
+		 (begin
+		   ;; no need for us to wait. mark in the lock queue db as skipping
+		   (lock-queue:set-state db test-id "skipping")
+		   #f) ;; let the calling process know that nothing needs to be done
+		 (if (lock-queue:get-lock db test-id)
+		     #t
+		     (if (> (- (current-seconds) mystart) 36000) ;; waited too long (36000 s = 10 hours), steal the lock
+			 (lock-queue:steal-lock db test-id)
+			 (begin
+			   (thread-sleep! 1) ;; poll once per second
+			   (loop (lock-queue:any-younger? db mystart test-id)))))))))
+      (sqlite3:finalize! db) ;; close the connection opened above; a held lock is released later via lock-queue:release-lock
+      result)))
+	  
+            
+;; (use trace)
+;; (trace lock-queue:get-lock lock-queue:release-lock lock-queue:wait-turn lock-queue:any-younger? lock-queue:set-state)

Index: megatest.scm
==================================================================
--- megatest.scm
+++ megatest.scm
@@ -938,11 +938,11 @@
 	  (if (args:get-arg "-set-toplog")
 	      ;; DO NOT run remote
 	      (tests:test-set-toplog! db run-id test-name (args:get-arg "-set-toplog")))
 	  (if (args:get-arg "-summarize-items")
 	      ;; DO NOT run remote
-	      (tests:summarize-items db run-id test-name #t)) ;; do force here
+	      (tests:summarize-items db run-id test-id test-name #t)) ;; do force here
 	  (if (args:get-arg "-runstep")
 	      (if (null? remargs)
 		  (begin
 		    (debug:print 0 "ERROR: nothing specified to run!")
 		    (if db (sqlite3:finalize! db))

Index: mt.scm
==================================================================
--- mt.scm
+++ mt.scm
@@ -87,8 +87,31 @@
 
 (define (mt:get-run-stats)
   (cdb:remote-run db:get-run-stats #f))
 
 ;;======================================================================
-;;  S T E P S
+;;  S T A T E   A N D   S T A T U S   F O R   T E S T S 
 ;;======================================================================
 
+(define (mt:roll-up-pass-fail-counts run-id test-name item-path status) ;; always returns #f; called for its remote side effects
+  (if (and (not (equal? item-path "")) ;; only act for items (non-empty item-path) ...
+	   (member status '("PASS" "WARN" "FAIL" "WAIVED" "RUNNING" "CHECK" "SKIP"))) ;; ... whose status is one of these
+      (begin
+	(cdb:update-pass-fail-counts *runremote* run-id test-name)
+	(if (equal? status "RUNNING")
+	    (cdb:top-test-set-running *runremote* run-id test-name)
+	    (cdb:top-test-set-per-pf-counts *runremote* run-id test-name))
+	#f)
+      #f))
+
+;; speed up for common cases with a little logic
+(define (mt:test-set-state-status-by-id test-id newstate newstatus newcomment) ;; pass #f for any of newstate/newstatus/newcomment to skip that field
+  (cond
+   ((and newstate newstatus newcomment) ;; all three given: one combined remote call
+    (cdb:client-call *runremote* 'state-status-msg #t *default-numtries* newstate newstatus newcomment test-id))
+   ((and newstate newstatus) ;; state and status only; was passing `serverdat`, which is not a parameter or local here
+    (cdb:client-call *runremote* 'state-status #t *default-numtries* newstate newstatus test-id))
+   (else ;; any other combination: one remote call per supplied field
+    (if newstate   (cdb:client-call *runremote* 'set-test-state #t *default-numtries* newstate test-id))
+    (if newstatus  (cdb:client-call *runremote* 'set-test-status #t *default-numtries* newstatus test-id))
+    (if newcomment (cdb:client-call *runremote* 'set-test-comment #t *default-numtries* newcomment test-id))))
+   (db:process-triggers test-id newstate newstatus)) ;; runs after the cond on every path

Index: runs.scm
==================================================================
--- runs.scm
+++ runs.scm
@@ -791,11 +791,11 @@
     ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer"))
     (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/"))
 	   (test-id       (cdb:remote-run db:get-test-id #f  run-id test-name item-path))
 	   (testdat       (cdb:get-test-info-by-id *runremote* test-id)))
       (if (not testdat)
-	  (begin
+	  (let loop ()
 	    ;; ensure that the path exists before registering the test
 	    ;; NOPE: Cannot! Don't know yet which disk area will be assigned....
 	    ;; (system (conc "mkdir -p " new-test-path))
 	    ;;
 	    ;; (open-run-close tests:register-test db run-id test-name item-path)
@@ -807,11 +807,16 @@
 		(begin
 		  (debug:print 2 "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id)
 		  (cdb:tests-register-test *runremote* run-id test-name item-path)
 		  (set! test-id (cdb:remote-run db:get-test-id #f run-id test-name item-path))))
 	    (debug:print-info 4 "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"")
-	    (set! testdat (cdb:get-test-info-by-id *runremote* test-id))))
+	    (set! testdat (cdb:get-test-info-by-id *runremote* test-id))
+	    (if (not testdat)
+		(begin
+		  (debug:print-info 0 "WARNING: server is overloaded, trying again in one second")
+		  (thread-sleep! 1)
+		  (loop)))))
       (if (not testdat) ;; should NOT happen
 	  (debug:print 0 "ERROR: failed to get test record for test-id " test-id))
       (set! test-id (db:test-get-id testdat))
       (if (file-exists? test-path)
 	  (change-directory test-path)
@@ -881,11 +886,11 @@
 			(configf:lookup test-conf "skip" "fileexists"))
 		   (if (file-exists? (configf:lookup test-conf "skip" "fileexists"))
 		       (set! skip-test (conc "Skipping due to existance of file " (configf:lookup test-conf "skip" "fileexists"))))))
 		 (if skip-test
 		     (begin
-		       (cdb:remote-run db:test-set-state-status-by-id #f test-id "COMPLETED" "SKIP" skip-test)
+		       (mt:test-set-state-status-by-id test-id "COMPLETED" "SKIP" skip-test)
 		       (debug:print-info 1 "SKIPPING Test " full-test-name " due to " skip-test))
 		     (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags))
 			 (begin
 			   (print "ERROR: Failed to launch the test. Exiting as soon as possible")
 			   (set! *globalexitstatus* 1) ;; 
@@ -1024,21 +1029,21 @@
 				      ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first
 				      ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give
 				      ;; up and blow it away.
 				      (begin
 					(debug:print 0 "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing")
-					(cdb:remote-run db:test-set-state-status-by-id db (db:test-get-id test) "FAILEDKILL" "n/a" #f)
+					(mt:test-set-state-status-by-id (db:test-get-id test) "FAILEDKILL" "n/a" #f)
 					(thread-sleep! 1))
 				      (begin
-					(cdb:remote-run db:test-set-state-status-by-id db (db:test-get-id test) "KILLREQ" "n/a" #f)
+					(mt:test-set-state-status-by-id (db:test-get-id test) "KILLREQ" "n/a" #f)
 					(thread-sleep! 1)))
 				    ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ...
 				    (if (null? tal)
 					(loop new-test-dat tal)
 					(loop (car tal)(append tal (list new-test-dat)))))
 				  (begin
-				    (cdb:remote-run db:test-set-state-status-by-id db (db:test-get-id test) "REMOVING" "LOCKED" #f)
+				    (mt:test-set-state-status-by-id (db:test-get-id test) "REMOVING" "LOCKED" #f)
 				    (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir)
 				    (if (and real-dir 
 					     (> (string-length real-dir) 5)
 					     (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc.
 					(begin ;; let* ((realpath (resolve-pathname run-dir)))
@@ -1071,11 +1076,11 @@
 				    (cdb:remote-run db:delete-test-records db #f (db:test-get-id test))
 				    (if (not (null? tal))
 					(loop (car tal)(cdr tal))))))
 			     ((set-state-status)
 			      (debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status))
-			      (cdb:remote-run db:test-set-state-status-by-id db (db:test-get-id test) (car state-status)(cadr state-status) #f)
+			      (mt:test-set-state-status-by-id (db:test-get-id test) (car state-status)(cadr state-status) #f)
 			      (if (not (null? tal))
 				  (loop (car tal)(cdr tal))))
 			     ((run-wait)
 			      (debug:print-info 2 "still waiting, " (length tests) " tests still running")
 			      (thread-sleep! 10)

Index: tests.scm
==================================================================
--- tests.scm
+++ tests.scm
@@ -15,10 +15,11 @@
 
 (use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking tcp directory-utils)
 (import (prefix sqlite3 sqlite3:))
 
 (declare (unit tests))
+(declare (uses lock-queue))
 (declare (uses db))
 (declare (uses common))
 (declare (uses items))
 (declare (uses runconfig))
 
@@ -365,11 +366,11 @@
 	    (db:csv->test-data #f test-id
 				dat))))
       
     ;; need to update the top test record if PASS or FAIL and this is a subtest
     (if (not (equal? item-path ""))
-	(cdb:roll-up-pass-fail-counts *runremote* run-id test-name item-path status))
+	(mt:roll-up-pass-fail-counts run-id test-name item-path status))
 
     (if (or (and (string? comment)
 		 (string-match (regexp "\\S+") comment))
 	    waived)
 	(let ((cmt  (if waived waived comment)))
@@ -378,11 +379,11 @@
 
 
 (define (tests:test-set-toplog! db run-id test-name logf) 
   (cdb:client-call *runremote* 'tests:test-set-toplog #t 2 logf run-id test-name))
 
-(define (tests:summarize-items db run-id test-name force)
+(define (tests:summarize-items db run-id test-id test-name force)
   ;; if not force then only update the record if one of these is true:
   ;;   1. logf is "log/final.log
   ;;   2. logf is same as outputfilename
   (let* ((outputfilename (conc "megatest-rollup-" test-name ".html"))
 	 (orig-dir       (current-directory))
@@ -399,12 +400,13 @@
     (debug:print 4 "summarize-items with logf " logf ", outputfilename " outputfilename " and force " force)
     (if (or (equal? logf "logs/final.log")
 	    (equal? logf outputfilename)
 	    force)
 	(begin
-	  (if (not (obtain-dot-lock outputfilename 1 5 7)) ;; retry every second for 20 seconds, call it dead after 30 seconds and steal the lock
-	      (print "Failed to obtain lock for " outputfilename)
+	  (if ;; (not (obtain-dot-lock outputfilename 1 5 7)) ;; retry every second for 20 seconds, call it dead after 30 seconds and steal the lock
+              (not (lock-queue:wait-turn outputfilename test-id))
+	      (print "Not updating " outputfilename " as another test item has signed up for the job")
 	      (begin
 		(print "Obtained lock for " outputfilename)
 		(let ((oup    (open-output-file outputfilename))
 		      (counts (make-hash-table))
 		      (statecounts (make-hash-table))
@@ -461,10 +463,11 @@
 		      (print "<table cellspacing=\"0\" border=\"1\">" 
 			     "<tr><td>Item</td><td>State</td><td>Status</td><td>Comment</td>"
 			     outtxt "</table></body></html>")
 		      (release-dot-lock outputfilename)))
 		  (close-output-port oup)
+		  (lock-queue:release-lock outputfilename test-id)
 		  (change-directory orig-dir)
 		  ;; NB// tests:test-set-toplog! is remote internal...
 		  (tests:test-set-toplog! db run-id test-name outputfilename)
 		  )))))))
 
@@ -679,42 +682,43 @@
 	 tdb
 	 "SELECT count(id) FROM test_rundat;")
 	res))
   0)
 
-(define (tests:update-central-meta-info test-id cpuload diskfree minutes num-records uname hostname)
+(define (tests:update-central-meta-info test-id cpuload diskfree minutes uname hostname)
   ;; This is a good candidate for threading the requests to enable
   ;; transactionized write at the server
   (cdb:tests-update-cpuload-diskfree *runremote* test-id cpuload diskfree)
-  ;; (let ((db (open-db)))
-    ;; (sqlite3:execute db "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;"
-    ;;     	     cpuload
-    ;;     	     diskfree
-    ;;     	     test-id)
-    (if minutes 
-	(cdb:tests-update-run-duration *runremote* test-id minutes))
-	;; (sqlite3:execute db "UPDATE tests SET run_duration=? WHERE id=?;" minutes test-id))
-    (if (eq? num-records 0)
-	(cdb:tests-update-uname-host *runremote* test-id uname hostname))
-	;;(sqlite3:execute db "UPDATE tests SET uname=?,host=? WHERE id=?;" uname hostname test-id))
-    ;;(sqlite3:finalize! db))
-    )
-  
-(define (tests:set-meta-info db test-id run-id testname itemdat minutes work-area)
+  (if minutes 
+      (cdb:tests-update-run-duration *runremote* test-id minutes))
+  (if (and uname hostname)
+      (cdb:tests-update-uname-host *runremote* test-id uname hostname)))
+  
+(define (tests:set-full-meta-info db test-id run-id minutes work-area)
+  ;; DOES cdb:remote-run under the hood!
+  (let* ((num-records 0) ;; (test:tdb-get-rundat-count tdb))
+	 (cpuload  (get-cpu-load))
+	 (diskfree (get-df (current-directory)))
+	 (uname    (get-uname "-srvpio"))
+	 (hostname (get-host-name)))
+    (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
+    (tests:update-central-meta-info test-id cpuload diskfree minutes uname hostname)))
+	  
+(define (tests:set-partial-meta-info db test-id run-id minutes work-area)
   ;; DOES cdb:remote-run under the hood!
-  (let* ((tdb         (db:open-test-db-by-test-id db test-id work-area: work-area))
-	 (num-records (test:tdb-get-rundat-count tdb))
-	 (cpuload  (get-cpu-load))
+  (let* ((cpuload  (get-cpu-load))
 	 (diskfree (get-df (current-directory))))
-    (if (eq? (modulo num-records 10) 0) ;; every ten records update central
-	(let ((uname    (get-uname "-srvpio"))
-	      (hostname (get-host-name)))
-	  (tests:update-central-meta-info test-id cpuload diskfree minutes num-records uname hostname)))
+    (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
+    ;; Update central with uname and hostname = #f
+    (tests:update-central-meta-info test-id cpuload diskfree minutes #f #f)))
+	 
+(define (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
+  (let ((tdb         (db:open-test-db-by-test-id db test-id work-area: work-area)))
     (sqlite3:execute tdb "INSERT INTO test_rundat (update_time,cpuload,diskfree,run_duration) VALUES (strftime('%s','now'),?,?,?);"
 		     cpuload diskfree minutes)
     (sqlite3:finalize! tdb)))
-	  
+ 
 ;;======================================================================
 ;; A R C H I V I N G
 ;;======================================================================
 
 (define (test:archive db test-id)

Index: tests/Makefile
==================================================================
--- tests/Makefile
+++ tests/Makefile
@@ -128,10 +128,13 @@
 	  for fs in afs nfs zfs; do \
 	     for dpath in none tmp; do \
 	        (cd fullrun;$(MEGATEST) -runtests priority_10_waiton_1 -target $$sys/$$fs/$$dpath :runname $(RUNNAME) &);\
 	     done;done;done
 
+test11 :
+	 cd fullrun;time (for a in 1 2 3 4 5 6 7 8 9 10  1 2 3 4 5 6 7 8 9 10  1 2 3 4 5 6 7 8 9 10  1 2 3 4 5 6 7 8 9 10 ;do   (megatest -test-paths -target %/%/% > /dev/null ) & done; wait; )
+
 minsetup : 
 	cd ..;make && make install
 	mkdir -p mintest/runs mintest/links
 	cd mintest;megatest -stop-server 0
 	cd mintest;megatest -server - -debug $(DEBUG) > server.log 2> server.log & 

Index: tests/fdktestqa/fdk.config
==================================================================
--- tests/fdktestqa/fdk.config
+++ tests/fdktestqa/fdk.config
@@ -2,11 +2,11 @@
 SYSTEM TEXT
 RELEASE TEXT
 
 [setup]
 # Adjust max_concurrent_jobs to limit how much you load your machines
-max_concurrent_jobs 50
+max_concurrent_jobs 150
 
 # This is your link path, you can move it but it is generally better to keep it stable
 linktree #{shell readlink -f #{getenv PWD}/../simplelinks}
 
 [include testqa/configs/megatest.abc.config]

Index: tests/fdktestqa/testqa/Makefile
==================================================================
--- tests/fdktestqa/testqa/Makefile
+++ tests/fdktestqa/testqa/Makefile
@@ -14,11 +14,11 @@
 
 bigrun :
 	$(MEGATEST) -runtests bigrun -target a/bigrun :runname a
 
 bigrun2 :
-	$(MEGATEST) -runtests bigrun2 -target a/bigrun2 :runname a
+	$(MEGATEST) -runtests bigrun2 -target a/bigrun2 :runname a -transport http
 
 dashboard : 
 	$(DASHBOARD) -rows 20 &
 
 compile :

Index: tests/fdktestqa/testqa/megatest.config
==================================================================
--- tests/fdktestqa/testqa/megatest.config
+++ tests/fdktestqa/testqa/megatest.config
@@ -1,8 +1,10 @@
 [setup]
 testcopycmd cp --remove-destination -rlv TEST_SRC_PATH/. TEST_TARG_PATH/. >> TEST_TARG_PATH/mt_launch.log 2>> TEST_TARG_PATH/mt_launch.log
-runqueue 2
+runqueue 20
+transport http
+launchwait no
 
 [include ../fdk.config]
 
 [server]
-timeout 0.05
+port 9080

Index: tests/fullrun/megatest.config
==================================================================
--- tests/fullrun/megatest.config
+++ tests/fullrun/megatest.config
@@ -16,13 +16,12 @@
 
 [tests-paths]
 1 #{get misc parent}/simplerun/tests
 
 [setup]
-# Set launchwait to yes to use the old launch run code that waits for the launch process to return before 
-# proceeding.
-# launchwait yes
+# Set launchwait to no to use the more aggressive code that does not wait for the launch to complete before proceeding
+launchwait no
 
 # Use http instead of direct filesystem access
 transport http