Index: Makefile
==================================================================
--- Makefile
+++ Makefile
@@ -93,11 +93,12 @@
 	csc $(CSCOPTS) $(OFILES) dashboard.scm $(GOFILES) $(MOFILES) $(MOIMPFILES) -o dboard
 
 mtut: $(OFILES) $(MOFILES) megatest-fossil-hash.scm mtut.scm megatest-version.scm
 	csc $(CSCOPTS) $(OFILES) $(MOFILES) mtut.scm -o mtut
 
-# include makefile.inc
+include makefile.inc
+include chicken.makefile
 
 TCMTOBJS = \
 	api.o \
 	archive.o \
 	cgisetup/models/pgdb.o \

Index: NOTES
==================================================================
--- NOTES
+++ NOTES
@@ -12,10 +12,15 @@
 #     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 #     GNU General Public License for more details.
 # 
 #     You should have received a copy of the GNU General Public License
 #     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
+
+(server:writable-watchdog-bruteforce dbstruct)
+
+(server:writable-watchdog-deltasync dbstruct)
+
 
 =====================================================================
 NOTES from looking at branch v1.62-rpc
 =====================================================================
 

Index: TODO
==================================================================
--- TODO
+++ TODO
@@ -16,10 +16,17 @@
 #     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
 
 TODO
 ====
 
+WW38
+. Add test_rundat to no-sync ==> correction, put in <testdir>/.meta/test-run.dat
+. Add STATE/STATUS transitions to .meta/test-run.dat or similar
+. Swizzle update-test-rundat to operate on no-sync
+. Swizzle update-run-duration, -uname-host and cpuload-diskfree to no-sync
+. On state/status change update tests table with duration
+
 WW15
 . fill newview matrix with data, filter pipeline gui elements
 . improve [script], especially indent handling
 
 WW16

Index: cgisetup/models/pgdb.scm
==================================================================
--- cgisetup/models/pgdb.scm
+++ cgisetup/models/pgdb.scm
@@ -177,15 +177,15 @@
    state status owner event-time comment fail-count pass-count last_update publish-time run-id area-id ))
 
 ;; given all needed info create run record
 ;;
 (define (pgdb:insert-run dbh ttype-id target run-name state status owner event-time comment fail-count pass-count area-id last-update publish-time)
-    (dbi:exec
+  (dbi:exec
    dbh
    "INSERT INTO runs (ttype_id,target,run_name,state,status,owner,event_time,comment,fail_count,pass_count,area_id,last_update,publish_time)
       VALUES (?,?,?,?,?,?,?,?,?,?,?,?, ?);"
-    ttype-id target run-name state status owner event-time comment fail-count pass-count area-id last-update publish-time))
+   ttype-id target run-name state status owner event-time comment fail-count pass-count area-id last-update publish-time))
 
 ;;======================================================================
 ;;  T E S T - S T E P S
 ;;======================================================================
 

Index: chicken.makefile
==================================================================
--- chicken.makefile
+++ chicken.makefile
@@ -23,11 +23,11 @@
 
 # CHICKEN_BIN_DIR=$(shell dirname $(shell which csi))
 # if have csi on path use that, else use default
 # CSIPATH=$(shell which csi)
 # CKPATH=$(shell dirname $(shell dirname $(CSIPATH)))
-sCHICKEN_PREFIX=$(or $(CKPATH),$(PREFIX)/bin/.$(ARCHSTR))
+CHICKEN_PREFIX=$(or $(CKPATH),$(PREFIX)/bin/.$(ARCHSTR))
 
 whatever :
 	@echo "CHICKEN_PREFIX=$(CHICKEN_PREFIX)"
 
 tgz-$(USER)/postgresql-9.6.4.tar.gz :
@@ -66,10 +66,11 @@
 	cd tgz-$(USER)/nanomsg-1.0.0/build-$(USER); make; make install
 
 $(CHICKEN_PREFIX)/chicken-4.13.0/LICENSE : tgz-$(USER)/chicken-4.13.0.tar.gz
 	mkdir -p build-$(USER)/eggs-installed
 	cd build-$(USER);tar xf ../tgz-$(USER)/chicken-4.13.0.tar.gz
+	if [ -e $@ ]; then touch $@; fi
 
 tgz-$(USER)/opensrc.fossil :
 	cd tgz-$(USER); fossil clone http://www.kiatoa.com/fossils/opensrc opensrc.fossil
 	mkdir tgz-$(USER)/opensrc
 	cd tgz-$(USER)/opensrc; fossil open --nested ../opensrc.fossil; fossil up; fossil uv sync
@@ -101,11 +102,11 @@
 	cd tgz-$(USER)/ffcall; make CC="gcc -fPIC"; make install	
 
 $(CHICKEN_PREFIX)/bin/sqlite3 : build-$(USER)/sqlite-autoconf-3090200/configure 
 	cd build-$(USER)/sqlite-autoconf-3090200; ./configure --prefix=$(CHICKEN_PREFIX); make; make install
 
-$(CHICKEN_PREFIX)/bin/csi : $(CHICKEN_PREFIX)/bin/sqlite3 $(CHICKEN_PREFIX)/lib/libiupweb.so $(CHICKEN_PREFIX)/chicken-4.13.0/LICENSE
+$(CHICKEN_PREFIX)/bin/csi : $(if $(filter yes,$(BUILD_SQLITE3)),$(CHICKEN_PREFIX)/bin/sqlite3) $(CHICKEN_PREFIX)/lib/libiupweb.so $(CHICKEN_PREFIX)/chicken-4.13.0/LICENSE # sqlite3 is a prereq only when BUILD_SQLITE3=yes (must be set before this line is read); prerequisites expand immediately, so a variable assigned further down the file would be empty here
 	cd build-$(USER)/chicken-4.13.0;make PLATFORM=linux PREFIX=$(CHICKEN_PREFIX) 
 	cd build-$(USER)/chicken-4.13.0;make PLATFORM=linux PREFIX=$(CHICKEN_PREFIX) install
 
 ALL_CKBIN=chicken chicken-bind chicken-bug chicken-dump			\
 chicken-install chicken-profile chicken-sqlite3 chicken-status		\
@@ -112,35 +113,56 @@
 chicken-uninstall csc csi feathers nanocat sqlite3 vacuumdb logpro	\
 refdb
 
 CKBIN_WRAPPERS=$(addprefix $(PREFIX)/bin/,$(ALL_CKBIN))
 
-$(PREFIX)/bin/% : $(CHICKEN_PREFIX)/bin/% $(CHICKEN_PREFIX)/bin/csi $(EGGSTARG2)
+$(PREFIX)/bin/% : $(CHICKEN_PREFIX)/bin/% $(CHICKEN_PREFIX)/bin/csi
 	utils/mk_wrapper_tool $(PREFIX) $* $(PREFIX)/bin/$*
 	chmod a+x $(PREFIX)/bin/$*
 
 $(PREFIX)/bin :
 	mkdir -p $(PREFIX)/bin $(CHICKEN_PREFIX)/bin
 
-chicken : $(PREFIX)/bin $(CHICKEN_PREFIX)/bin/csi  binwrappers
+# For the future -   binwrappers
+chicken : $(PREFIX)/bin $(CHICKEN_PREFIX)/bin/csi  postgresql.done nanomsg.done iup.done canvas-draw.done sqlite3.done sql-de-lite.done dbi.done  $(EGGSTARG2)
 	@echo "Fake target to build prefix chicken"
 
 binwrappers : $(CKBIN_WRAPPERS)
 
-postgresql.done : $(CHICKEN_PREFIX)/bin/pg_config
+# make the dep a dummy if not requiring our own build of postgres
+ifeq ($(BUILD_POSTGRES),yes)
+PG_DEP=$(CHICKEN_PREFIX)/bin/pg_config
+else
+PG_DEP=$(CHICKEN_PREFIX)/bin/csi
+endif
+
+postgresql.done : $(PG_DEP)
 	CSC_OPTIONS="-I$(CHICKEN_PREFIX)/include -L$(CHICKEN_PREFIX)/lib -L$(CHICKEN_PREFIX)/lib64" $(CHICKEN_PREFIX)/bin/chicken-install postgresql > postgresql.done
 
-nanomsg.done : $(CHICKEN_PREFIX)/lib/libnanomsg.so
+ifeq ($(BUILD_NANOMSG),yes)
+NMSG_DEP=$(CHICKEN_PREFIX)/lib/libnanomsg.so
+else
+NMSG_DEP=$(CHICKEN_PREFIX)/bin/csi
+endif
+
+nanomsg.done : $(NMSG_DEP)
 	CSC_OPTIONS="-I$(CHICKEN_PREFIX)/include -L$(CHICKEN_PREFIX)/lib -L$(CHICKEN_PREFIX)/lib64" $(CHICKEN_PREFIX)/bin/chicken-install nanomsg > nanomsg.done
 
 iup.done : $(CHICKEN_PREFIX)/lib/libcallback.a
 	CSC_OPTIONS="-I$(CHICKEN_PREFIX)/include -L$(CHICKEN_PREFIX)/lib" $(CHICKEN_PREFIX)/bin/chicken-install -D no-library-checks -feature disable-iup-web -feature disable-iup-pplot -feature disable-iup-matrixex iup > iup.done
 
 canvas-draw.done :
 	CSC_OPTIONS="-I$(CHICKEN_PREFIX)/include -L$(CHICKEN_PREFIX)/lib" $(CHICKEN_PREFIX)/bin/chicken-install -D no-library-checks canvas-draw > canvas-draw.done
 
-sqlite3.done :
+# make the dep a dummy if not requiring our own build of sqlite3
+ifeq ($(BUILD_SQLITE3),yes)
+SQLITE3_DEP=$(CHICKEN_PREFIX)/bin/sqlite3
+else
+SQLITE3_DEP=$(CHICKEN_PREFIX)/bin/csi
+endif
+
+sqlite3.done : $(SQLITE3_DEP)
 	CSC_OPTIONS="-I$(CHICKEN_PREFIX)/include -L$(CHICKEN_PREFIX)/lib" $(CHICKEN_PREFIX)/bin/chicken-install sqlite3 > sqlite3.done
 
 sql-de-lite.done :
 	CSC_OPTIONS="-I$(CHICKEN_PREFIX)/include -L$(CHICKEN_PREFIX)/lib" $(CHICKEN_PREFIX)/bin/chicken-install sql-de-lite > sql-de-lite.done
 

Index: common.scm
==================================================================
--- common.scm
+++ common.scm
@@ -1047,11 +1047,11 @@
 	     ((dbr:dbstruct-read-only dbstruct)
 	      (debug:print-info 13 *default-log-port* "loading read-only watchdog")
 	      (common:readonly-watchdog dbstruct))
 	     (else
 	      (debug:print-info 13 *default-log-port* "loading writable-watchdog.")
-              (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "brute-force-sync")))
+              (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "delta-sync"))) ;; "brute-force-sync")))
                 (cond
                  ((equal? syncer "brute-force-sync")
                   (server:writable-watchdog-bruteforce dbstruct))
                  ((equal? syncer "delta-sync")
                   (server:writable-watchdog-deltasync dbstruct))
@@ -2638,10 +2638,12 @@
                    ("mode-patt" . "-modepatt")
                    ("run-name"  . "-runname")
                    ("contour"   . "-contour")
                    ("target"    . "-target")
                    ("test-patt" . "-testpatt")
+		   ("rerun"     . "-rerun")
+		   ("setvars"   . "-setvars")
                    ("msg"       . "-m")
                    ("log"       . "-log")
                    ("start-dir" . "-start-dir")
                    ("new"       . "-set-state-status"))))
     (if (eq? flavor 'switch-symbol)
@@ -2716,10 +2718,39 @@
     (hash-table-for-each
      vars
      (lambda (var val)
        (setenv var val)))
     vars))
+
+(define (common:propogate-mt-vars-to-subrun proc propogate-vars) ;; run proc with the MT_* env vars removed, then put them back
+  (let ((vars      (make-hash-table)) ;; saved MT_* name -> value; used for the restore below and returned
+        (var-patt  "^MT_.*"))
+    (for-each
+     (lambda (vardat) ;; each env var
+       ;; vardat is one element of (get-environment-variables); its car is used as the
+       ;; name and its cdr as the value. Only names matching var-patt are touched.
+	  (if (string-match var-patt (car vardat))
+	      (let ((var (car vardat))
+		    (val (cdr vardat)))
+		(hash-table-set! vars var val)
+                 (if (member var propogate-vars) ;; names listed in propogate-vars are also exported under a PARENT_ name
+                  (begin
+                  (print var "  " (string-substitute "MT_" "PARENT_" var)) ;; prints the old and the rewritten name
+                  (setenv (string-substitute "MT_" "PARENT_"  var) val)))
+		(unsetenv var)))) ;; remove the MT_ variable before proc runs
+     ;; end of the per-variable lambda; the list iterated over follows
+     (get-environment-variables))
+    (cond
+     ((string? proc)(system proc)) ;; a string is handed to system
+     (proc          (proc))) ;; any other non-#f value is called with no arguments; #f runs nothing
+    (hash-table-for-each
+     vars
+     (lambda (var val)
+         (if (member var propogate-vars) ;; undo the PARENT_ export made above
+         (unsetenv (string-substitute "MT_" "PARENT_" var)))  
+       (setenv var val))) ;; restore the saved MT_ variable
+    vars)) ;; the saved name -> value table is the return value
 
 
 (define (common:run-a-command cmd #!key (with-vars #f) (with-orig-env #f))
   (let* ((pre-cmd  (dtests:get-pre-command))
          (post-cmd (dtests:get-post-command))

Index: configure
==================================================================
--- configure
+++ configure
@@ -71,11 +71,11 @@
     ARCHSTR=$(/usr/bin/sw_vers -productVersion)
 else
     ARCHSTR=$(lsb_release -sr)
 fi
 
-echo "CHICKEN_PREFIX=$PREFIX/.$ARCHSTR" >> makefile.inc
+echo "CKPATH=$PREFIX/.$ARCHSTR" >> makefile.inc
 CHICKEN_PREFIX=$PREFIX/bin/.$ARCHSTR
 
 if [[ ! $(type csi) ]];then
     echo "Chicken build needed."
     echo "BUILD_CHICKEN=yes" >> makefile.inc

Index: dashboard.scm
==================================================================
--- dashboard.scm
+++ dashboard.scm
@@ -1900,27 +1900,42 @@
                         ;; (set! colnum (+ colnum 1))
                         ))))
 	      run-ids)))
 
 (define (dashboard:tests-ht->tests-dat tests-ht)
-  (reverse
-   (sort
-    (hash-table-values tests-ht)
-    (lambda (a b) 
-      (let ((a-test-name  (db:test-get-testname a))
-            (a-item-path  (db:test-get-item-path a))
-            (b-test-name  (db:test-get-testname b))
-            (b-item-path  (db:test-get-item-path b))
-            (a-event-time (db:test-get-event_time a))
-            (b-event-time (db:test-get-event_time b)))
-        (if (not (equal? a-test-name b-test-name))
-            (> a-event-time b-event-time)
-            (cond
-             ((< 0 (string-compare3 a-test-name b-test-name)) #t)
-             ((> 0 (string-compare3 a-test-name b-test-name)) #f)
-             ((< 0 (string-compare3 a-item-path b-item-path)) #t)
-             (else #f))))))))
+  (let ((oldest-item (make-hash-table))) ;; test name -> one event_time per test name, filled in below
+    ;; NOTE(review): despite the name, this keeps the LARGEST event_time seen per test name - confirm that is intended
+    (for-each
+     (lambda (tdat)
+       (let ((tname (db:test-get-testname tdat))
+	     (etime (db:test-get-event_time tdat)))
+	 (if (hash-table-exists? oldest-item tname)
+	     (if (< (hash-table-ref oldest-item tname) etime)
+		 (hash-table-set! oldest-item tname etime))
+	     (hash-table-set! oldest-item tname etime))))
+     (hash-table-values tests-ht))
+    (reverse ;; the comparator below orders larger values first; the result is then reversed
+     (sort
+      (hash-table-values tests-ht)
+      (lambda (a b) 
+	(let ((a-test-name  (db:test-get-testname a))
+	      (a-item-path  (db:test-get-item-path a))
+	      (b-test-name  (db:test-get-testname b))
+	      (b-item-path  (db:test-get-item-path b))
+	      (a-event-time (db:test-get-event_time a))
+	      (b-event-time (db:test-get-event_time b)))
+	  (if (equal? a-test-name b-test-name) ;; same test name: order by the records' own event times
+	      (> a-event-time b-event-time)
+	      (> (hash-table-ref oldest-item a-test-name) ;; different names: order by the per-name table values
+		 (hash-table-ref oldest-item b-test-name)))))))))
+;;	  (if (not (equal? a-test-name b-test-name))
+;;	      (> a-event-time b-event-time)
+;;	      (cond
+;;	       ((< 0 (string-compare3 a-test-name b-test-name)) #t)
+;;	       ((> 0 (string-compare3 a-test-name b-test-name)) #f)
+;;	       ((< 0 (string-compare3 a-item-path b-item-path)) #t)
+;;	       (else #f)))))))))
 
 
 (define (dashboard:run-id->tests-mindat run-id tabdat runs-hash)
   (let* ((run          (hash-table-ref/default runs-hash run-id #f))
          (key-vals     (rmt:get-key-vals run-id))
@@ -2015,10 +2030,12 @@
                     (iup:attribute-set! run-matrix "NUMCOL" max-col ))
 
                 (let ((effective-max-row (if (< max-row max-visible) max-visible max-row)))
                   (if (> effective-max-row (string->number (iup:attribute run-matrix "NUMLIN")))
                       (iup:attribute-set! run-matrix "NUMLIN" effective-max-row )))
+
+		(iup:attribute-set! run-matrix "WIDTHDEF" 16)
                 
                 ;; Row labels
                 (for-each (lambda (ind)
                             (let* ((name (car ind))
                                    (num  (cadr ind))
@@ -2054,20 +2071,20 @@
                                     (iup:attribute-set! run-matrix key (cadr value))
                                     (iup:attribute-set! run-matrix (conc "BGCOLOR" key) (car value))))))
                           matrix-content)
                 
                 ;; Col labels - do after setting Cell contents so they are accounted for in the size calc.
-                
+
                 (for-each (lambda (ind)
                             (let* ((name (car ind))
                                    (num  (cadr ind))
                                    (key  (conc "0:" num)))
                               (if (not (equal? (iup:attribute run-matrix key) name))
                                   (begin
                                     (set! changed #t)
-                                    (iup:attribute-set! run-matrix key name)
-                                    (if (<= num max-col)
+                                    (iup:attribute-set! run-matrix key name) ;; (list->string (intersperse (string->list name) #\newline))) ;; name)
+				    #;(if (<= num max-col)
                                         (iup:attribute-set! run-matrix "FITTOTEXT" (conc "C" num)))))))
                           col-indices)
                 
                 (if (and (eq? pass-num 0) changed)
                     (loop 1 #t)) ;; force second pass due to column labels changing

Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -1776,10 +1776,33 @@
           #f
           )
         (with-input-from-file infile read-lines)
 	)))
 
+;; Compare the recorded run-duration with (mtime of <run-dir>/.mt_data/test-run.dat) - event-time.
+;; If that file exists, is readable and implies a longer duration, write the longer value to
+;; tests.run_duration for test-id and yield #t; in every other case return #f (nothing adjusted).
+(define (db:adjust-run-duration dbstruct test-id run-dir event-time run-duration)
+  ;; the leading "/" is needed: conc only concatenates, it does not insert a path separator
+  (let* ((datf             (conc run-dir "/.mt_data/test-run.dat"))
+	 (modt             (if (and (file-exists? datf)
+				    (file-read-access? datf))
+			       (file-modification-time datf)
+			       #f)) ;; #f when the file is missing or unreadable
+	 (alt-run-duration (and modt (- modt event-time))))
+    (if (and alt-run-duration
+	     (> alt-run-duration run-duration))
+	;; the lambda ends in #t = adjusted (assumes db:with-db returns the lambda's value - confirm)
+	(begin
+	  (debug:print 0 *default-log-port* "Test " test-id " run duration mismatch. Setting to " alt-run-duration)
+	  (db:with-db
+	   dbstruct #f #f
+	   (lambda (db)
+	     (sqlite3:execute db "UPDATE tests SET run_duration=? WHERE id=?;" alt-run-duration test-id)
+	     #t)))
+	#f))) ;; #f = we did NOT adjust the time
+	      
 ;;  select end_time-now from
 ;;      (select testname,item_path,event_time+run_duration as
 ;;                          end_time,strftime('%s','now') as now from tests where state in
 ;;      ('RUNNING','REMOTEHOSTSTART','LAUNCHED'));
 
@@ -1828,37 +1851,39 @@
 	 ;;                     (db:test-get-run_duration testdat)))
 	 ;;                    600) 
 	 ;; (db:delay-if-busy dbdat)
 	 (sqlite3:for-each-row 
 	  (lambda (test-id run-dir uname testname item-path event-time run-duration)
-	    (if (and (equal? uname "n/a")
-		     (equal? item-path "")) ;; this is a toplevel test
-		;; what to do with toplevel? call rollup?
-		(begin
-		  (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
-		  (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id))
-		(begin
-		  (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))
-		  (debug:print-info 0 *default-log-port* "Found old test in RUNNING state, test-id="
-				    test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds)
-				    " event-time="event-time" run-duration="run-duration))))
+	    (if (not (db:adjust-run-duration dbstruct test-id run-dir event-time run-duration))
+		(if (and (equal? uname "n/a")
+			 (equal? item-path "")) ;; this is a toplevel test
+		    ;; what to do with toplevel? call rollup?
+		    (begin
+		      (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
+		      (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id))
+		    (begin
+		      (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))
+		      (debug:print-info 0 *default-log-port* "Found old test in RUNNING state, test-id="
+					test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds)
+					" event-time="event-time" run-duration="run-duration)))))
 	  stmth1
 	  run-id running-deadtime) ;; default time 720 seconds
-       
+	    
 	 (sqlite3:for-each-row 
 	  (lambda (test-id run-dir uname testname item-path event-time run-duration)
-	    (if (and (equal? uname "n/a")
-		     (equal? item-path "")) ;; this is a toplevel test
-		;; what to do with toplevel? call rollup?
-		(begin
-		  (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
-		  (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id))
-		(begin
-		  (debug:print-info 0 *default-log-port* "Found old test in REMOTEHOSTSTART state, test-id=" test-id
-				    " exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time
-				    " run-duration="run-duration)
-		  (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))))
+	    (if (not (db:adjust-run-duration dbstruct test-id run-dir event-time run-duration))
+		(if (and (equal? uname "n/a")
+			 (equal? item-path "")) ;; this is a toplevel test
+		    ;; what to do with toplevel? call rollup?
+		    (begin
+		      (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
+		      (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id))
+		    (begin
+		      (debug:print-info 0 *default-log-port* "Found old test in REMOTEHOSTSTART state, test-id=" test-id
+					" exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time
+					" run-duration="run-duration)
+		      (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))))
 	  stmth2
 	  run-id remotehoststart-deadtime) ;; default time 230 seconds
 	 
 	 ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
 	 ;;
@@ -3268,21 +3293,19 @@
 				       test-id))))))
   (mt:process-triggers dbstruct run-id test-id newstate newstatus))
 
 ;; NEW BEHAVIOR: Count tests running in all runs!
 ;;
-(define (db:get-count-tests-running dbstruct run-id fastmode)
-  (let* ((qry (if fastmode
-		  "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND NOT (uname = 'n/a' AND item_path = '') LIMIT 1;"
-		  "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND NOT (uname = 'n/a' AND item_path = '');")))
-  (db:with-db
-   dbstruct
-   run-id
-   #f
-   (lambda (db)
-     (let* ((stmth (db:get-cache-stmth dbstruct  db qry)))
-       (sqlite3:first-result stmth))))))
+(define (db:get-count-tests-running dbstruct run-id)
+  (let* ((qry  "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND NOT (uname = 'n/a' AND item_path = '');"))
+    (db:with-db
+     dbstruct
+     run-id
+     #f
+     (lambda (db)
+       (let* ((stmth (db:get-cache-stmth dbstruct db qry)))
+	 (sqlite3:first-result stmth))))))
 
 ;; NEW BEHAVIOR: Count tests running in only one run!
 ;;
 (define (db:get-count-tests-actually-running dbstruct run-id)
   (db:with-db
@@ -3298,21 +3321,23 @@
       run-id)))) ;; NOT IN (SELECT id FROM runs WHERE state='deleted');")
 
 ;; NEW BEHAVIOR: Look only at single run with run-id
 ;; 
 ;; (define (db:get-running-stats dbstruct run-id)
-(define (db:get-count-tests-running-for-run-id dbstruct run-id fastmode)
-  (let* ((qry (if fastmode
-		  "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? LIMIT 1;"
-		  "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=?;")))
+(define (db:get-count-tests-running-for-run-id dbstruct run-id)
+  (let* ((qry "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=?;"))
     (db:with-db
      dbstruct
      run-id
      #f
      (lambda (db)
        (let* ((stmth (db:get-cache-stmth dbstruct db qry)))
-	 (sqlite3:first-result stmth run-id))))))
+	 (sqlite3:fold-row
+	  (lambda (res val) val)
+	  0 stmth run-id))))))
+
+;;	 (sqlite3:first-result stmth run-id))))))
 
 ;; For a given testname how many items are running? Used to determine
 ;; probability for regenerating html
 ;;
 (define (db:get-count-tests-running-for-testname dbstruct run-id testname)
@@ -3321,23 +3346,23 @@
    run-id
    #f
    (lambda (db)
      (let* ((stmt "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? AND NOT (uname = 'n/a' AND item_path = '') AND testname=?;")
 	    (stmth (db:get-cache-stmth dbstruct db stmt)))
-       (sqlite3:first-result
-	stmth run-id testname)))))
+       (sqlite3:fold-row
+	(lambda (res val) val) 0 stmth run-id testname)))))
 
 (define (db:get-not-completed-cnt dbstruct run-id)
-(db:with-db
+  (db:with-db
    dbstruct
    run-id
    #f
    (lambda (db)
-      ;(print "SELECT count(id) FROM tests WHERE state not in ('COMPLETED', 'DELETED') AND run_id=" run-id)  
-     (sqlite3:first-result
-      db
-      "SELECT count(id) FROM tests WHERE state not in ('COMPLETED', 'DELETED') AND run_id=?;" run-id))))
+     (let* ((stmt "SELECT count(id) FROM tests WHERE state not in ('COMPLETED', 'DELETED') AND run_id=?;"))
+       (sqlite3:fold-row
+	(lambda (res val) val)
+	0 (db:get-cache-stmth dbstruct db stmt) run-id)))))
 
 (define (db:get-count-tests-running-in-jobgroup dbstruct run-id jobgroup)
   (if (not jobgroup)
       0 ;; 
       (let ((testnames '()))
@@ -3520,11 +3545,11 @@
      (lambda (run-id)
        (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id)))
 	 (db:prep-megatest.db-adj-test-ids (db:dbdat-get-db mtdb) run-id testrecs)))
      run-ids)))
 
-;; Get test data using test_id, run-id is not used
+;; Get test data using test_id, run-id is not used - but it will be!
 ;; 
 (define (db:get-test-info-by-id dbstruct run-id test-id)
   (db:with-db
    dbstruct
    #f ;; run-id

Index: dcommon.scm
==================================================================
--- dcommon.scm
+++ dcommon.scm
@@ -953,14 +953,13 @@
   (+ yoffset (* y scalef)))
 
 ;; sizex, sizey     - canvas size
 ;; originx, originy - canvas origin
 ;;
-(define (dcommon:initial-draw-tests cnv xadj yadj sizex sizey sizexmm sizeymm originx originy tests-draw-state sorted-testnames test-records)
-  (let* ((dot-data ;; (map cdr (filter
-		   ;; 	  (lambda (x)(equal? "node" (car x)))
-	  (map string-split (tests:lazy-dot test-records "plain" sizex sizey))) ;; (tests:easy-dot test-records "plain")))
+(define (dcommon:initial-draw-tests cnv xadj yadj sizex sizey sizexmm sizeymm originx originy
+				    tests-draw-state sorted-testnames test-records)
+  (let* ((dot-data       (tests:lazy-dot test-records "plain" sizex sizey 'munged))
 	 (xoffset	 (dcommon:get-xoffset tests-draw-state sizex xadj))
 	 (yoffset        (dcommon:get-yoffset tests-draw-state sizey yadj))
 	 (no-dot         (configf:lookup *configdat* "setup" "nodot"))
 	 (boxh           15)
 	 (boxw           10)

Index: docs/manual/megatest_manual.html
==================================================================
--- docs/manual/megatest_manual.html
+++ docs/manual/megatest_manual.html
@@ -2420,10 +2420,25 @@
 <pre>[setup]
 # this will automatically kill the test if it runs for more than 1h 2m and 3s
 runtimelim 1h 2m 3s</pre>
 </div></div>
 </div>
+<div class="sect4">
+<h5 id="_post_run_hook">Post Run Hook</h5>
+<div class="paragraph"><p>This runs script to-run.sh after all tests have been completed. It is
+not necessary to use -run-wait as each test will check for other
+running tests on completion and if there are none it will call the
+post run hook.</p></div>
+<div class="paragraph"><p>Note that the output from the script call will be placed in a log file
+in the logs directory with a file name derived by replacing / with _
+in post-hook-&lt;target&gt;-&lt;runname&gt;.log.</p></div>
+<div class="listingblock">
+<div class="content monospaced">
+<pre>[runs]
+post-hook /path/to/script/to-run.sh</pre>
+</div></div>
+</div>
 </div>
 </div>
 <div class="sect2">
 <h3 id="_tests_browser_view">Tests browser view</h3>
 <div class="paragraph"><p>The tests browser (see the Run Control tab on the dashboard) has two views for displaying the tests.</p></div>
@@ -2996,10 +3011,21 @@
 </div></div>
 </div>
 </div>
 <div class="sect2">
 <h3 id="_ezsteps">Ezsteps</h3>
+<div class="paragraph"><p>Ezsteps is the recommended way to implement tests and automation in
+Megatest.</p></div>
+<div class="admonitionblock">
+<table><tr>
+<td class="icon">
+<img src="/nfs/pdx/disks/ice.disk.002/icfadm/pkgs/asciidoc/8.6.7/images/icons/note.png" alt="Note">
+</td>
+<td class="content">Each ezstep must be a single line. Use the [scripts] mechanism
+to create multiline scripts (see example below).</td>
+</tr></table>
+</div>
 <div class="listingblock">
 <div class="title">Example ezsteps with logpro rules</div>
 <div class="content monospaced">
 <pre>[ezsteps]
 lookittmp   ls /tmp
@@ -3007,16 +3033,72 @@
 [logpro]
 lookittmp ;; Note: config file format supports multi-line entries where leading whitespace is removed from each line
   ;;     a blank line indicates the end of the block of text
   (expect:required in "LogFileBody" &gt; 0 "A file name that should never exist!" #/This is a awfully stupid file name that should never be found in the temp dir/)</pre>
 </div></div>
-<div class="paragraph"><p>To transfer the environment to the next step you can do the following:</p></div>
+<div class="sect3">
+<h4 id="_automatic_environment_propagation_with_ezsteps">Automatic environment propagation with Ezsteps</h4>
+<div class="paragraph"><p>Turn on ezpropvars and environment variables will be propagated from
+step to step. Use this to source script files that modify the
+environment where the modifications are needed in subsequent steps.</p></div>
+<div class="admonitionblock">
+<table><tr>
+<td class="icon">
+<img src="/nfs/pdx/disks/ice.disk.002/icfadm/pkgs/asciidoc/8.6.7/images/icons/note.png" alt="Note">
+</td>
+<td class="content">aliases and variables with strange whitespace or characters will
+not propagate correctly. Put in a ticket on the
+<a href="http://www.kiatoa.com/fossils/megatest">http://www.kiatoa.com/fossils/megatest</a> site if you need support for a
+specific strange character combination.</td>
+</tr></table>
+</div>
+<div class="listingblock">
+<div class="title">Turn on auto propagate for bash</div>
+<div class="content monospaced">
+<pre>[setup]
+ezpropvars sh</pre>
+</div></div>
+<div class="listingblock">
+<div class="title">Write your ezsteps. The loadenv.csh step will use /bin/csh as its shell, other steps will use bash.</div>
+<div class="content monospaced">
+<pre>[ezsteps]
+loadenv.csh source $REF/ourenviron.csh
+compile make
+install make install</pre>
+</div></div>
+<div class="paragraph"><p>Bash and csh are supported. You can override the shell binary location
+from the default /bin/bash and /bin/csh if needed.</p></div>
+<div class="listingblock">
+<div class="title">Turn on auto propagate for csh</div>
+<div class="content monospaced">
+<pre>[setup]
+ezpropvars csh /bin/csh</pre>
+</div></div>
+<div class="listingblock">
+<div class="title">Example of auto propagation using extensions</div>
+<div class="content monospaced">
+<pre>[ezsteps]
+step1.sh export SOMEVAR=$(ps -def | wc -l);ls /tmp
+# The next step will get the value of $SOMEVAR from step1.sh
+step2.sh echo $SOMEVAR</pre>
+</div></div>
 <div class="listingblock">
-<div class="title">Propagate environment to next step</div>
+<div class="title">Example of multi-line script</div>
 <div class="content monospaced">
-<pre>$MT_MEGATEST -env2file .ezsteps/${stepname}</pre>
+<pre>[scripts]
+tarresults tar cfvz $DEST/srcdir1.tar.gz srcdir1
+  tar cfvz $DEST/srcdir2.tar.gz srcdir2
+
+[setup]
+ezpropvars sh
+
+[ezsteps]
+step1 DEST=/tmp/targz;source tarresults</pre>
 </div></div>
+<div class="paragraph"><p>The above example will result in files; tarresults and ez_step1 being
+created in the test dir.</p></div>
+</div>
 </div>
 <div class="sect2">
 <h3 id="_scripts">Scripts</h3>
 <div class="listingblock">
 <div class="title">Specifying scripts inline (best used for only simple scripts)</div>
@@ -3484,10 +3566,10 @@
 </div>
 <div id="footnotes"><hr></div>
 <div id="footer">
 <div id="footer-text">
 Version 1.5<br>
-Last updated 2020-09-08 08:39:29 PDT
+Last updated 2020-10-13 21:24:33 PDT
 </div>
 </div>
 </body>
 </html>

Index: docs/manual/reference.txt
==================================================================
--- docs/manual/reference.txt
+++ docs/manual/reference.txt
@@ -205,10 +205,27 @@
 [setup]
 # this will automatically kill the test if it runs for more than 1h 2m and 3s
 runtimelim 1h 2m 3s
 -----------------
 
+Post Run Hook
++++++++++++++
+
+This runs script to-run.sh after all tests have been completed. It is
+not necessary to use -run-wait as each test will check for other
+running tests on completion and if there are none it will call the
+post run hook.
+
+Note that the output from the script call will be placed in a log file
+in the logs directory with a file name derived by replacing / with _
+in post-hook-<target>-<runname>.log.
+
+-------------------
+[runs]
+post-hook /path/to/script/to-run.sh
+-------------------
+
 Tests browser view
 ~~~~~~~~~~~~~~~~~~
 
 The tests browser (see the Run Control tab on the dashboard) has two views for displaying the tests. 
 
@@ -707,10 +724,16 @@
 ---------------------------
 
 Ezsteps
 ~~~~~~~
 
+Ezsteps is the recommended way to implement tests and automation in
+Megatest.
+
+NOTE: Each ezstep must be a single line. Use the [scripts] mechanism
+to create multiline scripts (see example below).
+
 .Example ezsteps with logpro rules
 -----------------
 [ezsteps]
 lookittmp   ls /tmp
 
@@ -719,16 +742,68 @@
   ;;     a blank line indicates the end of the block of text 
   (expect:required in "LogFileBody" > 0 "A file name that should never exist!" #/This is a awfully stupid file name that should never be found in the temp dir/)
 
 -----------------
 
-To transfer the environment to the next step you can do the following:
+Automatic environment propagation with Ezsteps
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-.Propagate environment to next step
-----------------------------
-$MT_MEGATEST -env2file .ezsteps/${stepname}	  
-----------------------------
+Turn on ezpropvars and environment variables will be propagated from
+step to step. Use this to source script files that modify the
+environment where the modifications are needed in subsequent steps.
+
+NOTE: aliases and variables with strange whitespace or characters will
+not propagate correctly. Put in a ticket on the
+http://www.kiatoa.com/fossils/megatest site if you need support for a
+specific strange character combination.
+
+.Turn on auto propagate for bash
+---------------------------
+[setup]
+ezpropvars sh
+---------------------------
+
+.Write your ezsteps. The loadenv.csh step will use /bin/csh as its shell, other steps will use bash.
+---------------------------
+[ezsteps]
+loadenv.csh source $REF/ourenviron.csh
+compile make
+install make install
+---------------------------
+
+Bash and csh are supported. You can override the shell binary location
+from the default /bin/bash and /bin/csh if needed.
+
+.Turn on auto propagate for csh
+---------------------------
+[setup]
+ezpropvars csh /bin/csh
+---------------------------
+
+.Example of auto propagation using extensions
+---------------------------
+[ezsteps]
+step1.sh export SOMEVAR=$(ps -def | wc -l);ls /tmp
+# The next step will get the value of $SOMEVAR from step1.sh
+step2.sh echo $SOMEVAR
+---------------------------
+
+.Example of multi-line script
+---------------------------
+[scripts]
+tarresults tar cfvz $DEST/srcdir1.tar.gz srcdir1
+  tar cfvz $DEST/srcdir2.tar.gz srcdir2
+
+[setup]
+ezpropvars sh
+
+[ezsteps]
+step1 DEST=/tmp/targz;source tarresults
+---------------------------
+
+The above example will result in the files tarresults and ez_step1 being
+created in the test dir.
 
 Scripts
 ~~~~~~~
 
 .Specifying scripts inline (best used for only simple scripts)

Index: env.scm
==================================================================
--- env.scm
+++ env.scm
@@ -21,20 +21,22 @@
 (declare (unit env))
 
 (use sql-de-lite) ;; srfi-1 posix regex regex-case srfi-69 srfi-18 call-with-environment-variables)
 
 (define (env:open-db fname)
-  (let* ((db-exists (common:file-exists? fname))
+  (let* ((db-exists (if (equal? fname ":memory:")
+			#f
+			(common:file-exists? fname)))
 	 (db        (open-database fname)))
     (if (not db-exists)
 	(begin
-	  (exec (sql db "CREATE TABLE envvars (
-                    id INTEGER PRIMARY KEY,
-                    context TEXT NOT NULL,
-                    var TEXT NOT NULL,
-                    val TEXT NOT NULL,
-                       CONSTRAINT envvars_constraint UNIQUE (context,var))"))))
+	  (exec (sql db "CREATE TABLE IF NOT EXISTS envvars (
+                            id INTEGER PRIMARY KEY,
+                            context TEXT NOT NULL,
+                            var TEXT NOT NULL,
+                            val TEXT NOT NULL,
+                               CONSTRAINT envvars_constraint UNIQUE (context,var))"))))
     (set-busy-handler! db (busy-timeout 10000))
     db))
 
 ;; save vars in given context, this is NOT incremental by default
 ;;
@@ -77,10 +79,33 @@
 				  val)))))
 	(sql db "SELECT var,val FROM envvars WHERE context=?")
 	context))
      contexts)
     result))
+
+;; envdelta: "a-b" (start=a, end=b); prints the added/removed/changed vars between the two contexts
+;; ofile:    file name to write to; if #f fall back to the -o command line arg, else write to stdout
+;; returns #f (printing nothing) when envdelta does not split into at least two context names
+(define (env:envdelta db envdelta ofile)
+  (let ((parts   (string-split envdelta "-")) ;; NB: a context name containing "-" would be split too
+	(outfile (or ofile (args:get-arg "-o"))))
+    (if (> (length parts) 1) ;; need both a start and an end context, else cadr below would fail
+	(let* ((minuend    (car parts))
+	       (subtrahend (cadr parts))
+	       (added      (env:get-added   db minuend subtrahend))
+	       (removed    (env:get-removed db minuend subtrahend))
+	       (changed    (env:get-changed db minuend subtrahend)))
+	  ;; (pp (hash-table->alist added))
+	  ;; (pp (hash-table->alist removed))
+	  ;; (pp (hash-table->alist changed))
+	  (if outfile
+	      (with-output-to-file
+		  outfile
+		(lambda ()
+		  (env:print added removed changed)))
+	      (env:print added removed changed)))
+	#f)))
 
 ;;  get list of removed variables between two contexts
 ;;
 (define (env:get-removed db contexta contextb)
   (let ((result (make-hash-table)))

Index: ezsteps.scm
==================================================================
--- ezsteps.scm
+++ ezsteps.scm
@@ -34,21 +34,82 @@
 (include "key_records.scm")
 (include "db_records.scm")
 (include "run_records.scm")
 
 
-;;(rmt:get-test-info-by-id run-id test-id) -> testdat
+(define (ezsteps:step-name->mode stepname) ;; extension of stepname as a symbol ("load.csh" => csh), #f if there is none
+  (match (string-search "\\.([^\\.]+)$" stepname)
+    ((_ ext) (string->symbol ext))
+    (else    #f)))
+
+(define (ezsteps:create-step-script envdbf stepname prevstepname mode cmd shellexe) ;; writes executable wrapper script ez_<stepname> in the current directory
+  (let* (#;(shebang (case mode
+		    ((sh)  "/bin/sh")
+		    ((csh) "/bin/csh")
+		    (else  "/bin/bash")))
+	 (sourcef (conc ".ezsteps/vars_" prevstepname "." mode))
+	 (scriptn (conc "ez_" stepname))) ;; remember the name already has an extension .sh, .csh etc.
+    (with-output-to-file scriptn
+      (lambda ()
+	;; the shebang line
+	(print "#!" shellexe)
+	;; save the env at start
+	(print "megatest -envcap "stepname"_start "envdbf)
+	;; source vars from previous steps
+	(if (file-exists? sourcef)
+	    (print "source " sourcef))
+	;; run the command and remember its exit code (csh: use $status, a bare $? is a tcsh-only extension)
+	(print cmd)
+	(if (eq? mode 'csh)
+	    (print "set ecode=$status")
+	    (print "ecode=$?"))
+	;; save the env at end
+	(print "megatest -envcap "stepname"_end "envdbf)
+	;; write the delta
+	(print "megatest -envdelta "stepname"_start-"stepname"_end -dumpmode bash -o .ezsteps/vars_"stepname".sh "envdbf)
+	(print "megatest -envdelta "stepname"_start-"stepname"_end -dumpmode csh -o .ezsteps/vars_"stepname".csh "envdbf)
+	(print "exit $ecode")))
+    (system (conc "chmod a+x " scriptn))))
+
+(define (ezsteps:get-ezpropvars res) ;; testconfig) => (shell-symbol . "/path/to/shell") or #f
+  ;; (let* ((res (configf:lookup testconfig "setup" "ezpropvars")))
+    (if (string? res) ;; res is the raw ezpropvars value, e.g. "csh /bin/csh" or "sh"
+	(let* ((dat (string-split res)))
+	  (match dat
+	    ((s shellexe) ;; "<shell> <path>": explicit shell binary
+	     (let ((shl (string->symbol s)))
+	       `(,shl . ,shellexe)))
+	    ((s) ;; "<shell>" only: binary defaults to /bin/csh for csh, /bin/bash otherwise
+	     (let* ((shl      (string->symbol s))
+		    (shellexe (if (eq? shl 'csh) "/bin/csh" "/bin/bash")))
+	       `(,shl . ,shellexe)))
+	    (else #f))) ;; zero words or more than two words
+	#f)) ;; res is not a string
 
-;; TODO: deprecate me in favor of ezsteps.scm
+;; NOTE: returns logpro-used?
 ;;
-(define (launch:runstep ezstep run-id test-id exit-info m tal testconfig all-steps-dat)
+(define (launch:runstep ezstep run-id test-id exit-info m tal testconfig all-steps-dat prevstepname envdbf)
   (let* ((stepname       (car ezstep))  ;; do stuff to run the step
+	 (stepmode-n     (ezsteps:step-name->mode stepname))
 	 (stepinfo       (cadr ezstep))
-	;; (let ((info (cadr ezstep)))
-	;; 		   (if (proc? info) "" info)))
-	;; (stepproc       (let ((info (cadr ezstep)))
-	;; 		   (if (proc? info) info #f)))
+	 (shellmode      (ezsteps:get-ezpropvars  (configf:lookup testconfig "setup" "ezpropvars"))) ;; returns '(csh|sh . "/path/to/shell")
+	 (stepmode       (if stepmode-n ;; the .sh or .csh always wins
+			     stepmode-n
+			     (if shellmode
+				 (car shellmode)
+				 #f)))
+	 (shellexe       (if stepmode-n
+			     (case stepmode
+			       ((csh) "/bin/csh")
+			       (else  "/bin/bash"))
+			     (if shellmode
+				 (cdr shellmode)
+				 "/bin/bash")))
+	 ;; (let ((info (cadr ezstep)))
+	 ;; 		   (if (proc? info) "" info)))
+	 ;; (stepproc       (let ((info (cadr ezstep)))
+	 ;; 		   (if (proc? info) info #f)))
 	 (stepparts      (string-match (regexp "^(\\{([^\\}\\{]*)\\}\\s*|)(.*)$") stepinfo))
 	 (stepparams     (if (and (list? stepparts)
 				  (> (length stepparts) 1))
 			     (list-ref stepparts 2)
 			     #f)) ;; for future use, {VAR=1,2,3}, run step for each
@@ -80,13 +141,16 @@
 		     ";;")
 	      (print tconfig-logpro)))
 	  (set! logpro-used #t)))
     
     ;; NB// can safely assume we are in test-area directory
-    (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts
+    (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo
+		 " stepparts: " stepparts
 		 " stepparams: " stepparams " stepcmd: " stepcmd)
-    
+
+    (if stepmode (ezsteps:create-step-script envdbf stepname prevstepname stepmode stepcmd shellexe))
+
     ;; ;; first source the previous environment
     ;; (let ((prev-env (conc ".ezsteps/" prevstep (if (string-search (regexp "csh") 
     ;;      							 (get-environment-variable "SHELL")) ".csh" ".sh"))))
     ;;   (if (and prevstep (common:file-exists? prev-env))
     ;;       (set! script (conc script "source " prev-env))))
@@ -98,18 +162,21 @@
     (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f)
     ;; now launch the actual process
     (call-with-environment-variables 
      (list (cons "PATH" (conc (get-environment-variable "PATH") ":.")))
      (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1")
-       (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 
+       (let* ((cmd (conc (if stepmode
+			     (conc "ez_" stepname)
+			     stepcmd)
+			 " > " stepname ".log 2>&1")) ;; >outfile 2>&1 
 	      (pid #f))
 	 (let ((proc (lambda ()
 		       (set! pid (process-run "/bin/bash" (list "-c" cmd))))))
 	   (if subrun
                (begin
                  (debug:print-info 0 *default-log-port* "Running without MT_.* environment variables.")
-                 (common:without-vars proc "^MT_.*"))
+                 (common:propogate-mt-vars-to-subrun proc '("MT_TARGET" "MT_LINKTREE" "MT_RUNNAME")))
 	       (proc)))
 	 
          (with-output-to-file "Makefile.ezsteps"
            (lambda ()
              (print stepname ".log :")

Index: launch.scm
==================================================================
--- launch.scm
+++ launch.scm
@@ -168,34 +168,41 @@
                     (append (or ezstepslst '())
                             (list (list "subrun" (conc "{subrun=true} " mt-cmd)))))))
 
 	;; process the ezsteps
 	(if ezsteps
-	    (let* ((all-steps-dat (make-hash-table))) ;; keep all the info around as stepname ==> alist; where  'params is the params list (add other stuff as needed)
+	    (let* ((envdbf        (conc "/tmp/."(current-user-name)"-"(current-process-id)"-"run-id"-"test-id".db"))
+		   (all-steps-dat (make-hash-table))) ;; keep all the info around as stepname ==> alist;
+	                                              ;;; where  'params is the params list (add other
+	                                              ;;; stuff as needed)
 	      (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps"))
 	      ;; if ezsteps was defined then we are sure to have at least one step but check anyway
 	      (if (not (> (length ezstepslst) 0))
 		  (debug:print-error 0 *default-log-port* "ezsteps defined but ezstepslst is zero length")
-		  (let loop ((ezstep (car ezstepslst))
-			     (tal    (cdr ezstepslst))
-			     (prevstep #f))
-                    (debug:print-info 0 *default-log-port* "Processing ezstep \"" (string-intersperse ezstep " ") "\"")
-		    ;; check exit-info (vector-ref exit-info 1)
-		    (if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1)
-			(let* ((logpro-used (launch:runstep ezstep run-id test-id exit-info m tal testconfig all-steps-dat))
-			       (stepname    (car ezstep))
-			       (stepparms   (hash-table-ref all-steps-dat stepname)))
-			  (setenv "MT_STEP_NAME" stepname)
-			  (pp (hash-table->alist all-steps-dat))
-			  ;; if logpro-used read in the stepname.dat file
-			  (if (and logpro-used (common:file-exists? (conc stepname ".dat")))
-			      (launch:load-logpro-dat run-id test-id stepname))
-			  (if (steprun-good? logpro-used (launch:einf-exit-code exit-info) stepparms)
-			      (if (not (null? tal))
-				  (loop (car tal) (cdr tal) stepname))
-			      (debug:print 0 *default-log-port* "WARNING: step " (car ezstep) " failed. Stopping")))
-			(debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep)))))))))
+		  (let ((all-step-names (map car ezstepslst)))
+		    (setenv "MT_STEP_NAMES" (string-intersperse all-step-names " "))
+		    (let loop ((ezstep (car ezstepslst))
+			       (tal    (cdr ezstepslst))
+			       (prevstep #f))
+		      (debug:print-info 0 *default-log-port* "Processing ezstep \"" (string-intersperse ezstep " ") "\"")
+		      ;; check exit-info (vector-ref exit-info 1)
+		      (if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1)
+			  (let* ((logpro-used (launch:runstep ezstep run-id test-id exit-info m
+							      tal testconfig all-steps-dat prevstep envdbf))
+				 (stepname    (car ezstep))
+				 (stepparms   (hash-table-ref all-steps-dat stepname)))
+			    (setenv "MT_STEP_NAME" stepname)
+			    (pp (hash-table->alist all-steps-dat))
+			    ;; if logpro-used read in the stepname.dat file
+			    (if (and logpro-used (common:file-exists? (conc stepname ".dat")))
+				(launch:load-logpro-dat run-id test-id stepname))
+			    (if (steprun-good? logpro-used (launch:einf-exit-code exit-info) stepparms)
+				(if (not (null? tal))
+				    (loop (car tal) (cdr tal) stepname))
+				(debug:print 0 *default-log-port* "WARNING: step " (car ezstep) " failed. Stopping")))
+			  (debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep))
+		      ))))))))
 
 (define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)
   (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "30")))
          (start-seconds (current-seconds))
 	 (calc-minutes  (lambda ()
@@ -205,17 +212,19 @@
 			     (current-seconds) 
 			     start-seconds)))))
 	 (kill-tries 0))
     ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area)
     ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
-    (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)
+    (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10 update-db: #t)
 
     (let loop ((minutes   (calc-minutes))
 	       (cpu-load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
 	       (disk-free (get-df (current-directory)))
                (last-sync (current-seconds)))
-      (common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync))
+      ;; (common:telemetry-log "zombie" (conc "launch:monitor-job -
+      ;; top of loop encountered at "(current-seconds)" with
+      ;; last-sync="last-sync))
       (let* ((over-time     (> (current-seconds) (+ last-sync update-period)))
              (new-cpu-load  (let* ((load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
                                    (delta (abs (- load cpu-load))))
                               (if (> delta 0.1) ;; don't bother updating with small changes
                                   load
@@ -233,33 +242,28 @@
              (test-info   (rmt:get-test-info-by-id run-id test-id))
              (state       (db:test-get-state test-info))
              (status      (db:test-get-status test-info))
              (kill-reason  "no kill reason specified")
              (kill-job?    #f))
-        (common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period))
+        #;(common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period))
         (cond
          ((test-get-kill-request run-id test-id)
           (set! kill-reason "KILLING TEST since received kill request (KILLREQ)")
           (set! kill-job? #t))
          ((and runtlim (> (- (current-seconds) start-seconds) runtlim))
           (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim))
           (set! kill-job? #t))
          ((equal? status "DEAD")
-          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
+          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f update-db: #t)
           (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.")
           ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING
           (set! kill-job? #f)))
 
         (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)
         (launch:handle-zombie-tests run-id)
-        (when do-sync
-          ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append)
-          ;;  (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes)))))
-          (common:telemetry-log "zombie" (conc  "launch:monitor-job - dosync started at "(current-seconds)))
-          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
-          (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds))))
-        
+        (if do-sync ;; save meta data about the running of this test
+	    (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f))
 	(if kill-job? 
 	    (begin
               (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason)
 	      (mutex-lock! m)
 	      ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
@@ -312,11 +316,11 @@
 	      (if (hash-table-ref/default misc-flags 'keep-going #f)  ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta
 		  (loop (calc-minutes)
                         (or new-cpu-load cpu-load)
                         (or new-disk-free disk-free)
                         (if do-sync (current-seconds) last-sync)))))))
-    (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional
+    (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f update-db: #t))) ;; NOTE: Checking twice for keep-going is intentional
 
 
 (define (launch:execute encoded-cmd)
   (let* ((cmdinfo    (common:read-encoded-string encoded-cmd))
 	 (tconfigreg #f))
@@ -398,11 +402,10 @@
                                             ;; one more time, change to the work-area directory
                                             (change-directory work-area)))
 	       ) ;; let*
 
 	  (if contour (setenv "MT_CONTOUR" contour))
-	  
 	  ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ...
 	  ;;
 	  (setenv "MT_TESTSUITENAME" areaname)
 	  (setenv "MT_RUN_AREA_HOME" top-path)
 	  (set! *toppath* top-path)
@@ -465,10 +468,13 @@
 				  (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.")
 				  (exit))))
 		 (test-pid  (db:test-get-process_id  test-info)))
 	    (cond
              ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag.
+	     ;;((or (member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
+	     ;;	  (and (equal? (db:test-get-state test-info) "COMPLETED")                           ;; completed/abort => rerun if asked
+	     ;;	       (member (db:test-get-status test-info) '("ABORT"))))
 	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
 	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
 	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
 
               (rmt:general-call 'set-test-start-time #f test-id)
@@ -731,11 +737,11 @@
 ;; > 0 RUNNING and test_dead then send KILLREQ ==> COMPLETED
 ;; 0 RUNNING ==> this is actually the first condition, should not get here
 
 (define (launch:end-of-run-check run-id )
     (let* ((not-completed-cnt (rmt:get-not-completed-cnt run-id))  
-           (running-cnt (rmt:get-count-tests-running-for-run-id run-id #f)) ;; fastmode=no
+           (running-cnt (rmt:get-count-tests-running-for-run-id run-id))
            (all-test-launched (rmt:get-var (conc "lunch-complete-" run-id)))
            (current-state (rmt:get-run-state run-id))
            (current-status (rmt:get-run-status run-id)))
      ;;get-vars run-id to query metadata table to check if all completed. if all-test-launched = yes then only not-completed-cnt = 0 means everyting is completed if no entry found in the table do nothing 
      (debug:print 0 *default-log-port* "Running test cnt :" running-cnt)                      

Index: megatest-version.scm
==================================================================
--- megatest-version.scm
+++ megatest-version.scm
@@ -18,6 +18,6 @@
 ;; Always use two or four digit decimal
 ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00..
 
 ;; (declare (unit megatest-version))
 
-(define megatest-version 1.6569)
+(define megatest-version 1.6576)

Index: megatest.scm
==================================================================
--- megatest.scm
+++ megatest.scm
@@ -39,11 +39,11 @@
 (declare (uses mt))
 (declare (uses api))
 (declare (uses tasks)) ;; only used for debugging.
 (declare (uses env))
 (declare (uses diff-report))
-
+(declare (uses mutils))
 (declare (uses adjutant))
 (import adjutant)
 
 (declare (uses mttop))
 (import mttop)
@@ -65,11 +65,13 @@
 
 ;; Added for csv stuff - will be removed
 ;;
 (use sparse-vectors)
 
-(require-library mutils)
+(import mutils)
+
+;;(require-library mutils)
 
 (define *usage-log-file* #f)    ;; put path to file for logging usage in this var in the ~/.megatestrc file
 (define *usage-use-seconds* #t) ;; for Epoc seconds in usage logging change this to #t in ~/.megatestrc file
 
 ;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file
@@ -535,10 +537,11 @@
          "-show-cmdinfo"
 	 "-cleanup-db"))
        (no-watchdog-args-vals (filter (lambda (x) x)
                                       (map args:get-arg no-watchdog-args)))
        (start-watchdog (null? no-watchdog-args-vals)))
+       ;;(print  "no-watchdog-args="no-watchdog-args "no-watchdog-args-vals="no-watchdog-args-vals " start-watchdog-specail-arg-val:" start-watchdog-specail-arg-val " start-watchdog:" start-watchdog) 
   ;;(BB> "no-watchdog-args="no-watchdog-args "no-watchdog-args-vals="no-watchdog-args-vals) 
   (if start-watchdog
       (thread-start! *watchdog*)))
 
 
@@ -879,11 +882,14 @@
       (let* ((db      (env:open-db (if (null? remargs) "envdat.db" (car remargs)))))
 	(env:save-env-vars db envcap)
 	(env:close-database db)
 	(set! *didsomething* #t))))
 
-;; delta "language" will eventually be res=a+b-c but for now it is just res=a-b 
+;; delta "language" will eventually be res=a+b-c but for now it is just res=a-b
+;;
+;; db file can be stuck on the end of the command line:
+;;   megatest -envdelta start-end -dumpmode bash -o .ezsteps/step5.sh /tmp/myfile.db 
 ;;
 (let ((envdelta (args:get-arg "-envdelta")))
   (if envdelta
       (let ((match (string-split envdelta "-")));; (string-match "([a-z0-9_]+)=([a-z0-9_\\-,]+)" envdelta)))
 	(if (not (null? match))

Index: rmt.scm
==================================================================
--- rmt.scm
+++ rmt.scm
@@ -59,19 +59,19 @@
 (define *rmt-query-last-call-time* 0)
 (define *rmt-query-last-rest-time* 0) ;; last time there was at least a 1/2 second rest - giving other processes access to the db
 
 ;; NOTE: This query rest algorythm will not adapt to long query times. REDESIGN NEEDED. TODO. FIXME.
 ;;
-(define (rmt:query-rest)
+(define (rmt:query-rest cmd rid params)
   (let* ((now (current-milliseconds)))
     (cond
-     ((> (- now *rmt-query-last-call-time*) 500)  ;; it's been a while since last query - no need to rest
+     ((> (- now *rmt-query-last-call-time*) 100)  ;; it's been a while since last query - no need to rest
       (set! *rmt-query-last-rest-time*  now)
       (set! *rmt-query-last-call-time*  now))
      ((> (- now *rmt-query-last-rest-time*) 5000) ;; no natural rests have happened
-      (debug:print 0 *default-log-port* "query rest needed. blocking for 1/2 second.")
-      (thread-sleep! 0.5) ;; force a rest of a half second
+      (debug:print 0 *default-log-port* "query rest needed. blocking for 0.1 second. cmd="cmd", run id="rid", params="params)
+      (thread-sleep! 0.1) ;; force a rest of 0.1 second
       (set! *rmt-query-last-rest-time* now)
       (set! *rmt-query-last-call-time* now))
      (else ;; sufficient rests have occurred, just record the last query time
       (set! *rmt-query-last-call-time* now)))))
 
@@ -81,11 +81,11 @@
 
   #;(common:telemetry-log (conc "rmt:"(->string cmd))
                         payload: `((rid . ,rid)
                                    (params . ,params)))
   (if (not (equal? (configf:lookup *configdat* "setup" "query-rest") "no"))
-      (rmt:query-rest))
+      (rmt:query-rest cmd rid params))
   
   (if (> attemptnum 2)
       (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum))
     
   (cond
@@ -546,15 +546,32 @@
   (rmt:general-call 'register-test run-id run-id test-name item-path))
 
 (define (rmt:get-test-id run-id testname item-path)
   (rmt:send-receive 'get-test-id run-id (list run-id testname item-path)))
 
-;; run-id is NOT used
+;; run-id is NOT used - but it will be! 
 ;;
 (define (rmt:get-test-info-by-id run-id test-id)
   (if (number? test-id)
-      (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id))
+      (let* ((testdat  (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id)))
+             (trundir  (and (vector? testdat)(vector-ref testdat 10))) ;; guard: reply may not be a vector (e.g. #f); then it is returned untouched
+	     (trundatf (conc trundir"/.mt_data/test-run.dat")))
+	;; now we can update a couple fields from the filesystem
+	(handle-exceptions
+	    exn
+	    (begin
+	      (debug:print-info 0 *default-log-port* "Could not update testdat record from "trundatf", exn=" exn)
+	      #f)
+	  (if (and trundir
+		   (file-exists? trundatf))
+	      (let* ((duration     (vector-ref testdat 12)) ;; (db:test-get-run_duration testdat))
+		     (event-time   (vector-ref testdat 5))   ;; (db:test-get-event_time   testdat))
+		     (last-touch   (file-modification-time trundatf))
+		     (new-duration (max duration (- last-touch event-time)))) ;; never shrink the stored duration
+		(vector-set! testdat 12 new-duration))))
+	      #;(db:test-set-run_duration! testdat (max duration (- last-touch event-time)))
+	testdat)
       (begin
 	(debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
 	(print-call-chain (current-error-port))
 	#f)))
 
@@ -682,21 +699,23 @@
 	   run-ids))))
 
 (define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f))
   (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps)))
 
-(define (rmt:get-count-tests-running-for-run-id run-id fastmode)
-  (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id fastmode)))
+(define (rmt:get-count-tests-running-for-run-id run-id)
+  (if (number? run-id)
+      (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id))
+      0))
 
 (define (rmt:get-not-completed-cnt run-id)
   (rmt:send-receive 'get-not-completed-cnt run-id (list run-id)))
 
 
 ;; Statistical queries
 
-(define (rmt:get-count-tests-running run-id fastmode)
-  (rmt:send-receive 'get-count-tests-running run-id (list run-id fastmode)))
+(define (rmt:get-count-tests-running run-id)
+  (rmt:send-receive 'get-count-tests-running run-id (list run-id)))
 
 (define (rmt:get-count-tests-running-for-testname run-id testname)
   (rmt:send-receive 'get-count-tests-running-for-testname run-id (list run-id testname)))
 
 (define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)

Index: runs.scm
==================================================================
--- runs.scm
+++ runs.scm
@@ -61,11 +61,38 @@
 
 (defstruct runs:testdat
   hed tal reg reruns  test-record
   test-name item-path jobgroup
   waitons testmode  newtal itemmaps prereqs-not-met)
+
+(module runsmod
+    (
+     runs:wait-if-seen-recently
+     )
   
+(import scheme chicken data-structures extras files)
+(import posix typed-records srfi-18 srfi-69
+	  md5 message-digest
+	  regex srfi-1)
+
+(define *last-seen-ht* (make-hash-table))
+
+;; Keep calls made with the same keys at least wait-until seconds apart; returns the seconds slept (0 if none).
+(define (runs:wait-if-seen-recently wait-until . keys)
+  (let* ((full-key   (string-intersperse keys "-"))
+	 (last-seen  (hash-table-ref/default *last-seen-ht* full-key 0)) ;; 0 => never seen before
+	 (delta      (- (current-seconds) last-seen))
+	 (needed     (if (< delta wait-until) ;; seen too recently: sleep off the remainder
+			 (- wait-until delta)
+			 0)))
+    (if (> needed 0)(thread-sleep! needed))
+    (hash-table-set! *last-seen-ht* full-key (current-seconds))
+    needed))
+)
+
+(import runsmod)
+    
 ;; look in the $MT_RUN_AREA_HOME/.softlocks directory for key-host-pid.softlock files
 ;;  - remove any that are over 3600 seconds old
 ;;  - if there are any that are younger than 10 seconds
 ;;      * sleep 10 seconds
 ;;      * touch my key-host-pid.softlock file
@@ -321,11 +348,11 @@
 	   (args:get-arg "-one-pass"))
       (exit 0))
 
   (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
 
-  (let* ((num-running             (rmt:get-count-tests-running run-id #f)) ;; fastmode=no
+  (let* ((num-running             (rmt:get-count-tests-running run-id)) 
 	 (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup))
 	 (job-group-limit         (let ((jobg-count (configf:lookup *configdat* "jobgroups" jobgroup)))
 				    (if (string? jobg-count)
 					(string->number jobg-count)
 					jobg-count))))
@@ -435,10 +462,12 @@
 	      (debug:print-info 0 *default-log-port* "post-hook \"" run-post-hook "\" took " (- (current-seconds) start-time) " seconds to run."))))))
 
 ;; return #t when all items in waitors-upon list are represented in test-patt, #f otherwise.
 (define (runs:testpatts-mention-waitors-upon? test-patt waitors-upon)
   (null? (tests:filter-test-names-not-matched waitors-upon test-patt)))
+
+(define *find-and-mark-incomplete-last-run* (make-hash-table)) ;; run-id => (current-seconds) of the most recent rmt:find-and-mark-incomplete call for that run
 
 ;;======================================================================
 ;; runs:run-tests is called from megatest.scm and itself
 ;;======================================================================
 ;;
@@ -606,17 +635,17 @@
     (runs:run-pre-hook run-id)
     ;; mark all test launced flag as false in the meta table 
     (rmt:set-var (conc "lunch-complete-" run-id) "no")
     (debug:print-info 1 *default-log-port* "Setting end-of-run to no")
     (let* ((config-reruns      (let ((x (configf:lookup *configdat* "setup" "reruns")))
-			       (if x (string->number x) #f)))
-	  (config-rerun-cnt (if config-reruns
-			config-reruns
-			1)))
-    (if (eq? config-rerun-cnt run-count)
-      (rmt:set-var (conc "end-of-run-" run-id) "no")))
-
+				 (if x (string->number x) #f)))
+	   (config-rerun-cnt (if config-reruns
+				 config-reruns
+				 1)))
+      (if (eq? config-rerun-cnt run-count)
+	  (rmt:set-var (conc "end-of-run-" run-id) "no")))
+    
     (rmt:set-run-state-status run-id "new" "n/a")
     ;; now add non-directly referenced dependencies (i.e. waiton)
     ;;======================================================================
     ;; refactoring this block into tests:get-full-data
     ;;
@@ -730,34 +759,27 @@
     (debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records))
     (let ((reglen (configf:lookup *configdat* "setup" "runqueue")))
       (if (> (length (hash-table-keys test-records)) 0)
 	  (let* ((keep-going        #t)
 		 (run-queue-retries 5)
-		 #;(th1        (make-thread (lambda ()
-					    (handle-exceptions
-						exn
-						(begin
-						  (print-call-chain)
-						  (print " message: " ((condition-property-accessor 'exn 'message) exn)))
-					      (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
-								    (any->number reglen) all-tests-registry)))
-					  "runs:run-tests-queue"))
 		 (th2        (make-thread (lambda ()			 ;; BBQ: why are we visiting ALL runs here?	    
 					    ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...
 					    (let ((run-ids (rmt:get-all-run-ids)))
 					      (for-each (lambda (run-id)
 							  (if keep-going
 							      (handle-exceptions
-							       exn
-							       (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn)
-							       (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27)
+								  exn
+								  (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn)
+								;; lets run this only if a run has been NOT seen for more than 900 seconds
+								(if (> (- (current-seconds)(hash-table-ref/default *find-and-mark-incomplete-last-run* run-id 0)) 900)
+								    (begin
+								      (rmt:find-and-mark-incomplete run-id #f)
+								      (hash-table-set! *find-and-mark-incomplete-last-run* run-id (current-seconds)))
+								    )))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27)
 							run-ids)))
 					  "runs: mark-incompletes")))
-	    ;; (thread-start! th1)
 	    (thread-start! th2)
-	    ;; (thread-join! th1)
-	    ;; just do the main stuff in the main thread
 	    (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
 								    (any->number reglen) all-tests-registry)
 	    (set! keep-going #f)
 	    (thread-join! th2)
 	    ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD
@@ -766,12 +788,12 @@
 		  (if (not (hash-table-ref/default flags "-preclean" #f))
 		      (hash-table-set! flags "-preclean" #t))
 		  (if (not (hash-table-ref/default flags "-rerun" #f))
 		      (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS"))
 		  ;; recursive call to self
-      (runs:run-tests target runname test-patts user flags run-count: (- run-count 1)))
-                  (launch:end-of-run-check run-id)))
+		  (runs:run-tests target runname test-patts user flags run-count: (- run-count 1)))
+		(launch:end-of-run-check run-id)))
 	  (debug:print-info 0 *default-log-port* "No tests to run")))
     (debug:print-info 4 *default-log-port* "All done by here")
     ;; TODO: try putting post hook call here
       
     ;  (debug:print-info 2 *default-log-port* " run-count " run-count)
@@ -1470,10 +1492,11 @@
          (max-concurrent-jobs   (configf:lookup-number *configdat* "setup" "max_concurrent_jobs" default: 50))
          (reglen                (if (number? reglen-in) reglen-in 1))
          (last-time-incomplete  (- (current-seconds) 900)) ;; force at least one clean up cycle
          (last-time-some-running (current-seconds))
          ;; (tdbdat                (tasks:open-db))
+	 (misc-data             (make-hash-table)) ;; use as needed
          (runsdat (make-runs:dat
                    ;; hed: hed
                    ;; tal: tal
                    ;; reg: reg
                    ;; reruns: reruns
@@ -1530,10 +1553,16 @@
           (begin
             (set! last-time-incomplete (current-seconds))
             ;; (rmt:find-and-mark-incomplete-all-runs)
 	    ))
 
+      ;; WAIT FOR TIME ON TIGHT LOOP - if the previous pass through here was under 100 ms ago, sleep before continuing
+      (if (< (- (current-milliseconds)(hash-table-ref/default misc-data "tight-loop-last-time" 0))
+	     100) ;; less than 1/10 second (100 ms) since we last came through the loop
+	  (thread-sleep! 0.1)) ;; wait 1/10 second
+      (hash-table-set! misc-data "tight-loop-last-time" (current-milliseconds))
+      
       ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns)
       (let* ((test-record (hash-table-ref test-records hed))
 	     (test-name   (tests:testqueue-get-testname test-record))
 	     (tconfig     (tests:testqueue-get-testconfig test-record))
 	     (jobgroup    (configf:lookup tconfig "test_meta" "jobgroup"))
@@ -1565,11 +1594,11 @@
 				  extras)
 				'())))
 	     (waitons     (delete-duplicates (append (tests:testqueue-get-waitons test-record) extra-waits) equal?))
 	     (newtal      (append tal (list hed)))
 	     (regfull     (>= (length reg) reglen))
-	     (num-running (rmt:get-count-tests-running-for-run-id run-id #t)) ;; fastmode=yes
+	     (num-running (rmt:get-count-tests-running-for-run-id run-id))
 	     (testdat     (make-runs:testdat
 			   hed: hed
 			   tal: tal
 			   reg: reg
 			   reruns: reruns
@@ -1715,10 +1744,13 @@
 				  (loop-can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)
 						     (- remtries 1)))))))
 		       )))))
 
 	  ;; I'm not clear on why prereqs are gathered here TODO: verfiy this is needed
+	  (let ((waited (runs:wait-if-seen-recently 5 "prereqs-not-met" hed item-path))) ;; if we've been down this path in the past 5 seconds - wait out the difference
+	    (if (> waited 0)(debug:print 0 *default-log-port* "Waited for prereqs-not-met-"hed"-"item-path" for " waited " seconds."))) ;; only report when a wait actually happened
+	  
 	  (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))
 
 	  ;; I'm not clear on why we'd capture running job counts here TODO: verify this is needed
 	  (runs:dat-can-run-more-tests-set! runsdat (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))
 
@@ -1831,31 +1863,33 @@
     ;; now *if* -run-wait we wait for all tests to be done
     ;; Now wait for any RUNNING tests to complete (if in run-wait mode)
     ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
     (thread-sleep! 10) ;; I think there is a race condition here. Let states/statuses settle
     
-    (let wait-loop ((num-running      (rmt:get-count-tests-running-for-run-id run-id #t)) ;; fastmode=yes
+    (let wait-loop ((num-running      (rmt:get-count-tests-running-for-run-id run-id))
 		    (prev-num-running 0))
       ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running)
       (if (and (or (args:get-arg "-run-wait")
 		   (equal? (configf:lookup *configdat* "setup" "run-wait") "yes"))
 	       (> num-running 0))
 	  (begin
 	    ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
 	    ;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0))
-	    (if (> (current-seconds)(+ last-time-incomplete 900))
-		(let ((actual-num-running (rmt:get-count-tests-running-for-run-id run-id #f))) ;; fastmode=no
+	    (if (> (- (current-seconds)(hash-table-ref/default *find-and-mark-incomplete-last-run* run-id 0)) 900)
+		;; (begin(if (> (current-seconds)(+ last-time-incomplete 900))
+		(let ((actual-num-running num-running)) ;; (rmt:get-count-tests-running-for-run-id run-id))) ;; why call it again?
 		  (debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id
 				    ". Running as pid " (current-process-id) " on " (get-host-name))
-		  (set! last-time-incomplete (current-seconds)) ;; FIXME, this might be causing slow down - use of set!
+		  ;; (set! last-time-incomplete (current-seconds)) ;; FIXME, this might be causing slow down - use of set!
 		  (rmt:find-and-mark-incomplete run-id #f)
+		  (hash-table-set! *find-and-mark-incomplete-last-run* run-id (current-seconds))
 		  (debug:print-info 0 *default-log-port* "run-wait specified, waiting on " actual-num-running
 				    " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at "
 				    (time->string (seconds->local-time (current-seconds))))))
 	    ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
 	    (thread-sleep! 5) ;; (if (>= num-running max-concurrent-jobs) 5 1))
-	    (wait-loop (rmt:get-count-tests-running-for-run-id run-id #t) ;; fastmode=yes
+	    (wait-loop (rmt:get-count-tests-running-for-run-id run-id)
 		       num-running))))
     ;; LET* ((test-record
     ;; we get here on "drop through". All done!
     ;; this is moved to runs:run-testes since this function is getting called twice to ensure everthing is completed. 
     ;; (debug:print-info 0 *default-log-port* "Calling Post Hook")    
@@ -2255,29 +2289,10 @@
     path-out
   )
 )
 
 
-;; (define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep)
-;;   (let ((data (runs:get-all-but-most-recent-n-per-target target-patts runpatt num-to-keep)))
-;;     (for-each
-;;      (lambda (target)
-;;        (let ((runs-to-remove (hash-table-ref data target )))
-;;          (for-each
-;;           (lambda (run)
-;;             (print "megatest -remove-runs -target " target " -runname " (simple-run-runname run) " -testpatt %"))
-;;           runs-to-remove)))
-;;      (hash-table-keys data))))
-
-;; Remove runs
-;; fields are passing in through 
-;; action:
-;;    'remove-runs
-;;    'set-state-status
-;;
-;; NB// should pass in keys?
-;;
 (define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(mode #f)(options '()))
   (common:clear-caches) ;; clear all caches
   (let* ((db           #f)
 	 ;; (tdbdat       (tasks:open-db))
 	 (keys         (rmt:get-keys))
@@ -2511,11 +2526,11 @@
                                       (begin
                                        (let ((rundir (db:test-get-rundir new-test-dat)))
                                         (if (and (not (string=  rundir "/tmp/badname")) 
                                              (file-exists? rundir)
                                              (substring-index run-name rundir)
-                                             (substring-index target rundir)
+                                             (tests:glob-like-match (conc "%/" target "/%") rundir)
                                              )
                                           (begin
                                             (set! lasttpath (db:test-get-rundir new-test-dat)) ;; remember this path for run removal
                                             (set! lastrealpath (remove-last-path-directory (resolve-pathname lasttpath)))
                                             (hash-table-set! run-paths-hash lastrealpath 1)
@@ -2524,11 +2539,13 @@
                                           (begin
                                             (debug:print 2 *default-log-port* "Not removing directory " rundir " because either it doesn't exist or has a bad name")
                                             (debug:print 2 *default-log-port* "Is /tmp/badname: " (string=  rundir "/tmp/badname"))
                                             (debug:print 2 *default-log-port* "Exists: " (file-exists? rundir))
                                             (debug:print 2 *default-log-port* "Has run-name: " (substring-index run-name rundir))
-                                            (debug:print 2 *default-log-port* "Has target: " (substring-index target rundir))
+                                            (debug:print 2 *default-log-port* "Has target: " (tests:glob-like-match (conc "%/" target "/%") rundir))
+                                            (debug:print 2 *default-log-port* "Target: " target)
+
                                             ;;PJH remove record from db no need to cleanup directory
                                             (case mode
                                                ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) (db:test-get-state test)(db:test-get-status test) #f))
                                                ((archive-remove)  (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f))
                                                (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test))))

Index: server.scm
==================================================================
--- server.scm
+++ server.scm
@@ -217,11 +217,11 @@
 			 (tal  (cdr server-logs))
 			 (res '()))
 		(let* ((mod-time  (handle-exceptions
 				   exn
 				   (begin
-				     (print "failed to get modification time on " hed ", exn=" exn)
+				     (debug:print 0 *default-log-port* "failed to get modification time on " hed ", exn=" exn)
 				     (current-seconds)) ;; 0
 				   (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted
 		       (down-time (- (current-seconds) mod-time))
 		       (serv-dat  (if (or (< num-serv-logs 10)
 				  	  (< down-time 900)) ;; day-seconds))
@@ -326,10 +326,11 @@
 (define (server:wait-for-server-start-last-flag areapath)
   (let* ((start-flag (conc areapath "/logs/server-start-last"))
 	 ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds)
 	 (reftime    (configf:lookup-number *configdat* "server" "idletime" default: 4))
 	 (server-key (conc (get-host-name) "-" (current-process-id))))
+    ;; (thread-sleep! (/ (random 500) 1000)) ;; I don't think this made a difference
     (if (file-exists? start-flag)
 	(let* ((fmodtime (file-modification-time start-flag))
 	       (delta    (- (current-seconds) fmodtime))
 	       (all-go   (> delta reftime)))
 	  (if (and all-go

Index: tasks.scm
==================================================================
--- tasks.scm
+++ tasks.scm
@@ -443,23 +443,23 @@
   (db:with-db
    dbstruct #f #t
    (lambda (db)
      (sqlite3:execute db (conc "DELETE FROM tasks_queue WHERE id IN (" task-ids ");")))))
 
-#;(define (tasks:process-queue dbstruct)
-  (let* ((task   (tasks:snag-a-task dbstruct))
-	 (action (if task (tasks:task-get-action task) #f)))
-    (if action (print "tasks:process-queue task: " task))
-    (if action
-	(case (string->symbol action)
-	  ((run)       (tasks:start-run     dbstruct task))
-	  ((remove)    (tasks:remove-runs   dbstruct task))
-	  ((lock)      (tasks:lock-runs     dbstruct task))
-	  ;; ((monitor)   (tasks:start-monitor db task))
-	  #;((rollup)    (tasks:rollup-runs   dbstruct task))
-	  ((updatemeta)(tasks:update-meta   dbstruct task))
-	  #;((kill)      (tasks:kill-monitors dbstruct task))))))
+;; (define (tasks:process-queue dbstruct)
+;;   (let* ((task   (tasks:snag-a-task dbstruct))
+;; 	 (action (if task (tasks:task-get-action task) #f)))
+;;     (if action (print "tasks:process-queue task: " task))
+;;     (if action
+;; 	(case (string->symbol action)
+;; 	  ((run)       (tasks:start-run     dbstruct task))
+;; 	  ((remove)    (tasks:remove-runs   dbstruct task))
+;; 	  ((lock)      (tasks:lock-runs     dbstruct task))
+;; 	  ;; ((monitor)   (tasks:start-monitor db task))
+;; 	  #;((rollup)    (tasks:rollup-runs   dbstruct task))
+;; 	  ((updatemeta)(tasks:update-meta   dbstruct task))
+;; 	  #;((kill)      (tasks:kill-monitors dbstruct task))))))
 
 (define (tasks:tasks->text tasks)
   (let ((fmtstr "~10a~10a~10a~12a~20a~12a~12a~10a"))
     (conc (format #f fmtstr "id" "action" "owner" "state" "target" "runname" "testpatts" "params") "\n"
 	  (string-intersperse 
@@ -742,11 +742,11 @@
 ;;
 (define (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)
   (let* ((runs-ht (hash-table-ref cached-info 'runs))
 	 (runinf  (hash-table-ref/default runs-ht run-id #f))
          (area-id (vector-ref area-info 0)))
-       (if runinf
+    (if runinf
 	runinf ;; already cached
 	(let* ((run-dat    (rmt:get-run-info run-id))               ;; NOTE: get-run-info returns a vector < row header >
 	       (run-name   (rmt:get-run-name-from-id run-id))
 	       (row        (db:get-rows run-dat))                   ;; yes, this returns a single row
 	       (header     (db:get-header run-dat))
@@ -755,65 +755,65 @@
 	       (owner      (db:get-value-by-header row header "owner"))
 	       (event-time (db:get-value-by-header row header "event_time"))
 	       (comment    (db:get-value-by-header row header "comment"))
 	       (fail-count (db:get-value-by-header row header "fail_count"))
 	       (pass-count (db:get-value-by-header row header "pass_count"))
-         (db-contour (db:get-value-by-header row header "contour"))
+	       (db-contour (db:get-value-by-header row header "contour"))
 	       (contour    (if (args:get-arg "-prepend-contour") 
-                                 (if (and db-contour (not (equal? db-contour ""))  (string? db-contour )) 
-                                           (begin 
-                                            (debug:print-info 1 *default-log-port*  "db-contour") 
- 						db-contour)
-					    (args:get-arg "-contour"))))
-         (run-tag (if (args:get-arg "-run-tag")
+			       (if (and db-contour (not (equal? db-contour ""))  (string? db-contour )) 
+				   (begin 
+				     (debug:print-info 1 *default-log-port*  "db-contour") 
+				     db-contour)
+				   (args:get-arg "-contour"))))
+	       (run-tag (if (args:get-arg "-run-tag")
                             (args:get-arg "-run-tag")
-									""))
-         (last-update (db:get-value-by-header row header "last_update"))
+			    ""))
+	       (last-update (db:get-value-by-header row header "last_update"))
 	       (keytarg    (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target"))
-	       			(conc "MT_CONTOUR/MT_AREA/" (string-intersperse (rmt:get-keys) "/")) (string-intersperse (rmt:get-keys) "/"))) ;; e.g. version/iteration/platform
+			       (conc "MT_CONTOUR/MT_AREA/" (string-intersperse (rmt:get-keys) "/")) (string-intersperse (rmt:get-keys) "/"))) ;; e.g. version/iteration/platform
 	       (target     (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target")) 
-	       			(conc (or (args:get-arg "-prefix-target") (conc contour "/" (common:get-area-name) "/")) (rmt:get-target run-id)) (rmt:get-target run-id)))                 ;; e.g. v1.63/a3e1/ubuntu
+			       (conc (or (args:get-arg "-prefix-target") (conc contour "/" (common:get-area-name) "/")) (rmt:get-target run-id)) (rmt:get-target run-id)))                 ;; e.g. v1.63/a3e1/ubuntu
 	       (spec-id    (pgdb:get-ttype dbh keytarg))
 	       (publish-time (if (args:get-arg "-cp-eventtime-to-publishtime")
-                            event-time
-                           (current-seconds))) 
+				 event-time
+				 (current-seconds))) 
 	       (new-run-id (pgdb:get-run-id dbh spec-id target run-name area-id)))
-         (if new-run-id
-	         (begin ;; let ((run-record (pgdb:get-run-info dbh new-run-id))
-		        (hash-table-set! runs-ht run-id new-run-id)
+	  (if new-run-id
+	      (begin ;; let ((run-record (pgdb:get-run-info dbh new-run-id))
+		(hash-table-set! runs-ht run-id new-run-id)
 		;; ensure key fields are up to date
-     ;; if last_update == pgdb_last_update do not update smallest-last-update-time  
-    (let* ((pgdb-last-update (pgdb:get-run-last-update dbh new-run-id))
-           (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
-     (if (and  (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
-        (hash-table-set! smallest-last-update-time "smallest-time" last-update)))
+		;; if last_update == pgdb_last_update do not update smallest-last-update-time  
+		(let* ((pgdb-last-update (pgdb:get-run-last-update dbh new-run-id))
+		       (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
+		  (if (and  (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
+		      (hash-table-set! smallest-last-update-time "smallest-time" last-update)))
 		(pgdb:refresh-run-info
 		 dbh
 		 new-run-id
 		 state status owner event-time comment fail-count pass-count area-id last-update publish-time)
-     (debug:print-info 0 *default-log-port* "Working on run-id " run-id " pgdb-id "  new-run-id )
-     (if (not (equal? run-tag ""))
-      (task:add-run-tag dbh new-run-id run-tag))
+		(debug:print-info 0 *default-log-port* "Working on run-id " run-id " pgdb-id "  new-run-id )
+		(if (not (equal? run-tag ""))
+		    (task:add-run-tag dbh new-run-id run-tag))
 		new-run-id) 
-      
+	      
 	      (if (equal? state "deleted")
-          (begin 
-          (debug:print-info 1 *default-log-port*  "Warning: Run with id " run-id " was created after previous sync and deleted before the sync") #f)
-          (if (handle-exceptions
-		        exn
-		        (begin (print-call-chain)
-              (print ((condition-property-accessor 'exn 'message) exn))     
+		  (begin 
+		    (debug:print-info 1 *default-log-port*  "Warning: Run with id " run-id " was created after previous sync and deleted before the sync") #f)
+		  (if (handle-exceptions
+		       exn
+		       (begin (print-call-chain)
+			      (print ((condition-property-accessor 'exn 'message) exn))     
 			      #f)
-            
-            (pgdb:insert-run
-		     dbh
-		     spec-id target run-name state status owner event-time comment fail-count pass-count  area-id last-update publish-time))
-		       (let* ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
-             (if (or (not smallest-time) (< last-update smallest-time))
-        				(hash-table-set! smallest-last-update-time "smallest-time" last-update))
-             (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time))
-		  #f)))))))
+		       
+		       (pgdb:insert-run
+			dbh
+			spec-id target run-name state status owner event-time comment fail-count pass-count  area-id last-update publish-time))
+		      (let* ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
+			(if (or (not smallest-time) (< last-update smallest-time))
+			    (hash-table-set! smallest-last-update-time "smallest-time" last-update))
+			(tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time))
+		      #f)))))))
 
 (define (task:add-run-tag dbh run-id tag) 
   (let* ((tag-info (pgdb:get-tag-info-by-name dbh tag)))
    (if (not tag-info)
      (begin   
@@ -1015,11 +1015,11 @@
 (define (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time) 
   (for-each
      (lambda (run-id)
       (debug:print-info 1 *default-log-port*   "Check if run with " run-id " needs to be synced" )
        (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time))
-run-ids))
+     run-ids))
 
 
 ;; get runs changed since last sync
 ;; (define (tasks:sync-test-data dbh cached-info area-info)
 ;;   (let* ((

Index: tests.scm
==================================================================
--- tests.scm
+++ tests.scm
@@ -1701,10 +1701,27 @@
 	;;   (map car (sort data (lambda (a b)
 	;;     		    (> (string->number (caddr a))(string->number (caddr b)))))))
 	;; ))
 	(sort all-tests sort-fn1)))) ;; avoid dealing with deleted tests, look at the hash table
 
+;; Look up the entries of the "waitons" section of *configdat* whose key is
+;; "testname" itself or "testname/<anything>".
+(define (tests:get-mt-waitons testname flatten) ;; flatten true => list of waited-on names with any "/..." suffix removed; false => the matching section entries as-is
+  (let* ((name-rx    (conc "^" (regexp-escape testname) "(/.*)?$")) ;; escape testname so regex metacharacters in it (e.g. ".") match literally
+	 (my-waitons (filter
+		      (lambda (entry)
+			(string-match name-rx (car entry)))
+		      (configf:get-section *configdat* "waitons"))))
+    (if flatten
+	(map (lambda (w)
+	       (car (string-split w "/"))) ;; keep only the part before the first "/"
+	     (append-map (lambda (entry)
+			   (string-split (cadr entry))) ;; the entry value is split on whitespace into individual waitons
+			 my-waitons))
+	my-waitons)))
+
+;; NOT USED
 (define (tests:easy-dot test-records outtype)
   (let-values (((fd temp-path) (file-mkstemp (conc "/tmp/" (current-user-name) ".XXXXXX"))))
     (let ((all-testnames (hash-table-keys test-records))
 	  (temp-port     (open-output-file* fd)))
       ;; (format temp-port "This file is ~A.~%" temp-path)
@@ -1712,15 +1729,17 @@
       (format temp-port "  size=4,8\n")
       ;; (format temp-port "   splines=none\n")
       (for-each
        (lambda (testname)
 	 (let* ((testrec (hash-table-ref test-records testname))
-		(waitons (or (tests:testqueue-get-waitons testrec) '())))
+		(waitons (or (tests:testqueue-get-waitons testrec) '()))
+		(my-mt-waitons (tests:get-mt-waitons testname #t)))
+	   ;; (print "my-mt-waitons=" my-mt-waitons)
 	   (for-each
 	    (lambda (waiton)
 	      (format temp-port (conc "   " waiton " -> " testname " [splines=ortho]\n")))
-	    waitons)))
+	    (append waitons my-mt-waitons))))
        all-testnames)
       (format temp-port "}\n")
       (close-output-port temp-port)
       (with-input-from-pipe
        (conc "env -i PATH=$PATH dot -T" outtype " < " temp-path)
@@ -1745,17 +1764,19 @@
 			      (conc " size=\"" (or sizex 11) "," (or sizey 11) "\";")
 			      " ratio=0.95;"
 			      )))
 	  (let* ((testrec (hash-table-ref test-records hed))
 		 (waitons (or (tests:testqueue-get-waitons testrec) '()))
+		 (my-mt-waitons (tests:get-mt-waitons hed #t))
+		 (all-waitons   (delete-duplicates (append waitons my-mt-waitons)))
 		 (newres  (append res
-				  (if (null? waitons)
+				  (if (null? all-waitons)
 				      (list (conc "   \"" hed "\" [shape=box];"))
 				      (map (lambda (waiton)
 					     (conc "   \"" waiton "\" -> \"" hed "\" [shape=box];"))
-					   waitons)
-				      ))))
+					   all-waitons)))))
+	    ;; (debug:print 0 *default-log-port* "For test "hed" got "all-waitons)
 	    (if (null? tal)
 		(append newres (list "}"))
 		(loop (car tal)(cdr tal) newres)
 		))))))
 
@@ -1773,27 +1794,34 @@
       (close-input-port inp)
       res)))
 
 ;; read data from tmp file or create if not exists
 ;; if exists regen in background
+;; mode: raw (return data as read) or munged (convert to list of lists and remove " from strings)
 ;;
-(define (tests:lazy-dot testrecords  outtype sizex sizey)
+(define (tests:lazy-dot testrecords  outtype sizex sizey mode) ;; returns the output of "dot -T outtype" for the file written by tests:write-dot-file, as lines (mode 'raw) or as lists of words (any other mode)
   (let ((dfile (conc "/tmp/." (current-user-name) "-" (server:mk-signature) ".dot"))
 	(fname (conc "/tmp/." (current-user-name) "-" (server:mk-signature) ".dotdat")))
     (tests:write-dot-file testrecords dfile sizex sizey)
-    (if (common:file-exists? fname)
-	(let ((res (with-input-from-file fname
-		     (lambda ()
-		       (read-lines)))))
-	  (system (conc "env -i PATH=$PATH dot -T " outtype " < " dfile " > " fname "&"))
-	  res)
-	(begin
-	  (system (conc "env -i PATH=$PATH dot -T " outtype " < " dfile " > " fname))
-	  (with-input-from-file fname
-	    (lambda ()
-	      (read-lines)))))))
-	  
+    (let ((data (if (common:file-exists? fname) ;; a previous result exists: use it and regenerate in the background
+		    (let ((res (with-input-from-file fname
+				 (lambda ()
+				   (read-lines)))))
+		      (system (conc "env -i PATH=$PATH dot -T " outtype " < " dfile " > " fname "&")) ;; trailing "&": dot runs in the background, its output is picked up by a later call
+		      res) ;; res was read before the regeneration started, so it reflects the previous dot run
+		    (begin
+		      (system (conc "env -i PATH=$PATH dot -T " outtype " < " dfile " > " fname)) ;; no previous result: run dot in the foreground and wait for it
+		      (with-input-from-file fname
+			(lambda ()
+			  (read-lines)))))))
+      (if (eq? mode 'raw)
+	  data ;; raw: the lines exactly as read
+	  (map (lambda (inl)
+		 (map (lambda (s)
+			(string-substitute "\"" "" s #t)) ;; strip every double quote from the word
+		      (string-split inl))) ;; split each line on whitespace
+	       data)))))
 
 ;; for each test:
 ;;   
 (define (tests:filter-non-runnable run-id testkeynames testrecordshash)
   (let ((runnables '()))
@@ -1944,28 +1972,52 @@
 	 tdb
 	 "SELECT count(id) FROM test_rundat;")
 	res))
   0)
 
-(define (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname)
-  (rmt:general-call 'update-test-rundat run-id test-id (current-seconds) (or cpuload -1)(or diskfree -1) -1 (or minutes -1))
-  (if (and cpuload diskfree)
-      (rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id))
-  (if minutes 
-      (rmt:general-call 'update-run-duration run-id minutes test-id))
-  (if (and uname hostname)
-      (rmt:general-call 'update-uname-host run-id uname hostname test-id)))
+;; Record measurements for a test: append a CSV row to $MT_TEST_RUN_DIR/.mt_data/test-run.dat when MT_TEST_RUN_DIR is set, otherwise send 'update-test-rundat via rmt. The cpuload/diskfree, run-duration and uname/host rmt updates are only sent when update-db is true.
+(define (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname #!key (update-db #f)(tmpfree #f))
+  (if (get-environment-variable "MT_TEST_RUN_DIR")
+      (let* ((dest-dir (conc (get-environment-variable "MT_TEST_RUN_DIR") "/.mt_data"))
+	     (dat-file (conc dest-dir "/test-run.dat"))
+	     (or-dash  (lambda (instr)
+			 (cond
+			  ((not instr) "") ;; #f -> blank, indicates value unchanged since last measurement taken
+			  ((string? instr)(if (string-search " " instr) (conc "\"" instr "\"") instr)) ;; quote strings containing spaces
+			  (else instr))))
+	     (file-new (not (file-exists? dat-file)))) ;; the header is needed whenever the data file itself is missing, even if the directory already exists
+	(if (not (directory-exists? dest-dir)) (create-directory dest-dir #t))
+	(let* ((outp (open-output-file dat-file #:append)))
+	  (with-output-to-port outp
+	    (lambda ()
+	      (if file-new
+		  (print "epoch_time,run_id,test_id,cpuload,diskfree,tmpfree,run_minutes,hostname,uname"))
+	      (print (current-seconds) "," (or-dash run-id)   "," (or-dash test-id)  ","
+		     (or-dash cpuload) "," (or-dash diskfree) "," (or-dash tmpfree)  ","
+		     (or-dash minutes) "," (or-dash hostname) ","
+		     (or-dash uname)))) ;; put uname last as it has spaces in it
+	  (close-output-port outp)))
+      (rmt:general-call 'update-test-rundat run-id test-id (current-seconds) (or cpuload -1)(or diskfree -1) -1 (or minutes -1))) ;; no run dir in the environment: missing values are sent as -1
+  (if update-db
+      (begin
+	(if (and cpuload diskfree)
+	    (rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id))
+	(if minutes 
+	    (rmt:general-call 'update-run-duration run-id minutes test-id))
+	(if (and uname hostname)
+	    (rmt:general-call 'update-uname-host run-id uname hostname test-id)))))
   
 ;; This one is for running with no db access (i.e. via rmt: internally)
-(define (tests:set-full-meta-info db test-id run-id minutes work-area remtries)
+(define (tests:set-full-meta-info db test-id run-id minutes work-area remtries #!key (update-db #f)) ;; collects (get-cpu-load), (get-df ...), (get-uname ...) and (get-host-name) here and hands them to tests:update-central-meta-info; db, work-area and remtries are not referenced in this body
 ;; (define (tests:set-full-meta-info test-id run-id minutes work-area)
 ;;  (let ((remtries 10))
   (let* ((cpuload  (get-cpu-load))
 	 (diskfree (get-df (current-directory)))
+	 (tmpfree  (get-df "/tmp")) ;; same get-df measurement as diskfree, taken on /tmp
 	 (uname    (get-uname "-srvpio"))
 	 (hostname (get-host-name)))
-    (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname)))
+    (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname update-db: update-db tmpfree: tmpfree))) ;; update-db is passed straight through
     
 ;; (define (tests:set-partial-meta-info test-id run-id minutes work-area)
 #;(define (tests:set-partial-meta-info test-id run-id minutes work-area remtries)
   (let* ((cpuload  (get-cpu-load))
 	 (diskfree (get-df (current-directory)))

Index: utils/remrun
==================================================================
--- utils/remrun
+++ utils/remrun
@@ -40,6 +40,12 @@
   exit
 fi
 
 export NBFAKE_HOST=$1
 shift
-exec nbfake $*
+cmd="" # remote command line: an export for every exported PARENT_*/MT_* variable, followed by the caller's command
+for var in $(compgen -e | grep -E '^(PARENT_|MT_)');do
+  printf -v new_var '%q' "${!var}" # shell-quote the value so spaces and metacharacters survive being re-parsed
+  cmd="$cmd export $var=$new_var;" 
+done
+cmd="$cmd $*"
+exec nbfake $cmd