Index: .mtutil.scm
==================================================================
--- .mtutil.scm
+++ .mtutil.scm
@@ -23,45 +23,46 @@
 (define (str-first-char->number str)
   (char->integer (string-ref str 0)))
  
 ;; example of how to set up and write target mappers
 ;;
-(hash-table-set! *target-mappers*
-		 'prefix-contour
-		 (lambda (target run-name area area-path reason contour mode-patt)
-		   (conc contour "/" target)))
-(hash-table-set! *target-mappers*
-		 'prefix-area-contour
-		 (lambda (target run-name area area-path reason contour mode-patt)
-		   (conc area "/" contour "/" target)))
-  
-(hash-table-set! *runname-mappers*
-		 'corporate-ww
-		 (lambda (target run-name area area-path reason contour mode-patt)
-		   (print "corporate-ww called with: target=" target " run-name=" run-name " area=" area " area-path=" area-path " reason=" reason " contour=" contour " mode-patt=" mode-patt)
-		   (let* ((last-name   (get-last-runname area-path target))
-			  (last-letter (let* ((ch (if (string? last-name)
-						      (let ((len (string-length last-name)))
-							(substring last-name (- len 1) len))
-						      "a"))
-					      (chnum (str-first-char->number ch))
-					      (a     (str-first-char->number "a"))
-					      (z     (str-first-char->number "z")))
-					 (if (and (>= chnum a)(<= chnum z))
-					     chnum
-					     #f)))
-			  (next-letter (if last-letter
-					   (list->string
-					    (list
-					     (integer->char
-					      (+ last-letter 1)))) ;; surely there is an easier way?
-					   "a")))
-		     ;; (print "last-name: " last-name " last-letter: " last-letter " next-letter: " next-letter)
-		     (conc (seconds->wwdate (current-seconds)) next-letter))))
-
-(hash-table-set! *runname-mappers*
-		 'auto
-		 (lambda (target run-name area area-path reason contour mode-patt)
-		   "auto-eh"))
-
-;; (print "Got here!")
+(add-target-mapper 'prefix-contour
+		   (lambda (target run-name area area-path reason contour mode-patt)
+		     (conc contour "/" target)))
+(add-target-mapper 'prefix-area-contour
+		   (lambda (target run-name area area-path reason contour mode-patt)
+		     (conc area "/" contour "/" target)))
+  
+(add-runname-mapper 'corporate-ww
+		    (lambda (target run-name area area-path reason contour mode-patt)
+		      (print "corporate-ww called with: target=" target " run-name=" run-name " area=" area " area-path=" area-path " reason=" reason " contour=" contour " mode-patt=" mode-patt)
+		      (let* ((last-name   (get-last-runname area-path target))
+			     (last-letter (let* ((ch (if (string? last-name)
+							 (let ((len (string-length last-name)))
+							   (substring last-name (- len 1) len))
+							 "a"))
+						 (chnum (str-first-char->number ch))
+						 (a     (str-first-char->number "a"))
+						 (z     (str-first-char->number "z")))
+					    (if (and (>= chnum a)(<= chnum z))
+						chnum
+						#f)))
+			     (next-letter (if last-letter
+					      (list->string
+					       (list
+						(integer->char
+						 (+ last-letter 1)))) ;; surely there is an easier way?
+					      "a")))
+			;; (print "last-name: " last-name " last-letter: " last-letter " next-letter: " next-letter)
+			(conc (seconds->wwdate (current-seconds)) next-letter))))
+
+(add-runname-mapper 'auto
+		    (lambda (target run-name area area-path reason contour mode-patt)
+		      "auto-eh"))
+
+;; run only areas where first letter of area name is "a"
+;;
+(add-area-checker 'first-letter-a
+                  (lambda (area target contour)
+                    (string-match "^a.*$" area)))
+
 

Index: Makefile
==================================================================
--- Makefile
+++ Makefile
@@ -6,14 +6,14 @@
 INSTALL=install
 SRCFILES = common.scm items.scm launch.scm \
    ods.scm runconfig.scm server.scm configf.scm \
    db.scm keys.scm margs.scm megatest-version.scm \
    process.scm runs.scm tasks.scm tests.scm genexample.scm \
-   http-transport.scm filedb.scm \
-   client.scm synchash.scm daemon.scm mt.scm \
+   http-transport.scm filedb.scm tdb.scm \
+   client.scm daemon.scm mt.scm \
    ezsteps.scm lock-queue.scm sdb.scm \
-   rmt.scm api.scm tdb.scm rpc-transport.scm \
+   rmt.scm api.scm \
    portlogger.scm archive.scm env.scm diff-report.scm cgisetup/models/pgdb.scm
 
 # Eggs to install (straightforward ones)
 EGGS=matchable readline apropos base64 regex-literals format regex-case test coops trace csv \
 dot-locking posix-utils posix-extras directory-utils hostinfo tcp-server rpc csv-xml fmt \
@@ -52,10 +52,44 @@
 	csc $(CSCOPTS) $(OFILES) $(GOFILES) newdashboard.scm -o ndboard
 
 mtut: $(OFILES) mtut.scm
 	csc $(CSCOPTS) $(OFILES) mtut.scm -o mtut
 
+TCMTOBJS = \
+	api.o \
+	archive.o \
+	cgisetup/models/pgdb.o \
+	client.o \
+	common.o \
+	configf.o \
+	daemon.o \
+	db.o \
+	env.o \
+	http-transport.o \
+	items.o \
+	keys.o \
+	launch.o \
+	lock-queue.o \
+	margs.o \
+	mt.o \
+	megatest-version.o \
+	ods.o \
+	portlogger.o \
+	process.o \
+	rmt.o \
+	rpc-transport.o \
+	runconfig.o \
+	runs.o \
+	server.o \
+	tasks.o \
+	tdb.o \
+	tests.o \
+
+
+tcmt : $(TCMTOBJS) tcmt.scm
+	csc $(CSCOPTS) $(TCMTOBJS) tcmt.scm -o tcmt
+
 # install documentation to $(PREFIX)/docs
 # DOES NOT REBUILD DOCS
 #
 $(PREFIX)/share/docs/megatest_manual.html : docs/manual/megatest_manual.html
 	mkdir -p $(PREFIX)/share/docs
@@ -114,10 +148,17 @@
 	$(INSTALL) mtut $(PREFIX)/bin/.$(ARCHSTR)/mtut
 
 $(PREFIX)/bin/mtutil : $(PREFIX)/bin/.$(ARCHSTR)/mtut utils/mk_wrapper
 	utils/mk_wrapper $(PREFIX) mtut $(PREFIX)/bin/mtutil
 	chmod a+x $(PREFIX)/bin/mtutil
+
+$(PREFIX)/bin/.$(ARCHSTR)/tcmt : tcmt
+	$(INSTALL) tcmt $(PREFIX)/bin/.$(ARCHSTR)/tcmt
+
+$(PREFIX)/bin/tcmt : $(PREFIX)/bin/.$(ARCHSTR)/tcmt utils/mk_wrapper
+	utils/mk_wrapper $(PREFIX) tcmt $(PREFIX)/bin/tcmt
+	chmod a+x $(PREFIX)/bin/tcmt
 
 # $(PREFIX)/bin/.$(ARCHSTR)/mdboard : multi-dboard
 #	$(INSTALL) multi-dboard $(PREFIX)/bin/.$(ARCHSTR)/mdboard
 
 # $(PREFIX)/bin/mdboard : $(PREFIX)/bin/.$(ARCHSTR)/mdboard  utils/mk_wrapper
@@ -193,11 +234,13 @@
 
 install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \
           $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \
 	  $(PREFIX)/bin/nbfind $(PREFIX)/bin/loadrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \
 	  $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \
-          $(PREFIX)/share/db/mt-pg.sql $(PREFIX)/bin/.$(ARCHSTR)/ndboard $(PREFIX)/bin/newdashboard
+          $(PREFIX)/share/db/mt-pg.sql $(PREFIX)/bin/.$(ARCHSTR)/ndboard  $(PREFIX)/bin/tcmt
+
+# $(PREFIX)/bin/newdashboard
 
 $(PREFIX)/bin/.$(ARCHSTR) : 
 	mkdir -p $(PREFIX)/bin/.$(ARCHSTR)
 	mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib
 
@@ -304,5 +347,9 @@
 	fi
 
 portlogger-example : portlogger-example.scm api.o archive.o client.o common.o configf.o daemon.o dashboard-tests.o db.o dcommon.o ezsteps.o filedb.o genexample.o gutils.o http-transport.o items.o keys.o launch.o lock-queue.o margs.o megatest-version.o mt.o ods.o portlogger.o process.o rmt.o rpc-transport.o runconfig.o runs.o sdb.o server.o synchash.o tasks.o tdb.o tests.o tree.o
 	csc $(CSCOPTS) portlogger-example.scm api.o archive.o client.o common.o configf.o daemon.o dashboard-tests.o db.o dcommon.o ezsteps.o filedb.o genexample.o gutils.o http-transport.o items.o keys.o launch.o lock-queue.o margs.o megatest-version.o mt.o ods.o portlogger.o process.o rmt.o rpc-transport.o runconfig.o runs.o sdb.o server.o synchash.o tasks.o tdb.o tests.o tree.o
 
+# create a pdf dot graphviz diagram from notations in rmt.scm
+rmt.pdf : rmt.scm
+	grep ';;DOT' rmt.scm | sed -e 's/.*;;DOT //' > rmt.dot;dot -Tpdf rmt.dot -o rmt.pdf
+

Index: api.scm
==================================================================
--- api.scm
+++ api.scm
@@ -56,10 +56,11 @@
     get-run-ids-matching-target
     get-runs-by-patt
     get-steps-data
     get-steps-for-test
     read-test-data
+    read-test-data*
     login
     tasks-get-last
     testmeta-get-record
     have-incompletes?
     synchash-get
@@ -126,10 +127,11 @@
    (cond
     ((not (vector? dat))                    ;; it is an error to not receive a vector
      (vector #f (vector #f "remote must be called with a vector")))
     ((> *api-process-request-count* 20) ;; 20)
      (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.")
+     (set! *server-overloaded* #t)
      (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor!
     (else  
      (let* ((cmd-in            (vector-ref dat 0))
             (cmd               (if (symbol? cmd-in)
 				   cmd-in
@@ -152,11 +154,26 @@
                    ;; SERVERS
                    ((start-server)                    (apply server:kind-run params))
                    ((kill-server)                     (set! *server-run* #f))
 
                    ;; TESTS
-                   ((test-set-state-status-by-id)     (apply mt:test-set-state-status-by-id dbstruct params))
+
+                   ;;((test-set-state-status-by-id)     (apply mt:test-set-state-status-by-id dbstruct params))
+                   ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items.
+                   ((test-set-state-status-by-id)
+
+                    ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
+                    (db:set-state-status-and-roll-up-items
+                     dbstruct
+                     (list-ref params 0) ; run-id
+                     (list-ref params 1) ; test-name
+                     #f                  ; item-path
+                     (list-ref params 2) ; state
+                     (list-ref params 3) ; status
+                     (list-ref params 4) ; comment
+                     ))
+                   
                    ((delete-test-records)             (apply db:delete-test-records dbstruct params))
                    ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params))
                    ((test-set-state-status)           (apply db:test-set-state-status dbstruct params))
                    ((test-set-top-process-pid)        (apply db:test-set-top-process-pid dbstruct params))
                    ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params))
@@ -193,10 +210,15 @@
                    ;; TASKS
                    ((tasks-add)                 (apply tasks:add dbstruct params))   
                    ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params))
                    ((tasks-get-last)            (apply tasks:get-last dbstruct params))
 
+		   ;; NO SYNC DB
+		   ((no-sync-set)               (apply db:no-sync-set         *no-sync-db* params))
+		   ((no-sync-get/default)       (apply db:no-sync-get/default *no-sync-db* params))
+		   ((no-sync-del!)              (apply db:no-sync-del!        *no-sync-db* params))
+		 
                    ;; ARCHIVES
                    ;; ((archive-get-allocations)   
                    ((archive-register-disk)     (apply db:archive-register-disk dbstruct params))
                    ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params))
                    ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey))
@@ -257,10 +279,11 @@
                    ((get-steps-data)               (apply db:get-steps-data dbstruct params))
                    ((get-steps-for-test)           (apply db:get-steps-for-test dbstruct params))
 
                    ;; TEST DATA
                    ((read-test-data)               (apply db:read-test-data dbstruct params))
+                   ((read-test-data*)              (apply db:read-test-data* dbstruct params))
 
                    ;; MISC
                    ((get-latest-host-load)         (apply db:get-latest-host-load dbstruct params))
                    ((have-incompletes?)            (apply db:have-incompletes? dbstruct params))
                    ((login)                        (apply db:login dbstruct params))

Index: archive.scm
==================================================================
--- archive.scm
+++ archive.scm
@@ -7,12 +7,11 @@
 ;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 ;;  PURPOSE.
 
 ;;  strftime('%m/%d/%Y %H:%M:%S','now','localtime')
 
-(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking format md5 message-digest srfi-18)
-(import (prefix sqlite3 sqlite3:))
+(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 format md5 message-digest srfi-18)
 
 (declare (unit archive))
 (declare (uses db))
 (declare (uses common))
 
@@ -30,11 +29,11 @@
 	(flavor  'plain) ;; type of machine to run jobs on
 	(maxload 1.5)   ;; max allowed load for this work
 	(adisks  (archive:get-archive-disks)))
     ;; get testdir size
     ;;   - hand off du to job mgr
-    (if (and (file-exists? testdir)
+    (if (and (common:file-exists? testdir)
 	     (file-is-writable? testdir))
 	(let* ((dused  (jobrunner:run-job 
 			flavor  ;; machine type
 			maxload ;; max allowed load
 			'()     ;; prevars - environment vars to set for the job
@@ -137,11 +136,11 @@
 				      (> (rmt:test-toplevel-num-items run-id test-name) 0)))
 	      (test-partial-path (conc target "/" run-name "/" (db:test-make-full-name test-name item-path)))
 	      ;; note the trailing slash to get the dir inspite of it being a link
 	      (test-path         (conc linktree "/" test-partial-path))
 	      (mutex-lock! rp-mutex)
-	      (test-physical-path (if (file-exists? test-path) 
+	      (test-physical-path (if (common:file-exists? test-path) 
 				      (common:real-path test-path)
 				      #f))
 	      (mutex-unlock! rp-mutex)
 	      (partial-path-index (if test-physical-path (substring-index test-partial-path test-physical-path) #f))
 	      (test-base         (if (and partial-path-index 
@@ -152,11 +151,11 @@
 				     #f)))
 	 
  	 (cond
 	  (toplevel/children
 	   (debug:print 0 *default-log-port* "WARNING: cannot archive " test-name " with id " test-id " as it is a toplevel test with children"))
-	  ((not (file-exists? test-path))
+	  ((not (common:file-exists? test-path))
 	   (debug:print 0 *default-log-port* "WARNING: Cannot archive " test-name "/" item-path " as path " test-path " does not exist"))
 	  (else
 	   (debug:print 0 *default-log-port*
 			"From test-dat=" test-dat " derived the following:\n"
 			"test-partial-path  = " test-partial-path "\n"
@@ -180,13 +179,13 @@
 					      (conc "-" compress) ;; or (conc "--compress=" compress)
 					      "-n" (conc (common:get-testsuite-name) "-" run-id)
 					      (conc "--strip-path=" disk-group))
 					test-paths))
 	      (print-prefix      #f)) ;; "Running: ")) ;; change to #f to turn off printing
-	 (if (not (file-exists? archive-dir))
+	 (if (not (common:file-exists? archive-dir))
 	     (create-directory archive-dir #t))
-	 (if (not (file-exists? (conc archive-dir "/HEAD")))
+	 (if (not (common:file-exists? (conc archive-dir "/HEAD")))
 	     (begin
 	       ;; replace this with jobrunner stuff enventually
 	       (debug:print-info 0 *default-log-port* "Init bup in " archive-dir)
 	       ;; (mutex-lock! bup-mutex)
 	       (run-n-wait bup-exe params: bup-init-params print-cmd: print-prefix)
@@ -233,11 +232,11 @@
 	      (test-partial-path (conc target "/" run-name "/" (db:test-make-full-name test-name item-path)))
 	      ;; note the trailing slash to get the dir inspite of it being a link
 	      (test-path         (conc linktree "/" test-partial-path))
 	      ;; if the old path was not deleted then prev-test-physical-path will end up pointing to a real directory
 	      (mutex-lock! rp-mutex)
-	      (prev-test-physical-path (if (file-exists? test-path)
+	      (prev-test-physical-path (if (common:file-exists? test-path)
 					   ;; (read-symbolic-link test-path #t)
 					   (common:real-path test-path)
 					   #f))
 	      (mutex-unlock! rp-mutex)
 	      (new-test-physical-path  (conc best-disk "/" test-partial-path))
@@ -250,11 +249,11 @@
 	 
 	 ;; some sanity checks, move an existing path out of the way - iif it is not a toplevel with children
 	 ;;
 	 (if (and (not toplevel/children)  ;; special handling needed for toplevel with children
 		  prev-test-physical-path
-		  (file-exists? prev-test-physical-path)) ;; what to do? abort or clean up or link it in?
+		  (common:file-exists? prev-test-physical-path)) ;; what to do? abort or clean up or link it in?
 	     (let* ((base (pathname-directory prev-test-physical-path))
 		    (dirn (pathname-file      prev-test-physical-path))
 		    (newn (conc base "/." dirn)))
 	       (debug:print-error 0 *default-log-port* "the old directory " prev-test-physical-path ", still exists! Moving it to " newn)
 	       (rename-file prev-test-physical-path newn)))

Index: client.scm
==================================================================
--- client.scm
+++ client.scm
@@ -10,18 +10,13 @@
 
 ;;======================================================================
 ;; C L I E N T S
 ;;======================================================================
 
-(require-extension (srfi 18) extras tcp s11n)
-
-(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest matchable)
-;; (use zmq)
-
-(use (prefix sqlite3 sqlite3:))
-
-(use spiffy uri-common intarweb http-client spiffy-request-vars uri-common intarweb directory-utils)
+(use srfi-18 extras tcp s11n srfi-1 posix regex srfi-69 hostinfo md5
+     message-digest matchable spiffy uri-common intarweb http-client
+     spiffy-request-vars uri-common intarweb directory-utils)
 
 (declare (unit client))
 
 (declare (uses common))
 (declare (uses db))

Index: common.scm
==================================================================
--- common.scm
+++ common.scm
@@ -8,20 +8,17 @@
 ;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 ;;  PURPOSE.
 ;;======================================================================
 
 (use srfi-1 posix regex-case base64 format dot-locking csv-xml z3 sql-de-lite hostinfo md5 message-digest typed-records directory-utils stack
-     matchable)
-(require-extension regex posix)
-
-(require-extension (srfi 18) extras tcp rpc)
+     matchable regex posix srfi-18 extras
+     pkts (prefix dbi dbi:))
 
 (import (prefix sqlite3 sqlite3:))
 (import (prefix base64 base64:))
 
 (declare (unit common))
-(declare (uses keys))
 
 (include "common_records.scm")
 
 ;; (require-library margs)
 ;; (include "margs.scm")
@@ -46,11 +43,11 @@
 	  (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val))))
 
 (define home (getenv "HOME"))
 (define user (getenv "USER"))
 
-;; GLOBAL GLETCHES
+;; GLOBALS
 
 ;; CONTEXTS
 (defstruct cxt
   (taskdb #f)
   (cmutex (make-mutex)))
@@ -111,10 +108,12 @@
 (define *db-access-mutex*     (make-mutex))
 (define *db-transaction-mutex* (make-mutex))
 (define *db-cache-path*       #f)
 (define *db-with-db-mutex*    (make-mutex))
 (define *db-api-call-time*    (make-hash-table)) ;; hash of command => (list of times)
+;; no sync db
+(define *no-sync-db*          #f)
 
 ;; SERVER
 (define *my-client-signature* #f)
 (define *transport-type*    'http)             ;; override with [server] transport http|rpc|nmsg
 (define *runremote*         #f)                ;; if set up for server communication this will hold <host port>
@@ -129,10 +128,11 @@
 (define *home-host*         #f)
 ;; (define *total-non-write-delay* 0)
 (define *heartbeat-mutex*   (make-mutex))
 (define *api-process-request-count* 0)
 (define *max-api-process-requests* 0)
+(define *server-overloaded*  #f)
 
 ;; client
 (define *rmt-mutex*         (make-mutex))     ;; remote access calls mutex 
 
 ;; RPC transport
@@ -149,10 +149,13 @@
 
 (define *run-info-cache*     (make-hash-table)) ;; run info is stable, no need to reget
 (define *launch-setup-mutex* (make-mutex))     ;; need to be able to call launch:setup often so mutex it and re-call the real deal only if *toppath* not set
 (define *homehost-mutex*     (make-mutex))
 
+;; Miscellaneous
+(define *triggers-mutex*     (make-mutex))     ;; block overlapping processing of triggers
+
 (defstruct remote
   (hh-dat            (common:get-homehost)) ;; homehost record ( addr . hhflag )
   (server-url        (if *toppath* (server:check-if-running *toppath*))) ;; (server:check-if-running *toppath*) #f))
   (last-server-check 0)  ;; last time we checked to see if the server was alive
   (conndat           #f)
@@ -225,28 +228,40 @@
    (substring (common:get-last-run-version) 0 6)))
 
 (define (common:set-last-run-version)
   (rmt:set-var "MEGATEST_VERSION" (common:version-signature)))
 
+;; postive number if megatest version > db version
+;; negative number if megatest version < db version
+(define (common:version-db-delta)
+         (- megatest-version (common:get-last-run-version-number)))
+
 (define (common:version-changed?)
   (not (equal? (common:get-last-run-version)
-	       (common:version-signature))))
+               (common:version-signature))))
 
+(define (common:api-changed?)
+  (not (equal? (substring (->string megatest-version) 0 4)
+               (substring (conc (common:get-last-run-version)) 0 4))))
+  
 ;; Move me elsewhere ...
 ;; RADT => Why do we meed the version check here, this is called only if version misma
 ;;
-(define (common:cleanup-db dbstruct)
-  (db:multi-db-sync 
+(define (common:cleanup-db dbstruct #!key (full #f))
+  (apply db:multi-db-sync 
    dbstruct
+   'schema
    ;; 'new2old
    'killservers
-   'dejunk
-   ;; 'adj-testids
+   'adj-target
    ;; 'old2new
    'new2old
-   'schema)
-  (if (common:version-changed?)
+   ;; (if full
+       '(dejunk)
+       ;; '())
+       )
+  (if (common:api-changed?)
       (common:set-last-run-version)))
 
 ;; Rotate logs, logic: 
 ;;                 if > 500k and older than 1 week:
 ;;                     remove previous compressed log and compress this log
@@ -266,11 +281,11 @@
                      (> (file-size fullname) 200000))
                 (and (string-match "^server-.*.log" file)
                      (> (- (current-seconds) (file-modification-time fullname))
                         (* 8 60 60))))
             (let ((gzfile (conc fullname ".gz")))
-              (if (file-exists? gzfile)
+              (if (common:file-exists? gzfile)
                   (begin
                     (debug:print-info 0 *default-log-port* "removing " gzfile)
                     (delete-file gzfile)))
               (debug:print-info 0 *default-log-port* "compressing " file)
               (system (conc "gzip " fullname)))
@@ -285,22 +300,22 @@
 ;; Force a megatest cleanup-db if version is changed and skip-version-check not specified
 ;; Do NOT check if not on homehost!
 ;;
 (define (common:exit-on-version-changed)
   (if (common:on-homehost?)
-      (if (common:version-changed?)
+      (if (common:api-changed?)
 	  (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config"))
                 (dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db"))
                 (read-only (not (file-write-access? dbfile)))
-                (dbstruct (db:setup)))
+                (dbstruct (db:setup #t)))
 	    (debug:print 0 *default-log-port*
 			 "WARNING: Version mismatch!\n"
 			 "   expected: " (common:version-signature) "\n"
 			 "   got:      " (common:get-last-run-version))
             (cond
              ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t)
-             ((and (file-exists? mtconf) (file-exists? dbfile) (not read-only)
+             ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only)
                    (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db
               (debug:print 0 *default-log-port* "   I see you are the owner of megatest.config, attempting to cleanup and reset to new version")
               (handle-exceptions
                exn
                (begin
@@ -307,14 +322,14 @@
                  (debug:print 0 *default-log-port* "Failed to switch versions.")
                  (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
                  (print-call-chain (current-error-port))
                  (exit 1))
                (common:cleanup-db dbstruct)))
-             ((not (file-exists? mtconf))
+             ((not (common:file-exists? mtconf))
               (debug:print 0 *default-log-port* "   megatest.config does not exist in this area.  Cannot proceed with megatest version migration.")
               (exit 1))
-             ((not (file-exists? dbfile))
+             ((not (common:file-exists? dbfile))
               (debug:print 0 *default-log-port* "   megatest.db does not exist in this area.  Cannot proceed with megatest version migration.")
               (exit 1))
              ((not (eq? (current-user-id)(file-owner mtconf)))
               (debug:print 0 *default-log-port* "   You do not own megatest.db in this area.  Cannot proceed with megatest version migration.")
               (exit 1))
@@ -321,14 +336,14 @@
              (read-only
               (debug:print 0 *default-log-port* "   You have read-only access to this area.  Cannot proceed with megatest version migration.")
               (exit 1))
              (else
               (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"")
-              (exit 1)))))
-      (begin
-	(debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.")
-	(exit 1))))
+              (exit 1)))))))
+;;      (begin
+;;	(debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.")
+;;	(exit 1))))
 
 ;;======================================================================
 ;; S P A R S E   A R R A Y S
 ;;======================================================================
 
@@ -424,11 +439,11 @@
 ;;
 (define (common:simple-file-lock fname #!key (expire-time 300))
   (handle-exceptions
       exn
       #f ;; don't really care what went wrong right now. NOTE: I have not seen this one actually fail.
-    (if (file-exists? fname)
+    (if (common:file-exists? fname)
 	(if (> (- (current-seconds)(file-modification-time fname)) expire-time)
 	    (begin
 	      (delete-file* fname)
 	      (common:simple-file-lock fname expire-time: expire-time))
 	    #f)
@@ -435,11 +450,11 @@
 	(let ((key-string (conc (get-host-name) "-" (current-process-id))))
 	  (with-output-to-file fname
 	    (lambda ()
 	      (print key-string)))
 	  (thread-sleep! 0.25)
-	  (if (file-exists? fname)
+	  (if (common:file-exists? fname)
 	      (with-input-from-file fname
 		(lambda ()
 		  (equal? key-string (read-line))))
 	      #f)))))
 
@@ -462,11 +477,12 @@
 
 ;;======================================================================
 ;; S T A T E S   A N D   S T A T U S E S
 ;;======================================================================
 
-(define *common:std-states*   
+;; BBnote: *common:std-states* - dashboard filter control and test control state buttons defined here; used in set-fields-panel and dboard:make-controls
+(define *common:std-states*   ;; for toggle buttons in dashboard
   '((0 "ARCHIVED")
     (1 "STUCK")
     (2 "KILLREQ")
     (3 "KILLED")
     (4 "NOT_STARTED")
@@ -474,18 +490,19 @@
     (6 "LAUNCHED")
     (7 "REMOTEHOSTSTART")
     (8 "RUNNING")
     ))
 
+;; BBnote: *common:std-statuses* dashboard filter control and test control status buttons defined here; used in set-fields-panel and dboard:make-controls
 (define *common:std-statuses*
   '(;; (0 "DELETED")
     (1 "n/a")
     (2 "PASS")
-    (3 "CHECK")
-    (4 "SKIP")
-    (5 "WARN")
-    (6 "WAIVED")
+    (3 "SKIP")
+    (4 "WARN")
+    (5 "WAIVED")
+    (6 "CHECK")
     (7 "STUCK/DEAD")
     (8 "FAIL")
     (9 "ABORT")))
 
 (define *common:ended-states*       ;; states which indicate the test is stopped and will not proceed
@@ -492,12 +509,13 @@
   '("COMPLETED" "ARCHIVED" "KILLED" "KILLREQ" "STUCK" "INCOMPLETE"))
 
 (define *common:badly-ended-states* ;; these roll up as CHECK, i.e. results need to be checked
   '("KILLED" "KILLREQ" "STUCK" "INCOMPLETE" "DEAD"))
 
+;; BBnote: *common:running-states* used from db:set-state-status-and-roll-up-items
 (define *common:running-states*     ;; test is either running or can be run
-  '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED"))
+  '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED" "STARTED"))
 
 (define *common:cant-run-states*    ;; These are stopping conditions that prevent a test from being run
   '("COMPLETED" "KILLED" "UNKNOWN" "INCOMPLETE" "ARCHIVED"))
 
 (define *common:not-started-ok-statuses* ;; if not one of these statuses when in not_started state treat as dead
@@ -588,20 +606,26 @@
           (pathname-file *toppath*)
           #f))) ;; (pathname-file (current-directory)))))
 
 (define common:get-area-name common:get-testsuite-name)
 
-(define (common:get-db-tmp-area)
+(define (common:get-db-tmp-area . junk)
   (if *db-cache-path*
       *db-cache-path*
-      (if *toppath*
-	  (let ((dbpath (create-directory (conc "/tmp/" (current-user-name)
-						"/megatest_localdb/"
-						(common:get-testsuite-name) "/"
-						(string-translate *toppath* "/" ".")) #t)))
-	    (set! *db-cache-path* dbpath)
-	    dbpath)
+      (if *toppath* ;; common:get-create-writeable-dir
+	  (handle-exceptions
+	      exn
+	      (begin
+		(debug:print-error 0 *default-log-port* "Couldn't create path to " dbdir)
+		(exit 1))
+	    (let ((dbpath (common:get-create-writeable-dir
+			   (list (conc "/tmp/" (current-user-name)
+				       "/megatest_localdb/"
+				       (common:get-testsuite-name) "/"
+				       (string-translate *toppath* "/" ".")))))) ;;  #t))))
+	      (set! *db-cache-path* dbpath)
+	      dbpath))
 	  #f)))
 
 (define (common:get-area-path-signature)
   (message-digest-string (md5-primitive) *toppath*))
 
@@ -618,23 +642,10 @@
 
 ;;   (let ((ohh (common:on-homehost?))
 ;; 	(srv (args:get-arg "-server")))
 ;;     (and ohh srv)))
     ;; (debug:print-info 0 *default-log-port* "common:run-sync? ohh=" ohh ", srv=" srv)
-
-;;;; run-ids
-;;    if #f use *db-local-sync* : or 'local-sync-flags
-;;    if #t use timestamps      : or 'timestamps
-(define (common:sync-to-megatest.db dbstruct) 
-  (let ((start-time         (current-seconds))
-	(res                (db:multi-db-sync dbstruct 'new2old)))
-    (let ((sync-time (- (current-seconds) start-time)))
-      (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id))
-      (if (common:low-noise-print 30 "sync new to old")
-	  (debug:print-info 0 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id))))
-    res))
-
 
 
 
 (define *wdnum* 0)
 (define *wdnum*mutex (make-mutex))
@@ -661,102 +672,34 @@
                  (< duration-since-last-sync sync-cool-off-duration))
             (thread-sleep! (- sync-cool-off-duration duration-since-last-sync)))
         (if (not *time-to-exit*)
             (let ((golden-mtdb-mtime (file-modification-time golden-mtpath))
                   (tmp-mtdb-mtime    (file-modification-time tmp-mtpath)))
-              (if (> golden-mtdb-mtime tmp-mtdb-mtime)
-                  (let ((res (db:multi-db-sync dbstruct 'old2new)))
-                    (debug:print-info 13 *default-log-port* "rosync called, " res " records transferred.")))
+	      (if (> golden-mtdb-mtime tmp-mtdb-mtime)
+		  (if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back
+		      (let ((res (db:multi-db-sync dbstruct 'old2new)))
+			(debug:print-info 13 *default-log-port* "rosync called, " res " records transferred."))))
               (loop (current-seconds)))
             #t)))
     (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath)))
 
-
-        
-(define (common:writable-watchdog dbstruct)
-  (thread-sleep! 0.05) ;; delay for startup
-  (let ((legacy-sync (common:run-sync?))
-	(debug-mode  (debug:debug-mode 1))
-	(last-time   (current-seconds))
-        (this-wd-num     (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x))))
-    (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
-    (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)" this-wd-num="this-wd-num)
-    (if (and legacy-sync (not *time-to-exit*))
-	(let* (;;(dbstruct (db:setup))
-	       (mtdb     (dbr:dbstruct-mtdb dbstruct))
-	       (mtpath   (db:dbdat-get-path mtdb)))
-	  (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
-	  (let loop ()
-	    ;; sync for filesystem local db writes
-	    ;;
-	    (mutex-lock! *db-multi-sync-mutex*)
-	    (let* ((need-sync        (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
-		   (sync-in-progress *db-sync-in-progress*)
-		   (should-sync      (and (not *time-to-exit*)
-                                          (> (- (current-seconds) *db-last-sync*) 5))) ;; sync every five seconds minimum
-		   (start-time       (current-seconds))
-		   (mt-mod-time      (file-modification-time mtpath))
-		   (recently-synced  (< (- start-time mt-mod-time) 4))
-		   (will-sync        (and (or need-sync should-sync)
-					  (not sync-in-progress)
-					  (not recently-synced))))
-              (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop.  need-sync="need-sync" sync-in-progress="sync-in-progress" should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync)
-	      ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced))
-	      ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
-	      (if will-sync (set! *db-sync-in-progress* #t))
-	      (mutex-unlock! *db-multi-sync-mutex*)
-	      (if will-sync
-		  (let ((res (common:sync-to-megatest.db dbstruct))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
-		    (if (> res 0) ;; some records were transferred, keep the db alive
-			(begin
-			  (mutex-lock! *heartbeat-mutex*)
-			  (set! *db-last-access* (current-seconds))
-			  (mutex-unlock! *heartbeat-mutex*)
-			  (debug:print-info 0 *default-log-port* "sync called, " res " records transferred."))
-			(debug:print-info 2 *default-log-port* "sync called but zero records transferred"))))
-	      (if will-sync
-		  (begin
-		    (mutex-lock! *db-multi-sync-mutex*)
-		    (set! *db-sync-in-progress* #f)
-		    (set! *db-last-sync* start-time)
-		    (mutex-unlock! *db-multi-sync-mutex*)))
-	      (if (and debug-mode
-		       (> (- start-time last-time) 60))
-		  (begin
-		    (set! last-time start-time)
-		    (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
-	    
-	    ;; keep going unless time to exit
-	    ;;
-	    (if (not *time-to-exit*)
-		(let delay-loop ((count 0))
-                  ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
-                                                            
-		  (if (and (not *time-to-exit*)
-			   (< count 4)) ;; was 11, changing to 4. 
-		      (begin
-			(thread-sleep! 1)
-			(delay-loop (+ count 1))))
-		  (if (not *time-to-exit*) (loop))))
-	    (if (common:low-noise-print 30)
-		(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" this-wd-num="this-wd-num)))))))
-
 ;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage
 (define (common:watchdog)
   (debug:print-info 13 *default-log-port* "common:watchdog entered.")
-  (if (common:on-homehost?)
-      (let ((dbstruct (db:setup)))
-	(debug:print-info 13 *default-log-port* "after db:setup with dbstruct="dbstruct)
-	(cond
-	 ((dbr:dbstruct-read-only dbstruct)
-	  (debug:print-info 13 *default-log-port* "loading read-only watchdog")
-	  (common:readonly-watchdog dbstruct))
-	 (else
-	  (debug:print-info 13 *default-log-port* "loading writable-watchdog.")
-	  (common:writable-watchdog dbstruct)))
-	(debug:print-info 13 *default-log-port* "watchdog done."))
-      (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost")))
+  (if (launch:setup)
+      (if (common:on-homehost?)
+	  (let ((dbstruct (db:setup #t)))
+	    (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct)
+	    (cond
+	     ((dbr:dbstruct-read-only dbstruct)
+	      (debug:print-info 13 *default-log-port* "loading read-only watchdog")
+	      (common:readonly-watchdog dbstruct))
+	     (else
+	      (debug:print-info 13 *default-log-port* "loading writable-watchdog.")
+	      (server:writable-watchdog dbstruct)))
+	    (debug:print-info 13 *default-log-port* "watchdog done."))
+	  (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost"))))
 
 
 (define (std-exit-procedure)
   (on-exit (lambda () 0))
   ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*)
@@ -776,14 +719,15 @@
 					(begin
 					  (sqlite3:interrupt! db)
 					  (sqlite3:finalize! db #t)
 					  ;; (vector-set! *task-db* 0 #f)
 					  (set! *task-db* #f)))))
-                              (if (and *runremote*
-                                       (remote-conndat *runremote*))
-                                  (begin
-                                    (http-client#close-all-connections!))) ;; for http-client
+                              (http-client#close-all-connections!)
+                              ;; (if (and *runremote*
+                              ;;          (remote-conndat *runremote*))
+                              ;;     (begin
+                              ;;       (http-client#close-all-connections!))) ;; for http-client
                               (if (not (eq? *default-log-port* (current-error-port)))
                                   (close-output-port *default-log-port*))
 			      (set! *default-log-port* (current-error-port))) "Cleanup db exit thread"))
 	  (th2 (make-thread (lambda ()
 			      (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...")
@@ -858,19 +802,19 @@
       #f
       (let loop ((hed (car cmds))
 		 (tal (cdr cmds)))
 	(let ((res (with-input-from-pipe (conc "which " hed) read-line)))
 	  (if (and (string? res)
-		   (file-exists? res))
+		   (common:file-exists? res))
 	      res
 	      (if (null? tal)
 		  #f
 		  (loop (car tal)(cdr tal))))))))
   
 (define (common:get-install-area)
   (let ((exe-path (car (argv))))
-    (if (file-exists? exe-path)
+    (if (common:file-exists? exe-path)
 	(handle-exceptions
 	 exn
 	 #f
 	 (pathname-directory
 	  (pathname-directory 
@@ -886,12 +830,14 @@
 		 (tal (cdr dirs)))
 	(let ((res (or (and (directory? hed)
 			    (file-write-access? hed)
 			    hed)
 		       (handle-exceptions
-			exn
-			#f
+			   exn
+			   (begin
+			     (debug:print-info 0 *default-log-port* "could not create " hed ", this might cause problems down the road.")
+			     #f)
 			(create-directory hed #t)))))
 	  (if (and (string? res)
 		   (directory? res))
 	      res
 	      (if (null? tal)
@@ -969,10 +915,14 @@
          ;; (tags-testpatt (if tagexpr (string-join (runs:get-tests-matching-tags tagexpr) ",") #f))
          (testpatt-key  (if (args:get-arg "--modepatt") (args:get-arg "--modepatt") "TESTPATT"))
          (args-testpatt (or (args:get-arg "-testpatt") (args:get-arg "-runtests") "%"))
          (rtestpatt     (if rconf (runconfigs-get rconf testpatt-key) #f)))
     (cond
+     ((args:get-arg "--modepatt") ;; modepatt is a forced setting, when set it MUST refer to an existing PATT in the runconfig
+      (if rconf
+	  (runconfigs-get rconf testpatt-key)
+	  #f))     ;; We do NOT fall back to "%"
      ;; (tags-testpatt
      ;;  (debug:print-info 0 *default-log-port* "-tagexpr "tagexpr" selects testpatt "tags-testpatt)
      ;;  tags-testpatt)
      ((and (equal? args-testpatt "%") rtestpatt)
       (debug:print-info 0 *default-log-port* "testpatt defined in "testpatt-key" from runconfigs: " rtestpatt)
@@ -1000,30 +950,44 @@
   ;;;; TODO: catch permission denied exceptions and emit appropriate warnings, eg:  system error while trying to access file: "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/randy-slow/reproduce/q...
   (common:false-on-exception (lambda () (directory-exists? path-string))
                              message: (conc "Unable to access path: " path-string)
                              ))
 
+;; does the directory exist and do we have write access?
+;;
+;;    returns the directory or #f
+;;
+(define (common:directory-writable? path-string)
+  (handle-exceptions
+   exn
+   #f
+   (if (and (directory-exists? path-string)
+            (file-write-access? path-string))
+       path-string
+       #f)))
 
 (define (common:get-linktree)
   (or (getenv "MT_LINKTREE")
-      (or (and *configdat*
-	       (configf:lookup *configdat* "setup" "linktree"))
+      (if *configdat*
+	  (configf:lookup *configdat* "setup" "linktree")
 	  (if *toppath*
 	      (conc *toppath* "/lt")
-	      (if (file-exists? "megatest.config") ;; we are in the toppath (new area, mtutils compatible)
-		  (conc (current-directory) "/lt")
-		  #f)))))
+	      #f))))
 
 (define (common:args-get-runname)
   (let ((res (or (args:get-arg "-runname")
 		 (args:get-arg ":runname")
 		 (getenv "MT_RUNNAME"))))
     ;; (if res (set-environment-variable "MT_RUNNAME" res)) ;; not sure if this is a good idea. side effect and all ...
     res))
+
+(define (common:get-fields cfgdat)
+  (let ((fields (hash-table-ref/default cfgdat "fields" '())))
+    (map car fields)))
 
 (define (common:args-get-target #!key (split #f)(exit-if-bad #f))
-  (let* ((keys    (if (hash-table? *configdat*) (keys:config-get-fields *configdat*) '()))
+  (let* ((keys    (if (hash-table? *configdat*) (common:get-fields *configdat*) '()))
 	 (numkeys (length keys))
 	 (target  (or (args:get-arg "-reqtarg")
 		      (args:get-arg "-target")
 		      (getenv "MT_TARGET")))
 	 (tlist   (if target (string-split target "/" #t) '()))
@@ -1041,10 +1005,20 @@
 	    (begin
 	      (debug:print-error 0 *default-log-port* "Invalid target, spaces or blanks not allowed \"" target "\", target should be: " (string-intersperse keys "/") ", have " tlist " for elements")
 	      (if exit-if-bad (exit 1))
 	      #f)
 	    #f))))
+
+;; looking only (at least for now) at the MT_ variables craft the full testname
+;;
+(define (common:get-full-test-name)
+  (if (getenv "MT_TEST_NAME")
+      (if (and (getenv "MT_ITEMPATH")
+               (not (equal? (getenv "MT_ITEMPATH") "")))
+          (getenv "MT_TEST_NAME")
+          (conc (getenv "MT_TEST_NAME") "/" (getenv "MT_ITEMPATH")))
+      #f))
 
 ;; logic for getting homehost. Returns (host . at-home)
 ;; IF *toppath* is not set, wait up to five seconds trying every two seconds
 ;; (this is to accomodate the watchdog)
 ;;
@@ -1079,11 +1053,11 @@
 				 (begin
 				   (mutex-unlock! *homehost-mutex*)
 				   (debug:print 0 *default-log-port* "ERROR: Failed to read .homehost file after trying five times. Giving up and exiting, message: "  ((condition-property-accessor 'exn 'message) exn))
 				   (exit 1)))
 			   (let ((hhf (conc *toppath* "/.homehost")))
-			     (if (file-exists? hhf)
+			     (if (common:file-exists? hhf)
 				 (with-input-from-file hhf read-line)
 				 (if (file-write-access? *toppath*)
 				     (begin
 				       (with-output-to-file hhf
 					 (lambda ()
@@ -1107,14 +1081,24 @@
 	#f)))
 
 ;; do we honor the caches of the config files?
 ;;
 (define (common:use-cache?)
-  (not (or (args:get-arg "-no-cache")
-	   (and *configdat*
-		(equal? (configf:lookup *configdat* "setup" "use-cache") "no")))))
-
+  (let ((res #t)) ;; priority by order of evaluation
+    (if *configdat* ;; sillyness here. can't use setup/use-cache to know if we can use the cached files!
+	(if (equal? (configf:lookup *configdat* "setup" "use-cache") "no")
+	    (set! res #f)
+	    (if (equal? (configf:lookup *configdat* "setup" "use-cache") "yes")
+		(set! res #t))))
+    (if (args:get-arg "-no-cache")(set! res #f)) ;; overrides setting in "setup"
+    (if (getenv "MT_USE_CACHE")
+	(if (equal? (getenv "MT_USE_CACHE") "yes")
+	    (set! res #t)
+	    (if (equal? (getenv "MT_USE_CACHE") "no")
+		(set! res #f))))    ;; overrides -no-cache switch
+    res))
+  
 ;; force use of server?
 ;;
 (define (common:force-server?)
   (let* ((force-setting (configf:lookup *configdat* "server" "force"))
 	 (force-type    (if force-setting (string->symbol force-setting) #f))
@@ -1131,17 +1115,10 @@
 	(begin
 	  (debug:print-info 0 *default-log-port* "forcing use of server, force setting is \"" force-setting "\".")
 	  #t)
 	#f)))
 
-;; do we honor the caches of the config files?
-;;
-(define (common:use-cache?)
-  (not (or (args:get-arg "-no-cache")
-	   (and *configdat*
-		(equal? (configf:lookup *configdat* "setup" "use-cache") "no")))))
-
 ;;======================================================================
 ;; M I S C   L I S T S
 ;;======================================================================
 
 ;; items in lista are matched value and position in listb
@@ -1509,31 +1486,40 @@
            (set! best-load load)
            (set! best-host hostname)))))
      hosts)
     best-host))
 
-
-
-
-(define (common:wait-for-cpuload maxload numcpus waitdelay #!key (count 1000) (msg #f)(remote-host #f))
+(define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f))
   (let* ((loadavg (common:get-cpu-load remote-host))
+	 (numcpus (if (< 1 numcpus-in) ;; not possible
+		      (common:get-num-cpus remote-host)
+		      numcpus-in))
+	 (maxload (max maxload-in 0.5)) ;; so maxload must be greater than 0.5 for now BUG - FIXME?
 	 (first   (car loadavg))
 	 (next    (cadr loadavg))
-	 (adjload (* maxload numcpus))
+	 (adjload (* maxload (max 1 numcpus))) ;; possible bug where numcpus (or could be maxload) is zero, crude fallback is to at least use 1
 	 (loadjmp (- first next)))
     (cond
      ((and (> first adjload)
 	   (> count 0))
-      (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load " first " exceeding max of " adjload (if msg msg ""))
+      (debug:print-info 0 *default-log-port* "server start delayed " waitdelay " seconds due to load " first " exceeding max of " adjload " (normalized load-limit: " maxload ") " (if msg msg ""))
       (thread-sleep! waitdelay)
       (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1)))
      ((and (> loadjmp numcpus)
 	   (> count 0))
       (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load jump " loadjmp " > numcpus " numcpus (if msg msg ""))
       (thread-sleep! waitdelay)
       (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1))))))
 
+(define (common:wait-for-homehost-load maxload msg)
+  (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
+                     #f
+                     (common:get-homehost)))
+         (hh     (if hh-dat (car hh-dat) #f))
+         (numcpus (common:get-num-cpus hh)))
+    (common:wait-for-normalized-load maxload msg: msg remote-host: hh)))
+
 (define (common:get-num-cpus remote-host)
   (let ((proc (lambda ()
 		(let loop ((numcpu 0)
 			   (inl    (read-line)))
 		  (if (eof-object? inl)
@@ -1550,11 +1536,11 @@
 
 ;; wait for normalized cpu load to drop below maxload
 ;;
 (define (common:wait-for-normalized-load maxload #!key (msg #f)(remote-host #f))
   (let ((num-cpus (common:get-num-cpus remote-host)))
-    (common:wait-for-cpuload maxload num-cpus 15 msg: msg)))
+    (common:wait-for-cpuload maxload num-cpus 15 msg: msg remote-host: remote-host)))
 
 (define (get-uname . params)
   (let* ((uname-res (process:cmd-run->list (conc "uname " (if (null? params) "-a" (car params)))))
 	 (uname #f))
     (if (null? (car uname-res))
@@ -1708,11 +1694,12 @@
 		      (let* ((key   (car keyval))
 			     (val   (cdr keyval))
 			     (delim (if (string-search whitesp val) 
 					"\""
 					"")))
-			(print (if (member key ignorevars)
+			(print (if (or (member key ignorevars)
+				       (string-search whitesp key))
 				   "# setenv "
 				   "setenv ")
 			       key " " delim (mungeval val) delim)))
 		    envvars)))
      (with-output-to-file (conc fname ".sh")
@@ -1722,10 +1709,11 @@
 			     (val (cdr keyval))
 			     (delim (if (string-search whitesp val) 
 					"\""
 					"")))
 			(print (if (or (member key ignorevars)
+				       (string-search whitesp key)
 				       (string-search ":" key)) ;; internal only values to be skipped.
 				   "# export "
 				   "export ")
 			       key "=" delim (mungeval val) delim)))
                     envvars)))))
@@ -1739,11 +1727,11 @@
 		    (let* ((var (car  p))
 			   (val (cadr p))
 			   (prv (get-environment-variable var)))
 		      (set! res (cons (list var prv) res))
 		      (if val 
-			  (setenv var (->string val))
+			  (safe-setenv var (->string val))
 			  (unsetenv var))))
 		  lst)
 	res)
       '()))
 
@@ -2091,21 +2079,29 @@
           (number->string x 16))
         (map string->number
              (string-split instr)))
    "/"))
 
-(define (common:faux-lock keyname)
-  (if (rmt:get-var keyname)
-      #f
+(define (common:faux-lock keyname #!key (wait-time 8))
+  (if (rmt:no-sync-get/default keyname #f) ;; do not be tempted to compare to pid. locking is a one-shot action, if already locked for this pid it doesn't actually count
+      (if (> wait-time 0)
+	  (begin
+	    (thread-sleep! 1)
+	    (if (eq? wait-time 1) ;; only one second left, steal the lock
+		(begin
+		  (debug:print-info 0 *default-log-port* "stealing lock for " keyname)
+		  (common:faux-unlock keyname force: #t)))
+	    (common:faux-lock keyname wait-time: (- wait-time 1)))
+	  #f)
       (begin
-        (rmt:set-var keyname (conc (current-process-id)))
-        (equal? (conc (current-process-id)) (conc (rmt:get-var keyname))))))
+        (rmt:no-sync-set keyname (conc (current-process-id)))
+        (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f))))))
 
 (define (common:faux-unlock keyname #!key (force #f))
-  (if (or force (equal? (conc (current-process-id)) (conc (rmt:get-var keyname))))
+  (if (or force (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f))))
       (begin
-        (if (rmt:get-var keyname) (rmt:del-var keyname))
+        (if (rmt:no-sync-get/default keyname #f) (rmt:no-sync-del! keyname))
         #t)
       #f))
 
   
 (define (common:in-running-test?)
@@ -2120,93 +2116,81 @@
    ((equal? status "KILLREQ") "purple")
    ((equal? status "RUNNING") "blue")
    ((equal? status "ABORT")   "brown")
    (else "black")))
 
-;;======================================================================
-;; N A N O M S G   C L I E N T
-;;======================================================================
-
-(define (server:get-best-guess-address hostname)
-  (let ((res #f))
-    (for-each 
-     (lambda (adr)
-       (if (not (eq? (u8vector-ref adr 0) 127))
-	   (set! res adr)))
-     ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME
-     (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
-    (string-intersperse 
-     (map number->string
-	  (u8vector->list
-	   (if res res (hostname->ip hostname)))) ".")))
-
-
-(define (common:send-dboard-main-changed)
-  (let* ((dashboard-ips (mddb:get-dashboards)))
-    (for-each
-     (lambda (ipadr)
-       (let* ((soc (common:open-nm-req (conc "tcp://" ipadr)))
-	      (msg (conc "main " *toppath*))
-	      (res (common:nm-send-receive-timeout soc msg)))
-	 (if (not res) ;; couldn't reach that dashboard - remove it from db
-	     (print "ERROR: couldn't reach dashboard " ipadr))
-	 res))
-     dashboard-ips)))
-    
-    
-;;======================================================================
-;; D A S H B O A R D   D B 
-;;======================================================================
-
-(define (mddb:open-db)
-  (let* ((db (open-database (conc (get-environment-variable "HOME") "/.dashboard.db"))))
-    (set-busy-handler! db (busy-timeout 10000))
-    (for-each
-     (lambda (qry)
-       (exec (sql db qry)))
-     (list 
-      "CREATE TABLE IF NOT EXISTS vars       (id INTEGER PRIMARY KEY,key TEXT, val TEXT, CONSTRAINT varsconstraint UNIQUE (key));"
-      "CREATE TABLE IF NOT EXISTS dashboards (
-          id         INTEGER PRIMARY KEY,
-          pid        INTEGER,
-          username   TEXT,
-          hostname   TEXT,
-          ipaddr     TEXT,
-          portnum    INTEGER,
-          start_time TIMESTAMP DEFAULT (strftime('%s','now')),
-             CONSTRAINT hostport UNIQUE (hostname,portnum)
-        );"
-      ))
-    db))
-
-;; register a dashboard 
-;;
-(define (mddb:register-dashboard port)
-  (let* ((pid      (current-process-id))
-	 (hostname (get-host-name))
-	 (ipaddr   (server:get-best-guess-address hostname))
-	 (username (current-user-name)) ;; (car userinfo)))
-	 (db      (mddb:open-db)))
-    (print "Register monitor, pid: " pid ", hostname: " hostname ", port: " port ", username: " username)
-    (exec (sql db "INSERT OR REPLACE INTO dashboards (pid,username,hostname,ipaddr,portnum) VALUES (?,?,?,?,?);")
-	   pid username hostname ipaddr port)
-    (close-database db)))
-
-;; unregister a monitor
-;;
-(define (mddb:unregister-dashboard host port)
-  (let* ((db      (mddb:open-db)))
-    (print "Register unregister monitor, host:port=" host ":" port)
-    (exec (sql db "DELETE FROM dashboards WHERE hostname=? AND portnum=?;") host port)
-    (close-database db)))
-
-;; get registered dashboards
-;;
-(define (mddb:get-dashboards)
-  (let ((db (mddb:open-db)))
-    (query fetch-column
-	   (sql db "SELECT ipaddr || ':' || portnum FROM dashboards;"))))
+;; ;;======================================================================
+;; ;; N A N O M S G   C L I E N T
+;; ;;======================================================================
+;; 
+;; 
+;; 
+;; (define (common:send-dboard-main-changed)
+;;   (let* ((dashboard-ips (mddb:get-dashboards)))
+;;     (for-each
+;;      (lambda (ipadr)
+;;        (let* ((soc (common:open-nm-req (conc "tcp://" ipadr)))
+;; 	      (msg (conc "main " *toppath*))
+;; 	      (res (common:nm-send-receive-timeout soc msg)))
+;; 	 (if (not res) ;; couldn't reach that dashboard - remove it from db
+;; 	     (print "ERROR: couldn't reach dashboard " ipadr))
+;; 	 res))
+;;      dashboard-ips)))
+;;     
+;;     
+;; ;;======================================================================
+;; ;; D A S H B O A R D   D B 
+;; ;;======================================================================
+;; 
+;; (define (mddb:open-db)
+;;   (let* ((db (open-database (conc (get-environment-variable "HOME") "/.dashboard.db"))))
+;;     (set-busy-handler! db (busy-timeout 10000))
+;;     (for-each
+;;      (lambda (qry)
+;;        (exec (sql db qry)))
+;;      (list 
+;;       "CREATE TABLE IF NOT EXISTS vars       (id INTEGER PRIMARY KEY,key TEXT, val TEXT, CONSTRAINT varsconstraint UNIQUE (key));"
+;;       "CREATE TABLE IF NOT EXISTS dashboards (
+;;           id         INTEGER PRIMARY KEY,
+;;           pid        INTEGER,
+;;           username   TEXT,
+;;           hostname   TEXT,
+;;           ipaddr     TEXT,
+;;           portnum    INTEGER,
+;;           start_time TIMESTAMP DEFAULT (strftime('%s','now')),
+;;              CONSTRAINT hostport UNIQUE (hostname,portnum)
+;;         );"
+;;       ))
+;;     db))
+;; 
+;; ;; register a dashboard 
+;; ;;
+;; (define (mddb:register-dashboard port)
+;;   (let* ((pid      (current-process-id))
+;; 	 (hostname (get-host-name))
+;; 	 (ipaddr   (server:get-best-guess-address hostname))
+;; 	 (username (current-user-name)) ;; (car userinfo)))
+;; 	 (db      (mddb:open-db)))
+;;     (print "Register monitor, pid: " pid ", hostname: " hostname ", port: " port ", username: " username)
+;;     (exec (sql db "INSERT OR REPLACE INTO dashboards (pid,username,hostname,ipaddr,portnum) VALUES (?,?,?,?,?);")
+;; 	   pid username hostname ipaddr port)
+;;     (close-database db)))
+;; 
+;; ;; unregister a monitor
+;; ;;
+;; (define (mddb:unregister-dashboard host port)
+;;   (let* ((db      (mddb:open-db)))
+;;     (print "Register unregister monitor, host:port=" host ":" port)
+;;     (exec (sql db "DELETE FROM dashboards WHERE hostname=? AND portnum=?;") host port)
+;;     (close-database db)))
+;; 
+;; ;; get registered dashboards
+;; ;;
+;; (define (mddb:get-dashboards)
+;;   (let ((db (mddb:open-db)))
+;;     (query fetch-column
+;; 	   (sql db "SELECT ipaddr || ':' || portnum FROM dashboards;"))))
     
 ;;======================================================================
 ;;  T E S T   L A U N C H I N G   P E R   I T E M   W I T H   H O S T   T Y P E S
 ;;======================================================================
 ;; 
@@ -2271,12 +2255,113 @@
 ;;
 (define (common:load-views-config)
   (let* ((view-cfgdat    (make-hash-table))
 	 (home-cfgfile   (conc (get-environment-variable "HOME") "/.mtviews.config"))
 	 (mthome-cfgfile (conc *toppath* "/.mtviews.config")))
-    (if (file-exists? mthome-cfgfile)
+    (if (common:file-exists? mthome-cfgfile)
 	(read-config mthome-cfgfile view-cfgdat #t))
     ;; we load the home dir file AFTER the MTRAH file so the user can clobber settings when running the dashboard in read-only areas
-    (if (file-exists? home-cfgfile)
+    (if (common:file-exists? home-cfgfile)
 	(read-config home-cfgfile view-cfgdat #t))
     view-cfgdat))
+
+;;======================================================================
+;; Manage pkts, used in servers, tests and likely other contexts so put
+;; in common
+;;======================================================================
+
+(define common:pkt-spec
+  '((server . ((action    . a)
+	       (pid       . d)
+	       (ipaddr    . i)
+	       (port      . p)))
+    			  
+    (test   . ((cpuuse    . c)
+	       (diskuse   . d)
+	       (item-path . i)
+	       (runname   . r)
+	       (state     . s)
+	       (target    . t)
+	       (status    . u)))))
+
+(define (common:get-pkts-dirs mtconf use-lt)
+  (let* ((pktsdirs-str (or (configf:lookup mtconf "setup"  "pktsdirs")
+			   (and use-lt
+				(conc *toppath* "/lt/.pkts"))))
+	 (pktsdirs  (if pktsdirs-str
+			(string-split pktsdirs-str " ")
+			#f)))
+    pktsdirs))
+
+;; use-lt is use linktree "lt" link to find pkts dir
+(define (common:with-queue-db mtconf proc #!key (use-lt #f)(toppath-in #f))
+  (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt))
+	 (pktsdir  (if pktsdirs (car pktsdirs) #f))
+	 (toppath  (or (configf:lookup mtconf "scratchdat" "toppath")
+		       toppath-in))
+	 (pdbpath  (or (configf:lookup mtconf "setup"  "pdbpath") pktsdir)))
+    (cond
+     ((not (and  pktsdir toppath pdbpath))
+      (debug:print 0 *default-log-port* "ERROR: settings are missing in your megatest.config for area management.")
+      (debug:print  0 *default-log-port* "  you need to have pktsdir in the [setup] section."))
+     ((not (common:file-exists? pktsdir))
+      (debug:print 0 *default-log-port* "ERROR: pkts directory not found " pktsdir))
+     ((not (equal? (file-owner pktsdir)(current-effective-user-id)))
+      (debug:print 0 *default-log-port* "ERROR: directory " pktsdir " is not owned by " (current-effective-user-name)))
+     (else
+	(let* ((pdb  (open-queue-db pdbpath "pkts.db"
+				    schema: '("CREATE TABLE groups (id INTEGER PRIMARY KEY,groupname TEXT, CONSTRAINT group_constraint UNIQUE (groupname));"))))
+	  (proc pktsdirs pktsdir pdb)
+	  (dbi:close pdb))))))
+
+(define (common:load-pkts-to-db mtconf)
+  (common:with-queue-db
+   mtconf
+   (lambda (pktsdirs pktsdir pdb)
+     (for-each
+      (lambda (pktsdir) ;; look at all
+	(cond
+	 ((not (common:file-exists? pktsdir))
+	  (debug:print 0 *default-log-port* "ERROR: packets directory " pktsdir " does not exist."))
+	 ((not (directory? pktsdir))
+	  (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not a directory."))
+	 ((not (file-read-access? pktsdir))
+	  (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not readable."))
+	 (else
+	  (debug:print-info 0 *default-log-port* "Loading packets found in " pktsdir)
+	  (let ((pkts (glob (conc pktsdir "/*.pkt"))))
+	    (for-each
+	     (lambda (pkt)
+	       (let* ((uuid    (cadr (string-match ".*/([0-9a-f]+).pkt" pkt)))
+		      (exists  (lookup-by-uuid pdb uuid #f)))
+		 (if (not exists)
+		     (let* ((pktdat (string-intersperse
+				     (with-input-from-file pkt read-lines)
+				     "\n"))
+			    (apkt   (pkt->alist pktdat))
+			    (ptype  (alist-ref 'T apkt)))
+		       (add-to-queue pdb pktdat uuid (or ptype 'cmd) #f 0)
+		       (debug:print 4 *default-log-port* "Added " uuid " of type " ptype " to queue"))
+		     (debug:print 4 *default-log-port* "pkt: " uuid " exists, skipping...")
+		     )))
+	     pkts)))))
+      pktsdirs))))
+
+(define (common:get-pkt-alists pkts)
+  (map (lambda (x)
+	 (alist-ref 'apkt x)) ;; 'pkta pulls out the alist from the read pkt
+       pkts))
+
+;; given list of pkts (alist mode) return list of D cards as Unix epoch, sorted descending
+;; also delete duplicates by target i.e. (car pkt)
+;;
+(define (common:get-pkt-times pkts)
+  (delete-duplicates
+   (sort 
+    (map (lambda (x)
+	   `(,(alist-ref 't x) . ,(string->number (alist-ref 'D x))))
+	 pkts)
+    (lambda (a b)(> (cdr a)(cdr b))))      ;; sort descending
+   (lambda (a b)(equal? (car a)(car b))))) ;; remove duplicates by target
+
+
 

Index: configf.scm
==================================================================
--- configf.scm
+++ configf.scm
@@ -23,18 +23,18 @@
 
 ;; return list (path fullpath configname)
 (define (find-config configname #!key (toppath #f))
   (if toppath
       (let ((cfname (conc toppath "/" configname)))
-	(if (file-exists? cfname)
+	(if (common:file-exists? cfname)
 	    (list toppath cfname configname)
 	    (list #f      #f     #f)))
       (let* ((cwd (string-split (current-directory) "/")))
 	(let loop ((dir cwd))
 	  (let* ((path     (conc "/" (string-intersperse dir "/")))
 		 (fullpath (conc path "/" configname)))
-	    (if (file-exists? fullpath)
+	    (if (common:file-exists? fullpath)
 		(list path fullpath configname)
 		(let ((remcwd (take dir (- (length dir) 1))))
 		  (if (null? remcwd)
 		      (list #f #f #f) ;;  #f #f) 
 		  (loop remcwd)))))))))
@@ -228,11 +228,11 @@
 (define (read-config path ht allow-system #!key (environ-patt #f)            (curr-section #f)
 		     (sections #f)              (settings (make-hash-table)) (keep-filenames #f)
 		     (post-section-procs '())   (apply-wildcards #t))
   (debug:print 9 *default-log-port* "START: " path)
   (if (and (not (port? path))
-	   (not (file-exists? path))) ;; for case where we are handed a port
+	   (not (common:file-exists? path))) ;; for case where we are handed a port
       (begin 
 	(debug:print-info 1 *default-log-port* "read-config - file not found " path " current path: " (current-directory))
 	;; WARNING: This is a risky change but really, we should not return an empty hash table if no file read?
 	#f) ;; (if (not ht)(make-hash-table) ht))
       (let ((inp        (if (string? path)
@@ -281,11 +281,11 @@
 										(common:nice-path 
 										 (conc (if curr-conf-dir
 											   curr-conf-dir
 											   ".")
 										       "/" include-file)))))
-							(if (file-exists? full-conf)
+							(if (common:file-exists? full-conf)
 							    (begin
 							      ;; (push-directory conf-dir)
 							      (debug:print 9 *default-log-port* "Including: " full-conf)
 							      (read-config full-conf res allow-system environ-patt: environ-patt curr-section: curr-section-name sections: sections settings: settings keep-filenames: keep-filenames)
 							      ;; (pop-directory)
@@ -297,11 +297,11 @@
 	       (configf:script-rx ( x include-script params);; handle-exceptions
                                   ;;    exn
                                   ;;    (begin
                                   ;;      (debug:print '(0 2 9) #f "INFO: include from script " include-script " failed.")
                                   ;;      (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f))
-							 (if (and (file-exists? include-script)(file-execute-access? include-script))
+							 (if (and (common:file-exists? include-script)(file-execute-access? include-script))
 							     (let* ((new-inp-port (open-input-pipe (conc include-script " " params))))
 							       (debug:print '(2 9) *default-log-port* "Including from script output: " include-script)
 							      ;;  (print "We got here, calling read-config next. Port is: " new-inp-port)
 							       (read-config new-inp-port res allow-system environ-patt: environ-patt curr-section: curr-section-name sections: sections settings: settings keep-filenames: keep-filenames)
 							       (close-input-port new-inp-port)
@@ -320,10 +320,11 @@
 								 (proc curr-section-name section-name res path))))
 							 post-section-procs)
                                                         ;; after gathering the vars for a section and if apply-wildcards is true and if there is a wildcard in the section name process wildcards
                                                         ;; NOTE: we are processing the curr-section-name, NOT section-name.
                                                         (process-wildcards res curr-section-name)
+							(if (not (hash-table-ref/default res section-name #f))(hash-table-set! res section-name '())) ;; ensure that mere mention of a section is not lost
 							(loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings)
 							      ;; if we have the sections list then force all settings into "" and delete it later?
 							      ;; (if (or (not sections) 
 							      ;;	      (member section-name sections))
 							      ;;	  section-name "") ;; stick everything into "". NOPE: We need new strategy. Put stuff in correct sections and then delete all sections later.
@@ -400,11 +401,11 @@
   (let* ((curr-dir   (current-directory))
          (configinfo (find-config fname toppath: given-toppath))
 	 (toppath    (car configinfo))
 	 (configfile (cadr configinfo))
 	 (set-fields (lambda (curr-section next-section ht path)
-		       (let ((field-names (if ht (keys:config-get-fields ht) '()))
+		       (let ((field-names (if ht (common:get-fields ht) '()))
 			     (target      (or (getenv "MT_TARGET")(args:get-arg "-reqtarg")(args:get-arg "-target"))))
 			 (debug:print-info 9 *default-log-port* "set-fields with field-names=" field-names " target=" target " curr-section=" curr-section " next-section=" next-section " path=" path " ht=" ht)
 			 (if (not (null? field-names))(keys:target-set-args field-names target #f))))))
     (if toppath (change-directory toppath)) 
     (if (and toppath pathenvvar)(setenv pathenvvar toppath))
@@ -423,12 +424,34 @@
 		  (cadr match)
 		  #f))
 	    ))
       #f))
 
+;; use to have definitive setting:
+;;  [foo]
+;;  var yes
+;;
+;;  (configf:var-is? cfgdat "foo" "var" "yes") => #t
+;;
+(define (configf:var-is? cfgdat section var expected-val)
+  (equal? (configf:lookup cfgdat section var) expected-val))
+
 (define configf:lookup config-lookup)
 (define configf:read-file read-config)
+
+;; safely look up a value that is expected to be a number, return
+;; a default (#f unless provided)
+;;
+(define (configf:lookup-number cfdat section varname #!key (default #f))
+  (let* ((val (configf:lookup *configdat* section varname))
+         (res (if val
+                  (string->number (string-substitute "\\s+" "" val #t))
+                  #f)))
+    (cond
+     (res  res)
+     (val  (debug:print 0 *default-log-port* "ERROR: no number found for [" section "], " varname ", got: " val))
+     (else default))))
 
 (define (configf:section-vars cfgdat section)
   (let ((sectdat (hash-table-ref/default cfgdat section '())))
     (if (null? sectdat)
 	'()
@@ -487,11 +510,11 @@
 	  (if (null? tal)
 	      newres
 	      (loop (car tal)(cdr tal) newres))))))
 
 (define (configf:file->list fname)
-  (if (file-exists? fname)
+  (if (common:file-exists? fname)
       (let ((inp (open-input-file fname)))
 	(let loop ((inl (read-line inp))
 		   (res '()))
 	  (if (eof-object? inl)
 	      (begin
@@ -591,11 +614,11 @@
 
 ;; reads a refdb into an assoc array of assoc arrays
 ;;   returns (list dat msg)
 (define (configf:read-refdb refdb-path)
   (let ((sheets-file  (conc refdb-path "/sheet-names.cfg")))
-    (if (not (file-exists? sheets-file))
+    (if (not (common:file-exists? sheets-file))
 	(list #f (conc "ERROR: no refdb found at " refdb-path))
 	(if (not (file-read-access? sheets-file))
 	    (list #f (conc "ERROR: refdb file not readable at " refdb-path))
 	    (let* ((sheets (with-input-from-file sheets-file
 			     (lambda ()
@@ -663,36 +686,33 @@
       #f
     (configf:alist->config
      (with-input-from-file fname read))))
 
 (define (configf:write-alist cdat fname)
-    (if (common:faux-lock fname)
-        (let* ((dat  (configf:config->alist cdat))
-               (res
-                (begin
-                  (with-output-to-file fname ;; first write out the file
-                    (lambda ()
-                      (pp dat)))
-                  
-                  (if (common:file-exists? fname)   ;; now verify it is readable
-                      (if (configf:read-alist fname)
-                          #t ;; data is good.
-                          (begin
-                            (handle-exceptions
-                             exn
-                             #f
-                             (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.")
-                             (delete-file fname))
-                            #f))
-                      #f))))
-          
-          (common:faux-unlock fname)
-          res)
-        (begin
-          (debug:print 0 *default-log-port* "WARNING: could not get faux-lock on " fname)
-          #f)))
-
+  (if (not (common:faux-lock fname))
+      (debug:print 0 *default-log-port* "INFO: Could not get lock on " fname))
+  (let* ((dat  (configf:config->alist cdat))
+         (res
+          (begin
+            (with-output-to-file fname ;; first write out the file
+              (lambda ()
+                (pp dat)))
+            
+            (if (common:file-exists? fname)   ;; now verify it is readable
+                (if (configf:read-alist fname)
+                    #t ;; data is good.
+                    (begin
+                      (handle-exceptions
+                       exn
+                       #f
+                       (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.")
+                       (delete-file fname))
+                      #f))
+                #f))))
+    (common:faux-unlock fname)
+    res))
+  
 ;; convert hierarchial list to ini format
 ;;
 (define (configf:config->ini data)
   (map 
    (lambda (section)

Index: dashboard-tests.scm
==================================================================
--- dashboard-tests.scm
+++ dashboard-tests.scm
@@ -234,11 +234,11 @@
 
 ;; if there is a submegatest create a button to launch dashboard in that area
 ;;
 (define (submegatest-panel dbstruct keydat testdat runname testconfig)
   (let* ((subarea (configf:lookup testconfig "setup" "submegatest"))
-	 (area-exists (and subarea (file-exists? subarea))))
+	 (area-exists (and subarea (common:file-exists? subarea))))
     ;; (debug:print-info 0 *default-log-port* "Megatest subarea=" subarea ", area-exists=" area-exists)
     (if subarea
 	(iup:frame 
 	 #:title "Megatest Run Info" ; #:expand "YES"
 	 (iup:button
@@ -458,11 +458,11 @@
 			    "/"))
 	       (item-path  (db:test-get-item-path testdat))
 	       ;; this next block was added to fix a bug where variables were
                ;; needed. Revisit this.
 	       (runconfig  (let ((runconfigf (conc  *toppath* "/runconfigs.config"))) ;; no rush but it would be good to convert this call to use runconfig:read
-	 		     (if (file-exists? runconfigf)
+	 		     (if (common:file-exists? runconfigf)
 	 			 (handle-exceptions
                                    exn
                                    #f  ;; do nothing, just keep on trucking ....
                                    (setup-env-defaults runconfigf run-id (make-hash-table) keydat environ-patt: keystring))
 	 			 (make-hash-table))))
@@ -472,18 +472,18 @@
 				(handle-exceptions
 				 exn  ;; NOTE: I've no idea why this was written this way. Research, study and fix needed!
 				 (tests:get-testconfig (db:test-get-testname testdat) (db:test-get-item-path testdat) test-registry #f)
 				 (tests:get-testconfig (db:test-get-testname testdat) item-path test-registry #t))))
 	       (viewlog    (lambda (x)
-			     (if (file-exists? logfile)
+			     (if (common:file-exists? logfile)
 					;(system (conc "firefox " logfile "&"))
 				 (dcommon:run-html-viewer logfile)
 				 (message-window (conc "File " logfile " not found")))))
 	       (view-a-log (lambda (lfile) 
 			     (let ((lfilename (conc rundir "/" lfile)))
 			       ;; (print "lfilename: " lfilename)
-			       (if (file-exists? lfilename)
+			       (if (common:file-exists? lfilename)
 					;(system (conc "firefox " logfile "&"))
 				   (dcommon:run-html-viewer lfilename)
 				   (message-window (conc "File " lfilename " not found"))))))
 	       (xterm      (lambda (x)
 			     (if (directory-exists? rundir)
@@ -496,11 +496,11 @@
 				    "MT_.*"))
 				 (message-window  (conc "Directory " rundir " not found")))))
 	       (widgets    (make-hash-table))
 	       (refreshdat (lambda ()
 			     (let* ((curr-mod-time (file-modification-time db-path))
-				                   ;;     (max ..... (if (file-exists? testdat-path)
+				                   ;;     (max ..... (if (common:file-exists? testdat-path)
 						   ;;      	      (file-modification-time testdat-path)
 						   ;;      	      (begin
 						   ;;      		(set! testdat-path (conc rundir "/testdat.db"))
 						   ;;      		0))))
 				    (need-update   (or (and (>= curr-mod-time db-mod-time)

Index: dashboard.scm
==================================================================
--- dashboard.scm
+++ dashboard.scm
@@ -85,10 +85,11 @@
 			"-v"
 			"-q"
 			"-use-db-cache"
 			"-skip-version-check"
 			"-repl"
+                        "-rh5.11" ;; fix to allow running on rh5.11
 			)
 		 args:arg-hash
 		 0))
 
 (if (not (null? remargs))
@@ -105,10 +106,17 @@
 ;;
 (if (not (launch:setup))
     (begin
       (print "Failed to find megatest.config, exiting") 
       (exit 1)))
+
+;; deal with RH 5.11 gtk lib or iup lib missing detachbox feature
+;; first check for the switch
+;;
+(if (or (args:get-arg "-rh5.11")
+	(configf:lookup *configdat* "dashboard" "no-detachbox"))
+    (set! iup:detachbox iup:vbox))
 
 (if (not (common:on-homehost?))
     (begin
       (debug:print 0 *default-log-port* "WARNING: Current policy requires running dashboard on homehost: " (common:get-homehost))))
     
@@ -204,11 +212,13 @@
   ((allruns-by-id    (make-hash-table))  : hash-table)  ;; hash of run-id -> dboard:rundat records
   ((done-runs       '())                 : list)        ;; list of runs already drawn
   ((not-done-runs   '())                 : list)        ;; list of runs not yet drawn
   (header            #f)                                ;; header for decoding the run records
   (keys              #f)                                ;; keys for this run (i.e. target components)
-  ((numruns          (string->number (or (args:get-arg "-cols") "10")))                 : number)      ;; 
+  ((numruns          (string->number (or (args:get-arg "-cols")
+					 (configf:lookup *configdat* "dashboard" "cols")
+					 "8")))                 : number)      ;; 
   ((tot-runs          0)                 : number)
   ((last-data-update  0)                 : number)      ;; last time the data in allruns was updated
   ((last-runs-update  0)                 : number)      ;; last time we pulled the runs info to update the tree
   (runs-mutex         (make-mutex))                     ;; use to prevent parallel access to draw objects
   ((run-update-times  (make-hash-table)) : hash-table)  ;; update times indexed by run-id
@@ -222,11 +232,11 @@
   (runs-matrix        #f)                               ;; used in newdashboard
   ((start-run-offset   0)                : number)      ;; left-right slider value
   ((start-test-offset  0)                : number)      ;; up-down slider value
   ((runs-btn-height    (or (configf:lookup *configdat* "dashboard" "btn-height") "x16")) : string)  ;; was 12
   ((runs-btn-fontsz    (or (configf:lookup *configdat* "dashboard" "btn-fontsz") "10")) : string)   ;; was 8
-  ((runs-cell-width    (or (configf:lookup *configdat* "dashboard" "cell-width") "60")) : string)   ;; was 50
+  ((runs-cell-width    (or (configf:lookup *configdat* "dashboard" "cell-width") "50")) : string)   ;; was 50
   ((all-test-names     '())              : list)
   
   ;; Canvas and drawing data
   (cnv                #f)
   (cnv-obj            #f)
@@ -1085,10 +1095,11 @@
     (dboard:tabdat-filters-changed-set! tabdat #t)))
 
 (define (update-search commondat tabdat x val)
   (hash-table-set! (dboard:tabdat-searchpatts tabdat) x val)
   (dboard:tabdat-filters-changed-set! tabdat #t)
+  (mark-for-update tabdat)
   (set-bg-on-filter commondat tabdat))
 
 ;; force ALL updates to zero (effectively)
 ;;
 (define (mark-for-update tabdat)
@@ -1376,11 +1387,11 @@
 	     ;; Target, testpatt, state and status input boxes
 	     ;;
 	     (iup:vbox
 	      ;; Command to run, placed over the top of the canvas
 	      (dcommon:command-action-selector commondat tabdat tab-num: tab-num)
-	      (dboard:runs-tree-browser commondat tabdat)
+              (dboard:runs-tree-browser commondat tabdat)
 	      (dcommon:command-runname-selector commondat tabdat tab-num: tab-num)
 	      (dcommon:command-testname-selector commondat tabdat update-keyvals))
 	     ;;  key-listboxes))
 	     (dcommon:command-tests-tasks-canvas tabdat test-records sorted-testnames tests-draw-state))))
 	   (tb (dboard:tabdat-runs-tree tabdat)))
@@ -1397,14 +1408,20 @@
  ;; (let ((logs-tb (iup:textbox #:expand "YES"
  ;;				   #:multiline "YES")))
  ;;	 (dboard:tabdat-logs-textbox-set! tabdat logs-tb)
  ;;	 logs-tb))
 
+;; browse runs as a tree. Used in both "Runs" tab and
+;; in the runs control panel.
+;;
 (define (dboard:runs-tree-browser commondat tabdat)
-  (let* ((txtbox (iup:textbox #:action (lambda (val a b)
+  (let* (
+	 (txtbox (iup:textbox #:action (lambda (val a b)
 					 (debug:catch-and-dump
 					  (lambda ()
+					    ;; for the Runs view we put the list of keyvals into tabdat target
+					    ;; for the Run Controls we put then update the run-command
 					    (if b (dboard:tabdat-target-set! tabdat (string-split b "/")))
 					    (dashboard:update-run-command tabdat))
 					  "command-testname-selector tb action"))
 			      #:value (dboard:test-patt->lines
 				       (dboard:tabdat-test-patts-use tabdat))
@@ -1414,11 +1431,11 @@
 	 (tb
           (iup:treebox
            #:value 0
            #:name "Runs"
            #:expand "YES"
-           #:addexpanded "NO"
+           #:addexpanded "YES"
            #:size "10x"
            #:selection-cb
            (lambda (obj id state)
              (debug:catch-and-dump
               (lambda ()
@@ -1440,11 +1457,14 @@
                       (debug:print-error 5 *default-log-port* "tree-path->run-id returned non-number " run-id))))
               "treebox"))
            ;; (print "path: " (tree:node->path obj id) " run-id: " run-id)
            )))
     (dboard:tabdat-runs-tree-set! tabdat tb)
-    (iup:vbox tb txtbox)))
+    (iup:detachbox
+     (iup:vbox 
+      tb
+      txtbox))))
 
 ;;======================================================================
 ;; R U N   C O N T R O L S
 ;;======================================================================
 ;;
@@ -1509,11 +1529,11 @@
      (iup:vbox
       (iup:split
        #:orientation "HORIZONTAL"
        #:value 800
       (let* ((cnv-obj (iup:canvas 
-		       ;; #:size "500x400"
+		       ;; #:size "250x250" ;; "500x400"
 		       #:expand "YES"
 		       #:scrollbar "YES"
 		       #:posx "0.5"
 		       #:posy "0.5"
 		       #:action (make-canvas-action
@@ -1554,15 +1574,15 @@
       (let* ((hb1 (iup:hbox))
              (graph-cell-table (dboard:tabdat-graph-cell-table tabdat))
              (changed #f)
              (graph-matrix (iup:matrix
                            #:alignment1 "ALEFT"
-                           #:expand "YES" ;; "HORIZONTAL"
+                           ;; #:expand "YES" ;; "HORIZONTAL"
                            #:scrollbar "YES"
                            #:numcol 10
                            #:numlin 20
-                           #:numcol-visible (min 8)
+                           #:numcol-visible 5 ;; (min 8)
                            #:numlin-visible 1
                            #:click-cb
                            (lambda (obj row col status)
                              (let*
                                  ((graph-cell (conc row ":" col))
@@ -1865,11 +1885,11 @@
 (define (dashboard:summary commondat tabdat #!key (tab-num #f))
   (let* ((rawconfig        (read-config (conc *toppath* "/megatest.config") #f #f)) ;; changed to #f since I want #{} to be expanded by [system ...] to NOT be expanded. WAS: 'return-string)))
 	 (changed          #f))
     (iup:vbox
      (iup:split
-      #:value 500
+      #:value 300
       (iup:frame 
        #:title "General Info"
        (iup:vbox
 	(iup:hbox
 	 (iup:label "Area Path")
@@ -1902,11 +1922,11 @@
   (let* ((success #t) ;; at any stage of the process set this flag to #f to skip downstream steps. Intention here is to recover gracefully if user provided tabs fail to load.
 	 (source  (configf:lookup views-cfgdat view-name "source"))
 	 (viewgen (configf:lookup views-cfgdat view-name "viewgen"))
 	 (updater (configf:lookup views-cfgdat view-name "updater"))
 	 (result-child #f))
-    (if (and (file-exists? source)
+    (if (and (common:file-exists? source)
 	     (file-read-access? source))
 	(handle-exceptions
 	 exn
 	 (begin
 	   (print-call-chain)
@@ -2048,11 +2068,11 @@
   (let* ((update-mutex (dboard:commondat-update-mutex commondat))
 	 (tb      (iup:treebox
 		   #:value 0
 		   #:name "Runs"
 		   #:expand "YES"
-		   #:addexpanded "NO"
+		   #:addexpanded "YES"
 		   #:selection-cb
 		   (lambda (obj id state)
 		     (debug:catch-and-dump
 		      (lambda ()
 			;; (print "obj: " obj ", id: " id ", state: " state)
@@ -2159,10 +2179,28 @@
 
 ;;======================================================================
 ;; R U N S 
 ;;======================================================================
 
+(define (dboard:squarify toggles size)
+  (let loop ((hed (car toggles))
+	     (tal (cdr toggles))
+	     (cur '())
+	     (res '()))
+    (let* ((ovrflo (>= (length cur) size))
+	   (newcur (if ovrflo
+		       (list hed)
+		       (cons hed cur)))
+	   (newres (if ovrflo
+		       (cons cur res)
+		       res)))
+      (if (null? tal)
+	  (if ovrflo
+	      newres
+	      (cons newcur res))
+	  (loop (car tal)(cdr tal) newcur newres)))))
+
 (define (dboard:make-controls commondat tabdat #!key (extra-widget #f) )
   (let ((btn-fontsz  (dboard:tabdat-runs-btn-fontsz tabdat)))
     (iup:hbox
      (iup:vbox
       (iup:frame 
@@ -2223,17 +2261,17 @@
 						    (mark-for-update tabdat))))
 		(default-cmd (car (list-ref *tests-sort-type-index* *tests-sort-reverse*))))
                 
 	   (iuplistbox-fill-list sort-lb cmds-list selected-item: default-cmd)
 	   
-	   (set! hide-empty (iup:button "HideEmpty"
-					;; #:expand HORIZONTAL"
-					#:expand "NO" #:size "80x15"
-					#:action (lambda (obj)
-						   (dboard:tabdat-hide-empty-runs-set! tabdat (not (dboard:tabdat-hide-empty-runs tabdat)))
-						   (iup:attribute-set! obj "TITLE" (if (dboard:tabdat-hide-empty-runs tabdat) "+HideE" "-HideE"))
-						   (mark-for-update tabdat))))
+	   ;; (set! hide-empty (iup:button "HideEmpty"
+	   ;; 				;; #:expand HORIZONTAL"
+	   ;; 				#:expand "NO" #:size "80x15"
+	   ;; 				#:action (lambda (obj)
+	   ;; 					   (dboard:tabdat-hide-empty-runs-set! tabdat (not (dboard:tabdat-hide-empty-runs tabdat)))
+	   ;; 					   (iup:attribute-set! obj "TITLE" (if (dboard:tabdat-hide-empty-runs tabdat) "+HideE" "-HideE"))
+	   ;; 					   (mark-for-update tabdat))))
 	   (set! hide (iup:button "Hide"
 				  #:expand "NO" #:size "40x15" ;; #:expand "HORIZONTAL"
 				  #:action (lambda (obj)
 					     (dboard:tabdat-hide-not-hide-set! tabdat #t) ;; (not (dboard:tabdat-hide-not-hide tabdat)))
 					     ;; (iup:attribute-set! obj "TITLE" (if (dboard:tabdat-hide-not-hide tabdat) "HideTests" "NotHide"))
@@ -2263,54 +2301,80 @@
         
 
         
         )))
 
-     (iup:frame 
-      #:title "state/status filter"
-      (iup:vbox
-       (apply 
-	iup:hbox
-	(map (lambda (status)
-	       (iup:toggle (conc status "  ")
-			   #:fontsize btn-fontsz ;; "10"
-			   #:expand "HORIZONTAL"
-			   #:action   (lambda (obj val)
-					(mark-for-update tabdat)
-					(if (eq? val 1)
-					    (hash-table-set! (dboard:tabdat-status-ignore-hash tabdat) status #t)
-					    (hash-table-delete! (dboard:tabdat-status-ignore-hash tabdat) status))
-					(set-bg-on-filter commondat tabdat))))
-	     (map cadr *common:std-statuses*))) ;; '("PASS" "FAIL" "WARN" "CHECK" "WAIVED" "STUCK/DEAD" "n/a" "SKIP")))
-       (apply 
-	iup:hbox
-	(map (lambda (state)
-	       (iup:toggle (conc state "  ")
-			   #:fontsize btn-fontsz
-			   #:expand "HORIZONTAL"
-			   #:action   (lambda (obj val)
-					(mark-for-update tabdat)
-					(if (eq? val 1)
-					    (hash-table-set! (dboard:tabdat-state-ignore-hash tabdat) state #t)
-					    (hash-table-delete! (dboard:tabdat-state-ignore-hash tabdat) state))
-					(set-bg-on-filter commondat tabdat))))
-	     (map cadr *common:std-states*))) ;; '("RUNNING" "COMPLETED" "INCOMPLETE" "LAUNCHED" "NOT_STARTED" "KILLED" "DELETED")))
-       (iup:valuator #:valuechanged_cb (lambda (obj)
-					 (let ((val (inexact->exact (round (/ (string->number (iup:attribute obj "VALUE")) 10))))
-					       (oldmax   (string->number (iup:attribute obj "MAX")))
-					       (maxruns  (dboard:tabdat-tot-runs tabdat)))
-					   (dboard:tabdat-start-run-offset-set! tabdat val)
-					   (mark-for-update tabdat)
-					   (debug:print 6 *default-log-port* "(dboard:tabdat-start-run-offset tabdat) " (dboard:tabdat-start-run-offset tabdat) " maxruns: " maxruns ", val: " val " oldmax: " oldmax)
-					   (iup:attribute-set! obj "MAX" (* maxruns 10))))
-		     #:expand "HORIZONTAL"
-		     #:max (* 10 (max (hash-table-size (dboard:tabdat-allruns-by-id tabdat)) 10))
-		     #:min 0
-		     #:step 0.01)))
-     ;;(iup:button "inc rows" #:action (lambda (obj)(dboard:tabdat-num-tests-set! tabdat (+ (dboard:tabdat-num-tests tabdat) 1))))
-					;(iup:button "dec rows" #:action (lambda (obj)(dboard:tabdat-num-tests-set! tabdat (if (> (dboard:tabdat-num-tests tabdat) 0)(- (dboard:tabdat-num-tests tabdat) 1) 0))))
-     )))
+     (let* ((status-toggles (map (lambda (status)
+				   (iup:toggle (conc status)
+					       #:fontsize 8 ;; btn-fontsz ;; "10"
+					       ;; #:expand "HORIZONTAL"
+					       #:action   (lambda (obj val)
+							    (mark-for-update tabdat)
+							    (if (eq? val 1)
+								(hash-table-set! (dboard:tabdat-status-ignore-hash tabdat) status #t)
+								(hash-table-delete! (dboard:tabdat-status-ignore-hash tabdat) status))
+							    (set-bg-on-filter commondat tabdat))))
+				 (map cadr *common:std-statuses*))) ;; '("PASS" "FAIL" "WARN" "CHECK" "WAIVED" "STUCK/DEAD" "n/a" "SKIP")))
+	    (state-toggles  (map (lambda (state)
+				   (iup:toggle (conc state)
+					       #:fontsize 8 ;; btn-fontsz
+					       ;; #:expand "HORIZONTAL"
+					       #:action   (lambda (obj val)
+							    (mark-for-update tabdat)
+							    (if (eq? val 1)
+								(hash-table-set! (dboard:tabdat-state-ignore-hash tabdat) state #t)
+								(hash-table-delete! (dboard:tabdat-state-ignore-hash tabdat) state))
+							    (set-bg-on-filter commondat tabdat))))
+				 (map cadr *common:std-states*))) ;; '("RUNNING" "COMPLETED" "INCOMPLETE" "LAUNCHED" "NOT_STARTED" "KILLED" "DELETED")))
+	    (num-toggle-cols (inexact->exact (round (/ (max (length status-toggles)(length state-toggles)) 3)))))
+       (iup:vbox
+	(iup:hbox
+	 (iup:frame
+	  #:title "states"
+	  (apply
+	   iup:hbox
+	   (map (lambda (colgrp)
+		  (apply iup:vbox colgrp))
+		(dboard:squarify state-toggles 3))))
+	 (iup:frame
+	  #:title "statuses"
+	  (apply
+	   iup:hbox
+	   (map (lambda (colgrp)
+		  (apply iup:vbox colgrp))
+		(dboard:squarify status-toggles 3)))))
+	;; 
+	;; (iup:frame 
+	;; 	#:title "state/status filter"
+	;; 	(iup:vbox
+	;; 	 (apply
+	;; 	  iup:hbox
+	;; 	  (map
+	;; 	   (lambda (status-toggle state-toggle)
+	;; 	     (iup:vbox
+	;; 	      status-toggle
+	;; 	      state-toggle))
+	;; 	   status-toggles state-toggles))
+
+	;; horizontal slider was here
+	
+	)))))
+
+(define (dashboard:runs-horizontal-slider tabdat )
+  (iup:valuator #:valuechanged_cb (lambda (obj)
+				    (let ((val (inexact->exact (round (/ (string->number (iup:attribute obj "VALUE")) 10))))
+					  (oldmax   (string->number (iup:attribute obj "MAX")))
+					  (maxruns  (dboard:tabdat-tot-runs tabdat)))
+				      (dboard:tabdat-start-run-offset-set! tabdat val)
+				      (mark-for-update tabdat)
+				      (debug:print 6 *default-log-port* "(dboard:tabdat-start-run-offset tabdat) " (dboard:tabdat-start-run-offset tabdat) " maxruns: " maxruns ", val: " val " oldmax: " oldmax)
+				      (iup:attribute-set! obj "MAX" (* maxruns 10))))
+		#:expand "HORIZONTAL"
+		#:max (* 10 (max (hash-table-size (dboard:tabdat-allruns-by-id tabdat)) 10))
+		#:min 0
+		#:step 0.01))
+
 
 (define (dashboard:popup-menu  run-id test-id target runname test-name testpatt item-test-path test-info)
   (iup:menu 
    (iup:menu-item
     "Test Control Panel"
@@ -2415,11 +2479,20 @@
       (lambda (obj)
         (common:run-a-command
          (conc "megatest -set-state-status KILLREQ,n/a -target " target
                " -runname " runname
                " -testpatt % "
-               "  -state RUNNING,REMOTEHOSTSTART,LAUNCHED"))))))
+               "  -state RUNNING,REMOTEHOSTSTART,LAUNCHED"))))
+     (iup:menu-item 
+      "Delete Run Data"
+      #:action
+      (lambda (obj)
+        (common:run-a-command
+         (conc "megatest -remove-runs -target " target
+               " -runname " runname
+               " -testpatt % "
+               "  -keep-records"))))))
    (iup:menu-item
     "Test"
     (iup:menu 
      (iup:menu-item
       (conc "Rerun " item-test-path)
@@ -2438,10 +2511,19 @@
 	(common:run-a-command
 	 (conc "megatest -set-state-status KILLREQ,n/a -target " target
                " -runname " runname
 	       " -testpatt " item-test-path 
 	       " -state RUNNING,REMOTEHOSTSTART,LAUNCHED"))))
+     (iup:menu-item
+      (conc "Delete data : " item-test-path)
+      #:action
+      (lambda (obj)
+	(common:run-a-command
+	 (conc "megatest -remove-runs -target " target
+               " -runname " runname
+	       " -testpatt " item-test-path 
+	       " -keep-records"))))
      (iup:menu-item
       (conc "Clean "item-test-path)
       #:action
       (lambda (obj)
 	(common:run-a-command
@@ -2506,10 +2588,13 @@
 			       (map (lambda (x)		
 				      (let ((res (iup:hbox #:expand "HORIZONTAL"
 							   (iup:label x #:size (conc 40 btn-height) #:fontsize btn-fontsz #:expand "NO") ;; "HORIZONTAL")
 							   (iup:textbox #:size (conc 35 btn-height) #:fontsize btn-fontsz #:value "%" #:expand "NO" ;; "HORIZONTAL"
 									#:action (lambda (obj unk val)
+										   ;; each field (field name is "x" var) live updates
+										   ;; the search filter as it is typed
+										   (dboard:tabdat-target-set! runs-dat #f) ;; ensure the fields text boxes are used and not the info from the tree
 										   (mark-for-update runs-dat)
 										   (update-search commondat runs-dat x val))))))
 					(set! i (+ i 1))
 					res))
 				    keynames)))))
@@ -2630,20 +2715,23 @@
       #:title (conc "Megatest dashboard " (current-user-name) ":" *toppath*)
       #:menu (dcommon:main-menu)
       (let* ((runs-view (iup:vbox
 			 (iup:split
 			  #:orientation "VERTICAL" ;; "HORIZONTAL"
-			  #:value 150
+			  #:value 100
 			  (dboard:runs-tree-browser commondat runs-dat)
 			  (iup:split
+			   #:value 100
 			   ;; left most block, including row names
 			   (apply iup:vbox lftlst)
 			   ;; right hand block, including cells
 			   (iup:vbox
+			    #:expand "YES"
 			    ;; the header
 			    (apply iup:hbox (reverse hdrlst))
-			    (apply iup:hbox (reverse bdylst)))))
+			    (apply iup:hbox (reverse bdylst))
+			    (dashboard:runs-horizontal-slider runs-dat))))
 			 controls
 			 ))
 	     (views-cfgdat (common:load-views-config))
 	     (additional-tabnames '())
 	     (tab-start-num       5)   ;; DON'T FORGET TO UPDATE THIS WHEN CHANGING THE STANDARD TABS BELOW
@@ -2688,10 +2776,11 @@
 			  runs-view
 			  (dashboard:runs-summary commondat onerun-dat tab-num: 2)
 			  ;; (dashboard:new-view db data new-view-dat tab-num: 3)
 			  (dashboard:run-controls commondat runcontrols-dat tab-num: 3)
 			  (dashboard:run-times commondat runtimes-dat tab-num: 4)
+			  ;; (dashboard:runs-summary commondat onerun-dat tab-num: 4)
 			  additional-views)))
 	;; (set! (iup:callback tabs tabchange-cb:) (lambda (a b c)(print "SWITCHED TO TAB: " a " " b " " c)))
 	(iup:attribute-set! tabs "TABTITLE0" "Summary")
 	(iup:attribute-set! tabs "TABTITLE1" "Runs")
 	(iup:attribute-set! tabs "TABTITLE2" "Run Summary")
@@ -2758,11 +2847,11 @@
 		    (glob (conc dbdir "/*.db*"))))))
 
 (define (dashboard:monitor-changed? commondat tabdat)
   (let* ((run-update-time (current-seconds))
 	 (monitor-db-path (dboard:tabdat-monitor-db-path tabdat))
-	 (monitor-modtime (if (and monitor-db-path (file-exists? monitor-db-path))
+	 (monitor-modtime (if (and monitor-db-path (common:file-exists? monitor-db-path))
 			      (file-modification-time monitor-db-path)
 			      -1)))
     (if (and (eq? (dboard:commondat-curr-tab-num commondat) 0)
 	     (or (> monitor-modtime *last-monitor-update-time*)
 		 (> (- run-update-time *last-monitor-update-time*) 5))) ;; update every 1/2 minute just in case
@@ -3475,11 +3564,11 @@
 ;; The heavy lifting starts here
 ;;======================================================================
 
 (define (main)
   (let ((mtdb-path (conc *toppath* "/megatest.db"))) ;; 
-    (if (and (file-exists? mtdb-path)
+    (if (and (common:file-exists? mtdb-path)
 	     (file-write-access? mtdb-path))
 	(if (not (args:get-arg "-skip-version-check"))
             (common:exit-on-version-changed)))
     (let* ((commondat       (dboard:commondat-make)))
       ;; Move this stuff to db.scm? I'm not sure that is the right thing to do...
@@ -3534,12 +3623,12 @@
 	(thread-start! th2)
 	(thread-join! th2)))))
 
 ;; ease debugging by loading ~/.dashboardrc
 (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.dashboardrc")))
-  (if (file-exists? debugcontrolf)
+  (if (common:file-exists? debugcontrolf)
       (load debugcontrolf)))
 
 (if (args:get-arg "-repl")
     (repl)
     (main))
 

Index: datashare.scm
==================================================================
--- datashare.scm
+++ datashare.scm
@@ -226,11 +226,11 @@
     (if (and path
 	     (directory? path)
 	     (file-read-access? path))
 	(let* ((dbpath    (conc path "/datashare.db"))
 	       (writeable (file-write-access? dbpath))
-	       (dbexists  (file-exists? dbpath))
+	       (dbexists  (common:file-exists? dbpath))
 	       (handler   (make-busy-timeout 136000)))
 	  (handle-exceptions
 	   exn
 	   (begin
 	     (debug:print 2 *default-log-port* "ERROR: problem accessing db " dbpath
@@ -413,11 +413,11 @@
 	paths))
 
 ;; remove existing link and if possible ...
 ;; create path to next of tip of target, create link back to source
 (define (datashare:build-dir-make-link source target)
-  (if (file-exists? target)(datashare:backup-move target))
+  (if (common:file-exists? target)(datashare:backup-move target))
   (create-directory (pathname-directory target) #t)
   (create-symbolic-link source target))
 
 (define (datashare:backup-move path)
   (let* ((trashdir  (conc (pathname-directory path) "/.trash"))
@@ -518,11 +518,11 @@
 (define (datashare:path->lst path)
   (string-split path "/"))
 
 (define (datashare:pathdat-apply-heuristics configdat path)
   (cond
-   ((file-exists? path) "found")
+   ((common:file-exists? path) "found")
    (else (conc path " not installed"))))
 
 (define (datashare:get-view configdat)
   (iup:vbox
    (iup:hbox
@@ -692,11 +692,11 @@
 (define (datashare:find name paths)
   (if (null? paths)
       #f
       (let loop ((hed (car paths))
 		 (tal (cdr paths)))
-	(if (file-exists? (conc hed "/" name))
+	(if (common:file-exists? (conc hed "/" name))
 	    hed
 	    (if (null? tal)
 		#f
 		(loop (car tal)(cdr tal)))))))
 
@@ -706,11 +706,11 @@
 
 (define (datashare:load-config exe-dir exe-name)
   (let* ((fname   (conc exe-dir "/." exe-name ".config")))
     (ini:property-separator-patt " *  *")
     (ini:property-separator #\space)
-    (if (file-exists? fname)
+    (if (common:file-exists? fname)
 	;; (ini:read-ini fname)
 	(read-config fname #f #t)
 	(make-hash-table))))
 
 (define (datashare:process-action configdat action . args)
@@ -785,11 +785,11 @@
 	    versions)
        (sqlite3:finalize! db)))))
 
 ;; ease debugging by loading ~/.dashboardrc - REMOVE FROM PRODUCTION!
 (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.datasharerc")))
-  (if (file-exists? debugcontrolf)
+  (if (common:file-exists? debugcontrolf)
       (load debugcontrolf)))
 
 (define (main)
   (let* ((args      (argv))
 	 (prog      (car args))

Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -188,19 +188,11 @@
 ;; NB// #f => return dbdir only
 ;;      (was planned to be;  zeroth db with name=main.db)
 ;; 
 ;; If run-id is #f return to create and retrieve the path where the db will live.
 ;;
-(define (db:dbfile-path . junk) ;;  run-id)
-  (let* ((dbdir           (common:get-db-tmp-area)))
-    (handle-exceptions
-     exn
-     (begin
-       (debug:print-error 0 *default-log-port* "Couldn't create path to " dbdir)
-       (exit 1))
-     (if (not (directory? dbdir))(create-directory dbdir #t)))
-    dbdir))
+(define db:dbfile-path common:get-db-tmp-area)
 
 (define (db:set-sync db)
   (let ((syncprag (configf:lookup *configdat* "setup" "sychronous")))
     (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 0) ";")))) 
 
@@ -212,20 +204,20 @@
 
 (define (db:lock-create-open fname initproc)
   (let* ((parent-dir   (or (pathname-directory fname)(current-directory))) ;; no parent? go local
          (raw-fname    (pathname-file fname))
 	 (dir-writable (file-write-access? parent-dir))
-	 (file-exists  (file-exists? fname))
+	 (file-exists  (common:file-exists? fname))
 	 (file-write   (if file-exists
 			   (file-write-access? fname)
 			   dir-writable )))
     ;;(mutex-lock! *db-open-mutex*) ;; tried this mutex, not clear it helped.
     (if file-write ;; dir-writable
 	(condition-case
          (let* ((lockfname   (conc fname ".lock"))
                 (readyfname  (conc parent-dir "/.ready-" raw-fname))
-                (readyexists (file-exists? readyfname)))
+                (readyexists (common:file-exists? readyfname)))
            (if (not readyexists)
                (common:simple-file-lock-and-wait lockfname))
            (let ((db      (sqlite3:open-database fname)))
              (sqlite3:set-busy-handler! db (make-busy-timeout 136000))
              (sqlite3:execute db "PRAGMA synchronous = 0;")
@@ -232,11 +224,11 @@
              (if (not file-exists)
                  (begin
                    (if (and (configf:lookup *configdat* "setup" "use-wal")
                             (string-match "^/tmp/.*" fname)) ;; this is a file in /tmp
                        (sqlite3:execute db "PRAGMA journal_mode=WAL;")
-                       (print "Creating " fname " in NON-WAL mode."))
+                       (debug:print 2 *default-log-port* "Creating " fname " in NON-WAL mode."))
                    (initproc db)))
              (if (not readyexists)
                  (begin
                    (common:simple-file-release-lock lockfname)
                    (with-output-to-file
@@ -272,11 +264,11 @@
 
 ;; ;; This routine creates the db. It is only called if the db is not already opened
 ;; ;; 
 ;; (define (db:open-rundb dbstruct run-id #!key (attemptnum 0)(do-not-open #f)) ;;  (conc *toppath* "/megatest.db") (car *configinfo*)))
 ;;   (let* ((dbfile       (db:dbfile-path run-id)) ;; (conc toppath "/db/" run-id ".db"))
-;;          (dbexists     (file-exists? dbfile))
+;;          (dbexists     (common:file-exists? dbfile))
 ;;          (db           (db:lock-create-open dbfile (lambda (db)
 ;;                                                      (handle-exceptions
 ;;                                                       exn
 ;;                                                       (begin
 ;;                                                         ;; (release-dot-lock dbpath)
@@ -307,19 +299,19 @@
 ;;     (db:sync-tables db:sync-tests-only *megatest-db* db)
 ;;     db))
 
 ;; This routine creates the db if not already present. It is only called if the db is not already opened
 ;;
-(define (db:open-db dbstruct #!key (areapath #f)) ;; TODO: actually use areapath
+(define (db:open-db dbstruct #!key (areapath #f)(do-sync #t)) ;; TODO: actually use areapath
   (let ((tmpdb-stack (dbr:dbstruct-dbstack dbstruct))) ;; RA => Returns the first reference in dbstruct
     (if (stack? tmpdb-stack)
 	(db:get-db tmpdb-stack) ;; get previously opened db (will create new db handle if all in the stack are already used
         (let* ((dbpath       (db:dbfile-path ))      ;; path to tmp db area
-               (dbexists     (file-exists? dbpath))
+               (dbexists     (common:file-exists? dbpath))
 	       (tmpdbfname   (conc dbpath "/megatest.db"))
-	       (dbfexists    (file-exists? tmpdbfname))  ;; (conc dbpath "/megatest.db")))
-               (mtdbexists   (file-exists? (conc *toppath* "/megatest.db")))
+	       (dbfexists    (common:file-exists? tmpdbfname))  ;; (conc dbpath "/megatest.db")))
+               (mtdbexists   (common:file-exists? (conc *toppath* "/megatest.db")))
                
                (mtdb         (db:open-megatest-db))
                (mtdbpath     (db:dbdat-get-path mtdb))
                (tmpdb        (db:open-megatest-db path: dbpath)) ;; lock-create-open dbpath db:initialize-main-db))
                (refndb       (db:open-megatest-db path: dbpath name: "megatest_ref.db"))
@@ -337,13 +329,14 @@
           (dbr:dbstruct-tmpdb-set!  dbstruct tmpdb)
           (dbr:dbstruct-dbstack-set! dbstruct (make-stack)) ;; BB: why a stack?  Why would the number of db's be indeterminate?  Is this a legacy of 1.db 2.db .. ?
           (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb) ;; olddb is already a (cons db path)
           (dbr:dbstruct-refndb-set! dbstruct refndb)
           ;;	    (mutex-unlock! *rundb-mutex*)
-          (if (or (not dbfexists)
-                  (and modtimedelta
-                       (> modtimedelta 10))) ;; if db in tmp is over ten seconds older than the file in MTRA then do a sync back
+          (if (and  (or (not dbfexists)
+			(and modtimedelta
+			     (> modtimedelta 10))) ;; if db in tmp is over ten seconds older than the file in MTRA then do a sync back
+		    do-sync)
 	      (begin
 		(debug:print 4 *default-log-port* "filling db " (db:dbdat-get-path tmpdb) " with data \n    from " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta)
 		(db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb refndb tmpdb)
                 (debug:print-info 13 *default-log-port* "db:sync-all-tables-list done.")
                 )
@@ -353,23 +346,22 @@
 
 ;; Make the dbstruct, setup up auxillary db's and call for main db at least once
 ;;
 ;; called in http-transport and replicated in rmt.scm for *local* access. 
 ;;
-(define (db:setup #!key (areapath #f))
+(define (db:setup do-sync #!key (areapath #f))
   ;;
-
   (cond
    (*dbstruct-db* *dbstruct-db*);; TODO: when multiple areas are supported, this optimization will be a hazard
    (else ;;(common:on-homehost?)
     (debug:print-info 13 *default-log-port* "db:setup entered (first time, not cached.)")
     (let* ((dbstruct (make-dbr:dbstruct)))
       (when (not *toppath*)
         (debug:print-info 13 *default-log-port* "in db:setup, *toppath* not set; calling launch:setup")
         (launch:setup areapath: areapath))
       (debug:print-info 13 *default-log-port* "Begin db:open-db")
-      (db:open-db dbstruct areapath: areapath)
+      (db:open-db dbstruct areapath: areapath do-sync: do-sync)
       (debug:print-info 13 *default-log-port* "Done db:open-db")
       (set! *dbstruct-db* dbstruct)
       ;;(debug:print-info 13 *default-log-port* "new dbstruct = "(dbr:dbstruct->alist dbstruct))
       dbstruct))))
    ;; (else
@@ -381,11 +373,11 @@
 ;;   NOTE: returns a dbdat not a dbstruct!
 ;;
 (define (db:open-megatest-db #!key (path #f)(name #f))
   (let* ((dbdir        (or path *toppath*))
          (dbpath       (conc  dbdir "/" (or name "megatest.db")))
-	 (dbexists     (file-exists? dbpath))
+	 (dbexists     (common:file-exists? dbpath))
 	 (db           (db:lock-create-open dbpath
 					    (lambda (db)
                                               (db:initialize-main-db db)
 					      ;;(db:initialize-run-id-db db)
 					      )))
@@ -410,10 +402,25 @@
     (mutex-lock! *db-multi-sync-mutex*)
     (set! *db-last-sync* start-t)
     (set! *db-last-access* start-t)
     (mutex-unlock! *db-multi-sync-mutex*)
     (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb)))
+
+(define (db:safely-close-sqlite3-db db #!key (try-num 3))
+  (if (<= try-num 0)
+      #f
+      (handle-exceptions
+	  exn
+	  (begin
+	    (thread-sleep! 3)
+	    (sqlite3:interrupt! db)
+	    (db:safely-close-sqlite3-db db try-num: (- try-num 1)))
+	(if (sqlite3:database? db)
+	    (begin
+	      (sqlite3:finalize! db)
+	      #t)
+	    #f))))
 
 ;; close all opened run-id dbs
 (define (db:close-all dbstruct)
   (if (dbr:dbstruct? dbstruct)
       (handle-exceptions
@@ -425,15 +432,16 @@
         (let ((tdbs (map db:dbdat-get-db 
                          (stack->list (dbr:dbstruct-dbstack dbstruct))))
               (mdb (db:dbdat-get-db (dbr:dbstruct-mtdb   dbstruct)))
               (rdb (db:dbdat-get-db (dbr:dbstruct-refndb dbstruct))))
           (map (lambda (db)
-		 (if (sqlite3:database? db)
-		     (sqlite3:finalize! db)))
+		 (db:safely-close-sqlite3-db db))
+;; 		 (if (sqlite3:database? db)
+;; 		     (sqlite3:finalize! db)))
 	       tdbs)
-          (if (sqlite3:database? mdb) (sqlite3:finalize! mdb))
-          (if (sqlite3:database? rdb) (sqlite3:finalize! rdb))))))
+          (db:safely-close-sqlite3-db mdb)     ;; (if (sqlite3:database? mdb) (sqlite3:finalize! mdb))
+          (db:safely-close-sqlite3-db rdb))))) ;; (if (sqlite3:database? rdb) (sqlite3:finalize! rdb))))))
 
 ;;   (let ((locdbs (dbr:dbstruct-locdbs dbstruct)))
 ;;     (if (hash-table? locdbs)
 ;; 	(for-each (lambda (run-id)
 ;; 		    (db:close-run-db dbstruct run-id))
@@ -537,11 +545,11 @@
 	 (tmpname  (conc fname "." (current-process-id)))
 	 (tmpjnl   (conc fnamejnl "." (current-process-id))))
     (debug:print-error 0 *default-log-port* "" fname " appears corrupted. Making backup \"old/" fname "\"")
     (system (conc "cd " dbdir ";mkdir -p old;cat " fname " > old/" tmpname))
     (system (conc "rm -f " dbpath))
-    (if (file-exists? fnamejnl)
+    (if (common:file-exists? fnamejnl)
 	(begin
 	  (debug:print-error 0 *default-log-port* "" fnamejnl " found, moving it to old dir as " tmpjnl)
 	  (system (conc "cd " dbdir ";mkdir -p old;cat " fnamejnl " > old/" tmpjnl))
 	  (system (conc "rm -f " dbdir "/" fnamejnl))))
     ;; attempt to recreate database
@@ -613,11 +621,11 @@
    exn
    (begin
      (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable in db:sync-tables.")
      (print-call-chain (current-error-port))
      (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
-     (print "exn=" (condition->list exn))
+     (debug:print 5 *default-log-port* "exn=" (condition->list exn))
      (debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
      (debug:print 0 *default-log-port* " src db:  " (db:dbdat-get-path fromdb))
      (for-each (lambda (dbdat)
 		 (let ((dbpath (db:dbdat-get-path dbdat)))
 		   (debug:print 0 *default-log-port* " dbpath:  " dbpath)
@@ -661,26 +669,42 @@
 	   (numrecs     (make-hash-table))
 	   (start-time  (current-milliseconds))
 	   (tot-count   0))
        (for-each ;; table
 	(lambda (tabledat)
-	  (let* ((tablename  (car tabledat))
-		 (fields     (cdr tabledat))
-		 (use-last-update  (if last-update
-				       (if (pair? last-update)
-					   (member (car last-update)    ;; last-update field name
-						   (map car fields))
-					   (begin
-					     (debug:print 0 *default-log-port* "ERROR: parameter last-update for db:sync-tables must be a pair, received: " last-update) ;; found in fields
-					     #f))
-				       #f))
+	  (let* ((tablename        (car tabledat))
+		 (fields           (cdr tabledat))
+		 (has-last-update  (member "last_update" fields))
+		 (use-last-update  (cond
+				    ((and has-last-update
+					  (member "last_update" fields))
+				     #t) ;; if given a number, just use it for all fields
+				    ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table
+				    ((and (pair? last-update)
+					  (member (car last-update)    ;; last-update field name
+						  (map car fields))) #t)
+				    (last-update
+				     (debug:print 0 *default-log-port* "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update) ;; found in fields
+				     #f)
+				    (else
+				     #f)))
+		 (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for
+					(if (number? last-update)
+					    last-update
+					    (cdr last-update))
+					#f))
+		 (last-update-field (if use-last-update
+					(if (number? last-update)
+					    "last_update"
+					    (car last-update))
+					#f))
 		 (num-fields (length fields))
 		 (field->num (make-hash-table))
 		 (num->field (apply vector (map car fields)))
 		 (full-sel   (conc "SELECT " (string-intersperse (map car fields) ",") 
 				   " FROM " tablename (if use-last-update ;; apply last-update criteria
-							  (conc " " (car last-update) ">=" (cdr last-update))
+							  (conc " WHERE " last-update-field " >= " last-update-value)
 							  "")
 				   ";"))
 		 (full-ins   (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) "
 				   " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );"))
 		 (fromdat    '())
@@ -834,12 +858,50 @@
   (sqlite3:execute maindb "CREATE TRIGGER  IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats
                              FOR EACH ROW
                                BEGIN 
                                  UPDATE run_stats SET last_update=(strftime('%s','now'))
                                    WHERE id=old.id;
-                               END;"))
+                               END;")
+  (sqlite3:execute maindb "CREATE TABLE IF NOT EXISTS test_rundat (
+                              id           INTEGER PRIMARY KEY,
+                              test_id      INTEGER,
+                              update_time  TIMESTAMP,
+                              cpuload      INTEGER DEFAULT -1,
+                              diskfree     INTEGER DEFAULT -1,
+                              diskusage    INTGER DEFAULT -1,
+                              run_duration INTEGER DEFAULT 0);"))
 
+(define (db:adj-target db)
+  (let ((fields    (configf:get-section *configdat* "fields"))
+	(field-num 0))
+    ;; because we will be refreshing the keys table it is best to clear it here
+    (sqlite3:execute db "DELETE FROM keys;")
+    (for-each
+     (lambda (field)
+       (let ((column (car field))
+	     (spec   (cadr field)))
+	 (handle-exceptions
+	  exn
+	  (if (string-match ".*duplicate.*" ((condition-property-accessor 'exn 'message) exn))
+	      (debug:print 0 *default-log-port* "Target field " column " already exists in the runs table")
+	      (db:general-sqlite-error-dump exn "alter table runs ..." #f "none"))
+	  ;; Add the column if needed
+	  (sqlite3:execute
+	   db
+	   (conc "ALTER TABLE runs ADD COLUMN " column " " spec)))
+	 ;; correct the entry in the keys column
+	 (sqlite3:execute
+	  db
+	  "INSERT INTO keys (id,fieldname,fieldtype) VALUES (?,?,?);"
+	  field-num column spec)
+	 ;; fill in blanks (not allowed as it would be part of the path
+	 (sqlite3:execute
+	  db
+	  (conc "UPDATE runs SET " column "='x' WHERE " column "='';"))
+	 (set! field-num (+ field-num 1))))
+     fields)))
+  
 (define *global-db-store* (make-hash-table))
 
 (define (db:get-access-mode)
   (if (args:get-arg "-use-db-cache") 'cached 'rmt))
 
@@ -857,11 +919,11 @@
 (define (db:cache-for-read-only source target #!key (use-last-update #f))
   (if (and (hash-table-ref/default *global-db-store* target #f)
 	   (>= (file-modification-time target)(file-modification-time source)))
       (hash-table-ref *global-db-store* target)
       (let* ((toppath   (launch:setup))
-	     (targ-db-last-mod (if (file-exists? target)
+	     (targ-db-last-mod (if (common:file-exists? target)
 				   (file-modification-time target)
 				   0))
 	     (cache-db  (or (hash-table-ref/default *global-db-store* target #f)
 			    (db:open-megatest-db path: target)))
 	     (source-db (db:open-megatest-db path: source))
@@ -871,41 +933,41 @@
 	(db:sync-tables (db:sync-main-list source-db) last-update source-db cache-db)
 	(db:sync-tables db:sync-tests-only last-update source-db cache-db)
 	(hash-table-set! *global-db-store* target cache-db)
 	cache-db)))
 
-;; call a proc with a cached db
-;;
-(define (db:call-with-cached-db proc . params)
-  ;; first cache the db in /tmp
-  (let* ((cname-part (conc "megatest_cache/" (common:get-testsuite-name)))
-	 (fname      (conc  (common:get-area-path-signature) ".db"))
-	 (cache-dir  (common:get-create-writeable-dir
-		      (list (conc "/tmp/" (current-user-name) "/" cname-part)
-			    (conc "/tmp/" (current-user-name) "-" cname-part)
-			     (conc "/tmp/" (current-user-name) "_" cname-part))))
-	 (megatest-db (conc *toppath* "/megatest.db")))
-    ;; (debug:print-info 0 *default-log-port* "Using cache dir " cache-dir)
-    (if (not cache-dir)
-	(begin
-	  (debug:print 0 *default-log-port* "ERROR: Failed to find an area to write the cache db")
-	  (exit 1))
-	(let* ((th1      (make-thread
-			  (lambda ()
-			    (if (and (file-exists? megatest-db)
-				     (file-write-access? megatest-db))
-				(begin
-				  (common:sync-to-megatest.db 'timestamps) ;; internally mutexes on *db-local-sync*
-				  (debug:print-info 2 *default-log-port* "Done syncing to megatest.db"))))
-			  "call-with-cached-db sync-to-megatest.db"))
-	       (cache-db (db:cache-for-read-only
-			  megatest-db
-			  (conc cache-dir "/" fname)
-			  use-last-update: #t)))
-	  (thread-start! th1)
-	  (apply proc cache-db params)
-	  ))))
+;; ;; call a proc with a cached db
+;; ;;
+;; (define (db:call-with-cached-db proc . params)
+;;   ;; first cache the db in /tmp
+;;   (let* ((cname-part (conc "megatest_cache/" (common:get-testsuite-name)))
+;; 	 (fname      (conc  (common:get-area-path-signature) ".db"))
+;; 	 (cache-dir  (common:get-create-writeable-dir
+;; 		      (list (conc "/tmp/" (current-user-name) "/" cname-part)
+;; 			    (conc "/tmp/" (current-user-name) "-" cname-part)
+;; 			     (conc "/tmp/" (current-user-name) "_" cname-part))))
+;; 	 (megatest-db (conc *toppath* "/megatest.db")))
+;;     ;; (debug:print-info 0 *default-log-port* "Using cache dir " cache-dir)
+;;     (if (not cache-dir)
+;; 	(begin
+;; 	  (debug:print 0 *default-log-port* "ERROR: Failed to find an area to write the cache db")
+;; 	  (exit 1))
+;; 	(let* ((th1      (make-thread
+;; 			  (lambda ()
+;; 			    (if (and (common:file-exists? megatest-db)
+;; 				     (file-write-access? megatest-db))
+;; 				(begin
+;; 				  (db:sync-to-megatest.db dbstruct 'timestamps) ;; internally mutexes on *db-local-sync*
+;; 				  (debug:print-info 2 *default-log-port* "Done syncing to megatest.db"))))
+;; 			  "call-with-cached-db sync-to-megatest.db"))
+;; 	       (cache-db (db:cache-for-read-only
+;; 			  megatest-db
+;; 			  (conc cache-dir "/" fname)
+;; 			  use-last-update: #t)))
+;; 	  (thread-start! th1)
+;; 	  (apply proc cache-db params)
+;; 	  ))))
 
 ;; options:
 ;;
 ;;  'killservers  - kills all servers
 ;;  'dejunk       - removes junk records
@@ -915,147 +977,100 @@
 ;;  'closeall     - close all opened dbs
 ;;  'schema       - attempt to apply schema changes
 ;;  run-ids: '(1 2 3 ...) or #f (for all)
 ;;
 (define (db:multi-db-sync dbstruct . options)
-  (if (not (launch:setup))
-      (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.")
-      (let* ((mtdb     (dbr:dbstruct-mtdb dbstruct))
-	     (tmpdb    (db:get-db dbstruct))
-             (refndb   (dbr:dbstruct-refndb dbstruct))
-	     (allow-cleanup #t) ;; (if run-ids #f #t))
-	     (servers (server:get-list *toppath*)) ;; (tasks:get-all-servers (db:delay-if-busy tdbdat)))
-	     (data-synced 0)) ;; count of changed records (I hope)
-    
-	;; kill servers
-	(if (member 'killservers options)
-	    (for-each
-	     (lambda (server)
-	       (match-let (((mod-time host port start-time pid) server))
-		 (if (and host pid)
-		     (tasks:kill-server host pid))))
-	     servers))
-
-	;; clear out junk records
-	;;
-	(if (member 'dejunk options)
-	    (begin
-	      (db:delay-if-busy mtdb) ;; ok to delay on mtdb
-	      (db:clean-up mtdb)
-	      (db:clean-up tmpdb)
-              (db:clean-up refndb)))
-
-	;; adjust test-ids to fit into proper range
-	;;
-	;; (if (member 'adj-testids options)
-	;;     (begin
-	;;       (db:delay-if-busy mtdb)
-	;;       (db:prep-megatest.db-for-migration mtdb)))
-
-	;; sync runs, test_meta etc.
-	;;
-	(if (member 'old2new options)
-	    ;; (begin
-            (set! data-synced
-                  (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb tmpdb refndb)
-                     data-synced)))
-			      ;; (db:sync-main-list mtdb) mtdb (db:get-db dbstruct #f))
-;; 	      (for-each 
-;; 	       (lambda (run-id)
-;; 		 (db:delay-if-busy mtdb)
-;; 		 (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id)))
-;; ;;		       (dbstruct (if toppath (make-dbr:dbstruct path: toppath local: #t) #f)))
-;; 		   (debug:print 0 *default-log-port* "INFO: Propagating " (length testrecs) " records for run-id=" run-id " to run specific db")
-;; 		   (db:replace-test-records dbstruct run-id testrecs)
-;; 		   (sqlite3:finalize! (db:dbdat-get-db (dbr:dbstruct-rundb dbstruct)))))
-;; 	       run-ids)))
-
-	;; now ensure all newdb data are synced to megatest.db
-	;; do not use the run-ids list passed in to the function
-	;;
-	(if (member 'new2old options)
-	    (set! data-synced
-		  (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f tmpdb refndb mtdb)
-		      data-synced)))
-
-
-
-        (if (member 'schema options)
-            (begin
-              (db:patch-schema-maindb (db:dbdat-get-db mtdb))
-              (db:patch-schema-maindb (db:dbdat-get-db tmpdb))
-              (db:patch-schema-maindb (db:dbdat-get-db refndb))
-              (db:patch-schema-rundb  (db:dbdat-get-db mtdb))
-              (db:patch-schema-rundb  (db:dbdat-get-db tmpdb))
-              (db:patch-schema-rundb  (db:dbdat-get-db refndb))))
-              
-	;; (let* ((maindb      (make-dbr:dbstruct path: toppath local: #t))
-	;; 	   (src-run-ids (if run-ids run-ids (db:get-all-run-ids (db:dbdat-get-db (db:get-db maindb 0)))))
-	;; 	   (all-run-ids (sort (delete-duplicates (cons 0 src-run-ids)) <))
-	;; 	   (count       1)
-	;; 	   (total       (length all-run-ids))
-	;; 	   (dead-runs  '()))
-	;;   ;; first fix schema if needed
-	;;   (map
-	;;    (lambda (th)
-	;; 	 (thread-join! th))
-	;;    (map
-	;; 	(lambda (run-id)
-	;; 	  (thread-start! 
-	;; 	   (make-thread
-	;; 	    (lambda ()
-	;; 	      (let* ((fromdb (if toppath (make-dbr:dbstruct path: toppath local: #t) #f))
-;;                    (if (member 'schema options)
-	;; 		(if (eq? run-id 0)
-	;; 		    (let ((maindb  (db:dbdat-get-db (db:get-db fromdb #f))))
-	;; 		      (db:patch-schema-maindb run-id maindb))
-	;; 		    (db:patch-schema-rundb run-id frundb)))
-	;; 	      (set! count (+ count 1))
-	;; 	      (debug:print 0 *default-log-port* "Finished patching schema for " (if (eq? run-id 0) " main.db " (conc run-id ".db")) ", " count " of " total)))))
-	;; 	all-run-ids))
-	;;   ;; Then sync and fix db's
-	;;   (set! count 0)
-	;;   (process-fork
-	;;    (lambda ()
-	;; 	 (map
-	;; 	  (lambda (th)
-	;; 	    (thread-join! th))
-	;; 	  (map
-	;; 	   (lambda (run-id)
-	;; 	     (thread-start! 
-	;; 	      (make-thread
-	;; 	       (lambda ()
-	;; 		 (let* ((fromdb (if toppath (make-dbr:dbstruct path: toppath local: #t) #f))
-	;; 			(frundb (db:dbdat-get-db (db:get-db fromdb run-id))))
-	;; 		   (if (eq? run-id 0)
-	;; 		       (let ((maindb  (db:dbdat-get-db (db:get-db fromdb #f))))
-;;                             (db:sync-tables (db:sync-main-list dbstruct) #f (db:get-db fromdb #f) mtdb)
-	;; 			 (set! dead-runs (db:clean-up-maindb (db:get-db fromdb #f))))
-	;; 		       (begin
-	;; 			 ;; NB// must sync first to ensure deleted tests get marked as such in megatest.db
-;;                             (db:sync-tables db:sync-tests-only #f (db:get-db fromdb run-id) mtdb)
-	;; 			 (db:clean-up-rundb (db:get-db fromdb run-id)))))
-	;; 		 (set! count (+ count 1))
-	;; 		 (debug:print 0 *default-log-port* "Finished clean up of "
-	;; 			      (if (eq? run-id 0)
-	;; 				  " main.db " (conc run-id ".db")) ", " count " of " total)))))
-	;; 	   all-run-ids))))
-
-	;; removed deleted runs
-;; (let ((dbdir (tasks:get-task-db-path)))
-;;   (for-each (lambda (run-id)
-;; 	      (let ((fullname (conc dbdir "/" run-id ".db")))
-;; 		(if (file-exists? fullname)
-;; 		    (begin
-;; 		      (debug:print 0 *default-log-port* "Removing database file for deleted run " fullname)
-;; 		      (delete-file fullname)))))
-;; 	    dead-runs))))
-;; 
-	;; (db:close-all dbstruct)
-	;; (sqlite3:finalize! mdb)
-        (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb)
-	data-synced)))
+  ;; (if (not (launch:setup))
+  ;;    (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.")
+  (let* ((mtdb     (dbr:dbstruct-mtdb dbstruct))
+	 (tmpdb    (db:get-db dbstruct))
+	 (refndb   (dbr:dbstruct-refndb dbstruct))
+	 (allow-cleanup #t) ;; (if run-ids #f #t))
+	 (servers (server:get-list *toppath*)) ;; (tasks:get-all-servers (db:delay-if-busy tdbdat)))
+	 (data-synced 0)) ;; count of changed records (I hope)
+    
+    (for-each
+     (lambda (option)
+       
+       (case option
+	 ;; kill servers
+	 ((killservers)
+	  (for-each
+	   (lambda (server)
+	     (match-let (((mod-time host port start-time pid) server))
+	       (if (and host pid)
+		   (tasks:kill-server host pid))))
+	   servers))
+	 
+	 ;; clear out junk records
+	 ;;
+	 ((dejunk)
+	  (db:delay-if-busy mtdb) ;; ok to delay on mtdb
+	  (db:clean-up mtdb)
+	  (db:clean-up tmpdb)
+	  (db:clean-up refndb))
+
+	 ;; sync runs, test_meta etc.
+	 ;;
+	 ((old2new)
+	  (set! data-synced
+	    (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb tmpdb refndb)
+	       data-synced)))
+	 
+	 ;; now ensure all newdb data are synced to megatest.db
+	 ;; do not use the run-ids list passed in to the function
+	 ;;
+	 ((new2old)
+	  (set! data-synced
+	    (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f tmpdb refndb mtdb)
+	       data-synced)))
+
+	 ((adj-target)
+	  (db:adj-target (db:dbdat-get-db mtdb))
+	  (db:adj-target (db:dbdat-get-db tmpdb))
+	  (db:adj-target (db:dbdat-get-db refndb)))
+	 
+	 ((schema)
+	  (db:patch-schema-maindb (db:dbdat-get-db mtdb))
+	  (db:patch-schema-maindb (db:dbdat-get-db tmpdb))
+	  (db:patch-schema-maindb (db:dbdat-get-db refndb))
+	  (db:patch-schema-rundb  (db:dbdat-get-db mtdb))
+	  (db:patch-schema-rundb  (db:dbdat-get-db tmpdb))
+	  (db:patch-schema-rundb  (db:dbdat-get-db refndb))))
+       
+       (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb))
+     options)
+    data-synced))
+
+(define (db:tmp->megatest.db-sync dbstruct last-update)
+  (let* ((mtdb        (dbr:dbstruct-mtdb dbstruct))
+	 (tmpdb       (db:get-db dbstruct))
+	 (refndb      (dbr:dbstruct-refndb dbstruct)))
+    (db:sync-tables (db:sync-all-tables-list dbstruct) last-update tmpdb refndb mtdb)))
+
+;;;; run-ids
+;;    if #f use *db-local-sync* : or 'local-sync-flags
+;;    if #t use timestamps      : or 'timestamps
+(define (db:sync-to-megatest.db dbstruct #!key (no-sync-db #f)) 
+  (let* ((start-time         (current-seconds))
+	 (last-update        (if no-sync-db
+				 (db:no-sync-get/default no-sync-db "LAST_UPDATE" 0)
+				 0)) ;; (or (db:get-var dbstruct "LAST_UPDATE") 0))
+	 (sync-needed        (> (- start-time last-update) 6))
+	 (res                (if sync-needed ;; don't sync if a sync already occurred in the past 6 seconds
+				 (begin
+				   (if no-sync-db
+				       (db:no-sync-set no-sync-db "LAST_UPDATE" start-time))
+				   (db:tmp->megatest.db-sync dbstruct last-update))
+				 0))
+	 (sync-time           (- (current-seconds) start-time)))
+      (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id))
+      (if (common:low-noise-print 30 "sync new to old")
+          (if sync-needed
+              (debug:print-info 0 *default-log-port* "Sync of " res " records from newdb to olddb completed in " sync-time " seconds pid="(current-process-id))
+              (debug:print-info 0 *default-log-port* "No sync needed, last updated " (- start-time last-update) " seconds ago")))
+      res))
 
 ;; keeping it around for debugging purposes only
 (define (open-run-close-no-exception-handling  proc idb . params)
   (debug:print-info 11 *default-log-port* "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params)
   (print "I don't work anymore. open-run-close-no-exception-handling needs fixing or removing...")
@@ -1084,11 +1099,11 @@
        ((busy)
 	(thread-sleep! sleep-time))
        (else
 	(debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.")
 	(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
-	(print "exn=" (condition->list exn))
+	(debug:print 5 *default-log-port* "exn=" (condition->list exn))
 	(debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
 	(print-call-chain (current-error-port))
 	(thread-sleep! sleep-time)
 	(debug:print-info 0 *default-log-port* "trying db call one more time....this may never recover, if necessary kill process " (current-process-id) " on host " (get-host-name) " to clean up")))
      (apply open-run-close-exception-handling proc idb params))
@@ -1105,17 +1120,17 @@
            (launch:setup)) ;; added because Elena was getting stack dump because *configinfo* below was #f.
   (let* ((configdat (car *configinfo*))  ;; tut tut, global warning...
 	 (keys     (keys:config-get-fields configdat))
 	 (havekeys (> (length keys) 0))
 	 (keystr   (keys->keystr keys))
-	 (fieldstr (keys->key/field keys))
+	 (fieldstr (keys:make-key/field-string configdat))
 	 (db       (db:dbdat-get-db dbdat)))
     (for-each (lambda (key)
 		(let ((keyn key))
 		  (if (member (string-downcase keyn)
 			      (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count"
-				    "pass_count"))
+				    "pass_count" "contour"))
 		      (begin
 			(print "ERROR: your key cannot be named " keyn " as this conflicts with the same named field in the runs table, you must remove your megatest.db and <linktree>/.db before trying again.")
 			(exit 1)))))
 	      keys)
     (sqlite3:with-transaction
@@ -1448,11 +1463,11 @@
 ;; L O G G I N G    D B 
 ;;======================================================================
 
 (define (open-logging-db)
   (let* ((dbpath    (conc (if *toppath* (conc *toppath* "/") "") "logging.db")) ;; fname)
-	 (dbexists  (file-exists? dbpath))
+	 (dbexists  (common:file-exists? dbpath))
 	 (db        (sqlite3:open-database dbpath))
 	 (handler   (make-busy-timeout (if (args:get-arg "-override-timeout")
 					   (string->number (args:get-arg "-override-timeout"))
 					   136000)))) ;; 136000)))
     (sqlite3:set-busy-handler! db handler)
@@ -1604,11 +1619,11 @@
        ;;
        ;; (db:delay-if-busy dbdat)
        (let* (;; (min-incompleted (filter (lambda (x)
               ;;      		      (let* ((testpath (cadr x))
               ;;      			     (tdatpath (conc testpath "/testdat.db"))
-              ;;      			     (dbexists (file-exists? tdatpath)))
+              ;;      			     (dbexists (common:file-exists? tdatpath)))
               ;;      			(or (not dbexists) ;; if no file then something wrong - mark as incomplete
               ;;      			    (> (- (current-seconds)(file-modification-time tdatpath)) 600)))) ;; no change in 10 minutes to testdat.db - she's dead Jim
               ;;      		    incompleted))
               (min-incompleted-ids (map car incompleted)) ;; do 'em all
               (all-ids             (append min-incompleted-ids (map car oldlaunched))))
@@ -1671,10 +1686,14 @@
 	       "DELETE FROM tests WHERE run_id NOT IN (SELECT DISTINCT id FROM runs);"
 	       ;; delete all runs that are state='deleted'
 	       "DELETE FROM runs WHERE state='deleted';"
 	       ;; delete empty runs
 	       "DELETE FROM runs WHERE id NOT IN (SELECT DISTINCT r.id FROM runs AS r INNER JOIN tests AS t ON t.run_id=r.id);"
+	       ;; remove orphaned test_rundat entries
+	       "DELETE FROM test_rundat where test_id NOT IN (SELECT id FROM tests);"
+	       ;; 
+	       "DELETE FROM test_steps WHERE test_id NOT IN (SELECT id FROM tests);"
 	       ))))
     ;; (db:delay-if-busy dbdat)
     (sqlite3:with-transaction 
      db
      (lambda ()
@@ -1822,10 +1841,60 @@
 
 (define (db:del-var dbstruct var)
   (db:with-db dbstruct #f #t 
 	      (lambda (db)
 		(sqlite3:execute db "DELETE FROM metadat WHERE var=?;" var))))
+
+;;======================================================================
+;; no-sync.db - small bits of data to be shared between servers
+;;======================================================================
+
+(define (db:open-no-sync-db)
+  (let* ((dbpath (db:dbfile-path))
+	 (dbname (conc dbpath "/no-sync.db"))
+	 (db     (sqlite3:open-database dbname)))
+    (sqlite3:set-busy-handler! db (make-busy-timeout 136000))
+    (sqlite3:execute db "PRAGMA synchronous = 0;")
+    (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));")
+    db))
+
+;; if we are not a server create a db handle. this is not finalized
+;; so watch for problems. I'm still not clear if it is needed to manually
+;; finalize sqlite3 dbs with the sqlite3 egg.
+;;
+(define (db:no-sync-db db-in)
+  (if db-in
+      db-in
+      (let ((db (db:open-no-sync-db)))
+	(set! *no-sync-db* db)
+	db)))
+
+(define (db:no-sync-set db var val)
+  (sqlite3:execute (db:no-sync-db db) "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val))
+
+(define (db:no-sync-del! db var)
+  (sqlite3:execute (db:no-sync-db db) "DELETE FROM no_sync_metadat WHERE var=?;" var))
+
+(define (db:no-sync-get/default db var default)
+  (let ((res default))
+    (sqlite3:for-each-row
+     (lambda (val)
+       (set! res val))
+     (db:no-sync-db db)
+     "SELECT val FROM no_sync_metadat WHERE var=?;"
+     var)
+    (if res
+        (let ((newres (if (string? res)
+			  (string->number res)
+			  #f)))
+          (if newres
+              newres
+              res))
+        res)))
+
+(define (db:no-sync-close-db db)
+  (db:safely-close-sqlite3-db db))
 
 ;; use a global for some primitive caching, it is just silly to
 ;; re-read the db over and over again for the keys since they never
 ;; change
 
@@ -1848,14 +1917,19 @@
 ;; look up values in a header/data structure
 (define (db:get-value-by-header row header field)
   (if (or (null? header) (not row))
       #f
       (let loop ((hed (car header))
-		 (tal (cdr header))
-		 (n   0))
-	(if (equal? hed field)
-	    (vector-ref row n)
+                 (tal (cdr header))
+                 (n   0))
+        (if (equal? hed field)
+            (handle-exceptions
+             exn
+             (begin
+               (debug:print 0 *default-log-port* "WARNING: attempt to read non-existant field, row=" row " header=" header " field=" field)
+               #f)
+             (vector-ref row n))
 	    (if (null? tal) #f (loop (car tal)(cdr tal)(+ n 1)))))))
 
 ;; Accessors for the header/data structure
 ;; get rows and header from 
 (define (db:get-header vec)(vector-ref vec 0))
@@ -3077,12 +3151,12 @@
 			     (configf:lookup dat entry-name "message")      ;; 4 ;; Comment
 			     (configf:lookup dat entry-name "exit-status")  ;; 5 ;; Status
 			     "logpro"                                       ;; 6 ;; Type
 			     ))))
 	   (let* ((value     (or (configf:lookup dat entry-name "measured")  "n/a"))
-		  (expected  (or (configf:lookup dat entry-name "expected")  "n/a"))
-		  (tolerance (or (configf:lookup dat entry-name "tolerance") "n/a"))
+		  (expected  (or (configf:lookup dat entry-name "expected")  0.0))
+		  (tolerance (or (configf:lookup dat entry-name "tolerance") 0.0))
 		  (comment   (or (configf:lookup dat entry-name "comment")
 				 (configf:lookup dat entry-name "desc")      "n/a"))
 		  (status    (or (configf:lookup dat entry-name "status")    "n/a"))
 		  (type      (or (configf:lookup dat entry-name "expected")  "n/a")))
 	     (set! res (append
@@ -3183,10 +3257,25 @@
 	(lambda (id test_id category variable value expected tol units comment status type)
 	  (set! res (cons (vector id test_id category variable value expected tol units comment status type) res)))
 	db
 	"SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt)
        (reverse res)))))
+
+;; This routine moved from tdb.scm, tdb:read-test-data
+;;
+(define (db:read-test-data* dbstruct run-id test-id categorypatt varpatt)
+  (let* ((res '()))
+    (db:with-db
+     dbstruct #f #f
+     (lambda (db)
+       (sqlite3:for-each-row 
+	(lambda (id test_id category variable value expected tol units comment status type)
+	  (set! res (cons (vector id test_id category variable value expected tol units comment status type) res)))
+	db
+	"SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? AND variable LIKE ? ORDER BY category,variable;" test-id categorypatt varpatt)
+       (reverse res)))))
+
 
 ;;======================================================================
 ;; Misc. test related queries
 ;;======================================================================
 
@@ -3302,20 +3391,24 @@
 ;;
 ;; if test-name is an integer work off that instead of test-name test-path
 ;;
 (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
   ;; establish info on incoming test followed by info on top level test
+  ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met
+  
   (let* ((testdat      (if (number? test-name)
 			   (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id
 			   (db:get-test-info       dbstruct run-id test-name item-path)))
 	 (test-id      (db:test-get-id testdat))
 	 (test-name    (if (number? test-name)
 			   (db:test-get-testname testdat)
 			   test-name))
 	 (item-path    (db:test-get-item-path testdat))
          (tl-testdat   (db:get-test-info dbstruct run-id test-name ""))
-         (tl-test-id   (db:test-get-id tl-testdat)))
+         (tl-test-id   (if tl-testdat
+			   (db:test-get-id tl-testdat)
+			   #f)))
     (if (member state '("LAUNCHED" "REMOTEHOSTSTART"))
 	(db:general-call dbstruct 'set-test-start-time (list test-id)))
     (mutex-lock! *db-transaction-mutex*)
     (db:with-db
      dbstruct #f #f
@@ -3339,41 +3432,55 @@
                             ;; (non-completes        (filter (lambda (x)
                             ;;                                 (not (equal? (dbr:counts-state x) "COMPLETED")))
                             ;;                               state-status-counts))
                             (all-curr-states      (common:special-sort  ;; worst -> best (sort of)
                                                    (delete-duplicates
-                                                    (cons state (map dbr:counts-state state-status-counts)))
+                                                    (if (not (equal? state "DELETED"))
+                                                        (cons state (map dbr:counts-state state-status-counts))
+                                                        (map dbr:counts-state state-status-counts)))
                                                    *common:std-states* >))
                             (all-curr-statuses    (common:special-sort  ;; worst -> best
                                                    (delete-duplicates
-                                                    (cons status (map dbr:counts-status state-status-counts)))
+                                                    (if (not (equal? state "DELETED"))
+                                                        (cons status (map dbr:counts-status state-status-counts))
+                                                        (map dbr:counts-status state-status-counts)))
                                                    *common:std-statuses* >))
 			    (non-completes     (filter (lambda (x)
 							 (not (equal? x "COMPLETED")))
 						       all-curr-states))
+			    (num-non-completes (length non-completes))
+                            
                             (newstate          (cond
-						((> (length non-completes) 0) ;;
+						((> running 0)
+						 "RUNNING") ;; anything running, call the situation running
+						((> bad-not-started 0)  ;; we have an ugly situation, it is completed in the sense we cannot do more.
+						 "COMPLETED") 
+						((> num-non-completes 0) ;;
 						 (car non-completes))  ;;  (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states)))
+                                                ;; only rollup DELETED if all DELETED
 						(else
 						 (car all-curr-states))))
 			                       ;; (if (> running 0)
                                                ;;     "RUNNING"
                                                ;;     (if (> bad-not-started 0)
                                                ;;         "COMPLETED"
                                                ;;         (car all-curr-states))))
-                            (newstatus            (if (> bad-not-started 0)
-                                                      "CHECK"
+                            (newstatus            (if (or (> bad-not-started 0)
+							  (and (equal? newstate "NOT_STARTED")
+							       (> num-non-completes 0)))
+						      "STARTED"
                                                       (car all-curr-statuses))))
                        ;; (print "bad-not-supported: " bad-not-support " all-curr-states: " all-curr-states " all-curr-statuses: " all-curr-states)
                        ;;      " newstate: " newstate " newstatus: " newstatus)
                        ;; NB// Pass the db so it is part of the transaction
-                       (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)))))))
+                       (if tl-test-id
+			   (db:test-set-state-status db run-id tl-test-id newstate newstatus #f))))))))
          (mutex-unlock! *db-transaction-mutex*)
          (if (and test-id state status (equal? status "AUTO")) 
              (db:test-data-rollup dbstruct run-id test-id status))
          tr-res)))))
-
+;; BBnote: db:get-all-state-status-counts-for-test returns dbr:counts object aggregating state and status of items of a given test, *not including rollup state/status*
 (define (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path)
   (db:with-db
    dbstruct #f #f
    (lambda (db)
      (sqlite3:map-row
@@ -3719,11 +3826,11 @@
 		 exn
 		 (begin
 		   (debug:print-info 0 *default-log-port* "WARNING: failed to test for existance of " dbfj)
 		   (thread-sleep! 1)
 		   (db:delay-if-busy count (- count 1))) 
-		 (file-exists? dbfj))
+		 (common:file-exists? dbfj))
 		(case count
 		  ((6)
 		   (thread-sleep! 0.2)
 		   (db:delay-if-busy count: 5))
 		  ((5)
@@ -3894,10 +4001,11 @@
 ;;       mode 'itemmatch or 'itemwait means that tests items must be COMPLETED and (PASS|WARN|WAIVED|CHECK) [[ NB// NOT IMPLEMENTED YET ]]
 ;;       mode 'exclusive means this test/item cannot run if the same test/item is LAUNCHED,REMOTEHOSTSTART or RUNNING
 ;; 
 ;; (define (db:get-prereqs-not-met dbstruct run-id waitons ref-item-path mode)
 (define (db:get-prereqs-not-met dbstruct run-id waitons ref-test-name ref-item-path mode itemmaps) ;; #!key (mode '(normal))(itemmap #f))
+  ;; BBnote - rollup of an itemized test's overall state/status done in db:set-state-status-and-roll-up-items
   (append
    (if (member 'exclusive mode)
        (let ((running-tests (db:get-tests-for-run dbstruct
 						  #f  ;; run-id of #f means for all runs. 
 						  (if (string=? ref-item-path "")   ;; testpatt
@@ -3935,15 +4043,15 @@
 	    (let ((tests             (db:get-tests-for-run-state-status dbstruct run-id waitontest-name))
 		  (ever-seen         #f)
 		  (parent-waiton-met #f)
 		  (item-waiton-met   #f))
 	      (for-each 
-	       (lambda (test)
+	       (lambda (test) ;; BB- this is the upstream test
 		 ;; (if (equal? waitontest-name (db:test-get-testname test)) ;; by defintion this had better be true ...
 		 (let* ((state             (db:test-get-state test))
 			(status            (db:test-get-status test))
-			(item-path         (db:test-get-item-path test))
+			(item-path         (db:test-get-item-path test)) ;; BB- this is the upstream itempath
 			(is-completed      (equal? state "COMPLETED"))
 			(is-running        (equal? state "RUNNING"))
 			(is-killed         (equal? state "KILLED"))
 			(is-ok             (member status '("PASS" "WARN" "CHECK" "WAIVED" "SKIP")))
 			;;                                       testname-b    path-a    path-b
@@ -3964,12 +4072,12 @@
 		    ((and (not (null? (lset-intersection eq? mode '(itemmatch itemwait)))) ;; how is that different from (member mode '(itemmatch itemwait)) ?????
 			  ;; (not (equal? item-path "")) ;; this applies to both top level (to allow launching of next batch) and items
 			  same-itempath)
 		     (if (and is-completed is-ok)
 			 (set! item-waiton-met #t))
-		     (if (and (equal? item-path "")
-			      (or is-completed is-running));; this is the parent, set it to run if completed or running
+		     (if (and (equal? item-path "") ;; if upstream rollup test is completed, parent-waiton-met is set
+			      (or is-completed is-running));; this is the parent, set it to run if completed or running ;; BB1
 			 (set! parent-waiton-met #t)))
 		    ;; normal checking of parent items, any parent or parent item not ok blocks running
 		    ((and is-completed
 			  (or is-ok 
 			      (member 'toplevel mode))              ;; toplevel does not block on FAIL
@@ -4083,12 +4191,12 @@
 					       (testname  (vector-ref vb (+  2 numkeys)))
 					       (item-path (vector-ref vb (+  3 numkeys)))
 					       (final-log (vector-ref vb (+  7 numkeys)))
 					       (run-dir   (vector-ref vb (+ 18 numkeys)))
 					       (log-fpath (conc run-dir "/"  final-log))) ;; (string-intersperse keyvals "/") "/" testname "/" item-path "/"
-					  (debug:print 4 *default-log-port* "log: " log-fpath " exists: " (file-exists? log-fpath))
-					  (vector-set! vb (+ 7 numkeys) (if (file-exists? log-fpath)
+					  (debug:print 4 *default-log-port* "log: " log-fpath " exists: " (common:file-exists? log-fpath))
+					  (vector-set! vb (+ 7 numkeys) (if (common:file-exists? log-fpath)
 									    (let ((newpath (conc pathmod "/"
 												 (string-intersperse keyvals "/")
 												 "/" runname "/" testname "/"
 												 (if (string=? item-path "") "" (conc "/" item-path))
 												 final-log)))

Index: dcommon.scm
==================================================================
--- dcommon.scm
+++ dcommon.scm
@@ -19,11 +19,11 @@
 (declare (unit dcommon))
 
 (declare (uses megatest-version))
 (declare (uses gutils))
 (declare (uses db))
-(declare (uses synchash))
+;; (declare (uses synchash))
 
 (include "common_records.scm")
 (include "db_records.scm")
 (include "key_records.scm")
 (include "run_records.scm")
@@ -85,185 +85,185 @@
 ;;  3. Add extraction of filters to synchash calls
 ;;
 ;;    NOTE: Used in newdashboard
 ;;
 ;; Mode is 'full or 'incremental for full refresh or incremental refresh
-(define (dcommon:run-update keys data runname keypatts testpatt states statuses mode window-id)
-  (let* (;; count and offset => #f so not used
-	 ;; the synchash calls modify the "data" hash
-	 (changed         #f)
-	 (get-runs-sig    (conc (client:get-signature) " get-runs"))
-	 (get-tests-sig   (conc (client:get-signature) " get-tests"))
-	 (get-details-sig (conc (client:get-signature) " get-test-details"))
-
-	 ;; test-ids to get and display are indexed on window-id in curr-test-ids hash
-	 (test-ids        (hash-table-values (dboard:tabdat-curr-test-ids data)))
-	 ;; run-id is #f in next line to send the query to server 0
- 	 (run-changes     (synchash:client-get 'db:get-runs get-runs-sig (length keypatts) data #f runname #f #f keypatts))
-	 (tests-detail-changes (if (not (null? test-ids))
-				   (synchash:client-get 'db:get-test-info-by-ids get-details-sig 0  data #f test-ids)
-				   '()))
-
-	 ;; Now can calculate the run-ids
-	 (run-hash    (hash-table-ref/default data get-runs-sig #f))
-	 (run-ids     (if run-hash (filter number? (hash-table-keys run-hash)) '()))
-
-	 (all-test-changes (let ((res (make-hash-table)))
-			     (for-each (lambda (run-id)
-					 (if (> run-id 0)
-					     (hash-table-set! res run-id (synchash:client-get 'db:get-tests-for-run-mindata get-tests-sig 0 data run-id 1 testpatt states statuses #f))))
-				       run-ids)
-			     res))
-	 (runs-hash    (hash-table-ref/default data get-runs-sig #f))
-	 (header       (hash-table-ref/default runs-hash "header" #f))
-	 (run-ids      (sort (filter number? (hash-table-keys runs-hash))
-			     (lambda (a b)
-			       (let* ((record-a (hash-table-ref runs-hash a))
-				      (record-b (hash-table-ref runs-hash b))
-				      (time-a   (db:get-value-by-header record-a header "event_time"))
-				      (time-b   (db:get-value-by-header record-b header "event_time")))
-				 (> time-a time-b)))
-			     ))
-	 (runid-to-col    (hash-table-ref *cachedata* "runid-to-col"))
-	 (testname-to-row (hash-table-ref *cachedata* "testname-to-row")) 
-	 (colnum       1)
-	 (rownum       0)
-	 (cellname (conc rownum ":" colnum))) ;; rownum = 0 is the header
-;; (debug:print 0 *default-log-port* "test-ids " test-ids ", tests-detail-changes " tests-detail-changes)
-    
-	 ;; tests related stuff
-	 ;; (all-testnames (delete-duplicates (map db:test-get-testname test-changes))))
-
-    ;; Given a run-id and testname/item_path calculate a cell R:C
-
-    ;; NOTE: Also build the test tree browser and look up table
-    ;;
-    ;; Each run is unique on its keys and runname or run-id, store in hash on colnum
-    (for-each (lambda (run-id)
-		(let* ((run-record (hash-table-ref/default runs-hash run-id #f))
-		       (key-vals   (map (lambda (key)(db:get-value-by-header run-record header key))
-					keys))
-		       (run-name   (db:get-value-by-header run-record header "runname"))
-		       (col-name   (conc (string-intersperse key-vals "\n") "\n" run-name))
-		       (run-path   (append key-vals (list run-name))))
-		  (hash-table-set! (dboard:tabdat-run-keys data) run-id run-path)
-		  ;; modify cell - but only if changed
-		  (set! changed (dcommon:modifiy-if-different (dboard:tabdat-runs-matrix data) cellname col-name changed))
-		  (hash-table-set! runid-to-col run-id (list colnum run-record))
-		  ;; Here we update the tests treebox and tree keys
-		  (tree:add-node (dboard:tabdat-tests-tree data) "Runs" (append key-vals (list run-name))
-				 userdata: (conc "run-id: " run-id))
-		  (set! colnum (+ colnum 1))))
-	      run-ids)
-
-    ;; Scan all tests to be displayed and organise all the test names, respecting what is in the hash table
-    ;; Do this analysis in the order of the run-ids, the most recent run wins
-    (for-each (lambda (run-id)
-		(let* ((run-path       (hash-table-ref (dboard:tabdat-run-keys data) run-id))
-		       (test-changes   (hash-table-ref all-test-changes run-id))
-		       (new-test-dat   (car test-changes))
-		       (removed-tests  (cadr test-changes))
-		       (tests          (sort (map cadr (filter (lambda (testrec)
-								 (eq? run-id (db:mintest-get-run_id (cadr testrec))))
-							       new-test-dat))
-					     (lambda (a b)
-					       (let ((time-a (db:mintest-get-event_time a))
-						     (time-b (db:mintest-get-event_time b)))
-						 (> time-a time-b)))))
-		       ;; test-changes is a list of (( id record ) ... )
-		       ;; Get list of test names sorted by time, remove tests
-		       (test-names (delete-duplicates (map (lambda (t)
-							     (let ((i (db:mintest-get-item_path t))
-								   (n (db:mintest-get-testname  t)))
-							       (if (string=? i "")
-								   (conc "   " i)
-								   n)))
-							   tests)))
-		       (colnum     (car (hash-table-ref runid-to-col run-id))))
-		  ;; for each test name get the slot if it exists and fill in the cell
-		  ;; or take the next slot and fill in the cell, deal with items in the
-		  ;; run view panel? The run view panel can have a tree selector for
-		  ;; browsing the tests/items
-
-		  ;; SWITCH THIS TO USING CHANGED TESTS ONLY
-		  (for-each (lambda (test)
-			      (let* ((test-id   (db:mintest-get-id test))
-				     (state     (db:mintest-get-state test))
-				     (status    (db:mintest-get-status test))
-				     (testname  (db:mintest-get-testname test))
-				     (itempath  (db:mintest-get-item_path test))
-				     (fullname  (conc testname "/" itempath))
-				     (dispname  (if (string=? itempath "") testname (conc "   " itempath)))
-				     (rownum    (hash-table-ref/default testname-to-row fullname #f))
-				     (test-path (append run-path (if (equal? itempath "") 
-								     (list testname)
-								     (list testname itempath))))
-				     (tb         (dboard:tabdat-tests-tree data)))
-				(print "INFONOTE: run-path: " run-path)
-				(tree:add-node (dboard:tabdat-tests-tree data) "Runs" 
-					       test-path
-					       userdata: (conc "test-id: " test-id))
-				(let ((node-num (tree:find-node tb (cons "Runs" test-path)))
-				      (color    (car (gutils:get-color-for-state-status state status))))
-				  (debug:print 0 *default-log-port* "node-num: " node-num ", color: " color)
-
-				  (set! changed (dcommon:modifiy-if-different 
-						 tb
-						 (conc "COLOR" node-num)
-						 color changed))
-
-				  ;; (iup:attribute-set! tb (conc "COLOR" node-num) color)
-				  )
-				(hash-table-set! (dboard:tabdat-path-test-ids data) test-path test-id)
-				(if (not rownum)
-				    (let ((rownums (hash-table-values testname-to-row)))
-				      (set! rownum (if (null? rownums)
-						       1
-						       (+ 1 (common:max rownums))))
-				      (hash-table-set! testname-to-row fullname rownum)
-				      ;; create the label
-				      (set! changed (dcommon:modifiy-if-different 
-						     (dboard:tabdat-runs-matrix data)
-						     (conc rownum ":" 0)
-						     dispname
-						     changed))
-				      ;; (iup:attribute-set! (dboard:tabdat-runs-matrix data)
-				      ;;   		  (conc rownum ":" 0) dispname)
-				      ))
-				;; set the cell text and color
-				;; (debug:print 2 *default-log-port* "rownum:colnum=" rownum ":" colnum ", state=" status)
-				(set! changed (dcommon:modifiy-if-different 
-						     (dboard:tabdat-runs-matrix data)
-						     (conc rownum ":" colnum)
-						     (if (member state '("ARCHIVED" "COMPLETED"))
-							 status
-							 state)
-						     changed))
-				;; (iup:attribute-set! (dboard:tabdat-runs-matrix data)
-				;; 		    (conc rownum ":" colnum)
-				;; 		    (if (member state '("ARCHIVED" "COMPLETED"))
-				;; 			status
-				;; 			state))
-				(set! changed (dcommon:modifiy-if-different 
-					       (dboard:tabdat-runs-matrix data)
-					       (conc "BGCOLOR" rownum ":" colnum)
-					       (car (gutils:get-color-for-state-status state status))
-					       changed))
-				;; (iup:attribute-set! (dboard:tabdat-runs-matrix data)
-				;; 		    (conc "BGCOLOR" rownum ":" colnum)
-				;; 		    (car (gutils:get-color-for-state-status state status)))
-				))
-			    tests)))
-	      run-ids)
-
-    (let ((updater (hash-table-ref/default  (dboard:commondat-updaters commondat) window-id #f)))
-      (if updater (updater (hash-table-ref/default data get-details-sig #f))))
-
-    (if changed (iup:attribute-set! (dboard:tabdat-runs-matrix data) "REDRAW" "ALL"))
-    ;; (debug:print 2 *default-log-port* "run-changes: " run-changes)
-    ;; (debug:print 2 *default-log-port* "test-changes: " test-changes)
-    (list run-changes all-test-changes)))
+;; (define (dcommon:run-update keys data runname keypatts testpatt states statuses mode window-id)
+;;   (let* (;; count and offset => #f so not used
+;; 	 ;; the synchash calls modify the "data" hash
+;; 	 (changed         #f)
+;; 	 (get-runs-sig    (conc (client:get-signature) " get-runs"))
+;; 	 (get-tests-sig   (conc (client:get-signature) " get-tests"))
+;; 	 (get-details-sig (conc (client:get-signature) " get-test-details"))
+;; 
+;; 	 ;; test-ids to get and display are indexed on window-id in curr-test-ids hash
+;; 	 (test-ids        (hash-table-values (dboard:tabdat-curr-test-ids data)))
+;; 	 ;; run-id is #f in next line to send the query to server 0
+;;  	 (run-changes     (synchash:client-get 'db:get-runs get-runs-sig (length keypatts) data #f runname #f #f keypatts))
+;; 	 (tests-detail-changes (if (not (null? test-ids))
+;; 				   (synchash:client-get 'db:get-test-info-by-ids get-details-sig 0  data #f test-ids)
+;; 				   '()))
+;; 
+;; 	 ;; Now can calculate the run-ids
+;; 	 (run-hash    (hash-table-ref/default data get-runs-sig #f))
+;; 	 (run-ids     (if run-hash (filter number? (hash-table-keys run-hash)) '()))
+;; 
+;; 	 (all-test-changes (let ((res (make-hash-table)))
+;; 			     (for-each (lambda (run-id)
+;; 					 (if (> run-id 0)
+;; 					     (hash-table-set! res run-id (synchash:client-get 'db:get-tests-for-run-mindata get-tests-sig 0 data run-id 1 testpatt states statuses #f))))
+;; 				       run-ids)
+;; 			     res))
+;; 	 (runs-hash    (hash-table-ref/default data get-runs-sig #f))
+;; 	 (header       (hash-table-ref/default runs-hash "header" #f))
+;; 	 (run-ids      (sort (filter number? (hash-table-keys runs-hash))
+;; 			     (lambda (a b)
+;; 			       (let* ((record-a (hash-table-ref runs-hash a))
+;; 				      (record-b (hash-table-ref runs-hash b))
+;; 				      (time-a   (db:get-value-by-header record-a header "event_time"))
+;; 				      (time-b   (db:get-value-by-header record-b header "event_time")))
+;; 				 (> time-a time-b)))
+;; 			     ))
+;; 	 (runid-to-col    (hash-table-ref *cachedata* "runid-to-col"))
+;; 	 (testname-to-row (hash-table-ref *cachedata* "testname-to-row")) 
+;; 	 (colnum       1)
+;; 	 (rownum       0)
+;; 	 (cellname (conc rownum ":" colnum))) ;; rownum = 0 is the header
+;; ;; (debug:print 0 *default-log-port* "test-ids " test-ids ", tests-detail-changes " tests-detail-changes)
+;;     
+;; 	 ;; tests related stuff
+;; 	 ;; (all-testnames (delete-duplicates (map db:test-get-testname test-changes))))
+;; 
+;;     ;; Given a run-id and testname/item_path calculate a cell R:C
+;; 
+;;     ;; NOTE: Also build the test tree browser and look up table
+;;     ;;
+;;     ;; Each run is unique on its keys and runname or run-id, store in hash on colnum
+;;     (for-each (lambda (run-id)
+;; 		(let* ((run-record (hash-table-ref/default runs-hash run-id #f))
+;; 		       (key-vals   (map (lambda (key)(db:get-value-by-header run-record header key))
+;; 					keys))
+;; 		       (run-name   (db:get-value-by-header run-record header "runname"))
+;; 		       (col-name   (conc (string-intersperse key-vals "\n") "\n" run-name))
+;; 		       (run-path   (append key-vals (list run-name))))
+;; 		  (hash-table-set! (dboard:tabdat-run-keys data) run-id run-path)
+;; 		  ;; modify cell - but only if changed
+;; 		  (set! changed (dcommon:modifiy-if-different (dboard:tabdat-runs-matrix data) cellname col-name changed))
+;; 		  (hash-table-set! runid-to-col run-id (list colnum run-record))
+;; 		  ;; Here we update the tests treebox and tree keys
+;; 		  (tree:add-node (dboard:tabdat-tests-tree data) "Runs" (append key-vals (list run-name))
+;; 				 userdata: (conc "run-id: " run-id))
+;; 		  (set! colnum (+ colnum 1))))
+;; 	      run-ids)
+;; 
+;;     ;; Scan all tests to be displayed and organise all the test names, respecting what is in the hash table
+;;     ;; Do this analysis in the order of the run-ids, the most recent run wins
+;;     (for-each (lambda (run-id)
+;; 		(let* ((run-path       (hash-table-ref (dboard:tabdat-run-keys data) run-id))
+;; 		       (test-changes   (hash-table-ref all-test-changes run-id))
+;; 		       (new-test-dat   (car test-changes))
+;; 		       (removed-tests  (cadr test-changes))
+;; 		       (tests          (sort (map cadr (filter (lambda (testrec)
+;; 								 (eq? run-id (db:mintest-get-run_id (cadr testrec))))
+;; 							       new-test-dat))
+;; 					     (lambda (a b)
+;; 					       (let ((time-a (db:mintest-get-event_time a))
+;; 						     (time-b (db:mintest-get-event_time b)))
+;; 						 (> time-a time-b)))))
+;; 		       ;; test-changes is a list of (( id record ) ... )
+;; 		       ;; Get list of test names sorted by time, remove tests
+;; 		       (test-names (delete-duplicates (map (lambda (t)
+;; 							     (let ((i (db:mintest-get-item_path t))
+;; 								   (n (db:mintest-get-testname  t)))
+;; 							       (if (string=? i "")
+;; 								   (conc "   " i)
+;; 								   n)))
+;; 							   tests)))
+;; 		       (colnum     (car (hash-table-ref runid-to-col run-id))))
+;; 		  ;; for each test name get the slot if it exists and fill in the cell
+;; 		  ;; or take the next slot and fill in the cell, deal with items in the
+;; 		  ;; run view panel? The run view panel can have a tree selector for
+;; 		  ;; browsing the tests/items
+;; 
+;; 		  ;; SWITCH THIS TO USING CHANGED TESTS ONLY
+;; 		  (for-each (lambda (test)
+;; 			      (let* ((test-id   (db:mintest-get-id test))
+;; 				     (state     (db:mintest-get-state test))
+;; 				     (status    (db:mintest-get-status test))
+;; 				     (testname  (db:mintest-get-testname test))
+;; 				     (itempath  (db:mintest-get-item_path test))
+;; 				     (fullname  (conc testname "/" itempath))
+;; 				     (dispname  (if (string=? itempath "") testname (conc "   " itempath)))
+;; 				     (rownum    (hash-table-ref/default testname-to-row fullname #f))
+;; 				     (test-path (append run-path (if (equal? itempath "") 
+;; 								     (list testname)
+;; 								     (list testname itempath))))
+;; 				     (tb         (dboard:tabdat-tests-tree data)))
+;; 				(print "INFONOTE: run-path: " run-path)
+;; 				(tree:add-node (dboard:tabdat-tests-tree data) "Runs" 
+;; 					       test-path
+;; 					       userdata: (conc "test-id: " test-id))
+;; 				(let ((node-num (tree:find-node tb (cons "Runs" test-path)))
+;; 				      (color    (car (gutils:get-color-for-state-status state status))))
+;; 				  (debug:print 0 *default-log-port* "node-num: " node-num ", color: " color)
+;; 
+;; 				  (set! changed (dcommon:modifiy-if-different 
+;; 						 tb
+;; 						 (conc "COLOR" node-num)
+;; 						 color changed))
+;; 
+;; 				  ;; (iup:attribute-set! tb (conc "COLOR" node-num) color)
+;; 				  )
+;; 				(hash-table-set! (dboard:tabdat-path-test-ids data) test-path test-id)
+;; 				(if (not rownum)
+;; 				    (let ((rownums (hash-table-values testname-to-row)))
+;; 				      (set! rownum (if (null? rownums)
+;; 						       1
+;; 						       (+ 1 (common:max rownums))))
+;; 				      (hash-table-set! testname-to-row fullname rownum)
+;; 				      ;; create the label
+;; 				      (set! changed (dcommon:modifiy-if-different 
+;; 						     (dboard:tabdat-runs-matrix data)
+;; 						     (conc rownum ":" 0)
+;; 						     dispname
+;; 						     changed))
+;; 				      ;; (iup:attribute-set! (dboard:tabdat-runs-matrix data)
+;; 				      ;;   		  (conc rownum ":" 0) dispname)
+;; 				      ))
+;; 				;; set the cell text and color
+;; 				;; (debug:print 2 *default-log-port* "rownum:colnum=" rownum ":" colnum ", state=" status)
+;; 				(set! changed (dcommon:modifiy-if-different 
+;; 						     (dboard:tabdat-runs-matrix data)
+;; 						     (conc rownum ":" colnum)
+;; 						     (if (member state '("ARCHIVED" "COMPLETED"))
+;; 							 status
+;; 							 state)
+;; 						     changed))
+;; 				;; (iup:attribute-set! (dboard:tabdat-runs-matrix data)
+;; 				;; 		    (conc rownum ":" colnum)
+;; 				;; 		    (if (member state '("ARCHIVED" "COMPLETED"))
+;; 				;; 			status
+;; 				;; 			state))
+;; 				(set! changed (dcommon:modifiy-if-different 
+;; 					       (dboard:tabdat-runs-matrix data)
+;; 					       (conc "BGCOLOR" rownum ":" colnum)
+;; 					       (car (gutils:get-color-for-state-status state status))
+;; 					       changed))
+;; 				;; (iup:attribute-set! (dboard:tabdat-runs-matrix data)
+;; 				;; 		    (conc "BGCOLOR" rownum ":" colnum)
+;; 				;; 		    (car (gutils:get-color-for-state-status state status)))
+;; 				))
+;; 			    tests)))
+;; 	      run-ids)
+;; 
+;;     (let ((updater (hash-table-ref/default  (dboard:commondat-updaters commondat) window-id #f)))
+;;       (if updater (updater (hash-table-ref/default data get-details-sig #f))))
+;; 
+;;     (if changed (iup:attribute-set! (dboard:tabdat-runs-matrix data) "REDRAW" "ALL"))
+;;     ;; (debug:print 2 *default-log-port* "run-changes: " run-changes)
+;;     ;; (debug:print 2 *default-log-port* "test-changes: " test-changes)
+;;     (list run-changes all-test-changes)))
 
 (define (dcommon:runsdat-get-col-num dat target runname force-set)
   (let* ((runs-index (dboard:runsdat-runs-index dat))
 	 (col-name   (conc target "/" runname))
 	 (res        (hash-table-ref/default runs-index col-name #f)))
@@ -491,11 +491,11 @@
 (define (dcommon:section-matrix rawconfig sectionname varcolname valcolname #!key (title #f))
   (let* ((curr-row-num    1)
          (key-vals        (configf:section-vars rawconfig sectionname))
          (section-matrix  (iup:matrix
                            #:alignment1 "ALEFT"
-                           #:expand "YES" ;; "HORIZONTAL"
+                           ;; #:expand "YES" ;; "HORIZONTAL"
                            #:numcol 1
                            #:numlin (length key-vals)
                            #:numcol-visible 1
                            #:numlin-visible (min 10 (length key-vals))
 			   #:scrollbar "YES")))
@@ -1185,11 +1185,11 @@
 										       (* scalef 0.01)
 										       (* scalef -0.01))))
 				      (if the-cnv
 					  (dashboard:draw-tests the-cnv last-xadj last-yadj tests-draw-state sorted-testnames test-records))
 				      ))
-		       ;; #:size "50x50"
+		       ;; #:size "250x250"
 		       #:expand "YES"
 		       #:scrollbar "YES"
 		       #:posx "0.5"
 		       #:posy "0.5"
 		       #:button-cb (lambda (obj btn pressed x y status)

Index: docs/Makefile
==================================================================
--- docs/Makefile
+++ docs/Makefile
@@ -13,5 +13,7 @@
 	fossil add html/*
 
 megatest.pdf : megatest.lyx
 	lyx -e pdf2 megatest.lyx
 
+pkts.pdf : pkts.dot
+	dot -Tpdf pkts.dot -o pkts.pdf

Index: docs/manual/megatest_manual.html
==================================================================
--- docs/manual/megatest_manual.html
+++ docs/manual/megatest_manual.html
@@ -801,18 +801,17 @@
 </div>
 </div>
 <div class="sect1">
 <h2 id="_megatest_design_philosophy">Megatest Design Philosophy</h2>
 <div class="sectionbody">
-<div class="paragraph"><p>Megatest is intended to provide the minimum needed resources to make
-writing a suite of tests and tasks for implementing continuous build
-for software, design engineering or process control (via owlfs for
-example) without being specialized for any specific problem
-space. Megatest in of itself does not know what constitutes a PASS or
-FAIL of a test or task. In most cases megatest is best used in
-conjunction with logpro or a similar tool to parse, analyze and decide
-on the test outcome.</p></div>
+<div class="paragraph"><p>Megatest is a distributed system intended to provide the minimum needed
+resources to make writing a suite of tests and tasks for implementing
+continuous build for software, design engineering or process control (via
+owlfs for example) without being specialized for any specific problem
+space. Megatest in of itself does not know what constitutes a PASS or FAIL
+of a test or task. In most cases megatest is best used in conjunction with
+logpro or a similar tool to parse, analyze and decide on the test outcome.</p></div>
 <div class="ulist"><ul>
 <li>
 <p>
 Self-checking -Repeatable strive for directed or self-checking test
    as opposed to delta based tests
@@ -876,24 +875,24 @@
 <h4 id="_goals">Goals</h4>
 <div class="olist arabic"><ol class="arabic">
 <li>
 <p>
 Reduce load on the file system. Sqlite3 files on network filesystem can be
-  a burden.
+  a burden. <span class="green">[DONE]</span>
 </p>
 </li>
 <li>
 <p>
 Reduce number of servers and frequency of start/stop. This is mostly an
-  issue of clutter but also a reduction in "moving parts".
+  issue of clutter but also a reduction in "moving parts". <span class="green">[DONE]</span>
 </p>
 </li>
 <li>
 <p>
 Coalesce activities to a single home host where possible. Give the user
   feedback that they have started the dashboard on a host other than the
-  home host.
+  home host. <span class="green">[DONE]</span>
 </p>
 </li>
 <li>
 <p>
 Reduce number of processes involved in managing running tests.
@@ -905,35 +904,35 @@
 <h4 id="_changes_needed">Changes Needed</h4>
 <div class="olist arabic"><ol class="arabic">
 <li>
 <p>
 ACID compliant db will be on /tmp and synced to megatest.db with a five
-  second max delay.
+  second max delay. <span class="green">[DONE]</span>
 </p>
 </li>
 <li>
 <p>
 Read/writes to db for processes on homehost will go direct to /tmp
-  megatest.db file.
+  megatest.db file. <span class="green">[DONE]</span>
 </p>
 </li>
 <li>
 <p>
 Read/wites fron non-homehost processes will go through one server. Bulk
   reads (e.g. for dashboard or list-runs) will be cached on the current host
-  in /tmp and synced from the home megatest.db in the testsuite area.
+  in /tmp and synced from the home megatest.db in the testsuite area. <span class="green">[DONE]</span>
 </p>
 </li>
 <li>
 <p>
-Db syncs rely on the target db file timestame minus some margin.
+Db syncs rely on the target db file timestame minus some margin. <span class="green">[DONE]</span>
 </p>
 </li>
 <li>
 <p>
 Since bulk reads do not use the server we can switch to simple RPC for the
-  network transport.
+  network transport. <span class="green">[DONE]</span>
 </p>
 </li>
 <li>
 <p>
 Test running manager process extended to manage multiple running tests.
@@ -947,31 +946,32 @@
 <div class="sect3">
 <h4 id="_ww05_migrate_to_inmem_db">ww05 - migrate to inmem-db</h4>
 <div class="olist arabic"><ol class="arabic">
 <li>
 <p>
-Switch to inmem db with fast sync to on disk db&#8217;s [DONE]
+Switch to inmem db with fast sync to on disk db&#8217;s <span class="green">[DONE]</span>
 </p>
 </li>
 <li>
 <p>
 Server polls tasks table for next action
 </p>
 <div class="olist loweralpha"><ol class="loweralpha">
 <li>
 <p>
-Task table used for tracking runner process [DONE]
+Task table used for tracking runner process <span class="red">[Replaced by mtutil]</span>
 </p>
 </li>
 <li>
 <p>
-Task table used for jobs to run
+Task table used for jobs to run <span class="red">[Replaced by mtutil]</span>
 </p>
 </li>
 <li>
 <p>
-Task table used for queueing runner actions (remove runs, cleanRunExecute, etc)
+Task table used for queueing runner actions (remove runs,
+   cleanRunExecute, etc)  <span class="red">[Replaced by mtutil</span>]
 </p>
 </li>
 </ol></div>
 </li>
 </ol></div>
@@ -1415,15 +1415,35 @@
 <pre>[items]
 A a b c
 B d e f</pre>
 </div></div>
 <div class="paragraph"><p>Then the config file would effectively appear to contain an items section
-exactly like the output from the script. This is extremely useful when
-dynamically creating items, itemstables and other config structures. You can
-see the expansion of the call by looking in the cached files (look in your
-linktree for megatest.config and runconfigs.config cache files and in your
-test run areas for the expanded and cached testconfig).</p></div>
+exactly like the output from the script. This is useful when dynamically
+creating items, itemstables and other config structures. You can see the
+expansion of the call by looking in the cached files (look in your linktree
+for megatest.config and runconfigs.config cache files and in your test run
+areas for the expanded and cached testconfig).</p></div>
+<div class="paragraph"><p>Wildcards and regexes in Targets</p></div>
+<div class="listingblock">
+<div class="content monospaced">
+<pre>[a/2/b]
+VAR1 VAL1
+
+[a/%/b]
+VAR1 VAL2</pre>
+</div></div>
+<div class="paragraph"><p>Will result in:</p></div>
+<div class="listingblock">
+<div class="content monospaced">
+<pre>[a/2/b]
+VAR1 VAL2</pre>
+</div></div>
+<div class="paragraph"><p>Can use either wildcard of "%" or a regular expression:</p></div>
+<div class="listingblock">
+<div class="content monospaced">
+<pre>[/abc.*def/]</pre>
+</div></div>
 <div class="sect3">
 <h4 id="_disk_space_checks">Disk Space Checks</h4>
 <div class="paragraph"><p>Some parameters you can put in the [setup] section of megatest.config:</p></div>
 <div class="listingblock">
 <div class="content monospaced">
@@ -1489,10 +1509,19 @@
 <div class="listingblock">
 <div class="title">In megatest.config</div>
 <div class="content monospaced">
 <pre>[setup]
 reruns 5</pre>
+</div></div>
+<div class="paragraph"><p>Replace the default blacklisted environment variables with user supplied
+list.</p></div>
+<div class="paragraph"><p>Default list: USER HOME DISPLAY LS_COLORS XKEYSYMDB EDITOR MAKEFLAGS MAKEF MAKEOVERRIDES</p></div>
+<div class="paragraph"><div class="title">Add a "bad" variable "PROMPT" to the variables that will be commented out</div><p>in the megatest.sh and megatest.csh files:</p></div>
+<div class="listingblock">
+<div class="content monospaced">
+<pre>[setup]
+blacklistvars USER HOME DISPLAY LS_COLORS XKEYSYMDB EDITOR MAKEFLAGS PROMPT</pre>
 </div></div>
 </div>
 <div class="sect4">
 <h5 id="_run_time_limit">Run time limit</h5>
 <div class="listingblock">
@@ -1893,11 +1922,11 @@
 <pre>$MT_MEGATEST -env2file .ezsteps/${stepname}</pre>
 </div></div>
 </div>
 <div class="sect2">
 <h3 id="_triggers">Triggers</h3>
-<div class="paragraph"><p>In your testconfig triggers can be specified</p></div>
+<div class="paragraph"><p>In your testconfig or megatest.config triggers can be specified</p></div>
 <div class="listingblock">
 <div class="content monospaced">
 <pre>[triggers]
 
 # Call script running.sh when test goes to state=RUNNING, status=PASS
@@ -1907,11 +1936,11 @@
 RUNNING/ running.sh
 
 # Call script onpass.sh any time status goes to PASS
 PASS/ onpass.sh</pre>
 </div></div>
-<div class="paragraph"><p>Scripts called will have; test-id test-rundir trigger, added to the commandline.</p></div>
+<div class="paragraph"><p>Scripts called will have; test-id test-rundir trigger test-name item-path state status event-time, added to the commandline.</p></div>
 <div class="paragraph"><p>HINT</p></div>
 <div class="paragraph"><p>To start an xterm (useful for debugging), use a command line like the following:</p></div>
 <div class="listingblock">
 <div class="content monospaced">
 <pre>[triggers]
@@ -1923,10 +1952,78 @@
 <img src="/nfs/pdx/disks/ice.disk.002/icfadm/pkgs/asciidoc/8.6.7/images/icons/note.png" alt="Note">
 </td>
 <td class="content">There is a trailing space after the --</td>
 </tr></table>
 </div>
+<div class="paragraph"><p>There are a number of environment variables available to the trigger script
+but since triggers can be called in various contexts not all variables are
+available at all times. The trigger script should check for the variable and
+fail gracefully if it doesn&#8217;t exist.</p></div>
+<table class="tableblock frame-topbot grid-all"
+style="
+width:90%;
+">
+<caption class="title">Table 4. Environment variables visible to the trigger script</caption>
+<col style="width:33%;">
+<col style="width:66%;">
+<thead>
+<tr>
+<th class="tableblock halign-center valign-top" >Variable             </th>
+<th class="tableblock halign-left valign-top" > Purpose</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td class="tableblock halign-center valign-top" ><p class="tableblock">MT_TEST_RUN_DIR</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">The directory where Megatest ran this test</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-center valign-top" ><p class="tableblock">MT_CMDINFO</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">Encoded command data for the test</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-center valign-top" ><p class="tableblock">MT_DEBUG_MODE</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">Used to pass the debug mode to nested calls to Megatest</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-center valign-top" ><p class="tableblock">MT_RUN_AREA_HOME</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">Megatest home area</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-center valign-top" ><p class="tableblock">MT_TESTSUITENAME</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">The name of this testsuite or area</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-center valign-top" ><p class="tableblock">MT_TEST_NAME</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">The name of this test</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-center valign-top" ><p class="tableblock">MT_ITEM_INFO</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">The variable and values for the test item</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-center valign-top" ><p class="tableblock">MT_MEGATEST</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">Which Megatest binary is being used by this area</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-center valign-top" ><p class="tableblock">MT_TARGET</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">The target variable values, separated by <em>/</em></p></td>
+</tr>
+<tr>
+<td class="tableblock halign-center valign-top" ><p class="tableblock">MT_LINKTREE</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">The base of the link tree where all run tests can be found</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-center valign-top" ><p class="tableblock">MT_ITEMPATH</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">The values of the item path variables, separated by <em>/</em></p></td>
+</tr>
+<tr>
+<td class="tableblock halign-center valign-top" ><p class="tableblock">MT_RUNNAME</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">The name of the run</p></td>
+</tr>
+</tbody>
+</table>
 </div>
 <div class="sect2">
 <h3 id="_override_the_toplevel_html_file">Override the Toplevel HTML File</h3>
 <div class="paragraph"><p>Megatest generates a simple html file summary for top level tests of
 iterated tests. The generation can be overridden. NOTE: the output of
@@ -2011,11 +2108,11 @@
 <div class="paragraph"><p>These routines can be called from the megatest repl.</p></div>
 <table class="tableblock frame-topbot grid-all"
 style="
 width:70%;
 ">
-<caption class="title">Table 4. API Keys Related Calls</caption>
+<caption class="title">Table 5. API Keys Related Calls</caption>
 <col style="width:14%;">
 <col style="width:28%;">
 <col style="width:28%;">
 <col style="width:28%;">
 <thead>
@@ -2063,10 +2160,10 @@
 </div>
 <div id="footnotes"><hr></div>
 <div id="footer">
 <div id="footer-text">
 Version 1.0<br>
-Last updated 2016-12-12 13:03:08 PST
+Last updated 2017-05-15 15:18:21 PDT
 </div>
 </div>
 </body>
 </html>

Index: docs/manual/megatest_manual.txt
==================================================================
--- docs/manual/megatest_manual.txt
+++ docs/manual/megatest_manual.txt
@@ -24,18 +24,17 @@
 qualification.
 
 Megatest Design Philosophy
 --------------------------
 
-Megatest is intended to provide the minimum needed resources to make
-writing a suite of tests and tasks for implementing continuous build
-for software, design engineering or process control (via owlfs for
-example) without being specialized for any specific problem
-space. Megatest in of itself does not know what constitutes a PASS or
-FAIL of a test or task. In most cases megatest is best used in
-conjunction with logpro or a similar tool to parse, analyze and decide
-on the test outcome. 
+Megatest is a distributed system intended to provide the minimum needed
+resources to make writing a suite of tests and tasks for implementing
+continuous build for software, design engineering or process control (via
+owlfs for example) without being specialized for any specific problem
+space. Megatest in of itself does not know what constitutes a PASS or FAIL
+of a test or task. In most cases megatest is best used in conjunction with
+logpro or a similar tool to parse, analyze and decide on the test outcome.
 
  * Self-checking -Repeatable strive for directed or self-checking test
    as opposed to delta based tests
 
  * Traceable - environment variables, host OS and other possibly influential

Index: docs/manual/reference.txt
==================================================================
--- docs/manual/reference.txt
+++ docs/manual/reference.txt
@@ -67,11 +67,13 @@
 VAR1 VAL2
 -------------------------
 
 Can use either wildcard of "%" or a regular expression:
 
+-------------------------
 [/abc.*def/]
+-------------------------
 
 Disk Space Checks
 ^^^^^^^^^^^^^^^^^
 
 Some parameters you can put in the [setup] section of megatest.config:
@@ -143,10 +145,22 @@
 .In megatest.config
 ------------------
 [setup]
 reruns 5
 ------------------
+
+Replace the default blacklisted environment variables with user supplied
+list.
+
+Default list: USER HOME DISPLAY LS_COLORS XKEYSYMDB EDITOR MAKEFLAGS MAKEF MAKEOVERRIDES
+
+.Add a "bad" variable "PROMPT" to the variables that will be commented out
+in the megatest.sh and megatest.csh files:
+-----------------
+[setup]
+blacklistvars USER HOME DISPLAY LS_COLORS XKEYSYMDB EDITOR MAKEFLAGS PROMPT
+-----------------
 
 Run time limit
 ++++++++++++++
 
 -----------------
@@ -468,11 +482,11 @@
 ----------------------------
 
 Triggers
 ~~~~~~~~
 
-In your testconfig triggers can be specified 
+In your testconfig or megatest.config triggers can be specified 
 
 -----------------
 [triggers]
 
 # Call script running.sh when test goes to state=RUNNING, status=PASS
@@ -483,11 +497,11 @@
 
 # Call script onpass.sh any time status goes to PASS
 PASS/ onpass.sh
 -----------------
 
-Scripts called will have; test-id test-rundir trigger, added to the commandline.
+Scripts called will have; test-id test-rundir trigger test-name item-path state status event-time, added to the commandline.
 
 HINT
 
 To start an xterm (useful for debugging), use a command line like the following:
 
@@ -495,10 +509,33 @@
 [triggers]
 COMPLETED/ xterm -e bash -s -- 
 -----------------
 
 NOTE: There is a trailing space after the --
+
+There are a number of environment variables available to the trigger script
+but since triggers can be called in various contexts not all variables are
+available at all times. The trigger script should check for the variable and
+fail gracefully if it doesn't exist.
+
+.Environment variables visible to the trigger script
+[width="90%",cols="^,2m",frame="topbot",options="header"]
+|======================
+|Variable             | Purpose            
+| MT_TEST_RUN_DIR     | The directory where Megatest ran this test                   
+| MT_CMDINFO          | Encoded command data for the test                   
+| MT_DEBUG_MODE       | Used to pass the debug mode to nested calls to Megatest                   
+| MT_RUN_AREA_HOME    | Megatest home area 
+| MT_TESTSUITENAME    | The name of this testsuite or area                   
+| MT_TEST_NAME        | The name of this test
+| MT_ITEM_INFO        | The variable and values for the test item
+| MT_MEGATEST         | Which Megatest binary is being used by this area
+| MT_TARGET           | The target variable values, separated by '/'
+| MT_LINKTREE         | The base of the link tree where all run tests can be found
+| MT_ITEMPATH         | The values of the item path variables, separated by '/'
+| MT_RUNNAME          | The name of the run
+|======================
 
 
 Override the Toplevel HTML File
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

ADDED   docs/pkts.dot
Index: docs/pkts.dot
==================================================================
--- /dev/null
+++ docs/pkts.dot
@@ -0,0 +1,59 @@
+digraph megatest_pkts {
+  ranksep=0.05
+  // rankdir=LR
+
+node [shape=box,style=filled];
+  
+  "SENSORS" [ label = "{ Sensor Processing | { file | git | fossil | script }}"
+	      shape = "record"; ];
+  
+  "RUNS"    [ label = "{ Runs Processing | { launch | clean | re-run | archive } | { dispatcher }}";
+	      shape = "record"; ];
+
+  "WORK"    [ label = "{ Work Items | { start task | task competed }}";
+	      shape = "record"; ];
+
+  "USERREQ" [ label = "{ User Requests (Unix and Web) | { launch | clean | re-run | archive }}";
+	      shape = "record"; ];
+
+  "MTAREA1" [ label = "{ Megatest Area 1 | { parallel job\nmanagement | test\nmanagement | data\nrollup }}";
+	      shape = "record"; ];
+  
+  "MTAREA2" [ label = "{ Megatest Area 2 | { parallel job\nmanagement | test\nmanagement | data\nrollup }}";
+	      shape = "record"; ];
+  
+  "MTAREA3" [ label = "More Megatest Areas ... ";
+	      shape = "record"; ];
+
+  "PGDB"    [ label = "postgres database";
+	      shape = "cylinder"; ];
+
+  "WEBAPP"  [ label = "{ Web View | { Runs | Contours | Control | Time View }}";
+	      shape = "record"; ];
+
+  // "WEBCTRL" [ label = "{ Web View \n(control) }";
+  //	      shape = "record"; ];
+  
+  "SENSORS" -> "SPKTS";
+  "RUNS"    -> "run pkts";
+  "run pkts" -> "RUNS";
+  "WORK"    -> "work pkts";
+  "work pkts" -> "RUNS";
+  "USERREQ" -> "user request pkts";
+  "SPKTS"   -> "RUNS";
+  "user request pkts" -> "RUNS";
+  "RUNS"    -> "MTAREA1" -> "PGDB";
+  "RUNS"    -> "MTAREA2" -> "PGDB";
+  "RUNS"    -> "MTAREA3" -> "PGDB";
+  "PGDB"    -> "WEBAPP";
+  // "WEBCTRL" -> "run pkts";
+  
+  subgraph cluster_pkts {
+    label="Packets";
+    "SPKTS" [ label = "Sensor Packets" ];
+    "run pkts";
+    "work pkts";
+    "user request pkts";
+  }
+}
+

ADDED   docs/pkts.pdf
Index: docs/pkts.pdf
==================================================================
--- /dev/null
+++ docs/pkts.pdf
cannot compute difference between binary files

Index: docs/plan.txt
==================================================================
--- docs/plan.txt
+++ docs/plan.txt
@@ -8,44 +8,45 @@
 
 Goals
 ^^^^^
 
 . Reduce load on the file system. Sqlite3 files on network filesystem can be
-  a burden.
+  a burden. [green]#[DONE]#
 . Reduce number of servers and frequency of start/stop. This is mostly an
-  issue of clutter but also a reduction in "moving parts".
+  issue of clutter but also a reduction in "moving parts". [green]#[DONE]#
 . Coalesce activities to a single home host where possible. Give the user
   feedback that they have started the dashboard on a host other than the
-  home host.
+  home host. [green]#[DONE]#
 . Reduce number of processes involved in managing running tests.
 
 Changes Needed
 ^^^^^^^^^^^^^^
 
 . ACID compliant db will be on /tmp and synced to megatest.db with a five
-  second max delay.
+  second max delay. [green]#[DONE]#
 . Read/writes to db for processes on homehost will go direct to /tmp
-  megatest.db file.
+  megatest.db file. [green]#[DONE]#
 . Read/wites fron non-homehost processes will go through one server. Bulk
   reads (e.g. for dashboard or list-runs) will be cached on the current host
-  in /tmp and synced from the home megatest.db in the testsuite area.
-. Db syncs rely on the target db file timestame minus some margin.
+  in /tmp and synced from the home megatest.db in the testsuite area. [green]#[DONE]#
+. Db syncs rely on the target db file timestame minus some margin. [green]#[DONE]#
 . Since bulk reads do not use the server we can switch to simple RPC for the
-  network transport.
+  network transport. [green]#[DONE]#
 . Test running manager process extended to manage multiple running tests.
 
 Current Items
 ~~~~~~~~~~~~~
 
 ww05 - migrate to inmem-db
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-. Switch to inmem db with fast sync to on disk db's [DONE]
+. Switch to inmem db with fast sync to on disk db's [green]#[DONE]#
 . Server polls tasks table for next action
-.. Task table used for tracking runner process [DONE]
-.. Task table used for jobs to run
-.. Task table used for queueing runner actions (remove runs, cleanRunExecute, etc)
+.. Task table used for tracking runner process [red]#[Replaced by mtutil]#
+.. Task table used for jobs to run [red]#[Replaced by mtutil]#
+.. Task table used for queueing runner actions (remove runs,
+   cleanRunExecute, etc)  [red]#[Replaced by mtutil#]
 
 
 // ww32
 // ~~~~
 // 

ADDED   emergency-patch-1.scm
Index: emergency-patch-1.scm
==================================================================
--- /dev/null
+++ emergency-patch-1.scm
@@ -0,0 +1,203 @@
+
+
+;; These are called by the server on recipt of /api calls
+;;    - keep it simple, only return the actual result of the call, i.e. no meta info here
+;;
+;;    - returns #( flag result )
+;;
+(define (api:execute-requests dbstruct dat)
+  (handle-exceptions
+   exn
+   (let ((call-chain (get-call-chain)))
+     (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat)
+     (print-call-chain (current-error-port))
+     (debug:print 0 *default-log-port* " message: "  ((condition-property-accessor 'exn 'message) exn))       
+     (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens
+   (cond
+    ((not (vector? dat))                    ;; it is an error to not receive a vector
+     (vector #f (vector #f "remote must be called with a vector")))
+    ((> *api-process-request-count* 20) ;; 20)
+     (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.")
+     (set! *server-overloaded* #t)
+     (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor!
+    (else  
+     (let* ((cmd-in            (vector-ref dat 0))
+            (cmd               (if (symbol? cmd-in)
+				   cmd-in
+				   (string->symbol cmd-in)))
+            (params            (vector-ref dat 1))
+            (start-t           (current-milliseconds))
+            (readonly-mode     (dbr:dbstruct-read-only dbstruct))
+            (readonly-command  (member cmd api:read-only-queries))
+            (writecmd-in-readonly-mode (and readonly-mode (not readonly-command)))
+            (res    
+             (if writecmd-in-readonly-mode
+                 (conc "attempt to run write command "cmd" on a read-only database")
+                 (case cmd
+                   ;;===============================================
+                   ;; READ/WRITE QUERIES
+                   ;;===============================================
+
+                   ((get-keys-write)                        (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl
+                   
+                   ;; SERVERS
+                   ((start-server)                    (apply server:kind-run params))
+                   ((kill-server)                     (set! *server-run* #f))
+
+                   ;; TESTS
+
+                   ;;((test-set-state-status-by-id)     (apply mt:test-set-state-status-by-id dbstruct params))
+                   ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items.
+                   ((test-set-state-status-by-id)
+
+                    ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
+                    (db:set-state-status-and-roll-up-items
+                     dbstruct
+                     (list-ref params 0) ; run-id
+                     (list-ref params 1) ; test-name
+                     #f                  ; item-path
+                     (list-ref params 2) ; state
+                     (list-ref params 3) ; status
+                     (list-ref params 4) ; comment
+                     ))
+                   
+                   ((delete-test-records)             (apply db:delete-test-records dbstruct params))
+                   ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params))
+                   ((test-set-state-status)           (apply db:test-set-state-status dbstruct params))
+                   ((test-set-top-process-pid)        (apply db:test-set-top-process-pid dbstruct params))
+                   ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params))
+                   ((top-test-set-per-pf-counts)      (apply db:top-test-set-per-pf-counts dbstruct params))
+                   ((test-set-archive-block-id)       (apply db:test-set-archive-block-id dbstruct params))
+
+                   ;; RUNS
+                   ((register-run)                 (apply db:register-run dbstruct params))
+                   ((set-tests-state-status)       (apply db:set-tests-state-status dbstruct params))
+                   ((delete-run)                   (apply db:delete-run dbstruct params))
+                   ((lock/unlock-run)              (apply db:lock/unlock-run dbstruct params))
+                   ((update-run-event_time)        (apply db:update-run-event_time dbstruct params))
+                   ((update-run-stats)             (apply db:update-run-stats dbstruct params))
+                   ((set-var)                      (apply db:set-var dbstruct params))
+                   ((del-var)                      (apply db:del-var dbstruct params))
+
+                   ;; STEPS
+                   ((teststep-set-status!)         (apply db:teststep-set-status! dbstruct params))
+
+                   ;; TEST DATA
+                   ((test-data-rollup)             (apply db:test-data-rollup dbstruct params))
+                   ((csv->test-data)               (apply db:csv->test-data dbstruct params))
+
+                   ;; MISC
+                   ((sync-inmem->db)               (let ((run-id (car params)))
+                                                     (db:sync-touched dbstruct run-id force-sync: #t)))
+                   ((mark-incomplete)              (apply db:find-and-mark-incomplete dbstruct params))
+
+                   ;; TESTMETA
+                   ((testmeta-add-record)       (apply db:testmeta-add-record dbstruct params))
+                   ((testmeta-update-field)     (apply db:testmeta-update-field dbstruct params))
+                   ((get-tests-tags)            (db:get-tests-tags dbstruct))
+
+                   ;; TASKS
+                   ((tasks-add)                 (apply tasks:add dbstruct params))   
+                   ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params))
+                   ((tasks-get-last)            (apply tasks:get-last dbstruct params))
+
+		   ;; NO SYNC DB
+		   ((no-sync-set)               (apply db:no-sync-set         *no-sync-db* params))
+		   ((no-sync-get/default)       (apply db:no-sync-get/default *no-sync-db* params))
+		   ((no-sync-del!)              (apply db:no-sync-del!        *no-sync-db* params))
+		 
+                   ;; ARCHIVES
+                   ;; ((archive-get-allocations)   
+                   ((archive-register-disk)     (apply db:archive-register-disk dbstruct params))
+                   ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params))
+                   ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey))
+
+                   ;;======================================================================
+                   ;; READ ONLY QUERIES
+                   ;;======================================================================
+
+                   ;; KEYS
+                   ((get-key-val-pairs)               (apply db:get-key-val-pairs dbstruct params))
+                   ((get-keys)                        (db:get-keys dbstruct))
+                   ((get-key-vals)                    (apply db:get-key-vals dbstruct params))
+                   ((get-target)                      (apply db:get-target dbstruct params))
+                   ((get-targets)                     (db:get-targets dbstruct))
+
+                   ;; ARCHIVES
+                   ((test-get-archive-block-info)     (apply db:test-get-archive-block-info dbstruct params))
+                   
+                   ;; TESTS
+                   ((test-toplevel-num-items)         (apply db:test-toplevel-num-items dbstruct params))
+                   ((get-test-info-by-id)	       (apply db:get-test-info-by-id dbstruct params))
+                   ((test-get-rundir-from-test-id)    (apply db:test-get-rundir-from-test-id dbstruct params))
+                   ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params))
+                   ((get-count-tests-running)         (apply db:get-count-tests-running dbstruct params))
+                   ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params))
+                   ;; ((delete-test-step-records)        (apply db:delete-test-step-records dbstruct params))
+                   ((get-previous-test-run-record)    (apply db:get-previous-test-run-record dbstruct params))
+                   ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params))
+                   ((test-get-logfile-info)           (apply db:test-get-logfile-info dbstruct params))
+                   ((test-get-records-for-index-file)  (apply db:test-get-records-for-index-file dbstruct params))
+                   ((get-testinfo-state-status)       (apply db:get-testinfo-state-status dbstruct params))
+                   ((test-get-top-process-pid)        (apply db:test-get-top-process-pid dbstruct params))
+                   ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params))
+                   ((get-prereqs-not-met)             (apply db:get-prereqs-not-met dbstruct params))
+                   ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params))
+                   ((synchash-get)                    (apply synchash:server-get dbstruct params))
+                   ((get-raw-run-stats)               (apply db:get-raw-run-stats dbstruct params))
+
+                   ;; RUNS
+                   ((get-run-info)                 (apply db:get-run-info dbstruct params))
+                   ((get-run-status)               (apply db:get-run-status dbstruct params))
+                   ((set-run-status)               (apply db:set-run-status dbstruct params))
+                   ((get-tests-for-run)            (apply db:get-tests-for-run dbstruct params))
+                   ((get-test-id)                  (apply db:get-test-id dbstruct params))
+                   ((get-tests-for-run-mindata)    (apply db:get-tests-for-run-mindata dbstruct params))
+                   ((get-runs)                     (apply db:get-runs dbstruct params))
+                   ((get-num-runs)                 (apply db:get-num-runs dbstruct params))
+                   ((get-all-run-ids)              (db:get-all-run-ids dbstruct))
+                   ((get-prev-run-ids)             (apply db:get-prev-run-ids dbstruct params))
+                   ((get-run-ids-matching-target)  (apply db:get-run-ids-matching-target dbstruct params))
+                   ((get-runs-by-patt)             (apply db:get-runs-by-patt dbstruct params))
+                   ((get-run-name-from-id)         (apply db:get-run-name-from-id dbstruct params))
+                   ((get-main-run-stats)           (apply db:get-main-run-stats dbstruct params))
+                   ((get-var)                      (apply db:get-var dbstruct params))
+                   ((get-run-stats)                (apply db:get-run-stats dbstruct params))
+
+                   ;; STEPS
+                   ((get-steps-data)               (apply db:get-steps-data dbstruct params))
+                   ((get-steps-for-test)           (apply db:get-steps-for-test dbstruct params))
+
+                   ;; TEST DATA
+                   ((read-test-data)               (apply db:read-test-data dbstruct params))
+                   ((read-test-data*)              (apply db:read-test-data* dbstruct params))
+
+                   ;; MISC
+                   ((get-latest-host-load)         (apply db:get-latest-host-load dbstruct params))
+                   ((have-incompletes?)            (apply db:have-incompletes? dbstruct params))
+                   ((login)                        (apply db:login dbstruct params))
+                   ((general-call)                 (let ((stmtname   (car params))
+                                                         (run-id     (cadr params))
+                                                         (realparams (cddr params)))
+                                                     (db:general-call dbstruct stmtname realparams)))
+                   ((sdb-qry)                      (apply sdb:qry params))
+                   ((ping)                         (current-process-id))
+		   ((get-changed-record-ids)       (apply db:get-changed-record-ids dbstruct params))
+		   
+                   ;; TESTMETA
+                   ((testmeta-get-record)       (apply db:testmeta-get-record dbstruct params))
+
+                   ;; TASKS 
+                   ((find-task-queue-records)   (apply tasks:find-task-queue-records dbstruct params))
+		   (else
+		    (debug:print 0 *default-log-port* "ERROR: bad api call " cmd)
+		    (conc "ERROR: BAD api call " cmd))))))
+       
+       ;; save all stats
+       (let ((delta-t (- (current-milliseconds)
+			 start-t)))
+	 (hash-table-set! *db-api-call-time* cmd
+			  (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '()))))
+       (if writecmd-in-readonly-mode
+	   (vector #f res)
+           (vector #t res)))))))

ADDED   emergency-patch-2.scm
Index: emergency-patch-2.scm
==================================================================
--- /dev/null
+++ emergency-patch-2.scm
@@ -0,0 +1,311 @@
+(include "common_records.scm")
+(include "key_records.scm")
+(include "db_records.scm")
+(include "run_records.scm")
+(include "test_records.scm")
+
+(define (common:wait-for-cpuload maxload numcpus waitdelay #!key (count 1000) (msg #f)(remote-host #f))
+  (let* ((loadavg (common:get-cpu-load remote-host))
+	 (first   (car loadavg))
+	 (next    (cadr loadavg))
+	 (adjload (* maxload numcpus))
+	 (loadjmp (- first next)))
+    (cond
+     ((and (> first adjload)
+	   (> count 0))
+      (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load " first " exceeding max of " adjload " " (if msg msg ""))
+      (thread-sleep! waitdelay)
+      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1)))
+     ((and (> loadjmp numcpus)
+	   (> count 0))
+      (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load jump " loadjmp " > numcpus " numcpus (if msg msg ""))
+      (thread-sleep! waitdelay)
+      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1))))))
+
+(define (common:wait-for-homehost-load maxload msg)
+  (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
+                     #f
+                     (common:get-homehost)))
+         (hh     (if hh-dat (car hh-dat) #f))
+         (numcpus (common:get-num-cpus hh)))
+    (common:wait-for-normalized-load maxload msg: msg remote-host: hh)))
+
+;; wait for normalized cpu load to drop below maxload
+;;
+(define (common:wait-for-normalized-load maxload #!key (msg #f)(remote-host #f))
+  (let ((num-cpus (common:get-num-cpus remote-host)))
+    (common:wait-for-cpuload maxload num-cpus 15 msg: msg remote-host: remote-host)))
+
+;;  hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmaps)
+(define (runs:process-expanded-tests runsdat testdat)
+  ;; unroll the contents of runsdat and testdat (due to ongoing refactoring).
+  (let* ((hed                    (runs:testdat-hed testdat))
+	 (tal                    (runs:testdat-tal testdat))
+	 (reg                    (runs:testdat-reg testdat))
+	 (reruns                 (runs:testdat-reruns testdat))
+	 (test-name              (runs:testdat-test-name testdat))
+	 (item-path              (runs:testdat-item-path testdat))
+	 (jobgroup               (runs:testdat-jobgroup testdat))
+	 (waitons                (runs:testdat-waitons testdat))
+	 (item-path              (runs:testdat-item-path testdat))
+	 (testmode               (runs:testdat-testmode testdat))
+	 (newtal                 (runs:testdat-newtal testdat))
+	 (itemmaps               (runs:testdat-itemmaps testdat))
+	 (test-record            (runs:testdat-test-record testdat))
+	 (prereqs-not-met        (runs:testdat-prereqs-not-met testdat))
+
+	 (reglen                 (runs:dat-reglen runsdat))
+	 (regfull                (runs:dat-regfull runsdat))
+	 (runname                (runs:dat-runname runsdat))
+	 (max-concurrent-jobs    (runs:dat-max-concurrent-jobs runsdat))
+	 (run-id                 (runs:dat-run-id runsdat))
+	 (test-patts             (runs:dat-test-patts runsdat))
+	 (required-tests         (runs:dat-required-tests runsdat))
+	 (test-registry          (runs:dat-test-registry runsdat))
+	 (registry-mutex         (runs:dat-registry-mutex runsdat))
+	 (flags                  (runs:dat-flags runsdat))
+	 (keyvals                (runs:dat-keyvals runsdat))
+	 (run-info               (runs:dat-run-info runsdat))
+	 (all-tests-registry     (runs:dat-all-tests-registry runsdat))
+	 (run-limits-info        (runs:dat-can-run-more-tests runsdat))
+	 ;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
+	 (have-resources         (car run-limits-info))
+	 (num-running            (list-ref run-limits-info 1))
+	 (num-running-in-jobgroup(list-ref run-limits-info 2)) 
+	 (max-concurrent-jobs    (list-ref run-limits-info 3))
+	 (job-group-limit        (list-ref run-limits-info 4))
+	 ;; (prereqs-not-met        (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))
+	 ;; (prereqs-not-met         (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
+	 (fails                  (if (list? prereqs-not-met)
+				      (runs:calc-fails prereqs-not-met)
+				      (begin
+					(debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met)
+					'())))
+	 (non-completed           (filter (lambda (x)             ;; remove hed from not completed list, duh, of course it is not completed!
+					    (not (equal? x hed)))
+					  (runs:calc-not-completed prereqs-not-met)))
+	 (loop-list               (list hed tal reg reruns))
+	 ;; configure the load runner
+	 (numcpus                 (common:get-num-cpus #f))
+	 (maxload                 (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3.0")))         ;; use a non-number string to disable
+         (maxhomehostload         (string->number (or (configf:lookup *configdat* "jobtools" "maxhomehostload") "1.2"))) ;; use a non-number string to disable
+         (waitdelay               (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60"))))
+    (debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: (" 
+		      (string-intersperse 
+		       (map (lambda (t)
+			      (if (vector? t)
+				  (conc (db:test-get-state t) "/" (db:test-get-status t))
+				  (conc " WARNING: t is not a vector=" t )))
+			    prereqs-not-met)
+		       ", ") ") fails: " fails
+		       "\nregistered? " (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f))
+			    
+
+    
+    (if (and (not (null? prereqs-not-met))
+	     (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
+	(debug:print-info 2 *default-log-port* "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", ")))
+
+    ;; Don't know at this time if the test have been launched at some time in the past
+    ;; i.e. is this a re-launch?
+    (debug:print-info 4 *default-log-port* "run-limits-info = " run-limits-info)
+    
+    (cond
+     
+     ;; Check item path against item-patts, 
+     ;;
+     ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run
+      ;; else the run is stuck, temporarily or permanently
+      ;; but should check if it is due to lack of resources vs. prerequisites
+      (debug:print-info 1 *default-log-port* "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts)
+      (if (or (not (null? tal))(not (null? reg)))
+	  (list (runs:queue-next-hed tal reg reglen regfull)
+		(runs:queue-next-tal tal reg reglen regfull)
+		(runs:queue-next-reg tal reg reglen regfull)
+		reruns)
+	  #f))
+     
+     ;; Register tests 
+     ;;
+     ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f))
+      (debug:print-info 4 *default-log-port* "Pre-registering test " test-name "/" item-path " to create placeholder" )
+      ;; always do firm registration now in v1.60 and greater ;; (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs
+      (let register-loop ((numtries 15))
+	(rmt:register-test run-id test-name item-path)
+	(if (rmt:get-test-id run-id test-name item-path)
+	    (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'done)
+	    (if (> numtries 0)
+		(begin
+		  (thread-sleep! 0.5)
+		  (register-loop (- numtries 1)))
+		(debug:print-error 0 *default-log-port* "failed to register test " (db:test-make-full-name test-name item-path)))))
+      (if (not (eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f) 'done))
+	  (begin
+	    (rmt:register-test run-id test-name "")
+	    (if (rmt:get-test-id run-id test-name "")
+		(hash-table-set! test-registry (db:test-make-full-name test-name "") 'done))))
+      (runs:shrink-can-run-more-tests-count runsdat)   ;; DELAY TWEAKER (still needed?)
+      (if (and (null? tal)(null? reg))
+	  (list hed tal (append reg (list hed)) reruns)
+	  (list (runs:queue-next-hed tal reg reglen regfull)
+		(runs:queue-next-tal tal reg reglen regfull)
+		;; NB// Here we are building reg as we register tests
+		;; if regfull we must pop the front item off reg
+		(if regfull
+		    (append (cdr reg) (list hed))
+		    (append reg (list hed)))
+		reruns)))
+     
+     ;; At this point hed test registration must be completed.
+     ;;
+     ((eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)
+	   'start)
+      (debug:print-info 0 *default-log-port* "Waiting on test registration(s): "
+			(string-intersperse 
+			 (filter (lambda (x)
+				   (eq? (hash-table-ref/default test-registry x #f) 'start))
+				 (hash-table-keys test-registry))
+			 ", "))
+      (thread-sleep! 0.051)
+      (list hed tal reg reruns))
+     
+     ;; If no resources are available just kill time and loop again
+     ;;
+     ((not have-resources) ;; simply try again after waiting a second
+      (if (runs:lownoise "no resources" 60)
+	  (debug:print-info 1 *default-log-port* "no resources to run new tests, waiting ..."))
+      ;; Have gone back and forth on this but db starvation is an issue.
+      ;; wait one second before looking again to run jobs.
+      (thread-sleep! 1)
+      ;; could have done hed tal here but doing car/cdr of newtal to rotate tests
+      (list (car newtal)(cdr newtal) reg reruns))
+     
+     ;; This is the final stage, everything is in place so launch the test
+     ;;
+     ((and have-resources
+	   (or (null? prereqs-not-met)
+	       (and (member 'toplevel testmode) ;;  'toplevel)
+		    (null? non-completed)
+		    (not (member 'exclusive testmode)))))
+      ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path))
+      ;; we are going to reset all the counters for test retries by setting a new hash table
+      ;; this means they will increment only when nothing can be run
+      (set! *max-tries-hash* (make-hash-table))
+      ;; well, first lets see if cpu load throttling is enabled. If so wait around until the
+      ;; average cpu load is under the threshold before continuing
+      (if maxload ;; only gate if maxload is specified
+          (common:wait-for-cpuload maxload numcpus waitdelay))
+      (if maxhomehostload
+          (common:wait-for-homehost-load maxhomehostload (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload)))
+      
+      (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry)
+      (runs:incremental-print-results run-id)
+      (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running)
+      (runs:shrink-can-run-more-tests-count runsdat)  ;; DELAY TWEAKER (still needed?)
+      ;; (thread-sleep! *global-delta*)
+      (if (or (not (null? tal))(not (null? reg)))
+	  (list (runs:queue-next-hed tal reg reglen regfull)
+		(runs:queue-next-tal tal reg reglen regfull)
+		(runs:queue-next-reg tal reg reglen regfull)
+		reruns)
+	  #f))
+     
+     ;; must be we have unmet prerequisites
+     ;;
+     (else
+      (debug:print 4 *default-log-port* "FAILS: " fails)
+      ;; If one or more of the prereqs-not-met are FAIL then we can issue
+      ;; a message and drop hed from the items to be processed.
+      ;; (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met)
+      (if (and (not (null? prereqs-not-met))
+	       (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
+	  (debug:print-info 1 *default-log-port* "waiting on tests; " (string-intersperse 
+						    (runs:mixed-list-testname-and-testrec->list-of-strings 
+						     prereqs-not-met) ", ")))
+      (if (or (null? fails)
+	      (member 'toplevel testmode))
+	  (begin
+	    ;; couldn't run, take a breather
+	    (if  (runs:lownoise "Waiting for more work to do..." 60)
+		 (debug:print-info 0 *default-log-port* "Waiting for more work to do..."))
+	    (thread-sleep! 1)
+	    (list (car newtal)(cdr newtal) reg reruns))
+	  ;; the waiton is FAIL so no point in trying to run hed ever again
+	  (if (or (not (null? reg))(not (null? tal)))
+	      (if (vector? hed)
+		  (begin
+		    (debug:print 1 *default-log-port* "WARNING: Dropping test " test-name "/" item-path
+				 " from the launch list as it has prerequistes that are FAIL")
+		    (let ((test-id (rmt:get-test-id run-id hed "")))
+		      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))
+		    (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
+		    ;; (thread-sleep! *global-delta*)
+		    ;; This next is for the items
+		    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f)
+		    (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed)
+		    (list (runs:queue-next-hed tal reg reglen regfull)
+			  (runs:queue-next-tal tal reg reglen regfull)
+			  (runs:queue-next-reg tal reg reglen regfull)
+			  reruns ;; WAS: (cons hed reruns) ;; but that makes no sense?
+			  ))
+		  (let ((nth-try (hash-table-ref/default test-registry hed 0)))
+		    (cond
+		     ((member "RUNNING" (map db:test-get-state prereqs-not-met))
+		      (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60)
+			  (debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet."))
+		      (thread-sleep! 4)
+		      (list (runs:queue-next-hed newtal reg reglen regfull)
+			    (runs:queue-next-tal newtal reg reglen regfull)
+			    (runs:queue-next-reg newtal reg reglen regfull)
+			    reruns))
+		     ((or (not nth-try)
+			  (and (number? nth-try)
+			       (< nth-try 10)))
+		      (hash-table-set! test-registry hed (if (number? nth-try)
+							     (+ nth-try 1)
+							     0))
+		      (if (runs:lownoise (conc "not removing test " hed) 60)
+			  (debug:print 1 *default-log-port* "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites"))
+		      ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;;  " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)")
+		      (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
+		      ;; (list hed tal reg reruns)
+		      ;; (list (car newtal)(cdr newtal) reg reruns)
+		      ;; (hash-table-set! test-registry hed 'removed)
+		      (list (runs:queue-next-hed newtal reg reglen regfull)
+			    (runs:queue-next-tal newtal reg reglen regfull)
+			    (runs:queue-next-reg newtal reg reglen regfull)
+			    reruns))
+		     ((symbol? nth-try)
+		      (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW
+			  (if (null? tal)
+			      #f ;; yes, really
+			      (list (car tal)(cdr tal) reg reruns))
+			  (begin
+			    (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60)
+				(debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state " nth-try " will be overridden and we'll retry."))
+			    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f)
+			    (hash-table-set! test-registry hed 0)
+			    (list (runs:queue-next-hed newtal reg reglen regfull)
+				  (runs:queue-next-tal newtal reg reglen regfull)
+				  (runs:queue-next-reg newtal reg reglen regfull)
+				  reruns))))
+		     (else
+		      (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
+			  (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))
+		      ;; (debug:print 0 *default-log-port* "         prereqs: " prereqs-not-met)
+		      (hash-table-set! test-registry hed 'removed)
+		      (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f)
+		      ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug.
+		      (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL
+		      (list (if (null? tal)(car newtal)(car tal))
+			    tal
+			    reg
+			    reruns)))))
+	      ;; can't drop this - maybe running? Just keep trying
+	      (let ((runable-tests (runs:runable-tests prereqs-not-met)))
+		(if (null? runable-tests)
+		    #f   ;; I think we are truly done here
+		    (list (runs:queue-next-hed newtal reg reglen regfull)
+			    (runs:queue-next-tal newtal reg reglen regfull)
+			    (runs:queue-next-reg newtal reg reglen regfull)
+			    reruns)))))))))

ADDED   emergency-patch-3.scm
Index: emergency-patch-3.scm
==================================================================
--- /dev/null
+++ emergency-patch-3.scm
@@ -0,0 +1,81 @@
+ ;; To build patch:
+ ;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; ldd /p/foundry/env/pkgs/megatest/1.64/19/bin/.11/mtest
+ ;;        linux-vdso.so.1 =>  (0x00002aaaaaaab000)
+ ;;        libchicken.so.7 => /p/foundry/env/pkgs/megatest/1.64/chicken-4.10.0//lib/libchicken.so.7 (0x00002aaaaaaad000)
+ ;;        libm.so.6 => /lib64/libm.so.6 (0x00002aaaab0a6000)
+ ;;        libdl.so.2 => /lib64/libdl.so.2 (0x00002aaaab31f000)
+ ;;        libc.so.6 => /lib64/libc.so.6 (0x00002aaaab523000)
+ ;;        /lib64/ld-linux-x86-64.so.2 (0x0000555555554000)
+ ;;
+ ;;  /p/foundry/env/pkgs/megatest/1.64/chicken-4.10.0/bin/csc -s emergency-patch-3.scm
+ ;;
+
+
+ ;; to test patch:
+ ;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; in .megatestrc, add:
+ ;; (if (and (> megatest-version 1.64)
+ ;;         (< megatest-version 1.6421))
+ ;;   (begin
+ ;;      (load "/p/foundry/env/pkgs/megatest/1.64/19/share/epatch-1.so")
+ ;;      (load "/p/foundry/env/pkgs/megatest/1.64/19/share/epatch-2.so"))) 
+ ;;
+
+
+ ;; to productize patch:
+ ;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; 
+(use directory-utils regex)
+
+(include "common_records.scm")
+(include "key_records.scm")
+(include "db_records.scm")
+(include "run_records.scm")
+(include "test_records.scm")
+
+;; Given a run id start a server process    ### NOTE ### > file 2>&1 
+;; if the run-id is zero and the target-host is set 
+;; try running on that host
+;;   incidental: rotate logs in logs/ dir.
+;;
+(define  (server:run areapath) ;; areapath is *toppath* for a given testsuite area
+  (let* ((curr-host   (get-host-name))
+         ;; (attempt-in-progress (server:start-attempted? areapath))
+         ;; (dot-server-url (server:check-if-running areapath))
+	 (curr-ip     (server:get-best-guess-address curr-host))
+	 (curr-pid    (current-process-id))
+	 (homehost    (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
+	 (target-host (car homehost))
+	 (testsuite   (common:get-testsuite-name))
+	 (logfile     (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
+	 (cmdln (conc (common:get-megatest-exe)
+		      " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
+							   " -daemonize "
+							   "")
+		      ;; " -log " logfile
+		      " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &")))))
+	 (log-rotate  (make-thread common:rotate-logs  "server run, rotate logs thread"))
+         (load-limit  (configf:lookup-number *configdat* "server" "load-limit" default: 0.9)))
+    ;; we want the remote server to start in *toppath* so push there
+    (push-directory areapath)
+    (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
+    (thread-start! log-rotate)
+    
+    ;; host.domain.tld match host?
+    (if (and target-host 
+	     ;; look at target host, is it host.domain.tld or ip address and does it 
+	     ;; match current ip or hostname
+	     (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
+	     (not (equal? curr-ip target-host)))
+	(begin
+	  (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
+	  (setenv "TARGETHOST" target-host)))
+      
+    (setenv "TARGETHOST_LOGF" logfile)
+    (common:wait-for-normalized-load load-limit " delaying server start due to load" remote-host: (get-environment-variable "TARGETHOST")) ;; do not try starting servers on an already overloaded machine, just wait forever
+    (system (conc "nbfake " cmdln))
+    (unsetenv "TARGETHOST_LOGF")
+    (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
+    (thread-join! log-rotate)
+    (pop-directory)))

Index: env.scm
==================================================================
--- env.scm
+++ env.scm
@@ -12,11 +12,11 @@
 (declare (unit env))
 
 (use sql-de-lite) ;; srfi-1 posix regex regex-case srfi-69 srfi-18 call-with-environment-variables)
 
 (define (env:open-db fname)
-  (let* ((db-exists (file-exists? fname))
+  (let* ((db-exists (common:file-exists? fname))
 	 (db        (open-database fname)))
     (if (not db-exists)
 	(begin
 	  (exec (sql db "CREATE TABLE envvars (
                     id INTEGER PRIMARY KEY,

Index: ezsteps.scm
==================================================================
--- ezsteps.scm
+++ ezsteps.scm
@@ -8,12 +8,11 @@
 ;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 ;;  PURPOSE.
 
 ;;  strftime('%m/%d/%Y %H:%M:%S','now','localtime')
 
-(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking directory-utils)
-(import (prefix sqlite3 sqlite3:))
+(use srfi-1 posix regex srfi-69 directory-utils)
 
 (declare (unit ezsteps))
 (declare (uses db))
 (declare (uses common))
 (declare (uses items))
@@ -37,19 +36,19 @@
 	 (test-id       (db:test-get-id testdat))
 	 (run-id        (db:test-get-run_id testdat))
 	 (test-name     (db:test-get-testname testdat))
 	 (kill-job      #f)) ;; for future use (on re-factoring with launch.scm code
     (let loop ((count 5))
-      (if (file-exists? test-run-dir)
+      (if (common:file-exists? test-run-dir)
 	  (push-directory test-run-dir)
 	  (if (> count 0)
 	      (begin
 		(debug:print 0 *default-log-port* "WARNING: ezsteps attempting to run but test run directory " test-run-dir " is not there. Waiting and trying again " count " more times")
 		(sleep 3)
 		(loop (- count 1))))))
     (debug:print-info 0 *default-log-port* "Running in directory " test-run-dir)
-    (if (not (file-exists? ".ezsteps"))(create-directory ".ezsteps"))
+    (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps"))
     ;; if ezsteps was defined then we are sure to have at least one step but check anyway
     (if (not (> (length ezstepslst) 0))
 	(message-window "ERROR: You can only re-run steps defined via ezsteps")
 	(begin
 	  (let loop ((ezstep   (car ezstepslst))
@@ -75,11 +74,11 @@
 			      (loop (car tal)(cdr tal) stepname #f))))
 
 		  (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts
 			       " stepparms: " stepparms " stepcmd: " stepcmd)
 		  
-		  (if (file-exists? (conc stepname ".logpro"))(set! logpro-used #t))
+		  (if (common:file-exists? (conc stepname ".logpro"))(set! logpro-used #t))
 		  
 		  ;; call the command using mt_ezstep
 		  (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "-") " " stepcmd))
 		  
 		  (debug:print 4 *default-log-port* "script: " script)

ADDED   file-tail.scm
Index: file-tail.scm
==================================================================
--- /dev/null
+++ file-tail.scm
@@ -0,0 +1,76 @@
+
+(use (prefix sqlite3 sqlite3:) posix typed-records) 
+
+(define (open-tail-db )
+  (let* ((basedir   (create-directory (conc "/tmp/" (current-user-name))))
+	 (dbpath    (conc basedir "/megatest_logs.db"))
+	 (dbexists  (common:file-exists? dbpath))
+	 (db        (sqlite3:open-database dbpath))
+	 (handler   (sqlite3:make-busy-timeout 136000)))
+    (sqlite3:set-busy-handler! db handler)
+    (sqlite3:execute db "PRAGMA synchronous = 0;")
+    (if (not dbexists)
+	(begin
+	  (sqlite3:execute db "CREATE TABLE IF NOT EXISTS log_files (id INTEGER PRIMARY KEY,filename TEXT,event_time TIMESTAMP DEFAULT (strftime('%s','now')));")
+	  (sqlite3:execute db "CREATE TABLE IF NOT EXISTS log_data  (id INTEGER PRIMARY KEY,fid INTEGER,line TEXT,event_time TIMESTAMP DEFAULT (strftime('%s','now')));")
+	  ))
+    db))
+
+(define (tail-write db fid lines)
+  (sqlite3:with-transaction
+   db
+   (lambda ()
+     (for-each
+      (lambda (line)
+	(sqlite3:execute db "INSERT INTO log_data (fid,line) VALUES (?,?);" fid line))
+      lines))))
+
+(define (tail-get-fid db fname)
+  (let ((fid   (handle-exceptions
+		   exn
+		   #f
+		 (sqlite3:first-result db "SELECT id FROM log_files WHERE filename=?;" fname))))
+    (if fid
+	fid
+	(begin
+	  (sqlite3:execute db "INSERT INTO log_files (filename) VALUES (?);" fname)
+	  (tail-get-fid db fname)))))
+
+(define (file-tail fname #!key (db-in #f))
+  (let* ((inp (open-input-file fname))
+	 (db  (or db-in (open-tail-db)))
+	 (fid (tail-get-fid db fname)))
+    (let loop ((inl    (read-line inp))
+	       (lines '())
+	       (lastwr (current-seconds)))
+      (if (eof-object? inl)
+	  (let ((timed-out (> (- (current-seconds) lastwr) 60)))
+	    (if timed-out (tail-write db fid (reverse lines)))
+	    (sleep 1)
+	    (if timed-out
+		(loop (read-line inp) '() (current-seconds))
+		(loop (read-line inp) lines lastwr)))
+	  (let* ((savelines (> (length lines) 19)))
+	    ;; (print inl)
+	    (if savelines (tail-write db fid (reverse lines)))
+	    (loop (read-line inp)
+		  (if savelines
+		      '()
+		      (cons inl lines))
+		  (if savelines
+		      (current-seconds)
+		      lastwr)))))))
+
+;; offset -20 means get last 20 lines
+;;
+(define (tail-get-lines db fid offset count)
+  (if (> offset 0)
+      (map-row (lambda (id line)
+		 (vector id line))
+	       db
+	       "SELECT id,line FROM log_data WHERE fid=? OFFSET ? LIMIT ?;" fid offset count)
+      (reverse ;; get N from the end
+       (map-row (lambda (id line)
+		  (vector id line))
+		db
+		"SELECT id,line FROM log_data WHERE fid=? ORDER BY id DESC LIMIT ?;" fid (abs offset)))))

Index: filedb.scm
==================================================================
--- filedb.scm
+++ filedb.scm
@@ -16,11 +16,11 @@
 (include "fdb_records.scm")
 ;; (include "settings.scm")
 
 (define (filedb:open-db dbpath)
   (let* ((fdb      (make-filedb:fdb))
-	 (dbexists (file-exists? dbpath))
+	 (dbexists (common:file-exists? dbpath))
 	 (db (sqlite3:open-database dbpath)))
     (filedb:fdb-set-db!        fdb db)
     (filedb:fdb-set-dbpath!    fdb dbpath)
     (filedb:fdb-set-pathcache! fdb (make-hash-table))
     (filedb:fdb-set-idcache!   fdb (make-hash-table))

Index: genexample.scm
==================================================================
--- genexample.scm
+++ genexample.scm
@@ -56,11 +56,11 @@
 	(begin
 	  (print "The path " path " does not exist or is not a directory. Attempting to create it now")
 	  (create-directory path #t)))
 
     ;; First check that the directory is empty!
-    (if (and (file-exists? path)
+    (if (and (common:file-exists? path)
 	     (not (null? (glob (conc path "/*")))))
 	(begin
 	  (print "WARNING: directory " path " is not empty, are you sure you want to continue?")
 	  (display "Enter y/n: ")
 	  (if (equal? "y" (read-line))
@@ -210,22 +210,22 @@
 	(scripts  '())
 	(items    '())
 	(rel-path #f))
 
     (cond
-     ((file-exists? "megatest.config")         (set! rel-path "./"))
-     ((file-exists? "../megatest.config")      (set! rel-path "../"))
-     ((file-exists? "../../megatest.config")   (set! rel-path "../../"))
-     ((file-exists? "../../../megatest.config")(set! rel-path "../../../"))) ;; good enough dang it.
+     ((common:file-exists? "megatest.config")         (set! rel-path "./"))
+     ((common:file-exists? "../megatest.config")      (set! rel-path "../"))
+     ((common:file-exists? "../../megatest.config")   (set! rel-path "../../"))
+     ((common:file-exists? "../../../megatest.config")(set! rel-path "../../../"))) ;; good enough dang it.
 
     ;; Don't gather data or continue if a) megatest.config can't be found or b) testconfig already exists
     (if (not rel-path)
 	(begin
 	  (print "ERROR: I could not find megatest.config, please run -create-test in the top dir of your megatest area")
 	  (exit 1)))
 
-    (if (file-exists? (conc rel-path "tests/" testname "/testconfig"))
+    (if (common:file-exists? (conc rel-path "tests/" testname "/testconfig"))
 	(begin
 	  (print "WARNING: You already have a testconfig in " rel-path "tests/" testname ", do you want to clobber your files?")
 	  (display "Enter y/n: ")
 	  (if (not (equal? "y" (read-line)))
 	      (begin

Index: gutils.scm
==================================================================
--- gutils.scm
+++ gutils.scm
@@ -20,24 +20,35 @@
   (let* ((c1 (map string->number (string-split color1)))
 	 (c2 (map string->number (string-split color2)))
 	 (delta (map (lambda (a b)(abs (- a b))) c1 c2)))
     (null? (filter (lambda (x)(> x 3)) delta))))
 
+(define gutils:colors
+  '((PASS . "70 249 73")
+    (FAIL . "253 33 49")
+    (SKIP . "230 230 0")))
+
+(define (gutils:get-color-spec effective-state)
+  (or (alist-ref effective-state gutils:colors)
+      (alist-ref 'FAIL gutils:colors)))
+
+;; BBnote - state status dashboard button color / text defined here
 (define (gutils:get-color-for-state-status state status);; #!key (get-label #f))
   ;; ((if get-label cadr car)
   (case (string->symbol state)
     ((COMPLETED) ;; ARCHIVED)
      (case (string->symbol status)
        ((PASS)        (list "70  249 73" status))
        ((WARN WAIVED) (list "255 172 13" status))
-       ((SKIP)        (list "230 230 0" status))
+       ((SKIP)        (list (gutils:get-color-spec 'SKIP) status))
+       ((ABORT)       (list "198 36 166" status))
        (else (list "253 33 49" status))))
     ((ARCHIVED)
      (case (string->symbol status)
        ((PASS)        (list "70  170 73" status))
        ((WARN WAIVED) (list "200 130 13" status))
-       ((SKIP)        (list "180 180 0" status))
+       ((SKIP)        (list (gutils:get-color-spec 'SKIP) status))
        (else (list "180 33 49" status))))
     ;;      (if (equal? status "PASS")
     ;;	  '("70 249 73" "PASS")
     ;;	  (if (or (equal? status "WARN")
     ;;		  (equal? status "WAIVED"))
@@ -44,14 +55,16 @@
     ;;	      (list "255 172 13" status)
     ;;	      (list "223 33 49"  status)))) ;; greenish orangeish redish
     ((LAUNCHED)         (list "101 123 142"  state))
     ((CHECK)            (list "255 100 50"   state))
     ((REMOTEHOSTSTART)  (list "50 130 195"   state))
-    ((RUNNING)          (list "9 131 232"    state))
+    ((RUNNING STARTED)          (list "9 131 232"    state))
     ((KILLREQ)          (list "39 82 206"    state))
     ((KILLED)           (list "234 101 17"   state))
-    ((NOT_STARTED)      (list "240 240 240"  state))
+    ((NOT_STARTED)      (case (string->symbol status)
+			  ((CHECK STARTED)(list (gutils:get-color-spec 'SKIP) state))
+			  (else   (list "240 240 240"                 state))))
     ;; for xor mode below
     ;;
     ((CLEAN)
      (case (string->symbol status)
        ((CLEAN-FAIL CLEAN-CHECK CLEAN-ABORT)  (list "200 130 13" status)) ;; orange requested for these

Index: http-transport.scm
==================================================================
--- http-transport.scm
+++ http-transport.scm
@@ -8,12 +8,11 @@
 ;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 ;;  PURPOSE.
 
 (require-extension (srfi 18) extras tcp s11n)
 
-(use  srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest posix-extras) ;; sqlite3
-;; (import (prefix sqlite3 sqlite3:))
+(use  srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest posix-extras)
 
 (use spiffy uri-common intarweb http-client spiffy-request-vars intarweb spiffy-directory-listing)
 
 ;; Configurations for server
 (tcp-buffer-size 2048)
@@ -109,21 +108,24 @@
     (http-transport:try-start-server ipaddrstr start-port)))
 
 ;; This is recursively run by http-transport:run until sucessful
 ;;
 (define (http-transport:try-start-server ipaddrstr portnum)
-  (let ((config-hostname (configf:lookup *configdat* "server" "hostname")))
+  (let ((config-hostname (configf:lookup *configdat* "server" "hostname"))
+	(config-use-proxy (equal? (configf:lookup *configdat* "client" "use-http_proxy") "yes")))
+    (if (not config-use-proxy)
+	(determine-proxy (constantly #f)))
     (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname)
     (handle-exceptions
 	exn
 	(begin
 	  (print-error-message exn)
 	  (if (< portnum 64000)
 	      (begin 
 		(debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...")
 		(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
-		(debug:print 0 *default-log-port* "exn=" (condition->list exn))
+		(debug:print 5 *default-log-port* "exn=" (condition->list exn))
 		(portlogger:open-run-close portlogger:set-failed portnum)
 		(debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port")
 		(thread-sleep! 0.1)
 		
 		;; get_next_port goes here
@@ -292,12 +294,17 @@
 	 (server-dat (if runremote
                          (remote-conndat runremote)
                          #f))) ;; (hash-table-ref/default *runremote* run-id #f)))
     (if (vector? server-dat)
 	(let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
-	  (close-connection! api-dat)
-	  #t)
+	  (handle-exceptions
+	    exn
+	    (begin
+	      (print-call-chain *default-log-port*)
+	      (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn)))
+	    (close-connection! api-dat)
+	    #t))
 	#f)))
 
 
 (define (make-http-transport:server-dat)(make-vector 6))
 (define (http-transport:server-dat-get-iface         vec)    (vector-ref  vec 0))
@@ -380,11 +387,11 @@
 	       (start-time     (current-milliseconds)))
       ;; Use this opportunity to sync the tmp db to megatest.db
       (if (not server-going) ;; *dbstruct-db* 
 	  (begin
 	    (debug:print 0 *default-log-port* "SERVER: dbprep")
-	    (set! *dbstruct-db*  (db:setup)) ;;  run-id))
+	    (set! *dbstruct-db*  (db:setup #t)) ;;  run-id))
 	    (set! server-going #t)
 	    (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine.
 	    (thread-start! *watchdog*)))
       
       ;; when things go wrong we don't want to be doing the various queries too often
@@ -424,11 +431,11 @@
       (if (common:low-noise-print 60 "dbstats")
 	  (begin
 	    (debug:print 0 *default-log-port* "Server stats:")
 	    (db:print-current-query-stats)))
       (let* ((hrs-since-start  (/ (- (current-seconds) server-start-time) 3600))
-	     (adjusted-timeout (if (> hrs-since-start 1)
+	     (adjusted-timeout (if (> hrs-since-start 1)  ;; never used!
 				   (- server-timeout (inexact->exact (round (* hrs-since-start 60))))  ;; subtract 60 seconds per hour
 				   server-timeout)))
 	(if (common:low-noise-print 120 "server timeout")
 	    (debug:print-info 0 *default-log-port* "Adjusted server timeout: " adjusted-timeout))
 	(cond
@@ -440,11 +447,12 @@
               (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
 	      (let ((curr-time (current-seconds)))
 		(handle-exceptions
 		    exn
 		    (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk?")
-		  (change-file-times server-log-file curr-time curr-time))))
+		  (if (not *server-overloaded*)
+		      (change-file-times server-log-file curr-time curr-time)))))
           (loop 0 server-state bad-sync-count (current-milliseconds)))
          (else
           (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
           (http-transport:server-shutdown port)))))))
 
@@ -483,17 +491,16 @@
 ;; all routes though here end in exit ...
 ;;
 ;; start_server? 
 ;;
 (define (http-transport:launch)
-  ;; (if (args:get-arg "-daemonize")
-  ;;     (begin
-  ;; 	(daemon:ize)
-  ;; 	(if *alt-log-file* ;; we should re-connect to this port, I think daemon:ize disrupts it
-  ;; 	    (begin
-  ;; 	      (current-error-port *alt-log-file*)
-  ;; 	      (current-output-port *alt-log-file*)))))
+  ;; lets not even bother to start if there are already three or more server files ready to go
+  (let* ((num-alive   (server:get-num-alive (server:get-list *toppath*))))
+    (if (> num-alive 3)
+	(begin
+	  (debug:print 0 *default-log-port* "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up")
+	  (exit))))
   (let* ((th2 (make-thread (lambda ()
 			     (debug:print-info 0 *default-log-port* "Server run thread started")
 			     (http-transport:run 
 			      (if (args:get-arg "-server")
 				  (args:get-arg "-server")

Index: items.scm
==================================================================
--- items.scm
+++ items.scm
@@ -132,12 +132,12 @@
     (set! itemstable (map (lambda (item)
 			    (if (procedure? (cadr item))
 				(list (car item)((cadr item)))  ;; evaluate the proc
 				item))
 			  itemstable))
-    (if (and have-items  (null? items))     (debug:print-error 0 *default-log-port* "[items] section in testconfig but no entries defined"))
-    (if (and have-itable (null? itemstable))(debug:print-error 0 *default-log-port* "[itemstable] section in testconfig but no entries defined"))
+    (if (and have-items  (null? items))     (debug:print 0 *default-log-port* "WARNING:[items] section in testconfig but no entries defined"))
+    (if (and have-itable (null? itemstable))(debug:print 0 *default-log-port* "WARNNG:[itemstable] section in testconfig but no entries defined"))
     (if (or (not (null? items))(not (null? itemstable)))
 	(append (item-assoc->item-list items)
 		(item-table->item-list itemstable))
 	'(()))))
 

Index: key_records.scm
==================================================================
--- key_records.scm
+++ key_records.scm
@@ -10,13 +10,13 @@
 ;;======================================================================
 
 (define-inline (keys->valslots keys) ;; => ?,?,? ....
   (string-intersperse (map (lambda (x) "?") keys) ","))
 
-(define-inline (keys->key/field keys . additional)
-  (string-join (map (lambda (k)(conc k " TEXT"))
-		    (append keys additional)) ","))
+;; (define-inline (keys->key/field keys . additional)
+;;   (string-join (map (lambda (k)(conc k " TEXT"))
+;; 		    (append keys additional)) ","))
 
 (define-inline (item-list->path itemdat)
   (if (list? itemdat)
       (string-intersperse  (map cadr itemdat) "/")
       ""))

Index: keys.scm
==================================================================
--- keys.scm
+++ keys.scm
@@ -64,9 +64,13 @@
 
 ;;======================================================================
 ;; config file related routines
 ;;======================================================================
 
-(define (keys:config-get-fields confdat)
-  (let ((fields (hash-table-ref/default confdat "fields" '())))
-    (map car fields)))
+(define keys:config-get-fields common:get-fields)
+(define (keys:make-key/field-string confdat)
+  (let ((fields (configf:get-section confdat "fields")))
+    (string-join
+     (map (lambda (field)(conc (car field) " " (cadr field)))
+	  fields)
+     ",")))
 

Index: launch.scm
==================================================================
--- launch.scm
+++ launch.scm
@@ -21,13 +21,10 @@
 
 (declare (unit launch))
 (declare (uses common))
 (declare (uses configf))
 (declare (uses db))
-;; (declare (uses sdb))
-(declare (uses tdb))
-;; (declare (uses filedb))
 
 (include "common_records.scm")
 (include "key_records.scm")
 (include "db_records.scm")
 
@@ -61,11 +58,11 @@
 ;; return (conc status ": " comment) from the final section so that
 ;;   the comment can be set in the step record in launch.scm
 ;;
 (define (launch:load-logpro-dat run-id test-id stepname)
   (let ((cname (conc stepname ".dat")))
-    (if (file-exists? cname)
+    (if (common:file-exists? cname)
 	(let* ((dat  (read-config cname #f #f))
 	       (csvr (db:logpro-dat->csv dat stepname))
 	       (csvt (let-values (((fmt-cell fmt-record fmt-csv) (make-format ",")))
 		       (fmt-csv (map list->csv-record csvr))))
 	       (status (configf:lookup dat "final" "exit-status"))
@@ -90,11 +87,11 @@
 	 (script         "") ; "#!/bin/bash\n") ;; yep, we depend on bin/bash FIXME!!!\
 	 (logpro-file    (conc stepname ".logpro"))
 	 (html-file      (conc stepname ".html"))
 	 (dat-file       (conc stepname ".dat"))
 	 (tconfig-logpro (configf:lookup testconfig "logpro" stepname))
-	 (logpro-used    (file-exists? logpro-file)))
+	 (logpro-used    (common:file-exists? logpro-file)))
 
     (if (and tconfig-logpro
 	     (not logpro-used)) ;; no logpro file found but have a defn in the testconfig
 	(begin
 	  (with-output-to-file logpro-file
@@ -109,11 +106,11 @@
 		 " stepparms: " stepparms " stepcmd: " stepcmd)
     
     ;; ;; first source the previous environment
     ;; (let ((prev-env (conc ".ezsteps/" prevstep (if (string-search (regexp "csh") 
     ;;      							 (get-environment-variable "SHELL")) ".csh" ".sh"))))
-    ;;   (if (and prevstep (file-exists? prev-env))
+    ;;   (if (and prevstep (common:file-exists? prev-env))
     ;;       (set! script (conc script "source " prev-env))))
     
     ;; call the command using mt_ezstep
     ;; (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd))
     
@@ -128,11 +125,11 @@
 
          (with-output-to-file "Makefile.ezsteps"
            (lambda ()
              (print stepname ".log :")
              (print "\t" cmd)
-             (if (file-exists? (conc stepname ".logpro"))
+             (if (common:file-exists? (conc stepname ".logpro"))
                  (print "\tlogpro " stepname ".logpro " stepname ".html < " stepname ".log"))
              (print)
              (print stepname " : " stepname ".log")
              (print))
            #:append)
@@ -172,11 +169,11 @@
 	  (logfna (if logpro-used (conc stepname ".html") ""))
 	  (comment #f))
       (if logpro-used
 	  (let ((datfile (conc stepname ".dat")))
 	    ;; load the .dat file into the test_data table if it exists
-	    (if (file-exists? datfile)
+	    (if (common:file-exists? datfile)
 		(set! comment (launch:load-logpro-dat run-id test-id stepname)))
 	    (rmt:test-set-log! run-id test-id (conc stepname ".html"))))
       (rmt:teststep-set-status! run-id test-id stepname "end" exinfo comment logfna))
     ;; set the test final status
     (let* ((process-exit-status (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2))
@@ -296,11 +293,11 @@
 	;; after all that, still no testconfig? Time to abort
 	(if (not testconfig)
 	    (begin
 	      (debug:print-error 0 *default-log-port* "Failed to resolve megatest.config, runconfigs.config and testconfig issues. Giving up now")
 	      (exit 1)))
-	(if (not (file-exists? ".ezsteps"))(create-directory ".ezsteps"))
+	(if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps"))
 	;; if ezsteps was defined then we are sure to have at least one step but check anyway
 	(if (not (> (length ezstepslst) 0))
 	    (debug:print-error 0 *default-log-port* "ezsteps defined but ezstepslst is zero length")
 	    (let loop ((ezstep (car ezstepslst))
 		       (tal    (cdr ezstepslst))
@@ -308,20 +305,21 @@
 	      ;; check exit-info (vector-ref exit-info 1)
 	      (if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1)
 		  (let ((logpro-used (launch:runstep ezstep run-id test-id exit-info m tal testconfig))
 			(stepname    (car ezstep)))
 		    ;; if logpro-used read in the stepname.dat file
-		    (if (and logpro-used (file-exists? (conc stepname ".dat")))
+		    (if (and logpro-used (common:file-exists? (conc stepname ".dat")))
 			(launch:load-logpro-dat run-id test-id stepname))
 		    (if (steprun-good? logpro-used (launch:einf-exit-code exit-info))
 			(if (not (null? tal))
 			    (loop (car tal) (cdr tal) stepname))
 			(debug:print 4 *default-log-port* "WARNING: step " (car ezstep) " failed. Stopping")))
 		  (debug:print 4 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep)))))))
 
 (define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)
-  (let* ((start-seconds (current-seconds))
+  (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "30")))
+         (start-seconds (current-seconds))
 	 (calc-minutes  (lambda ()
 			  (inexact->exact 
 			   (round 
 			    (- 
 			     (current-seconds) 
@@ -330,30 +328,38 @@
     ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area)
     ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
     (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)
     (let loop ((minutes   (calc-minutes))
 	       (cpu-load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
-	       (disk-free (get-df (current-directory))))
-      (let ((new-cpu-load (let* ((load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
-				 (delta (abs (- load cpu-load))))
-			    (if (> delta 0.1) ;; don't bother updating with small changes
-				load
-				#f)))
-	    (new-disk-free (let* ((df    (get-df (current-directory)))
-				  (delta (abs (- df disk-free))))
-			     (if (> delta 200) ;; ignore changes under 200 Meg
-				 df
-				 #f))))
+	       (disk-free (get-df (current-directory)))
+               (last-sync (current-seconds)))
+      (let* ((over-time     (> (current-seconds) (+ last-sync update-period)))
+             (new-cpu-load  (let* ((load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
+                                   (delta (abs (- load cpu-load))))
+                              (if (> delta 0.1) ;; don't bother updating with small changes
+                                  load
+                                  #f)))
+             (new-disk-free (let* ((df    (if over-time ;; only get df every 30 seconds
+                                              (get-df (current-directory))
+                                              disk-free))
+                                   (delta (abs (- df disk-free))))
+                              (if (and (> df 0)
+                                       (> (/ delta df) 0.1)) ;; (> delta 200) ;; ignore changes under 200 Meg
+                                  df
+                                  #f)))
+             (do-sync       (or new-cpu-load new-disk-free over-time)))
+        (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)
 	(set! kill-job? (or (test-get-kill-request run-id test-id) ;; run-id test-name itemdat))
 			    (and runtlim (let* ((run-seconds   (- (current-seconds) start-seconds))
 						(time-exceeded (> run-seconds runtlim)))
 					   (if time-exceeded
 					       (begin
 						 (debug:print-info 0 *default-log-port* "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" run-seconds " seconds, limit=" runtlim)
 						 #t)
 					       #f)))))
-	(tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
+        (if do-sync
+            (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f))
 	(if kill-job? 
 	    (begin
 	      (mutex-lock! m)
 	      ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
 	      ;;       section and the runit section? Or add a loop that tries three times with a 1/4 second
@@ -397,11 +403,14 @@
 	      (exit)))
 	(if (hash-table-ref/default misc-flags 'keep-going #f)
 	    (begin
 	      (thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses
 	      (if (hash-table-ref/default misc-flags 'keep-going #f)  ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta
-		  (loop (calc-minutes) (or new-cpu-load cpu-load) (or new-disk-free disk-free)))))))
+		  (loop (calc-minutes)
+                        (or new-cpu-load cpu-load)
+                        (or new-disk-free disk-free)
+                        (if do-sync (current-seconds) last-sync)))))))
     (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional
 
 
 (define (launch:execute encoded-cmd)
   (let* ((cmdinfo    (common:read-encoded-string encoded-cmd))
@@ -440,11 +449,11 @@
 	       (fullrunscript (if (not runscript)
                                   #f
                                   (if (substring-index "/" runscript)
                                       runscript ;; use unadultered if contains slashes
                                       (let ((fulln (conc testpath "/" runscript)))
-	                                  (if (and (file-exists? fulln)
+	                                  (if (and (common:file-exists? fulln)
                                                    (file-execute-access? fulln))
                                               fulln
                                               runscript))))) ;; assume it is on the path
 	       ) ;; (rollup-status 0)
 
@@ -505,11 +514,11 @@
 		      (debug:print 0 *default-log-port* "Failed to create directory " logdir " expect problems, message: " ((condition-property-accessor 'exn 'message) exn))
 		    (create-directory logdir #t)))))
 		  
 	  ;; NFS might not have propagated the directory meta data to the run host - give it time if needed
 	  (let loop ((count 0))
-	    (if (or (file-exists? top-path)
+	    (if (or (common:file-exists? top-path)
 		    (> count 10))
 		(change-directory top-path)
 		(begin
 		  (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " top-path " not found")
 		  (thread-sleep! 10)
@@ -540,11 +549,15 @@
 	  
 	  ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,
 	  ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
 	  ;;
 	  (let* ((test-info (rmt:get-test-info-by-id run-id test-id))
-		 (test-host (db:test-get-host        test-info))
+		 (test-host (if test-info
+				(db:test-get-host        test-info)
+				(begin
+				  (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.")
+				  (exit))))
 		 (test-pid  (db:test-get-process_id  test-info)))
 	    (cond
 	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "KILLREQ" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
 	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
 	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
@@ -566,11 +579,11 @@
 	  
 	  (debug:print 2 *default-log-port* "Exectuing " test-name " (id: " test-id ") on " (get-host-name))
 	  (set! keys       (rmt:get-keys))
 	  ;; (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) ;; these may be needed by the launching process
 	  ;; one of these is defunct/redundant ...
-	  (if (not (launch:setup force: #t))
+	  (if (not (launch:setup force-reread: #t))
 	      (begin
 		(debug:print 0 *default-log-port* "Failed to setup, exiting") 
 		;; (sqlite3:finalize! db)
 		;; (sqlite3:finalize! tdb)
 		(exit 1)))
@@ -597,11 +610,11 @@
 		      (list "default" target)))
           ;;(bb-check-path msg: "launch:execute post block 1")
 
 	  ;; NFS might not have propagated the directory meta data to the run host - give it time if needed
 	  (let loop ((count 0))
-	    (if (or (file-exists? work-area)
+	    (if (or (common:file-exists? work-area)
 		    (> count 10))
 		(change-directory work-area)
 		(begin
 		  (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found")
 		  (thread-sleep! 10)
@@ -656,11 +669,14 @@
           ;;(bb-check-path msg: "launch:execute post block 41")
 	  (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals)
           ;;(bb-check-path msg: "launch:execute post block 42")
 	  (set-item-env-vars itemdat)
           ;;(bb-check-path msg: "launch:execute post block 43")
-	  (save-environment-as-files "megatest")
+          (let ((blacklist (configf:lookup *configdat* "setup" "blacklistvars")))
+            (if blacklist
+                (save-environment-as-files "megatest" ignorevars: (string-split blacklist))
+                (save-environment-as-files "megatest")))
           ;;(bb-check-path msg: "launch:execute post block 44")
 	  ;; open-run-close not needed for test-set-meta-info
 	  ;; (tests:set-full-meta-info #f test-id run-id 0 work-area)
 	  ;; (tests:set-full-meta-info test-id run-id 0 work-area)
 	  (tests:set-full-meta-info #f test-id run-id 0 work-area 10)
@@ -668,11 +684,11 @@
 	  ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here
 
 	  (if (args:get-arg "-xterm")
 	      (set! fullrunscript "xterm")
 	      (if (and fullrunscript 
-		       (file-exists? fullrunscript)
+		       (common:file-exists? fullrunscript)
 		       (not (file-execute-access? fullrunscript)))
 		  (system (conc "chmod ug+x " fullrunscript))))
 
 	  ;; We are about to actually kick off the test
 	  ;; so this is a good place to remove the records for 
@@ -741,12 +757,15 @@
 	      (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id)))
 	    (mutex-unlock! m)
 	    (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " 
 			 work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n")
 	    (if (not (launch:einf-exit-status exit-info))
-		(exit 4)))))))
+		(exit 4))))
+        )))
 
+;; DO NOT USE - caching of configs is handled in launch:setup now.
+;;
 (define (launch:cache-config)
   ;; if we have a linktree and -runtests and -target and the directory exists dump the config
   ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg
   (if (and *configdat* 
 	   (or (args:get-arg "-run")
@@ -758,22 +777,22 @@
 			   (args:get-arg ":runname")
 			   (getenv "MT_RUNNAME")))
 	     (fulldir  (conc linktree "/"
 			     target "/"
 			     runname)))
-	(if (and linktree (file-exists? linktree)) ;; can't proceed without linktree
+	(if (and linktree (common:file-exists? linktree)) ;; can't proceed without linktree
 	    (begin
 	      (debug:print-info 0 *default-log-port* "Have -run with target=" target ", runname=" runname ", fulldir=" fulldir ", testpatt=" (or (args:get-arg "-testpatt") "%"))
-	      (if (not (file-exists? fulldir))
+	      (if (not (common:file-exists? fulldir))
 		  (create-directory fulldir #t)) ;; need to protect with exception handler 
 	      (if (and target
 		       runname
-		       (file-exists? fulldir))
+		       (common:file-exists? fulldir))
 		  (let ((tmpfile  (conc fulldir "/.megatest.cfg." (current-seconds)))
 			(targfile (conc fulldir "/.megatest.cfg-"  megatest-version "-" megatest-fossil-hash))
 			(rconfig  (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash)))
-		    (if (file-exists? rconfig) ;; only cache megatest.config AFTER runconfigs has been cached
+		    (if (common:file-exists? rconfig) ;; only cache megatest.config AFTER runconfigs has been cached
 			(begin
 			  (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile)
                           (if (not (common:in-running-test?))
                               (configf:write-alist *configdat* tmpfile))
 			  (system (conc "ln -sf " tmpfile " " targfile))))
@@ -794,61 +813,92 @@
 ;;   side effects:
 ;;     sets; *configdat*    (megatest.config info)
 ;;           *runconfigdat* (runconfigs.config info)
 ;;           *configstatus* (status of the read data)
 ;;
-(define (launch:setup #!key (force #f) (areapath #f))
+(define (launch:setup #!key (force-reread #f) (areapath #f))
   (mutex-lock! *launch-setup-mutex*)
   (if (and *toppath*
-	   (eq? *configstatus* 'fulldata)) ;; got it all
+	   (eq? *configstatus* 'fulldata) (not force-reread)) ;; got it all
       (begin
-	(debug:print 0 *default-log-port* "NOTE: skipping launch:setup-body call since we have fulldata")
+	(debug:print 2 *default-log-port* "NOTE: skipping launch:setup-body call since we have fulldata")
 	(mutex-unlock! *launch-setup-mutex*)
 	*toppath*)
-      (let ((res (launch:setup-body force: force areapath: areapath)))
+      (let ((res (launch:setup-body force-reread: force-reread areapath: areapath)))
 	(mutex-unlock! *launch-setup-mutex*)
 	res)))
+
+;; return paths depending on what info is available.
+;;
+(define (launch:get-cache-file-paths areapath toppath target mtconfig)
+  (let* ((use-cache (common:use-cache?))
+         (runname  (common:args-get-runname))
+         (linktree (common:get-linktree))
+         (testname (common:get-full-test-name))
+         (rundir   (if (and runname target linktree)
+                       (common:directory-writable? (conc linktree "/" target "/" runname))
+                       #f))
+         (testdir  (if (and rundir testname)
+                       (common:directory-writable? (conc rundir "/" testname))
+                       #f))
+         (cachedir (or testdir rundir))
+         (mtcachef (and cachedir (conc cachedir "/" ".megatest.cfg-"  megatest-version "-" megatest-fossil-hash)))
+         (rccachef (and cachedir (conc cachedir "/" ".runconfigs.cfg-"  megatest-version "-" megatest-fossil-hash))))
+    (debug:print-info 6 *default-log-port* 
+                      "runname=" runname 
+                      "\n  linktree=" linktree
+                      "\n  testname=" testname
+                      "\n  rundir=" rundir 
+                      "\n  testdir=" testdir 
+                      "\n  cachedir=" cachedir
+                      "\n  mtcachef=" mtcachef
+                      "\n  rccachef=" rccachef)
+    (cons mtcachef rccachef)))
 
 (define (launch:setup-body #!key (force-reread #f) (areapath #f))
   (if (and (eq? *configstatus* 'fulldata)
 	   *toppath*
 	   (not force-reread)) ;; no need to reprocess
       *toppath*   ;; return toppath
-      (let* ((use-cache (common:use-cache?))
+      (let* ((use-cache (common:use-cache?)) ;; BB- use-cache checks *configdat* for use-cache setting.  We do not have *configdat*.  Bootstrapping problem here.
 	     (toppath  (or *toppath* areapath (getenv "MT_RUN_AREA_HOME"))) ;; preserve toppath
-	     
-	     (runname  (common:args-get-runname))
 	     (target   (common:args-get-target))
-	     (linktree (common:get-linktree))
-	     (contour  #f) ;; NOT READY FOR THIS (args:get-arg "-contour"))
 	     (sections (if target (list "default" target) #f)) ;; for runconfigs
 	     (mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config 
-	     (rundir   (if (and runname target linktree)
-			   (conc linktree (if contour (conc "/" contour) "") "/" target "/" runname)
-			   #f))
-             
-	     (mtcachef (and rundir (conc rundir "/" ".megatest.cfg-"  megatest-version "-" megatest-fossil-hash)))
-	     (rccachef (and rundir (conc rundir "/" ".runconfigs.cfg-"  megatest-version "-" megatest-fossil-hash)))
-	     (cancreate (and rundir (common:file-exists? rundir)(file-write-access? rundir) (not (common:in-running-test?)))))
-	;; (cxt       (hash-table-ref/default *contexts* toppath #f)))
-
-	;; create our cxt for this area if it doesn't already exist
-	;; (if (not cxt)(hash-table-set! *contexts* toppath (make-cxt)))
-	
-	;; (print "runname: " runname " target: " target " mtcachef: " mtcachef " rccachef: " rccachef)
+             (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig))
+	     ;; checking for null cachefiles should not be necessary, I was seeing error car of '(), might be a chicken bug or a red herring ...
+	     (mtcachef   (if (null? cachefiles)
+			     #f
+			     (car cachefiles))) ;; (and cachedir (conc cachedir "/" ".megatest.cfg-"  megatest-version "-" megatest-fossil-hash)))
+	     (rccachef   (if (null? cachefiles)
+			     #f
+			     (cdr cachefiles)))) ;; (and cachedir (conc cachedir "/" ".runconfigs.cfg-"  megatest-version "-" megatest-fossil-hash)))
+	      ;; (cancreate (and cachedir (common:file-exists? cachedir)(file-write-access? cachedir) (not (common:in-running-test?)))))
 	(set! *toppath* toppath) ;; This is needed when we are running as a test using CMDINFO as a datasource
+        ;;(BB> "launch:setup-body -- cachefiles="cachefiles)
 	(cond
 	 ;; if mtcachef exists just read it, however we need to assume toppath is available in $MT_RUN_AREA_HOME
-	 ((and mtcachef (common:file-exists? mtcachef) (get-environment-variable "MT_RUN_AREA_HOME") use-cache)
-	  (set! *configdat*    (configf:read-alist mtcachef))
+	 ((and (not force-reread)
+	       mtcachef  rccachef
+	       use-cache
+	       (get-environment-variable "MT_RUN_AREA_HOME")
+	       (common:file-exists? mtcachef)
+	       (common:file-exists? rccachef))
+          ;;(BB> "launch:setup-body -- cond branch 1 - use-cache")
+          (set! *configdat*    (configf:read-alist mtcachef))
+          ;;(BB> "launch:setup-body -- 1 set! *configdat*="*configdat*)
 	  (set! *runconfigdat* (configf:read-alist rccachef))
 	  (set! *configinfo*   (list *configdat*  (get-environment-variable "MT_RUN_AREA_HOME")))
 	  (set! *configstatus* 'fulldata)
 	  (set! *toppath*      (get-environment-variable "MT_RUN_AREA_HOME"))
 	  *toppath*)
+	 ;; there are no existing cached configs, do full reads of the configs and cache them
 	 ;; we have all the info needed to fully process runconfigs and megatest.config
-	 (mtcachef              
+	 ((and ;; (not force-reread) ;; force-reread is irrelevant in the AND, could however OR it?
+	       mtcachef
+	       rccachef) ;; BB- why are we doing this without asking if caching is desired?
+          ;;(BB> "launch:setup-body -- cond branch 2")
 	  (let* ((first-pass    (find-and-read-config        ;; NB// sets MT_RUN_AREA_HOME as side effect
 				 mtconfig
 				 environ-patt: "env-override"
 				 given-toppath: toppath
 				 pathenvvar: "MT_RUN_AREA_HOME"))
@@ -863,11 +913,13 @@
 				   *runconfigdat* #t 
 				   sections: sections))))
 	    (set! *runconfigdat* first-rundat)
 	    (if first-pass  ;; 
 		(begin
+                  ;;(BB> "launch:setup-body -- \"first-pass\"=first-pass")
 		  (set! *configdat*  (car first-pass))
+                  ;;(BB> "launch:setup-body -- 2 set! *configdat*="*configdat*)
 		  (set! *configinfo* first-pass)
 		  (set! *toppath*    (or toppath (cadr first-pass))) ;; use the gathered data unless already have it
 		  (set! toppath      *toppath*)
 		  (if (not *toppath*)
 		      (begin
@@ -889,26 +941,33 @@
 			 (runconfigdat (begin     ;; this read of the runconfigs will see any adjustments made by re-reading megatest.config
 					 (for-each (lambda (kt)
 						     (setenv (car kt) (cadr kt)))
 						   key-vals)
 					 (read-config (conc toppath "/runconfigs.config") *runconfigdat* #t ;; consider using runconfig:read some day ...
-						      sections: sections))))
-		    (if cancreate (configf:write-alist runconfigdat rccachef))
+						      sections: sections)))
+                         (cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
+                         (mtcachef     (car cachefiles))
+                         (rccachef     (cdr cachefiles)))
+		    (if rccachef (configf:write-alist runconfigdat rccachef))
+		    (if mtcachef (configf:write-alist *configdat* mtcachef))
 		    (set! *runconfigdat* runconfigdat)
-		    (if cancreate (configf:write-alist *configdat* mtcachef))
-		    (if cancreate (set! *configstatus* 'fulldata))))
+		    (if (and rccachef mtcachef) (set! *configstatus* 'fulldata))))
 		;; no configs found? should not happen but let's try to recover gracefully, return an empty hash-table
 		(set! *configdat* (make-hash-table))
 		)))
+
 	 ;; else read what you can and set the flag accordingly
+	 ;; here we don't have either mtconfig or rccachef
 	 (else
+          ;;(BB> "launch:setup-body -- cond branch 3 - else")
 	  (let* ((cfgdat   (find-and-read-config 
 			    (or (args:get-arg "-config") "megatest.config")
 			    environ-patt: "env-override"
 			    given-toppath: (get-environment-variable "MT_RUN_AREA_HOME")
 			    pathenvvar: "MT_RUN_AREA_HOME")))
-	    (if cfgdat
+
+            (if (and cfgdat (list? cfgdat) (> (length cfgdat) 0) (hash-table? (car cfgdat)))
 		(let* ((toppath  (or (get-environment-variable "MT_RUN_AREA_HOME")(cadr cfgdat)))
 		       (rdat     (read-config (conc toppath  ;; convert this to use runconfig:read!
 						    "/runconfigs.config") *runconfigdat* #t sections: sections)))
 		  (set! *configinfo*   cfgdat)
 		  (set! *configdat*    (car cfgdat))
@@ -916,10 +975,12 @@
 		  (set! *toppath*      toppath)
 		  (set! *configstatus* 'partial))
 		(begin
 		  (debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.")
 		  (exit 2))))))
+	;; COND ends here.
+	
 	;; additional house keeping
 	(let* ((linktree (common:get-linktree)))
 	  (if linktree
 	      (begin
 		(if (not (common:file-exists? linktree))
@@ -935,11 +996,11 @@
 		    exn
 		    (begin
 		      (debug:print-error 0 *default-log-port* "Something went wrong when trying to create link to linktree at " *toppath*)
 		      (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)))
 		  (let ((tlink (conc *toppath* "/lt")))
-		    (if (not (file-exists? tlink))
+		    (if (not (common:file-exists? tlink))
 			(create-symbolic-link linktree tlink)))))
 	      (begin
 		(debug:print-error 0 *default-log-port* "linktree not defined in [setup] section of megatest.config")
 		)))
 	(if (and *toppath*
@@ -947,14 +1008,22 @@
 	    (begin
 	      (setenv "MT_RUN_AREA_HOME" *toppath*)
 	      (setenv "MT_TESTSUITENAME" (common:get-testsuite-name)))
 	    (begin
 	      (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")
-	      ;;(exit 1)
 	      (set! *toppath* #f) ;; force it to be false so we return #f
-	      #f
-	      ))
+	      #f))
+	
+        ;; one more attempt to cache the configs for future reading
+        (let* ((cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
+               (mtcachef     (car cachefiles))
+               (rccachef     (cdr cachefiles)))
+          (if (and rccachef *runconfigdat* (not (common:file-exists? rccachef))) (configf:write-alist *runconfigdat* rccachef))
+          (if (and mtcachef *configdat*    (not (common:file-exists? mtcachef))) (configf:write-alist *configdat* mtcachef))
+          (if (and rccachef mtcachef *runconfigdat* *configdat*)
+              (set! *configstatus* 'fulldata)))
+
 	;; if have -append-config then read and append here
 	(let ((cfname (args:get-arg "-append-config")))
 	  (if (and cfname
 		   (file-read-access? cfname))
 	      (read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special.
@@ -1065,11 +1134,11 @@
 	 (begin
 	   (debug:print-error 0 *default-log-port* " Failed to remove symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting")
 	   (exit 1))
 	 (delete-file lnkpath)))
 
-    (if (not (or (file-exists? lnkpath)
+    (if (not (or (common:file-exists? lnkpath)
 		 (symbolic-link? lnkpath)))
 	(handle-exceptions
 	 exn
 	 (begin
 	   (debug:print-error 0 *default-log-port* " Failed to create symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting")
@@ -1090,11 +1159,11 @@
 				   (db:test-get-rundir testinfo) ;; ) ;; )
 				   #f)))
 	  (hash-table-set! *toptest-paths* testname curr-test-path)
 	  ;; NB// Was this for the test or for the parent in an iterated test?
 	  (rmt:general-call 'test-set-rundir-shortdir run-id lnkpath 
-			    (if (file-exists? lnkpath)
+			    (if (common:file-exists? lnkpath)
 				;; (resolve-pathname lnkpath)
 				(common:nice-path lnkpath)
 				lnkpath)
 			    testname "" run-id)
 	  ;; (rmt:general-call 'test-set-rundir run-id lnkpath testname "") ;; toptest-path)
@@ -1129,11 +1198,11 @@
 	   exn
 	   (begin
 	     (debug:print-error 0 *default-log-port* " Failed to re-create link " lnktarget ((condition-property-accessor 'exn 'message) exn) ", exiting")
 	     (exit))
 	   (if (symbolic-link? lnktarget)     (delete-file lnktarget))
-	   (if (not (file-exists? lnktarget)) (create-symbolic-link test-path lnktarget)))))
+	   (if (not (common:file-exists? lnktarget)) (create-symbolic-link test-path lnktarget)))))
 
     (if (not (directory? test-path))
 	(create-directory test-path #t)) ;; this is a hack, I don't know why out of the blue this path does not exist sometimes
 
     (if (and test-src-path (directory? test-path))
@@ -1169,14 +1238,15 @@
 (define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
   (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex
   (let* ((item-path       (item-list->path itemdat))
 	 (contour         #f)) ;; NOT READY FOR THIS (args:get-arg "-contour")))
     (let loop ((delta        (- (current-seconds) *last-launch*))
-	       (launch-delay (string->number (or (configf:lookup *configdat* "setup" "launch-delay") "5"))))
+	       (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1)))
       (if (> launch-delay delta)
 	  (begin
-	    (debug:print-info 0 *default-log-port* "Delaying launch of " test-name " for " (- launch-delay delta) " seconds")
+	    (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay.
+		(debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds"))
 	    (thread-sleep! (- launch-delay delta))
 	    (loop (- (current-seconds) *last-launch*) launch-delay))))
     (change-directory *toppath*)
     (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars)
      (append
@@ -1295,11 +1365,11 @@
 					(list 'mt-bindir-path mt-bindir-path))))))))
       
       ;; clean out step records from previous run if they exist
       ;; (rmt:delete-test-step-records run-id test-id)
       ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway
-      (if (file-exists? work-area)
+      (if (common:file-exists? work-area)
 	  (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir
       (cond
        ;; ((and launcher hosts) ;; must be using ssh hostname
        ;;    (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
        ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))

Index: lock-queue.scm
==================================================================
--- lock-queue.scm
+++ lock-queue.scm
@@ -5,12 +5,11 @@
 ;; 
 ;;  This program is distributed WITHOUT ANY WARRANTY; without even the
 ;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 ;;  PURPOSE.
 
-(use sqlite3 srfi-18)
-(import (prefix sqlite3 sqlite3:))
+(use (prefix sqlite3 sqlite3:) srfi-18)
 
 (declare (unit lock-queue))
 (declare (uses common))
 (declare (uses tasks))
 
@@ -33,11 +32,11 @@
   (let ((fname (lock-queue:db-dat-get-path dbdat)))
     (system (conc "rm -f " fname "*"))))
 
 (define (lock-queue:open-db fname #!key (count 10))
   (let* ((actualfname (conc fname ".lockdb"))
-	 (dbexists (file-exists? actualfname))
+	 (dbexists (common:file-exists? actualfname))
 	 (db       (sqlite3:open-database actualfname))
 	 (handler  (make-busy-timeout 136000)))
     (if dbexists
 	(vector db actualfname)
 	(begin
@@ -164,12 +163,12 @@
 	     ;; If we've tried ten times and failed there is a serious problem
 	     ;; try to remove the lock db and allow it to be recreated
 	     (handle-exceptions
 	      exn
 	      #f
-	      (if (file-exists? journal)(delete-file journal))
-	      (if (file-exists? fname)  (delete-file fname))
+	      (if (common:file-exists? journal)(delete-file journal))
+	      (if (common:file-exists? fname)  (delete-file fname))
 	      #f))))
      (sqlite3:execute (lock-queue:db-dat-get-db dbdat) "DELETE FROM runlocks WHERE test_id=?;" test-id)
      (sqlite3:finalize! (lock-queue:db-dat-get-db dbdat)))))
 
 (define (lock-queue:steal-lock dbdat test-id #!key (count 10))

Index: megatest-version.scm
==================================================================
--- megatest-version.scm
+++ megatest-version.scm
@@ -1,7 +1,7 @@
 ;; Always use two or four digit decimal
 ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00..
 
 (declare (unit megatest-version))
 
-(define megatest-version 1.6404)
+(define megatest-version 1.6502)
 

Index: megatest.config
==================================================================
--- megatest.config
+++ megatest.config
@@ -1,22 +1,54 @@
-[fields]
-a text
-b text
-c text
+
+## commented out due to a bug in v1.6501 in mtutil
+## [fields]
+## a text
+## b text
+## c text
+usercode    .mtutil.scm
+areafilter  area-to-run
+targtrans   generic-target-translator
+runtrans    generic-runname-translator
 
 [setup]
-pktsdirs /tmp/pkts /some/other/source
+pktsdirs /tmp/mt_pkts /some/other/source
 
 [areas]
 #         path-to-area   map-target-script(future, optional)
-fullrun   path=tests/fullrun
+# someqa     path=../megatestqa/someqa; targtrans=somefunc; areafilter=area-to-run
+fullrun   path=tests/fullrun; 
 # targtrans is name of scheme proc stored in .mtutil.scm, which lives in PWD where mtutil is run
+#           the target translator can return: a/target OR (list/of targets/to apply/run) OR #f i.e. run nothing
 # ext-tests path=ext-tests; targtrans=prefix-contour;
-ext-tests path=ext-tests
+ext       path=ext-tests
 
 [contours]
 #     mode-patt/tag-expr
-quick selector=QUICKPATT/quick
-full  areas=fullrun,ext-tests; selector=MAXPATT/
-all   areas=fullrun,ext-tests
-snazy areas=%; selector=QUICKPATT/
+quick areas=ext;    selector=/QUICKPATT
+quick2 areafn=check-area; selector=/QUICKPATT
+# quick areas=fullrun,ext-tests; selector=QUICKPATT/quick
+# full  areas=fullrun,ext-tests; selector=MAXPATT/
+# short areas=fullrun,ext-tests; selector=MAXPATT/
+# all   areas=fullrun,ext-tests
+# snazy selector=QUICKPATT/
+
+[nopurpose]
+
+[access]
+ext matt:admin mattw:owner
+
+[accesstypes]
+admin run rerun resume remove set-ss
+owner run rerun resume remove
+badguy set-ss
+
+[setup]
+maxload 1.2
+
+[listeners]
+localhost:12345  contact=matt@kiatoa.com
+localhost:54321  contact=matt@kiatoa.com
+
+[listener]
+script nbfake echo
+
 

Index: megatest.scm
==================================================================
--- megatest.scm
+++ megatest.scm
@@ -11,20 +11,18 @@
 ;; (include "megatest-version.scm")
 
 ;; fake out readline usage of toplevel-command
 (define (toplevel-command . a) #f)
 
-(use sqlite3 srfi-1 posix regex regex-case srfi-69 base64 readline apropos json http-client directory-utils rpc typed-records;; (srfi 18) extras)
-     http-client srfi-18 extras format) ;;  zmq extras)
+(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (prefix base64 base64:)
+     readline apropos json http-client directory-utils typed-records
+     http-client srfi-18 extras format)
 
 ;; Added for csv stuff - will be removed
 ;;
 (use sparse-vectors)
 
-(import (prefix sqlite3 sqlite3:))
-(import (prefix base64 base64:))
-(import (prefix rpc rpc:))
 (require-library mutils)
 
 ;; (use zmq)
 
 (declare (uses common))
@@ -54,11 +52,11 @@
 (include "db_records.scm")
 (include "run_records.scm")
 (include "megatest-fossil-hash.scm")
 
 (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc")))
-  (if (file-exists? debugcontrolf)
+  (if (common:file-exists? debugcontrolf)
       (load debugcontrolf)))
 
 ;; Disabled help items
 ;;  -rollup                 : (currently disabled) fill run (set by :runname)  with latest test(s)
 ;;                            from prior runs with same keys
@@ -75,11 +73,12 @@
   -version                : print megatest version (currently " megatest-version ")
 
 Launching and managing runs
   -run                    : run all tests or as specified by -testpatt
   -remove-runs            : remove the data for a run, requires -runname and -testpatt
-                            Optionally use :state and :status
+                            Optionally use :state and :status, use -keep-records to remove only
+                            the run data.
   -set-state-status X,Y   : set state to X and status to Y, requires controls per -remove-runs
   -rerun FAIL,WARN...     : force re-run for tests with specificed status(s)
   -rerun-clean            : set all tests not COMPLETED+PASS,WARN,WAIVED to NOT_STARTED,n/a
                             and then run the specified testpatt with -preclean
   -rerun-all              : set all tests to NOT_STARTED,n/a and run with -preclean
@@ -99,10 +98,11 @@
   -runname                : required, name for this particular test run
   -state                  : Applies to runs, tests or steps depending on context
   -status                 : Applies to runs, tests or steps depending on context
   --modepatt key          : load testpatt from <key> in runconfigs instead of default TESTPATT if -testpatt and -tagexpr are not specified
   -tagexpr tag1,tag2%,..  : select tests with tags matching expression
+  
 
 Test helpers (for use inside tests)
   -step stepname
   -test-status            : set the state and status of a test (use :state and :status)
   -setlog logfname        : set the path/filename to the final log relative to the test
@@ -133,17 +133,18 @@
   -list-disks             : list the disks available for storing runs
   -list-targets           : list the targets in runconfigs.config
   -list-db-targets        : list the target combinations used in the db
   -show-config            : dump the internal representation of the megatest.config file
   -show-runconfig         : dump the internal representation of the runconfigs.config file
-  -dumpmode MODE          : dump in MODE format instead of sexpr, MODE=json,ini,sexp etc.
+  -dumpmode MODE          : dump in MODE format instead of sexpr, MODE=json,ini,sexp etc. (add -debug 0,9 to see which file contributes each line)
   -show-cmdinfo           : dump the command info for a test (run in test environment)
   -section sectionName
   -var varName            : for config and runconfig lookup value for sectionName varName
   -since N                : get list of runs changed since time N (Unix seconds)
   -fields fieldspec       : fields to include in json dump; runs:id,runame+tests:testname+steps
   -sort fieldname         : in -list-runs sort tests by this field
+  -testdata-csv [categorypatt/]varpatt  : dump testdata for given category
 
 Misc 
   -start-dir path         : switch to this directory before running megatest
   -contour cname          : add a level of hierarcy to the linktree and run paths
   -rebuild-db             : bring the database schema up to date
@@ -228,10 +229,11 @@
 			":state"  
 			"-state"
 			":status"
 			"-status"
 			"-list-runs"
+                        "-testdata-csv"
 			"-testpatt"
                         "--modepatt"
                         "-tagexpr"
 			"-itempatt"
 			"-setlog"
@@ -337,10 +339,11 @@
 			"-test-paths" ;; get path(s) to a test, ordered by youngest first
 
 			"-runall"    ;; run all tests, respects -testpatt, defaults to %
 			"-run"       ;; alias for -runall
 			"-remove-runs"
+                        "-keep-records" ;; use with -remove-runs to remove only the run data
 			"-rebuild-db"
 			"-cleanup-db"
 			"-rollup"
 			"-update-meta"
 			"-create-megatest-area"
@@ -372,11 +375,11 @@
     (debug:print-error 0 *default-log-port* "Unrecognised arguments: " (string-intersperse (if (list? remargs) remargs (argv))  " ")))
 
 ;; before doing anything else change to the start-dir if provided
 ;;
 (if (args:get-arg "-start-dir")
-    (if (file-exists? (args:get-arg "-start-dir"))
+    (if (common:file-exists? (args:get-arg "-start-dir"))
         (let ((fullpath (common:real-path (args:get-arg "-start-dir"))))
           (setenv "PWD" fullpath)
           (change-directory fullpath))
 	(begin
 	  (debug:print-error 0 *default-log-port* "non-existant start dir " (args:get-arg "-start-dir") " specified, exiting.")
@@ -389,25 +392,35 @@
 
 ;; The watchdog is to keep an eye on things like db sync etc.
 ;;
 
 ;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage
-(define *watchdog* (make-thread common:watchdog "Watchdog thread"))
+(define *watchdog* (make-thread
+		    (lambda ()
+		      (handle-exceptions
+			  exn
+			  (begin
+			    (print-call-chain)
+			    (print " message: " ((condition-property-accessor 'exn 'message) exn)))
+			(common:watchdog)))
+		    "Watchdog thread"))
 
 ;;(if (not (args:get-arg "-server"))
 ;;    (thread-start! *watchdog*)) ;; if starting a server; wait till we get to running state before kicking off watchdog
 (let* ((no-watchdog-args
        '("-list-runs"
+         "-testdata-csv"
          "-list-servers"
          "-server"
          "-list-disks"
          "-list-targets"
          "-show-runconfig"
          ;;"-list-db-targets"
          "-show-runconfig"
          "-show-config"
-         "-show-cmdinfo"))
+         "-show-cmdinfo"
+	 "-cleanup-db"))
        (no-watchdog-args-vals (filter (lambda (x) x)
                                       (map args:get-arg no-watchdog-args)))
        (start-watchdog (null? no-watchdog-args-vals)))
   ;;(BB> "no-watchdog-args="no-watchdog-args "no-watchdog-args-vals="no-watchdog-args-vals) 
   (if start-watchdog
@@ -424,18 +437,21 @@
    (exn ()
         (debug:print-error 0 *default-log-port* "Could not open log file for write: "logpath)
         (define *didsomething* #t)  
         (exit 1))))
 
-    
+;; this segment will run launch:setup only if -log is not set. This is fairly safe as servers are not
+;; manually started and thus should never be started in a non-megatest area. Thus no need to handle situation
+;; where (launch:setup) returns #f?
+;;
 (if (or (args:get-arg "-log")(args:get-arg "-server")) ;; redirect the log always when a server
     (handle-exceptions
 	exn
 	(begin
-	  (print "ERROR: Failed to switch to log output. " ((conition-property-accessor 'exn 'message) exn))
+	  (print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn))
 	  )
-      (let* ((tl   (or (args:get-arg "-log")(launch:setup)))   ;; run launch:setup if -server
+      (let* ((tl   (or (args:get-arg "-log")(launch:setup)))   ;; run launch:setup if -server, ensure we do NOT run launch:setup if -log specified
 	     (logf (or (args:get-arg "-log") ;; use -log unless we are a server, then craft a logfile name
 		       (conc tl "/logs/server-" (current-process-id) "-" (get-host-name) ".log")))
 	     (oup  (open-logfile logf)))
 	(if (not (args:get-arg "-log"))
 	    (hash-table-set! args:arg-hash "-log" logf)) ;; fake out future queries of -log
@@ -453,11 +469,11 @@
     (let* ((htmlviewercmd (or (configf:lookup *configdat* "setup" "htmlviewercmd")
 			      (common:which '("firefox" "arora"))))
 	   (install-home  (common:get-install-area))
 	   (manual-html   (conc install-home "/share/docs/megatest_manual.html")))
       (if (and install-home
-	       (file-exists? manual-html))
+	       (common:file-exists? manual-html))
 	  (system (conc "(" htmlviewercmd " " manual-html " ) &"))
 	  (system (conc "(" htmlviewercmd " http://www.kiatoa.com/cgi-bin/fossils/megatest/doc/tip/docs/manual/megatest_manual.html ) &")))
       (exit)))
 
 (if (args:get-arg "-version")
@@ -494,10 +510,24 @@
 
 ;; for some switches always print the command to stderr
 ;;
 (if (args:any? "-run" "-runall" "-remove-runs" "-set-state-status")
     (debug:print 0 *default-log-port* (string-intersperse (argv) " ")))
+
+;; some switches imply homehost. Exit here if not on homehost
+;;
+(let ((homehost-required  (list "-cleanup-db" "-server")))
+  (if (apply args:any? homehost-required)
+      (if (not (common:on-homehost?))
+	  (for-each
+	   (lambda (switch)
+	     (if (args:get-arg switch)
+		 (begin
+		   (debug:print 0 *default-log-port* "ERROR: you must be on the homehost to run with " switch
+				", you can move homehost by removing the .homehost file but this will disrupt any runs in progress.")
+		   (exit 1))))
+	   homehost-required))))
 
 ;;======================================================================
 ;; Misc setup stuff
 ;;======================================================================
 
@@ -534,11 +564,15 @@
 ;; handle a clean-cache request as early as possible
 ;;
 (if (args:get-arg "-clean-cache")
     (let ((toppath  (launch:setup)))
       (set! *didsomething* #t) ;; suppress the help output.
-      (runs:clean-cache (getenv "MT_TARGET")(args:get-arg "-runname") toppath)))
+      (runs:clean-cache (or (getenv "MT_TARGET")
+			    (args:get-arg "-target")
+			    (args:get-arg "-remtarg"))
+			(args:get-arg "-runname")
+			toppath)))
 	  
 (if (args:get-arg "-env2file")
     (begin
       (save-environment-as-files (args:get-arg "-env2file"))
       (set! *didsomething* #t)))
@@ -690,11 +724,11 @@
 				 (else
 				  (loop row (+ col 1) (append curr-row (list val)) result)))))))))
 		    (hash-table-keys results))))
 		((sqlite3)
 		 (let* ((db-file   (or out-file (pathname-file input-db)))
-			(db-exists (file-exists? db-file))
+			(db-exists (common:file-exists? db-file))
 			(db        (sqlite3:open-database db-file)))
 		   (if (not db-exists)(sqlite3:execute db "CREATE TABLE data (sheet,section,var,val);"))
 		   (configf:map-all-hier-alist
 		    data
 		    (lambda (sheetname sectionname varname val)
@@ -838,11 +872,11 @@
   (let* ((rundir (if (and (getenv "MT_LINKTREE")(getenv "MT_TARGET")(getenv "MT_RUNNAME"))
 		     (conc (getenv "MT_LINKTREE") "/" (getenv "MT_TARGET") "/" (getenv "MT_RUNNAME"))
 		     #f))
 	 (cfgf   (if rundir (conc rundir "/.runconfig." megatest-version "-" megatest-fossil-hash) #f)))
     (if (and cfgf
-	     (file-exists? cfgf)
+	     (common:file-exists? cfgf)
 	     (file-write-access? cfgf)
 	     (common:use-cache?))
 	(configf:read-alist cfgf)
 	(let* ((keys   (rmt:get-keys))
 	       (target (common:args-get-target))
@@ -861,12 +895,13 @@
 		   (file-write-access? rundir))
 	      (begin
                 (if (not (common:in-running-test?))
                     (configf:write-alist data cfgf))
 		;; force re-read of megatest.config - this resolves circular references between megatest.config
-		(launch:setup force: #t)
-		(launch:cache-config))) ;; we can safely cache megatest.config since we have a valid runconfig
+		(launch:setup force-reread: #t)
+		;; (launch:cache-config) ;; there are two independent config cache locations, turning this one off for now. MRW.
+		)) ;; we can safely cache megatest.config since we have a valid runconfig
 	  data))))
 
 (if (args:get-arg "-show-runconfig")
     (let ((tl (launch:setup)))
       (push-directory *toppath*)
@@ -928,11 +963,11 @@
 ;; Remove old run(s)
 ;;======================================================================
 
 ;; since several actions can be specified on the command line the removal
 ;; is done first
-(define (operate-on action)
+(define (operate-on action #!key (mode #f)) ;; #f is "use default"
   (let* ((runrec (runs:runrec-make-record))
 	 (target (common:args-get-target)))
     (cond
      ((not target)
       (debug:print-error 0 *default-log-port* "Missing required parameter for " action ", you must specify -target or -reqtarg")
@@ -957,19 +992,22 @@
 			      target
 			      (common:args-get-runname)  ;; (or (args:get-arg "-runname")(args:get-arg ":runname"))
 			      (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt")
 			      state: (common:args-get-state)
 			      status: (common:args-get-status)
-			      new-state-status: (args:get-arg "-set-state-status"))))
+			      new-state-status: (args:get-arg "-set-state-status")
+                              mode: mode)))
       (set! *didsomething* #t)))))
 
 (if (args:get-arg "-remove-runs")
     (general-run-call 
      "-remove-runs"
      "remove runs"
      (lambda (target runname keys keyvals)
-       (operate-on 'remove-runs))))
+       (operate-on 'remove-runs mode: (if (args:get-arg "-keep-records")
+                                          'remove-data-only
+                                          'remove-all)))))
 
 (if (args:get-arg "-set-state-status")
     (general-run-call 
      "-set-state-status"
      "set state and status"
@@ -1023,10 +1061,114 @@
     (if indx
 	(if (>= indx (vector-length datavec))
 	    #f ;; index too high, should raise an error I suppose
 	    (vector-ref datavec indx))
 	#f)))
+
+
+
+
+
+(when (args:get-arg "-testdata-csv")
+  (if (launch:setup)
+      (let* ((keys        (rmt:get-keys)) ;; (db:get-keys dbstruct))
+             (runpatt     (or (args:get-arg "-runname") "%"))
+             (testpatt    (common:args-get-testpatt #f))
+             (datapatt    (args:get-arg "-testdata-csv"))
+             (match-data  (string-match "^([^/]+)/(.*)" (args:get-arg "-testdata-csv")))
+             (categorypatt (if match-data (list-ref match-data 1) "%"))
+             (setvarpatt  (if match-data
+                              (list-ref match-data 2)
+                              (args:get-arg "-testdata-csv")))
+             (runsdat     (rmt:get-runs-by-patt keys (or runpatt "%") 
+                                                (common:args-get-target) #f #f '("id" "runname" "state" "status" "owner" "event_time" "comment") 0))
+             (header      (db:get-header runsdat))
+             (access-mode (db:get-access-mode))
+             (testpatt    (common:args-get-testpatt #f))
+             (fields-spec (if (args:get-arg "-fields")
+                              (extract-fields-constraints (args:get-arg "-fields"))
+                              (list (cons "runs" (append keys (list "id" "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count")))
+                                    (cons "tests"  db:test-record-fields) ;; "id" "testname" "test_path")
+                                    (list "steps" "id" "stepname"))))
+             (tests-spec  (let ((t (alist-ref "tests" fields-spec equal?)))
+                            (if (and t (null? t)) ;; all fields
+                                db:test-record-fields
+                                t)))
+             (adj-tests-spec (delete-duplicates (if tests-spec (cons "id" tests-spec) db:test-record-fields))) 
+             (test-field-index (make-hash-table))
+             (runs (db:get-rows runsdat))
+             )
+        (if (and tests-spec (not (null? tests-spec))) ;; do some validation and processing of the test-spec
+            (let ((invalid-tests-spec (filter (lambda (x)(not (member x db:test-record-fields))) tests-spec)))
+              (if (null? invalid-tests-spec)
+                  ;; generate the lookup map test-field-name => index-number
+                  (let loop ((hed (car adj-tests-spec))
+                             (tal (cdr adj-tests-spec))
+                             (idx 0))
+                    (hash-table-set! test-field-index hed idx)
+                    (if (not (null? tal))(loop (car tal)(cdr tal)(+ idx 1))))
+                  (begin
+                    (debug:print-error 0 *default-log-port* "Invalid test fields specified: " (string-intersperse invalid-tests-spec ", "))
+                    (exit)))))
+        (let* ((table-header (string-split "target,run,test,itempath,category,var,value,comment" ","))
+               (table-rows
+                (apply append (map  
+                               (lambda (run)
+                                 (let* ((target (string-intersperse (map (lambda (x)
+							 (db:get-value-by-header run header x))
+						       keys) "/"))
+                                        (statuses (string-split (or (args:get-arg "-status") "") ","))
+                                        (run-id  (db:get-value-by-header run header "id"))
+                                        (runname (db:get-value-by-header run header "runname")) 
+                                        (states  (string-split (or (args:get-arg "-state") "") ","))
+                                        (tests   (if tests-spec
+                                                     (db:dispatch-query access-mode rmt:get-tests-for-run db:get-tests-for-run run-id testpatt states statuses #f #f #f 'testname 'asc ;; (db:get-tests-for-run dbstruct run-id testpatt '() '() #f #f #f 'testname 'asc 
+                                                                        ;; use qryvals if test-spec provided
+                                                                        (if tests-spec
+                                                                            (string-intersperse adj-tests-spec ",")
+                                                                            ;; db:test-record-fields
+                                                                            #f)
+                                                                        #f
+                                                                        'normal)
+                                                     '())))
+                                   (apply append
+                                          (map
+                                           (lambda (test)
+                                             (let* (
+                                                    (test-id      (if (member "id"           tests-spec)(get-value-by-fieldname test test-field-index "id"          ) #f)) ;; (db:test-get-id         test))
+                                                    (testname     (if (member "testname"     tests-spec)(get-value-by-fieldname test test-field-index "testname"    ) #f)) ;; (db:test-get-testname   test))
+                                                    (itempath     (if (member "item_path"    tests-spec)(get-value-by-fieldname test test-field-index "item_path"   ) #f)) ;; (db:test-get-item-path  test))
+                                                    (fullname     (conc testname
+                                                                        (if (equal? itempath "")
+                                                                            "" 
+                                                                            (conc "/" itempath ))))
+                                                    (testdat-raw (map vector->list (rmt:read-test-data* run-id test-id categorypatt setvarpatt)))
+                                                    (testdat (filter
+                                                              (lambda (x)
+                                                                (not (equal? "logpro"
+                                                                             (list-ref x 10))))
+                                                              testdat-raw)))
+                                               (map 
+                                                (lambda (item)
+                                                  (receive (id test_id category
+                                                               variable value expected
+                                                               tol units comment status type)
+                                                      (apply values item)
+                                                    (list target runname testname itempath category variable value comment)))
+                                                testdat)))
+                                           tests))))
+                               runs))))
+          (print (string-join table-header ","))
+          (for-each (lambda(table-row)
+                      (print (string-join (map ->string table-row) ",")))
+
+                    
+                            table-rows))))
+  (set! *didsomething* #t)
+  (set! *time-to-exit* #t))
+
+
 
 ;; NOTE: list-runs and list-db-targets operate on local db!!!
 ;;
 ;; IDEA: megatest list -runname blah% ...
 ;;
@@ -1160,11 +1302,11 @@
 		      (lambda (test)
 		      	(common:debug-handle-exceptions #f
 			 exn
 			 (begin
 			   (debug:print-error 0 *default-log-port* "Bad data in test record? " test)
-			   (print "exn=" (condition->list exn))
+			   (debug:print-error 5 *default-log-port* "exn=" (condition->list exn))
 			   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
 			   (print-call-chain (current-error-port)))
 			 (let* ((test-id      (if (member "id"           tests-spec)(get-value-by-fieldname test test-field-index "id"          ) #f)) ;; (db:test-get-id         test))
 				(testname     (if (member "testname"     tests-spec)(get-value-by-fieldname test test-field-index "testname"    ) #f)) ;; (db:test-get-testname   test))
 				(itempath     (if (member "item_path"    tests-spec)(get-value-by-fieldname test test-field-index "item_path"   ) #f)) ;; (db:test-get-item-path  test))
@@ -1403,61 +1545,63 @@
 (if (or (args:get-arg "-runall")
 	(args:get-arg "-run")
 	(args:get-arg "-rerun-clean")
 	(args:get-arg "-rerun-all")
 	(args:get-arg "-runtests"))
-    (general-run-call 
-     "-runall"
-     "run all tests"
-     (lambda (target runname keys keyvals)
-       (if (args:get-arg "-rerun-clean") ;; first set states/statuses correct
-	   (let ((states   (or (configf:lookup *configdat* "validvalues" "cleanrerun-states")
-			       "KILLREQ,KILLED,UNKNOWN,INCOMPLETE,STUCK,NOT_STARTED"))
-		 (statuses (or (configf:lookup *configdat* "validvalues" "cleanrerun-statuses")
-			       "FAIL,INCOMPLETE,ABORT,CHECK")))
-	     (hash-table-set! args:arg-hash "-preclean" #t)
-	     (runs:operate-on 'set-state-status
-			      target
-			      (common:args-get-runname)  ;; (or (args:get-arg "-runname")(args:get-arg ":runname"))
-			      "%" ;; (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt")
-			      state:  states
-			      ;; status: statuses
-			      new-state-status: "NOT_STARTED,n/a")
-	     (runs:clean-cache target runname *toppath*)
-	     (runs:operate-on 'set-state-status
-			      target
-			      (common:args-get-runname)  ;; (or (args:get-arg "-runname")(args:get-arg ":runname"))
-			      "%" ;; (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt")
-			      ;; state:  states
-			      status: statuses
-			      new-state-status: "NOT_STARTED,n/a")))
-       ;; RERUN ALL
-       (if (args:get-arg "-rerun-all") ;; first set states/statuses correct
-	   (begin
-	     (hash-table-set! args:arg-hash "-preclean" #t)
-	     (runs:operate-on 'set-state-status
-			      target
-			      (common:args-get-runname)  ;; (or (args:get-arg "-runname")(args:get-arg ":runname"))
-			      "%" ;; (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt")
-			      state:  #f
-			      ;; status: statuses
-			      new-state-status: "NOT_STARTED,n/a")
-	     (runs:clean-cache target runname *toppath*)
-	     (runs:operate-on 'set-state-status
-			      target
-			      (common:args-get-runname)  ;; (or (args:get-arg "-runname")(args:get-arg ":runname"))
-			      "%" ;; (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt")
-			      ;; state:  states
-			      status: #f
-			      new-state-status: "NOT_STARTED,n/a")))
-       (runs:run-tests target
-		       runname
-		       #f ;; (common:args-get-testpatt #f)
-		       ;; (or (args:get-arg "-testpatt")
-		       ;;     "%")
-		       user
-		       args:arg-hash))))
+    (let ((need-clean (or (args:get-arg "-rerun-clean")
+                          (args:get-arg "-rerun-all"))))
+      (general-run-call 
+       "-runall"
+       "run all tests"
+       (lambda (target runname keys keyvals)
+         (if (args:get-arg "-rerun-clean") ;; first set states/statuses correct
+             (let ((states   (or (configf:lookup *configdat* "validvalues" "cleanrerun-states")
+                                 "KILLREQ,KILLED,UNKNOWN,INCOMPLETE,STUCK,NOT_STARTED"))
+                   (statuses (or (configf:lookup *configdat* "validvalues" "cleanrerun-statuses")
+                                 "FAIL,INCOMPLETE,ABORT,CHECK")))
+               (hash-table-set! args:arg-hash "-preclean" #t)
+               (runs:operate-on 'set-state-status
+                                target
+                                (common:args-get-runname)  ;; (or (args:get-arg "-runname")(args:get-arg ":runname"))
+                                "%" ;; (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt")
+                                state:  states
+                                ;; status: statuses
+                                new-state-status: "NOT_STARTED,n/a")
+               (runs:clean-cache target runname *toppath*)
+               (runs:operate-on 'set-state-status
+                                target
+                                (common:args-get-runname)  ;; (or (args:get-arg "-runname")(args:get-arg ":runname"))
+                                "%" ;; (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt")
+                                ;; state:  states
+                                status: statuses
+                                new-state-status: "NOT_STARTED,n/a")))
+         ;; RERUN ALL
+         (if (args:get-arg "-rerun-all") ;; first set states/statuses correct
+             (begin
+               (hash-table-set! args:arg-hash "-preclean" #t)
+               (runs:operate-on 'set-state-status
+                                target
+                                (common:args-get-runname)  ;; (or (args:get-arg "-runname")(args:get-arg ":runname"))
+                                "%" ;; (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt")
+                                state:  #f
+                                ;; status: statuses
+                                new-state-status: "NOT_STARTED,n/a")
+               (runs:clean-cache target runname *toppath*)
+               (runs:operate-on 'set-state-status
+                                target
+                                (common:args-get-runname)  ;; (or (args:get-arg "-runname")(args:get-arg ":runname"))
+                                "%" ;; (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt")
+                                ;; state:  states
+                                status: #f
+                                new-state-status: "NOT_STARTED,n/a")))
+         (runs:run-tests target
+                         runname
+                         #f ;; (common:args-get-testpatt #f)
+                         ;; (or (args:get-arg "-testpatt")
+                         ;;     "%")
+                         user
+                         args:arg-hash)))))
 
 ;;======================================================================
 ;; run one test
 ;;======================================================================
 
@@ -1559,11 +1703,11 @@
 	  (let* ((keys     (rmt:get-keys))
 		 ;; db:test-get-paths must not be run remote
 		 (paths    (tests:test-get-paths-matching keys target (args:get-arg "-test-files"))))
 	    (set! *didsomething* #t)
 	    (for-each (lambda (path)
-			(if (file-exists? path)
+			(if (common:file-exists? path)
 			(print path)))	
 		      paths)))
 	;; else do a general-run-call
 	(general-run-call 
 	 "-test-files"
@@ -1849,20 +1993,22 @@
       (if (not (launch:setup))
 	  (begin
 	    (debug:print 0 *default-log-port* "Failed to setup, exiting") 
 	    (exit 1)))
       ;; keep this one local
-      (open-run-close patch-db #f)
+      ;; (open-run-close patch-db #f)
+      (let ((dbstruct (db:setup #f areapath: *toppath*)))
+        (common:cleanup-db dbstruct full: #t))
       (set! *didsomething* #t)))
 
 (if (args:get-arg "-cleanup-db")
     (begin
       (if (not (launch:setup))
 	  (begin
 	    (debug:print 0 *default-log-port* "Failed to setup, exiting") 
 	    (exit 1)))
-      (let ((dbstruct (db:setup *toppath*)))
+      (let ((dbstruct (db:setup #f areapath: *toppath*)))
         (common:cleanup-db dbstruct))
       (set! *didsomething* #t)))
 
 (if (args:get-arg "-mark-incompletes")
     (begin
@@ -1917,11 +2063,11 @@
 	(args:get-arg "-repl")
 	(args:get-arg "-load"))
     (let* ((toppath (launch:setup))
 	   (dbstruct (if (and toppath
                               (common:on-homehost?))
-                         (db:setup)
+                         (db:setup #t)
                          #f))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f)))
       (if *toppath*
 	  (cond
 	   ((getenv "MT_RUNSCRIPT")
 	    ;; How to run megatest scripts
@@ -2006,25 +2152,24 @@
 ;; ;; ;; redo me       (set! *didsomething* #t)))
 
 (if (args:get-arg "-import-megatest.db")
     (begin
       (db:multi-db-sync 
-       (db:setup)
+       (db:setup #f)
        'killservers
        'dejunk
        'adj-testids
        'old2new
        ;; 'new2old
        )
       (set! *didsomething* #t)))
 
 (if (args:get-arg "-sync-to-megatest.db")
-    (begin
-      (db:multi-db-sync 
-       (db:setup)
-       'new2old
-       )
+    (let ((res (db:multi-db-sync 
+                (db:setup #f)
+                'new2old)))
+      (print "Synced " res " records to megatest.db")
       (set! *didsomething* #t)))
 
 (if (args:get-arg "-sync-to")
     (let ((toppath (launch:setup)))
       (tasks:sync-to-postgres *configdat* (args:get-arg "-sync-to"))

ADDED   minimt/Makefile
Index: minimt/Makefile
==================================================================
--- /dev/null
+++ minimt/Makefile
@@ -0,0 +1,12 @@
+minimt : minimt.scm db.scm setup.scm direct.scm
+	csc minimt.scm
+
+run    : minimt
+	export PATH="$(PWD)":$(PATH) ; minimt runrun foo/bar run1 
+
+runseq : clean run 
+	sleep 5;tail -F runtest/*log
+
+clean :
+	rm -rf runtest/*
+

ADDED   minimt/db.scm
Index: minimt/db.scm
==================================================================
--- /dev/null
+++ minimt/db.scm
@@ -0,0 +1,173 @@
+;; pretend to be a simplified Megatest
+
+(use sql-de-lite defstruct)
+
+;; init the db - NOTE: takes a db NOT a dbconn
+;;
+(define (init-db db)
+  (with-transaction
+   db
+   (lambda ()
+     (for-each
+      (lambda (qrystr)
+	(exec (sql db qrystr)))
+      '("CREATE TABLE IF NOT EXISTS runs 
+           (id        INTEGER PRIMARY KEY,
+            target    TEXT NOT NULL,
+            run_name  TEXT NOT NULL,
+            state     TEXT NOT NULL,
+            status    TEXT NOT NULL,
+            CONSTRAINT runs_constraint UNIQUE (run_name));"
+	"CREATE TABLE IF NOT EXISTS tests
+           (id        INTEGER PRIMARY KEY,
+            run_id    INTEGER NOT NULL,
+            test_name TEXT NOT NULL,
+            state     TEXT NOT NULL,
+            status    TEXT NOT NULL,
+            start_time INTEGER DEFAULT (strftime('%s','now')),
+            end_time   INTEGER DEFAULT -1,
+            CONSTRAINT tests_constraint UNIQUE (run_id,test_name));"
+	"CREATE TABLE IF NOT EXISTS steps
+           (id        INTEGER PRIMARY KEY,
+            test_id   INTEGER NOT NULL,
+            step_name  TEXT NOT NULL,
+            state     TEXT NOT NULL,
+            status    TEXT NOT NULL,
+            CONSTRAINT step_constraint UNIQUE (test_id,step_name));")))))
+
+(defstruct dbconn-dat
+  dbh       ;; the database handle
+  writeable ;; do we have write access?
+  path      ;; where the db lives
+  name      ;; name of the db
+  )
+
+;; open the database, return a dbconn struct
+(define (open-create-db path fname init)
+  (let* ((fullname       (conc path "/" fname))
+	 (already-exists (file-exists? fullname))
+	 (write-access   (and (file-write-access? path)
+			      (or (not already-exists)
+				  (and already-exists
+				       (file-write-access? fullname)))))
+	 (db             (if (or already-exists write-access)
+			     (open-database fullname)
+			     (begin
+			       (print "FATAL: No existing db and no write access thus cannot create " fullname)  ;; no db and no write access cannot proceed.
+			       (exit 1))))
+	 (dbconn         (make-dbconn-dat)))
+    (set-busy-handler! db (busy-timeout 120000)) ;; set a busy timeout
+    (exec (sql db "PRAGMA synchronous=0;"))
+    (if (and init write-access (not already-exists))
+	(init db))
+    (dbconn-dat-dbh-set!       dbconn db)
+    (dbconn-dat-writeable-set! dbconn write-access)
+    (dbconn-dat-path-set!      dbconn path)
+    (dbconn-dat-name-set!      dbconn fname)
+    dbconn))
+
+(define-inline (get-db dbconn)
+  (dbconn-dat-dbh dbconn))
+
+;; RUNS
+
+;; create a run
+(define (create-run dbconn target run-name)
+  (exec (sql (get-db dbconn) "INSERT INTO runs (run_name,target,state,status) VALUES (?,?,'NEW','na');")
+	run-name target))
+
+;; get a run id
+(define (get-run-id dbconn target run-name)
+  (first-column (query fetch (sql (get-db dbconn) "SELECT id FROM runs WHERE target=? AND run_name=?;")
+		       target run-name)))
+
+;; TESTS
+
+(defstruct test-dat
+  id
+  run-id
+  test-name
+  state
+  status)
+
+;; create a test
+(define (create-test dbconn run-id test-name)
+  (exec (sql (get-db dbconn) "INSERT INTO tests (run_id,test_name,state,status) VALUES (?,?,'NOT_STARTED','na');")
+	run-id test-name))
+
+;; get a test id
+(define (get-test-id dbconn run-id test-name)
+  (first-column (query fetch (sql (get-db dbconn) "SELECT id FROM tests WHERE run_id=? AND test_name=?;")
+		       run-id test-name)))
+
+(define-inline (test-row->test-dat row)
+    (make-test-dat
+     id:        (list-ref row 0)
+     run-id:    (list-ref row 1)
+     test-name: (list-ref row 2)
+     state:     (list-ref row 3)
+     status:    (list-ref row 4)))
+  
+;; get the data for given test-id
+(define (test-get-record dbconn test-id)
+  (let* ((row (query fetch-row (sql (get-db dbconn) "SELECT id,run_id,test_name,state,status FROM tests WHERE test_id=?;")
+		     test-id)))
+    (test-row->test-dat row)))
+
+;; get a bunch of tests data
+(define (test-get-tests dbconn run-ids test-name-patt)
+  (let* ((rows (query fetch-rows
+		      (sql (get-db dbconn)
+			   (conc "SELECT id,run_id,test_name,state,status FROM tests WHERE test_name LIKE ? AND run_id IN ("
+				 (string-intersperse (map conc run-ids) ",") ");"))
+		      test-name-patt)))
+    (map test-row->test-dat rows)))
+   
+(define (test-set-state-status dbconn test-id new-state new-status)
+  (exec (sql (get-db dbconn) "UPDATE tests SET state=?,status=?,end_time=? WHERE id=?;")
+	new-state new-status (current-seconds) test-id))
+
+;; STEPS
+
+;; create a step
+(define (create-step dbconn test-id step-name)
+  (exec (sql (get-db dbconn) "INSERT INTO steps (test_id,step_name,state,status) VALUES (?,?,'NOT_STARTED','na');")
+	test-id step-name))
+
+;; get a step id
+(define (get-step-id dbconn test-id step-name)
+  (first-column (query fetch (sql (get-db dbconn) "SELECT id FROM steps WHERE test_id=? AND step_name=?;")
+		       test-id step-name)))
+
+(define (step-set-state-status dbconn step-id new-state new-status)
+  (exec (sql (get-db dbconn) "UPDATE steps SET state=?,status=? WHERE id=?;")
+	new-state new-status step-id))
+
+;;======================================================================
+;; Statistics gathering
+;;======================================================================
+
+(define *stats* (make-hash-table))
+
+(define (update-stats key duration)
+  (let ((rec (or (hash-table-ref/default *stats* key #f)
+		 (let ((new (vector 0 0 0)))
+		   (hash-table-set! *stats* key new)
+		   new))))
+    (vector-set! rec 0 (+ (vector-ref rec 0) 1))        ;; num calls
+    (vector-set! rec 1 (+ (vector-ref rec 1) duration)) ;; total duration
+    (if (> duration (vector-ref rec 2) )
+	(vector-set! rec 2 duration))))
+
+(define (statwrap name proc)
+  (lambda params
+    (let ((start-time (current-milliseconds))
+	  (res        (apply proc params)))
+      (update-stats name (- (current-milliseconds) start-time))
+      res)))
+
+(define (print-stats statdat)
+  (hash-table-for-each
+   statdat
+   (lambda (key val)
+     (print key " count: " (vector-ref val 0) " avg: " (/ (vector-ref val 1)(vector-ref val 0)) " max: " (vector-ref val 2)))))

ADDED   minimt/direct.scm
Index: minimt/direct.scm
==================================================================
--- /dev/null
+++ minimt/direct.scm
@@ -0,0 +1,11 @@
+;; direct API, call the db calls directly
+(define rmt:create-run            (statwrap 'create-run  create-run))
+(define rmt:create-step           (statwrap 'create-step create-step))
+(define rmt:create-test           (statwrap 'create-test create-test))
+(define rmt:get-test-id           (statwrap 'get-test-id get-test-id))
+(define rmt:get-run-id            (statwrap 'get-run-id  get-run-id))
+(define rmt:open-create-db        (statwrap 'open        open-create-db))
+(define rmt:step-set-state-status (statwrap 'step-set-state-status step-set-state-status))
+(define rmt:test-set-state-status (statwrap 'test-set-state-status test-set-state-status))
+(define rmt:test-get-tests        (statwrap 'test-get-tests        test-get-tests))
+

ADDED   minimt/minimt.scm
Index: minimt/minimt.scm
==================================================================
--- /dev/null
+++ minimt/minimt.scm
@@ -0,0 +1,86 @@
+(use posix)
+
+(include "db.scm")
+
+;; define following in setup.scm
+;;    *remotehost*  => host for "tests"
+;;    *homehost*    => host for servers
+;;    *homepath*    => directory from which to run
+;;    *numtests*    => how many tests to simulate for each run
+;;    *numruns*     => how many runs to simulate
+;;    
+(include "setup.scm")
+
+(include "direct.scm") ;; direct db calls
+
+;; RUN A TEST
+(define (run-test dbconn run-id test-name)
+  (rmt:create-test dbconn run-id test-name)
+  (let ((test-id (rmt:get-test-id dbconn run-id test-name)))
+    (rmt:test-set-state-status dbconn test-id "LAUNCHED" "na")
+    (thread-sleep! *launchdelay*)
+    (rmt:test-set-state-status dbconn test-id "RUNNING" "na")
+    (let loop ((step-num 0))
+      (let ((step-name (conc "step" step-num)))
+       (rmt:create-step dbconn test-id step-name)
+       (let ((step-id (get-step-id dbconn test-id step-name)))
+	 (rmt:step-set-state-status dbconn step-id "START" -1)
+	 (thread-sleep! *stepdelay*)
+	 (rmt:step-set-state-status dbconn step-id "END" 0)
+	 (print"   STEP: " step-name " done.")))
+      (if (< step-num *numsteps*)
+	  (loop (+ step-num 1))))
+    ;; we will do a large but bogus read to simulate the logic in Megatest
+    (rmt:test-get-tests dbconn `(,run-id) "%")
+    (rmt:test-set-state-status dbconn test-id "COMPLETED" (if (> (random 10) 2) "PASS" "FAIL"))
+    (print "TEST: " test-name " done.")
+    (print "Stats:")
+    (print-stats *stats*)
+    test-id))
+
+;; RUN A RUN
+(define (run-run dbconn target run-name num-tests)
+  (rmt:create-run dbconn target run-name)
+  (let ((run-id (rmt:get-run-id dbconn target run-name)))
+    (let loop ((test-num 0))
+      (system (conc "NBFAKE_LOG=test-" test-num "-run-id-" run-id ".log NBFAKE_HOST=" *remotehost* " nbfake minimt runtest " run-id " test-" test-num))
+      (if (< test-num num-tests)
+	  (loop (+ test-num 1))))))
+
+;; Do what is asked
+(let ((args (cdr (argv))))
+  (if (< (length args) 1)
+      (print
+       "Usage: minimt [options]" "
+  runtest run-id testname
+  runrun  target runname")
+      (let ((cmd    (car args))
+	    (dbconn (rmt:open-create-db *homepath* "mt.db" init-db)))
+	(thread-sleep! 0.5) ;; be sure the db is written out to disk? Should really not be needed.
+	(change-directory *homepath*)
+	(case (string->symbol cmd)
+	  ((runtest)
+	   (let ((run-id    (string->number (cadr args)))
+		 (test-name (caddr args)))
+	     (print "Launching test " test-name " for run-id " run-id)
+	     (run-test dbconn run-id test-name)))
+	  ((runrun)
+	   (let ((target   (cadr args))
+		 (run-name (caddr args)))
+	     (run-run dbconn target run-name *numtests*)
+	     (print "Use: sqlite3 runtest/mt.db 'select max(end_time)-min(start_time) from tests;' to see the total run time")
+	     ))
+	  ((runall)
+	   (for-each
+	    (lambda (target)
+	      (let loop ((run-num 0))
+		(thread-sleep! *rundelay*)
+		(system (conc "NBFAKE_LOG=run-" target "-" run-num ".log nbfake minimt runrun " target " run-" run-num))
+		(if (< run-num *numruns*)
+		    (loop (+ run-num 1)))))
+	    *targets*))
+	  ((server)
+	   (start-server dbconn))
+	  (else
+	   (print "Command: " cmd " not recognised. Run without params to see help.")))
+	(close-database (dbconn-dat-dbh dbconn)))))

ADDED   minimt/queued.scm
Index: minimt/queued.scm
==================================================================
--- /dev/null
+++ minimt/queued.scm
@@ -0,0 +1,209 @@
+
+(use nanomsg defstruct srfi-18)
+
+;;======================================================================
+;; Commands
+;;======================================================================
+
+(define *commands* (make-hash-table))
+
+(defstruct cmd
+  key
+  proc
+  ctype ;; command type; 'r (read), 'w (write) or 't (transaction)
+  )
+
+(define (register-command key ctype proc)
+  (hash-table-set! *commands*
+		   key
+		   (make-cmd key: key ctype: ctype proc: proc)))
+
+(define (get-proc key)
+  (cmd-proc (hash-table-ref key *commands*)))
+
+(for-each
+ (lambda (dat)
+   (apply register-command dat))
+ `( (create-run    w ,create-run)
+    (create-step   w ,create-step)
+    (create-test   w ,create-test)
+    (get-test-id   r ,get-test-id)
+    (get-run-id    r ,get-run-id)
+    ;; (open-db       w ,open-create-db)
+    (step-set-ss   w ,step-set-state-status)
+    (test-set-ss   w ,test-set-state-status)
+    (test-get-tests r ,test-get-tests) ))
+
+;;======================================================================
+;; Server/client stuff
+;;======================================================================
+
+(define-inline (encode data)
+  (with-output-to-string
+    (lambda ()
+      (write data))))
+
+(define-inline (decode data)
+  (with-input-from-string
+      data
+    (lambda ()
+      (read))))
+  
+;;======================================================================
+;; Command queue
+;;======================================================================
+
+(defstruct qitem
+  command
+  params
+  host-port)
+
+(define *cmd-queue* '())
+(define *queue-mutex* (make-mutex))
+
+(define (queue-push cmddat)
+  (mutex-lock! *queue-mutex*)
+  (set! *cmd-queue* (cons cmddat *cmd-queue*))
+  (mutex-unlock! *queue-mutex*))
+
+;; get all the cmds of type ctype and return them, also remove them from the queue
+(define (queue-take ctype)
+  (mutex-lock! *queue-mutex*)
+  (let ((res (filter (lambda (x)(eq? (cmd-ctype x) ctype))       *cmd-queue*))
+	(rem (filter (lambda (x)(not (eq? (cmd-ctype x) ctype))) *cmd-queue*)))
+    (set! *queue* rem)
+    (mutex-unlock! *queue-mutex*)
+    res))
+
+(define (queue-process-commands dbconn commands)
+  (for-each
+   (lambda (qitem)
+     (let ((soc (request-connect (qitem-host-port qitem))) ;; we will be sending the data back to host-port via soc
+	   (cmd (hash-table-ref/default *commands* (qitem-command qitem) #f)))
+       (if cmd
+	   (let* ((res (apply (get-proc cmd) dbconn (qitem-params qitem)))
+		  (pkg (encode `((r . ,res)))))
+	     (nn-send soc pkg)
+	     (if (not (eq? (nn-recv soc)) "ok")
+		 (print "Client failed to receive properly the data from " cmd " request"))))))
+   commands))
+
+;; the continuously running queue processor
+;;
+(define ((queue-processor dbconn))
+  (let loop ()
+    (queue-process-commands dbconn (queue-take 'r))  ;; reads first, probably largest numbers of them
+    (queue-process-commands dbconn (queue-take 'w))  ;; writes next
+    (queue-process-commands dbconn (queue-take 't))  ;; lastly process transactions
+    (thread-sleep! 0.2)                              ;; open up the db for any other processes to access
+    (loop)))
+
+;;======================================================================
+;; Client stuff
+;;======================================================================
+
+;; client struct
+(defstruct client
+  host-port
+  socket
+  last-access)
+
+(define *clients* (make-hash-table)) ;; host:port -> client struct
+(define *client-mutex* (make-mutex))
+
+;; add a channel or return existing channel, this is a normal req
+;; 
+(define (request-connect host-port)
+  (mutex-lock! *client-mutex*)
+  (let* ((curr (hash-table-ref/default *clients* host-port #f)))
+    (if curr
+	(begin
+	  (mutex-unlock! *client-mutex*)
+	  curr)
+	(let ((req (nn-socket 'req)))
+	  (nn-connect req host-port) ;; "inproc://test")
+	  (hash-table-set! *clients* host-port req)
+	  (mutex-unlock! *client-mutex*)
+	  req))))
+
+;; open up a channel to the server and send a package of info for the server to act on
+;; host-port needs to be found and provided
+;;
+(define (generic-db-access host-port)
+  (let* ((soc (request-connect host-port))
+	 ;; NEED *MY* host/port also to let the server know where to send the results
+	 )))
+    
+
+(define (client-send-receive soc msg)
+  (nn-send soc msg)
+  (nn-recv soc))
+  
+;;======================================================================
+;; Server
+;;======================================================================
+
+(defstruct srvdat
+  host
+  port
+  soc)
+
+;; remember, everyone starts a server, both client and the actual server alike.
+;; clients start a server for the server to return results to.
+;;
+(define (start-raw-server #!key (given-host-name #f))
+  (let ((srvdat    (let loop ((portnum 10000))
+		     (handle-exceptions
+			 exn
+			 (if (< portnum 64000)
+			     (loop (+ portnum 1))
+			     #f)
+		       (let* ((rep (nn-socket 'rep)))
+			 (nn-bind rep (conc "tcp://*:" portnum)) ;; "inproc://test")
+			 (make-srvdat port: portnum soc: rep)))))
+	(host-name (or give-host-name (get-host-name)))
+	(soc       (srvdat-soc srvdat)))
+    (srvdat-host-set! srvdat host-name)
+    srvdat))
+
+;; The actual *server* side server
+;;
+(define (start-server dbconn #!key (given-host-name #f))
+  (let* ((srvdat    (start-raw-server given-host-name: given-host-name))
+	 (host-name (srvdat-host srvdat))
+	 (soc       (srvdat-soc srvdat)))
+    
+    ;; start the queue processor
+    (thread-start! (queue-processory dbconn) "Queue processor")
+    ;; msg is an alist
+    ;;  'r host:port  <== where to return the data
+    ;;  'p params     <== data to apply the command to
+    ;;  'e j|s|l      <== encoding of the params. default is s (sexp), if not specified is assumed to be default
+    ;;  'c command    <== look up the function to call using this key
+    ;;
+    (let loop ((msg-in (nn-recv soc)))
+      (if (not (equal? msg-in "quit"))
+	  (let* ((dat        (decode msg-in))
+		 (host-port  (alist-ref 'r dat)) ;; this is for the reverse req rep where the server is a client of the original client
+		 (params     (alist-ref 'p dat))
+		 (command    (let ((c (alist-ref 'c dat)))(if c (string->symbol c) #f)))
+		 (all-good   (and host-port params command (hash-table-exists? *commands* command))))
+	    (if all-good
+		(let ((cmddat (make-qitem
+			       command:   command
+			       host-port: host-port
+			       params:    params)))
+		  (queue-push cmddat) 		;; put request into the queue
+		  (nn-send soc "queued"))         ;; reply with "queued"
+		(print "ERROR: BAD request " dat))
+	    (loop (nn-recv soc)))))
+    (nn-close soc)))
+  
+;;======================================================================
+;; Gasket layer
+;;======================================================================
+
+(define rmt:open-create-db open-create-db)
+(define (rmt:create-run . params)
+  
+  

ADDED   minimt/setup.scm
Index: minimt/setup.scm
==================================================================
--- /dev/null
+++ minimt/setup.scm
@@ -0,0 +1,17 @@
+(define *remotehost* "orion")
+(define *homehost*   "zeus")
+(define *homepath*   "/nfs/phoebe/disk1/home/mfs_matt/data/megatest/minimt/runtest")
+(define *numsteps*   20)
+(define *numtests*   20)
+(define *numruns*    5)
+(define *targets*    '("targ1"))
+(define *testdelay*  0)
+(define *rundelay*   0)
+(define *launchdelay* 0)
+(define *stepdelay*   0)
+
+(use trace)
+(trace-call-sites #t)
+(trace
+;;  open-create-db
+ )

Index: mlaunch.scm
==================================================================
--- mlaunch.scm
+++ mlaunch.scm
@@ -15,12 +15,11 @@
 ;;   take jobs from the given queue and keep launching them keeping
 ;;   the cpu load at the targeted level
 ;;
 ;;======================================================================
 
-(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking format)
-(import (prefix sqlite3 sqlite3:))
+(use sqlite3 srfi-1 posix regex regex-case srfi-69 format)
 
 (declare (unit mlaunch))
 (declare (uses db))
 (declare (uses common))
 

Index: mt.scm
==================================================================
--- mt.scm
+++ mt.scm
@@ -127,47 +127,90 @@
 			  (cons testn res)))))))))
 
 ;;======================================================================
 ;;  T R I G G E R S
 ;;======================================================================
+
+(define (mt:run-trigger cmd test-id test-rundir trigger logname test-name item-path event-time actual-state actual-status)
+  ;; Putting the commandline into ( )'s means no control over the shell. 
+  ;; stdout and stderr will be caught in the NBFAKE or mt_launch.log files
+  ;; or equivalent. No need to do this. Just run it?
+  (let* ((fullcmd (conc "nbfake "
+			cmd           " "
+			test-id       " "
+			test-rundir   " "
+			trigger       " "
+			test-name     " "
+			item-path     " " ;; has / prepended to deal with toplevel tests
+			actual-state  " "
+			actual-status " "
+			event-time
+			))
+	 (prev-nbfake-log (get-environment-variable "NBFAKE_LOG")))
+    (setenv "NBFAKE_LOG" (conc (cond
+				((and (directory-exists? test-rundir)
+				      (file-write-access? test-rundir))
+				 test-rundir)
+				((and (directory-exists? *toppath*)
+				      (file-write-access? *toppath*))
+				 *toppath*)
+				(else (conc "/tmp/" (current-user-name))))
+			       "/" logname))
+    (debug:print-info 0 *default-log-port* "TRIGGERED on " trigger ", running command " fullcmd " output at " (get-environment-variable "NBFAKE_LOG"))
+    ;; (call-with-environment-variables
+    ;;  `(("NBFAKE_LOG" . ,(conc test-rundir "/" logname)))
+    ;;  (lambda ()
+    (process-run fullcmd)
+    (if prev-nbfake-log
+	(setenv "NBFAKE_LOG" prev-nbfake-log)
+	(unsetenv "NBFAKE_LOG"))
+    )) ;; ))
 
 (define (mt:process-triggers dbstruct run-id test-id newstate newstatus)
-  (let* ((test-dat      (db:get-test-info-by-id dbstruct run-id test-id)))
-    (if test-dat
-	(let* ((test-rundir   ;; (rmt:sdb-qry 'getstr ;; (filedb:get-path *fdb*
-		(db:test-get-rundir test-dat)) ;; ) ;; )
-	       (test-name     (db:test-get-testname test-dat))
-	       (tconfig       #f)
-	       (state         (if newstate  newstate  (db:test-get-state  test-dat)))
-	       (status        (if newstatus newstatus (db:test-get-status test-dat))))
-	  (if (and test-name
-		   test-rundir   ;; #f means no dir set yet
-		   (file-exists? test-rundir)
-		   (directory? test-rundir))
-	      (call-with-environment-variables
-	       (list (cons "MT_TEST_NAME" test-name)
-		     (cons "MT_TEST_RUN_DIR" test-rundir)
-		     (cons "MT_ITEMPATH"     (db:test-get-item-path test-dat)))
-	       (lambda ()
-		 (push-directory test-rundir)
-		 (set! tconfig (mt:lazy-read-test-config test-name))
-		 (for-each (lambda (trigger)
-			     (let ((cmd  (configf:lookup tconfig "triggers" trigger))
-				   (logf (conc  test-rundir "/last-trigger.log")))
-			       (if cmd
-				   ;; Putting the commandline into ( )'s means no control over the shell. 
-				   ;; stdout and stderr will be caught in the NBFAKE or mt_launch.log files
-				   ;; or equivalent. No need to do this. Just run it?
-				   (let ((fullcmd (conc cmd " " test-id " " test-rundir " " trigger "&")))
-				     (debug:print-info 0 *default-log-port* "TRIGGERED on " trigger ", running command " fullcmd)
-				     (process-run fullcmd)))))
-			   (list
-			    (conc state "/" status)
-			    (conc state "/")
-			    (conc "/" status)))
-		 (pop-directory))
-	       ))))))
+  (if test-id 
+      (let* ((test-dat      (db:get-test-info-by-id dbstruct run-id test-id)))
+	(if test-dat
+	    (let* ((test-rundir   (db:test-get-rundir       test-dat)) ;; ) ;; )
+		   (test-name     (db:test-get-testname     test-dat))
+		   (item-path     (db:test-get-item-path    test-dat))
+		   (duration      (db:test-get-run_duration test-dat))
+		   (comment       (db:test-get-comment      test-dat))
+		   (event-time    (db:test-get-event_time   test-dat))
+		   (tconfig       #f)
+		   (state         (if newstate  newstate  (db:test-get-state  test-dat)))
+		   (status        (if newstatus newstatus (db:test-get-status test-dat))))
+	      ;; (mutex-lock! *triggers-mutex*)
+	      (if (and test-name
+		       test-rundir)   ;; #f means no dir set yet
+		       ;; (common:file-exists? test-rundir)
+		       ;; (directory? test-rundir))
+		  (call-with-environment-variables
+		   (list (cons "MT_TEST_NAME"    (or test-name "no such test"))
+			 (cons "MT_TEST_RUN_DIR" (or test-rundir "no test directory yet"))
+			 (cons "MT_ITEMPATH"     (or item-path "")))
+		   (lambda ()
+		     (if (directory-exists? test-rundir)
+			 (push-directory test-rundir)
+			 (push-directory *toppath*))
+		     (set! tconfig (mt:lazy-read-test-config test-name))
+		     (for-each (lambda (trigger)
+				 (let* ((munged-trigger (string-translate trigger "/ " "--"))
+					(logname        (conc "last-trigger-" munged-trigger ".log")))
+				   ;; first any triggers from the testconfig
+				   (let ((cmd  (configf:lookup tconfig "triggers" trigger)))
+				     (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "tconfig-" logname) test-name item-path event-time state status)))
+				   ;; next any triggers from megatest.config
+				   (let ((cmd  (configf:lookup *configdat* "triggers" trigger)))
+				     (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "mtconfig-" logname) test-name item-path event-time state status)))))
+			       (list
+				(conc state "/" status)
+				(conc state "/")
+				(conc "/" status)))
+		     (pop-directory))
+		   ))
+	      ;; (mutex-unlock! *triggers-mutex*)
+	      )))))
 
 ;;======================================================================
 ;;  S T A T E   A N D   S T A T U S   F O R   T E S T S 
 ;;======================================================================
 
@@ -206,11 +249,11 @@
 	(let ((test-dirs (tests:get-tests-search-path *configdat*)))
 	  (let loop ((hed (car test-dirs))
 		     (tal (cdr test-dirs)))
 	    ;; Setting MT_LINKTREE here is almost certainly unnecessary. 
 	    (let ((tconfig-file (conc hed "/" test-name "/testconfig")))
-	      (if (and (file-exists? tconfig-file)
+	      (if (and (common:file-exists? tconfig-file)
 		       (file-read-access? tconfig-file))
 		  (let ((link-tree-path (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))
 			(old-link-tree  (get-environment-variable "MT_LINKTREE")))
 		    (if link-tree-path (setenv "MT_LINKTREE" link-tree-path))
 		    (let ((newtcfg (read-config tconfig-file #f #f))) ;; NOTE: Does NOT run [system ...]

Index: mtut.scm
==================================================================
--- mtut.scm
+++ mtut.scm
@@ -12,12 +12,13 @@
 
 ;; fake out readline usage of toplevel-command
 (define (toplevel-command . a) #f)
 
 (use srfi-1 posix srfi-69 readline ;;  regex regex-case srfi-69 apropos json http-client directory-utils rpc typed-records;; (srfi 18) extras)
-     srfi-18 extras format pkts pkts regex regex-case
-     (prefix dbi dbi:)) ;;  zmq extras)
+     srfi-18 extras format pkts regex regex-case
+     (prefix dbi dbi:)
+     nanomsg)
 
 (declare (uses common))
 (declare (uses megatest-version))
 (declare (uses margs))
 (declare (uses configf))
@@ -25,26 +26,71 @@
 
 (include "megatest-fossil-hash.scm")
 
 (require-library stml)
 
-(define *target-mappers*  (make-hash-table)) ;; '())
-(define *runname-mappers* (make-hash-table)) ;; '())
+;; stuff for the mapper and checker functions
+;;
+(define *target-mappers*  (make-hash-table)) 
+(define *runname-mappers* (make-hash-table)) 
+(define *area-checkers*   (make-hash-table)) 
 
+;; helpers for mappers/checkers
+(define (add-target-mapper name proc)
+  (hash-table-set! *target-mappers* name proc))
+(define (add-runname-mapper name proc)
+  (hash-table-set! *runname-mappers* name proc))
+(define (add-area-checker name proc)
+  (hash-table-set! *area-checkers* name proc))
+
+;; given a runkey, xlatr-key and other info return one of the following:
+;;   list of targets, null list to skip processing
+;;   
+(define (map-targets mtconf aval-alist runkey area contour #!key (xlatr-key-in #f))
+  (let* ((xlatr-key (or xlatr-key-in
+                        (conf-get/default mtconf aval-alist 'targtrans)))
+         (proc      (hash-table-ref/default *target-mappers* xlatr-key #f)))
+    (if proc
+        (begin
+          (print "Using target mapper: " xlatr-key)
+          (handle-exceptions
+           exn
+           (begin
+             (print "FAILED TO RUN TARGET MAPPER FOR " area ", called " xlatr-key)
+             (print "   function is: " (hash-table-ref/default *target-mappers* xlatr-key #f ) )
+             (print " message: " ((condition-property-accessor 'exn 'message) exn))
+             runkey)
+           (proc runkey area contour)))
+        (begin
+          (if xlatr-key 
+              (print "ERROR: Failed to find named target translator " xlatr-key ", using original target."))
+          `(,runkey))))) ;; no proc then use runkey
+
+;; given mtconf and areaconf extract a translator/filter, first look at areaconf
+;; then if not found look at default
+;;
+(define (conf-get/default mtconf areaconf keyname #!key (default #f))
+  (let ((res (or (alist-ref keyname areaconf)
+                 (configf:lookup mtconf "default" (conc keyname))
+                 default)))
+    (if res
+        (string->symbol res)
+        res)))
+  
 ;; this needs some thought regarding security implications.
 ;;
 ;;   i. Check that owner of the file and calling user are same?
 ;;  ii. Check that we are in a legal megatest area?
 ;; iii. Have some form of authentication or record of the md5sum or similar of the file?
 ;;  iv. Use compiled version in preference to .scm version. Thus there is a manual "blessing"
 ;;      required to use .mtutil.scm.
 ;;
-(if (file-exists? "megatest.config")
-    (if (file-exists? ".mtutil.so")
+(if (common:file-exists? "megatest.config")
+    (if (common:file-exists? ".mtutil.so")
 	(load ".mtutil.so")
-	(if (file-exists? ".mtutil.scm")
-	(load ".mtutil.scm"))))
+	(if (common:file-exists? ".mtutil.scm")
+            (load ".mtutil.scm"))))
 
 ;; Disabled help items
 ;;  -rollup                 : (currently disabled) fill run (set by :runname)  with latest test(s)
 ;;                            from prior runs with same keys
 ;; Contour actions
@@ -56,55 +102,60 @@
 mtutil, part of the Megatest tool suite, documentation at http://www.kiatoa.com/fossils/megatest
   version " megatest-version "
   license GPL, Copyright Matt Welland 2006-2017
 
 Usage: mtutil action [options]
-  -h                       : this help
-  -manual                  : show the Megatest user manual
-  -version                 : print megatest version (currently " megatest-version ")
-
-Actions:
-   run                     : initiate runs
-   remove                  : remove runs
-   rerun                   : register action for processing
-   set-ss                  : set state/status
-   archive                 : compress and move test data to archive disk
-   kill                    : stop tests or entire runs
-   db                      : database utilities
+  -h                         : this help
+  -manual                    : show the Megatest user manual
+  -version                   : print megatest version (currently " megatest-version ")
+			     
+Actions:		     
+   run                       : initiate runs
+   remove                    : remove runs
+   rerun                     : register action for processing
+   set-ss                    : set state/status
+   archive                   : compress and move test data to archive disk
+   kill                      : stop tests or entire runs
+   db                        : database utilities
+   areas, contours, setup    : show areas, contours or setup section from megatest.config
 
 Contour actions:
-   process                 : runs import, rungen and dispatch 
-
-Selectors 
-  -immediate               : apply this action immediately, default is to queue up actions
-  -area areapatt1,area2... : apply this action only to the specified areas
-  -target key1/key2/...    : run for key1, key2, etc.
-  -test-patt p1/p2,p3/...  : % is wildcard
-  -run-name                : required, name for this particular test run
-  -contour contourname     : run all targets for contourname, requires -run-name, -target
-  -state-status c/p,c/f    : Specify a list of state and status patterns
-  -tag-expr tag1,tag2%,..  : select tests with tags matching expression
-  -mode-patt key           : load testpatt from <key> in runconfigs instead of default TESTPATT
-                             if -testpatt and -tagexpr are not specified
-  -new state/status        : specify new state/status for set-ss
-
-Misc 
-  -start-dir path          : switch to this directory before running mtutil
-  -set-vars V1=1,V2=2      : Add environment variables to a run NB// these are
-                                 overwritten by values set in config files.
-  -log logfile             : send stdout and stderr to logfile
-  -repl                    : start a repl (useful for extending megatest)
-  -load file.scm           : load and run file.scm
-  -debug N|N,M,O...        : enable debug messages 0-N or N and M and O ...
-
-Utility
- db pgschema               : emit postgresql schema; do \"mtutil db pgschema | psql -d mydb\"
+   process                   : runs import, rungen and dispatch 
+
+Trigger propagation actions:
+   tsend a=b,c=d...          : send trigger info to all recpients in the [listeners] section
+   tlisten -port N           : listen for trigger info on port N
+			     
+Selectors 		     
+  -immediate                 : apply this action immediately, default is to queue up actions
+  -area areapatt1,area2...   : apply this action only to the specified areas
+  -target key1/key2/...      : run for key1, key2, etc.
+  -test-patt p1/p2,p3/...    : % is wildcard
+  -run-name                  : required, name for this particular test run
+  -contour contourname       : run all targets for contourname, requires -run-name, -target
+  -state-status c/p,c/f      : Specify a list of state and status patterns
+  -tag-expr tag1,tag2%,..    : select tests with tags matching expression
+  -mode-patt key             : load testpatt from <key> in runconfigs instead of default TESTPATT
+                               if -testpatt and -tagexpr are not specified
+  -new state/status          : specify new state/status for set-ss
+			     
+Misc 			     
+  -start-dir path            : switch to this directory before running mtutil
+  -set-vars V1=1,V2=2        : Add environment variables to a run NB// these are
+                                   overwritten by values set in config files.
+  -log logfile               : send stdout and stderr to logfile
+  -repl                      : start a repl (useful for extending megatest)
+  -load file.scm             : load and run file.scm
+  -debug N|N,M,O...          : enable debug messages 0-N or N and M and O ...
+			     
+Utility			     
+ db pgschema                 : emit postgresql schema; do \"mtutil db pgschema | psql -d mydb\"
 
 Examples:
 
 # Start a megatest run in the area \"mytests\"
-mtutil -area mytests -action run -target v1.63/aa3e -mode-patt MYPATT -tag-expr quick
+mtutil run -area mytests -target v1.63/aa3e -mode-patt MYPATT -tag-expr quick
 
 # Start a contour
 mtutil run -contour quick -target v1.63/aa3e 
 
 Called as " (string-intersperse (argv) " ") "
@@ -111,17 +162,20 @@
 Version " megatest-version ", built from " megatest-fossil-hash ))
 
 ;; args and pkt key specs
 ;;
 (define *arg-keys*
+  ;; used keys
+  ;;    a  - action
   '(
     ("-area"            . G) ;; maps to group
     ("-contour"         . c)
     ("-append-config"   . d)
     ("-state"           . e)
     ("-item-patt"       . i)
     ("-sync-to"         . k)
+    ("-new"             . l) ;; l (see below) is new-ss
     ("-run-name"        . n)
     ("-mode-patt"       . o)
     ("-test-patt"       . p)  ;; idea, enhance margs ("-test-patt" "-testpatt") => yields one value in "-test-patt"
     ("-status"          . s)
     ("-target"          . t)
@@ -152,11 +206,35 @@
 ;; alist to map actions to old megatest commands
 (define *action-keys*
   '((run         . "-run")
     (sync        . "")
     (archive     . "-archive")
-    (set-ss      . "-set-state-status")))
+    (set-ss      . "-set-state-status")
+    (remove      . "-remove-runs")))
+
+;; Card types:
+;;
+;; A action
+;; U username (Unix)
+;; D timestamp
+;; T card type
+
+;; utilitarian alist for standard cards
+;;
+(define *additional-cards*
+  '(
+    ;; Standard Cards
+    (A  . action    )
+    (D  . timestamp )
+    (T  . cardtype  )
+    (U  . user      ) ;; username
+    (Z  . shar1sum  )
+
+    ;; Extras
+    (a  . runkey    ) ;; needed for matching up pkts with target derived from runkey
+    ;; (l  . new-ss    ) ;; new state/status
+    ))
 
 ;; inlst is an alternative input
 ;;
 (define (lookup-param-by-key key #!key (inlst #f))
   (fold (lambda (a res)
@@ -179,11 +257,12 @@
   (or (alist-ref (string->symbol param)
 		 '((-tag-expr  . "-tagexpr")
 		   (-mode-patt . "--modepatt")
 		   (-run-name  . "-runname")
 		   (-test-patt . "-testpatt")
-		   (-msg       . "-m")))
+		   (-msg       . "-m")
+		   (-new       . "-set-state-status")))
       param))
 
 (define (val->alist val)
   (let ((val-list (string-split-fields ";\\s*" val #:infix)))
     (if val-list
@@ -210,11 +289,11 @@
 	(print "ERROR: failed to create directory " dest-dir " message: " ((condition-property-accessor 'exn 'message) exn))
       (create-directory dest-dir #t))
     (handle-exceptions
 	exn
 	(print "ERROR: failed to clone or sync 1ossil " url " message: " ((condition-property-accessor 'exn 'message) exn))
-      (if (file-exists? targ-file)
+      (if (common:file-exists? targ-file)
 	  (system (conc "fossil pull --once " url " -R " targ-file))
 	  (system (conc "fossil clone " url " " targ-file))
 	  ))))
 
 (define (fossil:last-change-node-and-time fossils-dir fossil-name branch)
@@ -259,22 +338,14 @@
 	   (loop (get-line) date node time))))
        (else ;; no more datat and last node on branch not found
 	(close-input-port timeline-port)
 	(values  (common:date-time->seconds (conc date " " time)) node))))))
 
-
 ;;======================================================================
 ;; GLOBALS
 ;;======================================================================
 
-;; Card types:
-;;
-;; a action
-;; u username (Unix)
-;; D timestamp
-;; T card type
-
 ;; process args
 (define *action* (if (> (length (argv)) 1)
 		     (cadr (argv))
 		     #f))
 (define remargs (args:get-args 
@@ -300,11 +371,12 @@
 (if (and (not (null? remargs))
 	 (not (or
 	       (args:get-arg "-runstep")
 	       (args:get-arg "-envcap")
 	       (args:get-arg "-envdelta")
-	       (member *action* '("db"))   ;; very loose checks on db.
+	       (member *action* '("db" "tsend" "tlisten"))   ;; very loose checks on db and tsend/listen
+	       (equal? *action* "show")    ;; just keep going if list
 	       )))
     (debug:print-error 0 *default-log-port* "Unrecognised arguments: " (string-intersperse (if (list? remargs) remargs (argv))  " ")))
 
 (if (or (args:any? "-h" "help" "-help" "--help")
 	(member *action* '("-h" "-help" "--help" "help")))
@@ -311,71 +383,69 @@
     (begin
       (print help)
       (exit 1)))
 
 ;;======================================================================
-;; pkts
-;;======================================================================
-
-(define (with-queue-db mtconf proc)
-  (let* ((pktsdirs (configf:lookup mtconf "setup"  "pktsdirs"))
-	 (pktsdir  (if pktsdirs (car (string-split pktsdirs " ")) #f))
-	 (toppath  (configf:lookup mtconf "dyndat" "toppath"))
-	 (pdbpath  (or (configf:lookup mtconf "setup"  "pdbpath") pktsdir)))
-    (if (not (and  pktsdir toppath pdbpath))
-	(begin
-	  (print "ERROR: settings are missing in your megatest.config for area management.")
-	  (print "  you need to have pktsdir in the [setup] section."))
-	(let* ((pdb  (open-queue-db pdbpath "pkts.db"
-				    schema: '("CREATE TABLE groups (id INTEGER PRIMARY KEY,groupname TEXT, CONSTRAINT group_constraint UNIQUE (groupname));"))))
-	  (proc pktsdirs pktsdir pdb)
-	  (dbi:close pdb)))))
-
-(define (load-pkts-to-db mtconf)
-  (with-queue-db
-   mtconf
-   (lambda (pktsdirs pktsdir pdb)
-     (for-each
-      (lambda (pktsdir) ;; look at all
-	(if (and (file-exists? pktsdir)
-		 (directory? pktsdir)
-		 (file-read-access? pktsdir))
-	    (let ((pkts (glob (conc pktsdir "/*.pkt"))))
-	      (for-each
-	       (lambda (pkt)
-		 (let* ((uuid    (cadr (string-match ".*/([0-9a-f]+).pkt" pkt)))
-			(exists  (lookup-by-uuid pdb uuid #f)))
-		   (if (not exists)
-		       (let* ((pktdat (string-intersperse
-				       (with-input-from-file pkt read-lines)
-				       "\n"))
-			      (apkt   (pkt->alist pktdat))
-			      (ptype  (alist-ref 'T apkt)))
-			 (add-to-queue pdb pktdat uuid (or ptype 'cmd) #f 0)
-			 (debug:print 4 *default-log-port* "Added " uuid " of type " ptype " to queue"))
-		       (debug:print 4 *default-log-port* "pkt: " uuid " exists, skipping...")
-		       )))
-	       pkts))))
-      (string-split pktsdirs)))))
-
-(define (get-pkt-alists pkts)
-  (map (lambda (x)
-	 (alist-ref 'apkt x)) ;; 'pkta pulls out the alist from the read pkt
-       pkts))
-
-;; given list of pkts (alist mode) return list of D cards as Unix epoch, sorted descending
-;; also delete duplicates by target i.e. (car pkt)
-(define (get-pkt-times pkts)
-  (delete-duplicates
-   (sort 
-    (map (lambda (x)
-	   `(,(alist-ref 't x) . ,(string->number (alist-ref 'D x))))
-	 pkts)
-    (lambda (a b)(> (cdr a)(cdr b))))      ;; sort descending
-   (lambda (a b)(equal? (car a)(car b))))) ;; remove duplicates by target
-
-;;======================================================================
+;; Nanomsg transport
+;;======================================================================
+
+(define-inline (encode data)
+  (with-output-to-string
+    (lambda ()
+      (write data))))
+
+(define-inline (decode data)
+  (with-input-from-string
+      data
+    (lambda ()
+      (read))))
+
+;;start a server, returns the connection
+;;
+(define (start-nn-server portnum)
+  (let ((rep (nn-socket 'rep)))
+    (handle-exceptions
+     exn
+     (let ((emsg ((condition-property-accessor 'exn 'message) exn)))
+       (print "ERROR: Failed to start server \"" emsg "\"")
+       (exit 1))
+     (nn-bind rep (conc "tcp://*:" portnum)))
+    rep))
+
+;; open connection to server, send message, close connection
+;;
+(define (open-send-close-nn host-port msg #!key (timeout 3)) ;; default timeout is 3 seconds
+  (let ((req  (nn-socket 'req))
+        (uri  (conc "tcp://" host-port))
+        (res  #f)) 
+    (handle-exceptions
+     exn
+     (let ((emsg ((condition-property-accessor 'exn 'message) exn)))
+       (print "ERROR: Failed to connect/send to " uri " message was \"" emsg "\"")
+       #f)
+     (nn-connect req uri)
+     (nn-send req msg)
+     ;; NEED timer here!
+     (let* ((th1  (make-thread (lambda ()
+                                 (let ((resp (nn-recv req)))
+                                   (nn-close req)
+                                   (set! res (if (equal? resp "ok")
+                                                 #t
+                                                 #f))))
+                               "recv thread"))
+            (th2 (make-thread (lambda ()
+                                (thread-sleep! timeout)
+                                (thread-terminate! th1))
+                             "timer thread")))
+       (thread-start! th1)
+       (thread-start! th2)
+       (thread-join! th1)
+       res))))
+
+;;======================================================================
+
+
 ;; Runs
 ;;======================================================================
 
 ;; make a runname
 ;;
@@ -386,24 +456,34 @@
 ;; collect, translate, collate and assemble a pkt from the command-line
 ;;
 ;; sched => force the run start time to be recorded as sched Unix
 ;; epoch. This aligns times properly for triggers in some cases.
 ;;
-(define (command-line->pkt action args-alist sched-in)
+;;  extra-dat format is ( 'x xval 'y yval .... )
+;;
+(define (command-line->pkt action args-alist sched-in #!key (extra-dat '())(area-path #f)(new-ss #f))
   (let* ((sched     (cond
 		     ((vector? sched-in)(local-time->seconds sched-in)) ;; we recieved a time
 		     ((number? sched-in) sched-in)
 		     (else     (current-seconds))))
 	 (args-data (if args-alist
 			(if (hash-table? args-alist) ;; seriously?
 			    (hash-table->alist args-alist)
 			    args-alist)
 			(hash-table->alist args:arg-hash))) ;; if no args-alist then we assume this is a call driven directly by commandline
-	 (alldat    (apply append (list 'T "cmd"
-					'a action
-					'U (current-user-name)
-					'D sched)
+	 (alldat    (apply append
+			   (list 'A action
+				 'U (current-user-name)
+				 'D sched)
+			   (if area-path
+			       (list 'S area-path) ;; the area-path is mapped to the start-dir
+			       '())
+                           (if (list? extra-dat)
+			       extra-dat
+			       (begin
+				 (debug:print 0 *default-log-port* "ERROR: command-line->pkt received bad extra-dat " extra-dat)
+				 '()))
 			   (map (lambda (x)
 				  (let* ((param (car x))
 					 (value (cdr x))
 					 (pmeta (assoc param *arg-keys*))    ;; translate the card key to a megatest switch or parameter
 					 (smeta (assoc param *switch-keys*)) ;; first lookup the key in arg-keys or switch-keys
@@ -412,12 +492,12 @@
 						    #f)))
 				    (if (or pmeta smeta)                     ;; construct the switch/param pair.
 					(list meta value)
 					'())))
 				(filter cdr args-data)))))
-;; (print  "Alldat: " alldat
-;;         " args-data: " args-data)
+    (print  "Alldat: " alldat
+	    " args-data: " args-data)
     (add-z-card
      (apply construct-sdat alldat))))
 
 (define (simple-setup start-dir-in)
   (let* ((start-dir (or start-dir-in "."))
@@ -427,15 +507,15 @@
 		     ;; environ-patt: "env-override"
 		     given-toppath: start-dir
 		     ;; pathenvvar: "MT_RUN_AREA_HOME"
 		     ))
 	 (mtconf    (if mtconfdat (car mtconfdat) #f)))
-    ;; we set some dynamic data in a section called "dyndata"
+    ;; we set some dynamic data in a section called "scratchdata"
     (if mtconf
 	(begin
-	  (configf:section-var-set! mtconf "dyndat" "toppath" start-dir)))
-    ;; (print "TOPPATH: " (configf:lookup mtconf "dyndat" "toppath"))
+	  (configf:section-var-set! mtconf "scratchdat" "toppath" start-dir)))
+    ;; (print "TOPPATH: " (configf:lookup mtconf "scratchdat" "toppath"))
     mtconfdat))
 
 
 ;; NEED TIMESTAMP ON PKTS for efficient loading of packets into db.
 
@@ -446,16 +526,19 @@
 ;;  ii. Pass the pkt keys and values to this proc and go from there.
 ;; iii. Maybe have an abstraction alist with meaningful names for the pkt keys
 ;;
 ;; Override the run start time record with sched. Usually #f is fine.
 ;; 
-(define (create-run-pkt mtconf action area runkey runname mode-patt tag-expr pktsdir reason contour sched dbdest append-conf runtrans)
+(define (create-run-pkt mtconf action area runkey target runname mode-patt 
+                        tag-expr pktsdir reason contour sched dbdest append-conf
+                        runtrans)
   (let* ((good-val   (lambda (inval)(and inval (string? inval)(not (string-null? inval)))))
 	 (area-dat   (val->alist (or (configf:lookup mtconf "areas" area) "")))
 	 (area-path  (alist-ref 'path      area-dat))
-	 (area-xlatr (alist-ref 'targtrans area-dat))
-	 (new-runname (let* ((callname (if (string? runtrans)(string->symbol runtrans) #f))
+	 ;; (area-xlatr (alist-ref 'targtrans area-dat))
+         ;; (xlatr-key  (if area-xlatr (string->symbol area-xlatr) #f))
+         (new-runname (let* ((callname (if (string? runtrans)(string->symbol runtrans) #f))
 			     (mapper   (if callname (hash-table-ref/default *runname-mappers* callname #f) #f)))
 			;; (print "callname=" callname " runtrans=" runtrans " mapper=" mapper)
 			(if (and callname
 				 (not (equal? callname "auto"))
 				 (not mapper))
@@ -471,28 +554,11 @@
 			      (print "(mapper " (string-intersperse (list runkey runname area area-path reason contour mode-patt) ", ") ")")
 			      (mapper runkey runname area area-path reason contour mode-patt))
 			    (case callname
 			      ((auto) runname)
 			      (else   runtrans)))))
-	 (new-target (if area-xlatr 
-			 (let ((xlatr-key (string->symbol area-xlatr)))
-			   (if (hash-table-exists? *target-mappers* xlatr-key)
-			       (begin
-				 (print "Using target mapper: " area-xlatr)
-				 (handle-exceptions
-				     exn
-				     (begin
-				       (print "FAILED TO RUN TARGET MAPPER FOR " area ", called " area-xlatr)
-				       (print "   function is: " (hash-table-ref/default *target-mappers* xlatr-key #f ) )
-				       (print " message: " ((condition-property-accessor 'exn 'message) exn))
-				       runkey)
-				   ((hash-table-ref *target-mappers* xlatr-key)
-				    runkey new-runname area area-path reason contour mode-patt)))
-			       (begin
-				 (print "ERROR: Failed to find named target translator " xlatr-key ", using original target.")
-				 runkey)))
-			 runkey))
+	 (new-target     target) ;; I believe we will want target manipulation here .. (map-targets xlatr-key runkey area contour))
 	 (actual-action  (if action
 			     (if (equal? action "sync-prepend")
 				 "sync"
 				 action)
 			     "run"))) ;; this has gotten a bit ugly. Need a function to handle actions processing.
@@ -499,11 +565,11 @@
     ;; some hacks to remove switches not needed in certain cases
     (case (string->symbol (or action "run"))
       ((sync sync-prepend)
        (set! new-target #f)
        (set! runame     #f)))
-    (print "area-path: " area-path " area-xlatr: " area-xlatr " orig-target: " runkey " new-target: " new-target)
+    ;; (print "area-path: " area-path " orig-target: " runkey " new-target: " new-target)
     (let-values (((uuid pkt)
 		  (command-line->pkt
 		   actual-action
 		   (append 
 		    `(("-start-dir"  . ,area-path)
@@ -523,23 +589,50 @@
 			    (equal? action "run"))
 			`(("-preclean"  . " ")
 			  ("-rerun-all" . " "))      ;; if run we *always* want preclean set, use single space as placeholder
 			'())
 		    )
-		   sched)))
+		   sched
+                   extra-dat: `(a ,runkey)  ;; we need the run key for marking the run as launched
+                   )))
       (with-output-to-file
 	  (conc pktsdir "/" uuid ".pkt")
 	(lambda ()
 	  (print pkt))))))
+
+;; look for areas=a1,a2,a3 OR areafn=somefuncname
+;;
+(define (val-alist->areas val-alist)
+  (let ((areas-string   (alist-ref 'areas  val-alist))
+	(areas-procname (alist-ref 'areafn val-alist)))
+    (if areas-procname ;; areas-procname take precedence
+	areas-procname
+	(string-split (or areas-string "") ","))))
+
+;; area   - the current area under consideration
+;; areas  - the list of allowed areas from the contour spec -OR-
+;;          if it is a string then it is the function to use to
+;;          lookup in *area-checkers*
+;;
+(define (area-allowed? area areas runkey contour mode-patt)
+  (cond
+   ((not areas) #t) ;; no spec
+   ((string? areas) ;; 
+    (let ((check-fn (hash-table-ref/default *area-checkers* (string->symbol areas) #f)))
+      (if check-fn
+	  (check-fn area runkey contour mode-patt)
+	  #f)))
+   ((list? areas)(member area areas))
+   (else #f))) ;; shouldn't get here 
 
 ;; (use trace)(trace create-run-pkt)
 
 ;; collect all needed data and create run pkts for contours with changed inputs
 ;;
 (define (generate-run-pkts mtconf toppath)
   (let ((std-runname (conc "sched"  (time->string (seconds->local-time (current-seconds)) "%M%H%d"))))
-    (with-queue-db
+    (common:with-queue-db
      mtconf
      (lambda (pktsdirs pktsdir pdb)
        (let* ((rgconfdat (find-and-read-config (conc toppath "/runconfigs.config")))
 	      (rgconf    (car rgconfdat))
 	      (all-areas (map car (configf:get-section mtconf "areas")))
@@ -564,26 +657,35 @@
 			(optional   (if (> len-key 3)(cadddr keyparts) #f))
 			;; (val-list   (string-split-fields ";\\s*" val #:infix)) ;; (string-split val)) ;; runname-rule params
 			(val-alist  (val->alist val))
 			(runname    (make-runname "" ""))
 			(runtrans   (alist-ref 'runtrans val-alist))
+
+			;; these may or may not be defined and not all are used in each handler type in the case below
+			(run-name   (alist-ref 'run-name val-alist))
+			(target     (alist-ref 'target   val-alist))
+			(crontab    (alist-ref 'cron     val-alist))
+			(areas      (val-alist->areas    val-alist)) ;; areas can be a single string (a reference to call an areas function), or a list of area names.
+			(dbdest     (alist-ref 'dbdest   val-alist))
+			(appendconf (alist-ref 'appendconf val-alist))
+			(file-globs (alist-ref 'glob val-alist))
 			
 			(runstarts  (find-pkts pdb '(runstart) `((o . ,contour)
 								 (t . ,runkey))))
-			(rspkts     (get-pkt-alists runstarts))
+			(rspkts     (common:get-pkt-alists runstarts))
 			;; starttimes is for run start times and is used to know when the last run was launched
-			(starttimes (get-pkt-times rspkts)) ;; sort by age (youngest first) and delete duplicates by target
-			(last-run (if (null? starttimes) ;; if '() then it has never been run, else get the max
-				      0
-				      (apply max (map cdr starttimes))))
+			(starttimes (common:get-pkt-times rspkts)) ;; sort by age (youngest first) and delete duplicates by target
+			(last-run   (if (null? starttimes) ;; if '() then it has never been run, else get the max
+					0
+					(apply max (map cdr starttimes))))
 			;; synctimes is for figuring out the last time a sync was done
-			(syncstarts   (find-pkts pdb '(syncstart) '())) ;; no qualifiers, a sync does all tarets etc.
-			(sspkts       (get-pkt-alists syncstarts))
-			(synctimes    (get-pkt-times  sspkts))
-			(last-sync (if (null? synctimes) ;; if '() then it has never been run, else get the max
-				      0
-				      (apply max (map cdr synctimes))))
+			(syncstarts (find-pkts pdb '(syncstart) '())) ;; no qualifiers, a sync does all tarets etc.
+			(sspkts       (common:get-pkt-alists syncstarts))
+			(synctimes    (common:get-pkt-times  sspkts))
+			(last-sync  (if (null? synctimes) ;; if '() then it has never been run, else get the max
+					0
+					(apply max (map cdr synctimes))))
 			)
 
 		   (let ((delta (lambda (x)
 				  (round (/ (- (current-seconds) x) 60)))))
 		     (print "runkey: " runkey ", ruletype: " ruletype ", action: " action ", last-run: " last-run " time since; last-run: " (delta last-run) ", last-sync: " (delta last-sync)))
@@ -597,41 +699,53 @@
 		   
 		   (case (string->symbol (or ruletype "no-such-rule"))
 
 		     ((no-such-rule) (print "ERROR: no such rule for " sense))
 
+		     ;; Handle crontab like rules
+		     ;;
 		     ((scheduled)
 		      (if (not (alist-ref 'cron val-alist)) ;; gotta have cron spec
 			  (print "ERROR: bad sense spec \"" (string-intersperse sense " ") "\" params: " val-alist)
-			  (let* ((run-name (alist-ref 'run-name val-alist))
-				 (target   (alist-ref 'target   val-alist))
-				 (crontab  (alist-ref 'cron     val-alist))
+			  (let* (
 				 ;; (action   (alist-ref 'action   val-alist))
-				 (cron-safe-string (string-translate (string-intersperse (string-split (alist-ref 'cron val-alist)) "-") "*" "X"))
+				 (cron-safe-string (string-translate (string-intersperse (string-split crontab) "-") "*" "X"))
 				 (runname  std-runname)) ;; (conc "sched" (time->string (seconds->local-time (current-seconds)) "%M%H%d")))))
 			    ;; (print "last-run: " last-run " need-run: " need-run)
 			    ;; (if need-run
 			    (case (string->symbol action)
 			      ((sync sync-prepend)
 			       (if (common:extended-cron crontab #f last-sync)
 				   (push-run-spec torun contour runkey
 						  `((message . ,(conc ruletype ":sync-" cron-safe-string))
 						    (action  . ,action)
-						    (dbdest  . ,(alist-ref 'dbdest val-alist))
-						    (append  . ,(alist-ref 'appendconf val-alist))))))
+						    (dbdest  . ,dbdest)
+						    (append  . ,appendconf)
+						    (areas   . ,areas)))))
 			      ((run)
 			       (if (common:extended-cron crontab #f last-run)
 				   (push-run-spec torun contour runkey
-						  `((message . ,(conc ruletype ":" cron-safe-string))
-						    (runname . ,runname)
+						  `((message  . ,(conc ruletype ":" cron-safe-string))
+						    (runname  . ,runname)
+						    (runtrans . ,runtrans)
+						    (action   . ,action)
+						    (areas    . ,areas)
+						    (target   . ,target)))))
+                              ((remove)
+                               (push-run-spec torun contour runkey
+						  `((message  . ,(conc ruletype ":" cron-safe-string))
+						    (runname  . ,runname)
 						    (runtrans . ,runtrans)
-						    (action  . ,action)
-						    (target  . ,target)))))
+						    (action   . ,action)
+						    (areas    . ,areas)
+						    (target   . ,target))))
 			      (else
 			       (print "ERROR: action \"" action "\" has no scheduled handler")
 			       )))))
 
+		     ;; script based sensors
+		     ;;
 		     ((script)
 		      ;; syntax is a little different here. It is a list of commands to run, "scriptname = extra_parameters;scriptname = ..."
 		      ;; where scriptname may be repeated multiple times. The script must return unix-epoch of last change, new-target-name and new-run-name
 		      ;; the script is called like this:  scriptname contour runkey std-runname action extra_param1 extra_param2 ...
 		      (for-each
@@ -664,19 +778,23 @@
 				 (if need-run
 				     (let* ((key-msg    `((message  . ,(conc ruletype ":" message))
 							  (runname  . ,runname)
 							  (runtrans . ,runtrans)
 							  (action   . ,action)
-							  (target   . ,new-target))))
+							  (areas    . ,areas)
+							  (target   . ,new-target) ;; overriding with result from runing the script
+                                                          )))
 				       (print "key-msg: " key-msg)
 				       (push-run-spec torun contour
 						      (if optional  ;; we need to be able to differentiate same contour, different behavior. 
 							  (conc runkey ":" optional)  ;; NOTE: NOT COMPLETELY IMPLEMENTED. DO NOT USE
 							  runkey)
 						      key-msg)))))))
 		       val-alist)) ;; iterate over the param split by ;\s*
 
+		     ;; fossil scm based triggers
+		     ;;
 		     ((fossil)
 		      (for-each
 		       (lambda (fspec)
 			 (print "fspec: " fspec)
 			 (let* ((url         (symbol->string (car fspec))) ;; THIS COULD BE TROUBLE. Add option to reading line to return as string.
@@ -688,65 +806,73 @@
 			   (fossil:clone-or-sync url fname fdir) ;; )
 			   (let-values (((datetime node)
 					 (fossil:last-change-node-and-time fdir fname branch)))
 			     (if (null? starttimes)
 				 (push-run-spec torun contour runkey
-						`((message . ,(conc "fossil:" branch "-neverrun"))
-						  (runname . ,(conc runname "-" node))
+						`((message  . ,(conc "fossil:" branch "-neverrun"))
+						  (runname  . ,(conc runname "-" node))
 						  (runtrans . ,runtrans)
-						  (target  . ,runkey)))
+						  (areas    . ,areas)
+						  ;; (target   . ,runkey)
+                                                  ))
 				 (if (> datetime last-run) ;; change time is greater than last-run time
 				     (push-run-spec torun contour runkey
-						    `((message . ,(conc "fossil:" branch "-" node))
-						      (runname . ,(conc runname "-" node))
+						    `((message  . ,(conc "fossil:" branch "-" node))
+						      (runname  . ,(conc runname "-" node))
 						      (runtrans . ,runtrans)
-						      (target  . ,runkey)))))
+						      (areas    . ,areas)
+						      ;; (target   . ,runkey)
+                                                      ))))
 			     (print "Got datetime=" datetime " node=" node))))
 		       val-alist))
-		     
+
+		     ;; sensor looking for one or more files newer than reference
+		     ;;
 		     ((file file-or) ;; one or more files must be newer than the reference
-		      (let* ((file-globs  (alist-ref 'glob val-alist))
-			     (youngestdat (common:get-youngest (common:bash-glob file-globs)))
+		      (let* ((youngestdat (common:get-youngest (common:bash-glob file-globs)))
 			     (youngestmod (car youngestdat)))
 			;; (print "youngestmod: " youngestmod " starttimes: " starttimes)
 			(if (null? starttimes) ;; this target has never been run
 			    (push-run-spec torun contour runkey
-					   `((message . "file:neverrun")
-					     (action  . ,action)
+					   `((message  . "file:neverrun")
+					     (action   . ,action)
 					     (runtrans . ,runtrans)
-					     (target  . ,runkey)
-					     (runname . ,runname)))
+					     ;; (target   . ,runkey)
+					     (areas    . ,areas)
+					     (runname  . ,runname)))
 			;; (for-each
 			;;  (lambda (starttime) ;; look at the time the last run was kicked off for this contour
 			;;    (if (> youngestmod (cdr starttime))
 			;; 	   (begin
 			;; 	     (print "starttime younger than youngestmod: " starttime " Youngestmod: " youngestmod)
 			    (if (> youngestmod last-run)
 				(push-run-spec torun contour runkey
-					       `((message . ,(conc ruletype ":" (cadr youngestdat)))
-						 (action  . ,action)
-						 (target  . ,runkey)
+					       `((message  . ,(conc ruletype ":" (cadr youngestdat)))
+						 (action   . ,action)
+						 ;; (target   . ,runkey)
 						 (runtrans . ,runtrans)
-						 (runname . ,runname)
+						 (areas    . ,areas)
+						 (runname  . ,runname)
 						 ))))))
-		      ;; starttimes))
 
+		     ;; all globbed files must be newer than the reference
+		     ;;
 		     ((file-and) ;; all files must be newer than the reference
-		      (let* ((file-globs  (alist-ref 'glob val-alist))
-			     (youngestdat (common:get-youngest file-globs))
+		      (let* ((youngestdat (common:get-youngest file-globs))
 			     (youngestmod (car youngestdat))
 			     (success     #t)) ;; any cases of not true, set flag to #f for AND
 			;; (print "youngestmod: " youngestmod " starttimes: " starttimes)
 			(if (null? starttimes) ;; this target has never been run
 			    (push-run-spec torun contour runkey
-					   `((message . "file:neverrun")
-					     (runname . ,runname)
+					   `((message  . "file:neverrun")
+					     (runname  . ,runname)
 					     (runtrans . ,runtrans)
-					     (target  . ,runkey)
-					     (action  . ,action)))
+					     (areas    . ,areas)
+					     ;; (target   . ,runkey)
+					     (action   . ,action)))
 			    ;; NB// I think this is wrong. It should be looking at last-run only.
-			    (if (> youngestmod last-run)
+			    (if (> youngestmod last-run) ;; WAIT!! Shouldn't file-and be looking at the *oldest* file (thus all are younger than ...)
 				
 				;; 			    (for-each
 				;; 			     (lambda (starttime) ;; look at the time the last run was kicked off for this contour
 				;; 			       (if (< youngestmod (cdr starttime))
 				;; 				   (set! success #f)))
@@ -753,82 +879,98 @@
 				;; 			     starttimes))
 				;; 			(if success
 				;; 			    (begin
 				;; 			      (print "starttime younger than youngestmod: " starttime " Youngestmod: " youngestmod)
 				(push-run-spec torun contour runkey
-					       `((message . ,(conc ruletype ":" (cadr youngestdat)))
-						 (runname . ,runname)
+					       `((message  . ,(conc ruletype ":" (cadr youngestdat)))
+						 (runname  . ,runname)
 						 (runtrans . ,runtrans)
-						 (target  . ,runkey)
-						 (action  . ,action)
+						 ;; (target   . ,runkey)
+						 (areas    . ,areas)
+						 (action   . ,action)
 						 ))))))
 		     (else (print "ERROR: unrecognised rule \"" ruletype)))))
 	       keydats))) ;; sense rules
 	  (hash-table-keys rgconf))
 	 
 	 ;; now have to run populated
 	 (for-each
 	  (lambda (contour)
-	    (print "contour: " contour)
-	    (let* ((val       (or (configf:lookup mtconf "contours" contour) ""))
-		   (val-alist (val->alist val))
-		   (areas     (string-split (or (alist-ref 'areas val-alist) "") ","))
-		   (selector  (alist-ref 'selector val-alist))
-		   (mode-tag  (and selector (string-split-fields "/" selector #:infix)))
-		   (mode-patt (and mode-tag (if (eq? (length mode-tag) 2)(cadr mode-tag) #f)))
-		   (tag-expr  (and mode-tag (if (null? mode-tag) #f (car mode-tag)))))
+	    (let* ((cval       (or (configf:lookup mtconf "contours" contour) ""))
+		   (cval-alist (val->alist cval))                     ;; BEWARE ... NOT the same val-alist as above!
+		   (areas      (val-alist->areas cval-alist))
+		   (selector   (alist-ref 'selector cval-alist))
+		   (mode-tag   (and selector (string-split-fields "/" selector #:infix)))
+		   (mode-patt  (and mode-tag (if (eq? (length mode-tag) 2)(cadr mode-tag) #f)))
+		   (tag-expr   (and mode-tag (if (null? mode-tag) #f (car mode-tag)))))
+	      (print "contour: " contour " areas=" areas " cval=" cval)
 	      (for-each
-	       (lambda (runkeydatset)
+	       (lambda (runkeydatset) 
 		 ;; (print "runkeydatset: ")(pp runkeydatset)
 		 (let ((runkey     (car runkeydatset))
 		       (runkeydats (cadr runkeydatset)))
 		   (for-each
 		    (lambda (runkeydat)
 		      (for-each
 		       (lambda (area)
-			 (let ((runname (alist-ref 'runname runkeydat))
-			       (runtrans (alist-ref 'runtrans runkeydat))
-			       (reason  (alist-ref 'message runkeydat))
-			       (sched   (alist-ref 'sched   runkeydat))
-			       (action  (alist-ref 'action  runkeydat))
-			       (dbdest  (alist-ref 'dbdest  runkeydat))
-			       (append  (alist-ref 'append  runkeydat))
-			       (target  (or (alist-ref 'target  runkeydat) runkey))) ;; override with target if forced
-			   (print "Have: runkey=" runkey " contour=" contour " area=" area " action=" action " tag-expr=" tag-expr " mode-patt=" mode-patt " target=" target)
-			   (if (case (or (and action (string->symbol action)) 'noaction)  ;; ensure we have the needed data to run this action
-				 ((noaction) #f)
-				 ((run)      (and runname reason))
-				 ((sync sync-prepend)     (and reason dbdest))
-				 (else       #f))
-			       ;; instead of unwrapping the runkeydat alist, pass it directly to create-run-pkt
-			       (create-run-pkt mtconf action area runkey runname mode-patt tag-expr pktsdir reason contour sched dbdest append runtrans) 
-			       (print "ERROR: Missing info to make a " action " call: runkey=" runkey " contour=" contour " area=" area  " tag-expr=" tag-expr " mode-patt=" mode-patt " dbdest=" dbdest)
-			       )))
-		       all-areas))
+			 (if (area-allowed? area areas runkey contour mode-patt) ;; is this area to be handled (from areas=a,b,c OR using areafn=abcfn and *area-checks* ...)
+                             (let* ((aval       (or (configf:lookup mtconf "areas" area) ""))
+                                    (aval-alist (val->alist aval))
+                                    (runname    (alist-ref 'runname runkeydat))
+                                    (runtrans   (alist-ref 'runtrans runkeydat))
+                                    
+                                    (reason     (alist-ref 'message runkeydat))
+                                    (sched      (alist-ref 'sched   runkeydat))
+                                    (action     (alist-ref 'action  runkeydat))
+                                    (dbdest     (alist-ref 'dbdest  runkeydat))
+                                    (append     (alist-ref 'append  runkeydat))
+                                    (targets    (or (alist-ref 'target  runkeydat)
+                                                    (map-targets mtconf aval-alist runkey area contour)))) ;; override with target if forced
+                               ;; NEED TO EXPAND RUNKEY => ALL TARGETS MAPPED AND THEN FOREACH .... 
+                               (for-each
+                                (lambda (target)
+                                  (print "Creating pkt for runkey=" runkey " target=" target " contour=" contour " area=" area " action=" action " tag-expr=" tag-expr " mode-patt=" mode-patt)
+                                  (if (case (or (and action (string->symbol action)) 'noaction)  ;; ensure we have the needed data to run this action
+                                        ((noaction)           #f)
+                                        ((run)                (and runname reason))
+                                        ((sync sync-prepend)  (and reason dbdest))
+                                        (else                 #f))
+                                      ;; instead of unwrapping the runkeydat alist, pass it directly to create-run-pkt
+                                      (create-run-pkt mtconf action area runkey target runname mode-patt
+                                                      tag-expr pktsdir reason contour sched dbdest append 
+                                                      runtrans) 
+                                      (print "ERROR: Missing info to make a " action " call: runkey=" runkey " contour=" contour " area=" area  " tag-expr=" tag-expr " mode-patt=" mode-patt " dbdest=" dbdest)
+                                      ))
+                                targets))
+                             (print "NOTE: skipping " runkeydat " for area \"" area "\", not in " areas)))
+                       all-areas))
 		    runkeydats)))
 	       (let ((res (configf:get-section torun contour))) ;; each contour / target
 		 ;; (print "res=" res)
 		 res))))
 	  (hash-table-keys torun)))))))
 
 (define (pkt->cmdline pkta)
-  (let ((action (or (lookup-action-by-key (alist-ref 'a pkta)) "noaction")))
+  (let* ((action (or (lookup-action-by-key (alist-ref 'A pkta)) "noaction"))
+	 (action-param (case (string->symbol action)
+			 ((-set-state-status) (conc (alist-ref 'l pkta) " "))
+			 (else ""))))
     (fold (lambda (a res)
 	    (let* ((key (car a)) ;; get the key name
 		   (val (cdr a))
 		   (par (or (lookup-param-by-key key)  ;; need to check also if it is a switch
 			    (lookup-param-by-key key inlst: *switch-keys*))))
 	      ;; (print "key: " key " val: " val " par: " par)
 	      (if par
 		  (conc res " " (param-translate par) " " val)
-		  (if (member key '(a Z U D T)) ;; a is the action
+		  (if (alist-ref key *additional-cards*) ;; these cards do not translate to parameters or switches
 		      res
 		      (begin
 			(print "ERROR: Unknown key in packet \"" key "\" with value \"" val "\"")
 			res)))))
 	  (conc "megatest " (if (not (member action '("sync")))
-				(conc action " ")
+				(conc action " " action-param)
 				""))
 	  pkta)))
 
 ;; (use trace)(trace pkt->cmdline)
 
@@ -851,12 +993,16 @@
 		     #f
 		   (create-directory "logs")
 		   #t)
 		 #t)
 	     "logs"
-	     "/tmp")))
-    (with-queue-db
+	     "/tmp"))
+	(cpuload (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)))
+	(maxload (string->number (or (configf:lookup mtconf "setup" "maxload")
+				     (configf:lookup mtconf "jobtools" "maxload") ;; respect value used by Megatest calls
+				     "1.1"))))
+    (common:with-queue-db
      mtconf
      (lambda (pktsdirs pktsdir pdb)
        (let* ((rgconfdat (find-and-read-config (conc toppath "/runconfigs.config")))
 	      (rgconf    (car rgconfdat))
 	      (areas     (configf:get-section mtconf "areas"))
@@ -865,85 +1011,214 @@
 	      (torun     (make-hash-table)) ;; target => ( ... info ... )
 	      (rgentargs (hash-table-keys rgconf))) ;; these are the targets registered for automatically triggering
 	 (for-each
 	  (lambda (pktdat)
 	    (let* ((pkta    (alist-ref 'apkt pktdat))
-		   (action  (alist-ref 'a pkta))
+		   (action  (alist-ref 'A pkta))
 		   (cmdline (pkt->cmdline pkta))
 		   (uuid    (alist-ref 'Z pkta))
+		   (user    (alist-ref 'U pkta))
+		   (area    (alist-ref 'G pkta))
 		   (logf    (conc logdir "/" uuid "-run.log"))
 		   (fullcmd (conc "NBFAKE_LOG=" logf " nbfake " cmdline)))
-	      (print "RUNNING: " fullcmd)
-	      (system fullcmd)
-	      (mark-processed pdb (list (alist-ref 'id pktdat)))
-	      (let-values (((ack-uuid ack-pkt)
-			    (add-z-card
-			     (construct-sdat 'P uuid
-					     'T (case (string->symbol action)
-						  ((run) "runstart")
-						  ((sync) "syncstart")    ;; example of translating run -> runstart
-						  (else   action))
-					     'c (alist-ref 'o pkta) ;; THIS IS WRONG! SHOULD BE 'c
-					     't (alist-ref 't pkta)))))
-		(write-pkt pktsdir ack-uuid ack-pkt))))
+	      (if (check-access user mtconf action area)
+		  (if (and (> cpuload maxload)
+			   (member action '("run" "archive"))) ;; do not run archive or run if load is over the specified limit
+		      (print "WARNING: cpuload too high, skipping processing of " uuid)
+		      (begin
+			(print "RUNNING: " fullcmd)
+			(system fullcmd) ;; replace with process ...
+			(mark-processed pdb (list (alist-ref 'id pktdat)))
+			(let-values (((ack-uuid ack-pkt)
+				      (add-z-card
+				       (construct-sdat 'P uuid
+						       'T (case (string->symbol action)
+							    ((run) "runstart")
+							    ((sync) "syncstart")    ;; example of translating run -> runstart
+							    (else   action))
+						       'c (alist-ref 'o pkta) ;; THIS IS WRONG! SHOULD BE 'c
+						       't (alist-ref 't pkta)))))
+			  (write-pkt pktsdir ack-uuid ack-pkt))))
+		  (begin ;; access denied! Mark as such
+		    (mark-processed pdb (list (alist-ref 'id pktdat)))
+		    (let-values (((ack-uuid ack-pkt)
+				  (add-z-card
+				   (construct-sdat 'P uuid
+						   'T "access-denied"
+						   'c (alist-ref 'o pkta) ;; THIS IS WRONG! SHOULD BE 'c
+						   't (alist-ref 't pkta)))))
+		      (write-pkt pktsdir ack-uuid ack-pkt))))))
 	  pkts))))))
-  
+
+(define (check-access user mtconf action area)
+  ;; NOTE: Need control over defaults. E.g. default might be no access
+  (let* ((access-ctrl (hash-table-exists? mtconf "access"))  ;; if there is an access section the default is to REQUIRE enablement/access
+	 (access-list (map (lambda (x)
+			     (string-split x ":"))
+			   (string-split (or (configf:lookup mtconf "access" area) ;; userid:rightstype userid2:rightstype2 ...
+					     (if access-ctrl
+						 "*:none"  ;; nobody has access by default
+						 "*:all")))))
+	 (access-types-dat (configf:get-section mtconf "accesstypes")))
+    (debug:print 0 *default-log-port* "Checking access in " access-list " with access-ctrl " access-ctrl " for area " area)
+    (if access-ctrl
+	(let* ((user-access     (or (assoc user access-list)
+				    (assoc "*"  access-list)))
+	       (access-type     (cadr user-access))
+	       (access-types    (let ((res (alist-ref access-type access-types-dat equal?)))
+				  (if res (car res) res)))
+	       (allowed-actions (string-split (or access-types ""))))
+	  (print "Got " allowed-actions " for user " user " where access-types=" access-types " access-type=" access-type)
+	  (cond
+	   ((and access-types (member action allowed-actions))
+	    ;; (print "Access granted for " user " for " action)
+	    #t)
+	   (else
+	    ;; (print "Access denied for " user " for " action)
+	    #f))))))
+
 (define (get-pkts-dir mtconf)
   (let ((pktsdirs  (configf:lookup mtconf "setup" "pktsdirs"))
 	(pktsdir   (if pktsdirs (car (string-split pktsdirs " ")) #f)))
     pktsdir))
 
 (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.mtutilrc")))
-  (if (file-exists? debugcontrolf)
+  (if (common:file-exists? debugcontrolf)
       (load debugcontrolf)))
 
 (if *action*
     (case (string->symbol *action*)
-      ((run remove rerun set-ss archive kill)
+      ((run remove rerun set-ss archive kill list)
        (let* ((mtconfdat (simple-setup (args:get-arg "-start-dir")))
 	      (mtconf    (car mtconfdat))
+	      (area      (args:get-arg "-area")) ;; look up the area to dispatch to from [areas] section
+	      (areasec   (if area (configf:lookup mtconf "areas" area) #f))
+	      (areadat   (if areasec (val->alist areasec) #f))
+	      (area-path (if areadat (alist-ref 'path areadat) #f))
 	      (pktsdirs  (configf:lookup mtconf "setup" "pktsdirs"))
 	      (pktsdir   (if pktsdirs (car (string-split pktsdirs " ")) #f))
-	      (adjargs   (hash-table-copy args:arg-hash)))
+	      (adjargs   (hash-table-copy args:arg-hash))
+	      (new-ss    (args:get-arg "-new")))
+	 ;; check a few things
+	 (cond
+	  ((and area (not area-path))
+	   (print "ERROR: the specified area was not found in the [areas] table. Area name=" area)
+	   (exit 1))
+	  ((not area)
+	   (print "ERROR: no area specified. Use -area <areaname>")
+	   (exit 1))
+	  (else
+	   (let ((user (current-user-name)))
+	     (if (check-access user mtconf *action* area);; check rights
+		 (print "Access granted for " *action* " action by " user)
+		 (begin
+		   (print "Access denied for " *action* " action by " user)
+		   (exit 1))))))
+	 
 	 ;; (for-each
 	 ;;  (lambda (key)
 	 ;;    (if (not (member key *legal-params*))
 	 ;; 	(hash-table-delete! adjargs key))) ;; we need to delete any params intended for mtutil
 	 ;;  (hash-table-keys adjargs))
 	 (let-values (((uuid pkt)
-		       (command-line->pkt *action* adjargs #f)))
+		       (command-line->pkt *action* adjargs #f area-path: area-path new-ss: new-ss)))
 	   (write-pkt pktsdir uuid pkt))))
       ((dispatch import rungen process)
        (let* ((mtconfdat (simple-setup (args:get-arg "-start-dir")))
 	      (mtconf    (car mtconfdat))
-	      (toppath   (configf:lookup mtconf "dyndat" "toppath")))
+	      (toppath   (configf:lookup mtconf "scratchdat" "toppath")))
 	 (case (string->symbol *action*)
 	   ((process)  (begin
-			 (load-pkts-to-db mtconf)
+			 (common:load-pkts-to-db mtconf)
 			 (generate-run-pkts mtconf toppath)
-			 (load-pkts-to-db mtconf)
+			 (common:load-pkts-to-db mtconf)
 			 (dispatch-commands mtconf toppath)))
-	   ((import)   (load-pkts-to-db mtconf)) ;; import pkts
+	   ((import)   (common:load-pkts-to-db mtconf)) ;; import pkts
 	   ((rungen)   (generate-run-pkts mtconf toppath))
 	   ((dispatch) (dispatch-commands mtconf toppath)))))
+      ;; misc
+      ((show)
+       (if (> (length remargs) 0)
+	   (let* ((mtconfdat (simple-setup (args:get-arg "-start-dir")))
+		  (mtconf    (car mtconfdat))
+		  (sect-dat (configf:get-section mtconf (car remargs))))
+	     (if sect-dat
+		 (for-each
+		  (lambda (entry)
+		    (if (> (length entry) 1)
+			(print (car entry) "   " (cadr entry))
+			(print (car entry))))
+		  sect-dat)
+		 (print "No section \"" (car remargs) "\" found")))
+	   (print "ERROR: list requires section parameter; areas, setup or contours")))
+      ((gendot)
+       (let* ((mtconfdat (simple-setup (args:get-arg "-start-dir")))
+	      (mtconf    (car mtconfdat)))
+	 (common:with-queue-db
+	  mtconf
+	  (lambda (pktsdirs pktsdir conn)
+	    ;;                       pktspec display-fields 
+	    (make-report "out.dot" conn
+			 '((cmd      . ((parent . P)
+					(user   . M)
+					(target . t)))
+			   (runstart . ((parent . P)
+					(target . t)))
+			   (runtype . ((parent . P)))) ;; pktspec
+			 '(P U t)                                                     ;; 
+			 )))))  ;; no ptypes listed (ptypes are strings of pkt types to read from db
       ((db)
        (if (null? remargs)
 	   (print "ERROR: missing sub command for db command")
 	   (let ((subcmd (car remargs)))
 	     (case (string->symbol subcmd)
 	       ((pgschema)
 		(let* ((install-home (common:get-install-area))
 		       (schema-file  (conc install-home "/share/db/mt-pg.sql")))
-		  (if (file-exists? schema-file)
+		  (if (common:file-exists? schema-file)
 		      (system (conc "/bin/cat " schema-file)))))
 	       ((sqlite3schema)
 		(let* ((install-home (common:get-install-area))
 		       (schema-file  (conc install-home "/share/db/mt-sqlite3.sql")))
-		  (if (file-exists? schema-file)
+		  (if (common:file-exists? schema-file)
 		      (system (conc "/bin/cat " schema-file)))))
 	       ((junk)
-		(rmt:get-keys))))))))
+		(rmt:get-keys))))))
+      ((tsend)
+       (if (null? remargs)
+	   (print "ERROR: missing data to send to trigger listeners")
+	   (let* ((msg       (car remargs))
+                  (mtconfdat (simple-setup (args:get-arg "-start-dir")))
+                  (mtconf    (car mtconfdat))
+                  (listeners (configf:get-section mtconf "listeners"))
+                  (prev-seen (make-hash-table))) ;; catch duplicates
+             (for-each
+              (lambda (listener)
+                (let ((host-port (car listener))
+                      (remdat    (cdr listener)))
+                  (print "sending " msg " to " host-port)
+                  (open-send-close-nn host-port msg timeout: 2)))
+              listeners))))
+      ((tlisten)
+       (if (null? remargs)
+           (print "ERROR: useage for tlisten is \"mtutil tlisten portnum\"")
+           (let ((portnum (string->number (car remargs))))
+             (if (not portnum)
+                 (print "ERROR: the portnumber parameter must be a number, you gave: " (car remargs))
+                 (let* ((rep       (start-nn-server portnum))
+                        (mtconfdat (simple-setup (args:get-arg "-start-dir")))
+                        (mtconf    (car mtconfdat))
+                        (script    (configf:lookup mtconf "listener" "script")))
+                   (print "Listening on port " portnum " for messages")
+                   (let loop ((instr (nn-recv rep)))
+                     (print "received " instr ", running \"" script " " instr "\"")
+                     (system (conc script " " instr))
+                     (nn-send rep "ok")
+                     (loop (nn-recv rep))))))))
+      
+      )) ;; the end
+             
 
 ;; If HTTP_HOST is defined then we must be in the cgi environment
 ;; so run stml and exit
 ;;
 (if (get-environment-variable "HTTP_HOST")
@@ -963,5 +1238,11 @@
       (install-history-file (get-environment-variable "HOME") ".mtutil_history") ;;  [homedir] [filename] [nlines])
       (current-input-port (make-readline-port "mtutil> "))
       (if (args:get-arg "-repl")
 	  (repl)
 	  (load (args:get-arg "-load")))))
+
+#|
+(define mtconf (car (simple-setup #f)))
+(define dat (with-queue-db mtconf (lambda (conn)(get-pkts conn '()))))
+(pp (pkts#flatten-all dat '((cmd . ((parent . P)(url . M)))(runtype . ((parent . P)))) 'id 'group-id 'uuid 'parent 'pkt-type 'pkt 'processed))
+|#

Index: newdashboard.scm
==================================================================
--- newdashboard.scm
+++ newdashboard.scm
@@ -84,11 +84,11 @@
       (print help)
       (exit)))
 
 ;; ease debugging by loading ~/.dashboardrc
 (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.dashboardrc")))
-  (if (file-exists? debugcontrolf)
+  (if (common:file-exists? debugcontrolf)
       (load debugcontrolf)))
 
 (debug:setup)
 
 (define *tim* (iup:timer))
@@ -375,11 +375,11 @@
       #f))
 
 (define (test-panel window-id)
   (let* ((curr-row-num 0)
 	 (viewlog    (lambda (x)
-		       (if (file-exists? logfile)
+		       (if (common:file-exists? logfile)
 					;(system (conc "firefox " logfile "&"))
 			   (iup:send-url logfile)
 			   (message-window (conc "File " logfile " not found")))))
 	 (xterm      (lambda (x)
 		       (if (directory-exists? rundir)
@@ -730,14 +730,15 @@
 		       (lambda (x)
 			 ;; Want to dedicate no more than 50% of the time to this so skip if
 			 ;; 2x delta time has not passed since last query
 			 (if (< nextmintime (current-milliseconds))
 			     (let* ((starttime (current-milliseconds))
-				    (changes   (dcommon:run-update keys data runname keypatts testpatt states statuses 'full my-window-id))
+				    ;; (changes   (dcommon:run-update keys data runname keypatts testpatt states statuses 'full my-window-id))
 				    (endtime   (current-milliseconds)))
 			       (set! nextmintime (+ endtime (* 2 (- endtime starttime))))
-			       (debug:print 11 *default-log-port* "CHANGE(S): " (car changes) "..."))
+			       ;; (debug:print 11 *default-log-port* "CHANGE(S): " (car changes) "..."))
+                               )
 			     (debug:print-info 11 *default-log-port* "Server overloaded"))))))
 
 ;; (dboard:data-updaters-set! *data* (make-hash-table))
 (newdashboard #f) ;; *dbstruct-local*)    
 (iup:main-loop)

ADDED   nexttag.rb
Index: nexttag.rb
==================================================================
--- /dev/null
+++ nexttag.rb
@@ -0,0 +1,46 @@
+#!/usr/bin/env ruby
+
+
+def get_next_tag(branch)
+
+
+
+  abort "Not on a version branch like v1.64 (got: >#{branch}<)" unless branch.match(/^v\d\.\d\d$/)
+  
+  #puts "this branch: #{branch}"
+
+  tag_pat = /#{branch}(\d\d)/
+  remote=`fsl remote`.chomp.sub(/^file:\/\//,'') # get tagset from origin
+  cmd="fossil tag -R '#{remote}' list"
+  tags = `#{cmd}`.split /\n/
+  abort "fossil command failed [#{cmd}]" if $? != 0
+  branch_tags = tags.find_all{|x| x.match(tag_pat) }.sort
+  if branch_tags.length == 0
+    return branch + "01"
+  else
+    latest_tag = branch_tags.last
+    m1 = latest_tag.match(tag_pat)
+    minor_digits = m1[1].to_i + 1
+    if (minor_digits % 10) == 0
+      minor_digits += 1
+    end
+    new_tag=sprintf("%s%02d", branch, minor_digits)
+    return new_tag
+  end
+end
+
+branch = `fossil branch`.sub(/\A.*\* /m,'').sub(/\n.*\z/m,'')
+tag= get_next_tag(branch)
+
+puts "TODO: Write to megatest-version.scm:"
+puts ";; Always use two or four digit decimal
+;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00..
+
+(declare (unit megatest-version))
+
+(define megatest-version #{tag.sub(/^v/,'')})
+
+"
+
+puts "TODO: fossil tag add #{tag} #{branch}"
+puts ""

Index: ods.scm
==================================================================
--- ods.scm
+++ ods.scm
@@ -197,11 +197,11 @@
 ;;   '( (sheet1 (r1c1 r1c2 r1c3 ...)
 ;;              (r2c1 r2c3 r2c3 ...) )
 ;;      (sheet2 ( ... )
 ;;              ( ... ) ) )
 (define (ods:list->ods path fname data)
-  (if (not (file-exists? path))
+  (if (not (common:file-exists? path))
       (print "ERROR: path to create ods data must pre-exist")
       (begin
 	(with-output-to-file (conc path "/content.xml")
 	  (lambda ()
 	    (ods:construct-dir path)

Index: portlogger.scm
==================================================================
--- portlogger.scm
+++ portlogger.scm
@@ -19,11 +19,11 @@
 ;; lsof -i
 
 
 (define (portlogger:open-db fname)
   (let* ((avail    (tasks:wait-on-journal fname 5 remove: #t)) ;; wait up to about 10 seconds for the journal to go away
-	 (exists   (file-exists? fname))
+	 (exists   (common:file-exists? fname))
 	 (db       (if avail 
 		       (sqlite3:open-database fname)
 		       (begin
 			 (system (conc "rm -f " fname))
 			 (sqlite3:open-database fname))))
@@ -56,12 +56,12 @@
      exn
      (begin
        ;; (release-dot-lock fname)
        (debug:print-error 0 *default-log-port* "portlogger:open-run-close failed. " proc " " params)
        (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
-       (debug:print 0 *default-log-port* "exn=" (condition->list exn))
-       (if (file-exists? fname)(delete-file fname)) ;; brutally get rid of it
+       (debug:print 5 *default-log-port* "exn=" (condition->list exn))
+       (if (common:file-exists? fname)(delete-file fname)) ;; brutally get rid of it
        (print-call-chain (current-error-port)))
      (let* (;; (lock   (obtain-dot-lock fname 2 9 10))
 	    (db     (portlogger:open-db fname))
 	    (res    (apply proc db params)))
        (sqlite3:finalize! db)
@@ -103,11 +103,11 @@
   (handle-exceptions
       exn
       (begin
 	(debug:print 0 *default-log-port* "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db")
 	(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
-	(debug:print 0 *default-log-port* "exn=" (condition->list exn))
+	(debug:print 5 *default-log-port* "exn=" (condition->list exn))
 	(print-call-chain (current-error-port))
 	(debug:print 0 *default-log-port* "Continuing anyway.")
 	#f)
     (sqlite3:fold-row
      (lambda (var curr)
@@ -128,11 +128,11 @@
     (handle-exceptions
      exn
      (begin
        (debug:print 0 *default-log-port* "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db")
        (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
-       (debug:print 0 *default-log-port* "exn=" (condition->list exn))
+       (debug:print 5 *default-log-port* "exn=" (condition->list exn))
        (print-call-chain (current-error-port))
        (debug:print 0 *default-log-port* "Continuing anyway."))
      (portlogger:take-port db portnum))
     portnum))
 
@@ -158,11 +158,11 @@
 	  (handle-exceptions
 	   exn
 	   (begin
 	     (debug:print 0 *default-log-port* "EXCEPTION: portlogger database at " dbfname " probably overloaded or unreadable. Try removing it.")
 	     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
-	     (print "exn=" (condition->list exn))
+	     (debug:print 5 *default-log-port* "exn=" (condition->list exn))
 	     (debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
 	     (print-call-chain (current-error-port))
 	     #f)
 	   (case (string->symbol (car args)) ;; commands with two or more params
 	     ((take)(portlogger:take-port db (string->number (cadr args))))

Index: process.scm
==================================================================
--- process.scm
+++ process.scm
@@ -13,11 +13,10 @@
 ;; Process convience utils
 ;;======================================================================
 
 (use regex)
 (declare (unit process))
-;;(declare (uses common))
 
 (define (process:conservative-read port)
   (let loop ((res ""))
     (if (not (eof-object? (peek-char port)))
 	(loop (conc res (read-char port)))
@@ -53,11 +52,11 @@
   (handle-exceptions
    exn
    (begin
      (print "ERROR:  Failed to run command: " cmd " " (string-intersperse params " "))
      (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
-     (print "exn=" (condition->list exn))
+     (debug:print 5 *default-log-port* "exn=" (condition->list exn))
      #f)
    (let-values (((fh fho pid) (if (null? params)
 				  (process cmd)
 				  (process cmd params))))
        (let loop ((curr (read-line fh))
@@ -143,11 +142,11 @@
 
 (define (process:alive? pid)
   (handle-exceptions
    exn
    ;; possibly pid is a process not a child, look in /proc to see if it is running still
-   (file-exists? (conc "/proc/" pid))
+   (common:file-exists? (conc "/proc/" pid))
    (let-values (((rpid exit-type exit-signal)(process-wait pid #t)))
        (and (number? rpid)
 	    (equal? rpid pid)))))
 
 (define (process:alive-on-host? host pid)

Index: rmt.scm
==================================================================
--- rmt.scm
+++ rmt.scm
@@ -11,13 +11,11 @@
 
 (use format typed-records) ;; RADT => purpose of json format??
 
 (declare (unit rmt))
 (declare (uses api))
-(declare (uses tdb))
 (declare (uses http-transport))
-;;(declare (uses nmsg-transport))
 (include "common_records.scm")
 
 ;;
 ;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
 ;;
@@ -48,13 +46,19 @@
 
 ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
 ;;
 (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected
 
+  ;;DOT digraph megatest_state_status {
+  ;;DOT   ranksep=0;
+  ;;DOT   // rankdir=LR;
+  ;;DOT   node [shape="box"];
+  ;;DOT "rmt:send-receive" -> MUTEXLOCK;
+  ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; }
   ;; do all the prep locked under the rmt-mutex
   (mutex-lock! *rmt-mutex*)
-
+  
   ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote
   ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
   ;; 3. do the query, if on homehost use local access
   ;;
   (let* ((start-time    (current-seconds)) ;; snapshot time so all use cases get same value
@@ -71,114 +75,160 @@
 				    (remote-ro-mode-set! runremote ro-mode)
 				    (remote-ro-mode-checked-set! runremote #t)
 				    ro-mode)
 				  ro-mode)))))
 
-     ;; ensure we have a record for our connection for given area
-     (if (not runremote)                   ;; can remove this one. should never get here.         
-	 (begin
-	   (set! *runremote* (make-remote))
-	   (set! runremote   *runremote*))) ;; new runremote will come from this on next iteration
-     ;; ensure we have a homehost record
-     (if (not (pair? (remote-hh-dat runremote)))  ;; not on homehost
-	 (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
-	 (remote-hh-dat-set! runremote (common:get-homehost)))
-
-     ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
+    ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity
+    ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"];
+    ;; DOT INIT_RUNREMOTE -> MUTEXLOCK;
+    ;; ensure we have a record for our connection for given area
+    (if (not runremote)                   ;; can remove this one. should never get here.         
+	(begin
+	  (set! *runremote* (make-remote))
+	  (set! runremote   *runremote*))) ;; new runremote will come from this on next iteration
+    
+    ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity
+    ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"];
+    ;; DOT SET_HOMEHOST -> MUTEXLOCK;
+    ;; ensure we have a homehost record
+    (if (not (pair? (remote-hh-dat runremote)))  ;; not on homehost
+	(thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
+	(remote-hh-dat-set! runremote (common:get-homehost)))
+    
+    ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
     (cond
+     ;;DOT EXIT;
+     ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" }
      ;; give up if more than 15 attempts
      ((> attemptnum 15)
       (debug:print 0 *default-log-port* "ERROR: 15 tries to start/connect to server. Giving up.")
       (exit 1))
 
-     ;; readonly mode, read request-  handle it - case 20
+     ;;DOT CASE2 [label="local\nreadonly\nquery"];
+     ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2}
+     ;;DOT CASE2 -> "rmt:open-qry-close-locally";
+     ;; readonly mode, read request-  handle it - case 2
      ((and readonly-mode
            (member cmd api:read-only-queries)) 
       (mutex-unlock! *rmt-mutex*)
-      (debug:print-info 12 *default-log-port* "rmt:send-receive, case 20")
+      (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2")
       (rmt:open-qry-close-locally cmd 0 params)
       )
 
+     ;;DOT CASE3 [label="write in\nread-only mode"];
+     ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3}
+     ;;DOT CASE3 -> "#f";
      ;; readonly mode, write request.  Do nothing, return #f
      (readonly-mode
       (mutex-unlock! *rmt-mutex*)
-      (debug:print-info 12 *default-log-port* "rmt:send-receive, case 21")
+      (debug:print-info 12 *default-log-port* "rmt:send-receive, case 3")
       (debug:print 0 *default-log-port* "WARNING: write transaction requested on a readonly area.  cmd="cmd" params="params)
-      #f
-      )
-
-     ;; reset the connection if it has been unused too long
-     ((and runremote
-           (remote-conndat runremote)
-	   (let ((expire-time (+ (- start-time (remote-server-timeout runremote))(random 10)))) ;; Subtract or add the random value? Seems like it should be substract but Neither fixes the "WARNING: failure in with-input-from-request to #<request>.\n message: Server closed connection before sending response"
-	     (< (http-transport:server-dat-get-last-access (remote-conndat runremote)) expire-time)))
-      (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.")
-      (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
-      (mutex-unlock! *rmt-mutex*)
-      (rmt:send-receive cmd rid params attemptnum: attemptnum))
+      #f)
+
+     ;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
+     ;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
+     ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
+     ;;
+     ;; ;;DOT CASE4 [label="reset\nconnection"];
+     ;; ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4}
+     ;; ;;DOT CASE4 -> "rmt:send-receive";
+     ;; ;; reset the connection if it has been unused too long
+     ;; ((and runremote
+     ;;       (remote-conndat runremote)
+     ;; 	   (let ((expire-time (+ (- start-time (remote-server-timeout runremote))(random 10)))) ;; Subtract or add the random value? Seems like it should be substract but Neither fixes the "WARNING: failure in with-input-from-request to #<request>.\n message: Server closed connection before sending response"
+     ;; 	     (< (http-transport:server-dat-get-last-access (remote-conndat runremote)) expire-time)))
+     ;;  (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.")
+     ;;  (http-transport:close-connections area-dat: runremote)
+     ;;  (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
+     ;;  (mutex-unlock! *rmt-mutex*)
+     ;;  (rmt:send-receive cmd rid params attemptnum: attemptnum))
+
+     ;;DOT CASE5 [label="local\nread"];
+     ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5};
+     ;;DOT CASE5 -> "rmt:open-qry-close-locally";
      ;; on homehost and this is a read
-     ((and (not (remote-force-server runremote))      ;; honor forced use of server
-	   (cdr (remote-hh-dat runremote))     ;; on homehost
-           (member cmd api:read-only-queries)) ;; this is a read
+     ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
+	   (cdr (remote-hh-dat runremote))       ;; on homehost
+           (member cmd api:read-only-queries))   ;; this is a read
       (mutex-unlock! *rmt-mutex*)
-      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  3")
+      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  5")
       (rmt:open-qry-close-locally cmd 0 params))
 
+     ;;DOT CASE6 [label="init\nremote"];
+     ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6};
+     ;;DOT CASE6 -> "rmt:send-receive";
      ;; on homehost and this is a write, we already have a server, but server has died
      ((and (cdr (remote-hh-dat runremote))           ;; on homehost
            (not (member cmd api:read-only-queries))  ;; this is a write
            (remote-server-url runremote)             ;; have a server
            (not (server:ping (remote-server-url runremote))))  ;; server has died. NOTE: this is not a cheap call! Need better approach.
       (set! *runremote* (make-remote))
+      (remote-force-server-set! runremote (common:force-server?))
       (mutex-unlock! *rmt-mutex*)
-      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4.1")
+      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  6")
       (rmt:send-receive cmd rid params attemptnum: attemptnum))
 
+     ;;DOT CASE7 [label="homehost\nwrite"];
+     ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7};
+     ;;DOT CASE7 -> "rmt:open-qry-close-locally";
      ;; on homehost and this is a write, we already have a server
-     ((and (not (remote-force-server runremote))     ;; honor forced use of server
+     ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
 	   (cdr (remote-hh-dat runremote))           ;; on homehost
            (not (member cmd api:read-only-queries))  ;; this is a write
            (remote-server-url runremote))            ;; have a server
       (mutex-unlock! *rmt-mutex*)
-      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4")
+      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4.1")
       (rmt:open-qry-close-locally cmd 0 params))
 
+     ;;DOT CASE8 [label="force\nserver"];
+     ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8};
+     ;;DOT CASE8 -> "rmt:open-qry-close-locally";
      ;;  on homehost, no server contact made and this is a write, passively start a server 
-     ((and (not (remote-force-server runremote)) ;; honor forced use of server
-	   (cdr (remote-hh-dat runremote))       ;; new
-           (not (remote-server-url runremote))
-	   (not (member cmd api:read-only-queries)))
-      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  5")
+     ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
+	   (cdr (remote-hh-dat runremote))           ;; have homehost
+           (not (remote-server-url runremote))       ;; no connection yet
+	   (not (member cmd api:read-only-queries))) ;; not a read-only query
+      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  8")
       (let ((server-url  (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
 	(if server-url
 	    (remote-server-url-set! runremote server-url) ;; the string can be consumed by the client setup if needed
 	    (if (common:force-server?)
 		(server:start-and-wait *toppath*)
 		(server:kind-run *toppath*))))
       (remote-force-server-set! runremote (common:force-server?))
       (mutex-unlock! *rmt-mutex*)
-      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  5.1")
+      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  8.1")
       (rmt:open-qry-close-locally cmd 0 params))
 
+     ;;DOT CASE9 [label="force server\nnot on homehost"];
+     ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9};
+     ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive";
      ((or (and (remote-force-server runremote)              ;; we are forcing a server and don't yet have a connection to one
 	       (not (remote-conndat runremote)))
 	  (and (not (cdr (remote-hh-dat runremote)))        ;; not on a homehost 
 	       (not (remote-conndat runremote))))           ;; and no connection
-      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  6  hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote))
+      (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote))
       (mutex-unlock! *rmt-mutex*)
       (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up?
 	  (server:start-and-wait *toppath*))
-      (remote-force-server-set! runremote (common:force-server?))
       (remote-conndat-set! runremote (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http
       (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as
+
+     ;;DOT CASE10 [label="on homehost"];
+     ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10};
+     ;;DOT CASE10 -> "rmt:open-qry-close-locally";
      ;; all set up if get this far, dispatch the query
      ((and (not (remote-force-server runremote))
 	   (cdr (remote-hh-dat runremote))) ;; we are on homehost
       (mutex-unlock! *rmt-mutex*)
-      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  7")
+      (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10")
       (rmt:open-qry-close-locally cmd (if rid rid 0) params))
 
+     ;;DOT CASE11 [label="send_receive"];
+     ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11};
+     ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"];
+     ;;DOT CASE11 -> "RESULT" [label="call succeeded"];
      ;; not on homehost, do server query
      (else
       (mutex-unlock! *rmt-mutex*)
       (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9")
       (mutex-lock! *rmt-mutex*)
@@ -191,11 +241,15 @@
 			 (else
 			  (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported")
 			  (exit))))
 	     (success  (if (vector? dat) (vector-ref dat 0) #f))
 	     (res      (if (vector? dat) (vector-ref dat 1) #f)))
-	(if (vector? conninfo)(http-transport:server-dat-update-last-access conninfo)) ;; refresh access time
+	(if (and (vector? conninfo) (> 5 (vector-length conninfo)))
+            (http-transport:server-dat-update-last-access conninfo) ;; refresh access time
+            (begin
+              (set! conninfo #f)
+              (remote-conndat-set! runremote #f))) 
 	;; (mutex-unlock! *rmt-mutex*)
         (debug:print-info 13 *default-log-port* "rmt:send-receive, case  9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
 	(mutex-unlock! *rmt-mutex*)
 	(if success ;; success only tells us that the transport was successful, have to examine the data to see if there was a detected issue at the other end
 	    (if (and (vector? res)
@@ -202,24 +256,28 @@
 		     (eq? (vector-length res) 2)
 		     (eq? (vector-ref res 1) 'overloaded)) ;; since we are looking at the data to carry the error we'll use a fairly obtuse combo to minimise the chances of some sort of collision.
 		(let ((wait-delay (+ attemptnum (* attemptnum 10))))
 		  (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
 		  (mutex-lock! *rmt-mutex*)
+		  (http-transport:close-connections area-dat: runremote)
 		  (set! *runremote* #f) ;; force starting over
 		  (mutex-unlock! *rmt-mutex*)
 		  (thread-sleep! wait-delay)
 		  (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
 		res) ;; All good, return res
 	    (begin
 	      (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)
 	      (remote-conndat-set!    runremote #f)
+	      (http-transport:close-connections area-dat: runremote)
 	      (remote-server-url-set! runremote #f)
 	      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9.1")
-	      (if (not (server:check-if-running *toppath*))
-		  (server:start-and-wait *toppath*))
+	      ;; (if (not (server:check-if-running *toppath*))
+	      ;; 	  (server:start-and-wait *toppath*))
 	      (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))))))))
 
+    ;;DOT }
+    
 ;; (define (rmt:update-db-stats run-id rawcmd params duration)
 ;;   (mutex-lock! *db-stats-mutex*)
 ;;   (handle-exceptions
 ;;    exn
 ;;    (begin
@@ -275,11 +333,11 @@
     res))
 
 (define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
   (let* ((qry-is-write   (not (member cmd api:read-only-queries)))
 	 (db-file-path   (db:dbfile-path)) ;;  0))
-	 (dbstruct-local (db:setup))  ;; make-dbr:dbstruct path:  dbdir local: #t)))
+	 (dbstruct-local (db:setup #t))  ;; make-dbr:dbstruct path:  dbdir local: #t)))
 	 (read-only      (not (file-write-access? db-file-path)))
 	 (start          (current-milliseconds))
 	 (resdat         (if (not (and read-only qry-is-write))
 			     (let ((v (api:execute-requests dbstruct-local (vector (symbol->string cmd) params))))
 			       (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
@@ -474,16 +532,16 @@
 
 (define (rmt:set-tests-state-status run-id                      testnames currstate currstatus newstate newstatus)
   (rmt:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus)))
 
 (define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)
-  (if (number? run-id)
-      (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode))
-      (begin
-	(debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id)
-	(print-call-chain (current-error-port))
-	'())))
+  ;; (if (number? run-id)
+  (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)))
+  ;;    (begin
+  ;;	(debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id)
+  ;;	(print-call-chain (current-error-port))
+  ;;	'())))
 
 ;; get stuff via synchash 
 (define (rmt:synchash-get run-id proc synckey keynum params)
   (rmt:send-receive 'synchash-get run-id (list run-id proc synckey keynum params)))
 
@@ -755,14 +813,13 @@
 ;;  T E S T   D A T A 
 ;;======================================================================
 
 (define (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f)) 
   (rmt:send-receive 'read-test-data run-id (list run-id test-id categorypatt)))
-;;   (let ((tdb  (rmt:open-test-db-by-test-id run-id test-id work-area: work-area)))
-;;     (if tdb
-;; 	(tdb:read-test-data tdb test-id categorypatt)
-;; 	'())))
+(define (rmt:read-test-data* run-id test-id categorypatt varpatt #!key (work-area #f)) 
+  (rmt:send-receive 'read-test-data* run-id (list run-id test-id categorypatt varpatt)))
+
 
 (define (rmt:testmeta-add-record testname)
   (rmt:send-receive 'testmeta-add-record #f (list testname)))
 
 (define (rmt:testmeta-get-record testname)
@@ -790,10 +847,23 @@
 (define (rmt:tasks-set-state-given-param-key param-key new-state)
   (rmt:send-receive 'tasks-set-state-given-param-key #f (list  param-key new-state)))
 
 (define (rmt:tasks-get-last target runname)
   (rmt:send-receive 'tasks-get-last #f (list target runname)))
+
+;;======================================================================
+;; N O   S Y N C   D B 
+;;======================================================================
+
+(define (rmt:no-sync-set var val)
+  (rmt:send-receive 'no-sync-set #f `(,var ,val)))
+
+(define (rmt:no-sync-get/default var default)
+  (rmt:send-receive 'no-sync-get/default #f `(,var ,default)))
+
+(define (rmt:no-sync-del! var)
+  (rmt:send-receive 'no-sync-del! #f `(,var)))
 
 ;;======================================================================
 ;; A R C H I V E S
 ;;======================================================================
 

Index: rpctest/rpctest-continuous-client.scm
==================================================================
--- rpctest/rpctest-continuous-client.scm
+++ rpctest/rpctest-continuous-client.scm
@@ -17,11 +17,11 @@
 (print "Operation: " operation ", param: " param)
 
 ;; have a pool of db's to pick from
 (define *dbpool* '())
 (define *pool-mutex* (make-mutex))
-1
+
 (define (get-db)
   (mutex-lock! *pool-mutex*)
   (if (null? *dbpool*)
       (begin
 	(mutex-unlock! *pool-mutex*)

Index: runconfig.scm
==================================================================
--- runconfig.scm
+++ runconfig.scm
@@ -73,11 +73,11 @@
   (let ((runconfigf (conc  *toppath* "/runconfigs.config"))
 	(targ       (or (common:args-get-target)
 			targ-from-db
 			(get-environment-variable "MT_TARGET"))))
     (pop-directory)
-    (if (file-exists? runconfigf)
+    (if (common:file-exists? runconfigf)
 	(setup-env-defaults runconfigf run-id #t keyvals
 			    environ-patt: (conc "(default"
 						(if targ
 						    (conc "|" targ ")")
 						    ")")))

Index: runconfigs.config
==================================================================
--- runconfigs.config
+++ runconfigs.config
@@ -7,13 +7,13 @@
 #
 
 # example of a cron entry to run sync using db spec pgdb, with pgdb setting in file local.config
 #
 [a/b/c]
-all:scheduled:sync          cron=  0/5 * * * *;dbdest=pgdb;appendconf=/mfs/matt/.sysmaint/local.config
-quick:scheduled:sync        cron=  0/5 * * * *;dbdest=pgdb;appendconf=/mfs/matt/.sysmaint/local.config
-fast:scheduled:sync-prepend cron=  0/1 * * * *;dbdest=pgdb;appendconf=/mfs/matt/.sysmaint/local.config
+all:scheduled:sync     cron=  0/5 * * * *;dbdest=pgdb;appendconf=/nfs/phoebe/disk1/home/mfs/matt/.sysmaint/local.config
+# quick:scheduled:sync   cron=  0/5 * * * *;dbdest=pgdb;appendconf=/nfs/phoebe/disk1/home/mfs/matt/.sysmaint/local.config
+# fast:scheduled:sync-prepend cron=  0/1 * * * *;dbdest=pgdb;appendconf=/mfs/matt/.sysmaint/local.config
 
 [scriptinc ./gentargets.sh #{getenv USER}]
 # [v1.23/45/67]
 
 # tip will be replaced with hashkey?
@@ -25,24 +25,25 @@
 # [v1.63/tip/dev]
 # file:   files changes since last run trigger new run
 # script: script is called with unix seconds as last parameter (other parameters are preserved)
 #
 # contour:sensetype:action params            data
-quick:file:run             runtrans=auto;         glob=/home/matt/data/megatest/*.scm
-snazy:file:run             runtrans=corporate-ww; glob=/home/matt/data/megatest/*.scm
-short:file:run             runtrans=short;        glob=/home/matt/data/megatest/*.scm
+# commented out for debug
+quick:file:run             runtrans=auto;         glob=/home/matt/data/megatest/*.scm foo.touchme
+# snazy:file:run             runtrans=corporate-ww; glob=/home/matt/data/megatest/*.scm
+# short:file:run       runtrans=short;        glob=/home/matt/data/megatest/*.scm
 
 # script returns change-time (unix epoch), new-target-name, run-name
 #
 # quick:script:run           checkfossil = http://www.kiatoa.com/fossils/megatest v1.63;\
 #                            checkfossil = http://www.kiatoa.com/fossils/megatest_qa trunk
 
-# fossil based trigger
-#
-quick:fossil:run           http://www.kiatoa.com/fossils/megatest=v1.63;\
-                           http://www.kiatoa.com/fossils/megatest_qa=trunk;\
-		           http://www.kiatoa.com/fossils/megatest=v1.64
+# # fossil based trigger
+# #
+# quick:fossil:run           http://www.kiatoa.com/fossils/megatest=v1.63;\
+#                            http://www.kiatoa.com/fossils/megatest_qa=trunk;\
+# 		           http://www.kiatoa.com/fossils/megatest=v1.64
 
 # field          allowed values
 # -----          --------------
 # minute         0-59
 # hour           0-23

Index: runs.scm
==================================================================
--- runs.scm
+++ runs.scm
@@ -8,13 +8,12 @@
 ;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 ;;  PURPOSE.
 
 ;;  strftime('%m/%d/%Y %H:%M:%S','now','localtime')
 
-(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking (srfi 18) 
+(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (srfi 18) 
      posix-extras directory-utils pathname-expand typed-records format)
-(import (prefix sqlite3 sqlite3:))
 
 (declare (unit runs))
 (declare (uses db))
 (declare (uses common))
 (declare (uses items))
@@ -21,11 +20,10 @@
 (declare (uses runconfig))
 (declare (uses tests))
 (declare (uses server))
 (declare (uses mt))
 (declare (uses archive))
-(declare (uses keys))
 ;; (declare (uses filedb))
 
 (include "common_records.scm")
 (include "key_records.scm")
 (include "db_records.scm")
@@ -96,18 +94,28 @@
 		(msg        ((condition-property-accessor 'exn 'message) exn)))
 	    (if (< count 5)
 		(begin ;; this call is colliding, do some crude stuff to fix it.
 		  (debug:print 0 *default-log-port* "ERROR: *configdat* was inaccessible! This should never happen. Retry #" count)
 		  (launch:setup force-reread: #t)
-		  (fatal-loop (+ count 1)))
+		  (fatal-loop (+ count 1))) 
 		(begin
 		  (debug:print 0 *default-log-port* "FATAL: *configdat* was inaccessible! This should never happen. Retried " count " times. Message: " msg)
 		  (debug:print 0 *default-log-port* "Call chain:")
 		  (with-output-to-port *default-log-port*
-		    (lambda ()(pp call-chain)))
+
+                    (lambda ()
+                      (print "*configdat* is >>"*configdat*"<<")
+                      (pp *configdat*)
+                      (pp call-chain)))
+                  
 		  (exit 1))))
           ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1.5")
+          (when (or (not *configdat*) (not (hash-table? *configdat*)))
+              (debug:print 0 *default-log-port* "WARNING: *configdat* was inaccessible! This should never happen.  Brute force reread.")
+              ;;(BB> "ERROR: *configdat* was inaccessible! This should never happen.  Brute force reread.")
+              (thread-sleep! 2) ;; assuming nfs lag.
+              (launch:setup force-reread: #t))
           (alist->env-vars (hash-table-ref/default *configdat* "env-override" '())))) ;;;; environment is tainted HERE in this let block.
     ;;(bb-check-path msg: "runs:set-megatest-env-vars block 2")
     ;; Lets use this as an opportunity to put MT_RUNNAME in the environment
     (let ((runname  (if inrunname inrunname (rmt:get-run-name-from-id run-id))))
       (if runname
@@ -218,11 +226,49 @@
 						   " in jobgroup \"" jobgroup "\" exceeds limit of " job-group-limit))
 				  #t)
 				 (else #f))))
 	  (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit)))))
 
-
+(define (runs:run-pre-hook run-id)
+    (let* ((run-pre-hook   (configf:lookup *configdat* "runs" "pre-hook"))
+           (existing-tests (if run-pre-hook
+                               (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses
+                                                      #f #f ;; offset limit
+                                                      #f ;; not-in
+                                                      #f ;; sort-by
+                                                      #f ;; sort-order
+                                                      #f ;; get full data (not 'shortlist)
+                                                      0 ;; (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time
+                                                      'dashboard)
+                               '()))
+           (log-dir         (conc *toppath* "/logs"))
+           (log-file        (conc "pre-hook-" (string-translate (getenv "MT_TARGET") "/" "-") "-" (getenv "MT_RUNNAME") ".log"))
+           (full-log-fname  (conc log-dir "/" log-file)))
+      (if run-pre-hook
+          (if (null? existing-tests)
+              (let* ((use-log-dir (if (not (directory-exists? log-dir))
+                                      (handle-exceptions
+                                       exn
+                                       (begin
+                                         (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir)
+                                         #f)
+                                       (create-directory log-dir #t)
+                                       #t)
+                                      #t))
+                     (start-time   (current-seconds))
+                     (actual-logf  (if use-log-dir full-log-fname log-file)))
+                (handle-exceptions
+                 exn
+                 (begin
+                   (print-call-chain *default-log-port*)
+                   (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn))
+                   (debug:print 0 *default-log-port* "ERROR: failed to run pre-hook " run-pre-hook ", check the log " log-file))
+                 (debug:print-info 0 *default-log-port* "running run-pre-hook: \"" run-pre-hook "\", log is " actual-logf)
+                 (system (conc run-pre-hook " >> " actual-logf " 2>&1"))
+                 (debug:print-info 0 *default-log-port* "pre-hook \"" run-pre-hook "\" took " (- (current-seconds) start-time) " seconds to run.")))
+              (debug:print 0 *default-log-port* "Skipping pre-hook call \"" run-pre-hook "\" as there are existing tests for this run.")))))
+    
 ;;  test-names: Comma separated patterns same as test-patts but used in selection 
 ;;              of tests to run. The item portions are not respected.
 ;;              FIXME: error out if /patt specified
 ;;            
 (define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names
@@ -284,18 +330,24 @@
     ;; force the starting of a server
     (debug:print 0 *default-log-port* "waiting on server...")
     (server:start-and-wait *toppath*)
     
     (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process
-    (set! runconf (if (file-exists? runconfigf)
+    (set! runconf (if (common:file-exists? runconfigf)
 		      (setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target)
 		      (begin
 			(debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf)
 			#f)))
 
     (if (not test-patts) ;; first time in - adjust testpatt
 	(set! test-patts (common:args-get-testpatt runconf)))
+    ;; if test-patts is #f at this point there is something wrong and we need to bail out
+    (if (not test-patts)
+	(begin
+	  (debug:print 0 *default-log-port* "WARNING: there is no test pattern for this run. Exiting now.")
+	  (exit 0)))
+    
     (if (args:get-arg "-tagexpr")
 	(begin
 	  (set! allowed-tests (string-join (runs:get-tests-matching-tags (args:get-arg "-tagexpr")) ","))
 	  	  (debug:print-info 0 *default-log-port* "filtering initial test list with tagexpr: " (args:get-arg "-tagexpr") " => " allowed-tests)
 		  ));; tests will be ANDed with this list
@@ -361,10 +413,14 @@
 	   (string-split (or (configf:lookup *configdat* "setup" "allow-auto-rerun") "")))))
 
     ;; Ensure all tests are registered in the test_meta table
     (runs:update-all-test_meta #f)
 
+    ;; run the run prehook if there are no tests yet run for this run:
+    ;;
+    (runs:run-pre-hook run-id)
+    
     ;; now add non-directly referenced dependencies (i.e. waiton)
     ;;======================================================================
     ;; refactoring this block into tests:get-full-data
     ;;
     ;; What happended, this code is now duplicated in tests!?
@@ -457,11 +513,17 @@
     (let ((reglen (configf:lookup *configdat* "setup" "runqueue")))
       (if (> (length (hash-table-keys test-records)) 0)
 	  (let* ((keep-going        #t)
 		 (run-queue-retries 5)
 		 (th1        (make-thread (lambda ()
-					    (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry))
+					    (handle-exceptions
+						exn
+						(begin
+						  (print-call-chain)
+						  (print " message: " ((condition-property-accessor 'exn 'message) exn)))
+					      (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
+								    (any->number reglen) all-tests-registry)))
 					    ;; (handle-exceptions
 					    ;;  exn
 					    ;;  (begin
 					    ;;    (print-call-chain (current-error-port))
 					    ;;    (debug:print-error 0 *default-log-port* "failure in runs:run-tests-queue thread, error: " ((condition-property-accessor 'exn 'message) exn))
@@ -778,12 +840,13 @@
 					    (not (equal? x hed)))
 					  (runs:calc-not-completed prereqs-not-met)))
 	 (loop-list               (list hed tal reg reruns))
 	 ;; configure the load runner
 	 (numcpus                 (common:get-num-cpus #f))
-	 (maxload                 (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3")))
-	 (waitdelay               (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60"))))
+	 (maxload                 (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3.0")))         ;; use a non-number string to disable
+         (maxhomehostload         (string->number (or (configf:lookup *configdat* "jobtools" "maxhomehostload") "1.2"))) ;; use a non-number string to disable
+         (waitdelay               (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60"))))
     (debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: (" 
 		      (string-intersperse 
 		       (map (lambda (t)
 			      (if (vector? t)
 				  (conc (db:test-get-state t) "/" (db:test-get-status t))
@@ -883,12 +946,15 @@
       ;; we are going to reset all the counters for test retries by setting a new hash table
       ;; this means they will increment only when nothing can be run
       (set! *max-tries-hash* (make-hash-table))
       ;; well, first lets see if cpu load throttling is enabled. If so wait around until the
       ;; average cpu load is under the threshold before continuing
-      (if (configf:lookup *configdat* "jobtools" "maxload") ;; only gate if maxload is specified
-	  (common:wait-for-cpuload maxload numcpus waitdelay))
+      (if maxload ;; only gate if maxload is specified
+          (common:wait-for-cpuload maxload numcpus waitdelay))
+      (if maxhomehostload
+          (common:wait-for-homehost-load maxhomehostload (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload)))
+      
       (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry)
       (runs:incremental-print-results run-id)
       (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running)
       (runs:shrink-can-run-more-tests-count runsdat)  ;; DELAY TWEAKER (still needed?)
       ;; (thread-sleep! *global-delta*)
@@ -1105,15 +1171,12 @@
 	(sorted-test-names     (tests:sort-by-priority-and-waiton test-records))
 	(test-registry         (make-hash-table))
 	(registry-mutex        (make-mutex))
 	(num-retries           0)
 	(max-retries           (config-lookup *configdat* "setup" "maxretries"))
-	(max-concurrent-jobs   (let ((mcj (config-lookup *configdat* "setup"     "max_concurrent_jobs")))
-				 (if (and mcj (string->number mcj))
-				     (string->number mcj)
-				     1))) ;; length of the register queue ahead
-	(reglen                (if (number? reglen-in) reglen-in 1))
+	(max-concurrent-jobs   (configf:lookup-number *configdat* "setup" "max_concurrent_jobs" default: 50))
+        (reglen                (if (number? reglen-in) reglen-in 1))
 	(last-time-incomplete  (- (current-seconds) 900)) ;; force at least one clean up cycle
 	(last-time-some-running (current-seconds))
 	;; (tdbdat                (tasks:open-db))
 	(runsdat (make-runs:dat
 		  ;; hed: hed
@@ -1209,11 +1272,14 @@
 			   )))
 	(runs:dat-regfull-set! runsdat regfull)
 
 	;; every 15 minutes verify the server is there for this run
 	(if (and (common:low-noise-print 240 "try start server"  run-id)
-		 (not (server:check-if-running *toppath*)))
+		 (not (or (and *runremote*
+			       (remote-server-url *runremote*)
+			       (server:ping (remote-server-url *runremote*)))
+			  (server:check-if-running *toppath*))))
 	    (server:kind-run *toppath*))
 	
 	(if (> num-running 0)
 	  (set! last-time-some-running (current-seconds)))
 
@@ -1523,11 +1589,11 @@
 		  (thread-sleep! 1)
 		  (loop)))))
       (if (not testdat) ;; should NOT happen
 	  (debug:print-error 0 *default-log-port* "failed to get test record for test-id " test-id))
       (set! test-id (db:test-get-id testdat))
-      (if (file-exists? test-path)
+      (if (common:file-exists? test-path)
 	  (change-directory test-path)
 	  (begin
 	    (debug:print-error 0 *default-log-port* "test run path not created before attempting to run the test. Perhaps you are running -remove-runs at the same time?")
 	    (change-directory *toppath*)))
       (case (if force ;; (args:get-arg "-force")
@@ -1682,11 +1748,11 @@
 ;;    'remove-runs
 ;;    'set-state-status
 ;;
 ;; NB// should pass in keys?
 ;;
-(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(mode 'remove-all)(options '()))
+(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(mode #f)(options '()))
   (common:clear-caches) ;; clear all caches
   (let* ((db           #f)
 	 ;; (tdbdat       (tasks:open-db))
 	 (keys         (rmt:get-keys))
 	 (rundat       (mt:get-runs-by-patt keys runnamepatt target))
@@ -1694,20 +1760,20 @@
 	 (runs         (vector-ref rundat 1))
 	 (states       (if state  (string-split state  ",") '()))
 	 (statuses     (if status (string-split status ",") '()))
 	 (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f)))
 	 (rp-mutex     (make-mutex))
-	 (bup-mutex    (make-mutex)))
+	 (bup-mutex    (make-mutex))
+         (keep-records (args:get-arg "-keep-records"))) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode".
 
     (let* ((write-access-actions '(remove-runs set-state-status archive run-wait))
            (dbfile             (conc  *toppath* "/megatest.db"))
            (readonly-mode      (not (file-write-access? dbfile))))
       (when (and readonly-mode
                  (member action write-access-actions))
         (debug:print-error 0 *default-log-port* "megatest.db is readonly.  Cannot proceed with action ["action"] in which write-access isrequired .")
         (exit 1)))
-
     
     (debug:print-info 4 *default-log-port* "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status)
     (if (> 2 (length state-status))
 	(begin
 	  (debug:print-error 0 *default-log-port* "the parameter to -set-state-status is a comma delimited string. E.g. COMPLETED,FAIL")
@@ -1854,29 +1920,32 @@
 			       ((archive)
 				(if (and run-dir (not toplevel-with-children))
 				    (let ((ddir (conc run-dir "/")))
 				      (case (string->symbol (args:get-arg "-archive"))
 					((save save-remove keep-html)
-					 (if (file-exists? ddir)
+					 (if (common:file-exists? ddir)
 					     (debug:print-info 0 *default-log-port* "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir)))))))
 				(if (not (null? tal))
 				    (loop (car tal)(cdr tal))))
 			       )))
 		       )
 		     (if worker-thread (thread-join! worker-thread))))))
 	   ;; remove the run if zero tests remain
 	   (if (eq? action 'remove-runs)
-	       (let ((remtests (mt:get-tests-for-run (db:get-value-by-header run header "id") #f '("DELETED") '("n/a") not-in: #t)))
+	       (let* ((run-id   (db:get-value-by-header run header "id")) ;; NB// masks run-id from above?
+                      (remtests (mt:get-tests-for-run run-id #f '("DELETED") '("n/a") not-in: #t)))
 		 (if (null? remtests) ;; no more tests remaining
 		     (let* ((dparts  (string-split lasttpath "/"))
 			    (runpath (conc "/" (string-intersperse 
 						(take dparts (- (length dparts) 1))
 						"/"))))
 		       (debug:print 1 *default-log-port* "Removing run: " runkey " " (db:get-value-by-header run header "runname") " and related record")
-		       (rmt:delete-run run-id)
-		       (rmt:delete-old-deleted-test-records)
-		       ;; (rmt:set-var "DELETED_TESTS" (current-seconds))
+                       (if (not keep-records)
+                           (begin
+                             (rmt:delete-run run-id)
+                             (rmt:delete-old-deleted-test-records)))
+                           ;; (rmt:set-var "DELETED_TESTS" (current-seconds))
 		       ;; need to figure out the path to the run dir and remove it if empty
 		       ;;    (if (null? (glob (conc runpath "/*")))
 		       ;;        (begin
 		       ;; 	 (debug:print 1 *default-log-port* "Removing run dir " runpath)
 		       ;; 	 (system (conc "rmdir -p " runpath))))
@@ -1887,25 +1956,26 @@
     )
   #t)
 
 (define (runs:remove-test-directory test mode) ;; remove-data-only)
   (let* ((run-dir       (db:test-get-rundir test))    ;; run dir is from the link tree
-	 (real-dir      (if (file-exists? run-dir)
+	 (real-dir      (if (common:file-exists? run-dir)
 			    ;; (resolve-pathname run-dir)
 			    (common:nice-path run-dir)
-			    #f)))
-    (case mode
+			    #f))
+         (clean-mode    (or mode 'remove-all)))
+    (case clean-mode
       ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f))
       ((remove-all)      (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f))
       ((archive-remove)  (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f)))
     (debug:print-info 1 *default-log-port* "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir)
     (if (and real-dir 
 	     (> (string-length real-dir) 5)
-	     (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc.
+	     (common:file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc.
 	(begin ;; let* ((realpath (resolve-pathname run-dir)))
 	  (debug:print-info 1 *default-log-port* "Recursively removing " real-dir)
-	  (if (file-exists? real-dir)
+	  (if (common:file-exists? real-dir)
 	      (runs:safe-delete-test-dir real-dir)
 	      (debug:print 0 *default-log-port* "WARNING: test dir " real-dir " appears to not exist or is not readable")))
 	(if real-dir 
 	    (debug:print 0 *default-log-port* "WARNING: directory " real-dir " does not exist")
 	    (debug:print 0 *default-log-port* "WARNING: no real directory corrosponding to link " run-dir ", nothing done")))
@@ -1927,12 +1997,12 @@
 		     (not (member run-dir (list "n/a" "/tmp/badname"))))
 		(debug:print 0 *default-log-port* "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink")
 		(debug:print 0 *default-log-port* "NOTE: the run dir for this test is undefined. Test may have already been deleted."))
 	    ))
     ;; Only delete the records *after* removing the directory. If things fail we have a record 
-    (case mode
-      ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "NOT_STARTED" "n/a" #f))
+    (case clean-mode
+      ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) (db:test-get-state test)(db:test-get-status test) #f))
       ((archive-remove)  (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f))
       (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test))))))
 
 ;;======================================================================
 ;; Routines for manipulating runs
@@ -1954,11 +2024,12 @@
       (let (;; (db   #f)
 	    (keys #f))
 	(if (launch:setup)
 	    (begin
 	      (full-runconfigs-read) ;; cache the run config
-	      (launch:cache-config)) ;; do not cache here - need to be sure runconfigs is processed
+	      ;; (launch:cache-config) ;; there are two independent config cache locations, turning this one off for now. MRW.
+	      ) ;; do not cache here - need to be sure runconfigs is processed
 	    (begin 
 	      (debug:print 0 *default-log-port* "Failed to setup, exiting")
 	      (exit 1)))
 
         
@@ -2115,11 +2186,11 @@
 (define (runs:clean-cache target runname toppath)
   (if target
       (if runname
 	  (let* ((linktree (common:get-linktree)) ;; (if toppath (configf:lookup *configdat* "setup" "linktree")))
 		 (runtop   (conc linktree "/" target "/" runname))
-		 (files    (if (file-exists? runtop)
+		 (files    (if (common:file-exists? runtop)
 			       (append (glob (conc runtop "/.megatest*"))
 				       (glob (conc runtop "/.runconfig*")))
 			       '())))
 	    (if (null? files)
 		(debug:print-info 0 *default-log-port* "No cached megatest or runconfigs files found. None removed.")

Index: sdb.scm
==================================================================
--- sdb.scm
+++ sdb.scm
@@ -22,11 +22,11 @@
 (declare (unit sdb))
 
 ;; 
 (define (sdb:open fname)
   (let* ((dbpath    (pathname-directory fname))
-	 (dbexists  (let ((fe (file-exists? fname)))
+	 (dbexists  (let ((fe (common:file-exists? fname)))
 		      (if fe 
 			  fe
 			  (begin
 			    (create-directory dbpath #t)
 			    #f))))

Index: server.scm
==================================================================
--- server.scm
+++ server.scm
@@ -1,7 +1,7 @@
 
-;; Copyright 2006-2012, Matthew Welland.
+;; Copyright 2006-2017, Matthew Welland.
 ;; 
 ;;  This program is made available under the GNU GPL version 2.0 or
 ;;  greater. See the accompanying file COPYING for details.
 ;; 
 ;;  This program is distributed WITHOUT ANY WARRANTY; without even the
@@ -8,24 +8,22 @@
 ;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 ;;  PURPOSE.
 
 (require-extension (srfi 18) extras tcp s11n)
 
-(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest directory-utils posix-extras matchable)
-;; (use zmq)
+(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest directory-utils posix-extras matchable
+     )
 
 (use spiffy uri-common intarweb http-client spiffy-request-vars)
 
 (declare (unit server))
 
 (declare (uses common))
 (declare (uses db))
 (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
-(declare (uses synchash))
+;; (declare (uses synchash))
 (declare (uses http-transport))
-(declare (uses rpc-transport))
-;;(declare (uses nmsg-transport))
 (declare (uses launch))
 (declare (uses daemon))
 
 (include "common_records.scm")
 (include "db_records.scm")
@@ -34,10 +32,16 @@
   (if (not hostport)
       #f
       (conc "http://" (car hostport) ":" (cadr hostport))))
 
 (define  *server-loop-heart-beat* (current-seconds))
+
+;;======================================================================
+;; P K T S   S T U F F 
+;;======================================================================
+
+;; ???
 
 ;;======================================================================
 ;; S E R V E R
 ;;======================================================================
 
@@ -112,11 +116,12 @@
 		      " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
 							   " -daemonize "
 							   "")
 		      ;; " -log " logfile
 		      " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &")))))
-	 (log-rotate  (make-thread common:rotate-logs  "server run, rotate logs thread")))
+	 (log-rotate  (make-thread common:rotate-logs  "server run, rotate logs thread"))
+         (load-limit  (configf:lookup-number *configdat* "server" "load-limit" default: 0.9)))
     ;; we want the remote server to start in *toppath* so push there
     (push-directory areapath)
     (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
     (thread-start! log-rotate)
     
@@ -129,11 +134,11 @@
 	(begin
 	  (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
 	  (setenv "TARGETHOST" target-host)))
       
     (setenv "TARGETHOST_LOGF" logfile)
-    (common:wait-for-normalized-load 4 " delaying server start due to load" remote-host: (get-environment-variable "TARGETHOST")) ;; do not try starting servers on an already overloaded machine, just wait forever
+    (common:wait-for-normalized-load load-limit " delaying server start due to load" remote-host: (get-environment-variable "TARGETHOST")) ;; do not try starting servers on an already overloaded machine, just wait forever
     (system (conc "nbfake " cmdln))
     (unsetenv "TARGETHOST_LOGF")
     (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
     (thread-join! log-rotate)
     (pop-directory)))
@@ -206,11 +211,25 @@
 		(if (null? tal)
 		    (if (and limit
 			     (> (length new-res) limit))
 			new-res ;; (take new-res limit)  <= need intelligent sorting before this will work
 			new-res)
-		      (loop (car tal)(cdr tal) new-res)))))))))
+		    (loop (car tal)(cdr tal) new-res)))))))))
+
+(define (server:get-num-alive srvlst)
+  (let ((num-alive 0))
+    (for-each
+     (lambda (server)
+       (match-let (((mod-time host port start-time pid)
+		    server))
+	 (let* ((uptime  (- (current-seconds) mod-time))
+		(runtime (if start-time
+			     (- mod-time start-time)
+			     0)))
+	   (if (< uptime 5)(set! num-alive (+ num-alive 1))))))
+     srvlst)
+    num-alive))
 
 ;; given a list of servers get a list of valid servers, i.e. at least
 ;; 10 seconds old, has started and is less than 1 hour old and is
 ;; active (i.e. mod-time < 10 seconds
 ;;
@@ -435,5 +454,118 @@
 	;; (* 3 24 60 60) ;; default to three days
 	;;(* 60 60 1)     ;; default to one hour
 	(* 60 5)          ;; default to five minutes
 	)))
 
+(define (server:get-best-guess-address hostname)
+  (let ((res #f))
+    (for-each 
+     (lambda (adr)
+       (if (not (eq? (u8vector-ref adr 0) 127))
+	   (set! res adr)))
+     ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME
+     (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
+    (string-intersperse 
+     (map number->string
+	  (u8vector->list
+	   (if res res (hostname->ip hostname)))) ".")))
+
+;; moving this here as it needs access to db and cannot be in common.
+;;
+(define (server:writable-watchdog dbstruct)
+  (thread-sleep! 0.05) ;; delay for startup
+  (let ((legacy-sync  (common:run-sync?))
+	(debug-mode   (debug:debug-mode 1))
+	(last-time    (current-seconds))
+	(no-sync-db   (db:open-no-sync-db))
+        (sync-duration 0) ;; run time of the sync in milliseconds
+        (this-wd-num  (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x))))
+    (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
+    (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
+    (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)" this-wd-num="this-wd-num)
+    (if (and legacy-sync (not *time-to-exit*))
+	(let* (;;(dbstruct (db:setup))
+	       (mtdb     (dbr:dbstruct-mtdb dbstruct))
+	       (mtpath   (db:dbdat-get-path mtdb)))
+	  (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
+	  (let loop ()
+	    ;; sync for filesystem local db writes
+	    ;;
+	    (mutex-lock! *db-multi-sync-mutex*)
+	    (let* ((need-sync        (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
+		   (sync-in-progress *db-sync-in-progress*)
+		   (should-sync      (and (not *time-to-exit*)
+                                          (> (- (current-seconds) *db-last-sync*) 5))) ;; sync every five seconds minimum
+		   (start-time       (current-seconds))
+		   (mt-mod-time      (file-modification-time mtpath))
+		   (recently-synced  (< (- start-time mt-mod-time) 4))
+		   (will-sync        (and (or need-sync should-sync)
+					  (not sync-in-progress)
+					  (not recently-synced))))
+              (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop.  need-sync="need-sync" sync-in-progress="sync-in-progress" should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync)
+	      ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced))
+	      ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
+	      (if will-sync (set! *db-sync-in-progress* #t))
+	      (mutex-unlock! *db-multi-sync-mutex*)
+	      (if will-sync
+                  (let ((sync-start (current-milliseconds)))
+                    (if (< sync-duration 300)
+                        (let ((res        (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
+                          (set! sync-duration (- (current-milliseconds) sync-start))
+                          (if (> res 0) ;; some records were transferred, keep the db alive
+                              (begin
+                                (mutex-lock! *heartbeat-mutex*)
+                                (set! *db-last-access* (current-seconds))
+                                (mutex-unlock! *heartbeat-mutex*)
+                                (debug:print-info 0 *default-log-port* "sync called, " res " records transferred."))
+                              (debug:print-info 2 *default-log-port* "sync called but zero records transferred")))
+                        ;; TODO: factor this next routine out into a function
+                        (with-input-from-pipe ;; this should not block other threads but need to verify this
+                         "megatest -sync-to-megatest.db"
+                         (lambda ()
+                           (let loop ((inl (read-line))
+                                      (res #f))
+                             (if (eof-object? inl)
+                                 (begin
+                                   (set! sync-duration (- (current-milliseconds) sync-start))
+                                   (cond
+                                    ((not res)
+                                     (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\""))
+                                    ((> res 0)
+                                     (mutex-lock! *heartbeat-mutex*)
+                                     (set! *db-last-access* (current-seconds))
+                                     (mutex-unlock! *heartbeat-mutex*))))
+                                 (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl)))
+                                                     (if matches
+                                                         (string->number (cadr matches))
+                                                         #f))))
+                                   (loop (read-line)
+                                         (or num-synced res))))))))))
+	      (if will-sync
+		  (begin
+		    (mutex-lock! *db-multi-sync-mutex*)
+		    (set! *db-sync-in-progress* #f)
+		    (set! *db-last-sync* start-time)
+		    (mutex-unlock! *db-multi-sync-mutex*)))
+	      (if (and debug-mode
+		       (> (- start-time last-time) 60))
+		  (begin
+		    (set! last-time start-time)
+		    (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
+	    
+	    ;; keep going unless time to exit
+	    ;;
+	    (if (not *time-to-exit*)
+		(let delay-loop ((count 0))
+                  ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
+                                                            
+		  (if (and (not *time-to-exit*)
+			   (< count 6)) ;; was 11, changing to 4. 
+		      (begin
+			(thread-sleep! 1)
+			(delay-loop (+ count 1))))
+		  (if (not *time-to-exit*) (loop))))
+	    ;; time to exit, close the no-sync db here
+	    (db:no-sync-close-db no-sync-db)
+	    (if (common:low-noise-print 30)
+		(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" this-wd-num="this-wd-num)))))))
+

Index: tasks.scm
==================================================================
--- tasks.scm
+++ tasks.scm
@@ -36,24 +36,24 @@
 	(handle-exceptions
 	 exn
 	 (begin
 	   (print-call-chain (current-error-port))
 	   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
-	   (debug:print 0 *default-log-port* " exn=" (condition->list exn))
+	   (debug:print 5 *default-log-port* " exn=" (condition->list exn))
 	   (debug:print 0 *default-log-port* "tasks:wait-on-journal failed. Continuing on, you can ignore this call-chain")
 	   #t) ;; if stuff goes wrong just allow it to move on
-	 (let loop ((journal-exists (file-exists? fullpath))
+	 (let loop ((journal-exists (common:file-exists? fullpath))
 		    (count          n)) ;; wait ten times ...
 	   (if journal-exists
 	       (begin
 		 (if (and waiting-msg
 			  (eq? (modulo n 30) 0))
 		     (debug:print 0 *default-log-port* waiting-msg))
 		 (if (> count 0)
 		     (begin
 		       (thread-sleep! 1)
-		       (loop (file-exists? fullpath)
+		       (loop (common:file-exists? fullpath)
 			     (- count 1)))
 		     (begin
 		       (debug:print 0 *default-log-port* "ERROR: removing the journal file " fullpath ", this is not good. Look for disk full, write access and other issues.")
 		       (if remove (system (conc "rm -rf " fullpath)))
 		       #f)))
@@ -87,21 +87,21 @@
        exn
        (if (> numretries 0)
 	   (begin
 	     (print-call-chain (current-error-port))
 	     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
-	     (debug:print 0 *default-log-port* " exn=" (condition->list exn))
+	     (debug:print 5 *default-log-port* " exn=" (condition->list exn))
 	     (thread-sleep! 1)
 	     (tasks:open-db numretries (- numretries 1)))
 	   (begin
 	     (print-call-chain (current-error-port))
 	     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
-	     (debug:print 0 *default-log-port* " exn=" (condition->list exn))))
+	     (debug:print 5 *default-log-port* " exn=" (condition->list exn))))
        (let* ((dbpath        (db:dbfile-path )) ;; (tasks:get-task-db-path))
 	      (dbfile       (conc dbpath "/monitor.db"))
 	      (avail        (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away
-	      (exists       (file-exists? dbpath))
+	      (exists       (common:file-exists? dbpath))
 	      (write-access (file-write-access? dbpath))
 	      (mdb          (cond ;; what the hek is *toppath* doing here?
 			     ((and (string? *toppath*)(file-write-access? *toppath*))
 			      (sqlite3:open-database dbfile))
 			     ((file-read-access? dbpath)    (sqlite3:open-database dbfile))
@@ -183,15 +183,27 @@
 ;; no elegance here ...
 ;;
 (define (tasks:kill-server hostname pid #!key (kill-switch ""))
   (debug:print-info 0 *default-log-port* "Attempting to kill server process " pid " on host " hostname)
   (setenv "TARGETHOST" hostname)
-  (setenv "TARGETHOST_LOGF" "server-kills.log")
-  (system (conc "nbfake kill "kill-switch" "pid))
+  (let* ((logdir (if (directory-exists? "logs")
+                    "logs/"
+                    ""))
+         (logfile (if logdir (conc "logs/server-"pid"-"hostname".log") #f))
+         (gzfile  (if logfile (conc logfile ".gz"))))
+    (setenv "TARGETHOST_LOGF" (conc logdir "server-kills.log"))
+
+    (system (conc "nbfake kill "kill-switch" "pid))
 
-  (unsetenv "TARGETHOST_LOGF")
-  (unsetenv "TARGETHOST"))
+    (when logfile
+      (thread-sleep! 0.5)
+      (if (common:file-exists? gzfile) (delete-file gzfile))
+      (system (conc "gzip " logfile))
+      
+      (unsetenv "TARGETHOST_LOGF")
+      (unsetenv "TARGETHOST"))))
+    
  
 ;;======================================================================
 ;; M O N I T O R S
 ;;======================================================================
 

ADDED   tcmt.scm
Index: tcmt.scm
==================================================================
--- /dev/null
+++ tcmt.scm
@@ -0,0 +1,172 @@
+;; Copyright 2006-2017, Matthew Welland.
+;; 
+;;  This program is made available under the GNU GPL version 2.0 or
+;;  greater. See the accompanying file COPYING for details.
+;; 
+;;  This program is distributed WITHOUT ANY WARRANTY; without even the
+;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+;;  PURPOSE.
+;;
+;;======================================================================
+;;
+;; Wrapper to enable running Megatest flows under teamcity
+;;
+;;  1. Run the megatest process and pass it all the needed parameters
+;;  2. Every five seconds check for state/status changes and print the info
+;;
+
+(use srfi-1 posix srfi-69 srfi-18 regex)
+
+(declare (uses margs))
+(declare (uses rmt))
+(declare (uses common))
+(declare (uses megatest-version))
+
+(include "megatest-fossil-hash.scm")
+(include "db_records.scm")
+
+(define origargs (cdr (argv)))
+(define remargs (args:get-args
+		 (argv)
+		 `( "-target"
+		    "-reqtarg"
+		    "-runname"
+		    )
+		 `("-tc-repl"
+		   )
+		 args:arg-hash
+		 0))
+
+;; ##teamcity[testStarted name='suite.testName']
+;; ##teamcity[testStdOut name='suite.testName' out='text']
+;; ##teamcity[testStdErr name='suite.testName' out='error text']
+;; ##teamcity[testFailed name='suite.testName' message='failure message' details='message and stack trace']
+;; ##teamcity[testFinished name='suite.testName' duration='50']
+;; 
+
+(define (print-changes-since data run-ids last-update tsname target runname)
+  (let ((now   (current-seconds)))
+    (handle-exceptions
+     exn
+     (begin (print-call-chain) (print "Error message: " ((condition-property-accessor 'exn 'message) exn)))
+     (for-each
+      (lambda (run-id)
+	(let* ((tests (rmt:get-tests-for-run run-id "%" '() '() #f #f #f #f #f #f last-update #f)))
+	  ;; (print "DEBUG: got tests=" tests)
+	  (for-each
+	   (lambda (testdat)
+	     (let* ((testn    (db:test-get-fullname     testdat))
+		    (testname (db:test-get-testname     testdat))
+		    (itempath (db:test-get-item-path    testdat))
+		    (tctname  (if (string=? itempath "") testname (conc testname "." (string-translate itempath "/" "."))))
+		    (state    (db:test-get-state        testdat))
+		    (status   (db:test-get-status       testdat))
+		    (duration (or (any->number (db:test-get-run_duration testdat)) 0))
+		    (comment  (db:test-get-comment      testdat))
+		    (logfile  (db:test-get-final_logf   testdat))
+		    (prevstat (hash-table-ref/default data testn #f))
+		    (newstat  (if (equal? state "RUNNING")
+				  "RUNNING"
+				  (if (equal? state "COMPLETED")
+				      status
+				      "UNK")))
+		    (cmtstr   (if comment
+				  (conc " message='" comment "' ")
+				  " "))
+		    (details  (if (string-match ".*html$" logfile)
+				  (conc " details='" *toppath* "/lt/" target "/" runname "/" testname (if (equal? itempath "") "/" (conc "/" itempath "/")) logfile "' ")
+				  "")))
+		    
+	       ;; (print "DEBUG: testn=" testn " state=" state " status=" status " prevstat=" prevstat " newstat=" newstat)
+	       (if (or (not prevstat)
+		       (not (equal? prevstat newstat)))
+		   (begin
+		     (case (string->symbol newstat)
+		       ((UNK)       ) ;; do nothing
+		       ((RUNNING)   (print "##teamcity[testStarted name='" tctname "']"))
+		       ((PASS SKIP) (print "##teamcity[testFinished name='" tctname "' duration='" (* 1e3 duration) "'" cmtstr details " ]"))
+		       (else
+			(print "##teamcity[testFailed name='" tctname "' " cmtstr details " ]")))
+		     (flush-output)
+		     (hash-table-set! data testn newstat)))))
+	   tests)))
+      run-ids))
+    now))
+
+(define (monitor pid)
+  (let ((run-ids #f)
+	(testdat (make-hash-table))
+	(keys    #f)
+	(last-update 0)
+	(target  (or (args:get-arg "-target")
+		     (args:get-arg "-reqtarg")))
+	(runname (args:get-arg "-runname"))
+	(tsname  #f))
+    (if (and target runname)
+	(begin
+	  (launch:setup)
+	  (set! keys (rmt:get-keys))))
+    (set! tsname  (common:get-testsuite-name))
+    (print "TCMT: for testsuite=" tsname " found runname=" runname ", target=" target ", keys=" keys " and successfully ran launch:setup.")
+    (let loop ()
+      (handle-exceptions
+       exn
+       ;; (print "Process done.")
+       (begin (print-call-chain) (print "Error message: " ((condition-property-accessor 'exn 'message) exn)))
+       (let-values (((pidres exittype exitstatus)
+		     (process-wait pid #t)))
+	 (if (and keys
+		  (or (not run-ids)
+		      (null? run-ids)))
+	     (let* ((runs (rmt:get-runs-by-patt keys
+						runname 
+						target
+						#f ;; offset
+						#f ;; limit
+						#f ;; fields
+						0  ;; last-update
+						))
+		    (header (db:get-header runs))
+		    (rows   (db:get-rows   runs))
+		    (run-ids-in (map (lambda (row)
+				       (db:get-value-by-header row header "id"))
+				     rows)))
+	       (set! run-ids run-ids-in)))
+	 ;; (print "TCMT: pidres=" pidres " exittype=" exittype " exitstatus=" exitstatus " run-ids=" run-ids)
+	 (if keys
+	     (set! last-update (print-changes-since testdat run-ids last-update tsname target runname)))
+	 (if (eq? pidres 0)
+	     (begin
+	       (thread-sleep! 3)
+	       (loop))
+	     (begin
+	       ;; (print "TCMT: pidres=" pidres " exittype=" exittype " exitstatus=" exitstatus " run-ids=" run-ids)
+	       (print "TCMT: All done.")
+	       )))))))
+
+;; (if (not (eq? pidres 0))	  ;; (not exitstatus))
+;; 	  (begin
+;; 	    (thread-sleep! 3)
+;; 	    (loop))
+;; 	  (print "Process: megatest " (string-intersperse origargs " ") " is done.")))))
+(define (main)
+  (let* ((mt-done #f)
+	 (pid     #f)
+	 (th1     (make-thread (lambda ()
+				 (print "Running megatest " (string-intersperse origargs " "))
+				 (set! pid (process-run "megatest" origargs)))
+			       "Megatest job"))
+	 (th2     (make-thread (lambda ()
+				 (monitor pid))
+			       "Monitor job")))
+    (thread-start! th1)
+    (thread-sleep! 1) ;; give the process time to get going
+    (thread-start! th2)
+    (thread-join! th2)))
+
+(if (args:get-arg "-tc-repl")
+    (repl)
+    (main))
+
+;; (process-wait)
+

Index: tdb.scm
==================================================================
--- tdb.scm
+++ tdb.scm
@@ -50,11 +50,11 @@
   (debug:print-info 11 *default-log-port* "open-test-db " work-area)
   (if (and work-area 
 	   (directory? work-area)
 	   (file-read-access? work-area))
       (let* ((dbpath              (conc work-area "/testdat.db"))
-	     (dbexists            (file-exists? dbpath))
+	     (dbexists            (common:file-exists? dbpath))
 	     (work-area-writeable (file-write-access? work-area))
 	     (db                  (handle-exceptions  ;; open the db if area writeable or db pre-existing. open in-mem otherwise. if exception, open in-mem
 				   exn
 				   (begin
 				     (print-call-chain (current-error-port))

Index: testnanomsg/req-rep.scm
==================================================================
--- testnanomsg/req-rep.scm
+++ testnanomsg/req-rep.scm
@@ -1,30 +1,33 @@
 ;; watch nanomsg's pipeline load-balancer in action.
 (use nanomsg)
 
+;; client
 (define req   (nn-socket 'req))
-(define rep   (nn-socket 'rep))
-
-(nn-bind    rep  "inproc://test")
-(nn-connect req  "inproc://test")
+(nn-connect req  "inproc://test") 
 
 (define (client-send-receive soc msg)
   (nn-send soc msg)
   (nn-recv soc))
 
+;; server
+(define rep   (nn-socket 'rep))
+(nn-bind    rep  "inproc://test")
+
 (define ((server soc))
   (let loop ((msg-in (nn-recv soc)))
     (if (not (equal? msg-in "quit"))
 	(begin
 	  (nn-send soc (conc "hello " msg-in))
 	  (loop (nn-recv soc))))))
 
 (thread-start! (server rep))
 
+;; client 
 (print (client-send-receive req "Matt"))
 (print (client-send-receive req "Tom"))
 
 ;; (client-send-receive req "quit")
 
-(nn-close req)
-(nn-close rep)
+(nn-close req) ;; client
+(nn-close rep) ;; server
 (exit)

Index: tests.scm
==================================================================
--- tests.scm
+++ tests.scm
@@ -61,16 +61,16 @@
 (define (tests:get-valid-tests test-registry tests-paths)
   (if (null? tests-paths) 
       test-registry
       (let loop ((hed (car tests-paths))
 		 (tal (cdr tests-paths)))
-	(if (file-exists? hed)
+	(if (common:file-exists? hed)
 	    (for-each (lambda (test-path)
 			(let* ((tname   (last (string-split test-path "/")))
 			       (tconfig (conc test-path "/testconfig")))
 			  (if (and (not (hash-table-ref/default test-registry tname #f))
-				   (file-exists? tconfig))
+				   (common:file-exists? tconfig))
 			      (hash-table-set! test-registry tname test-path))))
 		      (glob (conc hed "/*"))))
 	(if (null? tal)
 	    test-registry
 	    (loop (car tal)(cdr tal))))))
@@ -305,11 +305,11 @@
 	  (db:test-get-rundir prev-testdat)) ;; )
 	 (waivers     (if testconfig (configf:section-vars testconfig "waivers") '()))
 	 (waiver-rx   (regexp "^(\\S+)\\s+(.*)$"))
 	 (diff-rule   "diff %file1% %file2%")
 	 (logpro-rule "diff %file1% %file2% | logpro %waivername%.logpro %waivername%.html"))
-    (if (not (file-exists? test-rundir))
+    (if (not (common:file-exists? test-rundir))
 	(begin
 	  (debug:print-error 0 *default-log-port* "test run directory is gone, cannot propagate waiver")
 	  #f)
 	(begin
 	  (push-directory test-rundir)
@@ -322,11 +322,11 @@
 				     (wparts      (if waiver (string-match waiver-rx waiver) #f))
 				     (waiver-rule (if wparts (cadr wparts)  #f))
 				     (waiver-glob (if wparts (caddr wparts) #f))
 				     (logpro-file (if waiver
 						      (let ((fname (conc hed ".logpro")))
-							(if (file-exists? fname)
+							(if (common:file-exists? fname)
 							    fname 
 							    (begin
 							      (debug:print 0 *default-log-port* "INFO: No logpro file " fname " falling back to diff")
 							      #f)))
 						      #f))
@@ -420,19 +420,19 @@
     ;;       (sqlite3:execute db "UPDATE tests SET first_warn=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path)))
 
     (let ((category (hash-table-ref/default otherdat ":category" ""))
 	  (variable (hash-table-ref/default otherdat ":variable" ""))
 	  (value    (hash-table-ref/default otherdat ":value"    #f))
-	  (expected (hash-table-ref/default otherdat ":expected" #f))
-	  (tol      (hash-table-ref/default otherdat ":tol"      #f))
+	  (expected (hash-table-ref/default otherdat ":expected" "n/a"))
+	  (tol      (hash-table-ref/default otherdat ":tol"      "n/a"))
 	  (units    (hash-table-ref/default otherdat ":units"    ""))
 	  (type     (hash-table-ref/default otherdat ":type"     ""))
 	  (dcomment (hash-table-ref/default otherdat ":comment"  "")))
       (debug:print 4 *default-log-port* 
 		   "category: " category ", variable: " variable ", value: " value
 		   ", expected: " expected ", tol: " tol ", units: " units)
-      (if (and value expected tol) ;; all three required
+      (if (and value) ;; require only value; BB was- all three required
 	  (let ((dat (conc category ","
 			   variable ","
 			   value    ","
 			   expected ","
 			   tol      ","
@@ -439,11 +439,13 @@
 			   units    ","
 			   dcomment ",," ;; extra comma for status
 			   type     )))
 	    ;; This was run remote, don't think that makes sense. Perhaps not, but that is the easiest path for the moment.
 	    (rmt:csv->test-data run-id test-id
-				dat))))
+				dat)
+            (thread-sleep! 10) ;; add 10 second delay before quit incase rmt needs time to start a server.
+            )))
       
     ;; need to update the top test record if PASS or FAIL and this is a subtest
     ;;;;;; (if (not (equal? item-path ""))
     ;;;;;;     (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status #f) ;;;;;)
 
@@ -839,11 +841,11 @@
 						      '()
 						      (lambda (x p)
 							(let* ((targ-path (string-intersperse p "/"))
                                                                (full-path (conc linktree "/" targ-path))
                                                                (run-name  (car (reverse p))))
-                                                          (if (and (file-exists? full-path)
+                                                          (if (and (common:file-exists? full-path)
                                                                    (directory?   full-path)
                                                                    (file-write-access? full-path))
                                                               (s:a run-name 'href (conc targ-path "/run-summary.html"))
                                                               (begin
                                                                 (debug:print 0 *default-log-port* "INFO: Can't create " targ-path "/run-summary.html")
@@ -878,11 +880,11 @@
                                            path-parts))
                                        test-dats))
                     (tests-htree (common:list->htree tests-tree-dat))
                     (html-dir    (conc linktree "/" (string-intersperse run-dir "/")))
                     (html-path   (conc html-dir "/run-summary.html"))
-                    (oup         (if (and (file-exists? html-dir)
+                    (oup         (if (and (common:file-exists? html-dir)
                                           (directory?   html-dir)
                                           (file-write-access? html-dir))
                                      (open-output-file  html-path)
                                      #f)))
                ;; (print "run-dir: " run-dir ", tests-tree-dat: " tests-tree-dat)
@@ -906,21 +908,21 @@
                                                                           (item-path ;; (if (> (length p) 2) ;; test-name + run-name
                                                                            (string-intersperse p "/"))
                                                                           (full-targ (conc html-dir "/" targ-path))
                                                                           (std-file  (conc full-targ "/test-summary.html"))
                                                                           (alt-file  (conc full-targ "/megatest-rollup-" test-name ".html"))
-                                                                          (html-file (if (file-exists? alt-file)
+                                                                          (html-file (if (common:file-exists? alt-file)
                                                                                          alt-file
                                                                                          std-file))
                                                                           (run-name  (car (reverse p))))
-                                                                     (if (and (not (file-exists? full-targ))
+                                                                     (if (and (not (common:file-exists? full-targ))
                                                                               (directory? full-targ)
                                                                               (file-write-access? full-targ))
                                                                          (tests:summarize-test 
                                                                           run-id 
                                                                           (rmt:get-test-id run-id test-name item-path)))
-                                                                     (if (file-exists? full-targ)
+                                                                     (if (common:file-exists? full-targ)
                                                                          (s:a run-name 'href html-file)
                                                                          (begin
                                                                            (debug:print 0 *default-log-port* "ERROR: can't access " full-targ)
                                                                            (conc "No summary for " run-name)))))
                                                                  ))))))
@@ -1129,11 +1131,11 @@
 ;; Gather data from test/task specifications
 ;;======================================================================
 
 ;; (define (tests:get-valid-tests testsdir test-patts) ;;  #!key (test-names '()))
 ;;   (let ((tests (glob (conc testsdir "/tests/*")))) ;; " (string-translate patt "%" "*")))))
-;;     (set! tests (filter (lambda (test)(file-exists? (conc test "/testconfig"))) tests))
+;;     (set! tests (filter (lambda (test)(common:file-exists? (conc test "/testconfig"))) tests))
 ;;     (delete-duplicates
 ;;      (filter (lambda (testname)
 ;; 	       (tests:match test-patts testname #f))
 ;; 	     (map (lambda (testp)
 ;; 		    (last (string-split testp "/")))
@@ -1146,14 +1148,15 @@
 	   (getenv "MT_TEST_NAME")
 	   (getenv "MT_ITEMPATH"))
       (conc (getenv "MT_LINKTREE")  "/"
 	    (getenv "MT_TARGET")    "/"
 	    (getenv "MT_RUNNAME")   "/"
-	    (getenv "MT_TEST_NAME") "/"
-	    (if (or (getenv "MT_ITEMPATH")
-		    (not (string=? "" (getenv "MT_ITEMPATH"))))
-		(conc "/" (getenv "MT_ITEMPATH"))))
+	    (getenv "MT_TEST_NAME")
+	    (if (and (getenv "MT_ITEMPATH")
+                     (not (string=? "" (getenv "MT_ITEMPATH"))))
+		(conc "/" (getenv "MT_ITEMPATH"))
+                ""))
       #f))
 
 ;; if .testconfig exists in test directory read and return it
 ;; else if have cached copy in *testconfigs* return it IFF there is a section "have fulldata"
 ;; else read the testconfig file
@@ -1163,11 +1166,11 @@
   (let* ((use-cache    (common:use-cache?))
 	 (cache-path   (tests:get-test-path-from-environment))
 	 (cache-file   (and cache-path (conc cache-path "/.testconfig")))
 	 (cache-exists (and cache-file
 			    (not force-create)  ;; if force-create then pretend there is no cache to read
-			    (file-exists? cache-file)))
+			    (common:file-exists? cache-file)))
 	 (cached-dat   (if (and (not force-create)
 				cache-exists
 				use-cache)
 			   (handle-exceptions
 			    exn
@@ -1187,11 +1190,11 @@
 	      (let* ((treg         (or test-registry
 				       (tests:get-all)))
 		     (test-path    (or (hash-table-ref/default treg test-name #f)
 				       (conc *toppath* "/tests/" test-name)))
 		     (test-configf (conc test-path "/testconfig"))
-		     (testexists   (and (file-exists? test-configf)(file-read-access? test-configf)))
+		     (testexists   (and (common:file-exists? test-configf)(file-read-access? test-configf)))
 		     (tcfg         (if testexists
 				       (read-config test-configf #f system-allowed
 						    environ-patt: (if system-allowed
 								      "pre-launch-env-vars"
 								      #f))
@@ -1356,11 +1359,11 @@
 ;;
 (define (tests:lazy-dot testrecords  outtype sizex sizey)
   (let ((dfile (conc "/tmp/." (current-user-name) "-" (server:mk-signature) ".dot"))
 	(fname (conc "/tmp/." (current-user-name) "-" (server:mk-signature) ".dotdat")))
     (tests:write-dot-file testrecords dfile sizex sizey)
-    (if (file-exists? fname)
+    (if (common:file-exists? fname)
 	(let ((res (with-input-from-file fname
 		     (lambda ()
 		       (read-lines)))))
 	  (system (conc "env -i PATH=$PATH dot -T " outtype " < " dfile " > " fname "&"))
 	  res)
@@ -1559,11 +1562,11 @@
 	   (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1)))
 	 (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
 	   (debug:print-error 0 *default-log-port* "tried for over a minute to update meta info and failed. Giving up")
 	   (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.")
 	   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
-	   (print "exn=" (condition->list exn))
+	   (debug:print 5 *default-log-port* "exn=" (condition->list exn))
 	   (debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
 	   (print-call-chain (current-error-port))))
      (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
   )))
 	 

Index: tests/Makefile
==================================================================
--- tests/Makefile
+++ tests/Makefile
@@ -173,11 +173,11 @@
 	cd mintest;$(MEGATEST) -server - -debug $(DEBUG) > server.log 2> server.log & 
 	sleep 3
 	cd mintest;$(DASHBOARD) -rows 18 &
 
 cleanprep : ../*.scm Makefile */*.config build
-	mkdir -p fullrun/tmp/mt_runs fullrun/tmp/mt_links /tmp/$(USER)/adisk1
+	mkdir -p fullrun/tmp/mt_runs fullrun/tmp/mt_links /tmp/$(USER)/adisk1 fullrun/logs
 	rm -f */logging.db
 	touch cleanprep
 
 fullprep : cleanprep
 	cd fullrun;$(MEGATEST) -remove-runs :runname $(RUNNAME)% -target %/%/% -testpatt %/%

Index: tests/fdktestqa/testqa/megatest.config
==================================================================
--- tests/fdktestqa/testqa/megatest.config
+++ tests/fdktestqa/testqa/megatest.config
@@ -1,12 +1,14 @@
 [setup]
 testcopycmd cp --remove-destination -rlv TEST_SRC_PATH/. TEST_TARG_PATH/. >> TEST_TARG_PATH/mt_launch.log 2>> TEST_TARG_PATH/mt_launch.log
 # launchwait no
-launch-delay 0
+launch-delay 0.1
 
 [server]
-runtime 180
+# runtime 180
+# timeout is in hours, this is how long the server will stay alive when not being used.
+timeout 0.1
 
 # All these are overridden in ../fdk.config
 # [jobtools]
 # launcher nbfake
 # launcher bsub -q priority -o $MT_TEST_RUN_DIR/openlava.log 

Index: tests/fdktestqa/testqa/tests/bigrun2/testconfig
==================================================================
--- tests/fdktestqa/testqa/tests/bigrun2/testconfig
+++ tests/fdktestqa/testqa/tests/bigrun2/testconfig
@@ -9,11 +9,11 @@
 mode itemwait
 itemmap .*/
 
 # Iteration for your tests are controlled by the items section
 [items]
-NUMBER #{scheme (string-intersperse (map (lambda (x)(conc "blah/" x)) \
+NUMBER #{scheme (string-intersperse (map (lambda (x)(conc (if (getenv "USEBLAH") "blah/" "") x)) \
                                          (map number->string (sort (let loop ((a 0)(res '())) \
                                                                         (if (<= a (or (any->number (get-environment-variable "NUMTESTS")) 2500)) \
                                                                             (loop (+ a 1)(cons a res)) res)) <))) " ")}
 
 

Index: tests/fullrun/megatest.config
==================================================================
--- tests/fullrun/megatest.config
+++ tests/fullrun/megatest.config
@@ -47,11 +47,13 @@
 waivercommentpatt ^WW\d+ [a-z].*
 incomplete-timeout 1
 
 # wait 0.5 seconds between launching every process
 #
-launch-delay 0.5
+# launch-delay 0.5
+launch-delay 0
+
 
 # wait for runs to completely complete. yes, anything else is no
 run-wait yes
 
 # If set to "default" the old code is used. Otherwise defaults to 200 or uses

Index: tests/fullrun/runconfigs.config
==================================================================
--- tests/fullrun/runconfigs.config
+++ tests/fullrun/runconfigs.config
@@ -1,10 +1,10 @@
 [default]
 SOMEVAR This should show up in SOMEVAR3
 VARNOVAL
 VARNOVAL_WITHSPACE
-QUICK %
+QUICKPATT test_mt_vars,test2,priority_9
 
 # target based getting of config file, look at afs.config and nfs.config
 [include #{getenv fsname}.config]
 
 [include #{getenv MT_RUN_AREA_HOME}/common_runconfigs.config]

Index: tests/tests.scm
==================================================================
--- tests/tests.scm
+++ tests/tests.scm
@@ -17,10 +17,26 @@
 (import srfi-18)
 ;; (require-extension zmq)
 ;; (import zmq)
 
 (define test-work-dir (current-directory))
+
+;; given list of lists
+;;  ( ( msg expected param1 param2 ...)
+;;    ( ... ) )
+;; apply test to all
+;;
+(define (test-batch proc pname inlst #!key (post-proc #f))
+  (for-each
+   (lambda (spec)
+     (let ((msg    (conc pname " " (car spec)))
+           (result (cadr spec))
+           (params (cddr spec)))
+       (if post-proc
+           (test msg result (post-proc (apply proc params)))
+           (test msg result (apply proc params)))))
+   inlst))
 
 ;; read in all the _record files
 (let ((files (glob "*_records.scm")))
   (for-each
    (lambda (file)

Index: tests/unittests/all-rmt.scm
==================================================================
--- tests/unittests/all-rmt.scm
+++ tests/unittests/all-rmt.scm
@@ -28,13 +28,25 @@
 ;; DEF (rmt:kill-server run-id)
 ;; DEF (rmt:start-server run-id)
 (test #f '(#t "successful login")(rmt:login #f))
 ;; DEF (rmt:login-no-auto-client-setup connection-info)
 (test #f #t (pair? (rmt:get-latest-host-load (get-host-name))))
+
+;; get-latest-host-load does a lookup in the db, it won't return a useful value unless
+;; a test ran recently on host
+(test-batch rmt:get-latest-host-load
+            "rmt:get-latest-host-load"
+            (list (list "localhost"  #t (get-host-name))
+                  (list "not-a-host" #t "not-a-host"  ))
+            post-proc: pair?)
+                                           
 (test #f #t (list? (rmt:get-changed-record-ids 0)))
+
 (test #f #f (begin (runs:update-all-test_meta #f) #f))
+
 (test #f '("test1" "test2")(sort (alist-ref "tagtwo" (hash-table->alist (rmt:get-tests-tags)) equal?) string<=))
+
 (test #f '() (rmt:get-key-val-pairs 0))
 (test #f '("SYSTEM" "RELEASE") (rmt:get-keys))
 (test #f '("SYSTEM" "RELEASE") (rmt:get-keys-write)) ;; dummy query to force server start
 (test #f '() (rmt:get-key-vals 1))
 (test #f (vector '("SYSTEM" "RELEASE") '()) (rmt:get-targets))
@@ -82,11 +94,27 @@
 (test #f '()(rmt:get-prev-run-ids 1))
 (test #f #t (begin (rmt:lock/unlock-run 1 #t #f "mikey") #t))
 (test #f "JUSTFINE" (rmt:get-run-status 1))
 (test #f #t (begin (rmt:set-run-status 1 "NOTFINE" msg: "A message") #t))
 (test #f #t (begin (rmt:update-run-event_time 1) #t))
+
 ;; (rmt:get-runs-by-patt  keys runnamepatt targpatt offset limit fields last-runs-update) ;; fields of #f uses default
+;;
+(let ((keys (rmt:get-keys))
+      (rnp  "%")    ;; run name patt
+      (tpt  "%/%")) ;; target patt
+  (test-batch rmt:get-runs-by-patt
+              "rmt:get-runs-by-patt"
+              (list (list "t=0" #t keys rnp tpt #f #f #f 0)
+                    (list "t=current" #f keys rnp tpt #f #f #f (+ 100 (current-seconds))) ;; should be no records from the future
+                    )
+              post-proc: (lambda (res)
+                           ;; (print "rmt:get-runs-by-patt returned: " res)
+                           (and (vector? res)
+                                (let ((rows (vector-ref res 1)))
+                                  (> (length rows) 0))))))
+
 ;; (rmt:find-and-mark-incomplete run-id ovr-deadtime)
 ;; (rmt:get-main-run-stats run-id)
 ;; (rmt:get-var varname)
 ;; (rmt:set-var varname value)
 ;; (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f))

Index: tests/unittests/tests.scm
==================================================================
--- tests/unittests/tests.scm
+++ tests/unittests/tests.scm
@@ -12,67 +12,70 @@
 ;; 	 (non-completed   (runs:calc-not-completed prereqs-not-met))
 ;; 	 (runnables       (runs:calc-runnable prereqs-not-met)))
 ;; 
 ;; 
 ;; 
-
 (define user    (current-user-name))
 (define runname "mytestrun")
 (define keys    (rmt:get-keys))
 (define runinfo #f)
 (define keyvals '(("SYSTEM" "abc")("RELEASE" "def")))
 (define header  (list "SYSTEM" "RELEASE" "id" "runname" "state" "status" "owner" "event_time"))
+(define contour #f)
 (define run-id  1)
-
+(define new-comment #f)
 ;; Create a run
-(test #f 1 (rmt:register-run keyvals runname "new" "n/a" user))
+(test #f 1  (rmt:register-run keyvals runname "new" "n/a" user contour))
 (test #f #t (rmt:general-call 'register-test run-id run-id "test-one"   ""))
 (test #f #t (rmt:general-call 'register-test run-id run-id "test-two"   ""))
 (test #f #t (rmt:general-call 'register-test run-id run-id "test-three" ""))
 (test #f #t (rmt:general-call 'register-test run-id run-id "test-four"  ""))
 
-(rmt:test-set-state-status-by-id run-id (rmt:get-test-id run-id "test-one"   "") "COMPLETED" "FAIL" "")
-(rmt:test-set-state-status-by-id run-id (rmt:get-test-id run-id "test-two"   "") "COMPLETED" "PASS" "")
-(rmt:test-set-state-status-by-id run-id (rmt:get-test-id run-id "test-three" "") "RUNNING"   "n/a"  "")
-(rmt:test-set-state-status-by-id run-id (rmt:get-test-id run-id "test-four"  "") "COMPLETED" "WARN" "")
+
+;; (define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
+(rmt:test-set-state-status-by-id run-id (rmt:get-test-id run-id "test-one"   "") "COMPLETED" "FAIL" new-comment)
+(rmt:test-set-state-status-by-id run-id (rmt:get-test-id run-id "test-two"   "") "COMPLETED" "PASS" new-comment)
+(rmt:test-set-state-status-by-id run-id (rmt:get-test-id run-id "test-three" "") "RUNNING"   "n/a"  new-comment)
+(rmt:test-set-state-status-by-id run-id (rmt:get-test-id run-id "test-four"  "") "COMPLETED" "WARN" new-comment)
 
-(print "MODE=not in")
-(test #f '()
+(test "MODE=not in"
+      '()
       (filter
        (lambda (y)
 	 (equal? y "FAIL")) ;; any FAIL in the output list?
        (map 
 	(lambda (x)(vector-ref x 4))
 	(rmt:get-tests-for-run run-id "%/%" '() '("FAIL") #f #f #t 'event_time "DESC" 'shortlist 0 'dashboard))))
 
-(print "MODE=in")
-(test #f '("FAIL")
+(test "MODE=in"
+      '("FAIL")
       (map 
        (lambda (x)(vector-ref x 4))
        (rmt:get-tests-for-run run-id "%/%" '() '("FAIL") #f #f #f 'event_time "DESC" 'shortlist 0 'dashboard)))
 (set! *verbosity* 1)
 
-(print "MODE=in, state in RUNNING")
 ;; (set! *verbosity* 8)
-(test #f '("RUNNING")
+(test "MODE=in, state in RUNNING" '("RUNNING")
       (map 
        (lambda (x)(vector-ref x 3))
        (rmt:get-tests-for-run run-id "%/%" '("RUNNING") '() #f #f #f 'event_time "DESC" 'shortlist 0 'dashboard)))
 (set! *verbosity* 1)
 
-(print "MODE=in, state in RUNNING and status IN WARN")
 ;; (set! *verbosity* 8)
-(test #f '(("RUNNING" . "n/a") ("COMPLETED" . "WARN"))
-      (map 
-       (lambda (x)
-	 (cons (vector-ref x 3)(vector-ref x 4)))
-       (rmt:get-tests-for-run run-id "%/%" '("RUNNING") '("WARN") #f #f #f 'event_time "DESC" 'shortlist 0 'dashboard)))
+;;(define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)
+(test
+ "MODE=in, state in RUNNING and status IN WARN"
+ '(("COMPLETED" . "WARN") ("RUNNING" . "n/a") )
+ (map 
+  (lambda (x)
+    (cons (vector-ref x 3)(vector-ref x 4)))
+  (rmt:get-tests-for-run run-id "%/%" '("RUNNING") '("WARN") #f #f #f 'event_time "DESC" 'shortlist 0 'dashboard)))
 (set! *verbosity* 1)
 
-(print "MODE=not in, state in RUNNING and status IN WARN")
 (set! *verbosity* 8)
-(test #f '(("DELETED" . "n/a") ("COMPLETED" . "PASS") ("COMPLETED" . "FAIL"))
+(test "MODE=not in, state in RUNNING and status IN WARN"
+      '(("COMPLETED" . "PASS") ("COMPLETED" . "FAIL"))
       (map 
        (lambda (x)
 	 (cons (vector-ref x 3)(vector-ref x 4)))
        (rmt:get-tests-for-run run-id "%/%" '("RUNNING") '("WARN") #f #f #t 'event_time "DESC" 'shortlist 0 'dashboard)))
 (set! *verbosity* 1)

Index: tree.scm
==================================================================
--- tree.scm
+++ tree.scm
@@ -22,11 +22,11 @@
 (declare (uses launch))
 (declare (uses megatest-version))
 (declare (uses gutils))
 (declare (uses db))
 (declare (uses server))
-(declare (uses synchash))
+;; (declare (uses synchash))
 (declare (uses dcommon))
 
 (include "common_records.scm")
 (include "db_records.scm")
 (include "key_records.scm")

Index: utils/mk_wrapper
==================================================================
--- utils/mk_wrapper
+++ utils/mk_wrapper
@@ -1,13 +1,13 @@
 #!/bin/bash
 
 prefix=$1
 cmd=$2
 target=$3
+cfgfile="$prefix/bin/.$(lsb_release -sr)/cfg.sh"
 
 if [ "$LD_LIBRARY_PATH" != "" ];then
-  cfgfile="$prefix/bin/.$(lsb_release -sr)/cfg.sh"
   echo "INFO: Using LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >&2
 ( cat << __EOF
 if [ "\$LD_LIBRARY_PATH" != "" ];then
   export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:\$LD_LIBRARY_PATH
 else
@@ -18,15 +18,10 @@
   echo 
 else
   echo "INFO: LD_LIBRARY_PATH not set" >&2
 fi
 
-# echo "#!/bin/bash" > $target
-# if [ "$LD_LIBRARY_PATH" != "" ];then
-#   echo "source $prefix/bin/.\$(lsb_release -sr)/cfg.sh" >> $target
-# fi
-# echo "exec $prefix/bin/.\$(lsb_release -sr)/$cmd \"\$@\"" >> $target
 echo "#!/bin/bash" > $target
 
 if [[ $cmd =~ dboard ]]; then
     cat >> $target <<'EOF'
 
@@ -57,8 +52,19 @@
 fi
 EOF
 
 fi
 
+cat >> $target << EOF 
+if [[ \$(ulimit -a | grep 'open files' | awk '{print \$4}') -gt 10000 ]];then ulimit -n 10000;fi
+EOF
+
+# echo "#!/bin/bash" > $target
+# echo "exec $prefix/bin/.\$(lsb_release -sr)/$cmd \"\$@\"" >> $target
+
 echo "lsbr=\$(lsb_release -sr)" >> $target
-echo "if [[ -e \$lsbr ]];then source \$lsbr;fi" >> $target
+if [ "$LD_LIBRARY_PATH" != "" ];then
+  echo "source $prefix/bin/.\$lsbr/cfg.sh" >> $target
+fi
+
+# echo "if [[ -e \$lsbr ]];then source \$lsbr;fi" >> $target
 echo "exec $prefix/bin/.\$lsbr/$cmd \"\$@\"" >> $target