@@ -50,17 +50,18 @@
   waitons testmode  newtal itemmaps prereqs-not-met)
 
 ;; set up needed environment variables given a run-id and optionally a target, itempath etc.
 ;;
 (define (runs:set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)(intarget #f)(testname #f)(itempath #f))
+  ;;(bb-check-path msg: "runs:set-megatest-env-vars entry")
   (let* ((target    (or intarget 
 			(common:args-get-target)
 			(get-environment-variable "MT_TARGET")))
 	 (keys      (if inkeys    inkeys    (rmt:get-keys)))
 	 (keyvals   (if inkeyvals inkeyvals (keys:target->keyval keys target)))
 	 (vals      (hash-table-ref/default *env-vars-by-run-id* run-id #f))
-	 (link-tree (configf:lookup *configdat* "setup" "linktree")))
+	 (link-tree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree")))
     (if testname (setenv "MT_TEST_NAME" testname))
     (if itempath (setenv "MT_ITEMPATH"  itempath))
 
     ;; get the info from the db and put it in the cache
     (if link-tree
@@ -73,26 +74,61 @@
 	  (for-each
 	   (lambda (key)
 	     (hash-table-set! vals (car key) (cadr key)))
 	   keyvals)))
     ;; from the cached data set the vars
+    
     (hash-table-for-each
      vals
      (lambda (key val)
        (debug:print 2 *default-log-port* "setenv " key " " val)
        (safe-setenv key val)))
+    ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1")
+    ;;(BB> "*env-vars-by-run-id*/runid("run-id" vals="(hash-table->alist vals))
+
     (if (not (get-environment-variable "MT_TARGET"))(setenv "MT_TARGET" target))
-    (alist->env-vars (hash-table-ref/default *configdat* "env-override" '()))
+    ;; We had a case where hash-table-ref raised an exception because *configdat* was #f.
+    ;; Handle that here: re-run the setup and retry a few times, then log the details and exit.
+    (let fatal-loop ((count 0)) 
+      (handle-exceptions
+	  exn
+	  (let ((call-chain (get-call-chain))
+		(msg        ((condition-property-accessor 'exn 'message) exn)))
+	    (if (< count 5)
+		(begin ;; this call is colliding, do some crude stuff to fix it.
+		  (debug:print 0 *default-log-port* "ERROR: *configdat* was inaccessible! This should never happen. Retry #" count)
+		  (launch:setup force-reread: #t)
+		  (fatal-loop (+ count 1))) 
+		(begin
+		  (debug:print 0 *default-log-port* "FATAL: *configdat* was inaccessible! This should never happen. Retried " count " times. Message: " msg)
+		  (debug:print 0 *default-log-port* "Call chain:")
+		  (with-output-to-port *default-log-port*
+
+                    (lambda ()
+                      (print "*configdat* is >>"*configdat*"<<")
+                      (pp *configdat*)
+                      (pp call-chain)))
+                  
+		  (exit 1))))
+          ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1.5")
+          (when (or (not *configdat*) (not (hash-table? *configdat*)))
+              (debug:print 0 *default-log-port* "WARNING: *configdat* was inaccessible! This should never happen.  Brute force reread.")
+              ;;(BB> "ERROR: *configdat* was inaccessible! This should never happen.  Brute force reread.")
+              (thread-sleep! 2) ;; assuming nfs lag.
+              (launch:setup force-reread: #t))
+          (alist->env-vars (hash-table-ref/default *configdat* "env-override" '())))) ;;;; environment is tainted HERE in this let block.
+    ;;(bb-check-path msg: "runs:set-megatest-env-vars block 2")
     ;; Lets use this as an opportunity to put MT_RUNNAME in the environment
     (let ((runname  (if inrunname inrunname (rmt:get-run-name-from-id run-id))))
       (if runname
 	  (setenv "MT_RUNNAME" runname)
 	  (debug:print-error 0 *default-log-port* "no value for runname for id " run-id)))
     (setenv "MT_RUN_AREA_HOME" *toppath*)
     ;; if a testname and itempath are available set the remaining appropriate variables
     (if testname (setenv "MT_TEST_NAME" testname))
     (if itempath (setenv "MT_ITEMPATH"  itempath))
+    ;;(bb-check-path msg: "runs:set-megatest-env-vars block 3")
     (if (and testname link-tree)
 	(setenv "MT_TEST_RUN_DIR" (conc (getenv "MT_LINKTREE")  "/"
 					(getenv "MT_TARGET")    "/"
 					(getenv "MT_RUNNAME")   "/"
 					(getenv "MT_TEST_NAME")
@@ -191,36 +227,121 @@
 						   " in jobgroup \"" jobgroup "\" exceeds limit of " job-group-limit))
 				  #t)
 				 (else #f))))
 	  (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit)))))
 
+;; Run the command configured as "pre-hook" in the [runs] section, but only while
+;; run-id has no tests yet. Hook output is appended to logs/pre-hook-<target>-<runname>.log
+;; under *toppath*, or to that file name in the current directory if logs/ cannot be created.
+(define (runs:run-pre-hook run-id)
+    (let* ((run-pre-hook   (configf:lookup *configdat* "runs" "pre-hook"))
+           (existing-tests (if run-pre-hook
+                               ;; args: run-id testpatt states statuses offset limit not-in sort-by sort-order full-data(not 'shortlist) last-update-time, then 'dashboard
+                               (rmt:get-tests-for-run run-id "%" '() '() #f #f #f #f #f #f 0 'dashboard)
+                               '())))
+      (cond
+       ((not run-pre-hook) #f) ;; no hook configured, nothing to do (and no need to look at MT_TARGET)
+       ((not (null? existing-tests))
+        (debug:print 0 *default-log-port* "Skipping pre-hook call \"" run-pre-hook "\" as there are existing tests for this run."))
+       (else
+        (let* ((log-dir     (conc *toppath* "/logs"))
+               ;; every "/" in the target becomes "-" so the log name is a single path component; an unset MT_TARGET is treated as ""
+               (log-file    (conc "pre-hook-" (string-translate (or (getenv "MT_TARGET") "") "/" "-") "-" (getenv "MT_RUNNAME") ".log"))
+               (use-log-dir (or (directory-exists? log-dir)
+                                (handle-exceptions
+                                 exn
+                                 (begin
+                                   (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir)
+                                   #f)
+                                 (create-directory log-dir #t)
+                                 #t)))
+               (start-time  (current-seconds))
+               (actual-logf (if use-log-dir (conc log-dir "/" log-file) log-file)))
+          (handle-exceptions
+           exn
+           (begin
+             (print-call-chain *default-log-port*)
+             (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn))
+             (debug:print 0 *default-log-port* "ERROR: failed to run pre-hook " run-pre-hook ", check the log " actual-logf))
+           (debug:print-info 0 *default-log-port* "running run-pre-hook: \"" run-pre-hook "\", log is " actual-logf)
+           (let ((status (system (conc run-pre-hook " >> " actual-logf " 2>&1"))))
+             (if (not (eqv? status 0)) ;; a failing command raises nothing, only its exit status shows the failure
+                 (debug:print 0 *default-log-port* "WARNING: pre-hook \"" run-pre-hook "\" exited with status " status ", check the log " actual-logf)))
+           (debug:print-info 0 *default-log-port* "pre-hook \"" run-pre-hook "\" took " (- (current-seconds) start-time) " seconds to run.")))))))
+    
+;; Run the command configured as "post-hook" in the [runs] section, if there is one.
+;; Unlike runs:run-pre-hook the command is run whether or not the run already has
+;; tests (that check is disabled), so no query for existing tests is made and
+;; run-id is not consulted at present.
+;; Hook output is appended to logs/post-hook-<target>-<runname>.log under *toppath*,
+;; or to that file name in the current directory if logs/ cannot be created.
+;; A non-zero exit status from the command is reported as a WARNING; an exception
+;; raised while running it is logged and not re-raised.
+(define (runs:run-post-hook run-id)
+    (let ((run-post-hook (configf:lookup *configdat* "runs" "post-hook")))
+      (when run-post-hook
+        (let* ((log-dir     (conc *toppath* "/logs"))
+               ;; every "/" in the target becomes "-" so the log name is a single path component; an unset MT_TARGET is treated as ""
+               (log-file    (conc "post-hook-" (string-translate (or (getenv "MT_TARGET") "") "/" "-") "-" (getenv "MT_RUNNAME") ".log"))
+               ;; false only when logs/ is missing and could not be created
+               (use-log-dir (or (directory-exists? log-dir)
+                                (handle-exceptions
+                                 exn
+                                 (begin
+                                   (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir)
+                                   #f)
+                                 (create-directory log-dir #t)
+                                 #t)))
+               (start-time  (current-seconds))
+               (actual-logf (if use-log-dir (conc log-dir "/" log-file) log-file)))
+          ;; report the file that is really written (actual-logf), not just the bare log name
+          (handle-exceptions
+           exn
+           (begin
+             (print-call-chain *default-log-port*)
+             (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn))
+             (debug:print 0 *default-log-port* "ERROR: failed to run post-hook " run-post-hook ", check the log " actual-logf))
+           (debug:print-info 0 *default-log-port* "running run-post-hook: \"" run-post-hook "\", log is " actual-logf)
+           (let ((status (system (conc run-post-hook " >> " actual-logf " 2>&1"))))
+             ;; a failing command raises nothing, only its exit status shows the failure
+             (if (not (eqv? status 0))
+                 (debug:print 0 *default-log-port* "WARNING: post-hook \"" run-post-hook "\" exited with status " status ", check the log " actual-logf)))
+           (debug:print-info 0 *default-log-port* "post-hook \"" run-post-hook "\" took " (- (current-seconds) start-time) " seconds to run."))))))
 
 ;;  test-names: Comma separated patterns same as test-patts but used in selection 
 ;;              of tests to run. The item portions are not respected.
 ;;              FIXME: error out if /patt specified
 ;;            
 (define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names
   (let* ((keys               (keys:config-get-fields *configdat*))
 	 (keyvals            (keys:target->keyval keys target))
-	 (run-id             (rmt:register-run keyvals runname "new" "n/a" user))  ;;  test-name)))
+	 (run-id             (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour")))  ;;  test-name)))
 	 ;; (deferred          '()) ;; delay running these since they have a waiton clause
 	 (runconfigf         (conc  *toppath* "/runconfigs.config"))
+         (dbfile             (conc  *toppath* "/megatest.db"))
+         (readonly-mode      (not (file-write-access? dbfile)))
 	 (test-records       (make-hash-table))
 	 ;; need to process runconfigs before generating these lists
 	 (all-tests-registry #f)  ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names
 	 (all-test-names     #f)  ;; (hash-table-keys all-tests-registry))
 	 (test-names         #f)  ;; Generated by a call to (tests:filter-test-names all-test-names test-patts))
 	 (required-tests     #f)  ;; Put fully qualified test/testpath names in this list to be done
 	 (task-key           (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id)))
-	 (tdbdat             (tasks:open-db))
+	 ;; (tdbdat             (tasks:open-db))
 	 (config-reruns      (let ((x (configf:lookup *configdat* "setup" "reruns")))
-			       (if x (string->number x) #f))))
+			       (if x (string->number x) #f)))
+	 (allowed-tests      #f))
+
+    ;; check if readonly
+    (when readonly-mode
+      (debug:print-error 0 *default-log-port* "megatest.db is readonly.  Cannot proceed.")
+      (exit 1))
 
     ;; per user request. If less than 100Meg space on dbdir partition, bail out with error
     ;; this will reduce issues in database corruption
     (common:check-db-dir-and-exit-if-insufficient)
-    
+
     ;; override the number of reruns from the configs
     (if (and config-reruns
 	     (> run-count config-reruns))
 	(set! run-count config-reruns))
     
@@ -229,12 +350,12 @@
     (let ((sighand (lambda (signum)
 		     ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
 		     (set! *time-to-exit* #t)
 		     (print "Received signal " signum ", cleaning up before exit. Please wait...")
 		     (let ((th1 (make-thread (lambda ()
-					       (let ((tdbdat (tasks:open-db)))
-						 (rmt:tasks-set-state-given-param-key task-key "killed"))
+					       ;; (let ((tdbdat (tasks:open-db)))
+						 (rmt:tasks-set-state-given-param-key task-key "killed") ;; )
 					       (print "Killed by signal " signum ". Exiting")
 					       (thread-sleep! 3)
 					       (exit))))
 			   (th2 (make-thread (lambda ()
 					       (thread-sleep! 5)
@@ -244,28 +365,48 @@
 		       (thread-start! th1)
 		       (thread-join! th2)))))
       (set-signal-handler! signal/int sighand)
       (set-signal-handler! signal/term sighand))
 
+    ;; force the starting of a server -- removed BB 17ww28 - no longer needed.
+    ;;(debug:print 0 *default-log-port* "waiting on server...")
+    ;;(server:start-and-wait *toppath*)
+    
     (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process
-    (set! runconf (if (file-exists? runconfigf)
+    (set! runconf (if (common:file-exists? runconfigf)
 		      (setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target)
 		      (begin
 			(debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf)
 			#f)))
 
     (if (not test-patts) ;; first time in - adjust testpatt
 	(set! test-patts (common:args-get-testpatt runconf)))
+    ;; if test-patts is #f at this point there is something wrong and we need to bail out
+    (if (not test-patts)
+	(begin
+	  (debug:print 0 *default-log-port* "WARNING: there is no test pattern for this run. Exiting now.")
+	  (exit 0)))
+    
+    (if (args:get-arg "-tagexpr")
+	(begin
+	  (set! allowed-tests (string-join (runs:get-tests-matching-tags (args:get-arg "-tagexpr")) ","))
+	  	  (debug:print-info 0 *default-log-port* "filtering initial test list with tagexpr: " (args:get-arg "-tagexpr") " => " allowed-tests)
+		  ));; tests will be ANDed with this list
 
     ;; register this run in monitor.db
     (rmt:tasks-add "run-tests" user target runname test-patts task-key) ;; params)
     (rmt:tasks-set-state-given-param-key task-key "running")
 
     ;; Now generate all the tests lists
     (set! all-tests-registry (tests:get-all))   ;; hash of testname => path-to-test
     (set! all-test-names     (hash-table-keys all-tests-registry))
-    (set! test-names         (tests:filter-test-names all-test-names test-patts))
+    ;; filter first for allowed-tests (from -tagexpr) then for test-patts.
+    (set! test-names         (tests:filter-test-names
+			      (if allowed-tests
+				  (tests:filter-test-names all-test-names allowed-tests)
+				  all-test-names)
+			      test-patts))
 
     ;; I think seeding required-tests with all test-names makes sense but lack analysis to back that up.
 
     ;; NEW STRATEGY HERE:
     ;; 1. fill required tests with test-patts
@@ -300,17 +441,26 @@
 	  ;;
 	  ;; (rmt:general-call 'delete-tests-in-state run-id "NOT_STARTED")
 	  
 	  ;; Now convert anything in allow-auto-rerun to NOT_STARTED
 	  ;;
-	  (for-each (lambda (state)
-		      (rmt:set-tests-state-status run-id test-names state #f "NOT_STARTED" state))
-		    (string-split (or (configf:lookup *configdat* "setup" "allow-auto-rerun") "")))))
+	  (for-each
+	   (lambda (state-status)
+	     (let* ((ss-lst (string-split-fields "/" state-status #:infix))
+		    (state  (if (> (length ss-lst) 0)(car  ss-lst) #f))
+		    (status (if (> (length ss-lst) 1)(cadr ss-lst) #f)))
+	       (rmt:set-tests-state-status run-id test-names state status "NOT_STARTED" status)))
+	   ;; list of state/status pairs separated by spaces
+	   (string-split (or (configf:lookup *configdat* "setup" "allow-auto-rerun") "")))))
 
     ;; Ensure all tests are registered in the test_meta table
     (runs:update-all-test_meta #f)
 
+    ;; run the run prehook if there are no tests yet run for this run:
+    ;;
+    (runs:run-pre-hook run-id)
+    
     ;; now add non-directly referenced dependencies (i.e. waiton)
     ;;======================================================================
     ;; refactoring this block into tests:get-full-data
     ;;
     ;; What happended, this code is now duplicated in tests!?
@@ -403,21 +553,17 @@
     (let ((reglen (configf:lookup *configdat* "setup" "runqueue")))
       (if (> (length (hash-table-keys test-records)) 0)
 	  (let* ((keep-going        #t)
 		 (run-queue-retries 5)
 		 (th1        (make-thread (lambda ()
-					    (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry))
-					    ;; (handle-exceptions
-					    ;;  exn
-					    ;;  (begin
-					    ;;    (print-call-chain (current-error-port))
-					    ;;    (debug:print-error 0 *default-log-port* "failure in runs:run-tests-queue thread, error: " ((condition-property-accessor 'exn 'message) exn))
-					    ;;    (if (> run-queue-retries 0)
-					    ;; 	   (begin
-					    ;; 	     (set! run-queue-retries (- run-queue-retries 1))
-					    ;; 	     (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry))))
-					    ;;  (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry)))
+					    (handle-exceptions
+						exn
+						(begin
+						  (print-call-chain)
+						  (print " message: " ((condition-property-accessor 'exn 'message) exn)))
+					      (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
+								    (any->number reglen) all-tests-registry)))
 					  "runs:run-tests-queue"))
 		 (th2        (make-thread (lambda ()				    
 					    ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...
 					    (let ((run-ids (rmt:get-all-run-ids)))
 					      (for-each (lambda (run-id)
@@ -477,12 +623,21 @@
       (cdr reg)
       (if (null? tal) ;; if tal is null and reg not full then '() as reg contents moved to tal
 	  '()
 	  reg)))
 
+;; Build the (hed tal reg reruns) list handed to the named loop "loop" in runs:run-tests-queue:
+;; the runs:queue-next-* helpers supply the next hed, tal and reg; reruns is passed through unchanged.
+(define (runs:loop-values tal reg reglen regfull reruns)
+  (let ((next-hed (runs:queue-next-hed tal reg reglen regfull))
+        (next-tal (runs:queue-next-tal tal reg reglen regfull))
+        (next-reg (runs:queue-next-reg tal reg reglen regfull)))
+    (list next-hed next-tal next-reg reruns)))
+
 (define runs:nothing-left-in-queue-count 0)
 
+;; BB: for future reference - suspect target vars are not expanded to env vars at this point: item expansion using "[items]\nwhatever [system echo $TARGETVAR]" doesn't work right, whereas "[system echo #{targetvar}]" does. Tal and Randy have tickets on this. On the first pass the var is not set; on the second pass it is OK.
 (define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps)
   (let* ((loop-list       (list hed tal reg reruns))
 	 (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)))
 			    (if (list? res)
 				res
@@ -518,14 +673,11 @@
 	   (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a)
 		   '(DONOTRUN removed CANNOTRUN))) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here
       (debug:print-info 1 *default-log-port* "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue")
       (if (or (not (null? tal))
 	      (not (null? reg)))
-	  (list (runs:queue-next-hed tal reg reglen regfull)
-		(runs:queue-next-tal tal reg reglen regfull)
-		(runs:queue-next-reg tal reg reglen regfull)
-		reruns)
+          (runs:loop-values tal reg reglen regfull reruns)
 	  (begin
 	    (debug:print-info 0 *default-log-port* "Nothing left in the queue!")
 	    ;; If get here twice then we know we've tried to expand all items
 	    ;; since there must be a logic issue with the handling of loops in the 
 	    ;; items expand phase we will brute force an exit here.
@@ -594,14 +746,12 @@
 		(if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites")))
 	      
 	      (if (and (null? trimmed-tal)
 		       (null? trimmed-reg))
 		  #f
-		  (list (runs:queue-next-hed trimmed-tal trimmed-reg reglen regfull)
-			(runs:queue-next-tal trimmed-tal trimmed-reg reglen regfull)
-			(runs:queue-next-reg trimmed-tal trimmed-reg reglen regfull)
-			reruns)))
+                  (runs:loop-values trimmed-tal trimmed-reg reglen regfull reruns)
+                  ))
 	      (list (car newtal)(append (cdr newtal) reg) '() reruns))))
 
      ((and (null? fails)
 	   (null? prereq-fails)
 	   (null? non-completed))
@@ -616,14 +766,12 @@
 	    (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met?
 	  (begin
 	    (debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue")
 	    (let ((test-id (rmt:get-test-id run-id hed "")))
 	      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while.")))
-	    (list (runs:queue-next-hed tal reg reglen regfull)
-		  (runs:queue-next-tal tal reg reglen regfull)
-		  (runs:queue-next-reg tal reg reglen regfull)
-		  reruns))))
+            (runs:loop-values tal reg reglen regfull reruns)
+            )))
 
      ((and 
        (or (not (null? fails))
 	   (not (null? prereq-fails)))
        (member 'normal testmode))
@@ -636,27 +784,21 @@
 		(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites")
 		(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL"      "Failed to run due to failed prerequisites"))))
       (if (or (not (null? reg))(not (null? tal)))
 	  (begin
 	    (hash-table-set! test-registry hed 'CANNOTRUN)
-	    (list (runs:queue-next-hed tal reg reglen regfull)
-		  (runs:queue-next-tal tal reg reglen regfull)
-		  (runs:queue-next-reg tal reg reglen regfull)
-		  (cons hed reruns)))
+            (runs:loop-values tal reg reglen regfull (cons hed reruns))
+            )
 	  #f)) ;; #f flags do not loop
 
      ((and (not (null? fails))(member 'toplevel testmode))
       (if (or (not (null? reg))(not (null? tal)))
 	   (list (car newtal)(append (cdr newtal) reg) '() reruns)
 	  #f)) 
-     ((null? runnables) #f) ;; if we get here and non-completed is null the it's all over.
+     ((null? runnables) #f) ;; if we get here and non-completed is null then it is all over.
      (else
       (debug:print 0 *default-log-port* "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now")
-      ;; (list (runs:queue-next-hed tal reg reglen regfull)
-      ;;   	(runs:queue-next-tal tal reg reglen regfull)
-      ;;   	(runs:queue-next-reg tal reg reglen regfull)
-      ;;   	reruns)
       (list (car newtal)(cdr newtal) reg reruns)))))
 
 (define (runs:mixed-list-testname-and-testrec->list-of-strings inlst)
   (if (null? inlst)
       '()
@@ -724,12 +866,13 @@
 					    (not (equal? x hed)))
 					  (runs:calc-not-completed prereqs-not-met)))
 	 (loop-list               (list hed tal reg reruns))
 	 ;; configure the load runner
 	 (numcpus                 (common:get-num-cpus #f))
-	 (maxload                 (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3")))
-	 (waitdelay               (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60"))))
+	 (maxload                 (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3.0")))         ;; use a non-number string to disable
+         (maxhomehostload         (string->number (or (configf:lookup *configdat* "jobtools" "maxhomehostload") "1.2"))) ;; use a non-number string to disable
+         (waitdelay               (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60"))))
     (debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: (" 
 		      (string-intersperse 
 		       (map (lambda (t)
 			      (if (vector? t)
 				  (conc (db:test-get-state t) "/" (db:test-get-status t))
@@ -755,14 +898,11 @@
      ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run
       ;; else the run is stuck, temporarily or permanently
       ;; but should check if it is due to lack of resources vs. prerequisites
       (debug:print-info 1 *default-log-port* "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts)
       (if (or (not (null? tal))(not (null? reg)))
-	  (list (runs:queue-next-hed tal reg reglen regfull)
-		(runs:queue-next-tal tal reg reglen regfull)
-		(runs:queue-next-reg tal reg reglen regfull)
-		reruns)
+	  (runs:loop-values tal reg reglen regfull reruns)
 	  #f))
      
      ;; Register tests 
      ;;
      ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f))
@@ -783,11 +923,11 @@
 	    (if (rmt:get-test-id run-id test-name "")
 		(hash-table-set! test-registry (db:test-make-full-name test-name "") 'done))))
       (runs:shrink-can-run-more-tests-count runsdat)   ;; DELAY TWEAKER (still needed?)
       (if (and (null? tal)(null? reg))
 	  (list hed tal (append reg (list hed)) reruns)
-	  (list (runs:queue-next-hed tal reg reglen regfull)
+	  (list (runs:queue-next-hed tal reg reglen regfull) ;; cannot replace with a call to runs:loop-values as the logic is different for reg
 		(runs:queue-next-tal tal reg reglen regfull)
 		;; NB// Here we are building reg as we register tests
 		;; if regfull we must pop the front item off reg
 		(if regfull
 		    (append (cdr reg) (list hed))
@@ -821,29 +961,30 @@
      ;; This is the final stage, everything is in place so launch the test
      ;;
      ((and have-resources
 	   (or (null? prereqs-not-met)
 	       (and (member 'toplevel testmode) ;;  'toplevel)
-		    (null? non-completed))))
+		    (null? non-completed)
+		    (not (member 'exclusive testmode)))))
       ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path))
       ;; we are going to reset all the counters for test retries by setting a new hash table
       ;; this means they will increment only when nothing can be run
       (set! *max-tries-hash* (make-hash-table))
       ;; well, first lets see if cpu load throttling is enabled. If so wait around until the
       ;; average cpu load is under the threshold before continuing
-      (if (configf:lookup *configdat* "jobtools" "maxload") ;; only gate if maxload is specified
-	  (common:wait-for-cpuload maxload numcpus waitdelay))
+      (if maxload ;; only gate if maxload is specified
+          (common:wait-for-cpuload maxload numcpus waitdelay))
+      (if maxhomehostload
+          (common:wait-for-homehost-load maxhomehostload (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload)))
+      
       (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry)
       (runs:incremental-print-results run-id)
       (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running)
       (runs:shrink-can-run-more-tests-count runsdat)  ;; DELAY TWEAKER (still needed?)
       ;; (thread-sleep! *global-delta*)
       (if (or (not (null? tal))(not (null? reg)))
-	  (list (runs:queue-next-hed tal reg reglen regfull)
-		(runs:queue-next-tal tal reg reglen regfull)
-		(runs:queue-next-reg tal reg reglen regfull)
-		reruns)
+	  (runs:loop-values tal reg reglen regfull reruns)
 	  #f))
      
      ;; must be we have unmet prerequisites
      ;;
      (else
@@ -875,25 +1016,18 @@
 		    (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
 		    ;; (thread-sleep! *global-delta*)
 		    ;; This next is for the items
 		    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f)
 		    (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed)
-		    (list (runs:queue-next-hed tal reg reglen regfull)
-			  (runs:queue-next-tal tal reg reglen regfull)
-			  (runs:queue-next-reg tal reg reglen regfull)
-			  reruns ;; WAS: (cons hed reruns) ;; but that makes no sense?
-			  ))
+		    (runs:loop-values tal reg reglen regfull reruns))
 		  (let ((nth-try (hash-table-ref/default test-registry hed 0)))
 		    (cond
 		     ((member "RUNNING" (map db:test-get-state prereqs-not-met))
 		      (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60)
 			  (debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet."))
 		      (thread-sleep! 4)
-		      (list (runs:queue-next-hed newtal reg reglen regfull)
-			    (runs:queue-next-tal newtal reg reglen regfull)
-			    (runs:queue-next-reg newtal reg reglen regfull)
-			    reruns))
+		      (runs:loop-values newtal reg reglen regfull reruns)) ;; newtal, as before the runs:loop-values refactor
 		     ((or (not nth-try)
 			  (and (number? nth-try)
 			       (< nth-try 10)))
 		      (hash-table-set! test-registry hed (if (number? nth-try)
 							     (+ nth-try 1)
@@ -900,17 +1034,11 @@
 							     0))
 		      (if (runs:lownoise (conc "not removing test " hed) 60)
 			  (debug:print 1 *default-log-port* "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites"))
 		      ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;;  " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)")
 		      (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
-		      ;; (list hed tal reg reruns)
-		      ;; (list (car newtal)(cdr newtal) reg reruns)
-		      ;; (hash-table-set! test-registry hed 'removed)
-		      (list (runs:queue-next-hed newtal reg reglen regfull)
-			    (runs:queue-next-tal newtal reg reglen regfull)
-			    (runs:queue-next-reg newtal reg reglen regfull)
-			    reruns))
+		      (runs:loop-values newtal reg reglen regfull reruns))
 		     ((symbol? nth-try)
 		      (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW
 			  (if (null? tal)
 			      #f ;; yes, really
 			      (list (car tal)(cdr tal) reg reruns))
@@ -917,14 +1045,11 @@
 			  (begin
 			    (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60)
 				(debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state " nth-try " will be overridden and we'll retry."))
 			    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f)
 			    (hash-table-set! test-registry hed 0)
-			    (list (runs:queue-next-hed newtal reg reglen regfull)
-				  (runs:queue-next-tal newtal reg reglen regfull)
-				  (runs:queue-next-reg newtal reg reglen regfull)
-				  reruns))))
+			    (runs:loop-values newtal reg reglen regfull reruns)))) ;; reruns is the required fifth argument
 		     (else
 		      (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
 			  (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))
 		      ;; (debug:print 0 *default-log-port* "         prereqs: " prereqs-not-met)
 		      (hash-table-set! test-registry hed 'removed)
@@ -934,17 +1059,14 @@
 		      (list (if (null? tal)(car newtal)(car tal))
 			    tal
 			    reg
 			    reruns)))))
 	      ;; can't drop this - maybe running? Just keep trying
-	      (let ((runable-tests (runs:runable-tests prereqs-not-met)))
+	      (let ((runable-tests (runs:runable-tests prereqs-not-met))) ;; SUSPICIOUS: Should look at more than just prereqs-not-met?
 		(if (null? runable-tests)
 		    #f   ;; I think we are truly done here
-		    (list (runs:queue-next-hed newtal reg reglen regfull)
-			    (runs:queue-next-tal newtal reg reglen regfull)
-			    (runs:queue-next-reg newtal reg reglen regfull)
-			    reruns)))))))))
+		    (runs:loop-values newtal reg reglen regfull reruns)))))))))
 
 ;; scan a list of tests looking to see if any are potentially runnable
 ;;
 (define (runs:runable-tests tests)
   (filter (lambda (t)
@@ -1043,25 +1165,22 @@
 
   ;; Do mark-and-find clean up of db before starting runing of quue
   ;;
   ;; (rmt:find-and-mark-incomplete)
 
-  (let* ((run-info              (rmt:get-run-info run-id))
+  (let* ((run-info             (rmt:get-run-info run-id))
 	(tests-info            (mt:get-tests-for-run run-id #f '() '())) ;;  qryvals: "id,testname,item_path"))
 	(sorted-test-names     (tests:sort-by-priority-and-waiton test-records))
 	(test-registry         (make-hash-table))
 	(registry-mutex        (make-mutex))
 	(num-retries           0)
 	(max-retries           (config-lookup *configdat* "setup" "maxretries"))
-	(max-concurrent-jobs   (let ((mcj (config-lookup *configdat* "setup"     "max_concurrent_jobs")))
-				 (if (and mcj (string->number mcj))
-				     (string->number mcj)
-				     1))) ;; length of the register queue ahead
-	(reglen                (if (number? reglen-in) reglen-in 1))
+	(max-concurrent-jobs   (configf:lookup-number *configdat* "setup" "max_concurrent_jobs" default: 50))
+        (reglen                (if (number? reglen-in) reglen-in 1))
 	(last-time-incomplete  (- (current-seconds) 900)) ;; force at least one clean up cycle
 	(last-time-some-running (current-seconds))
-	(tdbdat                (tasks:open-db))
+	;; (tdbdat                (tasks:open-db))
 	(runsdat (make-runs:dat
 		  ;; hed: hed
 		  ;; tal: tal
 		  ;; reg: reg
 		  ;; reruns: reruns
@@ -1151,14 +1270,19 @@
 			   newtal:      newtal
 			   itemmaps:    itemmaps
 			   ;; prereqs-not-met: prereqs-not-met
 			   )))
 	(runs:dat-regfull-set! runsdat regfull)
-	;; every couple minutes verify the server is there for this run
-	(if (and (common:low-noise-print 60 "try start server"  run-id)
-		 (tasks:need-server run-id))
-	    (tasks:start-and-wait-for-server tdbdat run-id 10)) ;; NOTE: delay and wait is done under the hood
+
+        ;; -- removed BB 17ww28 - no longer needed.
+	;; every 15 minutes verify the server is there for this run
+	;; (if (and (common:low-noise-print 240 "try start server"  run-id)
+	;; 	 (not (or (and *runremote*
+	;; 		       (remote-server-url *runremote*)
+	;; 		       (server:ping (remote-server-url *runremote*)))
+	;; 		  (server:check-if-running *toppath*))))
+	;;     (server:kind-run *toppath*))
 	
 	(if (> num-running 0)
 	  (set! last-time-some-running (current-seconds)))
 
       (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
@@ -1286,11 +1410,11 @@
 	      (loop (car tal)(cdr tal) reg reruns)))
 	    
 	 ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
 	 ;;    - but only do that if resources exist to kick off the job
 	 ;; EXPAND ITEMS
-	 ((or (procedure? items)(eq? items 'have-procedure))
+	 ((or (procedure? items)(eq? items 'have-procedure)) ;; BB - target vars are env vars here? to allow expansion of [items]\nsomething [system echo $SOMETARGVAR], which is wonky
 	  (let ((can-run-more    (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)))
 	    (if (and (list? can-run-more)
 		     (car can-run-more))
 		(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps)))
 		  (if loop-list
@@ -1324,11 +1448,11 @@
     ;; now *if* -run-wait we wait for all tests to be done
     ;; Now wait for any RUNNING tests to complete (if in run-wait mode)
     (thread-sleep! 5) ;; I think there is a race condition here. Let states/statuses settle
     (let wait-loop ((num-running      (rmt:get-count-tests-running-for-run-id run-id))
 		    (prev-num-running 0))
-      ;; (BB> "num-running=" num-running ", prev-num-running=" prev-num-running)
+      ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running)
       (if (and (or (args:get-arg "-run-wait")
 		   (equal? (configf:lookup *configdat* "setup" "run-wait") "yes"))
 	       (> num-running 0))
 	  (begin
 	    ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
@@ -1343,10 +1467,11 @@
 	    (thread-sleep! 5)
 	    ;; (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running))))
 	    (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running))))
     ;; LET* ((test-record
     ;; we get here on "drop through". All done!
+    (runs:run-post-hook run-id)
     (debug:print-info 1 *default-log-port* "All tests launched")))
 
 (define (runs:calc-fails prereqs-not-met)
   (filter (lambda (test)
 	    (and (vector? test) ;; not (string? test))
@@ -1381,11 +1506,12 @@
   (filter 
    (lambda (t)
      (or (not (vector? t))
 	 (and (equal? "NOT_STARTED" (db:test-get-state t))
 	      (member (db:test-get-status t)
-			      '("n/a" "KEEP_TRYING")))))
+		      '("n/a" "KEEP_TRYING")))
+	 (and (equal? "RUNNING" (db:test-get-state t))))) ;; account for a test that is running
    prereqs-not-met))
 
 (define (runs:pretty-string lst)
   (map (lambda (t)
 	 (if (not (vector? t))
@@ -1466,11 +1592,11 @@
 		  (thread-sleep! 1)
 		  (loop)))))
       (if (not testdat) ;; should NOT happen
 	  (debug:print-error 0 *default-log-port* "failed to get test record for test-id " test-id))
       (set! test-id (db:test-get-id testdat))
-      (if (file-exists? test-path)
+      (if (common:file-exists? test-path)
 	  (change-directory test-path)
 	  (begin
 	    (debug:print-error 0 *default-log-port* "test run path not created before attempting to run the test. Perhaps you are running -remove-runs at the same time?")
 	    (change-directory *toppath*)))
       (case (if force ;; (args:get-arg "-force")
@@ -1534,13 +1660,12 @@
 		   (let ((running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f)))
 		     (if (not (null? running-tests)) ;; have to skip 
 			 (set! skip-test "Skipping due to previous tests running"))))
 		  ((and skip-check
 			(configf:lookup test-conf "skip" "fileexists"))
-		   (if (file-exists? (configf:lookup test-conf "skip" "fileexists"))
+		   (if (common:file-exists? (configf:lookup test-conf "skip" "fileexists"))
 		       (set! skip-test (conc "Skipping due to existance of file " (configf:lookup test-conf "skip" "fileexists")))))
-
 		  ((and skip-check
 			(configf:lookup test-conf "skip" "rundelay"))
 		   ;; run-ids = #f means *all* runs
 		   (let* ((numseconds      (common:hms-string->seconds (configf:lookup test-conf "skip" "rundelay")))
 			  (running-tests   (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f))
@@ -1626,23 +1751,33 @@
 ;;    'remove-runs
 ;;    'set-state-status
 ;;
 ;; NB// should pass in keys?
 ;;
-(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(mode 'remove-all)(options '()))
+(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(mode #f)(options '()))
   (common:clear-caches) ;; clear all caches
   (let* ((db           #f)
-	 (tdbdat       (tasks:open-db))
+	 ;; (tdbdat       (tasks:open-db))
 	 (keys         (rmt:get-keys))
 	 (rundat       (mt:get-runs-by-patt keys runnamepatt target))
 	 (header       (vector-ref rundat 0))
 	 (runs         (vector-ref rundat 1))
 	 (states       (if state  (string-split state  ",") '()))
 	 (statuses     (if status (string-split status ",") '()))
 	 (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f)))
 	 (rp-mutex     (make-mutex))
-	 (bup-mutex    (make-mutex)))
+	 (bup-mutex    (make-mutex))
+         (keep-records (args:get-arg "-keep-records"))) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode".
+
+    (let* ((write-access-actions '(remove-runs set-state-status archive run-wait))
+           (dbfile             (conc  *toppath* "/megatest.db"))
+           (readonly-mode      (not (file-write-access? dbfile))))
+      (when (and readonly-mode
+                 (member action write-access-actions))
+        (debug:print-error 0 *default-log-port* "megatest.db is readonly.  Cannot proceed with action ["action"] in which write-access isrequired .")
+        (exit 1)))
+    
     (debug:print-info 4 *default-log-port* "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status)
     (if (> 2 (length state-status))
 	(begin
 	  (debug:print-error 0 *default-log-port* "the parameter to -set-state-status is a comma delimited string. E.g. COMPLETED,FAIL")
 	  (exit)))
@@ -1677,11 +1812,11 @@
 		    (tasks:kill-runner target run-name testpatt)
 		    
 		    ;; (debug:print 0 *default-log-port* "not attempting to kill any run launcher processes as testpatt is " testpatt))
 		    (debug:print 1 *default-log-port* "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname")))
 		   ((set-state-status)
-		    (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
+		    ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
 		    (debug:print 1 *default-log-port* "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname")))
 		   ((print-run)
 		    (debug:print 1 *default-log-port* "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header)
 		    action)
 		   ((run-wait)
@@ -1788,29 +1923,32 @@
 			       ((archive)
 				(if (and run-dir (not toplevel-with-children))
 				    (let ((ddir (conc run-dir "/")))
 				      (case (string->symbol (args:get-arg "-archive"))
 					((save save-remove keep-html)
-					 (if (file-exists? ddir)
+					 (if (common:file-exists? ddir)
 					     (debug:print-info 0 *default-log-port* "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir)))))))
 				(if (not (null? tal))
 				    (loop (car tal)(cdr tal))))
 			       )))
 		       )
 		     (if worker-thread (thread-join! worker-thread))))))
 	   ;; remove the run if zero tests remain
 	   (if (eq? action 'remove-runs)
-	       (let ((remtests (mt:get-tests-for-run (db:get-value-by-header run header "id") #f '("DELETED") '("n/a") not-in: #t)))
+	       (let* ((run-id   (db:get-value-by-header run header "id")) ;; NB// masks run-id from above?
+                      (remtests (mt:get-tests-for-run run-id #f '("DELETED") '("n/a") not-in: #t)))
 		 (if (null? remtests) ;; no more tests remaining
 		     (let* ((dparts  (string-split lasttpath "/"))
 			    (runpath (conc "/" (string-intersperse 
 						(take dparts (- (length dparts) 1))
 						"/"))))
 		       (debug:print 1 *default-log-port* "Removing run: " runkey " " (db:get-value-by-header run header "runname") " and related record")
-		       (rmt:delete-run run-id)
-		       (rmt:delete-old-deleted-test-records)
-		       ;; (rmt:set-var "DELETED_TESTS" (current-seconds))
+                       (if (not keep-records)
+                           (begin
+                             (rmt:delete-run run-id)
+                             (rmt:delete-old-deleted-test-records)))
+                           ;; (rmt:set-var "DELETED_TESTS" (current-seconds))
 		       ;; need to figure out the path to the run dir and remove it if empty
 		       ;;    (if (null? (glob (conc runpath "/*")))
 		       ;;        (begin
 		       ;; 	 (debug:print 1 *default-log-port* "Removing run dir " runpath)
 		       ;; 	 (system (conc "rmdir -p " runpath))))
@@ -1821,25 +1959,41 @@
     )
   #t)
 
 (define (runs:remove-test-directory test mode) ;; remove-data-only)
   (let* ((run-dir       (db:test-get-rundir test))    ;; run dir is from the link tree
-	 (real-dir      (if (file-exists? run-dir)
+	 (real-dir      (if (common:file-exists? run-dir)
 			    ;; (resolve-pathname run-dir)
 			    (common:nice-path run-dir)
-			    #f)))
-    (case mode
+			    #f))
+         (clean-mode    (or mode 'remove-all))
+         (test-id       (db:test-get-id test))
+        ;; (lock-key      (conc "test-" test-id))
+        ;; (got-lock      (let loop ((lock        (rmt:no-sync-get-lock lock-key))
+	;; 			     (expire-time (+ (current-seconds) 30))) ;; give up on getting the lock and steal it after 15 seconds
+	;; 		    (if (car lock)
+	;; 			#t
+	;; 			(if (> (current-seconds) expire-time)
+	;; 			    (begin
+	;; 			      (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to clean test with id " test-id)
+	;; 			      (rmt:no-sync-del! lock-key) ;; destroy the lock
+	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; 
+	;; 			    (begin
+	;; 			      (thread-sleep! 1)
+	 ;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)))))))
+	 )
+    (case clean-mode
       ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f))
       ((remove-all)      (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f))
       ((archive-remove)  (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f)))
     (debug:print-info 1 *default-log-port* "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir)
     (if (and real-dir 
 	     (> (string-length real-dir) 5)
-	     (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc.
+	     (common:file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc.
 	(begin ;; let* ((realpath (resolve-pathname run-dir)))
 	  (debug:print-info 1 *default-log-port* "Recursively removing " real-dir)
-	  (if (file-exists? real-dir)
+	  (if (common:file-exists? real-dir)
 	      (runs:safe-delete-test-dir real-dir)
 	      (debug:print 0 *default-log-port* "WARNING: test dir " real-dir " appears to not exist or is not readable")))
 	(if real-dir 
 	    (debug:print 0 *default-log-port* "WARNING: directory " real-dir " does not exist")
 	    (debug:print 0 *default-log-port* "WARNING: no real directory corrosponding to link " run-dir ", nothing done")))
@@ -1861,14 +2015,16 @@
 		     (not (member run-dir (list "n/a" "/tmp/badname"))))
 		(debug:print 0 *default-log-port* "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink")
 		(debug:print 0 *default-log-port* "NOTE: the run dir for this test is undefined. Test may have already been deleted."))
 	    ))
     ;; Only delete the records *after* removing the directory. If things fail we have a record 
-    (case mode
-      ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "NOT_STARTED" "n/a" #f))
+    (case clean-mode
+      ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) (db:test-get-state test)(db:test-get-status test) #f))
       ((archive-remove)  (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f))
-      (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test))))))
+      (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test))))
+    ;; (rmt:no-sync-del! lock-key)
+    ))
 
 ;;======================================================================
 ;; Routines for manipulating runs
 ;;======================================================================
 
@@ -1888,14 +2044,17 @@
       (let (;; (db   #f)
 	    (keys #f))
 	(if (launch:setup)
 	    (begin
 	      (full-runconfigs-read) ;; cache the run config
-	      (launch:cache-config)) ;; do not cache here - need to be sure runconfigs is processed
+	      ;; (launch:cache-config) ;; there are two independent config cache locations, turning this one off for now. MRW.
+	      ) ;; do not cache here - need to be sure runconfigs is processed
 	    (begin 
 	      (debug:print 0 *default-log-port* "Failed to setup, exiting")
 	      (exit 1)))
+
+        
 	(set! keys (keys:config-get-fields *configdat*))
 	;; have enough to process -target or -reqtarg here
 	(if (args:get-arg "-reqtarg")
 	    (let* ((runconfigf (conc  *toppath* "/runconfigs.config")) ;; DO NOT EVALUATE ALL 
 		   (runconfig  (read-config runconfigf #f #t environ-patt: #f)))
@@ -1968,11 +2127,11 @@
   (let* ((tagdata (rmt:get-tests-tags))
          (res     '())) ;; list of tests that match one or more tags
     (for-each
      (lambda (tag)
        (if (patt-list-match tag tagpatt)
-           (set! res (append (hash-table-ref tagdata tag)))))
+           (set! res (append (hash-table-ref tagdata tag) res))))
      (hash-table-keys tagdata))
     res))
     
 
 ;; Update test_meta for all tests
@@ -1989,11 +2148,11 @@
 ;;
 (define (runs:rollup-run keys runname user keyvals)
   (debug:print 4 *default-log-port* "runs:rollup-run, keys: " keys " -runname " runname " user: " user)
   (let* ((db              #f)
 	 ;; register run operates on the main db
-	 (new-run-id      (rmt:register-run keyvals runname "new" "n/a" user))
+	 (new-run-id      (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour")))
 	 (prev-tests      (rmt:get-matching-previous-test-run-records new-run-id "%" "%"))
 	 (curr-tests      (mt:get-tests-for-run new-run-id "%/%" '() '()))
 	 (curr-tests-hash (make-hash-table)))
     (rmt:update-run-event_time new-run-id)
     ;; index the already saved tests by testname and itemdat in curr-tests-hash
@@ -2041,5 +2200,28 @@
 	     (db:test-get-id testdat))))
 	 ))
      prev-tests)))
 	 
      
+;; clean cache files: delete ".megatest*" and ".runconfig*" files found in <linktree>/<target>/<runname>. Needs both target and runname, otherwise only an error is logged. toppath is not referenced by the active code.
+(define (runs:clean-cache target runname toppath)
+  (if target ;; when target is false: logs "-clean-cache requires -target or -reqtarg"
+      (if runname ;; when runname is false: logs "-clean-cache requires -runname."
+	  (let* ((linktree (common:get-linktree)) ;; (if toppath (configf:lookup *configdat* "setup" "linktree")))
+		 (runtop   (conc linktree "/" target "/" runname)) ;; directory that is searched for cache files
+		 (files    (if (common:file-exists? runtop) ;; glob only when common:file-exists? accepts runtop
+			       (append (glob (conc runtop "/.megatest*"))
+				       (glob (conc runtop "/.runconfig*")))
+			       '()))) ;; otherwise there is nothing to remove
+	    (if (null? files)
+		(debug:print-info 0 *default-log-port* "No cached megatest or runconfigs files found. None removed.")
+		(begin
+		  (debug:print-info 0 *default-log-port* "Removing cached files:\n    " (string-intersperse files "\n    "))
+		  (for-each 
+		   (lambda (f)
+		     (handle-exceptions ;; a failed delete is reported as a warning and the loop moves on to the next file
+			 exn
+			 (debug:print 0 *default-log-port* "WARNING: Failed to remove file " f)
+		       (delete-file f)))
+		   files))))
+	  (debug:print-error 0 *default-log-port* "-clean-cache requires -runname."))
+      (debug:print-error 0 *default-log-port* "-clean-cache requires -target or -reqtarg")))