@@ -37,11 +37,11 @@
 ;; This is the *new* methodology. One record to inform them and in the chaos, organise them.
 ;;
 (define (runs:create-run-record)
   (let* ((mconfig      (if *configdat*
 		           *configdat*
-		           (if (setup-for-run)
+		           (if (launch:setup-for-run)
 		               *configdat*
 		               (begin
 		                 (debug:print 0 "ERROR: Called setup in a non-megatest area, exiting")
 		                 (exit 1)))))
 	  (runrec      (runs:runrec-make-record))
@@ -88,11 +88,11 @@
 			    (safe-setenv (car varval)(cadr varval)))
 			  (configf:get-section runconfig section)))
 	      (list "default" target))
     (vector target runname testpatt keys keyvals envdat mconfig runconfig serverdat transport db toppath run-id)))
 
-(define (set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f))
+(define (runs:set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f))
   (let* ((target    (or (common:args-get-target)
 			(get-environment-variable "MT_TARGET")))
 	 (keys    (if inkeys    inkeys    (rmt:get-keys)))
 	 (keyvals   (if inkeyvals inkeyvals (keys:target->keyval keys target)))
 	 (vals      (hash-table-ref/default *env-vars-by-run-id* run-id #f))
@@ -206,25 +206,34 @@
 	 (keyvals            (keys:target->keyval keys target))
 	 (run-id             (rmt:register-run keyvals runname "new" "n/a" user))  ;;  test-name)))
 	 (deferred          '()) ;; delay running these since they have a waiton clause
 	 (runconfigf         (conc  *toppath* "/runconfigs.config"))
 	 (test-records       (make-hash-table))
-	 (all-tests-registry (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names
-	 (all-test-names     (hash-table-keys all-tests-registry))
-	 (test-names         (tests:filter-test-names all-test-names test-patts))
-	 (required-tests     (lset-intersection equal? (string-split test-patts ",") test-names))) ;; test-names)) ;; Added test-names as initial for required-tests but that failed to work
+	 ;; need to process runconfigs before generating these lists
+	 (all-tests-registry #f)  ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names
+	 (all-test-names     #f)  ;; (hash-table-keys all-tests-registry))
+	 (test-names         #f)  ;; (tests:filter-test-names all-test-names test-patts))
+	 (required-tests     #f)) ;;(lset-intersection equal? (string-split test-patts ",") test-names))) ;; test-names)) ;; Added test-names as initial for required-tests but that failed to work
 
-    (set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process
+    (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process
     (if (file-exists? runconfigf)
-	(setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals "pre-launch-env-vars")
+	(setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target)
 	(debug:print 0 "WARNING: You do not have a run config file: " runconfigf))
+
+    ;; Now generate all the test lists
+    (set! all-tests-registry (tests:get-all))
+    (set! all-test-names     (hash-table-keys all-tests-registry))
+    (set! test-names         (tests:filter-test-names all-test-names test-patts))
+    (set! required-tests     (lset-intersection equal? (string-split test-patts ",") test-names))
     
     ;; look up all tests matching the comma separated list of globs in
     ;; test-patts (using % as wildcard)
 
     ;; (set! test-names (delete-duplicates (tests:get-valid-tests *toppath* test-patts)))
-    (debug:print-info 0 "test names " test-names)
+    (debug:print-info 0 "tests search path: " (tests:get-tests-search-path *configdat*))
+    (debug:print-info 0 "all tests:  " (string-intersperse (sort all-test-names string<) " "))
+    (debug:print-info 0 "test names: " (string-intersperse (sort test-names string<) " "))
 
     ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if
     ;; -keepgoing is specified
     (if (eq? *passnum* 0)
 	(begin
@@ -430,14 +439,17 @@
 	       (null? non-completed)))
       (debug:print-info 4 "runs:expand-items: (or (null? prereqs-not-met) (and (member 'toplevel testmode)(null? non-completed)))")
       (let ((test-name (tests:testqueue-get-testname test-record)))
 	(setenv "MT_TEST_NAME" test-name) ;; 
 	(setenv "MT_RUNNAME"   runname)
-	(set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
+	(runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
 	(let ((items-list (items:get-items-from-config tconfig)))
 	  (if (list? items-list)
 	      (begin
+		(if (null? items-list)
+		    (let ((test-id (rmt:get-test-id run-id test-name "")))
+		      (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "Failed to run due to failed prerequisites")))
 		(tests:testqueue-set-items! test-record items-list)
 		(list hed tal reg reruns))
 	      (begin
 		(debug:print 0 "ERROR: The proc from reading the items table did not yield a list - please report this")
 		(exit 1))))))
@@ -470,11 +482,11 @@
 	    (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records))
 		  (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records)))
 	      (debug:print 1 "WARNING: test " hed " has discarded prerequisites, removing it from the queue")
 
 	      (let ((test-id (rmt:get-test-id run-id hed "")))
-		(mt:test-set-state-status-by-id test-id "DEQUEUED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))
+		(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites"))
 	      
 	      (if (and (null? trimmed-tal)
 		       (null? trimmed-reg))
 		  #f
 		  (list (runs:queue-next-hed trimmed-tal trimmed-reg reglen regfull)
@@ -546,19 +558,23 @@
 	    ;; num-retries code was here
 	    ;; we use this opportunity to move contents of reg to tal
 	    (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met?
 	  (begin
 	    (debug:print-info 1 "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue")
+	    (let ((test-id (rmt:get-test-id run-id hed "")))
+	      (mt:test-set-state-status-by-id run-id test-id "DEQUEDED" "TIMED_OUT" "Nothing seen running in a while."))
 	    (list (runs:queue-next-hed tal reg reglen regfull)
 		  (runs:queue-next-tal tal reg reglen regfull)
 		  (runs:queue-next-reg tal reg reglen regfull)
 		  reruns))))
 
      ((and (not (null? fails))(member 'normal testmode))
       (debug:print-info 1 "test "  hed " (mode=" testmode ") has failed prerequisite(s); "
 			(string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ")
 			", removing it from to-do list")
+      (let ((test-id (rmt:get-test-id run-id hed "")))
+	(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))
       (if (or (not (null? reg))(not (null? tal)))
 	  (begin
 	    (hash-table-set! test-registry hed 'CANNOTRUN)
 	    (list (runs:queue-next-hed tal reg reglen regfull)
 		  (runs:queue-next-tal tal reg reglen regfull)
@@ -739,12 +755,15 @@
 	  (if (or (not (null? reg))(not (null? tal)))
 	      (if (vector? hed)
 		  (begin
 		    (debug:print 1 "WARNING: Dropping test " test-name "/" item-path
 				 " from the launch list as it has prerequistes that are FAIL")
+		    (let ((test-id (rmt:get-test-id run-id hed "")))
+		      (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))
 		    (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?)
 		    ;; (thread-sleep! *global-delta*)
+		    ;; This next call is for the items
 		    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f)
 		    (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'removed)
 		    (list (runs:queue-next-hed tal reg reglen regfull)
 			  (runs:queue-next-tal tal reg reglen regfull)
 			  (runs:queue-next-reg tal reg reglen regfull)
@@ -902,11 +921,11 @@
 	     (num-running (rmt:get-count-tests-running-for-run-id run-id)))
 
       (if (> num-running 0)
 	  (set! last-time-some-running (current-seconds)))
 
-      (if (> (current-seconds)(+ last-time-some-running 60))
+      (if (> (current-seconds)(+ last-time-some-running 240))
 	  (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
 	;; (debug:print 0 "max-tries-hash: " (hash-table->alist *max-tries-hash*))
 
 	;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard
 	;; and it is clear they *should* have run but did not.
@@ -1116,11 +1135,11 @@
 		      )
     (debug:print 2 "Attempting to launch test " full-test-name)
     (setenv "MT_TEST_NAME" test-name) ;; 
     (setenv "MT_ITEMPATH"  item-path)
     (setenv "MT_RUNNAME"   runname)
-    (set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
+    (runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
     (change-directory *toppath*)
 
     ;; Here is where the test_meta table is best updated
     ;; Yes, another use of a global for caching. Need a better way?
     ;;
@@ -1360,10 +1379,11 @@
 									(dirb ;; (rmt:sdb-qry 'getstr 
 									 (db:test-get-rundir b))) ;; ) ;; ((filedb:get-path *fdb* (db:test-get-rundir b))))
 								    (if (and (string? dira)(string? dirb))
 									(> (string-length dira)(string-length dirb))
 									#f)))))
+		       (toplevel-retries (make-hash-table)) ;; try three times to loop through and remove top level tests
 		       (test-retry-time  (make-hash-table))
 		       (allow-run-time   10)) ;; seconds to allow for killing tests before just brutally killing 'em
 		   (let loop ((test (car sorted-tests))
 			      (tal  (cdr sorted-tests)))
 		     (let* ((test-id       (db:test-get-id test))
@@ -1385,11 +1405,18 @@
 							       (> (cdb:remote-run db:test-toplevel-num-items db run-id test-name) 0))))
 			     (case action
 			       ((remove-runs)
 				;; if the test is a toplevel-with-children issue an error and do not remove
 				(if toplevel-with-children
-				    (debug:print 0 "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests")
+				    (begin
+				      (debug:print 0 "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests")
+				      (hash-table-set! toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1))
+				      (if (> (hash-table-ref toplevel-retries test-fulln) 3)
+					  (if (not (null? tal))
+					      (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries
+					  (let ((newtal (append tal (list test))))
+					    (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue
 				    (begin
 				      (debug:print-info 0 "test: " test-name " itest-state: " test-state)
 				      (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))
 					  (begin
 					    (if (not (hash-table-ref/default test-retry-time test-fulln #f))
@@ -1454,11 +1481,11 @@
   (let* ((run-dir       (db:test-get-rundir test))    ;; run dir is from the link tree
 	 (real-dir      (if (file-exists? run-dir)
 			    (resolve-pathname run-dir)
 			    #f)))
     (if (not remove-data-only)
-	(mt:test-set-state-status-by-id (db:test-get-id test) "REMOVING" "LOCKED" #f))
+	(mt:test-set-state-status-by-id (db:test-get-run-id test)(db:test-get-id test) "REMOVING" "LOCKED" #f))
     (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir)
     (if (and real-dir 
 	     (> (string-length real-dir) 5)
 	     (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc.
 	(begin ;; let* ((realpath (resolve-pathname run-dir)))
@@ -1508,11 +1535,11 @@
       (debug:print 0 "ERROR: Missing required parameter for " switchname ", you must specify the run name with -runname runname")
       (exit 3))
      (else
       (let ((db   #f)
 	    (keys #f))
-	(if (not (setup-for-run))
+	(if (not (launch:setup-for-run))
 	    (begin 
 	      (debug:print 0 "Failed to setup, exiting")
 	      (exit 1)))
 	;; (if (args:get-arg "-server")
 	;;     (cdb:remote-run server:start db (args:get-arg "-server")))