@@ -199,11 +199,11 @@
 
 ;;  test-names: Comma separated patterns same as test-patts but used in selection 
 ;;              of tests to run. The item portions are not respected.
 ;;              FIXME: error out if /patt specified
 ;;            
-(define (runs:run-tests target runname test-patts user flags) ;; test-names
+(define (runs:run-tests target runname test-patts user flags #!key (run-count 3)) ;; test-names
   (let* ((keys               (keys:config-get-fields *configdat*))
 	 (keyvals            (keys:target->keyval keys target))
 	 (run-id             (rmt:register-run keyvals runname "new" "n/a" user))  ;;  test-name)))
 	 (deferred          '()) ;; delay running these since they have a waiton clause
 	 (runconfigf         (conc  *toppath* "/runconfigs.config"))
@@ -340,13 +340,23 @@
 	(debug:print-info 1 "Adding " required-tests " to the run queue"))
     ;; NOTE: these are all parent tests, items are not expanded yet.
     (debug:print-info 4 "test-records=" (hash-table->alist test-records))
     (let ((reglen (configf:lookup *configdat* "setup" "runqueue")))
       (if (> (length (hash-table-keys test-records)) 0)
-	  (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry)
+	  (begin
+	    (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry)
+	    ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD
+	    (if (> run-count 0)
+		(begin
+		  (if (not (hash-table-ref/default flags "-preclean" #f))
+		      (hash-table-set! flags "-preclean" #t))
+		  (if (not (hash-table-ref/default flags "-rerun" #f))
+		      (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS"))
+		  (runs:run-tests target runname test-patts user flags run-count: (- run-count 1)))))
 	  (debug:print-info 0 "No tests to run")))
-    (debug:print-info 4 "All done by here")))
+    (debug:print-info 4 "All done by here")
+    ))
 
 
 ;; loop logic. These are used in runs:run-tests-queue to make it a bit more readable.
 ;;
 ;; If reg not full and have items in tal then loop with (car tal)(cdr tal) reg reruns
@@ -577,11 +587,15 @@
 	 (job-group-limit         (list-ref run-limits-info 4))
 	 (prereqs-not-met         (rmt:get-prereqs-not-met run-id waitons item-path testmode itemmap: itemmap))
 	 ;; (prereqs-not-met         (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
 	 (fails                   (runs:calc-fails prereqs-not-met))
 	 (non-completed           (runs:calc-not-completed prereqs-not-met))
-	 (loop-list               (list hed tal reg reruns)))
+	 (loop-list               (list hed tal reg reruns))
+	 ;; configure the load runner
+	 (numcpus                 (common:get-num-cpus))
+	 (maxload                 (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3")))
+	 (waitdelay               (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60"))))
     (debug:print-info 4 "have-resources: " have-resources " prereqs-not-met: (" 
 		      (string-intersperse 
 		       (map (lambda (t)
 			      (if (vector? t)
 				  (conc (db:test-get-state t) "/" (db:test-get-status t))
@@ -676,10 +690,14 @@
 		    (null? non-completed))))
       ;; (hash-table-delete! *max-tries-hash* (runs:make-full-test-name test-name item-path))
       ;; we are going to reset all the counters for test retries by setting a new hash table
       ;; this means they will increment only when nothing can be run
       (set! *max-tries-hash* (make-hash-table))
+      ;; well, first lets see if cpu load throttling is enabled. If so wait around until the
+      ;; average cpu load is under the threshold before continuing
+      (if (configf:lookup *configdat* "jobtools" "maxload") ;; only gate if maxload is specified
+	  (common:wait-for-cpuload maxload numcpus waitdelay))
       (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry)
       (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'running)
       (runs:shrink-can-run-more-tests-count)  ;; DELAY TWEAKER (still needed?)
       ;; (thread-sleep! *global-delta*)
       (if (or (not (null? tal))(not (null? reg)))
@@ -1034,28 +1052,33 @@
 	 ((not (null? reg)) ;; could we get here with leftovers?
 	  (debug:print-info 0 "Have leftovers!")
 	  (loop (car reg)(cdr reg) '() reruns))
 	 (else
 	  (debug:print-info 4 "Exiting loop with...\n  hed=" hed "\n  tal=" tal "\n  reruns=" reruns))
-	 ))
-	;; now *if* -run-wait we wait for all tests to be done
-      (let loop ((num-running (rmt:get-count-tests-running-for-run-id run-id))
-		 (prev-num-running 0))
-	(if (and (args:get-arg "-run-wait")
-		 (> num-running 0))
-	    (begin
-	      ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
-	      (if (> (current-seconds)(+ last-time-incomplete 900))
-		  (begin
-		    (debug:print-info 0 "Marking stuck tests as INCOMPLETE while waiting for run " run-id ". Running as pid " (current-process-id) " on " (get-host-name))
-		    (set! last-time-incomplete (current-seconds))
-		    (cdb:remote-run db:find-and-mark-incomplete #f)))
-	      (if (not (eq? num-running prev-num-running))
-		  (debug:print-info 0 "run-wait specified, waiting on " num-running " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " (time->string (seconds->local-time (current-seconds)))))
-	      (thread-sleep! 15)
-	      (loop (rmt:get-count-tests-running-for-run-id run-id) num-running))))
-      ) ;; LET* ((test-record
+	 )))
+    ;; now *if* -run-wait we wait for all tests to be done
+    ;; Now wait for any RUNNING tests to complete (if in run-wait mode)
+    (let wait-loop ((num-running      (rmt:get-count-tests-running-for-run-id run-id))
+		    (prev-num-running 0))
+      ;; (debug:print 0 "num-running=" num-running ", prev-num-running=" prev-num-running)
+      (if (and (or (args:get-arg "-run-wait")
+		   (equal? (configf:lookup *configdat* "setup" "run-wait") "yes"))
+	       (> num-running 0))
+	  (begin
+	    ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
+	    ;; (debug:print 0 "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0))
+	    (if (> (current-seconds)(+ last-time-incomplete 900))
+		(begin
+		  (debug:print-info 0 "Marking stuck tests as INCOMPLETE while waiting for run " run-id ". Running as pid " (current-process-id) " on " (get-host-name))
+		  (set! last-time-incomplete (current-seconds))
+		  (cdb:remote-run db:find-and-mark-incomplete #f)))
+	    (if (not (eq? num-running prev-num-running))
+		(debug:print-info 0 "run-wait specified, waiting on " num-running " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " (time->string (seconds->local-time (current-seconds)))))
+	    (thread-sleep! 15)
+	    ;; (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running))))
+	    (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running))))
+    ;; LET* ((test-record
     ;; we get here on "drop through". All done!
     (debug:print-info 1 "All tests launched")))
 
 (define (runs:calc-fails prereqs-not-met)
   (filter (lambda (test)
@@ -1115,10 +1138,11 @@
 	 (itemdat      (tests:testqueue-get-itemdat    test-record))
 	 (test-path    (hash-table-ref all-tests-registry test-name)) ;; (conc *toppath* "/tests/" test-name)) ;; could use tests:get-testconfig here ...
 	 (force        (hash-table-ref/default flags "-force" #f))
 	 (rerun        (hash-table-ref/default flags "-rerun" #f))
 	 (keepgoing    (hash-table-ref/default flags "-keepgoing" #f))
+	 (incomplete-timeout (string->number (or (configf:lookup *configdat* "setup" "incomplete-timeout") "x")))
 	 (item-path     "")
 	 (db           #f)
 	 (full-test-name #f))
 
     ;; setting itemdat to a list if it is #f
@@ -1262,11 +1286,12 @@
 	 (debug:print 1 "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.")
 	 (hash-table-set! test-registry (runs:make-full-test-name test-name test-path) 'DONOTRUN)) ;; KILLED))
 	((LAUNCHED REMOTEHOSTSTART RUNNING)  
 	 (if (> (- (current-seconds)(+ (db:test-get-event_time testdat)
 				       (db:test-get-run_duration testdat)))
-		600) ;; i.e. no update for more than 600 seconds
+		(or incomplete-timeout
+		    6000)) ;; i.e. no update for more than 6000 seconds
 	     (begin
 	       (debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD")
 	       (tests:test-set-status! run-id test-id "INCOMPLETE" "STUCK/DEAD" "" #f))
 	       ;; (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "" #f))
 	     (debug:print 2 "NOTE: " test-name " is already running")))