@@ -51,10 +51,11 @@
   can-run-more-tests
   ((can-run-more-tests-count 0) : fixnum)
   (last-fuel-check         0)  ;; time when we last checked fuel
   (beginning-of-time       (current-seconds))
   (load-mgmt-function      #f)
+  (wait-for-jobs-function  #f)
   )
 
 (defstruct runs:testdat
   hed tal reg reruns  test-record
   test-name item-path jobgroup
@@ -1676,40 +1677,51 @@
           (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-2")
 	  (debug:print-info 4 *default-log-port* "OUTER COND: (not items)")
 	  (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests))
 		   (not (null? tal)))
 	      (loop (car tal)(cdr tal) reg reruns))
-	  (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))
-
-	  ;; This would be a good place to block on homehost load
 
 	  ;; gonna try a strategy change here.
 	  ;;
 	  ;; check if can run more tests. if yes, continue, if no, rest for 10 seconds, check again
 	  ;; repeat until can run more tests
 	  ;;
 	  ;; look at the test jobgroup and tot jobs running
-	  (let loop-can-run-more
-	      ((res      (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))
-	       (remtries 60))
-	    (match res
-              ((run-more num-running . rem)
-	       (if (or run-more
-		       (< remtries 1))
-		   (begin
-		     (if (runs:lownoise "num-running" 30)
-			 (debug:print-info 0 *default-log-port* "Have "num-running" tests of max " max-concurrent-jobs))
-		     (runs:dat-can-run-more-tests-set! runsdat res)) ;; capture the result and then drop through
-		   (begin
-		     (if (runs:lownoise "num-running" 10)
-			 (debug:print-info 0 *default-log-port* "Can't run more tests, have "num-running" tests of "
-					   max-concurrent-jobs " allowed."))
-		     (thread-sleep! 5) ;; if we've hit max concurrent jobs take a breather, nb// make this configurable
-		     (loop-can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)
-					(- remtries 1)))))))
-	      
-	  (let ((loop-list (runs:process-expanded-tests runsdat testdat)))
+	  (if (not (runs:dat-wait-for-jobs-funcion runsdat))
+	      (runs:dat-wait-for-jobs-function-set!
+	       runsdat 
+	       (lambda ()
+		 (let loop-can-run-more
+		     ((res      (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))
+		      (remtries 1440)) ;; we can wait for up to two hours for jobs to get done
+		   (match res
+			  ((run-more num-running . rem)
+			   (if (or run-more
+				   (< remtries 1))
+			       (begin
+				 (if (runs:lownoise "num-running" 30)
+				     (debug:print-info 0 *default-log-port* "Have "num-running" tests of max " max-concurrent-jobs))
+				 (runs:dat-can-run-more-tests-set! runsdat res)) ;; capture the result and then drop through
+			       (begin
+				 (if (runs:lownoise "num-running" 10)
+				     (debug:print-info 0 *default-log-port* "Can't run more tests, have "num-running" tests of "
+						       max-concurrent-jobs " allowed."))
+				 (thread-sleep! 5) ;; if we've hit max concurrent jobs take a breather, nb// make this configurable
+				 
+				 ;; wait for load here
+				 (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
+				 (loop-can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)
+						    (- remtries 1)))))))
+		 )))
+
+	  ;; I'm not clear on why prereqs are gathered here TODO: verfiy this is needed
+	  (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))
+
+	  ;; I'm not clear on why we'd capture running job counts here TODO: verify this is needed
+	  (runs:dat-can-run-more-tests-set! runsdat (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))
+
+	  (let ((loop-list (runs:process-expanded-tests runsdat testdat))) ;; in process-expanded-tests ultimately run:test -> launch-test -> test actually running
             (if loop-list (apply loop loop-list))))
 
 	 ;; items processed into a list but not came in as a list been processed
 	 ;;
 	 ((and (list? items)     ;; thus we know our items are already calculated
@@ -2085,15 +2097,23 @@
 		       (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test)
 		       (debug:print-info 1 *default-log-port* "SKIPPING Test " full-test-name " due to " skip-test))
 		     ;;
 		     ;; Here the test is handed off to launch.scm for launch-test to complete the launch process
 		     ;;
-		     (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags))
-			 (begin
-			   (print "ERROR: Failed to launch the test. Exiting as soon as possible")
-			   (set! *globalexitstatus* 1) ;; 
-			   (process-signal (current-process-id) signal/kill))))))))
+		     (begin
+		       ;; wait for less than max jobs here
+		       (if (runs:dat-wait-for-jobs-function runsdat)
+			   ((runs:dat-wait-for-jobs-function runsdat)))
+		       
+		       (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags))
+			   (begin
+			     (print "ERROR: Failed to launch the test. Exiting as soon as possible")
+			     (set! *globalexitstatus* 1) ;; 
+			     (process-signal (current-process-id) signal/kill))
+			   )
+		       ;; wait again here?
+		       ))))))
 	((KILLED) 
 	 (debug:print 1 *default-log-port* "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.")
 	 (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)) ;; KILLED))
 	((LAUNCHED REMOTEHOSTSTART RUNNING)  
 	 (debug:print 2 *default-log-port* "NOTE: " test-name " is already running"))