Index: common.scm
==================================================================
--- common.scm
+++ common.scm
@@ -335,14 +335,14 @@
              (read-only
               (debug:print 0 *default-log-port* "   You have read-only access to this area.  Cannot proceed with megatest version migration.")
               (exit 1))
              (else
               (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"")
-              (exit 1)))))
-      (begin
-	(debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.")
-	(exit 1))))
+              (exit 1)))))))
+;;      (begin
+;;	(debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.")
+;;	(exit 1))))
 
 ;;======================================================================
 ;; S P A R S E   A R R A Y S
 ;;======================================================================
 
@@ -1483,31 +1483,36 @@
            (set! best-load load)
            (set! best-host hostname)))))
      hosts)
     best-host))
 
-
-
-
 (define (common:wait-for-cpuload maxload numcpus waitdelay #!key (count 1000) (msg #f)(remote-host #f))
   (let* ((loadavg (common:get-cpu-load remote-host))
 	 (first   (car loadavg))
 	 (next    (cadr loadavg))
 	 (adjload (* maxload numcpus))
 	 (loadjmp (- first next)))
     (cond
      ((and (> first adjload)
 	   (> count 0))
-      (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load " first " exceeding max of " adjload (if msg msg ""))
+      (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load " first " exceeding max of " adjload " " (if msg msg ""))
       (thread-sleep! waitdelay)
-      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1)))
+      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host)) ;; retry against the SAME host with the same msg; dropping the keys fell back to local load
      ((and (> loadjmp numcpus)
 	   (> count 0))
-      (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load jump " loadjmp " > numcpus " numcpus (if msg msg ""))
+      (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load jump " loadjmp " > numcpus " numcpus " " (if msg msg "")) ;; " " separator matches the load-exceeded message above
       (thread-sleep! waitdelay)
-      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1))))))
+      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host)))))
 
+(define (common:wait-for-homehost-load maxload msg)
+  ;; Wait for the homehost's normalized load to become acceptable; msg is passed on for the wait log line.
+  (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
+                     #f
+                     (common:get-homehost)))
+         (hh     (if hh-dat (car hh-dat) #f))) ;; no cpu-count lookup here: common:wait-for-normalized-load does it for hh
+    (common:wait-for-normalized-load maxload msg: msg remote-host: hh)))
+
 (define (common:get-num-cpus remote-host)
   (let ((proc (lambda ()
 		(let loop ((numcpu 0)
 			   (inl    (read-line)))
 		  (if (eof-object? inl)
@@ -1524,11 +1529,11 @@
 
 ;; wait for normalized cpu load to drop below maxload
 ;;
 (define (common:wait-for-normalized-load maxload #!key (msg #f)(remote-host #f))
   (let ((num-cpus (common:get-num-cpus remote-host)))
-    (common:wait-for-cpuload maxload num-cpus 15 msg: msg)))
+    (common:wait-for-cpuload maxload num-cpus 15 msg: msg remote-host: remote-host))) ;; 15 is the waitdelay (seconds slept between load checks); load is sampled on the same host the cpu count came from
 
 (define (get-uname . params)
   (let* ((uname-res (process:cmd-run->list (conc "uname " (if (null? params) "-a" (car params)))))
 	 (uname #f))
     (if (null? (car uname-res))

ADDED   emergency-patch-1.scm
Index: emergency-patch-1.scm
==================================================================
--- /dev/null
+++ emergency-patch-1.scm
@@ -0,0 +1,203 @@
+
+
+;; Called by the server on receipt of /api calls: dispatches one request to the matching db:/tasks: handler.
+;;    - keep it simple, only return the actual result of the call, i.e. no meta info here
+;;    - dat must be #( cmd params ); cmd is a symbol or a string, params is the handler's argument list
+;;    - returns #( flag result ); flag is #f on an exception, a non-vector dat, server overload,
+;;      or a write command on a read-only database, and #t otherwise (including unknown commands)
+(define (api:execute-requests dbstruct dat)
+  (handle-exceptions
+   exn
+   (let ((call-chain (get-call-chain)))
+     (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat)
+     (print-call-chain (current-error-port))
+     (debug:print 0 *default-log-port* " message: "  ((condition-property-accessor 'exn 'message) exn))
+     (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens
+   (cond
+    ((not (vector? dat))                    ;; it is an error to not receive a vector
+     (vector #f (vector #f "remote must be called with a vector")))
+    ((> *api-process-request-count* 20) ;; 20)
+     (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.")
+     (set! *server-overloaded* #t)
+     (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor!
+    (else
+     (let* ((cmd-in            (vector-ref dat 0))
+            (cmd               (if (symbol? cmd-in)
+				   cmd-in
+				   (string->symbol cmd-in)))
+            (params            (vector-ref dat 1))
+            (start-t           (current-milliseconds))
+            (readonly-mode     (dbr:dbstruct-read-only dbstruct))
+            (readonly-command  (member cmd api:read-only-queries))
+            (writecmd-in-readonly-mode (and readonly-mode (not readonly-command)))
+            (res
+             (if writecmd-in-readonly-mode
+                 (conc "attempt to run write command "cmd" on a read-only database")
+                 (case cmd
+                   ;;===============================================
+                   ;; READ/WRITE QUERIES
+                   ;;===============================================
+
+                   ((get-keys-write)                        (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl
+
+                   ;; SERVERS
+                   ((start-server)                    (apply server:kind-run params))
+                   ((kill-server)                     (set! *server-run* #f))
+
+                   ;; TESTS
+
+                   ;;((test-set-state-status-by-id)     (apply mt:test-set-state-status-by-id dbstruct params))
+                   ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previously did more, but now only passes thru to db:set-state-status-and-roll-up-items.
+                   ((test-set-state-status-by-id)
+
+                    ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
+                    (db:set-state-status-and-roll-up-items
+                     dbstruct
+                     (list-ref params 0) ; run-id
+                     (list-ref params 1) ; test-name
+                     #f                  ; item-path
+                     (list-ref params 2) ; state
+                     (list-ref params 3) ; status
+                     (list-ref params 4) ; comment
+                     ))
+
+                   ((delete-test-records)             (apply db:delete-test-records dbstruct params))
+                   ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params))
+                   ((test-set-state-status)           (apply db:test-set-state-status dbstruct params))
+                   ((test-set-top-process-pid)        (apply db:test-set-top-process-pid dbstruct params))
+                   ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params))
+                   ((top-test-set-per-pf-counts)      (apply db:top-test-set-per-pf-counts dbstruct params))
+                   ((test-set-archive-block-id)       (apply db:test-set-archive-block-id dbstruct params))
+
+                   ;; RUNS
+                   ((register-run)                 (apply db:register-run dbstruct params))
+                   ((set-tests-state-status)       (apply db:set-tests-state-status dbstruct params))
+                   ((delete-run)                   (apply db:delete-run dbstruct params))
+                   ((lock/unlock-run)              (apply db:lock/unlock-run dbstruct params))
+                   ((update-run-event_time)        (apply db:update-run-event_time dbstruct params))
+                   ((update-run-stats)             (apply db:update-run-stats dbstruct params))
+                   ((set-var)                      (apply db:set-var dbstruct params))
+                   ((del-var)                      (apply db:del-var dbstruct params))
+
+                   ;; STEPS
+                   ((teststep-set-status!)         (apply db:teststep-set-status! dbstruct params))
+
+                   ;; TEST DATA
+                   ((test-data-rollup)             (apply db:test-data-rollup dbstruct params))
+                   ((csv->test-data)               (apply db:csv->test-data dbstruct params))
+
+                   ;; MISC
+                   ((sync-inmem->db)               (let ((run-id (car params)))
+                                                     (db:sync-touched dbstruct run-id force-sync: #t)))
+                   ((mark-incomplete)              (apply db:find-and-mark-incomplete dbstruct params))
+
+                   ;; TESTMETA
+                   ((testmeta-add-record)       (apply db:testmeta-add-record dbstruct params))
+                   ((testmeta-update-field)     (apply db:testmeta-update-field dbstruct params))
+                   ((get-tests-tags)            (db:get-tests-tags dbstruct))
+
+                   ;; TASKS
+                   ((tasks-add)                 (apply tasks:add dbstruct params))
+                   ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params))
+                   ((tasks-get-last)            (apply tasks:get-last dbstruct params))
+
+		   ;; NO SYNC DB
+		   ((no-sync-set)               (apply db:no-sync-set         *no-sync-db* params))
+		   ((no-sync-get/default)       (apply db:no-sync-get/default *no-sync-db* params))
+		   ((no-sync-del!)              (apply db:no-sync-del!        *no-sync-db* params))
+
+                   ;; ARCHIVES
+                   ;; ((archive-get-allocations)
+                   ((archive-register-disk)     (apply db:archive-register-disk dbstruct params))
+                   ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params))
+                   ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct params)) ;; use params like every sibling; block-id/testsuite-name/areakey are not bound in this procedure
+
+                   ;;======================================================================
+                   ;; READ ONLY QUERIES
+                   ;;======================================================================
+
+                   ;; KEYS
+                   ((get-key-val-pairs)               (apply db:get-key-val-pairs dbstruct params))
+                   ((get-keys)                        (db:get-keys dbstruct))
+                   ((get-key-vals)                    (apply db:get-key-vals dbstruct params))
+                   ((get-target)                      (apply db:get-target dbstruct params))
+                   ((get-targets)                     (db:get-targets dbstruct))
+
+                   ;; ARCHIVES
+                   ((test-get-archive-block-info)     (apply db:test-get-archive-block-info dbstruct params))
+
+                   ;; TESTS
+                   ((test-toplevel-num-items)         (apply db:test-toplevel-num-items dbstruct params))
+                   ((get-test-info-by-id)	       (apply db:get-test-info-by-id dbstruct params))
+                   ((test-get-rundir-from-test-id)    (apply db:test-get-rundir-from-test-id dbstruct params))
+                   ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params))
+                   ((get-count-tests-running)         (apply db:get-count-tests-running dbstruct params))
+                   ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params))
+                   ;; ((delete-test-step-records)        (apply db:delete-test-step-records dbstruct params))
+                   ((get-previous-test-run-record)    (apply db:get-previous-test-run-record dbstruct params))
+                   ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params))
+                   ((test-get-logfile-info)           (apply db:test-get-logfile-info dbstruct params))
+                   ((test-get-records-for-index-file)  (apply db:test-get-records-for-index-file dbstruct params))
+                   ((get-testinfo-state-status)       (apply db:get-testinfo-state-status dbstruct params))
+                   ((test-get-top-process-pid)        (apply db:test-get-top-process-pid dbstruct params))
+                   ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params))
+                   ((get-prereqs-not-met)             (apply db:get-prereqs-not-met dbstruct params))
+                   ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params))
+                   ((synchash-get)                    (apply synchash:server-get dbstruct params))
+                   ((get-raw-run-stats)               (apply db:get-raw-run-stats dbstruct params))
+
+                   ;; RUNS
+                   ((get-run-info)                 (apply db:get-run-info dbstruct params))
+                   ((get-run-status)               (apply db:get-run-status dbstruct params))
+                   ((set-run-status)               (apply db:set-run-status dbstruct params))
+                   ((get-tests-for-run)            (apply db:get-tests-for-run dbstruct params))
+                   ((get-test-id)                  (apply db:get-test-id dbstruct params))
+                   ((get-tests-for-run-mindata)    (apply db:get-tests-for-run-mindata dbstruct params))
+                   ((get-runs)                     (apply db:get-runs dbstruct params))
+                   ((get-num-runs)                 (apply db:get-num-runs dbstruct params))
+                   ((get-all-run-ids)              (db:get-all-run-ids dbstruct))
+                   ((get-prev-run-ids)             (apply db:get-prev-run-ids dbstruct params))
+                   ((get-run-ids-matching-target)  (apply db:get-run-ids-matching-target dbstruct params))
+                   ((get-runs-by-patt)             (apply db:get-runs-by-patt dbstruct params))
+                   ((get-run-name-from-id)         (apply db:get-run-name-from-id dbstruct params))
+                   ((get-main-run-stats)           (apply db:get-main-run-stats dbstruct params))
+                   ((get-var)                      (apply db:get-var dbstruct params))
+                   ((get-run-stats)                (apply db:get-run-stats dbstruct params))
+
+                   ;; STEPS
+                   ((get-steps-data)               (apply db:get-steps-data dbstruct params))
+                   ((get-steps-for-test)           (apply db:get-steps-for-test dbstruct params))
+
+                   ;; TEST DATA
+                   ((read-test-data)               (apply db:read-test-data dbstruct params))
+                   ((read-test-data*)              (apply db:read-test-data* dbstruct params))
+
+                   ;; MISC
+                   ((get-latest-host-load)         (apply db:get-latest-host-load dbstruct params))
+                   ((have-incompletes?)            (apply db:have-incompletes? dbstruct params))
+                   ((login)                        (apply db:login dbstruct params))
+                   ((general-call)                 (let ((stmtname   (car params))
+                                                         (run-id     (cadr params))
+                                                         (realparams (cddr params)))
+                                                     (db:general-call dbstruct stmtname realparams)))
+                   ((sdb-qry)                      (apply sdb:qry params))
+                   ((ping)                         (current-process-id))
+		   ((get-changed-record-ids)       (apply db:get-changed-record-ids dbstruct params))
+
+                   ;; TESTMETA
+                   ((testmeta-get-record)       (apply db:testmeta-get-record dbstruct params))
+
+                   ;; TASKS
+                   ((find-task-queue-records)   (apply tasks:find-task-queue-records dbstruct params))
+		   (else
+		    (debug:print 0 *default-log-port* "ERROR: bad api call " cmd)
+		    (conc "ERROR: BAD api call " cmd))))))
+
+       ;; save all stats: prepend this call's elapsed milliseconds to the per-command list
+       (let ((delta-t (- (current-milliseconds)
+			 start-t)))
+	 (hash-table-set! *db-api-call-time* cmd
+			  (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '()))))
+       (if writecmd-in-readonly-mode
+	   (vector #f res)
+           (vector #t res)))))))

ADDED   emergency-patch-2.scm
Index: emergency-patch-2.scm
==================================================================
--- /dev/null
+++ emergency-patch-2.scm
@@ -0,0 +1,311 @@
+(include "common_records.scm")
+(include "key_records.scm")
+(include "db_records.scm")
+(include "run_records.scm")
+(include "test_records.scm")
+
+(define (common:wait-for-cpuload maxload numcpus waitdelay #!key (count 1000) (msg #f)(remote-host #f)) ;; sleep and re-check (at most count times) while the host looks overloaded
+  (let* ((loadavg (common:get-cpu-load remote-host))
+	 (first   (car loadavg))          ;; first and second values of the load reading
+	 (next    (cadr loadavg))
+	 (adjload (* maxload numcpus))    ;; maxload is per cpu, so scale it by the cpu count
+	 (loadjmp (- first next)))        ;; how far the first value is above the second
+    (cond
+     ((and (> first adjload)
+	   (> count 0))
+      (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load " first " exceeding max of " adjload " " (if msg msg ""))
+      (thread-sleep! waitdelay)
+      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host)) ;; retry against the SAME host with the same msg; dropping the keys fell back to local load
+     ((and (> loadjmp numcpus)
+	   (> count 0))
+      (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load jump " loadjmp " > numcpus " numcpus " " (if msg msg "")) ;; " " separator matches the message above
+      (thread-sleep! waitdelay)
+      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host)))))
+
+(define (common:wait-for-homehost-load maxload msg)
+  ;; Wait for the homehost's normalized load to become acceptable; msg is passed on for the wait log line.
+  (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
+                     #f
+                     (common:get-homehost)))
+         (hh     (if hh-dat (car hh-dat) #f))) ;; no cpu-count lookup here: common:wait-for-normalized-load does it for hh
+    (common:wait-for-normalized-load maxload msg: msg remote-host: hh)))
+
+;; Hold here until the cpu load, scaled by the cpu count of remote-host,
+;; is acceptable relative to maxload. msg is forwarded for the wait log line.
+(define (common:wait-for-normalized-load maxload #!key (msg #f)(remote-host #f))
+  (common:wait-for-cpuload maxload (common:get-num-cpus remote-host) 15 ;; 15 is the waitdelay in seconds
+			   msg: msg remote-host: remote-host))
+
+;;  hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmaps)
+(define (runs:process-expanded-tests runsdat testdat)
+  ;; Handle hed, the test at the head of the queue; returns the next (hed tal reg reruns) list or #f. First unroll the contents of runsdat and testdat (due to ongoing refactoring).
+  (let* ((hed                    (runs:testdat-hed testdat))
+	 (tal                    (runs:testdat-tal testdat))
+	 (reg                    (runs:testdat-reg testdat))
+	 (reruns                 (runs:testdat-reruns testdat))
+	 (test-name              (runs:testdat-test-name testdat))
+	 (item-path              (runs:testdat-item-path testdat))
+	 (jobgroup               (runs:testdat-jobgroup testdat))
+	 (waitons                (runs:testdat-waitons testdat))
+	 (item-path              (runs:testdat-item-path testdat)) ;; repeats the item-path binding above (same accessor, same value)
+	 (testmode               (runs:testdat-testmode testdat))
+	 (newtal                 (runs:testdat-newtal testdat))
+	 (itemmaps               (runs:testdat-itemmaps testdat))
+	 (test-record            (runs:testdat-test-record testdat))
+	 (prereqs-not-met        (runs:testdat-prereqs-not-met testdat))
+	 ;; bindings above are read from testdat; the runs:dat-* bindings below are read from runsdat
+	 (reglen                 (runs:dat-reglen runsdat))
+	 (regfull                (runs:dat-regfull runsdat))
+	 (runname                (runs:dat-runname runsdat))
+	 (max-concurrent-jobs    (runs:dat-max-concurrent-jobs runsdat))
+	 (run-id                 (runs:dat-run-id runsdat))
+	 (test-patts             (runs:dat-test-patts runsdat))
+	 (required-tests         (runs:dat-required-tests runsdat))
+	 (test-registry          (runs:dat-test-registry runsdat))
+	 (registry-mutex         (runs:dat-registry-mutex runsdat))
+	 (flags                  (runs:dat-flags runsdat))
+	 (keyvals                (runs:dat-keyvals runsdat))
+	 (run-info               (runs:dat-run-info runsdat))
+	 (all-tests-registry     (runs:dat-all-tests-registry runsdat))
+	 (run-limits-info        (runs:dat-can-run-more-tests runsdat))
+	 ;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
+	 (have-resources         (car run-limits-info))
+	 (num-running            (list-ref run-limits-info 1))
+	 (num-running-in-jobgroup(list-ref run-limits-info 2)) 
+	 (max-concurrent-jobs    (list-ref run-limits-info 3)) ;; let* rebinding: shadows the runs:dat-max-concurrent-jobs value bound above
+	 (job-group-limit        (list-ref run-limits-info 4))
+	 ;; (prereqs-not-met        (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))
+	 ;; (prereqs-not-met         (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
+	 (fails                  (if (list? prereqs-not-met)
+				      (runs:calc-fails prereqs-not-met)
+				      (begin
+					(debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met)
+					'())))
+	 (non-completed           (filter (lambda (x)             ;; remove hed from not completed list, duh, of course it is not completed!
+					    (not (equal? x hed)))
+					  (runs:calc-not-completed prereqs-not-met)))
+	 (loop-list               (list hed tal reg reruns))
+	 ;; configure the load runner
+	 (numcpus                 (common:get-num-cpus #f))
+	 (maxload                 (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3.0")))         ;; use a non-number string to disable
+         (maxhomehostload         (string->number (or (configf:lookup *configdat* "jobtools" "maxhomehostload") "1.2"))) ;; use a non-number string to disable
+         (waitdelay               (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60"))))
+    (debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: (" 
+		      (string-intersperse 
+		       (map (lambda (t)
+			      (if (vector? t)
+				  (conc (db:test-get-state t) "/" (db:test-get-status t))
+				  (conc " WARNING: t is not a vector=" t )))
+			    prereqs-not-met)
+		       ", ") ") fails: " fails
+		       "\nregistered? " (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f))
+			    
+
+    
+    (if (and (not (null? prereqs-not-met))
+	     (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
+	(debug:print-info 2 *default-log-port* "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", ")))
+
+    ;; Don't know at this time if the test has been launched at some time in the past
+    ;; i.e. is this a re-launch?
+    (debug:print-info 4 *default-log-port* "run-limits-info = " run-limits-info)
+    
+    (cond
+     
+     ;; Check item path against item-patts, 
+     ;;
+     ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run
+      ;; else the run is stuck, temporarily or permanently
+      ;; but should check if it is due to lack of resources vs. prerequisites
+      (debug:print-info 1 *default-log-port* "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts)
+      (if (or (not (null? tal))(not (null? reg)))
+	  (list (runs:queue-next-hed tal reg reglen regfull)
+		(runs:queue-next-tal tal reg reglen regfull)
+		(runs:queue-next-reg tal reg reglen regfull)
+		reruns)
+	  #f))
+     
+     ;; Register tests 
+     ;;
+     ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f))
+      (debug:print-info 4 *default-log-port* "Pre-registering test " test-name "/" item-path " to create placeholder" )
+      ;; always do firm registration now in v1.60 and greater ;; (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs
+      (let register-loop ((numtries 15))
+	(rmt:register-test run-id test-name item-path)
+	(if (rmt:get-test-id run-id test-name item-path)
+	    (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'done)
+	    (if (> numtries 0)
+		(begin
+		  (thread-sleep! 0.5)
+		  (register-loop (- numtries 1)))
+		(debug:print-error 0 *default-log-port* "failed to register test " (db:test-make-full-name test-name item-path)))))
+      (if (not (eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f) 'done))
+	  (begin
+	    (rmt:register-test run-id test-name "")
+	    (if (rmt:get-test-id run-id test-name "")
+		(hash-table-set! test-registry (db:test-make-full-name test-name "") 'done))))
+      (runs:shrink-can-run-more-tests-count runsdat)   ;; DELAY TWEAKER (still needed?)
+      (if (and (null? tal)(null? reg))
+	  (list hed tal (append reg (list hed)) reruns)
+	  (list (runs:queue-next-hed tal reg reglen regfull)
+		(runs:queue-next-tal tal reg reglen regfull)
+		;; NB// Here we are building reg as we register tests
+		;; if regfull we must pop the front item off reg
+		(if regfull
+		    (append (cdr reg) (list hed))
+		    (append reg (list hed)))
+		reruns)))
+     
+     ;; At this point hed test registration must be completed.
+     ;;
+     ((eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)
+	   'start)
+      (debug:print-info 0 *default-log-port* "Waiting on test registration(s): "
+			(string-intersperse 
+			 (filter (lambda (x)
+				   (eq? (hash-table-ref/default test-registry x #f) 'start))
+				 (hash-table-keys test-registry))
+			 ", "))
+      (thread-sleep! 0.051)
+      (list hed tal reg reruns))
+     
+     ;; If no resources are available just kill time and loop again
+     ;;
+     ((not have-resources) ;; simply try again after waiting a second
+      (if (runs:lownoise "no resources" 60)
+	  (debug:print-info 1 *default-log-port* "no resources to run new tests, waiting ..."))
+      ;; Have gone back and forth on this but db starvation is an issue.
+      ;; wait one second before looking again to run jobs.
+      (thread-sleep! 1)
+      ;; could have done hed tal here but doing car/cdr of newtal to rotate tests
+      (list (car newtal)(cdr newtal) reg reruns))
+     
+     ;; This is the final stage, everything is in place so launch the test
+     ;;
+     ((and have-resources
+	   (or (null? prereqs-not-met)
+	       (and (member 'toplevel testmode) ;;  'toplevel)
+		    (null? non-completed)
+		    (not (member 'exclusive testmode)))))
+      ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path))
+      ;; we are going to reset all the counters for test retries by setting a new hash table
+      ;; this means they will increment only when nothing can be run
+      (set! *max-tries-hash* (make-hash-table))
+      ;; well, first lets see if cpu load throttling is enabled. If so wait around until the
+      ;; average cpu load is under the threshold before continuing
+      (if maxload ;; only gate if maxload is specified
+          (common:wait-for-cpuload maxload numcpus waitdelay))
+      (if maxhomehostload
+          (common:wait-for-homehost-load maxhomehostload (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload)))
+      
+      (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry)
+      (runs:incremental-print-results run-id)
+      (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running)
+      (runs:shrink-can-run-more-tests-count runsdat)  ;; DELAY TWEAKER (still needed?)
+      ;; (thread-sleep! *global-delta*)
+      (if (or (not (null? tal))(not (null? reg)))
+	  (list (runs:queue-next-hed tal reg reglen regfull)
+		(runs:queue-next-tal tal reg reglen regfull)
+		(runs:queue-next-reg tal reg reglen regfull)
+		reruns)
+	  #f))
+     
+     ;; must be we have unmet prerequisites
+     ;;
+     (else
+      (debug:print 4 *default-log-port* "FAILS: " fails)
+      ;; If one or more of the prereqs-not-met are FAIL then we can issue
+      ;; a message and drop hed from the items to be processed.
+      ;; (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met)
+      (if (and (not (null? prereqs-not-met))
+	       (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
+	  (debug:print-info 1 *default-log-port* "waiting on tests; " (string-intersperse 
+						    (runs:mixed-list-testname-and-testrec->list-of-strings 
+						     prereqs-not-met) ", ")))
+      (if (or (null? fails)
+	      (member 'toplevel testmode))
+	  (begin
+	    ;; couldn't run, take a breather
+	    (if  (runs:lownoise "Waiting for more work to do..." 60)
+		 (debug:print-info 0 *default-log-port* "Waiting for more work to do..."))
+	    (thread-sleep! 1)
+	    (list (car newtal)(cdr newtal) reg reruns))
+	  ;; the waiton is FAIL so no point in trying to run hed ever again
+	  (if (or (not (null? reg))(not (null? tal)))
+	      (if (vector? hed)
+		  (begin
+		    (debug:print 1 *default-log-port* "WARNING: Dropping test " test-name "/" item-path
+				 " from the launch list as it has prerequistes that are FAIL")
+		    (let ((test-id (rmt:get-test-id run-id hed "")))
+		      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))
+		    (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
+		    ;; (thread-sleep! *global-delta*)
+		    ;; This next is for the items
+		    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f)
+		    (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed)
+		    (list (runs:queue-next-hed tal reg reglen regfull)
+			  (runs:queue-next-tal tal reg reglen regfull)
+			  (runs:queue-next-reg tal reg reglen regfull)
+			  reruns ;; WAS: (cons hed reruns) ;; but that makes no sense?
+			  ))
+		  (let ((nth-try (hash-table-ref/default test-registry hed 0))) ;; retry counter (number) or state symbol stored under hed; the default is 0, so never #f from a missing key
+		    (cond
+		     ((member "RUNNING" (map db:test-get-state prereqs-not-met))
+		      (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60)
+			  (debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet."))
+		      (thread-sleep! 4)
+		      (list (runs:queue-next-hed newtal reg reglen regfull)
+			    (runs:queue-next-tal newtal reg reglen regfull)
+			    (runs:queue-next-reg newtal reg reglen regfull)
+			    reruns))
+		     ((or (not nth-try)
+			  (and (number? nth-try)
+			       (< nth-try 10)))
+		      (hash-table-set! test-registry hed (if (number? nth-try)
+							     (+ nth-try 1)
+							     0))
+		      (if (runs:lownoise (conc "not removing test " hed) 60)
+			  (debug:print 1 *default-log-port* "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites"))
+		      ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;;  " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)")
+		      (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
+		      ;; (list hed tal reg reruns)
+		      ;; (list (car newtal)(cdr newtal) reg reruns)
+		      ;; (hash-table-set! test-registry hed 'removed)
+		      (list (runs:queue-next-hed newtal reg reglen regfull)
+			    (runs:queue-next-tal newtal reg reglen regfull)
+			    (runs:queue-next-reg newtal reg reglen regfull)
+			    reruns))
+		     ((symbol? nth-try)
+		      (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW
+			  (if (null? tal)
+			      #f ;; yes, really
+			      (list (car tal)(cdr tal) reg reruns))
+			  (begin
+			    (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60)
+				(debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state " nth-try " will be overridden and we'll retry."))
+			    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f)
+			    (hash-table-set! test-registry hed 0)
+			    (list (runs:queue-next-hed newtal reg reglen regfull)
+				  (runs:queue-next-tal newtal reg reglen regfull)
+				  (runs:queue-next-reg newtal reg reglen regfull)
+				  reruns))))
+		     (else
+		      (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
+			  (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))
+		      ;; (debug:print 0 *default-log-port* "         prereqs: " prereqs-not-met)
+		      (hash-table-set! test-registry hed 'removed)
+		      (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f)
+		      ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug.
+		      (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL
+		      (list (if (null? tal)(car newtal)(car tal))
+			    tal
+			    reg
+			    reruns)))))
+	      ;; can't drop this - maybe running? Just keep trying
+	      (let ((runable-tests (runs:runable-tests prereqs-not-met)))
+		(if (null? runable-tests)
+		    #f   ;; I think we are truly done here
+		    (list (runs:queue-next-hed newtal reg reglen regfull)
+			    (runs:queue-next-tal newtal reg reglen regfull)
+			    (runs:queue-next-reg newtal reg reglen regfull)
+			    reruns)))))))))

Index: runs.scm
==================================================================
--- runs.scm
+++ runs.scm
@@ -840,12 +840,13 @@
 					    (not (equal? x hed)))
 					  (runs:calc-not-completed prereqs-not-met)))
 	 (loop-list               (list hed tal reg reruns))
 	 ;; configure the load runner
 	 (numcpus                 (common:get-num-cpus #f))
-	 (maxload                 (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3")))
-	 (waitdelay               (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60"))))
+	 (maxload                 (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3.0")))         ;; use a non-number string to disable
+         (maxhomehostload         (string->number (or (configf:lookup *configdat* "jobtools" "maxhomehostload") "1.2"))) ;; use a non-number string to disable
+         (waitdelay               (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60"))))
     (debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: (" 
 		      (string-intersperse 
 		       (map (lambda (t)
 			      (if (vector? t)
 				  (conc (db:test-get-state t) "/" (db:test-get-status t))
@@ -945,12 +946,15 @@
       ;; we are going to reset all the counters for test retries by setting a new hash table
       ;; this means they will increment only when nothing can be run
       (set! *max-tries-hash* (make-hash-table))
       ;; well, first lets see if cpu load throttling is enabled. If so wait around until the
       ;; average cpu load is under the threshold before continuing
-      (if (configf:lookup *configdat* "jobtools" "maxload") ;; only gate if maxload is specified
-	  (common:wait-for-cpuload maxload numcpus waitdelay))
+      (if maxload ;; gate unless maxload was set to a non-number string (defaults to 3.0 when not configured)
+          (common:wait-for-cpuload maxload numcpus waitdelay))
+      (if maxhomehostload
+          (common:wait-for-homehost-load maxhomehostload (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload)))
+      
       (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry)
       (runs:incremental-print-results run-id)
       (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running)
       (runs:shrink-can-run-more-tests-count runsdat)  ;; DELAY TWEAKER (still needed?)
       ;; (thread-sleep! *global-delta*)