Index: common.scm
==================================================================
--- common.scm
+++ common.scm
@@ -570,11 +570,11 @@
 	       (let* ((fullname (conc "logs/" file)))
 		 (if (directory? fullname)
 		     (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.")
 		     (handle-exceptions
 		      exn
-		      (debug:print-error 0 *default-log-port* "failed to remove " fullname ", exn=" exn)
+		      (debug:print-info 0 *default-log-port* "failed to remove " fullname ", exn=" exn)
 		      (delete-file* fullname)))))
 	     files)
 	    (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files."))))))
   
 ;; Force a megatest cleanup-db if version is changed and skip-version-check not specified
@@ -1315,25 +1315,32 @@
       (debug:print-info 0 *default-log-port* "using testpatt " args-testpatt " rtestpatt:" rtestpatt)
       args-testpatt))))
 
 
 
-(define (common:false-on-exception thunk #!key (message #f))
-  (handle-exceptions exn
+;; Call thunk and return its value; on exception log message (if given), making up to `tries` attempts, else return #f
+(define (common:false-on-exception thunk #!key (message #f)(tries 1))
+  (handle-exceptions
+      exn
                       (begin
                         (if message
-                           (debug:print-info 0 *default-log-port* message))
-                       #f) (thunk) ))
-
-(define (common:file-exists? path-string #!key (silent #f))
-  ;; this avoids stack dumps in the case where 
-
-  ;;;; TODO: catch permission denied exceptions and emit appropriate warnings, eg:  system error while trying to access file: "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/randy-slow/reproduce/q...
-  (common:false-on-exception (lambda () (file-exists? path-string))
+          (debug:print-info 0 *default-log-port* message " exn=" exn))
+      (if (> tries 1) ;; attempts remain, so try again rather than giving up
+	  (begin
+	    (thread-sleep! 1) ;; pause one second before the next attempt
+	    (common:false-on-exception thunk message: message tries: (- tries 1)))
+	  #f)) ;; out of attempts: report failure as #f instead of propagating the exception
+    (thunk)))
+
+(define (common:file-exists? path-string #!key (silent #f)(tries 1))
+  ;; avoids stack dumps when NFS is slow or flaky: an exception from file-exists? yields #f after `tries` attempts
+  (common:false-on-exception
+   (lambda ()(file-exists? path-string))
                              message: (if (not silent)
                                           (conc "Unable to access path: " path-string)
                                           #f)
+   tries: tries
                              ))
 
 (define (common:directory-exists? path-string)
   ;;;; TODO: catch permission denied exceptions and emit appropriate warnings, eg:  system error while trying to access file: "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/randy-slow/reproduce/q...
   (common:false-on-exception (lambda () (directory-exists? path-string))

Index: runs.scm
==================================================================
--- runs.scm
+++ runs.scm
@@ -62,11 +62,11 @@
 (defstruct runs:testdat
   hed tal reg reruns  test-record
   test-name item-path jobgroup
   waitons testmode  newtal
   itemmaps
-  (prereqs-not-met '())
+  (prereqs-not-met #f) ;; #f until runs:lazy-get-prereqs-not-met stores a fetched result here
   (last-update 0) ;; 
   )
   
 ;; look in the $MT_RUN_AREA_HOME/.softlocks directory for key-host-pid.softlock files
 ;;  - remove any that are over 3600 seconds old
@@ -833,49 +833,41 @@
 ;; tal - list of never visited tests
 ;;   prefer next hed to be from reg than tal.
 
 (define runs:nothing-left-in-queue-count 0)
 
-(define (runs:lazy-get-prereqs-not-met  testdat run-id waitons hed item-path #!key (mode '(normal))(itemmaps #f)) ;; mode: testmode itemmaps: itemmaps)
+;; Cache the result of rmt:get-prereqs-not-met in testdat and reuse it when it was fetched less than 10 seconds ago
+;; NOTE: This is assuming that testdat is highly specific to this test
+;; NOTE: a cached #f is never reused (see the `and` test below), so such a result is re-fetched on the next call
+(define (runs:lazy-get-prereqs-not-met  testdat run-id waitons hed item-path #!key (mode '(normal))(itemmaps #f))
+  ;; runs:expand-items calls this with mode: testmode and itemmaps: itemmaps
   (if (and (runs:testdat-prereqs-not-met testdat)
-	   (< (- (current-seconds) (runs:testdat-last-update testdat)) 10)) ;; only refresh for this test if it has been at least 10 seconds
-      (runs:testdat-prereqs-not-met testdat)
-      (let* ((res (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: mode itemmaps: itemmaps)))
-		    (if (list? res)
-			res
-			(begin
-			  (debug:print 0 *default-log-port*
-				       "ERROR: rmt:get-prereqs-not-met returned non-list!\n"
-				       "  res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" mode " itemmaps=" itemmaps)
-			  '())))))
+	   (< (- (current-seconds) (runs:testdat-last-update testdat)) 10)) ;;; only refresh for this test if
+					                                    ;;; it has been at least 10 seconds
+      (runs:testdat-prereqs-not-met testdat)  ;; return the cached result
+      (let* ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: mode itemmaps: itemmaps))) ;; fetch a fresh result
 	(runs:testdat-prereqs-not-met-set! testdat res)
 	(runs:testdat-last-update-set! testdat (current-seconds))
 	res)))
-	   
+
 ;;======================================================================
 ;; runs:expand-items is called by runs:run-tests-queue
 ;;======================================================================
 ;;
 ;; return value of runs:expand-items is passed back to runs-tests-queue and is fed to named loop with this signature:
 ;;    (let loop ((hed         (car sorted-test-names))
 ;;	         (tal         (cdr sorted-test-names))
 ;;	         (reg         '()) ;; registered, put these at the head of tal 
 ;;	         (reruns      '()))
-(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record
-			   can-run-more items runname tconfig reglen test-registry test-records itemmaps testdat)
+(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs
+			   run-id waitons item-path testmode test-record
+			   can-run-more items runname tconfig reglen test-registry
+			   test-records itemmaps testdat)
   (let* ((loop-list       (list hed tal reg reruns))
-	 (prereqs-not-met (runs:lazy-get-prereqs-not-met testdat run-id waitons hed item-path mode: testmode itemmaps: itemmaps))
-	  #;(let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)))
-			    (if (list? res)
-				res
-				(begin
-				  (debug:print 0 *default-log-port*
-					       "ERROR: rmt:get-prereqs-not-met returned non-list!\n"
-					       "  res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" testmode " itemmaps=" itemmaps)
-				  '())))
+	 (prereqs-not-met (runs:lazy-get-prereqs-not-met testdat run-id waitons hed item-path
+							 mode: testmode itemmaps: itemmaps))
 	 (have-itemized   (not (null? (lset-intersection eq? testmode '(itemmatch itemwait)))))
-	 ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
 	 (fails           (runs:calc-fails prereqs-not-met))
 	 (prereq-fails    (runs:calc-prereq-fail prereqs-not-met))
 	 (non-completed   (runs:calc-not-completed prereqs-not-met))
 	 (runnables       (runs:calc-runnable prereqs-not-met))
          (unexpanded-prereqs
@@ -1811,13 +1803,13 @@
           (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-4")
 	  (let ((can-run-more    #f)) ;; (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)))
 	    (if (not can-run-more) #;(and (list? can-run-more) ;; IDEA, this mechanism may have had some value, make it configurable to test pros/cons TODO
 		(car can-run-more))
 		(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs
-						    max-concurrent-jobs run-id waitons item-path
-						    testmode test-record can-run-more items runname
-						    tconfig reglen test-registry test-records itemmaps)))
+						    run-id waitons item-path testmode test-record
+						    can-run-more items runname tconfig reglen test-registry
+						    test-records itemmaps testdat)))
 		  (if loop-list
 		      (apply loop loop-list)
                       (debug:print-info 4 *default-log-port* " -- Can't expand hed="hed)))
 		;; if can't run more just loop with next possible test
 		(loop (car newtal)(cdr newtal) reg reruns))))