@@ -462,10 +462,12 @@
 	      (debug:print-info 0 *default-log-port* "post-hook \"" run-post-hook "\" took " (- (current-seconds) start-time) " seconds to run."))))))
 
 ;; return #t when all items in waitors-upon list are represented in test-patt, #f otherwise.
 (define (runs:testpatts-mention-waitors-upon? test-patt waitors-upon)
   (null? (tests:filter-test-names-not-matched waitors-upon test-patt)))
+
+(define *find-and-mark-incomplete-last-run* (make-hash-table)) ;; run-id => (current-seconds) of the last rmt:find-and-mark-incomplete call for that run; used to throttle calls to one per 900 seconds per run
 
 ;;======================================================================
 ;; runs:run-tests is called from megatest.scm and itself
 ;;======================================================================
 ;;
@@ -772,13 +774,18 @@
 					    ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...
 					    (let ((run-ids (rmt:get-all-run-ids)))
 					      (for-each (lambda (run-id)
 							  (if keep-going
 							      (handle-exceptions
-							       exn
-							       (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn)
-							       (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27)
+								  exn
+								  (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn)
+								;; only run this if find-and-mark-incomplete has not been called for this run-id in the last 900 seconds
+								(if (> (- (current-seconds)(hash-table-ref/default *find-and-mark-incomplete-last-run* run-id 0)) 900)
+								    (begin
+								      (rmt:find-and-mark-incomplete run-id #f)
+								      (hash-table-set! *find-and-mark-incomplete-last-run* run-id (current-seconds)))
+								    )))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27)
 							run-ids)))
 					  "runs: mark-incompletes")))
 	    ;; (thread-start! th1)
 	    (thread-start! th2)
 	    ;; (thread-join! th1)
@@ -1870,16 +1877,18 @@
 		   (equal? (configf:lookup *configdat* "setup" "run-wait") "yes"))
 	       (> num-running 0))
 	  (begin
 	    ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
 	    ;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0))
-	    (if (> (current-seconds)(+ last-time-incomplete 900))
+	    (if (> (- (current-seconds)(hash-table-ref/default *find-and-mark-incomplete-last-run* run-id 0)) 900)
+		;; (begin(if (> (current-seconds)(+ last-time-incomplete 900))
 		(let ((actual-num-running (rmt:get-count-tests-running-for-run-id run-id)))
 		  (debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id
 				    ". Running as pid " (current-process-id) " on " (get-host-name))
-		  (set! last-time-incomplete (current-seconds)) ;; FIXME, this might be causing slow down - use of set!
+		  ;; (set! last-time-incomplete (current-seconds)) ;; FIXME, this might be causing slow down - use of set!
 		  (rmt:find-and-mark-incomplete run-id #f)
+		  (hash-table-set! *find-and-mark-incomplete-last-run* run-id (current-seconds)) ;; record this pass so the 900 second throttle above applies to run-id
 		  (debug:print-info 0 *default-log-port* "run-wait specified, waiting on " actual-num-running
 				    " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at "
 				    (time->string (seconds->local-time (current-seconds))))))
 	    ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
 	    (thread-sleep! 5) ;; (if (>= num-running max-concurrent-jobs) 5 1))