@@ -89,11 +89,11 @@ lock-files))) (if fresh-locks (begin (if (runs:lownoise "runners-softlock-wait" 360) (debug:print-info 0 *default-log-port* "Other runners in flight, giving up some time...")) - (thread-sleep! 10)) + (thread-sleep! 2)) (begin (if (runs:lownoise "runners-softlock-nowait" 360) (debug:print-info 0 *default-log-port* "No runners in flight, updating softlock")) (let* ((ouf (open-output-file my-lock-file))) (with-output-to-port ouf @@ -313,21 +313,21 @@ ;; Take advantage of a good place to exit if running the one-pass methodology (if (and (> (runs:dat-can-run-more-tests-count runsdat) 20) (args:get-arg "-one-pass")) (exit 0)) - (thread-sleep! (cond ;; BB: check with Matt. Should this sleep move - ;; to cond clauses below where we determine we - ;; have too many jobs running rather than each - ;; time the and condition above is true (which - ;; seems like always)? - ((< (- (current-seconds)(runs:dat-beginning-of-time runsdat)) 30) ;; for the first 30 seconds do not throttle in any way - 0) - ((> (runs:dat-can-run-more-tests-count runsdat) 20) ;; original intent was - save cycles, wait a long time - (if (runs:lownoise "waiting on tasks" 60)(debug:print-info 2 *default-log-port* "waiting for tasks to complete, sleeping briefly ...")) - 10) ;; obviously haven't had any work to do for a while - (else 0))) +;; (thread-sleep! (cond ;; BB: check with Matt. Should this sleep move +;; ;; to cond clauses below where we determine we +;; ;; have too many jobs running rather than each +;; ;; time the and condition above is true (which +;; ;; seems like always)? +;; ((< (- (current-seconds)(runs:dat-beginning-of-time runsdat)) 30) ;; for the first 30 seconds do not throttle in any way +;; 0) +;; ((> (runs:dat-can-run-more-tests-count runsdat) 20) ;; original intent was - save cycles, wait a long time +;; (if (runs:lownoise "waiting on tasks" 60)(debug:print-info 2 *default-log-port* "waiting for tasks to complete, sleeping briefly ...")) +;; 10) ;; obviously haven't had any work to do for a while +;; (else 0))) ;; ;; if have a number for inter-test-delay, use it, else don't delay much, maybe even zero? ;; (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.01) ;; ))) (let* ((num-running (rmt:get-count-tests-running run-id #f)) ;; fastmode=no @@ -993,11 +993,11 @@ (if (runs:can-keep-running? hed 20) (begin (runs:inc-cant-run-tests hed) (debug:print-info 0 *default-log-port* "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0)) ;; ;; getting here likely means the system is way overloaded, kill a full minute before continuing - (thread-sleep! 60) ;; TODO: gate by normalized server load > 1.0 (maxload config thing) + (thread-sleep! 5) ;; TODO: gate by normalized server load > 1.0 (maxload config thing) ;; num-retries code was here ;; we use this opportunity to move contents of reg to tal (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met? (begin (debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue") @@ -1807,11 +1807,11 @@ (set! last-time-incomplete (current-seconds)) ;; FIXME, this might be causing slow down - use of set! (rmt:find-and-mark-incomplete run-id #f) (debug:print-info 0 *default-log-port* "run-wait specified, waiting on " actual-num-running " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " (time->string (seconds->local-time (current-seconds)))))) - (thread-sleep! 5) + (thread-sleep! 1) ;; (if (>= num-running max-concurrent-jobs) 5 1)) (wait-loop (rmt:get-count-tests-running-for-run-id run-id #t) ;; fastmode=yes num-running)))) ;; LET* ((test-record ;; we get here on "drop through". All done! ;; this is moved to runs:run-testes since this function is getting called twice to ensure everthing is completed. @@ -2535,11 +2535,11 @@ (if (not (null? tal)) (loop (car tal)(cdr tal)))) ((run-wait) ;; BB TODO - manage has-subrun case (debug:print-info 2 *default-log-port* "still waiting, " (length tests) " tests still running") - (thread-sleep! 10) + (thread-sleep! 5) (let ((new-tests (proc-get-tests run-id))) (if (null? new-tests) (debug:print-info 1 *default-log-port* "Run completed according to zero tests matching provided criteria.") (loop (car new-tests)(cdr new-tests))))) ((archive)