Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -48,40 +48,18 @@ test-patts required-tests test-registry registry-mutex flags keyvals run-info all-tests-registry can-run-more-tests ((can-run-more-tests-count 0) : fixnum) (last-fuel-check 0) ;; time when we last checked fuel -;; (last-runners-count #f) ;; -;; (runner-registered #f) ;; have I registered myself? -;; (run-skip-count 0) ;; how many times have I skipped running sequentially - (runners-mgmt-mode 'rest-mode) + (beginning-of-time (current-seconds)) ) (defstruct runs:testdat hed tal reg reruns test-record test-name item-path jobgroup waitons testmode newtal itemmaps prereqs-not-met) -;; Fourth try, do accounting through time -;; -(define (runs:parallel-runners-mgmt-4 rdat) - (let ((time-to-check 10) ;; 28 - (time-to-wait 12) - (now-time (current-seconds))) - (if (> (- now-time (runs:dat-last-fuel-check rdat)) time-to-check) ;; time to check - (let* ((fuel-used (or (rmt:get-var "runners-fuel") now-time))) - ;; initialize and sanitize values if needed - (if (> fuel-used (+ now-time 1)) ;; are we over-drawn? If so, kill time, do not add time to fuel used - (begin ;; gonna rest - (debug:print-info 0 *default-log-port* "Runner load high, taking a break.") - (thread-sleep! time-to-wait) - (runs:dat-last-fuel-check-set! rdat (current-seconds)) ;; work done from here (i.e. seconds of "fuel") will be added to fuel-used - ) - (begin ;; no fuel deficit, back to work - (rmt:set-var "runners-fuel" (+ now-time time-to-check)) - )))))) - ;; look in the $MT_RUN_AREA_HOME/.softlocks directory for key-host-pid.softlock files ;; - remove any that are over 3600 seconds old ;; - if there are any that are younger than 10 seconds ;; * sleep 10 seconds ;; * touch my key-host-pid.softlock file @@ -125,11 +103,11 @@ ;; Fourth try, do accounting through time ;; (define (runs:parallel-runners-mgmt rdat) (let ((time-to-check 10) ;; 28 - (time-to-wait 12) + (time-to-wait 30) (now-time (current-seconds))) (if (> (- now-time (runs:dat-last-fuel-check rdat)) time-to-check) ;; time to check (runs:wait-on-softlock rdat "runners")))) ;; To test parallel-runners management start a repl: @@ -154,181 +132,10 @@ ((or proc runs:parallel-runners-mgmt) rdat) (loop))))) (let* ((done-time (current-seconds))) (debug:print-info 0 *default-log-port* "DONE: rtime=" rtime ", elapsed time=" (- done-time startt) ", ratio=" (/ rtime (- done-time startt)))))) - -;; ;; Third try, use a running average -;; ;; -;; ;; ADD A COUNT OF TIMES CYCLED THROUGH REST MODE -;; ;; -;; ;; runners-mgmt-mode -;; ;; -;; (define (runs:parallel-runners-mgmt-3 rdat) -;; (let ((time-to-check 2.8) ;; 28 -;; (time-to-wait 3.0)) -;; (if (> (- (current-seconds) (runs:dat-last-runners-check rdat)) time-to-check) ;; time to check -;; (let* ((skip-count (runs:dat-run-skip-count rdat)) -;; (mgmt-mode (runs:dat-runners-mgmt-mode rdat)) ;; -;; (num-registered (rmt:get-var "num-runners")) -;; (last-runners-count (if (runs:dat-last-runners-count rdat) -;; (runs:dat-last-runners-count rdat) -;; (or num-registered 1))) -;; (last-runners-ravg (/ (+ last-runners-count num-registered) 2)#;(if (> num-registered last-runners-count) -;; (/ (+ last-runners-count num-registered) 2) -;; (/ (+ (* num-registered 4) last-runners-count) 5) ;; slow on down -;; )) ;; running average -;; ) -;; ;; initialize and sanitize values if needed -;; (cond -;; ((not num-registered) ;; first in, initialize to 1 -;; (debug:print-info 0 *default-log-port* " adjusting num-runners up to 1, currently it is not defined") -;; (rmt:set-var "num-runners" 1)) -;; ((< num-registered 1) ;; this should not be, reset to 1 to make it less confusing -;; (debug:print-info 0 *default-log-port* " adjusting num-runners up to 1, currently it is " num-registered) -;; (rmt:set-var "num-runners" 1))) -;; (if (not (member mgmt-mode '(rest-mode work-mode))) -;; (begin -;; (debug:print-info 0 *default-log-port* " setting mgmt-mode to work-mode, currently it is " mgmt-mode) -;; (rmt:inc-var "num-runners") -;; (set! last-runners-ravg (+ last-runners-ravg 1)) -;; (runs:dat-runners-mgmt-mode-set! rdat 'rest-mode))) -;; -;; (runs:dat-last-runners-count-set! rdat last-runners-ravg) -;; ;; to rest or not rest? -;; (if (and (< skip-count 5) -;; (> num-registered last-runners-count)) ;;(+ last-runners-ravg 0.5))) ;; there seem to be other runners out there -;; (begin ;; gonna rest -;; (debug:print-info 0 *default-log-port* "Too many running, num-registered=" num-registered ", ravg=" last-runners-ravg -;; ", real num runners=" (rmt:get-var "num-runners") ", skip-count=" skip-count) -;; (if (eq? mgmt-mode 'work-mode) -;; (rmt:dec-var "num-runners")) -;; (runs:dat-runners-mgmt-mode-set! rdat 'rest-mode) -;; (runs:dat-run-skip-count-set! rdat (+ (runs:dat-run-skip-count rdat) 1)) -;; (thread-sleep! time-to-wait) -;; (runs:parallel-runners-mgmt-3 rdat) -;; ) -;; (begin -;; (runs:dat-run-skip-count-set! rdat 0) -;; (if (eq? mgmt-mode 'rest-mode) -;; (rmt:inc-var "num-runners")) ;; going into work mode if not already in work mode -;; (runs:dat-runners-mgmt-mode-set! rdat 'work-mode) -;; (debug:print-info 0 *default-log-port* "All good, keep running, num-registered=" -;; num-registered ", ravg=" last-runners-ravg ", mode=" mgmt-mode -;; ", skip-count=" skip-count)) -;; ))))) - -;; (define (runs:print-parallel-runners-state state num-registered last-registered skip-count) -;; (debug:print-info 0 *default-log-port* "runs:parallel-runners-mgmt, state=" state -;; ", num-registered=" num-registered ", last-registered=" last-registered -;; ", skip-count=" skip-count)) -;; -;; (define (runs:print-parallel-runners-state2 state num-registered last-runners-count skip-count) -;; (debug:print-info 0 *default-log-port* "runs:parallel-runners-mgmt, state=" state -;; ", num-registered=" num-registered ", last-runners-count=" last-runners-count -;; ", skip-count=" skip-count)) -;; -;; ;; Second try -;; ;; -;; (define (runs:parallel-runners-mgmt-2 rdat) -;; (let ((time-to-check 2.8) ;; 28 -;; (time-to-wait 3.0)) -;; (if (> (- (current-seconds) (runs:dat-last-runners-check rdat)) time-to-check) ;; time to check -;; (let* ((num-registered (or (rmt:get-var "num-runners") 0)) -;; (last-runners-count (runs:dat-last-runners-count rdat)) -;; (skip-count (runs:dat-run-skip-count rdat))) -;; (cond -;; ;; first time in -;; ((not last-runners-count) -;; (runs:print-parallel-runners-state2 "A" num-registered last-runners-count skip-count) -;; (if (eq? num-registered 0) -;; (rmt:set-var "num-runners" 1) -;; (rmt:inc-var "num-runners")) -;; (runs:dat-last-runners-count-set! rdat num-registered) -;; (runs:dat-run-skip-count-set! rdat 0)) -;; ;; too many waits, decrement num-runners, reset last-runners and continue on -;; ((> (runs:dat-run-skip-count rdat) 3) -;; (runs:print-parallel-runners-state2 "B" num-registered last-runners-count skip-count) -;; (rmt:dec-var "num-runners") -;; (runs:dat-run-skip-count-set! rdat 0) -;; (runs:dat-last-runners-count-set! rdat num-registered)) -;; ;; too many running, take a break -;; ((> num-registered last-runners-count) ;; (+ last-runners-count 1)) -;; (runs:print-parallel-runners-state2 "C" num-registered last-runners-count skip-count) -;; (rmt:dec-var "num-runners") -;; (debug:print-info 0 *default-log-port* -;; "Too many running (" num-registered -;; "), last-count=" last-runners-count " waiting " time-to-wait " seconds ... ") -;; (thread-sleep! time-to-wait) -;; (runs:dat-run-skip-count-set! rdat (+ (runs:dat-run-skip-count rdat) 1)) -;; ;; adjust down last-runners-count -;; (if (< num-registered last-runners-count) -;; (runs:dat-last-runners-set! rdat num-running)) -;; (rmt:inc-var "num-runners") -;; ) -;; ;; we have been in waiting mode, do not increment again as we already did that -;; ((> skip-count 0) -;; (runs:print-parallel-runners-state2 "D" num-registered last-runners-count skip-count) -;; (runs:dat-run-skip-count-set! rdat 0) -;; ;; (runs:dat-last-runners-count-set! rdat num-registered) -;; ) -;; ;; skip count is zero, not too many running, this is transition into running -;; (else -;; (runs:print-parallel-runners-state2 "E" num-registered last-runners-count skip-count) -;; ;; (rmt:inc-var "num-runners") -;; #;(runs:dat-run-skip-count-set! rdat 0))))))) - -;; (define (runs:parallel-runners-mgmt rdat) -;; (let ((time-to-check 2.8) ;; 28 -;; (time-to-wait 3.0)) -;; (if (> (- (current-seconds) (runs:dat-last-runners-check rdat)) time-to-check) ;; time to check -;; (let* ((num-registered (or (rmt:get-var "num-runners") 0)) -;; (last-registered (or (rmt:get-var "runner-change-time") 0)) -;; (skip-count (runs:dat-run-skip-count rdat))) -;; (cond -;; ;; consider this the beginning of time -;; ((eq? num-registered 0) -;; (runs:print-parallel-runners-state "A" num-registered last-registered skip-count) -;; (rmt:set-var "num-runners" 1) ;; potential bug - not ACID -;; (rmt:set-var "runner-change-time" (current-seconds)) -;; (runs:dat-last-runners-check-set! rdat (current-seconds)) -;; (runs:dat-runner-registered-set! rdat #t) -;; (runs:dat-run-skip-count-set! rdat 0)) -;; ;; have headroom to run another -;; ((< num-registered 3) -;; (runs:print-parallel-runners-state "B" num-registered last-registered skip-count) -;; (rmt:inc-var "num-runners") -;; (rmt:set-var "runner-change-time" (current-seconds)) -;; (runs:dat-last-runners-check-set! rdat (current-seconds)) -;; (runs:dat-run-skip-count-set! rdat 0)) -;; ;; we've waited too many rounds, gonna force a round -;; ((> (runs:dat-run-skip-count rdat) 3) -;; (runs:print-parallel-runners-state "C" num-registered last-registered skip-count) -;; (rmt:set-var "num-runners" 1) -;; ;; (rmt:set-var "runner-change-time" (current-seconds)) -;; (runs:dat-last-runners-check-set! rdat (current-seconds)) -;; (runs:dat-run-skip-count-set! rdat 0)) -;; ;; have too many runners working, but this is the first time to wait since doing some work -;; ((eq? (runs:dat-run-skip-count rdat) 0) ;; and num-registered is >= 3 -;; (runs:print-parallel-runners-state "D" num-registered last-registered skip-count) -;; (if (not (eq? (runs:dat-last-runners-check rdat) 0)) ;; do not decrement if we've never incremented -;; (begin -;; (rmt:dec-var "num-runners") -;; #;(rmt:set-var "runner-change-time" (current-seconds)))) -;; (runs:dat-last-runners-check-set! rdat (current-seconds)) -;; (runs:dat-run-skip-count-set! rdat (+ (runs:dat-run-skip-count rdat) 1)) -;; (debug:print-info 0 *default-log-port* "Too many runners working (" num-registered -;; "). Resting for 30 seconds.") -;; (thread-sleep! time-to-wait) -;; (runs:parallel-runners-mgmt rdat)) -;; ;; ok, keep waiting -;; (else -;; (runs:print-parallel-runners-state "E" num-registered last-registered skip-count) -;; (thread-sleep! time-to-wait) -;; (runs:dat-run-skip-count-set! rdat (+ (runs:dat-run-skip-count rdat) 1)) -;; (runs:parallel-runners-mgmt rdat))))))) - (define (runs:get-mt-env-alist run-id runname target testname itempath) ;;(bb-check-path msg: "runs:set-megatest-env-vars entry") `(("MT_TEST_NAME" . ,testname) @@ -359,15 +166,10 @@ ,@(map (lambda (var) (let ((val (configf:lookup *configdat* "env-override" var))) (cons var val))) (configf:section-vars *configdat* "env-override")))) - - - - - ;; set up needed environment variables given a run-id and optionally a target, itempath etc. ;; (define (runs:set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)(intarget #f)(testname #f)(itempath #f)) ;;(bb-check-path msg: "runs:set-megatest-env-vars entry")