@@ -47,142 +47,51 @@ runname max-concurrent-jobs run-id test-patts required-tests test-registry registry-mutex flags keyvals run-info all-tests-registry can-run-more-tests ((can-run-more-tests-count 0) : fixnum) - (last-runners-check 0) ;; time when we last checked number of runners - (last-runners-count #f) ;; - (runner-registered #f) ;; have I registered myself? - (run-skip-count 0) ;; how many times have I skipped running sequentially + (last-fuel-check 0) ;; time when we last checked fuel +;; (last-runners-count #f) ;; +;; (runner-registered #f) ;; have I registered myself? +;; (run-skip-count 0) ;; how many times have I skipped running sequentially (runners-mgmt-mode 'rest-mode) ) (defstruct runs:testdat hed tal reg reruns test-record test-name item-path jobgroup waitons testmode newtal itemmaps prereqs-not-met) - -(define (runs:print-parallel-runners-state state num-registered last-registered skip-count) - (debug:print-info 0 *default-log-port* "runs:parallel-runners-mgmt, state=" state - ", num-registered=" num-registered ", last-registered=" last-registered - ", skip-count=" skip-count)) - -(define (runs:print-parallel-runners-state2 state num-registered last-runners-count skip-count) - (debug:print-info 0 *default-log-port* "runs:parallel-runners-mgmt, state=" state - ", num-registered=" num-registered ", last-runners-count=" last-runners-count - ", skip-count=" skip-count)) - -;; Second try -;; -(define (runs:parallel-runners-mgmt-2 rdat) - (let ((time-to-check 2.8) ;; 28 - (time-to-wait 3.0)) - (if (> (- (current-seconds) (runs:dat-last-runners-check rdat)) time-to-check) ;; time to check - (let* ((num-registered (or (rmt:get-var "num-runners") 0)) - (last-runners-count (runs:dat-last-runners-count rdat)) - (skip-count (runs:dat-run-skip-count rdat))) - (cond - ;; first time in - ((not last-runners-count) - (runs:print-parallel-runners-state2 "A" num-registered last-runners-count skip-count) - (if (eq? num-registered 0) - (rmt:set-var "num-runners" 1) - (rmt:inc-var "num-runners")) - (runs:dat-last-runners-count-set! rdat num-registered) - (runs:dat-run-skip-count-set! rdat 0)) - ;; too many waits, decrement num-runners, reset last-runners and continue on - ((> (runs:dat-run-skip-count rdat) 3) - (runs:print-parallel-runners-state2 "B" num-registered last-runners-count skip-count) - (rmt:dec-var "num-runners") - (runs:dat-run-skip-count-set! rdat 0) - (runs:dat-last-runners-count-set! rdat num-registered)) - ;; too many running, take a break - ((> num-registered last-runners-count) ;; (+ last-runners-count 1)) - (runs:print-parallel-runners-state2 "C" num-registered last-runners-count skip-count) - (rmt:dec-var "num-runners") - (debug:print-info 0 *default-log-port* - "Too many running (" num-registered - "), last-count=" last-runners-count " waiting " time-to-wait " seconds ... ") - (thread-sleep! time-to-wait) - (runs:dat-run-skip-count-set! rdat (+ (runs:dat-run-skip-count rdat) 1)) - ;; adjust down last-runners-count - (if (< num-registered last-runners-count) - (runs:dat-last-runners-set! rdat num-running)) - (rmt:inc-var "num-runners") - ) - ;; we have been in waiting mode, do not increment again as we already did that - ((> skip-count 0) - (runs:print-parallel-runners-state2 "D" num-registered last-runners-count skip-count) - (runs:dat-run-skip-count-set! rdat 0) - ;; (runs:dat-last-runners-count-set! rdat num-registered) - ) - ;; skip count is zero, not too many running, this is transition into running - (else - (runs:print-parallel-runners-state2 "E" num-registered last-runners-count skip-count) - ;; (rmt:inc-var "num-runners") - #;(runs:dat-run-skip-count-set! rdat 0))))))) - -;; Third try, use a running average -;; -;; ADD A COUNT OF TIMES CYCLED THROUGH REST MODE -;; -;; runners-mgmt-mode -;; -(define (runs:parallel-runners-mgmt-3 rdat) - (let ((time-to-check 2.8) ;; 28 - (time-to-wait 3.0)) - (if (> (- (current-seconds) (runs:dat-last-runners-check rdat)) time-to-check) ;; time to check - (let* ((skip-count (runs:dat-run-skip-count rdat)) - (mgmt-mode (runs:dat-runners-mgmt-mode rdat)) ;; - (num-registered (rmt:get-var "num-runners")) - (last-runners-count (if (runs:dat-last-runners-count rdat) - (runs:dat-last-runners-count rdat) - (or num-registered 1))) - (last-runners-ravg (/ (+ last-runners-count num-registered) 2)#;(if (> num-registered last-runners-count) - (/ (+ last-runners-count num-registered) 2) - (/ (+ (* num-registered 4) last-runners-count) 5) ;; slow on down - )) ;; running average - ) + +;; Fourth try, do accounting through time +;; +(define (runs:parallel-runners-mgmt-4 rdat) + (let ((time-to-check 2.8) ;; 28 + (time-to-wait 3.0) + (now-time (current-seconds))) + (if (> (- now-time (runs:dat-last-fuel-check rdat)) time-to-check) ;; time to check + (let* ((mgmt-mode (runs:dat-runners-mgmt-mode rdat)) ;; + (fuel-used (rmt:get-var "runners-fuel"))) ;; initialize and sanitize values if needed (cond - ((not num-registered) ;; first in, initialize to 1 - (debug:print-info 0 *default-log-port* " adjusting num-runners up to 1, currently it is not defined") - (rmt:set-var "num-runners" 1)) - ((< num-registered 1) ;; this should not be, reset to 1 to make it less confusing - (debug:print-info 0 *default-log-port* " adjusting num-runners up to 1, currently it is " num-registered) - (rmt:set-var "num-runners" 1))) - (if (not (member mgmt-mode '(rest-mode work-mode))) - (begin - (debug:print-info 0 *default-log-port* " setting mgmt-mode to work-mode, currently it is " mgmt-mode) - (rmt:inc-var "num-runners") - (set! last-runners-ravg (+ last-runners-ravg 1)) - (runs:dat-runners-mgmt-mode-set! rdat 'rest-mode))) - - (runs:dat-last-runners-count-set! rdat last-runners-ravg) - ;; to rest or not rest? - (if (and (< skip-count 5) - (> num-registered last-runners-count)) ;;(+ last-runners-ravg 0.5))) ;; there seem to be other runners out there - (begin ;; gonna rest - (debug:print-info 0 *default-log-port* "Too many running, num-registered=" num-registered ", ravg=" last-runners-ravg - ", real num runners=" (rmt:get-var "num-runners") ", skip-count=" skip-count) - (if (eq? mgmt-mode 'work-mode) - (rmt:dec-var "num-runners")) - (runs:dat-runners-mgmt-mode-set! rdat 'rest-mode) - (runs:dat-run-skip-count-set! rdat (+ (runs:dat-run-skip-count rdat) 1)) - (thread-sleep! time-to-wait) - (runs:parallel-runners-mgmt-3 rdat) - ) - (begin - (runs:dat-run-skip-count-set! rdat 0) - (if (eq? mgmt-mode 'rest-mode) - (rmt:inc-var "num-runners")) ;; going into work mode if not already in work mode - (runs:dat-runners-mgmt-mode-set! rdat 'work-mode) - (debug:print-info 0 *default-log-port* "All good, keep running, num-registered=" - num-registered ", ravg=" last-runners-ravg ", mode=" mgmt-mode - ", skip-count=" skip-count)) - ))))) - + ((not fuel-used) ;; first in, initialize to 1 + (debug:print-info 0 *default-log-port* " adjusting num-runners up to 1, currently it is not defined") + (rmt:set-var "fuel-used" now-time) + (set! fuel-used now-time) + (runs:dat-last-fuel-check-set! rdat now-time)) + (else ;; add fuel used since last time + (rmt:add-var "fuel-used" (- now-time (runs:dat-last-fuel-check rdat))))) + + (if (> fuel-used now-time) ;; are we over-drawn? If so, kill time, do not add time to fuel used + (begin ;; gonna rest + (debug:print-info 0 *default-log-port* "Too much fuel used, taking a break. fuel-used=" + fuel-used ", now-time=" now-time) + (thread-sleep! time-to-wait) + (runs:dat-last-fuel-check-set! rdat (current-seconds)) ;; work done from here (i.e. seconds of "fuel") will be added to fuel-used + ) + (begin ;; no fuel deficit, back to work + (debug:print-info 0 *default-log-port* "No deficit, keep running")) + ))))) ;; To test parallel-runners management start a repl: ;; megatest -repl ;; then run: ;; (runs:test-parallel-runners 60) @@ -190,25 +99,145 @@ (define (runs:test-parallel-runners duration #!optional (proc #f)) (let* ((rdat (make-runs:dat)) (rtime 0) (startt (current-seconds)) (endt (+ startt duration))) - ((or proc runs:parallel-runners-mgmt-3) rdat) + ((or proc runs:parallel-runners-mgmt-4) rdat) (let loop () (let* ((wstart (current-seconds))) (if (< wstart endt) (let* ((work-time (random 10))) #;(debug:print-info 0 *default-log-port* "working for " work-time " seconds. Total work: " rtime ", elapsed time: " (- wstart startt)) (thread-sleep! work-time) (set! rtime (+ rtime work-time)) - ((or proc runs:parallel-runners-mgmt-3) rdat) + ((or proc runs:parallel-runners-mgmt-4) rdat) (loop))))) (let* ((done-time (current-seconds))) (debug:print-info 0 *default-log-port* "DONE: rtime=" rtime ", elapsed time=" (- done-time startt) ", ratio=" (/ rtime (- done-time startt)))))) +;; ;; Third try, use a running average +;; ;; +;; ;; ADD A COUNT OF TIMES CYCLED THROUGH REST MODE +;; ;; +;; ;; runners-mgmt-mode +;; ;; +;; (define (runs:parallel-runners-mgmt-3 rdat) +;; (let ((time-to-check 2.8) ;; 28 +;; (time-to-wait 3.0)) +;; (if (> (- (current-seconds) (runs:dat-last-runners-check rdat)) time-to-check) ;; time to check +;; (let* ((skip-count (runs:dat-run-skip-count rdat)) +;; (mgmt-mode (runs:dat-runners-mgmt-mode rdat)) ;; +;; (num-registered (rmt:get-var "num-runners")) +;; (last-runners-count (if (runs:dat-last-runners-count rdat) +;; (runs:dat-last-runners-count rdat) +;; (or num-registered 1))) +;; (last-runners-ravg (/ (+ last-runners-count num-registered) 2)#;(if (> num-registered last-runners-count) +;; (/ (+ last-runners-count num-registered) 2) +;; (/ (+ (* num-registered 4) last-runners-count) 5) ;; slow on down +;; )) ;; running average +;; ) +;; ;; initialize and sanitize values if needed +;; (cond +;; ((not num-registered) ;; first in, initialize to 1 +;; (debug:print-info 0 *default-log-port* " adjusting num-runners up to 1, currently it is not defined") +;; (rmt:set-var "num-runners" 1)) +;; ((< num-registered 1) ;; this should not be, reset to 1 to make it less confusing +;; (debug:print-info 0 *default-log-port* " adjusting num-runners up to 1, currently it is " num-registered) +;; (rmt:set-var "num-runners" 1))) +;; (if (not (member mgmt-mode '(rest-mode work-mode))) +;; (begin +;; (debug:print-info 0 *default-log-port* " setting mgmt-mode to work-mode, currently it is " mgmt-mode) +;; (rmt:inc-var "num-runners") +;; (set! last-runners-ravg (+ last-runners-ravg 1)) +;; (runs:dat-runners-mgmt-mode-set! rdat 'rest-mode))) +;; +;; (runs:dat-last-runners-count-set! rdat last-runners-ravg) +;; ;; to rest or not rest? +;; (if (and (< skip-count 5) +;; (> num-registered last-runners-count)) ;;(+ last-runners-ravg 0.5))) ;; there seem to be other runners out there +;; (begin ;; gonna rest +;; (debug:print-info 0 *default-log-port* "Too many running, num-registered=" num-registered ", ravg=" last-runners-ravg +;; ", real num runners=" (rmt:get-var "num-runners") ", skip-count=" skip-count) +;; (if (eq? mgmt-mode 'work-mode) +;; (rmt:dec-var "num-runners")) +;; (runs:dat-runners-mgmt-mode-set! rdat 'rest-mode) +;; (runs:dat-run-skip-count-set! rdat (+ (runs:dat-run-skip-count rdat) 1)) +;; (thread-sleep! time-to-wait) +;; (runs:parallel-runners-mgmt-3 rdat) +;; ) +;; (begin +;; (runs:dat-run-skip-count-set! rdat 0) +;; (if (eq? mgmt-mode 'rest-mode) +;; (rmt:inc-var "num-runners")) ;; going into work mode if not already in work mode +;; (runs:dat-runners-mgmt-mode-set! rdat 'work-mode) +;; (debug:print-info 0 *default-log-port* "All good, keep running, num-registered=" +;; num-registered ", ravg=" last-runners-ravg ", mode=" mgmt-mode +;; ", skip-count=" skip-count)) +;; ))))) + +;; (define (runs:print-parallel-runners-state state num-registered last-registered skip-count) +;; (debug:print-info 0 *default-log-port* "runs:parallel-runners-mgmt, state=" state +;; ", num-registered=" num-registered ", last-registered=" last-registered +;; ", skip-count=" skip-count)) +;; +;; (define (runs:print-parallel-runners-state2 state num-registered last-runners-count skip-count) +;; (debug:print-info 0 *default-log-port* "runs:parallel-runners-mgmt, state=" state +;; ", num-registered=" num-registered ", last-runners-count=" last-runners-count +;; ", skip-count=" skip-count)) +;; +;; ;; Second try +;; ;; +;; (define (runs:parallel-runners-mgmt-2 rdat) +;; (let ((time-to-check 2.8) ;; 28 +;; (time-to-wait 3.0)) +;; (if (> (- (current-seconds) (runs:dat-last-runners-check rdat)) time-to-check) ;; time to check +;; (let* ((num-registered (or (rmt:get-var "num-runners") 0)) +;; (last-runners-count (runs:dat-last-runners-count rdat)) +;; (skip-count (runs:dat-run-skip-count rdat))) +;; (cond +;; ;; first time in +;; ((not last-runners-count) +;; (runs:print-parallel-runners-state2 "A" num-registered last-runners-count skip-count) +;; (if (eq? num-registered 0) +;; (rmt:set-var "num-runners" 1) +;; (rmt:inc-var "num-runners")) +;; (runs:dat-last-runners-count-set! rdat num-registered) +;; (runs:dat-run-skip-count-set! rdat 0)) +;; ;; too many waits, decrement num-runners, reset last-runners and continue on +;; ((> (runs:dat-run-skip-count rdat) 3) +;; (runs:print-parallel-runners-state2 "B" num-registered last-runners-count skip-count) +;; (rmt:dec-var "num-runners") +;; (runs:dat-run-skip-count-set! rdat 0) +;; (runs:dat-last-runners-count-set! rdat num-registered)) +;; ;; too many running, take a break +;; ((> num-registered last-runners-count) ;; (+ last-runners-count 1)) +;; (runs:print-parallel-runners-state2 "C" num-registered last-runners-count skip-count) +;; (rmt:dec-var "num-runners") +;; (debug:print-info 0 *default-log-port* +;; "Too many running (" num-registered +;; "), last-count=" last-runners-count " waiting " time-to-wait " seconds ... ") +;; (thread-sleep! time-to-wait) +;; (runs:dat-run-skip-count-set! rdat (+ (runs:dat-run-skip-count rdat) 1)) +;; ;; adjust down last-runners-count +;; (if (< num-registered last-runners-count) +;; (runs:dat-last-runners-set! rdat num-running)) +;; (rmt:inc-var "num-runners") +;; ) +;; ;; we have been in waiting mode, do not increment again as we already did that +;; ((> skip-count 0) +;; (runs:print-parallel-runners-state2 "D" num-registered last-runners-count skip-count) +;; (runs:dat-run-skip-count-set! rdat 0) +;; ;; (runs:dat-last-runners-count-set! rdat num-registered) +;; ) +;; ;; skip count is zero, not too many running, this is transition into running +;; (else +;; (runs:print-parallel-runners-state2 "E" num-registered last-runners-count skip-count) +;; ;; (rmt:inc-var "num-runners") +;; #;(runs:dat-run-skip-count-set! rdat 0))))))) + ;; (define (runs:parallel-runners-mgmt rdat) ;; (let ((time-to-check 2.8) ;; 28 ;; (time-to-wait 3.0)) ;; (if (> (- (current-seconds) (runs:dat-last-runners-check rdat)) time-to-check) ;; time to check ;; (let* ((num-registered (or (rmt:get-var "num-runners") 0))