@@ -61,11 +61,11 @@ test-name item-path jobgroup waitons testmode newtal itemmaps prereqs-not-met) ;; Fourth try, do accounting through time ;; -(define (runs:parallel-runners-mgmt rdat) +(define (runs:parallel-runners-mgmt-4 rdat) (let ((time-to-check 10) ;; 28 (time-to-wait 12) (now-time (current-seconds))) (if (> (- now-time (runs:dat-last-fuel-check rdat)) time-to-check) ;; time to check (let* ((fuel-used (or (rmt:get-var "runners-fuel") now-time))) @@ -77,10 +77,60 @@ (runs:dat-last-fuel-check-set! rdat (current-seconds)) ;; work done from here (i.e. seconds of "fuel") will be added to fuel-used ) (begin ;; no fuel deficit, back to work (rmt:set-var "runners-fuel" (+ now-time time-to-check)) )))))) + +;; look in the $MT_RUN_AREA_HOME/.softlocks directory for key-host-pid.softlock files +;; - remove any that are over 3600 seconds old +;; - if there are any that are younger than 10 seconds +;; * sleep 10 seconds +;; * touch my key-host-pid.softlock file +;; * return +;; - if there are no files younger than 10 seconds +;; * touch my key-host-pid.softlock file +;; * return +;; +(define (runs:wait-on-softlock rdat key) + (if (not (and *toppath* (file-exists? *toppath*))) ;; don't seem to have toppath yet + (debug:print-info 0 *default-log-port* "Can't create softlocks - don't see MTRAH yet.") + (let* ((softlocks-dir (conc *toppath* "/.softlocks"))) + (if (not (file-exists? softlocks-dir)) + (create-directory softlocks-dir #t)) + (let* ((my-lock-file (conc softlocks-dir "/" key "-" (get-host-name) "-" (current-process-id) ".softlock")) + (lock-files (filter (lambda (x) + (not (equal? x my-lock-file))) + (glob (conc softlocks-dir "/" key "*.softlock")))) + (fresh-locks (any (lambda (x) ;; do we have any locks younger than 10 seconds + (let ((mod-time (file-modification-time x))) + (cond + ((> (- (current-seconds) mod-time) 3600) ;; too old to keep, remove it + (delete-file* x) #f) + ((< mod-time 10) #t) + (else #f)))) + lock-files))) + (if fresh-locks + (begin + (if (runs:lownoise "runners-softlock-wait" 360) + (debug:print-info 0 *default-log-port* "Other runners in flight, giving up some time...")) + (thread-sleep! 10)) + (begin + (if (runs:lownoise "runners-softlock-nowait" 360) + (debug:print-info 0 *default-log-port* "No runners in flight, updating softlock")) + (with-output-to-file my-lock-file + (lambda () + (print (current-seconds)))))) + (runs:dat-last-fuel-check-set! rdat (current-seconds)))))) + +;; Fourth try, do accounting through time +;; +(define (runs:parallel-runners-mgmt rdat) + (let ((time-to-check 10) ;; 28 + (time-to-wait 12) + (now-time (current-seconds))) + (if (> (- now-time (runs:dat-last-fuel-check rdat)) time-to-check) ;; time to check + (runs:wait-on-softlock rdat "runners")))) ;; To test parallel-runners management start a repl: ;; megatest -repl ;; then run: ;; (runs:test-parallel-runners 60)