Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -19,10 +19,11 @@ ) (declare (unit common)) (include "common_records.scm") + ;; (require-library margs) ;; (include "margs.scm") ;; (define old-exit exit) @@ -2628,5 +2629,11 @@ restore-thunk)) delta-env-alist)))) (let ((rv (thunk))) (for-each (lambda (x) (x)) restore-thunks) ;; restore env to original state rv))) + +(define (common:send-thunk-to-background-thread thunk #!key (name #f)) + ;;(BB> "launched thread " name) + (if name + (thread-start! (make-thread thunk name)) + (thread-start! (make-thread thunk)))) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -2049,10 +2049,13 @@ (if (and (string? dira)(string? dirb)) (> (string-length dira)(string-length dirb)) #f)))))) (toplevel-retries (make-hash-table)) ;; try three times to loop through and remove top level tests (test-retry-time (make-hash-table)) + (backgrounded-remove-status (make-hash-table)) + (backgrounded-remove-last-visit (make-hash-table)) + (backgrounded-remove-result (make-hash-table)) (allow-run-time 10)) ;; seconds to allow for killing tests before just brutally killing 'em (let loop ((test (car sorted-tests)) (tal (cdr sorted-tests))) (let* ((test-id (db:test-get-id test)) (new-test-dat (rmt:get-test-info-by-id run-id test-id))) @@ -2084,25 +2087,59 @@ (if (not (null? tal)) (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries (let ((newtal (append tal (list test)))) (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue (has-subrun - ;; BB TODO - manage toplevasel-retries hash and retries in general - (debug:print 0 *default-log-port* "WARNING: postponing removal of " test-fulln " with run-id " run-id " as it has a subrun") - (let* ((subrun-remove-succeeded - (subrun:remove-subrun run-dir new-test-dat test-name item-path test-state test-fulln toplevel-with-children test))) - (cond - (subrun-remove-succeeded - - (debug:print 0 *default-log-port* "Now removing of " test-fulln " with run-id " run-id " as it has a subrun") - (runs:remove-test-directory new-test-dat mode)) - (else - (let* ((logfile (subrun:get-log-path run-dir "remove"))) - (debug:print 0 *default-log-port* "WARNING: removal of subrun failed. Please check "logfile" for details.")))) - - (if (not (null? tal)) - (loop (car tal)(cdr tal))))) + ;; + (let ((last-visit (hash-table-ref/default backgrounded-remove-last-visit test-fulln 0)) + (now (current-seconds)) + (rem-status (hash-table-ref/default backgrounded-remove-status test-fulln 'not-started))) + (case rem-status + ((not-started) + (debug:print 0 *default-log-port* "WARNING: postponing removal of " test-fulln " with run-id " run-id " as it has a subrun") + (hash-table-set! backgrounded-remove-status test-fulln 'started) + (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) + (common:send-thunk-to-background-thread + (lambda () + (let* ((subrun-remove-succeeded + (subrun:remove-subrun run-dir new-test-dat test-name item-path test-state test-fulln toplevel-with-children test))) + (hash-table-set! backgrounded-remove-result test-fulln subrun-remove-succeeded) + (hash-table-set! backgrounded-remove-status test-fulln 'done))) + name: (conc "remove-subrun:"test-fulln)) + + ;; send to back of line, loop + (let ((newtal (append tal (list test)))) + (loop (car newtal)(cdr newtal))) + ) + ((started) + ;; if last visit was within last second, sleep 1 second + (if (< (- now last-visit) 1.0) + (thread-sleep! 1.0)) + (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) + ;; send to back of line, loop + (let ((newtal (append tal (list test)))) + (loop (car newtal)(cdr newtal))) + ) + ((done) + ;; drop this one; if remaining, loop, else finish + (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) + (let ((subrun-remove-succeeded (hash-table-ref/default backgrounded-remove-result test-fulln 'exception))) + (cond + ((eq? subrun-remove-succeeded 'exception) + (let* ((logfile (subrun:get-log-path run-dir "remove"))) + (debug:print 0 *default-log-port* "ERROR: removing subrun of of " test-fulln " with run-id " run-id " ; see logfile @ "logfile))) + (subrun-remove-succeeded + (debug:print 0 *default-log-port* "Now removing of " test-fulln " with run-id " run-id " since subrun was removed.") + (runs:remove-test-directory new-test-dat mode)) + (else + (let* ((logfile (subrun:get-log-path run-dir "remove"))) + (debug:print 0 *default-log-port* "WARNING: removal of subrun failed. Please check "logfile" for details.")))) + (if (not (null? tal)) + (loop (car tal)(cdr tal))))) + ) ; end case rem-status + ) ; end let + ); end cond has-subrun (else (debug:print-info 0 *default-log-port* "test: " test-name " itest-state: " test-state) (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) (begin Index: subrun.scm ================================================================== --- subrun.scm +++ subrun.scm @@ -77,11 +77,11 @@ (define (subrun:remove-subrun test-run-dir new-test-dat test-name item-path test-state test-fulln toplevel-with-children test) ;; set state/status of test item ;; fork off megatest ;; set state/status of test item ;; - (BB> "Entered subrun:remove-subrun with "test-fulln) + ;;(BB> "Entered subrun:remove-subrun with "test-fulln) (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir)) (let* ((remove-result (subrun:exec-sub-megatest test-run-dir "-remove-runs" "remove"))) (if remove-result (begin