Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -335,11 +335,11 @@ (if success ;; success only tells us that the transport was ;; successful, have to examine the data to see if ;; there was a detected issue at the other end (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) (begin - (debug:print-error 2 *default-log-port* " dat=" dat) + (debug:print-info 0 *default-log-port* "Bad return data from Megatest server: dat=" dat) (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params)) ))) (define (rmt:print-db-stats) (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f" Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -799,50 +799,24 @@ (debug:print-info 1 *default-log-port* "Adding \"" (string-intersperse required-tests " ") "\" to the run queue")) ;; NOTE: these are all parent tests, items are not expanded yet. (debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records)) (let ((reglen (configf:lookup *configdat* "setup" "runqueue"))) (if (> (length (hash-table-keys test-records)) 0) - (let* ((keep-going #t) - (run-queue-retries 5) - ;; (th1 (make-thread (lambda () - ;; (handle-exceptions - ;; exn - ;; (begin - ;; (print-call-chain) - ;; (print " message: " ((condition-property-accessor 'exn 'message) exn))) - ;; (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests - ;; (any->number reglen) all-tests-registry))) - ;; "runs:run-tests-queue")) - (th2 (make-thread (lambda () ;; BBQ: why are we visiting ALL runs here? - ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ... - (let ((run-ids (rmt:get-all-run-ids))) - (for-each (lambda (run-id) - (if keep-going - (handle-exceptions - exn - (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn) - (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27) - run-ids))) - "runs: mark-incompletes"))) - ;; (thread-start! th1) - (thread-start! th2) - ;; (thread-join! th1) + (let* () ;; just do the main stuff in the main thread (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry) - (set! keep-going #f) - (thread-join! th2) ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD (if (> run-count 0) ;; handle reruns (begin (if (not (hash-table-ref/default flags "-preclean" #f)) (hash-table-set! flags "-preclean" #t)) (if (not (hash-table-ref/default flags "-rerun" #f)) (hash-table-set! flags "-rerun" "ABORT,STUCK/DEAD,n/a,ZERO_ITEMS")) ;; recursive call to self - (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))) - (launch:end-of-run-check run-id))) + (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))) + (launch:end-of-run-check run-id))) (debug:print-info 0 *default-log-port* "No tests to run"))) (debug:print-info 4 *default-log-port* "All done by here") ;; TODO: try putting post hook call here ; (debug:print-info 2 *default-log-port* " run-count " run-count)