Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -58,11 +58,11 @@ (let* ((dbpath (conc *toppath* "/megatest.db")) ;; fname) (dbexists (file-exists? dbpath)) (db (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath)) (handler (make-busy-timeout (if (args:get-arg "-override-timeout") (string->number (args:get-arg "-override-timeout")) - 136000)))) ;; 136000))) + 136000)))) ;; 136000))) ;; 136000 = 2.2 minutes (debug:print 4 "INFO: dbpath=" dbpath) (sqlite3:set-busy-handler! db handler) (if (not dbexists) (db:initialize db)) (db:set-sync db) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -409,12 +409,12 @@ (open-run-close db:tests-register-test #f run-id test-name item-path) (hash-table-set! test-registery (runs:make-full-test-name test-name item-path) #t) (thread-sleep! *global-delta*) (loop (car newtal)(cdr newtal) reruns)) ((not have-resources) ;; simply try again after waiting a second - (thread-sleep! (+ 1 *global-delta*)) (debug:print 1 "INFO: no resources to run new tests, waiting ...") + (thread-sleep! (+ 0.01 *global-delta*)) ;; could have done hed tal here but doing car/cdr of newtal to rotate tests (loop (car newtal)(cdr newtal) reruns)) ((and have-resources (or (null? prereqs-not-met) (and (eq? testmode 'toplevel) @@ -429,11 +429,11 @@ ;; a message and drop hed from the items to be processed. (if (null? fails) (begin ;; couldn't run, take a breather (debug:print 4 "INFO: Shouldn't really get here, race condition? Unable to launch more tests at this moment, killing time ...") - (thread-sleep! (+ 1 *global-delta*)) ;; long sleep here - no resources, may as well be patient + (thread-sleep! 
(+ 0.01 *global-delta*)) ;; short sleep here - no resources yet, re-check again soon ;; we made new tal by sticking hed at the back of the list (loop (car newtal)(cdr newtal) reruns)) ;; the waiton is FAIL so no point in trying to run hed ever again (if (not (null? tal)) (if (vector? hed) @@ -441,11 +441,11 @@ " from the launch list as it has prerequistes that are FAIL") (thread-sleep! *global-delta*) (loop (car tal)(cdr tal) (cons hed reruns))) (begin (debug:print 1 "WARN: Test not processed correctly. Could be a race condition in your test implementation? " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") - (thread-sleep! *global-delta*) + (thread-sleep! (+ 0.01 *global-delta*)) (loop hed tal reruns))))))))) ;; END OF INNER COND ;; case where an items came in as a list been processed ((and (list? items) ;; thus we know our items are already calculated (not itemdat)) ;; and not yet expanded into the list of things to be done @@ -467,12 +467,12 @@ (hash-table-set! test-records newtestname new-test-record) (set! tal (cons newtestname tal)))))) ;; since these are itemized create new test names testname/itempath items) (if (not (null? tal)) (begin - (thread-sleep! *global-delta*) - (debug:print 4 "INFO: End of items list, looping with next") + (debug:print 4 "INFO: End of items list, looping with next after short delay") + (thread-sleep! (+ 0.01 *global-delta*)) (loop (car tal)(cdr tal) reruns)))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ((or (procedure? items)(eq? items 'have-procedure)) @@ -491,11 +491,11 @@ "\n (eq? testmode 'toplevel): " (eq? testmode 'toplevel) "\n (null? non-completed): " (null? non-completed) "\n reruns: " reruns "\n items: " items "\n can-run-more: " can-run-more) - + ;; (thread-sleep! (+ 0.01 *global-delta*)) (cond ;; INNER COND #2 ((or (null? 
prereqs-not-met) ;; all prereqs met, fire off the test ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch (and (eq? testmode 'toplevel) (null? non-completed))) @@ -512,14 +512,17 @@ (begin (debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this") (exit 1)))))) ((null? fails) (debug:print 4 "INFO: fails is null, moving on in the queue but keeping " hed " for now") - (thread-sleep! *global-delta*) ;; only increment num-retries when there are no tests runing (if (eq? 0 (list-ref can-run-more 1)) - (set! num-retries (+ num-retries 1))) + (begin + (if (> num-retries 100) ;; first 100 retries are low time cost + (thread-sleep! (+ 2 *global-delta*)) + (thread-sleep! (+ 0.01 *global-delta*))) + (set! num-retries (+ num-retries 1)))) (if (> num-retries max-retries) (if (not (null? tal)) (loop (car tal)(cdr tal) reruns)) (loop (car newtal)(cdr newtal) reruns))) ;; an issue with prereqs not yet met? ((and (not (null? fails))(eq? testmode 'normal)) @@ -530,11 +533,11 @@ (begin (thread-sleep! *global-delta*) (loop (car tal)(cdr tal)(cons hed reruns))))) (else (debug:print 8 "ERROR: No handler for this condition.") - (thread-sleep! *global-delta*) + (thread-sleep! (+ 1 *global-delta*)) (loop (car newtal)(cdr newtal) reruns)))) ;; END OF IF CAN RUN MORE ;; if can't run more just loop with next possible test (begin (debug:print 4 "INFO: processing the case with a lambda for items or 'have-procedure. Moving through the queue without dropping " hed) @@ -550,11 +553,11 @@ (junked (lset-difference equal? tal newlst))) (debug:print 4 "INFO: full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal) (if (< num-retries max-retries) (set! newlst (append reruns newlst))) (set! num-retries (+ num-retries 1)) - (thread-sleep! *global-delta*) + (thread-sleep! (+ 1 *global-delta*)) (if (not (null? 
newlst)) ;; since reruns have been tacked on to newlst create new reruns from junked (loop (car newlst)(cdr newlst)(delete-duplicates junked))))) ((not (null? tal)) (debug:print 4 "INFO: I'm pretty sure I shouldn't get here.")) Index: tests/fullrun/megatest.config ================================================================== --- tests/fullrun/megatest.config +++ tests/fullrun/megatest.config @@ -15,11 +15,11 @@ synchronous OFF # Throttle roughly scales the db access milliseconds to seconds delay throttle 0.2 # Max retries allows megatest to re-check that a tests status has changed # as tests can have transient FAIL status occasionally -maxretries 500 +maxretries 20 [validvalues] state start end status pass fail n/a 0 1 running