Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -246,10 +246,24 @@ (if (file-exists? (conc testpath "/testconfig")) (set! res (cons (last (string-split testpath "/")) res)))) tests) res)) +(define (runs:can-run-more-tests db) + (let ((num-running (db:get-count-tests-running db)) + (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs"))) + ;; (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running) + (if (or (not max-concurrent-jobs) + (and max-concurrent-jobs + (string->number max-concurrent-jobs) + (not (>= num-running (string->number max-concurrent-jobs))))) + #t + (begin + (print "WARNING: Max running jobs exceeded, current number running: " num-running + ", max_concurrent_jobs: " max-concurrent-jobs) + #f)))) + (define (run-tests db test-names) (let* ((keys (db-get-keys db)) (keyvallst (keys->vallist keys #t)) (run-id (register-run db keys))) ;; test-name))) ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if @@ -259,20 +273,14 @@ (db:set-tests-state-status db run-id test-names #f "FAIL" "NOT_STARTED" "FAIL")) (set! *passnum* (+ *passnum* 1)) (let loop ((numtimes 0)) (for-each (lambda (test-name) - (let ((num-running (db:get-count-tests-running db)) - (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs"))) - ;; (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running) - (if (or (not max-concurrent-jobs) - (and max-concurrent-jobs - (string->number max-concurrent-jobs) - (not (>= num-running (string->number max-concurrent-jobs))))) - (run-one-test db run-id test-name keyvallst) - (print "WARNING: Max running jobs exceeded, current number running: " num-running - ", max_concurrent_jobs: \"" max-concurrent-jobs "\"")))) + (if (runs:can-run-more-tests db) + (run-one-test db run-id test-name keyvallst) + ;; add some delay + (sleep 2))) test-names) ;; (run-waiting-tests db) (if (args:get-arg "-keepgoing") (let ((estrem (db:estimated-tests-remaining db run-id))) (if (> estrem 0) @@ -324,16 +332,11 @@ (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs")) (parent-test (and (not (null? items))(equal? item-path ""))) (single-test (and (null? items) (equal? item-path ""))) (item-test (not (equal? item-path "")))) ;; (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running) - (if (not (or (not max-concurrent-jobs) - (and max-concurrent-jobs - (string->number max-concurrent-jobs) - (not (>= num-running (string->number max-concurrent-jobs)))))) - (print "WARNING: Max running jobs exceeded, current number running: " num-running - ", max_concurrent_jobs: " max-concurrent-jobs) + (if (runs:can-run-more-tests db) (begin (let loop2 ((ts (db:get-test-info db run-id test-name item-path)) ;; #f) (ct 0)) (if (and (not ts) (< ct 10)) @@ -424,28 +427,30 @@ ;; BUG this hack of brute force retrying works quite well for many cases but ;; what is needed is to check the db for tests that have failed less than ;; N times or never been started and kick them off again (let loop ((waiting-test-names (hash-table-keys *waiting-queue*))) (cond + ((not (runs:can-run-more-tests db)) + (sleep 2) + (loop waiting-test-names)) ((null? waiting-test-names) (print "All tests launched")) - ((> numtries 4) - (print "NOTE: Tried launching four times, perhaps run megatest again in a few minutes")) (else (set! numtries (+ numtries 1)) (for-each (lambda (testname) - (let* ((testdat (hash-table-ref *waiting-queue* testname)) - (prereqs ((car testdat))) - (ldb (if db db (open-db)))) - ;; (print "prereqs remaining: " prereqs) - (if (null? prereqs) - (begin - (print "Prerequisites met, launching " testname) - ((cadr testdat)) - (hash-table-delete! *waiting-queue* testname))) - (if (not db) - (sqlite3:finalize! ldb)))) + (if (runs:can-run-more-tests db) + (let* ((testdat (hash-table-ref *waiting-queue* testname)) + (prereqs ((car testdat))) + (ldb (if db db (open-db)))) + ;; (print "prereqs remaining: " prereqs) + (if (null? prereqs) + (begin + (print "Prerequisites met, launching " testname) + ((cadr testdat)) + (hash-table-delete! *waiting-queue* testname))) + (if (not db) + (sqlite3:finalize! ldb))))) waiting-test-names) ;; (sleep 10) ;; no point in rushing things at this stage? (loop (hash-table-keys *waiting-queue*))))))) (define (get-dir-up-one dir) Index: tests/megatest.config ================================================================== --- tests/megatest.config +++ tests/megatest.config @@ -3,11 +3,11 @@ fsname TEXT datapath TEXT [setup] # exectutable /path/to/megatest -max_concurrent_jobs 8 +max_concurrent_jobs 4 runsdir /tmp/runs [jobtools] # ## launcher launches jobs, the job is managed on the target host ## by megatest, comment out launcher to run local