Index: megatest-version.scm ================================================================== --- megatest-version.scm +++ megatest-version.scm @@ -1,7 +1,7 @@ ;; Always use two digit decimal ;; 1.01, 1.02...1.10,1.11 ... 1.99,2.00.. (declare (unit megatest-version)) -(define megatest-version 1.5429) +(define megatest-version 1.5431) Index: run-tests-queue-classic.scm ================================================================== --- run-tests-queue-classic.scm +++ run-tests-queue-classic.scm @@ -22,10 +22,11 @@ (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns)) ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) (let* ((test-record (hash-table-ref test-records hed)) (test-name (tests:testqueue-get-testname test-record)) (tconfig (tests:testqueue-get-testconfig test-record)) + (jobgroup (config-lookup tconfig "requirements" "jobgroup")) (testmode (let ((m (config-lookup tconfig "requirements" "mode"))) (if m (string->symbol m) 'normal))) (waitons (tests:testqueue-get-waitons test-record)) (priority (tests:testqueue-get-priority test-record)) (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f @@ -54,11 +55,12 @@ (cond ;; OUTER COND ((not items) ;; when false the test is ok to be handed off to launch (but not before) (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) (not (null? tal))) (loop (car newtal)(cdr newtal) reruns)) - (let* ((run-limits-info (runs:can-run-more-tests test-record max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running + (let* ((run-limits-info ;; (cdb:remote-run runs:can-run-more-tests #f jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running + (open-run-close runs:can-run-more-tests #f jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running (have-resources (car run-limits-info)) (num-running (list-ref run-limits-info 1)) (num-running-in-jobgroup (list-ref run-limits-info 2)) (max-concurrent-jobs (list-ref run-limits-info 3)) (job-group-limit (list-ref run-limits-info 4)) @@ -191,11 +193,12 @@ (loop (car tal)(cdr tal) reruns)))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ((or (procedure? items)(eq? items 'have-procedure)) - (let ((can-run-more (runs:can-run-more-tests test-record max-concurrent-jobs))) + (let ((can-run-more ;; (cdb:remote-run runs:can-run-more-tests #f jobgroup max-concurrent-jobs))) + (open-run-close runs:can-run-more-tests #f jobgroup max-concurrent-jobs))) (if (and (list? can-run-more) (car can-run-more)) (let* ((prereqs-not-met (db:get-prereqs-not-met run-id waitons item-path mode: testmode)) (fails (runs:calc-fails prereqs-not-met)) (non-completed (runs:calc-not-completed prereqs-not-met))) Index: run-tests-queue-new.scm ================================================================== --- run-tests-queue-new.scm +++ run-tests-queue-new.scm @@ -3,19 +3,28 @@ (define (runs:run-tests-queue-new run-id runname test-records keyvals flags test-patts required-tests reglen) ;; At this point the list of parent tests is expanded ;; NB// Should expand items here and then insert into the run queue. (debug:print 5 "test-records: " test-records ", flags: " (hash-table->alist flags)) (let ((run-info (cdb:remote-run db:get-run-info #f run-id)) + (tests-info (cdb:remote-run db:get-tests-for-run #f run-id #f '() '())) ;; qryvals: "id,testname,item_path")) (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) (test-registry (make-hash-table)) (registry-mutex (make-mutex)) (num-retries 0) (max-retries (config-lookup *configdat* "setup" "maxretries")) (max-concurrent-jobs (let ((mcj (config-lookup *configdat* "setup" "max_concurrent_jobs"))) (if (and mcj (string->number mcj)) (string->number mcj) 1)))) ;; length of the register queue ahead + ;; Initialize the test-registery hash with tests that already have a record + (for-each (lambda (trec) + (let ((id (db:test-get-id trec)) + (tn (db:test-get-testname trec)) + (ip (db:test-get-item-path trec)) + (st (db:test-get-state trec))) + (hash-table-set! test-registry (runs:make-full-test-name tn ip) (string->symbol st)))) + tests-info) (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) (if (not (null? sorted-test-names)) (let loop ((hed (car sorted-test-names)) (tal (cdr sorted-test-names)) (reg '()) ;; registered, put these at the head of tal @@ -23,19 +32,28 @@ (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns)) ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) (let* ((test-record (hash-table-ref test-records hed)) (test-name (tests:testqueue-get-testname test-record)) (tconfig (tests:testqueue-get-testconfig test-record)) + (jobgroup (config-lookup tconfig "requirements" "jobgroup")) (testmode (let ((m (config-lookup tconfig "requirements" "mode"))) (if m (string->symbol m) 'normal))) (waitons (tests:testqueue-get-waitons test-record)) (priority (tests:testqueue-get-priority test-record)) (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f (items (tests:testqueue-get-items test-record)) (item-path (item-list->path itemdat)) + (tfullname (runs:make-full-test-name test-name item-path)) (newtal (append tal (list hed))) (regfull (> (length reg) reglen))) + + ;; Fast skip of tests that are already "COMPLETED" + (if (equal? (hash-table-ref/default test-registry tfullname #f) 'COMPLETED) + (begin + (debug:print-info 0 "Skipping COMPLETED test " tfullname) + (if (not (null? tal)) + (loop (car tal)(cdr tal) reg reruns)))) ;; (if (> (length reg) 10) ;; (begin ;; (set! tal (cons hed tal)) ;; (set! hed (car reg)) ;; (set! reg (cdr reg)) @@ -61,11 +79,12 @@ (cond ;; OUTER COND ((not items) ;; when false the test is ok to be handed off to launch (but not before) (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) (not (null? tal))) (loop (car tal)(cdr tal) reg reruns)) - (let* ((run-limits-info (runs:can-run-more-tests test-record max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running + (let* ((run-limits-info (cdb:remote-run runs:can-run-more-tests #f jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running + ;; (open-run-close runs:can-run-more-tests #f jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running (have-resources (car run-limits-info)) (num-running (list-ref run-limits-info 1)) (num-running-in-jobgroup (list-ref run-limits-info 2)) (max-concurrent-jobs (list-ref run-limits-info 3)) (job-group-limit (list-ref run-limits-info 4)) @@ -116,11 +135,11 @@ (mutex-lock! registry-mutex) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done) (mutex-unlock! registry-mutex)) (conc test-name "/" item-path)))) (thread-start! th)) - (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) + (cdb:remote-run runs:shrink-can-run-more-tests-count #f) ;; DELAY TWEAKER (still needed?) (if (and (null? tal)(null? reg)) (loop hed tal reg reruns) (loop (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (let ((newl (append reg (list hed)))) @@ -149,11 +168,11 @@ (or (null? prereqs-not-met) (and (eq? testmode 'toplevel) (null? non-completed)))) (run:test run-id run-info keyvals runname test-record flags #f) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'running) - (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) + (cdb:remote-run runs:shrink-can-run-more-tests-count #f) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! *global-delta*) (if (not (null? tal)) (loop (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) @@ -173,20 +192,20 @@ (if (not (null? tal)) (if (vector? hed) (begin (debug:print 1 "WARN: Dropping test " (db:test-get-testname hed) "/" (db:test-get-item-path hed) " from the launch list as it has prerequistes that are FAIL") - (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) + (cdb:remote-run runs:shrink-can-run-more-tests-count #f) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! *global-delta*) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'removed) (loop (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) (cons hed reruns))) (begin (debug:print 1 "WARN: Test not processed correctly. Could be a race condition in your test implementation? " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") - (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) + (cdb:remote-run runs:shrink-can-run-more-tests-count #f) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! (+ 0.01 *global-delta*)) (loop hed tal reg reruns))))))))) ;; END OF INNER COND ;; case where an items came in as a list been processed ((and (list? items) ;; thus we know our items are already calculated @@ -219,11 +238,11 @@ reruns)))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ((or (procedure? items)(eq? items 'have-procedure)) - (let ((can-run-more (runs:can-run-more-tests test-record max-concurrent-jobs))) + (let ((can-run-more (cdb:remote-run runs:can-run-more-tests #f jobgroup max-concurrent-jobs))) (if (and (list? can-run-more) (car can-run-more)) (let* ((prereqs-not-met (db:get-prereqs-not-met run-id waitons item-path mode: testmode)) (fails (runs:calc-fails prereqs-not-met)) (non-completed (runs:calc-not-completed prereqs-not-met))) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -163,26 +163,25 @@ (for-each (lambda (item) (debug:print 2 "setenv " (car item) " " (cadr item)) (setenv (car item) (cadr item))) itemdat)) -(define *last-num-running-tests* 0) - ;; Every time can-run-more-tests is called increment the delay -;; if the cou +;; +;; NOTE: We run this server-side!! Do not use this global except in the runs:can-run-more-tests routine +;; +(define *last-num-running-tests* 0) (define *runs:can-run-more-tests-count* 0) -(define (runs:shrink-can-run-more-tests-count) +(define (runs:shrink-can-run-more-tests-count db) ;; the db is just so we can use cdb:remote-run (set! *runs:can-run-more-tests-count* 0)) ;; (/ *runs:can-run-more-tests-count* 2))) -(define (runs:can-run-more-tests test-record max-concurrent-jobs) +(define (runs:can-run-more-tests db jobgroup max-concurrent-jobs) (thread-sleep! (cond ((> *runs:can-run-more-tests-count* 20) 2);; obviously haven't had any work to do for a while (else 0))) - (let* ((tconfig (tests:testqueue-get-testconfig test-record)) - (jobgroup (config-lookup tconfig "requirements" "jobgroup")) - (num-running (cdb:remote-run db:get-count-tests-running #f)) - (num-running-in-jobgroup (cdb:remote-run db:get-count-tests-running-in-jobgroup #f jobgroup)) + (let* ((num-running (db:get-count-tests-running db)) + (num-running-in-jobgroup (db:get-count-tests-running-in-jobgroup db jobgroup)) (job-group-limit (config-lookup *configdat* "jobgroups" jobgroup))) (if (> (+ num-running num-running-in-jobgroup) 0) (set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1))) (if (not (eq? *last-num-running-tests* num-running)) (begin @@ -614,11 +613,11 @@ #f)) (test-state (db:test-get-state new-test-dat)) (test-fulln (db:test-get-fullname new-test-dat))) (case action ((remove-runs) - (debug:print-info 0 "test: " test-name " item path: " item-path " test-state: " test-state) + (debug:print-info 0 "test: " test-name " itest-state: " test-state) (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) (begin (if (not (hash-table-ref/default test-retry-time test-fulln #f)) (hash-table-set! test-retry-time test-fulln (current-seconds))) (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time)