Overview
Comment: | Added filter to skip COMPLETED tests on startup. Move code to query db for checking if ok to run more tests to server side |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | dev |
Files: | files | file ages | folders |
SHA1: |
290d5d298fff3ca3a29016a0df86f0bd |
User & Date: | mrwellan on 2013-06-07 22:03:08 |
Other Links: | branch diff | manifest | tags |
Context
2013-06-18
| ||
00:45 | Tweaks for debugging stuck launch queue issues check-in: 9243ce728a user: matt tags: dev | |
2013-06-07
| ||
22:07 | Merged development into v1.5501 as it diverges more than a patch release should diverge. check-in: fd2b134daa user: mrwellan tags: v1.55 | |
22:03 | Added filter to skip COMPLETED tests on startup. Move code to query db for checking if ok to run more tests to server side check-in: 290d5d298f user: mrwellan tags: dev | |
13:55 | This version successfully ran stdcell tests (200 parallel jobs, 6000 total jobs) but it has problems with queue management check-in: a5d4098b27 user: mrwellan tags: dev | |
Changes
Modified megatest-version.scm from [19b1c1747c] to [252b48ae3a].
1 2 3 4 5 | 1 2 3 4 5 6 7 | - + | ;; Always use two digit decimal ;; 1.01, 1.02...1.10,1.11 ... 1.99,2.00.. (declare (unit megatest-version)) |
Modified run-tests-queue-classic.scm from [4c9cbe32ec] to [78c0dba7f7].
︙ | |||
20 21 22 23 24 25 26 27 28 29 30 31 32 33 | 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 | + | (tal (cdr sorted-test-names)) (reruns '())) (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns)) ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) (let* ((test-record (hash-table-ref test-records hed)) (test-name (tests:testqueue-get-testname test-record)) (tconfig (tests:testqueue-get-testconfig test-record)) (jobgroup (config-lookup tconfig "requirements" "jobgroup")) (testmode (let ((m (config-lookup tconfig "requirements" "mode"))) (if m (string->symbol m) 'normal))) (waitons (tests:testqueue-get-waitons test-record)) (priority (tests:testqueue-get-priority test-record)) (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f (items (tests:testqueue-get-items test-record)) (item-path (item-list->path itemdat)) |
︙ | |||
52 53 54 55 56 57 58 | 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 | + - + | (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) (cond ;; OUTER COND ((not items) ;; when false the test is ok to be handed off to launch (but not before) (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) (not (null? tal))) (loop (car newtal)(cdr newtal) reruns)) (let* ((run-limits-info ;; (cdb:remote-run runs:can-run-more-tests #f jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running |
︙ | |||
189 190 191 192 193 194 195 | 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 | - + + | (debug:print-info 4 "End of items list, looping with next after short delay") ;; (thread-sleep! (+ 0.01 *global-delta*)) (loop (car tal)(cdr tal) reruns)))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ((or (procedure? items)(eq? items 'have-procedure)) |
︙ |
Modified run-tests-queue-new.scm from [bdb0e5926d] to [b23b3c860e].
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | + + + + + + + + + + + + + + + + + + | ;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... > (define (runs:run-tests-queue-new run-id runname test-records keyvals flags test-patts required-tests reglen) ;; At this point the list of parent tests is expanded ;; NB// Should expand items here and then insert into the run queue. (debug:print 5 "test-records: " test-records ", flags: " (hash-table->alist flags)) (let ((run-info (cdb:remote-run db:get-run-info #f run-id)) (tests-info (cdb:remote-run db:get-tests-for-run #f run-id #f '() '())) ;; qryvals: "id,testname,item_path")) (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) (test-registry (make-hash-table)) (registry-mutex (make-mutex)) (num-retries 0) (max-retries (config-lookup *configdat* "setup" "maxretries")) (max-concurrent-jobs (let ((mcj (config-lookup *configdat* "setup" "max_concurrent_jobs"))) (if (and mcj (string->number mcj)) (string->number mcj) 1)))) ;; length of the register queue ahead ;; Initialize the test-registery hash with tests that already have a record (for-each (lambda (trec) (let ((id (db:test-get-id trec)) (tn (db:test-get-testname trec)) (ip (db:test-get-item-path trec)) (st (db:test-get-state trec))) (hash-table-set! test-registry (runs:make-full-test-name tn ip) (string->symbol st)))) tests-info) (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) (if (not (null? sorted-test-names)) (let loop ((hed (car sorted-test-names)) (tal (cdr sorted-test-names)) (reg '()) ;; registered, put these at the head of tal (reruns '())) (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns)) ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) (let* ((test-record (hash-table-ref test-records hed)) (test-name (tests:testqueue-get-testname test-record)) (tconfig (tests:testqueue-get-testconfig test-record)) (jobgroup (config-lookup tconfig "requirements" "jobgroup")) (testmode (let ((m (config-lookup tconfig "requirements" "mode"))) (if m (string->symbol m) 'normal))) (waitons (tests:testqueue-get-waitons test-record)) (priority (tests:testqueue-get-priority test-record)) (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f (items (tests:testqueue-get-items test-record)) (item-path (item-list->path itemdat)) (tfullname (runs:make-full-test-name test-name item-path)) (newtal (append tal (list hed))) (regfull (> (length reg) reglen))) ;; Fast skip of tests that are already "COMPLETED" (if (equal? (hash-table-ref/default test-registry tfullname #f) 'COMPLETED) (begin (debug:print-info 0 "Skipping COMPLETED test " tfullname) (if (not (null? tal)) (loop (car tal)(cdr tal) reg reruns)))) ;; (if (> (length reg) 10) ;; (begin ;; (set! tal (cons hed tal)) ;; (set! hed (car reg)) ;; (set! reg (cdr reg)) ;; (set! newtal tal))) (debug:print 6 |
︙ | |||
59 60 61 62 63 64 65 | 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 | + - + | (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) (cond ;; OUTER COND ((not items) ;; when false the test is ok to be handed off to launch (but not before) (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) (not (null? tal))) (loop (car tal)(cdr tal) reg reruns)) (let* ((run-limits-info (cdb:remote-run runs:can-run-more-tests #f jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running |
︙ | |||
114 115 116 117 118 119 120 | 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 | - + | (cdb:tests-register-test *runremote* run-id test-name "")) (cdb:tests-register-test *runremote* run-id test-name item-path) (mutex-lock! registry-mutex) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done) (mutex-unlock! registry-mutex)) (conc test-name "/" item-path)))) (thread-start! th)) |
︙ | |||
147 148 149 150 151 152 153 | 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 | - + | (loop (car newtal)(cdr newtal) reg reruns)) ((and have-resources (or (null? prereqs-not-met) (and (eq? testmode 'toplevel) (null? non-completed)))) (run:test run-id run-info keyvals runname test-record flags #f) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'running) |
︙ | |||
171 172 173 174 175 176 177 | 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 | - + - + | (loop (car newtal)(cdr newtal) reg reruns)) ;; the waiton is FAIL so no point in trying to run hed ever again (if (not (null? tal)) (if (vector? hed) (begin (debug:print 1 "WARN: Dropping test " (db:test-get-testname hed) "/" (db:test-get-item-path hed) " from the launch list as it has prerequistes that are FAIL") |
︙ | |||
217 218 219 220 221 222 223 | 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 | - + | (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns)))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ((or (procedure? items)(eq? items 'have-procedure)) |
︙ |
Modified runs.scm from [513fa82301] to [d409eb1269].
︙ | |||
161 162 163 164 165 166 167 | 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 | - - - + + + + - + - + - - - - + + | (define (set-item-env-vars itemdat) (for-each (lambda (item) (debug:print 2 "setenv " (car item) " " (cadr item)) (setenv (car item) (cadr item))) itemdat)) |
︙ | |||
612 613 614 615 616 617 618 | 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 | - + | (real-dir (if (file-exists? run-dir) (resolve-pathname run-dir) #f)) (test-state (db:test-get-state new-test-dat)) (test-fulln (db:test-get-fullname new-test-dat))) (case action ((remove-runs) |
︙ |