Megatest: Diff

Differences From Artifact [65246f6bc9]:

File runs.scm — part of check-in [f52cd44a6e] at 2016-06-21 09:57:02 on branch redir-logs — Switch to *default-log-port* (user: mrwellan, size: 94641) [annotate] [blame] [check-ins using]

To Artifact [bb846f0eff]:

File runs.scm — part of check-in [c5596c2e5f] at 2016-06-21 10:22:28 on branch redir-logs — Redid redirecting of logs with -log (user: mrwellan, size: 95633) [annotate] [blame] [check-ins using]

︙			︙
126 127 128 129 130 131 132 ~~133~~ 134 135 136 137 138 139 140	#t) #f))) (define (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) (thread-sleep! (cond ((> runs:can-run-more-tests-count 20) (if (runs:lownoise "waiting on tasks" 60) ~~(debug:print-info 2 #f "waiting for tasks to complete, sleeping briefly ..."))~~ 2);; obviously haven't had any work to do for a while (else 0))) (let* ((num-running (rmt:get-count-tests-running run-id)) (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)) (job-group-limit (let ((jobg-count (config-lookup configdat "jobgroups" jobgroup))) (if (string? jobg-count) (string->number jobg-count)	\|	126 127 128 129 130 131 132 133 134 135 136 137 138 139 140	#t) #f))) (define (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) (thread-sleep! (cond ((> runs:can-run-more-tests-count 20) (if (runs:lownoise "waiting on tasks" 60) (debug:print-info 2 default-log-port "waiting for tasks to complete, sleeping briefly ...")) 2);; obviously haven't had any work to do for a while (else 0))) (let* ((num-running (rmt:get-count-tests-running run-id)) (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)) (job-group-limit (let ((jobg-count (config-lookup configdat "jobgroups" jobgroup))) (if (string? jobg-count) (string->number jobg-count)
︙			︙
258 259 260 261 262 263 264 ~~265 266 267 268~~ 269 270 271 272 273 274 275	(set! required-tests (lset-intersection equal? (string-split test-patts ",") all-test-names)) ;; (set! required-tests (lset-intersection equal? test-names all-test-names)) ;; look up all tests matching the comma separated list of globs in ;; test-patts (using % as wildcard) ;; (set! test-names (delete-duplicates (tests:get-valid-tests toppath test-patts))) (debug:print-info 0 #f "tests search path: " (string-intersperse (tests:get-tests-search-path configdat) " ")) (debug:print-info 0 #f "all tests: " (string-intersperse (sort all-test-names string<) " ")) (debug:print-info 0 #f "test names: " (string-intersperse (sort test-names string<) " ")) (debug:print-info 0 #f "required tests: " (string-intersperse (sort required-tests string<) " ")) ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if ;; -keepgoing is specified (if (eq? passnum 0) (begin ;; Is this still necessary? I think not. Unreachable tests are marked as such and ;; should not cause problems here.	\| \| \| \|	258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275	(set! required-tests (lset-intersection equal? (string-split test-patts ",") all-test-names)) ;; (set! required-tests (lset-intersection equal? test-names all-test-names)) ;; look up all tests matching the comma separated list of globs in ;; test-patts (using % as wildcard) ;; (set! test-names (delete-duplicates (tests:get-valid-tests toppath test-patts))) (debug:print-info 0 default-log-port "tests search path: " (string-intersperse (tests:get-tests-search-path configdat) " ")) (debug:print-info 0 default-log-port "all tests: " (string-intersperse (sort all-test-names string<) " ")) (debug:print-info 0 default-log-port "test names: " (string-intersperse (sort test-names string<) " ")) (debug:print-info 0 default-log-port "required tests: " (string-intersperse (sort required-tests string<) " ")) ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if ;; -keepgoing is specified (if (eq? passnum 0) (begin ;; Is this still necessary? I think not. Unreachable tests are marked as such and ;; should not cause problems here.
︙			︙
300 301 302 303 304 305 306 ~~307~~ 308 309 310 311 312 313 314	(if (not (null? test-names)) (let loop ((hed (car test-names)) ;; NOTE: This is the main loop that iterates over the test-names (tal (cdr test-names))) ;; 'return-procs tells the config reader to prep running system but return a proc (change-directory toppath) ;; PLEASE OPTIMIZE ME!!! I think this should be a no-op but there are several places where change-directories could be happening. (setenv "MT_TEST_NAME" hed) ;; (let-values (((waitons waitors config)(tests:get-waitons hed all-tests-registry))) ~~(debug:print-info 8 #f "waitons: " waitons)~~ ;; check for hed in waitons => this would be circular, remove it and issue an ;; error (if (or (member hed waitons) (member hed waitors)) (begin (debug:print 0 default-log-port* "ERROR: test " hed " has listed itself as a waiton or waitor, please correct this!") (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons))	\|	300 301 302 303 304 305 306 307 308 309 310 311 312 313 314	(if (not (null? test-names)) (let loop ((hed (car test-names)) ;; NOTE: This is the main loop that iterates over the test-names (tal (cdr test-names))) ;; 'return-procs tells the config reader to prep running system but return a proc (change-directory toppath) ;; PLEASE OPTIMIZE ME!!! I think this should be a no-op but there are several places where change-directories could be happening. (setenv "MT_TEST_NAME" hed) ;; (let-values (((waitons waitors config)(tests:get-waitons hed all-tests-registry))) (debug:print-info 8 default-log-port* "waitons: " waitons) ;; check for hed in waitons => this would be circular, remove it and issue an ;; error (if (or (member hed waitons) (member hed waitors)) (begin (debug:print 0 default-log-port "ERROR: test " hed " has listed itself as a waiton or waitor, please correct this!") (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons))
︙			︙
332 333 334 335 336 337 338 ~~339~~ 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 ~~356~~ 357 358 359 ~~360~~ 361 362 363 ~~364~~ 365 366 367 368 369 370 371 372 373 374 375 376 ~~377~~ 378 379 380 ~~381~~ 382 ~~383~~ 384 385 386 387 388 389 390	(let* ((waiton-record (hash-table-ref/default test-records waiton #f)) (waiton-tconfig (if waiton-record (vector-ref waiton-record 1) #f)) (waiton-itemized (and waiton-tconfig (or (hash-table-ref/default waiton-tconfig "items" #f) (hash-table-ref/default waiton-tconfig "itemstable" #f)))) (itemmaps (tests:get-itemmaps config)) ;; (configf:lookup config "requirements" "itemmap")) (new-test-patts (tests:extend-test-patts test-patts hed waiton itemmaps))) ~~(debug:print-info 0 #f "Test " waiton " has " (if waiton-record "a" "no") " waiton-record and" (if waiton-itemized " " " no ") "items")~~ ;; need to account for test-patt here, if I am test "a", selected with a test-patt of "hed/b%" ;; and we are waiting on "waiton" we need to add "waiton/,waiton/b%" to test-patt ;; is this satisfied by merely appending "/" to the waiton name added to the list? ;; ;; This approach causes all of the items in an upstream test to be run ;; if we have this waiton already processed once we can analzye it for extending ;; tests to be run, since we can't properly process waitons unless they have been ;; initially added we add them again to be processed on second round AND add the hed ;; back in to also be processed on second round ;; (if waiton-tconfig (begin (set! test-names (cons waiton test-names)) ;; need to process this one, only add once the waiton tconfig read (if waiton-itemized (begin ~~(debug:print-info 0 #f "New test patts: " new-test-patts ", prev test patts: " test-patts)~~ (set! required-tests (cons (conc waiton "/") required-tests)) (set! test-patts new-test-patts)) (begin ~~(debug:print-info 0 #f "Adding non-itemized test " waiton " to required-tests")~~ (set! required-tests (cons waiton required-tests)) (set! test-patts new-test-patts)))) (begin ~~(debug:print-info 0 #f "No testconfig info yet for " waiton ", setting up to re-process it")~~ (set! tal (append (cons waiton tal)(list hed))))) ;; (cons (conc waiton "/") required-tests)) ;; NOPE: didn't work. required needs to be plain test names. Try tacking on to test-patts ;; - doesn't work ;; (set! test-patts (conc test-patts "," waiton "/")) ;; (set! test-names (cons waiton test-names))))) ;; was an append, now a cons ))) (delete-duplicates (append waitons waitors))) (let ((remtests (delete-duplicates (append waitons tal)))) (if (not (null? remtests)) (begin ~~;; (debug:print-info 0 #f "Preprocessing continues for " (string-intersperse remtests ", "))~~ (loop (car remtests)(cdr remtests)))))))) (if (not (null? required-tests)) ~~(debug:print-info 1 #f "Adding \"" (string-intersperse required-tests " ") "\" to the run queue"))~~ ;; NOTE: these are all parent tests, items are not expanded yet. ~~(debug:print-info 4 #f "test-records=" (hash-table->alist test-records))~~ (let ((reglen (configf:lookup configdat "setup" "runqueue"))) (if (> (length (hash-table-keys test-records)) 0) (let* ((keep-going #t) (run-queue-retries 5) (th1 (make-thread (lambda () (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry)) ;; (handle-exceptions	\| \| \| \| \| \| \|	332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390	(let* ((waiton-record (hash-table-ref/default test-records waiton #f)) (waiton-tconfig (if waiton-record (vector-ref waiton-record 1) #f)) (waiton-itemized (and waiton-tconfig (or (hash-table-ref/default waiton-tconfig "items" #f) (hash-table-ref/default waiton-tconfig "itemstable" #f)))) (itemmaps (tests:get-itemmaps config)) ;; (configf:lookup config "requirements" "itemmap")) (new-test-patts (tests:extend-test-patts test-patts hed waiton itemmaps))) (debug:print-info 0 default-log-port "Test " waiton " has " (if waiton-record "a" "no") " waiton-record and" (if waiton-itemized " " " no ") "items") ;; need to account for test-patt here, if I am test "a", selected with a test-patt of "hed/b%" ;; and we are waiting on "waiton" we need to add "waiton/,waiton/b%" to test-patt ;; is this satisfied by merely appending "/" to the waiton name added to the list? ;; ;; This approach causes all of the items in an upstream test to be run ;; if we have this waiton already processed once we can analzye it for extending ;; tests to be run, since we can't properly process waitons unless they have been ;; initially added we add them again to be processed on second round AND add the hed ;; back in to also be processed on second round ;; (if waiton-tconfig (begin (set! test-names (cons waiton test-names)) ;; need to process this one, only add once the waiton tconfig read (if waiton-itemized (begin (debug:print-info 0 default-log-port "New test patts: " new-test-patts ", prev test patts: " test-patts) (set! required-tests (cons (conc waiton "/") required-tests)) (set! test-patts new-test-patts)) (begin (debug:print-info 0 default-log-port "Adding non-itemized test " waiton " to required-tests") (set! required-tests (cons waiton required-tests)) (set! test-patts new-test-patts)))) (begin (debug:print-info 0 default-log-port "No testconfig info yet for " waiton ", setting up to re-process it") (set! tal (append (cons waiton tal)(list hed))))) ;; (cons (conc waiton "/") required-tests)) ;; NOPE: didn't work. required needs to be plain test names. Try tacking on to test-patts ;; - doesn't work ;; (set! test-patts (conc test-patts "," waiton "/")) ;; (set! test-names (cons waiton test-names))))) ;; was an append, now a cons ))) (delete-duplicates (append waitons waitors))) (let ((remtests (delete-duplicates (append waitons tal)))) (if (not (null? remtests)) (begin ;; (debug:print-info 0 default-log-port "Preprocessing continues for " (string-intersperse remtests ", ")) (loop (car remtests)(cdr remtests)))))))) (if (not (null? required-tests)) (debug:print-info 1 default-log-port "Adding \"" (string-intersperse required-tests " ") "\" to the run queue")) ;; NOTE: these are all parent tests, items are not expanded yet. (debug:print-info 4 default-log-port "test-records=" (hash-table->alist test-records)) (let ((reglen (configf:lookup configdat "setup" "runqueue"))) (if (> (length (hash-table-keys test-records)) 0) (let* ((keep-going #t) (run-queue-retries 5) (th1 (make-thread (lambda () (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry)) ;; (handle-exceptions
︙			︙
419 420 421 422 423 424 425 ~~426 427~~ 428 429 430 431 432 433 434	(begin (if (not (hash-table-ref/default flags "-preclean" #f)) (hash-table-set! flags "-preclean" #t)) (if (not (hash-table-ref/default flags "-rerun" #f)) (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS")) ;; recursive call to self (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))))) ~~(debug:print-info 0 #f "No tests to run"))) (debug:print-info 4 #f "All done by here")~~ (rmt:tasks-set-state-given-param-key task-key "done") ;; (sqlite3:finalize! tasks-db) )) ;; loop logic. These are used in runs:run-tests-queue to make it a bit more readable. ;;	\| \|	419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434	(begin (if (not (hash-table-ref/default flags "-preclean" #f)) (hash-table-set! flags "-preclean" #t)) (if (not (hash-table-ref/default flags "-rerun" #f)) (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS")) ;; recursive call to self (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))))) (debug:print-info 0 default-log-port "No tests to run"))) (debug:print-info 4 default-log-port "All done by here") (rmt:tasks-set-state-given-param-key task-key "done") ;; (sqlite3:finalize! tasks-db) )) ;; loop logic. These are used in runs:run-tests-queue to make it a bit more readable. ;;
︙			︙
482 483 484 485 486 487 488 ~~489~~ 490 491 492 493 494 495 496	" res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" testmode " itemmaps=" itemmaps) '())))) ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) (fails (runs:calc-fails prereqs-not-met)) (prereq-fails (runs:calc-prereq-fail prereqs-not-met)) (non-completed (runs:calc-not-completed prereqs-not-met)) (runnables (runs:calc-runnable prereqs-not-met))) ~~(debug:print-info 4 #f "START OF INNER COND #2 "~~ "\n can-run-more: " can-run-more "\n testname: " hed "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met) "\n non-completed: " (runs:pretty-string non-completed) "\n prereq-fails: " (runs:pretty-string prereq-fails) "\n fails: " (runs:pretty-string fails) "\n testmode: " testmode	\|	482 483 484 485 486 487 488 489 490 491 492 493 494 495 496	" res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" testmode " itemmaps=" itemmaps) '())))) ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) (fails (runs:calc-fails prereqs-not-met)) (prereq-fails (runs:calc-prereq-fail prereqs-not-met)) (non-completed (runs:calc-not-completed prereqs-not-met)) (runnables (runs:calc-runnable prereqs-not-met))) (debug:print-info 4 default-log-port "START OF INNER COND #2 " "\n can-run-more: " can-run-more "\n testname: " hed "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met) "\n non-completed: " (runs:pretty-string non-completed) "\n prereq-fails: " (runs:pretty-string prereq-fails) "\n fails: " (runs:pretty-string fails) "\n testmode: " testmode
︙			︙
511 512 513 514 515 516 517 ~~518~~ 519 520 521 522 523 524 525 ~~526~~ 527 528 529 530 531 532 533 534 535 536 537 538 539 540 ~~541~~ 542 543 544 545 546 547 548	(cond ;; all prereqs met, fire off the test ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch ((and (not (member 'toplevel testmode)) (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a) '(DONOTRUN removed CANNOTRUN))) ;; common:cant-run-states-sym) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here ~~(debug:print-info 1 #f "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue")~~ (if (or (not (null? tal)) (not (null? reg))) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns) (begin ~~(debug:print-info 0 #f "Nothing left in the queue!")~~ ;; If get here twice then we know we've tried to expand all items ;; since there must be a logic issue with the handling of loops in the ;; items expand phase we will brute force an exit here. (if (> runs:nothing-left-in-queue-count 2) (begin (debug:print 0 default-log-port "WARNING: this condition is triggered when there were no items to expand and nothing to run. Please check your run for completeness") (exit 0)) (set! runs:nothing-left-in-queue-count (+ runs:nothing-left-in-queue-count 1))) #f))) ;; ((or (null? prereqs-not-met) (and (member 'toplevel testmode) (null? non-completed))) ~~(debug:print-info 4 #f "runs:expand-items: (or (null? prereqs-not-met) (and (member 'toplevel testmode)(null? non-completed)))")~~ (let ((test-name (tests:testqueue-get-testname test-record))) (setenv "MT_TEST_NAME" test-name) ;; (setenv "MT_RUNNAME" runname) (runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process (let ((items-list (items:get-items-from-config tconfig))) (if (list? items-list) (begin	\| \| \|	511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548	(cond ;; all prereqs met, fire off the test ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch ((and (not (member 'toplevel testmode)) (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a) '(DONOTRUN removed CANNOTRUN))) ;; common:cant-run-states-sym) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here (debug:print-info 1 default-log-port "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue") (if (or (not (null? tal)) (not (null? reg))) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns) (begin (debug:print-info 0 default-log-port "Nothing left in the queue!") ;; If get here twice then we know we've tried to expand all items ;; since there must be a logic issue with the handling of loops in the ;; items expand phase we will brute force an exit here. (if (> runs:nothing-left-in-queue-count 2) (begin (debug:print 0 default-log-port "WARNING: this condition is triggered when there were no items to expand and nothing to run. Please check your run for completeness") (exit 0)) (set! runs:nothing-left-in-queue-count (+ runs:nothing-left-in-queue-count 1))) #f))) ;; ((or (null? prereqs-not-met) (and (member 'toplevel testmode) (null? non-completed))) (debug:print-info 4 default-log-port "runs:expand-items: (or (null? prereqs-not-met) (and (member 'toplevel testmode)(null? non-completed)))") (let ((test-name (tests:testqueue-get-testname test-record))) (setenv "MT_TEST_NAME" test-name) ;; (setenv "MT_RUNNAME" runname) (runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process (let ((items-list (items:get-items-from-config tconfig))) (if (list? items-list) (begin
︙			︙
602 603 604 605 606 607 608 ~~609~~ 610 611 612 613 614 615 ~~616~~ 617 618 619 620 621 622 623 624 625 626 627 ~~628~~ 629 630 631 632 633 634 635	((and (null? fails) (null? prereq-fails) (null? non-completed)) (if (runs:can-keep-running? hed 20) (begin (runs:inc-cant-run-tests hed) ~~(debug:print-info 1 #f "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default seen-cant-run-tests hed 0))~~ ;; getting here likely means the system is way overloaded, kill a full minute before continuing (thread-sleep! 60) ;; num-retries code was here ;; we use this opportunity to move contents of reg to tal (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met? (begin ~~(debug:print-info 1 #f "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue")~~ (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while."))) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns)))) ((and (or (not (null? fails)) (not (null? prereq-fails))) (member 'normal testmode)) ~~(debug:print-info 1 #f "test " hed " (mode=" testmode ") has failed prerequisite(s); "~~ (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") ", removing it from to-do list") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (if (not (null? prereq-fails)) (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites") (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))))	\| \| \|	602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635	((and (null? fails) (null? prereq-fails) (null? non-completed)) (if (runs:can-keep-running? hed 20) (begin (runs:inc-cant-run-tests hed) (debug:print-info 1 default-log-port "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default seen-cant-run-tests hed 0)) ;; getting here likely means the system is way overloaded, kill a full minute before continuing (thread-sleep! 60) ;; num-retries code was here ;; we use this opportunity to move contents of reg to tal (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met? (begin (debug:print-info 1 default-log-port "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while."))) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns)))) ((and (or (not (null? fails)) (not (null? prereq-fails))) (member 'normal testmode)) (debug:print-info 1 default-log-port "test " hed " (mode=" testmode ") has failed prerequisite(s); " (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") ", removing it from to-do list") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (if (not (null? prereq-fails)) (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites") (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))))
︙			︙
690 691 692 693 694 695 696 ~~697~~ 698 699 700 701 702 703 704 705 706 707 708 709 710 ~~711~~ 712 713 714 ~~715~~ 716 717 718 719 720 721 722 723 ~~724~~ 725 726 727 728 729 730 731 732 733 734 ~~735~~ 736 737 738 739 740 741 742	(not (equal? x hed))) (runs:calc-not-completed prereqs-not-met))) (loop-list (list hed tal reg reruns)) ;; configure the load runner (numcpus (common:get-num-cpus)) (maxload (string->number (or (configf:lookup configdat "jobtools" "maxload") "3"))) (waitdelay (string->number (or (configf:lookup configdat "jobtools" "waitdelay") "60")))) ~~(debug:print-info 4 #f "have-resources: " have-resources " prereqs-not-met: ("~~ (string-intersperse (map (lambda (t) (if (vector? t) (conc (db:test-get-state t) "/" (db:test-get-status t)) (conc " WARNING: t is not a vector=" t ))) prereqs-not-met) ", ") ") fails: " fails "\nregistered? " (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) (if (and (not (null? prereqs-not-met)) (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) ~~(debug:print-info 2 #f "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", ")))~~ ;; Don't know at this time if the test have been launched at some time in the past ;; i.e. is this a re-launch? ~~(debug:print-info 4 #f "run-limits-info = " run-limits-info)~~ (cond ;; Check item path against item-patts, ;; ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run ;; else the run is stuck, temporarily or permanently ;; but should check if it is due to lack of resources vs. prerequisites ~~(debug:print-info 1 #f "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts)~~ (if (or (not (null? tal))(not (null? reg))) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns) #f)) ;; Register tests ;; ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) ~~(debug:print-info 4 #f "Pre-registering test " test-name "/" item-path " to create placeholder" )~~ ;; always do firm registration now in v1.60 and greater ;; (eq? transport-type 'fs) ;; no point in parallel registration if use fs (let register-loop ((numtries 15)) (rmt:register-test run-id test-name item-path) (if (rmt:get-test-id run-id test-name item-path) (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'done) (if (> numtries 0) (begin	\| \| \| \| \|	690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742	(not (equal? x hed))) (runs:calc-not-completed prereqs-not-met))) (loop-list (list hed tal reg reruns)) ;; configure the load runner (numcpus (common:get-num-cpus)) (maxload (string->number (or (configf:lookup configdat "jobtools" "maxload") "3"))) (waitdelay (string->number (or (configf:lookup configdat "jobtools" "waitdelay") "60")))) (debug:print-info 4 default-log-port "have-resources: " have-resources " prereqs-not-met: (" (string-intersperse (map (lambda (t) (if (vector? t) (conc (db:test-get-state t) "/" (db:test-get-status t)) (conc " WARNING: t is not a vector=" t ))) prereqs-not-met) ", ") ") fails: " fails "\nregistered? " (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) (if (and (not (null? prereqs-not-met)) (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) (debug:print-info 2 default-log-port "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", "))) ;; Don't know at this time if the test have been launched at some time in the past ;; i.e. is this a re-launch? (debug:print-info 4 default-log-port "run-limits-info = " run-limits-info) (cond ;; Check item path against item-patts, ;; ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run ;; else the run is stuck, temporarily or permanently ;; but should check if it is due to lack of resources vs. prerequisites (debug:print-info 1 default-log-port "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts) (if (or (not (null? tal))(not (null? reg))) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns) #f)) ;; Register tests ;; ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) (debug:print-info 4 default-log-port "Pre-registering test " test-name "/" item-path " to create placeholder" ) ;; always do firm registration now in v1.60 and greater ;; (eq? transport-type 'fs) ;; no point in parallel registration if use fs (let register-loop ((numtries 15)) (rmt:register-test run-id test-name item-path) (if (rmt:get-test-id run-id test-name item-path) (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'done) (if (> numtries 0) (begin
︙			︙
760 761 762 763 764 765 766 ~~767~~ 768 769 770 771 772 773 774 775 776 777 778 779 ~~780~~ 781 782 783 784 785 786 787	(append reg (list hed))) reruns))) ;; At this point hed test registration must be completed. ;; ((eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f) 'start) ~~(debug:print-info 0 #f "Waiting on test registration(s): "~~ (string-intersperse (filter (lambda (x) (eq? (hash-table-ref/default test-registry x #f) 'start)) (hash-table-keys test-registry)) ", ")) (thread-sleep! 0.051) (list hed tal reg reruns)) ;; If no resources are available just kill time and loop again ;; ((not have-resources) ;; simply try again after waiting a second (if (runs:lownoise "no resources" 60) ~~(debug:print-info 1 #f "no resources to run new tests, waiting ..."))~~ ;; Have gone back and forth on this but db starvation is an issue. ;; wait one second before looking again to run jobs. (thread-sleep! 1) ;; could have done hed tal here but doing car/cdr of newtal to rotate tests (list (car newtal)(cdr newtal) reg reruns)) ;; This is the final stage, everything is in place so launch the test	\| \|	760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787	(append reg (list hed))) reruns))) ;; At this point hed test registration must be completed. ;; ((eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f) 'start) (debug:print-info 0 default-log-port "Waiting on test registration(s): " (string-intersperse (filter (lambda (x) (eq? (hash-table-ref/default test-registry x #f) 'start)) (hash-table-keys test-registry)) ", ")) (thread-sleep! 0.051) (list hed tal reg reruns)) ;; If no resources are available just kill time and loop again ;; ((not have-resources) ;; simply try again after waiting a second (if (runs:lownoise "no resources" 60) (debug:print-info 1 default-log-port "no resources to run new tests, waiting ...")) ;; Have gone back and forth on this but db starvation is an issue. ;; wait one second before looking again to run jobs. (thread-sleep! 1) ;; could have done hed tal here but doing car/cdr of newtal to rotate tests (list (car newtal)(cdr newtal) reg reruns)) ;; This is the final stage, everything is in place so launch the test
︙			︙
814 815 816 817 818 819 820 ~~821~~ 822 823 824 825 826 827 828 ~~829~~ 830 831 832 833 834 835 836	(else (debug:print 4 default-log-port "FAILS: " fails) ;; If one or more of the prereqs-not-met are FAIL then we can issue ;; a message and drop hed from the items to be processed. ;; (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) (if (and (not (null? prereqs-not-met)) (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) ~~(debug:print-info 1 #f "waiting on tests; " (string-intersperse~~ (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", "))) (if (or (null? fails) (member 'toplevel testmode)) (begin ;; couldn't run, take a breather (if (runs:lownoise "Waiting for more work to do..." 60) ~~(debug:print-info 0 #f "Waiting for more work to do..."))~~ (thread-sleep! 1) (list (car newtal)(cdr newtal) reg reruns)) ;; the waiton is FAIL so no point in trying to run hed ever again (if (or (not (null? reg))(not (null? tal))) (if (vector? hed) (begin (debug:print 1 default-log-port "WARNING: Dropping test " test-name "/" item-path	\| \|	814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836	(else (debug:print 4 default-log-port "FAILS: " fails) ;; If one or more of the prereqs-not-met are FAIL then we can issue ;; a message and drop hed from the items to be processed. ;; (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) (if (and (not (null? prereqs-not-met)) (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) (debug:print-info 1 default-log-port "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", "))) (if (or (null? fails) (member 'toplevel testmode)) (begin ;; couldn't run, take a breather (if (runs:lownoise "Waiting for more work to do..." 60) (debug:print-info 0 default-log-port "Waiting for more work to do...")) (thread-sleep! 1) (list (car newtal)(cdr newtal) reg reruns)) ;; the waiton is FAIL so no point in trying to run hed ever again (if (or (not (null? reg))(not (null? tal))) (if (vector? hed) (begin (debug:print 1 default-log-port "WARNING: Dropping test " test-name "/" item-path
︙			︙
970 971 972 973 974 975 976 ~~977~~ 978 979 980 981 982 983 984	(set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) (let loop ((hed (car sorted-test-names)) (tal (cdr sorted-test-names)) (reg '()) ;; registered, put these at the head of tal (reruns '())) ~~(if (not (null? reruns))(debug:print-info 4 #f "reruns=" reruns))~~ ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes ;; moving this to a parallel thread and just run it once. ;; (if (> (current-seconds)(+ last-time-incomplete 900)) (begin (set! last-time-incomplete (current-seconds))	\|	970 971 972 973 974 975 976 977 978 979 980 981 982 983 984	(set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) (let loop ((hed (car sorted-test-names)) (tal (cdr sorted-test-names)) (reg '()) ;; registered, put these at the head of tal (reruns '())) (if (not (null? reruns))(debug:print-info 4 default-log-port "reruns=" reruns)) ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes ;; moving this to a parallel thread and just run it once. ;; (if (> (current-seconds)(+ last-time-incomplete 900)) (begin (set! last-time-incomplete (current-seconds))
︙			︙
1024 1025 1026 1027 1028 1029 1030 ~~1031~~ 1032 1033 1034 1035 1036 1037 1038	;; Fast skip of tests that are already "COMPLETED" - NO! Cannot do that as the items may not have been expanded yet :( ;; (if (member (hash-table-ref/default test-registry tfullname #f) '(DONOTRUN removed)) ;; common:cant-run-states-sym) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) (begin (if (runs:lownoise (conc "been marked do not run " tfullname) 60) ~~(debug:print-info 0 #f "Skipping test " tfullname " as it has been marked do not run due to being completed or not runnable"))~~ (if (or (not (null? tal))(not (null? reg))) (loop (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns)))) ;; (loop (car tal)(cdr tal) reg reruns))))	\|	1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038	;; Fast skip of tests that are already "COMPLETED" - NO! Cannot do that as the items may not have been expanded yet :( ;; (if (member (hash-table-ref/default test-registry tfullname #f) '(DONOTRUN removed)) ;; common:cant-run-states-sym) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) (begin (if (runs:lownoise (conc "been marked do not run " tfullname) 60) (debug:print-info 0 default-log-port "Skipping test " tfullname " as it has been marked do not run due to being completed or not runnable")) (if (or (not (null? tal))(not (null? reg))) (loop (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns)))) ;; (loop (car tal)(cdr tal) reg reruns))))
︙			︙
1087 1088 1089 1090 1091 1092 1093 ~~1094~~ 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 ~~1105~~ 1106 1107 1108 1109 1110 1111 1112	waitons))))) ;; could do this more elegantly with a marker.... (debug:print 0 default-log-port "WARNING: Marking test " tfullname " as not runnable. It is waiting on tests that cannot be run. Giving up now.") (hash-table-set! test-registry tfullname 'removed)) ;; items is #f then the test is ok to be handed off to launch (but not before) ;; ((not items) ~~(debug:print-info 4 #f "OUTER COND: (not items)")~~ (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) (not (null? tal))) (loop (car tal)(cdr tal) reg reruns)) (let ((loop-list (runs:process-expanded-tests hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmaps))) (if loop-list (apply loop loop-list)))) ;; items processed into a list but not came in as a list been processed ;; ((and (list? items) ;; thus we know our items are already calculated (not itemdat)) ;; and not yet expanded into the list of things to be done ~~(debug:print-info 4 #f "OUTER COND: (and (list? items)(not itemdat))")~~ ;; Must determine if the items list is valid. Discard the test if it is not. (if (and (list? items) (> (length items) 0) (and (list? (car items)) (> (length (car items)) 0)) (debug:debug-mode 1)) (debug:print 2 default-log-port (map (lambda (row)	\| \|	1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112	waitons))))) ;; could do this more elegantly with a marker.... (debug:print 0 default-log-port "WARNING: Marking test " tfullname " as not runnable. It is waiting on tests that cannot be run. Giving up now.") (hash-table-set! test-registry tfullname 'removed)) ;; items is #f then the test is ok to be handed off to launch (but not before) ;; ((not items) (debug:print-info 4 default-log-port "OUTER COND: (not items)") (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) (not (null? tal))) (loop (car tal)(cdr tal) reg reruns)) (let ((loop-list (runs:process-expanded-tests hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmaps))) (if loop-list (apply loop loop-list)))) ;; items processed into a list but not came in as a list been processed ;; ((and (list? items) ;; thus we know our items are already calculated (not itemdat)) ;; and not yet expanded into the list of things to be done (debug:print-info 4 default-log-port "OUTER COND: (and (list? items)(not itemdat))") ;; Must determine if the items list is valid. Discard the test if it is not. (if (and (list? items) (> (length items) 0) (and (list? (car items)) (> (length (car items)) 0)) (debug:debug-mode 1)) (debug:print 2 default-log-port (map (lambda (row)
︙			︙
1128 1129 1130 1131 1132 1133 1134 ~~1135~~ 1136 1137 1138 1139 1140 1141 1142	(tests:testqueue-set-items! new-test-record #f) (tests:testqueue-set-itemdat! new-test-record my-itemdat) (tests:testqueue-set-item_path! new-test-record my-item-path) (hash-table-set! test-records newtestname new-test-record) (set! tal (append tal (list newtestname))))))) ;; since these are itemized create new test names testname/itempath items) ~~;; (debug:print-info 0 #f "Test " (tests:testqueue-get-testname test-record) " is itemized but has no items")~~ ;; At this point we have possibly added items to tal but all must be handed off to ;; INNER COND logic. I think loop without rotating the queue ;; (loop hed tal reg reruns)) ;; (let ((newtal (append tal (list hed)))) ;; We should discard hed as it has been expanded into it's items? Yes, but only if this is an itemized test ;; (loop (car newtal)(cdr newtal) reg reruns) (if (null? tal)	\|	1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142	(tests:testqueue-set-items! new-test-record #f) (tests:testqueue-set-itemdat! new-test-record my-itemdat) (tests:testqueue-set-item_path! new-test-record my-item-path) (hash-table-set! test-records newtestname new-test-record) (set! tal (append tal (list newtestname))))))) ;; since these are itemized create new test names testname/itempath items) ;; (debug:print-info 0 default-log-port "Test " (tests:testqueue-get-testname test-record) " is itemized but has no items") ;; At this point we have possibly added items to tal but all must be handed off to ;; INNER COND logic. I think loop without rotating the queue ;; (loop hed tal reg reruns)) ;; (let ((newtal (append tal (list hed)))) ;; We should discard hed as it has been expanded into it's items? Yes, but only if this is an itemized test ;; (loop (car newtal)(cdr newtal) reg reruns) (if (null? tal)
︙			︙
1159 1160 1161 1162 1163 1164 1165 ~~1166~~ 1167 1168 1169 1170 1171 1172 1173 1174 ~~1175~~ 1176 ~~1177~~ 1178 1179 ~~1180~~ 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 ~~1196~~ 1197 1198 1199 ~~1200~~ 1201 1202 1203 1204 1205 ~~1206~~ 1207 1208 1209 1210 1211 1212 1213	;; this case should not happen, added to help catch any bugs ((and (list? items) itemdat) (debug:print 0 default-log-port "ERROR: Should not have a list of items in a test and the itemspath set - please report this") (exit 1)) ((not (null? reruns)) (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED, (junked (lset-difference equal? tal newlst))) ~~(debug:print-info 4 #f "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal)~~ (if (< num-retries max-retries) (set! newlst (append reruns newlst))) (set! num-retries (+ num-retries 1)) ;; (thread-sleep! (+ 1 global-delta)) (if (not (null? newlst)) ;; since reruns have been tacked on to newlst create new reruns from junked (loop (car newlst)(cdr newlst) reg (delete-duplicates junked))))) ((not (null? tal)) ~~(debug:print-info 4 #f "I'm pretty sure I shouldn't get here."))~~ ((not (null? reg)) ;; could we get here with leftovers? ~~(debug:print-info 0 #f "Have leftovers!")~~ (loop (car reg)(cdr reg) '() reruns)) (else ~~(debug:print-info 4 #f "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns))~~ ))) ;; now if -run-wait we wait for all tests to be done ;; Now wait for any RUNNING tests to complete (if in run-wait mode) (thread-sleep! 5) ;; I think there is a race condition here. Let states/statuses settle (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id)) (prev-num-running 0)) ;; (debug:print 0 default-log-port "num-running=" num-running ", prev-num-running=" prev-num-running) (if (and (or (args:get-arg "-run-wait") (equal? (configf:lookup configdat "setup" "run-wait") "yes")) (> num-running 0)) (begin ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes ;; (debug:print 0 default-log-port "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0)) (if (> (current-seconds)(+ last-time-incomplete 900)) (begin ~~(debug:print-info 0 #f "Marking stuck tests as INCOMPLETE while waiting for run " run-id ". Running as pid " (current-process-id) " on " (get-host-name))~~ (set! last-time-incomplete (current-seconds)) (rmt:find-and-mark-incomplete run-id #f))) (if (not (eq? num-running prev-num-running)) ~~(debug:print-info 0 #f "run-wait specified, waiting on " num-running " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " (time->string (seconds->local-time (current-seconds)))))~~ (thread-sleep! 5) ;; (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) ;; LET* ((test-record ;; we get here on "drop through". All done! ~~(debug:print-info 1 #f "All tests launched")))~~ (define (runs:calc-fails prereqs-not-met) (filter (lambda (test) (and (vector? test) ;; not (string? test)) (member (db:test-get-state test) '("INCOMPLETE" "COMPLETED")) (not (member (db:test-get-status test) '("PASS" "WARN" "CHECK" "WAIVED" "SKIP")))))	\| \| \| \| \| \| \|	1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213	;; this case should not happen, added to help catch any bugs ((and (list? items) itemdat) (debug:print 0 default-log-port "ERROR: Should not have a list of items in a test and the itemspath set - please report this") (exit 1)) ((not (null? reruns)) (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED, (junked (lset-difference equal? tal newlst))) (debug:print-info 4 default-log-port "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal) (if (< num-retries max-retries) (set! newlst (append reruns newlst))) (set! num-retries (+ num-retries 1)) ;; (thread-sleep! (+ 1 global-delta)) (if (not (null? newlst)) ;; since reruns have been tacked on to newlst create new reruns from junked (loop (car newlst)(cdr newlst) reg (delete-duplicates junked))))) ((not (null? tal)) (debug:print-info 4 default-log-port "I'm pretty sure I shouldn't get here.")) ((not (null? reg)) ;; could we get here with leftovers? (debug:print-info 0 default-log-port "Have leftovers!") (loop (car reg)(cdr reg) '() reruns)) (else (debug:print-info 4 default-log-port "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) ))) ;; now if -run-wait we wait for all tests to be done ;; Now wait for any RUNNING tests to complete (if in run-wait mode) (thread-sleep! 5) ;; I think there is a race condition here. Let states/statuses settle (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id)) (prev-num-running 0)) ;; (debug:print 0 default-log-port "num-running=" num-running ", prev-num-running=" prev-num-running) (if (and (or (args:get-arg "-run-wait") (equal? (configf:lookup configdat "setup" "run-wait") "yes")) (> num-running 0)) (begin ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes ;; (debug:print 0 default-log-port "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0)) (if (> (current-seconds)(+ last-time-incomplete 900)) (begin (debug:print-info 0 default-log-port "Marking stuck tests as INCOMPLETE while waiting for run " run-id ". Running as pid " (current-process-id) " on " (get-host-name)) (set! last-time-incomplete (current-seconds)) (rmt:find-and-mark-incomplete run-id #f))) (if (not (eq? num-running prev-num-running)) (debug:print-info 0 default-log-port "run-wait specified, waiting on " num-running " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " (time->string (seconds->local-time (current-seconds))))) (thread-sleep! 5) ;; (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) ;; LET* ((test-record ;; we get here on "drop through". All done! (debug:print-info 1 default-log-port "All tests launched"))) (define (runs:calc-fails prereqs-not-met) (filter (lambda (test) (and (vector? test) ;; not (string? test)) (member (db:test-get-state test) '("INCOMPLETE" "COMPLETED")) (not (member (db:test-get-status test) '("PASS" "WARN" "CHECK" "WAIVED" "SKIP")))))
︙			︙
1267 1268 1269 1270 1271 1272 1273 ~~1274~~ 1275 1276 1277 1278 1279 1280 1281	(db #f) (full-test-name #f)) ;; setting itemdat to a list if it is #f (if (not itemdat)(set! itemdat '())) (set! item-path (item-list->path itemdat)) (set! full-test-name (db:test-make-full-name test-name item-path)) ~~(debug:print-info 4 #f~~ "\nTESTNAME: " full-test-name "\n test-config: " (hash-table->alist test-conf) "\n itemdat: " itemdat ) (debug:print 2 default-log-port "Attempting to launch test " full-test-name) ;; (setenv "MT_TEST_NAME" test-name) ;; ;; (setenv "MT_ITEMPATH" item-path)	\|	1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281	(db #f) (full-test-name #f)) ;; setting itemdat to a list if it is #f (if (not itemdat)(set! itemdat '())) (set! item-path (item-list->path itemdat)) (set! full-test-name (db:test-make-full-name test-name item-path)) (debug:print-info 4 default-log-port "\nTESTNAME: " full-test-name "\n test-config: " (hash-table->alist test-conf) "\n itemdat: " itemdat ) (debug:print 2 default-log-port "Attempting to launch test " full-test-name) ;; (setenv "MT_TEST_NAME" test-name) ;; ;; (setenv "MT_ITEMPATH" item-path)
︙			︙
1311 1312 1313 1314 1315 1316 1317 ~~1318~~ 1319 1320 1321 ~~1322~~ 1323 1324 1325 1326 1327 1328 1329	;; (if (not test-id)(set! test-id (rmt:get-test-id run-id test-name item-path))) (if (not test-id) (begin (debug:print 2 default-log-port "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id) (rmt:register-test run-id test-name item-path) (set! test-id (rmt:get-test-id run-id test-name item-path)))) ~~(debug:print-info 4 #f "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"")~~ (set! testdat (rmt:get-test-info-by-id run-id test-id)) (if (not testdat) (begin ~~(debug:print-info 0 #f "WARNING: server is overloaded, trying again in one second")~~ (thread-sleep! 1) (loop))))) (if (not testdat) ;; should NOT happen (debug:print 0 default-log-port "ERROR: failed to get test record for test-id " test-id)) (set! test-id (db:test-get-id testdat)) (if (file-exists? test-path) (change-directory test-path)	\| \|	1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329	;; (if (not test-id)(set! test-id (rmt:get-test-id run-id test-name item-path))) (if (not test-id) (begin (debug:print 2 default-log-port "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id) (rmt:register-test run-id test-name item-path) (set! test-id (rmt:get-test-id run-id test-name item-path)))) (debug:print-info 4 default-log-port "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"") (set! testdat (rmt:get-test-info-by-id run-id test-id)) (if (not testdat) (begin (debug:print-info 0 default-log-port "WARNING: server is overloaded, trying again in one second") (thread-sleep! 1) (loop))))) (if (not testdat) ;; should NOT happen (debug:print 0 default-log-port "ERROR: failed to get test record for test-id " test-id)) (set! test-id (db:test-get-id testdat)) (if (file-exists? test-path) (change-directory test-path)
︙			︙
1346 1347 1348 1349 1350 1351 1352 ~~1353~~ 1354 1355 1356 1357 1358 1359 ~~1360~~ 1361 ~~1362~~ 1363 1364 1365 1366 1367 1368 1369	((member (test:get-state testdat) '("DELETED" "NOT_STARTED" "INCOMPLETE"))(set! runflag #t)) ;; not -rerun and PASS, WARN or CHECK, do no run ((and (or (not rerun) keepgoing) ;; Require to force re-run for COMPLETED or anything + PASS,WARN or CHECK (or (member (test:get-status testdat) '("PASS" "WARN" "CHECK" "SKIP" "WAIVED")) (member (test:get-state testdat) '("COMPLETED")))) ~~(debug:print-info 2 #f "running test " test-name "/" item-path " suppressed as it is " (test:get-state testdat) " and " (test:get-status testdat))~~ (hash-table-set! test-registry full-test-name 'DONOTRUN) ;; COMPLETED) (set! runflag #f)) ;; -rerun and status is one of the specifed, run it ((and rerun (let* ((rerunlst (string-split rerun ",")) (must-rerun (member (test:get-status testdat) rerunlst))) ~~(debug:print-info 3 #f "-rerun list: " rerun ", test-status: " (test:get-status testdat)", must-rerun: " must-rerun)~~ must-rerun)) ~~(debug:print-info 2 #f "Rerun forced for test " test-name "/" item-path)~~ (set! runflag #t)) ;; -keepgoing, do not rerun FAIL ((and keepgoing (member (test:get-status testdat) '("FAIL"))) (set! runflag #f)) ((and (not rerun) (member (test:get-status testdat) '("FAIL" "n/a")))	\| \| \|	1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369	((member (test:get-state testdat) '("DELETED" "NOT_STARTED" "INCOMPLETE"))(set! runflag #t)) ;; not -rerun and PASS, WARN or CHECK, do no run ((and (or (not rerun) keepgoing) ;; Require to force re-run for COMPLETED or anything + PASS,WARN or CHECK (or (member (test:get-status testdat) '("PASS" "WARN" "CHECK" "SKIP" "WAIVED")) (member (test:get-state testdat) '("COMPLETED")))) (debug:print-info 2 default-log-port "running test " test-name "/" item-path " suppressed as it is " (test:get-state testdat) " and " (test:get-status testdat)) (hash-table-set! test-registry full-test-name 'DONOTRUN) ;; COMPLETED) (set! runflag #f)) ;; -rerun and status is one of the specifed, run it ((and rerun (let* ((rerunlst (string-split rerun ",")) (must-rerun (member (test:get-status testdat) rerunlst))) (debug:print-info 3 default-log-port "-rerun list: " rerun ", test-status: " (test:get-status testdat)", must-rerun: " must-rerun) must-rerun)) (debug:print-info 2 default-log-port "Rerun forced for test " test-name "/" item-path) (set! runflag #t)) ;; -keepgoing, do not rerun FAIL ((and keepgoing (member (test:get-status testdat) '("FAIL"))) (set! runflag #f)) ((and (not rerun) (member (test:get-status testdat) '("FAIL" "n/a")))
︙			︙
1407 1408 1409 1410 1411 1412 1413 ~~1414~~ 1415 1416 1417 1418 1419 1420 1421	(if (or (not (null? running-tests)) ;; have to skip if test is running (> numseconds time-since-last)) (set! skip-test (conc "Skipping due to previous test run less than " (configf:lookup test-conf "skip" "rundelay") " ago")))))) (if skip-test (begin (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test) ~~(debug:print-info 1 #f "SKIPPING Test " full-test-name " due to " skip-test))~~ (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags)) (begin (print "ERROR: Failed to launch the test. Exiting as soon as possible") (set! globalexitstatus 1) ;; (process-signal (current-process-id) signal/kill)))))))) ((KILLED) (debug:print 1 default-log-port "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.")	\|	1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421	(if (or (not (null? running-tests)) ;; have to skip if test is running (> numseconds time-since-last)) (set! skip-test (conc "Skipping due to previous test run less than " (configf:lookup test-conf "skip" "rundelay") " ago")))))) (if skip-test (begin (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test) (debug:print-info 1 default-log-port "SKIPPING Test " full-test-name " due to " skip-test)) (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags)) (begin (print "ERROR: Failed to launch the test. Exiting as soon as possible") (set! globalexitstatus 1) ;; (process-signal (current-process-id) signal/kill)))))))) ((KILLED) (debug:print 1 default-log-port "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.")