Megatest: Diff

Differences From Artifact [147cbc54ec]:

File runs.scm — part of check-in [32584d6c1d] at 2017-08-28 11:42:54 on branch v1.64 — Cleaned up couple more named loop calls in runs.scm. Added post-run-hook. (user: matt, size: 113164) [annotate] [blame] [check-ins using] [more...]

To Artifact [3e262d9c09]:

File runs.scm — part of check-in [89fedf98b8] at 2017-09-29 17:56:31 on branch v1.64-keep-running-fix — got further. noticed race condition when not stepping one at a time by setting launcher. noticed xor does not handle preq-fail. (user: bjbarcla, size: 124859) [annotate] [blame] [check-ins using]

︙			︙
202 203 204 205 206 207 208 ~~209~~ 210 211 212 213 214 215 216	(if (string? jobg-count) (string->number jobg-count) jobg-count)))) (if (> (+ num-running num-running-in-jobgroup) 0) (runs:inc-can-run-more-tests-count runsdat)) ;; (set! runs:can-run-more-tests-count (+ runs:can-run-more-tests-count 1))) (if (not (eq? last-num-running-tests num-running)) (begin ~~(debug:print 2 default-log-port "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)~~ (set! last-num-running-tests num-running))) (if (not (eq? 0 globalexitstatus)) (list #f num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit) (let ((can-not-run-more (cond ;; if max-concurrent-jobs is set and the number running is greater ;; than it then cannot run more jobs ((and max-concurrent-jobs (>= num-running max-concurrent-jobs))	\|	202 203 204 205 206 207 208 209 210 211 212 213 214 215 216	(if (string? jobg-count) (string->number jobg-count) jobg-count)))) (if (> (+ num-running num-running-in-jobgroup) 0) (runs:inc-can-run-more-tests-count runsdat)) ;; (set! runs:can-run-more-tests-count (+ runs:can-run-more-tests-count 1))) (if (not (eq? last-num-running-tests num-running)) (begin (debug:print 1 default-log-port "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running) (set! last-num-running-tests num-running))) (if (not (eq? 0 globalexitstatus)) (list #f num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit) (let ((can-not-run-more (cond ;; if max-concurrent-jobs is set and the number running is greater ;; than it then cannot run more jobs ((and max-concurrent-jobs (>= num-running max-concurrent-jobs))
︙			︙
463 464 465 466 467 468 469 ~~470~~ 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 ~~488 489 490 491~~ 492 ~~493~~ 494 495 496 497 498 499 500 501 502 503 504 505 506 ~~507~~ 508 509 510 511 512 513 514	;;====================================================================== ;; refactoring this block into tests:get-full-data ;; ;; What happended, this code is now duplicated in tests!? ;; ;;====================================================================== ~~(if (not (null? test-names))~~ (let loop ((hed (car test-names)) ;; NOTE: This is the main loop that iterates over the test-names (tal (cdr test-names))) ;; 'return-procs tells the config reader to prep running system but return a proc (change-directory toppath) ;; PLEASE OPTIMIZE ME!!! I think this should be a no-op but there are several places where change-directories could be happening. (setenv "MT_TEST_NAME" hed) ;; (let-values (((waitons waitors config)(tests:get-waitons hed all-tests-registry))) (debug:print-info 8 default-log-port* "waitons: " waitons) ;; check for hed in waitons => this would be circular, remove it and issue an ;; error (if (or (member hed waitons) (member hed waitors)) (begin (debug:print-error 0 default-log-port "test " hed " has listed itself as a waiton or waitor, please correct this!") (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons)) (set! waitors (filter (lambda (x)(not (equal? x hed))) waitors)))) ;; (items (items:get-items-from-config config))) (if (not (hash-table-ref/default test-records hed #f)) ~~(hash-table-set! test-records hed (vector hed ;; 0 config ;; 1 waitons ;; 2~~ (config-lookup config "requirements" "priority") ;; priority 3 ~~(tests:get-items config) ;; expand the [items] and or [itemstable] into explict items~~ #f ;; itemsdat 5 #f ;; spare - used for item-path waitors ;; ))) (for-each (lambda (waiton) (if (and waiton (not (member waiton test-names))) (let* ((waiton-record (hash-table-ref/default test-records waiton #f)) (waiton-tconfig (if waiton-record (vector-ref waiton-record 1) #f)) (waiton-itemized (and waiton-tconfig (or (hash-table-ref/default waiton-tconfig "items" #f) (hash-table-ref/default waiton-tconfig "itemstable" #f)))) (itemmaps (tests:get-itemmaps config)) ;; (configf:lookup config "requirements" "itemmap")) ~~(new-test-patts (tests:extend-test-patts test-patts hed waiton itemmaps)))~~ (debug:print-info 0 default-log-port "Test " waiton " has " (if waiton-record "a" "no") " waiton-record and" (if waiton-itemized " " " no ") "items") ;; need to account for test-patt here, if I am test "a", selected with a test-patt of "hed/b%" ;; and we are waiting on "waiton" we need to add "waiton/,waiton/b%" to test-patt ;; is this satisfied by merely appending "/" to the waiton name added to the list? ;; ;; This approach causes all of the items in an upstream test to be run	\| \| \| \| \| \| \|	463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514	;;====================================================================== ;; refactoring this block into tests:get-full-data ;; ;; What happended, this code is now duplicated in tests!? ;; ;;====================================================================== (if (not (null? test-names)) ;; BEGIN test-names loop (let loop ((hed (car test-names)) ;; NOTE: This is the main loop that iterates over the test-names (tal (cdr test-names))) ;; 'return-procs tells the config reader to prep running system but return a proc (change-directory toppath) ;; PLEASE OPTIMIZE ME!!! I think this should be a no-op but there are several places where change-directories could be happening. (setenv "MT_TEST_NAME" hed) ;; (let-values (((waitons waitors config)(tests:get-waitons hed all-tests-registry))) (debug:print-info 8 default-log-port* "waitons: " waitons) ;; check for hed in waitons => this would be circular, remove it and issue an ;; error (if (or (member hed waitons) (member hed waitors)) (begin (debug:print-error 0 default-log-port "test " hed " has listed itself as a waiton or waitor, please correct this!") (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons)) (set! waitors (filter (lambda (x)(not (equal? x hed))) waitors)))) ;; (items (items:get-items-from-config config))) (if (not (hash-table-ref/default test-records hed #f)) (hash-table-set! test-records ;; BB: we are doing a manual make-tests:testqueue hed (vector hed ;; 0 ;; testname config ;; 1 waitons ;; 2 (config-lookup config "requirements" "priority") ;; priority 3 (tests:get-items config) ;; 4 ;; expand the [items] and or [itemstable] into explict items #f ;; itemsdat 5 #f ;; spare - used for item-path waitors ;; ))) (for-each (lambda (waiton) (if (and waiton (not (member waiton test-names))) (let* ((waiton-record (hash-table-ref/default test-records waiton #f)) (waiton-tconfig (if waiton-record (vector-ref waiton-record 1) #f)) (waiton-itemized (and waiton-tconfig (or (hash-table-ref/default waiton-tconfig "items" #f) (hash-table-ref/default waiton-tconfig "itemstable" #f)))) (itemmaps (tests:get-itemmaps config)) ;; (configf:lookup config "requirements" "itemmap")) (new-test-patts (tests:extend-test-patts test-patts hed waiton itemmaps))) ;; BB: items expanded here. (debug:print-info 0 default-log-port "Test " waiton " has " (if waiton-record "a" "no") " waiton-record and" (if waiton-itemized " " " no ") "items") ;; need to account for test-patt here, if I am test "a", selected with a test-patt of "hed/b%" ;; and we are waiting on "waiton" we need to add "waiton/,waiton/b%" to test-patt ;; is this satisfied by merely appending "/" to the waiton name added to the list? ;; ;; This approach causes all of the items in an upstream test to be run
︙			︙
540 541 542 543 544 545 546 ~~547~~ 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 ~~566~~ 567 568 569 570 571 572 573 ~~574~~ 575 576 577 578 579 580 581	;; (set! test-names (cons waiton test-names))))) ;; was an append, now a cons ))) (delete-duplicates (append waitons waitors))) (let ((remtests (delete-duplicates (append waitons tal)))) (if (not (null? remtests)) (begin ;; (debug:print-info 0 default-log-port "Preprocessing continues for " (string-intersperse remtests ", ")) ~~(loop (car remtests)(cdr remtests))))))))~~ (if (not (null? required-tests)) (debug:print-info 1 default-log-port "Adding \"" (string-intersperse required-tests " ") "\" to the run queue")) ;; NOTE: these are all parent tests, items are not expanded yet. (debug:print-info 4 default-log-port "test-records=" (hash-table->alist test-records)) (let ((reglen (configf:lookup configdat "setup" "runqueue"))) (if (> (length (hash-table-keys test-records)) 0) (let* ((keep-going #t) (run-queue-retries 5) (th1 (make-thread (lambda () (handle-exceptions exn (begin (print-call-chain) (print " message: " ((condition-property-accessor 'exn 'message) exn))) (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry))) "runs:run-tests-queue")) ~~(th2 (make-thread (lambda ()~~ ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ... (let ((run-ids (rmt:get-all-run-ids))) (for-each (lambda (run-id) (if keep-going (handle-exceptions exn (debug:print 0 default-log-port "error in calling find-and-mark-incomplete for run-id " run-id) ~~(rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime)))~~ run-ids))) "runs: mark-incompletes"))) (thread-start! th1) (thread-start! th2) (thread-join! th1) (set! keep-going #f) (thread-join! th2)	\| \| \|	540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581	;; (set! test-names (cons waiton test-names))))) ;; was an append, now a cons ))) (delete-duplicates (append waitons waitors))) (let ((remtests (delete-duplicates (append waitons tal)))) (if (not (null? remtests)) (begin ;; (debug:print-info 0 default-log-port "Preprocessing continues for " (string-intersperse remtests ", ")) (loop (car remtests)(cdr remtests)))))))) ;; END test-names loop (if (not (null? required-tests)) (debug:print-info 1 default-log-port "Adding \"" (string-intersperse required-tests " ") "\" to the run queue")) ;; NOTE: these are all parent tests, items are not expanded yet. (debug:print-info 4 default-log-port "test-records=" (hash-table->alist test-records)) (let ((reglen (configf:lookup configdat "setup" "runqueue"))) (if (> (length (hash-table-keys test-records)) 0) (let* ((keep-going #t) (run-queue-retries 5) (th1 (make-thread (lambda () (handle-exceptions exn (begin (print-call-chain) (print " message: " ((condition-property-accessor 'exn 'message) exn))) (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry))) "runs:run-tests-queue")) (th2 (make-thread (lambda () ;; BBQ: why are we visiting ALL runs here? ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ... (let ((run-ids (rmt:get-all-run-ids))) (for-each (lambda (run-id) (if keep-going (handle-exceptions exn (debug:print 0 default-log-port "error in calling find-and-mark-incomplete for run-id " run-id) (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27) run-ids))) "runs: mark-incompletes"))) (thread-start! th1) (thread-start! th2) (thread-join! th1) (set! keep-going #f) (thread-join! th2)
︙			︙
628 629 630 631 632 633 634 635 636 637 ~~638~~ 639 640 ~~641~~ 642 643 644 645 646 647 648 649 ~~650 651 652 653~~ 654 ~~655 656 657~~ ~~658 659~~ ~~660 661~~ 662 ~~663 664 665~~ ~~666 667~~ ~~668 669 670 671 672 673 674~~ ~~675 676 677 678 679 680 681 682 683 684 685 686 687 688 689~~ 690 ~~691~~ ~~692~~ ~~693 694~~ ~~695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713~~ 714 ~~715 716 717~~ ~~718 719 720 721 722 723 724 725 726 727 728~~ 729 ~~730 731 732 733 734 735 736 737~~ 738 ~~739 740 741 742 743~~ 744 ~~745 746 747 748 749 750 751 752 753~~ 754 ~~755 756 757~~ ~~758 759 760 761 762 763 764 765 766 767 768 769 770 771 772~~ 773 ~~774 775 776 777~~ ~~778 779 780 781 782 783 784~~ ~~785 786 787 788 789 790 791~~ 792 ~~793~~ ~~794 795 796~~ ~~797~~ ~~798~~ ~~799 800~~ 801 802 803 804 805 806 807	;; this is the list of parameters to the named loop "loop" near the top of runs:run-tests-queue, look around line 1216 ;; (define (runs:loop-values tal reg reglen regfull reruns) (list (runs:queue-next-hed tal reg reglen regfull) ;; hed (runs:queue-next-tal tal reg reglen regfull) ;; tal (runs:queue-next-reg tal reg reglen regfull) ;; reg reruns)) ;; reruns (define runs:nothing-left-in-queue-count 0) ;; BB: for future reference - suspect target vars are not expanded to env vars at this point (item expansion using [items]\nwhatever [system echo $TARGETVAR] doesnt work right whereas [system echo #{targetvar}] does.. Tal and Randy have tix on this. on first pass, var not set, on second pass, ok. (define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps) (let* ((loop-list (list hed tal reg reruns)) ~~(prereqs~~-not-met~~ (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)))~~ (if (list? res) res (begin (debug:print 0 default-log-port "ERROR: rmt:get-prereqs-not-met returned non-list!\n" " res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" testmode " itemmaps=" itemmaps) '())))) ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) (fails (runs:calc-fails prereqs~~-not-met~~)) (prereq-fails (runs:calc-prereq-fail prereqs-not~~-me~~t)) (non-completed (runs:calc-not-completed prereqs~~-not-met~~)) (runnables (runs:calc-runnable prereqs~~-not-met)~~)) (debug:print-info 4 default-log-port "START OF INNER COND #2 " ~~"\n can-run-more: " can-run-more "\n testname: " hed "\n prereqs~~-not-met~~: " (runs:pretty-string prereqs~~-not-met~~)~~ ~~"\n non-completed: " (runs:pretty-string non-completed) "\n prereq-fails: " (runs:pretty-string prereq-fails)~~ ~~"\n fails: " (runs:pretty-string fails) "\n testmode: " testmode~~ "\n (member 'toplevel testmode): " (member 'toplevel testmode) ~~"\n (null? non-completed): " (null? non-completed) "\n reruns: " reruns "\n items: " items~~ ~~"\n can-run-more: " can-run-more)~~ (cond ;; all prereqs met, fire off the test ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch ((and (not (member 'toplevel testmode)) (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a) '(DONOTRUN removed CANNOTRUN))) ;; common:cant-run-states-sym) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here (debug:print-info 1 default-log-port "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue") (if (or (not (null? tal)) (not (null? reg))) (runs:loop-values tal reg reglen regfull reruns) (begin (debug:print-info 0 default-log-port "Nothing left in the queue!") ;; If get here twice then we know we've tried to expand all items ;; since there must be a logic issue with the handling of loops in the ;; items expand phase we will brute force an exit here. (if (> runs:nothing-left-in-queue-count 2) (begin (debug:print 0 default-log-port "WARNING: this condition is triggered when there were no items to expand and nothing to run. Please check your run for completeness") (exit 0)) (set! runs:nothing-left-in-queue-count (+ runs:nothing-left-in-queue-count 1))) #f))) ;; ~~((or ~~(null? prereqs-not-met)~~~~ ~~(and (member 'toplevel testmode) (null? non-completed)))~~ (debug:print-info 4 default-log-port "runs:expand-items: (or (null? prereqs~~-not-met~~) (and (member 'toplevel testmode)(null? non-completed)))") (let ((test-name (tests:testqueue-get-testname test-record))) (setenv "MT_TEST_NAME" test-name) ;; (setenv "MT_RUNNAME" runname) (runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process (let ((items-list (items:get-items-from-config tconfig))) (if (list? items-list) (begin (if (null? items-list) (let ((test-id (rmt:get-test-id run-id test-name "")) (num-items (rmt:test-toplevel-num-items run-id test-name))) (if (and test-id (not (> num-items 0))) (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "Failed to run due to failed prerequisites")))) (tests:testqueue-set-items! test-record items-list) (list hed tal reg reruns)) (begin (debug:print-error 0 default-log-port "The proc from reading the items table did not yield a list - please report this") (exit 1)))))) ~~((and (null? fails) (null? prereq-fails) (not (null? non-completed)))~~ (let* ((allinqueue (map (lambda (x)(if (string? x) x (db:test-get-testname x))) (append newtal reruns))) ;; prereqstrs is a list of test names as strings that are prereqs for hed (prereqstrs (delete-duplicates (map (lambda (x)(if (string? x) x (db:test-get-testname x))) prereqs~~-not-met~~))) ;; a prereq that is not found in allinqueue will be put in the notinqueue list ;; ;; (notinqueue (filter (lambda (x) ;; (not (member x allinqueue))) ;; prereqstrs)) (give-up #f)) ;; We can get here when a prereq has not been run due to it having a prereq that failed. ;; We need to use this to dequeue this item as CANNOTRUN ;; (if (member 'toplevel testmode) ;; '(toplevel)) ;; NOTE: this probably should be (member 'toplevel testmode) (for-each (lambda (prereq) (if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN) (set! give-up #t))) prereqstrs)) (if (and give-up (not (and (null? tal)(null? reg)))) (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records)) (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records))) (debug:print 1 default-log-port "WARNING: test " hed " has discarded prerequisites, removing it from the queue") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites"))) (if (and (null? trimmed-tal) (null? trimmed-reg)) #f (runs:loop-values trimmed-tal trimmed-reg reglen regfull reruns) )) (list (car newtal)(append (cdr newtal) reg) '() reruns)))) ~~((and (null? fails) (null? prereq-fails) (null? non-completed))~~ (if (runs:can-keep-running? hed 20) (begin (runs:inc-cant-run-tests hed) (debug:print-info 1 default-log-port "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default seen-cant-run-tests hed 0)) ;; getting here likely means the system is way overloaded, kill a full minute before continuing (thread-sleep! 60) ;; num-retries code was here ;; we use this opportunity to move contents of reg to tal (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met? (begin (debug:print-info 1 default-log-port "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while."))) (runs:loop-values tal reg reglen regfull reruns) ))) ~~((and (or (not (null? fails)) (not (null? prereq-fails))) (member 'normal testmode))~~ (debug:print-info 1 default-log-port "test " hed " (mode=" testmode ") has failed prerequisite(s); " (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") ", removing it from to-do list") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (if (not (null? prereq-fails)) (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites") (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))) (if (or (not (null? reg))(not (null? tal))) (begin (hash-table-set! test-registry hed 'CANNOTRUN) (runs:loop-values tal reg reglen regfull (cons hed reruns)) ) #f)) ;; #f flags do not loop ~~((and (not (null? fails))(member 'toplevel testmode))~~ ~~(if (or (not (null? reg))(not (null? tal))) (list (car newtal)(append (cdr newtal) reg) '() reruns) #f))~~ ~~~~((null?~~ ~~runnables)~~ #f) ;; if we get here and non-completed is null then it is all over.~~ ~~(else~~ (debug:print 0 default-log-port "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now") (list (car newtal)(cdr newtal) reg reruns))))) (define (runs:mixed-list-testname-and-testrec->list-of-strings inlst) (if (null? inlst) '() (map (lambda (t) (cond ((vector? t)	> > > > > > > > > > \| > > > > > > > > > \| > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > \| \| \| \| > > > > > > > > > > > > > > > > > > > > \| \| \| > \| \| > \| \| \| \| \| > > > \| \| > > \| \| \| \| \| \| \| > > \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| > > > > > > > > > > \| > \| > > \| \| > > \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| > \| \| \| > \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| > \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| > \| \| \| \| \| \| \| > > \| \| \| \| \| \| \| \| > \| \| \| > > > \| > \| > \| \| > >	628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916	;; this is the list of parameters to the named loop "loop" near the top of runs:run-tests-queue, look around line 1216 ;; (define (runs:loop-values tal reg reglen regfull reruns) (list (runs:queue-next-hed tal reg reglen regfull) ;; hed (runs:queue-next-tal tal reg reglen regfull) ;; tal (runs:queue-next-reg tal reg reglen regfull) ;; reg reruns)) ;; reruns ;; objective - iterate thru tests ;; => want to prioritize tests we haven't seen before ;; => sometimes need to squeeze things in (added to reg) ;; => review of a previously seen test is higher priority of never visited test ;; reg - list of previously visited tests ;; tal - list of never visited tests ;; prefer next hed to be from reg than tal. (define runs:nothing-left-in-queue-count 0) ;; BB: for future reference - suspect target vars are not expanded to env vars at this point (item expansion using [items]\nwhatever [system echo $TARGETVAR] doesnt work right whereas [system echo #{targetvar}] does.. Tal and Randy have tix on this. on first pass, var not set, on second pass, ok. ;; return value of runs:expand-items is passed back to runs-tests-queue and is fed to named loop with this signature: ;; (let loop ((hed (car sorted-test-names)) ;; (tal (cdr sorted-test-names)) ;; (reg '()) ;; registered, put these at the head of tal ;; (reruns '())) ;; runs:expand-items: for a given test, expand its items into real tests ready to be processed at this time ;; this procedure's operation only makes sense in context of runs-tests-queue. (define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps) (let* ((loop-list (list hed tal reg reruns)) (prereqs (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))) (if (list? res) res (begin (debug:print 0 default-log-port "ERROR: rmt:get-prereqs-not-met returned non-list!\n" " res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" testmode " itemmaps=" itemmaps) '())))) (have-itemized (not (null? (lset-intersection eq? testmode '(itemmatch itemwait))))) (ok-statuses '("PASS" "WARN" "CHECK" "WAIVED" "SKIP")) ;; (prereqs-not-met ;; (filter (lambda (test) ;; (if (vector? test) ;; BB: result may be a collection of strings or test vectors-- why not just test vectors? ;; (let ((testname (db:test-get-testname test)) ;; (itempath (db:test-get-item-path test)) ;; (state (db:test-get-state test) ) ;; (status (db:test-get-status test))) ;; (cond ;; ((and (equal? state "COMPLETED") (member status ok-statuses)) #f) ;; ((and have-itemized (equal? "" itempath) (member testname prereq-tests-some-items-passed-list)) #f) ;; (else #t))) ;; test)) ;; (delete-duplicates prereqs))) (prereq-items-completed (filter (lambda (test) (if (vector? test) ;; BB: result may be a collection of strings or test vectors-- why not just test vectors? (let ((testname (db:test-get-testname test)) (itempath (db:test-get-item-path test)) (state (db:test-get-state test) ) (status (db:test-get-status test))) (cond ((equal? state "COMPLETED") #t) ((not (equal? "" itempath)) #f) (else #f))) #f)) prereqs)) ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) (fails (runs:calc-fails prereqs)) ;; prereqs (prereq-fails (runs:calc-prereq-fail prereqs)) ;; filter - get NOT_STARTED's which are not status n/a or KEEP_TRYING (non-completed (runs:calc-not-completed prereqs)) (runnable-prereqs (runs:calc-runnable prereqs)) (unexpanded-prereqs (filter (lambda (testname) (let* ((test-rec (hash-table-ref test-records testname)) (items (tests:testqueue-get-items test-rec))) (BB> "HEY " testname "=>"items) (or (procedure? items)(eq? items 'have-procedure)))) waitons)) (completed-prereq-items (let ((foo (begin (BB> "hello prereqs: "prereqs) #t)) (res (filter (lambda (test) (BB> "foo - "test) (and (vector? test) (equal? "COMPLETED" (db:test-get-state test)) (equal? "COMPLETED" (db:test-get-state test)) (not (equal? "" (db:test-get-item-path test))))) prereqs))) res)) ) (debug:print-info 4 default-log-port "START OF INNER COND #2 " "\n can-run-more: " can-run-more "\n testname: " hed "\n prereqs: " (runs:pretty-string prereqs) "\n completed-prereq-items: " (runs:pretty-string completed-prereq-items) "\n non-completed: " (runs:pretty-string non-completed) "\n prereq-fails: " (runs:pretty-string prereq-fails) "\n runnable-prereqs: " (runs:pretty-string runnable-prereqs) "\n fails: " (runs:pretty-string fails) "\n testmode: " testmode "\n (member 'toplevel testmode): " (member 'toplevel testmode) "\n (null? non-completed): " (null? non-completed) "\n reruns: " reruns "\n items: " items "\n unexpanded-prereqs: " unexpanded-prereqs ;;all-prereqs-expanded "\n completed-prereq-items: " completed-prereq-items "\n have-itemized: " have-itemized "\n can-run-more: " can-run-more) (BB> "before runs:expand-items cond") (let ((res (cond ;; all prereqs met, fire off the test ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch ;; runs:expand-items case: test of interest not toplevel and IS blackballed -> ??? ((and (not (member 'toplevel testmode)) ;; test has been blackballed elsewhere (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a) '(DONOTRUN removed CANNOTRUN))) ;; common:cant-run-states-sym) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here (BB> "cb1") (debug:print-info 1 default-log-port "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue") (if (or (not (null? tal)) (not (null? reg))) (runs:loop-values tal reg reglen regfull reruns) ;; blackballed test - throw it away (begin (debug:print-info 0 default-log-port "Nothing left in the queue!") ;; If get here twice then we know we've tried to expand all items ;; since there must be a logic issue with the handling of loops in the ;; items expand phase we will brute force an exit here. (if (> runs:nothing-left-in-queue-count 2) (begin (debug:print 0 default-log-port "WARNING: this condition is triggered when there were no items to expand and nothing to run. Please check your run for completeness") (exit 0)) (set! runs:nothing-left-in-queue-count (+ runs:nothing-left-in-queue-count 1))) #f))) ;;; desired result of below cond branch: ;; we want to expand items in our test of interest (hed) in the following cases: ;; case 1 - mode is itemmatch or itemwait: (DONE) ;; - all prereq tests have been expanded ;; - at least one prereq's items have completed ;; case 2 - mode is toplevel (PARTIAL) ;; - prereqs are completed. ;; - or no prereqs can complete (TODO) ;; case 3 - mode not specified (DONE) ;; - prereqs are completed and passed (we could consider removing "and passed" -- it would change behavior from current) ;; runs:expand-items case: toplevel or else no dangling prerequeistes -- expand items now. ((or (and have-itemized (null? unexpanded-prereqs) (not (null? completed-prereq-items))) (null? prereqs) ;; nothing is in our way to proceed (need to expand this to an item level check.) (and (member 'toplevel testmode) ;; for toplevel test - proceed (nothing in our way) (null? non-completed))) (BB> "cb2") (debug:print-info 4 default-log-port "runs:expand-items: (or (null? prereqs) (and (member 'toplevel testmode)(null? non-completed)))") (let ((test-name (tests:testqueue-get-testname test-record))) (setenv "MT_TEST_NAME" test-name) ;; hack to give context to get-items-from-config TODO: call-with-environment-variables (setenv "MT_RUNNAME" runname) (runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process (let ((items-list (items:get-items-from-config tconfig))) ;; BB: RIGHT HERE is where item expansion occurs.. target vars are not expanded to env vars at this point (item expansion using [items]\nwhatever [system echo $TARGETVAR] doesnt work right whereas [system echo #{targetvar}] does.. Tal and Randy have tix on this. (if (list? items-list) (begin ;; we have discovered we have items we need to process, so stuff them into test list and recur (if (null? items-list) (let ((test-id (rmt:get-test-id run-id test-name "")) (num-items (rmt:test-toplevel-num-items run-id test-name))) (if (and test-id (not (> num-items 0))) (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "Failed to run due to failed prerequisites")))) (tests:testqueue-set-items! test-record items-list) ; stuffing happens here (list hed tal reg reruns)) ;; return value.. (begin (debug:print-error 0 default-log-port "The proc from reading the items table did not yield a list - please report this") (exit 1)))))) ;; runs:expand-items case: no fails, no prereq-fails, some non-completed ((and (null? fails) (null? prereq-fails) (not (null? non-completed))) (BB> "cb3") (let* ((allinqueue (map (lambda (x)(if (string? x) x (db:test-get-testname x))) (append newtal reruns))) ;; prereqstrs is a list of test names as strings that are prereqs for hed (prereqstrs (delete-duplicates (map (lambda (x)(if (string? x) x (db:test-get-testname x))) prereqs))) ;; a prereq that is not found in allinqueue will be put in the notinqueue list ;; ;; (notinqueue (filter (lambda (x) ;; (not (member x allinqueue))) ;; prereqstrs)) (give-up #f)) ;; We can get here when a prereq has not been run due to it having a prereq that failed. ;; We need to use this to dequeue this item as CANNOTRUN ;; (if (member 'toplevel testmode) ;; '(toplevel)) ;; NOTE: this probably should be (member 'toplevel testmode) (for-each (lambda (prereq) (if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN) (set! give-up #t))) prereqstrs)) (if (and give-up (not (and (null? tal)(null? reg)))) (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records)) (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records))) (debug:print 1 default-log-port "WARNING: test " hed " has discarded prerequisites, removing it from the queue") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites"))) (if (and (null? trimmed-tal) (null? trimmed-reg)) #f (runs:loop-values trimmed-tal trimmed-reg reglen regfull reruns) )) (list (car newtal)(append (cdr newtal) reg) '() reruns)))) ((and (null? fails) ;; have not-started tests, but unable to run them. everything looks completed with no prospect of unsticking something that is stuck. we should mark hed as moribund and exit or continue if there are more tests to consider (null? prereq-fails) (null? non-completed)) (BB> "cb4") (if (runs:can-keep-running? hed 20) (begin (runs:inc-cant-run-tests hed) (debug:print-info 0 default-log-port "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default seen-cant-run-tests hed 0)) ;; ;; getting here likely means the system is way overloaded, kill a full minute before continuing (thread-sleep! 60) ;; TODO: gate by normalized server load > 1.0 (maxload config thing) ;; num-retries code was here ;; we use this opportunity to move contents of reg to tal (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met? (begin (debug:print-info 1 default-log-port "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while."))) (runs:loop-values tal reg reglen regfull reruns) ))) ((and (or (not (null? fails)) (not (null? prereq-fails))) (member 'normal testmode)) (BB> "cb5") (debug:print-info 1 default-log-port "test " hed " (mode=" testmode ") has failed prerequisite(s); " (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") ", removing it from to-do list") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (if (not (null? prereq-fails)) (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites") (begin (debug:print 4 default-log-port"BB> set PREQ_FAIL on "hed) (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))))) ;; BB: this works, btu equivalent for itemwait mode does not work. (if (or (not (null? reg))(not (null? tal))) (begin (hash-table-set! test-registry hed 'CANNOTRUN) (runs:loop-values tal reg reglen regfull (cons hed reruns)) ) #f)) ;; #f flags do not loop ((and (not (null? fails))(member 'toplevel testmode)) (BB> "cb6") (if (or (not (null? reg))(not (null? tal))) (list (car newtal)(append (cdr newtal) reg) '() reruns) #f)) ((null? runnable-prereqs) (BB> "cb7") #f) ;; if we get here and non-completed is null then it is all over. (else (BB> "cb8") (debug:print 0 default-log-port "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now") (list (car newtal)(cdr newtal) reg reruns))))) (BB> "after runs:expand-items big cond") res))) (define (runs:mixed-list-testname-and-testrec->list-of-strings inlst) (if (null? inlst) '() (map (lambda (t) (cond ((vector? t)
︙			︙
816 817 818 819 820 821 822 823 824 825 826 827 828 829	(conc t)))) inlst))) ;; hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmaps) (define (runs:process-expanded-tests runsdat testdat) ;; unroll the contents of runsdat and testdat (due to ongoing refactoring). (let* ((hed (runs:testdat-hed testdat)) (tal (runs:testdat-tal testdat)) (reg (runs:testdat-reg testdat)) (reruns (runs:testdat-reruns testdat)) (test-name (runs:testdat-test-name testdat)) (item-path (runs:testdat-item-path testdat)) (jobgroup (runs:testdat-jobgroup testdat))	> > >	925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941	(conc t)))) inlst))) ;; hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmaps) (define (runs:process-expanded-tests runsdat testdat) ;; unroll the contents of runsdat and testdat (due to ongoing refactoring). (debug:print 2 default-log-port "runs:process-expanded-tests; testdat:" ) (debug:print 2 default-log-port (with-output-to-string (lambda () (pp (runs:testdat->alist testdat) )))) (let* ((hed (runs:testdat-hed testdat)) (tal (runs:testdat-tal testdat)) (reg (runs:testdat-reg testdat)) (reruns (runs:testdat-reruns testdat)) (test-name (runs:testdat-test-name testdat)) (item-path (runs:testdat-item-path testdat)) (jobgroup (runs:testdat-jobgroup testdat))
︙			︙
887 888 889 890 891 892 893 ~~894~~ 895 896 897 898 899 900 901	(runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) (debug:print-info 2 default-log-port "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", "))) ;; Don't know at this time if the test have been launched at some time in the past ;; i.e. is this a re-launch? (debug:print-info 4 default-log-port "run-limits-info = " run-limits-info) ~~(cond~~ ;; Check item path against item-patts, ;; ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run ;; else the run is stuck, temporarily or permanently ;; but should check if it is due to lack of resources vs. prerequisites (debug:print-info 1 default-log-port "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts)	\|	999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013	(runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) (debug:print-info 2 default-log-port "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", "))) ;; Don't know at this time if the test have been launched at some time in the past ;; i.e. is this a re-launch? (debug:print-info 4 default-log-port "run-limits-info = " run-limits-info) (cond ; cond 894- 1067 ;; Check item path against item-patts, ;; ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run ;; else the run is stuck, temporarily or permanently ;; but should check if it is due to lack of resources vs. prerequisites (debug:print-info 1 default-log-port "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts)
︙			︙
1003 1004 1005 1006 1007 1008 1009 ~~1010~~ 1011 1012 1013 1014 1015 1016 1017 1018 ~~1019~~ 1020 1021 ~~1022~~ 1023 1024 1025 1026 1027 1028 ~~1029~~ 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 ~~1040~~ 1041 1042 1043 1044 1045 1046 ~~1047~~ 1048 1049 1050 1051 1052 1053 1054	;; couldn't run, take a breather (if (runs:lownoise "Waiting for more work to do..." 60) (debug:print-info 0 default-log-port "Waiting for more work to do...")) (thread-sleep! 1) (list (car newtal)(cdr newtal) reg reruns)) ;; the waiton is FAIL so no point in trying to run hed ever again (if (or (not (null? reg))(not (null? tal))) ~~(if (vector? hed)~~ (begin (debug:print 1 default-log-port "WARNING: Dropping test " test-name "/" item-path " from the launch list as it has prerequistes that are FAIL") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))) (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! global-delta) ;; This next is for the items ~~(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f)~~ (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed) (runs:loop-values tal reg reglen regfull reruns)) ~~(let ((nth-try (hash-table-ref/default test-registry hed 0)))~~ (cond ((member "RUNNING" (map db:test-get-state prereqs-not-met)) (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60) (debug:print 0 default-log-port "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet.")) (thread-sleep! 4) (runs:loop-values tal reg reglen regfull reruns)) ~~((or (not nth-try)~~ (and (number? nth-try) (< nth-try 10))) (hash-table-set! test-registry hed (if (number? nth-try) (+ nth-try 1) 0)) (if (runs:lownoise (conc "not removing test " hed) 60) (debug:print 1 default-log-port "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites")) ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) (runs:loop-values newtal reg reglen regfull reruns)) ~~((symbol? nth-try)~~ (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW (if (null? tal) #f ;; yes, really (list (car tal)(cdr tal) reg reruns)) (begin (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60) ~~(debug:print 0 default-log-port "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state " nth-try " will be overridden and we'll retry."))~~ (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f) (hash-table-set! test-registry hed 0) (runs:loop-values newtal reg reglen regfull)))) (else (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60) (debug:print 0 default-log-port "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now.")) ;; (debug:print 0 default-log-port " prereqs: " prereqs-not-met)	\| > > > > \| > \| > \| \| \|	1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172	;; couldn't run, take a breather (if (runs:lownoise "Waiting for more work to do..." 60) (debug:print-info 0 default-log-port "Waiting for more work to do...")) (thread-sleep! 1) (list (car newtal)(cdr newtal) reg reruns)) ;; the waiton is FAIL so no point in trying to run hed ever again (if (or (not (null? reg))(not (null? tal))) (if (or (vector? hed) (not (null? fails))) ;; BB: why do we need a vector? in my case, fails is populated (prereq failed), reg is not nul, and we really want to drop this one (begin (debug:print 1 default-log-port "WARNING: Dropping test " test-name "/" item-path " from the launch list as it has prerequistes that are FAIL") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))) (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! global-delta) ;; This next is for the items (if (not (null? fails)) ;;(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "PREQ_FAIL" #f) (rmt:set-state-status-and-roll-up-items run-id test-name item-path "NOT_STARTED" "PREQ_FAIL" #f) ;;(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) (rmt:set-state-status-and-roll-up-items run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) ) (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed) (runs:loop-values tal reg reglen regfull reruns)) (let ((nth-try (hash-table-ref/default test-registry hed 0))) ;; hed not a vector... (debug:print 2 default-log-port "nth-try("hed")="nth-try) (cond ((member "RUNNING" (map db:test-get-state prereqs-not-met)) (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60) (debug:print 0 default-log-port "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet.")) (thread-sleep! 4) (runs:loop-values tal reg reglen regfull reruns)) ((or (not nth-try) ;; BB: condition on subsequent tries, condition below fires on first try (and (number? nth-try) (< nth-try 10))) (hash-table-set! test-registry hed (if (number? nth-try) (+ nth-try 1) 0)) (if (runs:lownoise (conc "not removing test " hed) 60) (debug:print 1 default-log-port "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites")) ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) (runs:loop-values newtal reg reglen regfull reruns)) ((symbol? nth-try) ;; BB: 'done matches here in one case where prereq itemwait failed. This is first "try" (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW (if (null? tal) #f ;; yes, really (list (car tal)(cdr tal) reg reruns)) (begin (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60) (debug:print 0 default-log-port "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state >" nth-try "< will be overridden and we'll retry.")) (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f) (hash-table-set! test-registry hed 0) (runs:loop-values newtal reg reglen regfull)))) (else (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60) (debug:print 0 default-log-port "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now.")) ;; (debug:print 0 default-log-port " prereqs: " prereqs-not-met)
︙			︙
1164 1165 1166 1167 1168 1169 1170 ~~1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210~~ 1211 1212 1213 1214 1215 1216 1217	(debug:print 5 default-log-port "test-records: " test-records ", flags: " (hash-table->alist flags)) ;; Do mark-and-find clean up of db before starting runing of quue ;; ;; (rmt:find-and-mark-incomplete) (let* ((run-info (rmt:get-run-info run-id)) (tests-info (mt:get-tests-for-run run-id #f '() '())) ;; qryvals: "id,testname,item_path")) (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) (test-registry (make-hash-table)) (registry-mutex (make-mutex)) (num-retries 0) (max-retries (config-lookup configdat "setup" "maxretries")) (max-concurrent-jobs (configf:lookup-number configdat "setup" "max_concurrent_jobs" default: 50)) (reglen (if (number? reglen-in) reglen-in 1)) (last-time-incomplete (- (current-seconds) 900)) ;; force at least one clean up cycle (last-time-some-running (current-seconds)) ;; (tdbdat (tasks:open-db)) (runsdat (make-runs:dat ;; hed: hed ;; tal: tal ;; reg: reg ;; reruns: reruns reglen: reglen regfull: #f ;; regfull ;; test-record: test-record runname: runname ;; test-name: test-name ;; item-path: item-path ;; jobgroup: jobgroup max-concurrent-jobs: max-concurrent-jobs run-id: run-id ;; waitons: waitons ;; testmode: testmode test-patts: test-patts required-tests: required-tests test-registry: test-registry registry-mutex: registry-mutex flags: flags keyvals: keyvals run-info: run-info ;; newtal: newtal all-tests-registry: all-tests-registry ;; itemmaps: itemmaps ;; prereqs-not-met: (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps) ;; can-run-more-tests: (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) ;; look at the test jobgroup and tot jobs running ))) ;; Initialize the test-registery hash with tests that already have a record ;; convert state to symbol and use that as the hash value (for-each (lambda (trec) (let ((id (db:test-get-id trec)) (tn (db:test-get-testname trec)) (ip (db:test-get-item-path trec))	\| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \|	1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335	(debug:print 5 default-log-port "test-records: " test-records ", flags: " (hash-table->alist flags)) ;; Do mark-and-find clean up of db before starting runing of quue ;; ;; (rmt:find-and-mark-incomplete) (let* ((run-info (rmt:get-run-info run-id)) (tests-info (mt:get-tests-for-run run-id #f '() '())) ;; qryvals: "id,testname,item_path")) (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) (test-registry (make-hash-table)) (registry-mutex (make-mutex)) (num-retries 0) (max-retries (config-lookup configdat "setup" "maxretries")) (max-concurrent-jobs (configf:lookup-number configdat "setup" "max_concurrent_jobs" default: 50)) (reglen (if (number? reglen-in) reglen-in 1)) (last-time-incomplete (- (current-seconds) 900)) ;; force at least one clean up cycle (last-time-some-running (current-seconds)) ;; (tdbdat (tasks:open-db)) (runsdat (make-runs:dat ;; hed: hed ;; tal: tal ;; reg: reg ;; reruns: reruns reglen: reglen regfull: #f ;; regfull ;; test-record: test-record runname: runname ;; test-name: test-name ;; item-path: item-path ;; jobgroup: jobgroup max-concurrent-jobs: max-concurrent-jobs run-id: run-id ;; waitons: waitons ;; testmode: testmode test-patts: test-patts required-tests: required-tests test-registry: test-registry registry-mutex: registry-mutex flags: flags keyvals: keyvals run-info: run-info ;; newtal: newtal all-tests-registry: all-tests-registry ;; itemmaps: itemmaps ;; prereqs-not-met: (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps) ;; can-run-more-tests: (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) ;; look at the test jobgroup and tot jobs running ))) ;; Initialize the test-registery hash with tests that already have a record ;; convert state to symbol and use that as the hash value (for-each (lambda (trec) (let ((id (db:test-get-id trec)) (tn (db:test-get-testname trec)) (ip (db:test-get-item-path trec))
︙			︙
1281 1282 1283 1284 1285 1286 1287 ~~1288 1289~~ 1290 1291 1292 1293 1294 1295 1296	;; (server:ping (remote-server-url runremote))) ;; (server:check-if-running toppath)))) ;; (server:kind-run toppath)) (if (> num-running 0) (set! last-time-some-running (current-seconds))) (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup configdat "setup" "give-up-waiting") 36000))) (hash-table-set! max-tries-hash tfullname (+ (hash-table-ref/default max-tries-hash tfullname 0) 1))) ;; (debug:print 0 default-log-port "max-tries-hash: " (hash-table->alist max-tries-hash)) ;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard ;; and it is clear they should have run but did not. (if (not (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f)) (begin (rmt:register-test run-id test-name "")	\| \|	1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414	;; (server:ping (remote-server-url runremote))) ;; (server:check-if-running toppath)))) ;; (server:kind-run toppath)) (if (> num-running 0) (set! last-time-some-running (current-seconds))) (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup configdat "setup" "give-up-waiting") 36000))) (hash-table-set! max-tries-hash tfullname (+ (hash-table-ref/default max-tries-hash tfullname 0) 1))) ;; (debug:print 0 default-log-port "max-tries-hash: " (hash-table->alist max-tries-hash)) ;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard ;; and it is clear they should have run but did not. (if (not (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f)) (begin (rmt:register-test run-id test-name "")
︙			︙
1307 1308 1309 1310 1311 1312 1313 ~~1314~~ 1315 1316 1317 1318 1319 1320 1321	(loop (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns)))) ;; (loop (car tal)(cdr tal) reg reruns)))) (runs:incremental-print-results run-id) ~~(debug:print 4 default-log-port "TOP OF LOOP => "~~ "test-name: " test-name "\n test-record " test-record "\n hed: " hed "\n itemdat: " itemdat "\n items: " items "\n item-path: " item-path "\n waitons: " waitons	\|	1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439	(loop (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns)))) ;; (loop (car tal)(cdr tal) reg reruns)))) (runs:incremental-print-results run-id) (debug:print 1 default-log-port "TOP OF LOOP => " "test-name: " test-name "\n test-record " test-record "\n hed: " hed "\n itemdat: " itemdat "\n items: " items "\n item-path: " item-path "\n waitons: " waitons
︙			︙
1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361	(not (member waiton reruns))) 1 #f)) waitons))))) ;; could do this more elegantly with a marker.... (debug:print 0 default-log-port "WARNING: Marking test " tfullname " as not runnable. It is waiting on tests that cannot be run. Giving up now.") (hash-table-set! test-registry tfullname 'removed)) ;; items is #f then the test is ok to be handed off to launch (but not before) ;; ((not items) (debug:print-info 4 default-log-port "OUTER COND: (not items)") (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) (not (null? tal))) (loop (car tal)(cdr tal) reg reruns))	> > > > > > >	1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486	(not (member waiton reruns))) 1 #f)) waitons))))) ;; could do this more elegantly with a marker.... (debug:print 0 default-log-port "WARNING: Marking test " tfullname " as not runnable. It is waiting on tests that cannot be run. Giving up now.") (hash-table-set! test-registry tfullname 'removed)) ; BB - a possibility for preqfail propagation ;; ((and (not items) (string? item-path) (not (equal? item-path "")) ;; (lset-intersection testmode '(itemmatch itemwait))) ;; ) ;; items is #f then the test is ok to be handed off to launch (but not before) ;; ((not items) (debug:print-info 4 default-log-port "OUTER COND: (not items)") (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) (not (null? tal))) (loop (car tal)(cdr tal) reg reruns))
︙			︙
1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421	(if (null? tal) #f (loop (car tal)(cdr tal) reg reruns))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ;; EXPAND ITEMS ((or (procedure? items)(eq? items 'have-procedure)) ;; BB - target vars are env vars here? to allow expansion of [items]\nsomething [system echo $SOMETARGVAR], which is wonky (let ((can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) (if (and (list? can-run-more) (car can-run-more)) (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) (if loop-list (apply loop loop-list)))	> >	1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548	(if (null? tal) #f (loop (car tal)(cdr tal) reg reruns))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ;; EXPAND ITEMS ;; ;; * the condition for (eq? items 'have-procedure) below ensure that runs:expand-items is not called on the same test twice -- expand-items will flatten the procedure to an actual list of items. ((or (procedure? items)(eq? items 'have-procedure)) ;; BB - target vars are env vars here? to allow expansion of [items]\nsomething [system echo $SOMETARGVAR], which is wonky (let ((can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) (if (and (list? can-run-more) (car can-run-more)) (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) (if loop-list (apply loop loop-list)))
︙			︙
1440 1441 1442 1443 1444 1445 1446 ~~1447~~ 1448 1449 1450 1451 1452 1453 1454	((not (null? tal)) (debug:print-info 4 default-log-port "I'm pretty sure I shouldn't get here.")) ((not (null? reg)) ;; could we get here with leftovers? (debug:print-info 0 default-log-port "Have leftovers!") (loop (car reg)(cdr reg) '() reruns)) (else (debug:print-info 4 default-log-port "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) ~~)))~~ ;; now if -run-wait we wait for all tests to be done ;; Now wait for any RUNNING tests to complete (if in run-wait mode) (thread-sleep! 5) ;; I think there is a race condition here. Let states/statuses settle (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id)) (prev-num-running 0)) ;; (debug:print-info 13 default-log-port "num-running=" num-running ", prev-num-running=" prev-num-running) (if (and (or (args:get-arg "-run-wait")	> \|	1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582	((not (null? tal)) (debug:print-info 4 default-log-port "I'm pretty sure I shouldn't get here.")) ((not (null? reg)) ;; could we get here with leftovers? (debug:print-info 0 default-log-port "Have leftovers!") (loop (car reg)(cdr reg) '() reruns)) (else (debug:print-info 4 default-log-port "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) ))) ;; end loop on sorted test names ;; now if -run-wait we wait for all tests to be done ;; Now wait for any RUNNING tests to complete (if in run-wait mode) (thread-sleep! 5) ;; I think there is a race condition here. Let states/statuses settle (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id)) (prev-num-running 0)) ;; (debug:print-info 13 default-log-port "num-running=" num-running ", prev-num-running=" prev-num-running) (if (and (or (args:get-arg "-run-wait")
︙			︙
1468 1469 1470 1471 1472 1473 1474 ~~1475~~ 1476 ~~1477~~ ~~1478 1479 1480~~ 1481 1482 1483 1484 1485 1486 ~~1487~~ 1488 1489 1490 ~~1491 1492~~ 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 ~~1512~~ 1513 1514 1515 1516 1517 1518 ~~1519~~ 1520 1521 1522 1523 1524 1525 1526	;; (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) ;; LET* ((test-record ;; we get here on "drop through". All done! (runs:run-post-hook run-id) (debug:print-info 1 default-log-port "All tests launched"))) ~~(define (runs:calc-fails prereqs-not-met)~~ (filter (lambda (test) ~~(and (vector? test) ;; not (string? test))~~ ~~(member (db:test-get-state test) '("INCOMPLETE" "COMPLETED")) (not (member (db:test-get-status test) '("PASS" "WARN" "CHECK" "WAIVED" "SKIP")))))~~ prereqs-not-met)) (define (runs:calc-prereq-fail prereqs-not-met) (filter (lambda (test) (and (vector? test) ;; not (string? test)) (equal? (db:test-get-state test) "NOT_STARTED") ~~(not (member (db:test-get-status test)~~ '("n/a" "KEEP_TRYING"))))) prereqs-not-met)) ~~(define (runs:calc-not-completed prereqs-not-met) (filter~~ (lambda (t) (or (not (vector? t)) (not (member (db:test-get-state t) '("INCOMPLETE" "COMPLETED"))))) prereqs-not-met)) ;; (define (runs:calc-not-completed prereqs-not-met) ;; (filter ;; (lambda (t) ;; (or (not (vector? t)) ;; (not (equal? "COMPLETED" (db:test-get-state t))))) ;; prereqs-not-met)) (define (runs:calc-runnable prereqs-not-met) (filter (lambda (t) (or (not (vector? t)) (and (equal? "NOT_STARTED" (db:test-get-state t)) (member (db:test-get-status t) '("n/a" "KEEP_TRYING"))) ~~(and (e~~qual? "RUNNING"~~ (db:test-get-state t))))) ;; account for a test that is running~~ prereqs-not-met)) (define (runs:pretty-string lst) (map (lambda (t) (if (not (vector? t)) (conc t) ~~(conc (db:test-get-testname t) ":" (db:test-get-state t) "/" (db:test-get-status t))))~~ lst)) ;; parent-test is there as a placeholder for when parent-tests can be run as a setup step ;; (define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry) ;; All these vars might be referenced by the testconfig file reader (let* ((test-name (tests:testqueue-get-testname test-record))	\| > \| > > \| \| \| \| \| \| > \| \|	1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658	;; (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) ;; LET* ((test-record ;; we get here on "drop through". All done! (runs:run-post-hook run-id) (debug:print-info 1 default-log-port "All tests launched"))) (define (runs:calc-fails prereqs-not-met) ;; BB is this redundant with runs:runable-tests ? (filter (lambda (test) (or (and (vector? test) ;; not (string? test)) (member (db:test-get-status test) '("TEN_STRIKES" "BLOCKED" "PREQ_FAIL" "ZERO_ITEMS" "PREQ_DISCARDED" "TIMED_OUT" "KILLED"))) (and (vector? test) ;; not (string? test)) (member (db:test-get-state test) '("INCOMPLETE" "COMPLETED")) (not (member (db:test-get-status test) '("PASS" "WARN" "CHECK" "WAIVED" "SKIP")))))) prereqs-not-met)) (define (runs:calc-prereq-fail prereqs-not-met) (filter (lambda (test) (and (vector? test) ;; not (string? test)) (equal? (db:test-get-state test) "NOT_STARTED") (not (member (db:test-get-status test) '("n/a" "KEEP_TRYING"))))) prereqs-not-met)) (define (runs:calc-not-completed prereqs-not-met) ;; filter out tests which have reached a ground state -- they are done one way or another. (filter (lambda (t) (or (not (vector? t)) (not (member (db:test-get-status t) '("TEN_STRIKES" "BLOCKED" "PREQ_FAIL" "ZERO_ITEMS" "PREQ_DISCARDED" "TIMED_OUT" "KILLED"))) (not (member (db:test-get-state t) '("INCOMPLETE" "COMPLETED"))))) prereqs-not-met)) ;; (define (runs:calc-not-completed prereqs-not-met) ;; (filter ;; (lambda (t) ;; (or (not (vector? t)) ;; (not (equal? "COMPLETED" (db:test-get-state t))))) ;; prereqs-not-met)) (define (runs:calc-runnable prereqs-not-met) (filter (lambda (t) (or (not (vector? t)) (and (equal? "NOT_STARTED" (db:test-get-state t)) (member (db:test-get-status t) '("n/a" "KEEP_TRYING"))) (and (member (db:test-get-state t) '("RUNNING" "LAUNCHED" "REMOTEHOSTSTART" ))))) ;; account for a test that is running prereqs-not-met)) (define (runs:pretty-string lst) (map (lambda (t) (if (not (vector? t)) (conc t) (conc (db:test-get-testname t)"/"(db:test-get-item-path t) ":" (db:test-get-state t) "/" (db:test-get-status t)))) lst)) ;; parent-test is there as a placeholder for when parent-tests can be run as a setup step ;; (define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry) ;; All these vars might be referenced by the testconfig file reader (let* ((test-name (tests:testqueue-get-testname test-record))
︙			︙