Overview
Comment: | updated - itemwait continues forward now, but runs do not stop. it is progress.
problems 1- toplevel goes to completed when not all items have started but so-far started items are completed. 2- not-started/preq-fail propagates not-started/na (which propagates not-started/preq-fail) |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.64-keep-running-fix |
Files: | files | file ages | folders |
SHA1: |
f7fdbdc3056f7aa68372edb5b04aa272 |
User & Date: | bjbarcla on 2017-09-28 17:58:45 |
Original Comment: | updated - itemwait continues forward now, but runs do not stop. it is progress. |
Other Links: | branch diff | manifest | tags |
Context
2017-09-29
| ||
17:56 | got further. noticed race condition when not stepping one at a time by setting launcher. noticed xor does not handle preq-fail. check-in: 89fedf98b8 user: bjbarcla tags: v1.64-keep-running-fix | |
2017-09-28
| ||
17:58 |
updated - itemwait continues forward now, but runs do not stop. it is progress.
problems 1- toplevel goes to completed when not all items have started but so-far started items are completed. 2- not-started/preq-fail propagates not-started/na (which propagates not-started/preq-fail) check-in: f7fdbdc305 user: bjbarcla tags: v1.64-keep-running-fix | |
2017-09-26
| ||
12:46 | fixed display of PREQ_FAIL in runs summary tab; updated color of PREQ_FAIL to salmon (light red) instead of white check-in: 42d3bf31ad user: bjbarcla tags: v1.64-keep-running-fix | |
Changes
Modified common_records.scm from [0a5321af09] to [9609771573].
︙ | |||
145 146 147 148 149 150 151 | 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 | + + - + - + | (lambda (frame) (let* ((this-loc (vector-ref frame 0)) (temp (string-split (->string this-loc) " ")) (this-func (if (and (list? temp) (> (length temp) 1)) (cadr temp) "???"))) (if (equal? this-func "BB>") (set! location this-loc)))) stack) (let* ((color-on "\x1b[1m") (color-off "\x1b[0m") |
︙ |
Modified db.scm from [879c965cf5] to [f4290acd99].
︙ | |||
4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 | 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 | + + + + | ;; ;; Note: mode 'normal means that tests must be COMPLETED and ok (i.e. PASS, WARN, CHECK, SKIP or WAIVED) ;; mode 'toplevel means that tests must be COMPLETED only ;; mode 'itemmatch or 'itemwait means that tests items must be COMPLETED and (PASS|WARN|WAIVED|CHECK) [[ NB// NOT IMPLEMENTED YET ]] ;; mode 'exclusive means this test/item cannot run if the same test/item is LAUNCHED,REMOTEHOSTSTART or RUNNING ;; ;; (define (db:get-prereqs-not-met dbstruct run-id waitons ref-item-path mode) (define (db:get-prereqs-not-met dbstruct run-id waitons ref-test-name ref-item-path mode itemmaps) ;; #!key (mode '(normal))(itemmap #f)) ;; BBnote - rollup of an itemized test's overall state/status done in db:set-state-status-and-roll-up-items (let* ((ok-statuses '("PASS" "WARN" "CHECK" "WAIVED" "SKIP")) (have-itemized (not (null? (lset-intersection eq? mode '(itemmatch itemwait)))))) (append (if (member 'exclusive mode) (let ((running-tests (db:get-tests-for-run dbstruct #f ;; run-id of #f means for all runs. (if (string=? ref-item-path "") ;; testpatt ref-test-name (conc ref-test-name "/" ref-item-path)) |
︙ | |||
4110 4111 4112 4113 4114 4115 4116 | 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 | - + - + | ;; (if (equal? waitontest-name (db:test-get-testname test)) ;; by defintion this had better be true ... (let* ((state (db:test-get-state test)) (status (db:test-get-status test)) (item-path (db:test-get-item-path test)) ;; BB- this is the upstream itempath (is-completed (equal? state "COMPLETED")) (is-running (equal? state "RUNNING")) (is-killed (equal? state "KILLED")) |
︙ | |||
4150 4151 4152 4153 4154 4155 4156 | 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 | + + + + + + + + + + + + + + + + + + - + + | (if (not (or parent-waiton-met item-waiton-met)) (set! result (append (if (null? tests) (list waitontest-name) tests) result))) ;; appends the string if the full record is not available ;; if the test is not found then clearly the waiton is not met... ;; (if (not ever-seen)(set! result (cons waitontest-name result))))) (if (not ever-seen) (set! result (append (if (null? tests)(list waitontest-name) tests) result))))) waitons) ;; TODO: for itemwait and itemmatch mode, filter out failed toplevel prereq test if any items passed. ;; a rewrite might help understanding, but quick fix is just remove tests from result which are completed/pass. -BB ;;(pp result) ;; (let ((prereq-tests-some-items-passed-list '(ref-test-name))) ;; seed with ref-test-name; do not wait on self. ;; (for-each (lambda (test) ;; (if (vector? test) ;; (if (and ;; (equal? (db:test-get-state test) "COMPLETED") ;; (member (db:test-get-status test) ok-statuses) ;; (not (equal? (db:test-get-item-path test) ""))) ;; (set! prereq-tests-some-items-passed-list (cons (db:test-get-testname test) prereq-tests-some-items-passed-list))))) ;; result) ;; (set! prereq-tests-some-items-passed-list (delete-duplicates prereq-tests-some-items-passed-list)) |
︙ |
Modified runs.scm from [0a0d3ed157] to [6846f73920].
︙ | |||
628 629 630 631 632 633 634 635 636 637 638 639 640 | 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 | + + + + + + + + + + + + + + + + + + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - + + + + + + + + + + + + + + + + + + + + + + + - + + + + + + + + - - + + + + - + + + + + + + + + + - - - + + + + + + + + - + - + - + - + - - + + + + - + | ;; this is the list of parameters to the named loop "loop" near the top of runs:run-tests-queue, look around line 1216 ;; (define (runs:loop-values tal reg reglen regfull reruns) (list (runs:queue-next-hed tal reg reglen regfull) ;; hed (runs:queue-next-tal tal reg reglen regfull) ;; tal (runs:queue-next-reg tal reg reglen regfull) ;; reg reruns)) ;; reruns ;; objective - iterate thru tests ;; => want to prioritize tests we haven't seen before ;; => sometimes need to squeeze things in (added to reg) ;; => review of a previously seen test is higher priority of never visited test ;; reg - list of previously visited tests ;; tal - list of never visited tests ;; prefer next hed to be from reg than tal. (define runs:nothing-left-in-queue-count 0) ;; BB: for future reference - suspect target vars are not expanded to env vars at this point (item expansion using [items]\nwhatever [system echo $TARGETVAR] doesnt work right whereas [system echo #{targetvar}] does.. Tal and Randy have tix on this. on first pass, var not set, on second pass, ok. ;; return value of runs:expand-items is passed back to runs-tests-queue and is fed to named loop with this signature: ;; (let loop ((hed (car sorted-test-names)) ;; (tal (cdr sorted-test-names)) ;; (reg '()) ;; registered, put these at the head of tal ;; (reruns '())) ;; runs:expand-items: for a given test, expand its items into real tests ready to be processed at this time ;; this procedure's operation only makes sense in context of runs-tests-queue. (define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps) (let* ((loop-list (list hed tal reg reruns)) |
︙ | |||
751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 | 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 | + + + + + + - + + + + + | (runs:loop-values trimmed-tal trimmed-reg reglen regfull reruns) )) (list (car newtal)(append (cdr newtal) reg) '() reruns)))) ((and (null? fails) ;; have not-started tests, but unable to run them. everything looks completed with no prospect of unsticking something that is stuck. we should mark hed as moribund and exit or continue if there are more tests to consider (null? prereq-fails) (null? non-completed)) (BB> "cb4") (if (runs:can-keep-running? hed 20) (begin (runs:inc-cant-run-tests hed) (debug:print-info 0 *default-log-port* "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0)) ;; ;; getting here likely means the system is way overloaded, kill a full minute before continuing (thread-sleep! 60) ;; TODO: gate by normalized server load > 1.0 (maxload config thing) ;; num-retries code was here ;; we use this opportunity to move contents of reg to tal (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met? (begin (debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while."))) (runs:loop-values tal reg reglen regfull reruns) ))) ((and (or (not (null? fails)) (not (null? prereq-fails))) (member 'normal testmode)) (BB> "cb5") (debug:print-info 1 *default-log-port* "test " hed " (mode=" testmode ") has failed prerequisite(s); " (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") ", removing it from to-do list") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (if (not (null? prereq-fails)) (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites") (begin (debug:print 4 *default-log-port*"BB> set PREQ_FAIL on "hed) (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))))) ;; BB: this works, btu equivalent for itemwait mode does not work. (if (or (not (null? reg))(not (null? tal))) (begin (hash-table-set! test-registry hed 'CANNOTRUN) (runs:loop-values tal reg reglen regfull (cons hed reruns)) ) #f)) ;; #f flags do not loop ((and (not (null? fails))(member 'toplevel testmode)) (BB> "cb6") (if (or (not (null? reg))(not (null? tal))) (list (car newtal)(append (cdr newtal) reg) '() reruns) #f)) ((null? runnable-prereqs) (BB> "cb7") |
︙ | |||
1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 | 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 | + + | (if (null? tal) #f (loop (car tal)(cdr tal) reg reruns))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ;; EXPAND ITEMS ;; ;; * the condition for (eq? items 'have-procedure) below ensure that runs:expand-items is not called on the same test twice -- expand-items will flatten the procedure to an actual list of items. ((or (procedure? items)(eq? items 'have-procedure)) ;; BB - target vars are env vars here? to allow expansion of [items]\nsomething [system echo $SOMETARGVAR], which is wonky (let ((can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) (if (and (list? can-run-more) (car can-run-more)) (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) (if loop-list (apply loop loop-list))) |
︙ | |||
1496 1497 1498 1499 1500 1501 1502 | 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 | - + + | (filter (lambda (test) (and (vector? test) ;; not (string? test)) (equal? (db:test-get-state test) "NOT_STARTED") (not (member (db:test-get-status test) '("n/a" "KEEP_TRYING"))))) prereqs-not-met)) |
︙ | |||
1524 1525 1526 1527 1528 1529 1530 | 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 | - + | (and (equal? "RUNNING" (db:test-get-state t))))) ;; account for a test that is running prereqs-not-met)) (define (runs:pretty-string lst) (map (lambda (t) (if (not (vector? t)) (conc t) |
︙ |