Megatest: Diff

Differences From Artifact [73d47dd3ee]:

File runs.scm — part of check-in [ff130a6cab] at 2015-01-09 01:21:45 on branch trunk — Fixed bad records problem in archiving that was causing a crash (user: matt, size: 88414) [annotate] [blame] [check-ins using] [more...]

To Artifact [e4e58cdff9]:

File runs.scm — part of check-in [a9aad76712] at 2015-05-07 18:09:08 on branch v1.60 — Capture missing fix (user: mrwellan, size: 89335) [annotate] [blame] [check-ins using] [more...]

︙			︙
160 161 162 163 164 165 166 ~~167~~ 168 169 170 171 172 173 174	(begin (hash-table-set! runs:denoise key currtime) #t) #f))) (define (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) (thread-sleep! (cond ~~((> runs:can-run-more-tests-count 20) ~~2);; obviously haven't had any work to do for a while~~~~ (else 0))) (let* ((num-running (rmt:get-count-tests-running run-id)) (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)) (job-group-limit (let ((jobg-count (config-lookup configdat "jobgroups" jobgroup))) (if (string? jobg-count) (string->number jobg-count) jobg-count))))	\| > > >	160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177	(begin (hash-table-set! runs:denoise key currtime) #t) #f))) (define (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) (thread-sleep! (cond ((> runs:can-run-more-tests-count 20) (if (runs:lownoise "waiting on tasks" 60) (debug:print-info 2 "waiting for tasks to complete, sleeping briefly ...")) 2);; obviously haven't had any work to do for a while (else 0))) (let* ((num-running (rmt:get-count-tests-running run-id)) (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)) (job-group-limit (let ((jobg-count (config-lookup configdat "jobgroups" jobgroup))) (if (string? jobg-count) (string->number jobg-count) jobg-count))))
︙			︙
479 480 481 482 483 484 485 ~~486~~ 487 ~~488~~ 489 490 491 492 493 494 495	"\n can-run-more: " can-run-more) (cond ;; all prereqs met, fire off the test ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch ((and (not (member 'toplevel testmode)) ~~(member (hash-table-ref/default test-registry (~~runs:~~make-full~~-test~~-name hed item-path) 'n/a)~~ '(DONOTRUN removed CANNOTRUN))) ;; common:cant-run-states-sym) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here ~~(debug:print-info 1 "Test " hed " set to \"" (hash-table-ref test-registry (~~runs:~~make-full~~-test~~-name hed item-path)) "\". Removing it from the queue")~~ (if (or (not (null? tal)) (not (null? reg))) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns) (begin	\| \|	482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498	"\n can-run-more: " can-run-more) (cond ;; all prereqs met, fire off the test ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch ((and (not (member 'toplevel testmode)) (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a) '(DONOTRUN removed CANNOTRUN))) ;; common:cant-run-states-sym) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here (debug:print-info 1 "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue") (if (or (not (null? tal)) (not (null? reg))) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns) (begin
︙			︙
684 685 686 687 688 689 690 ~~691~~ 692 693 694 695 ~~696~~ 697 ~~698~~ 699 ~~700 701 702~~ 703 704 705 ~~706~~ 707 708 709 710 711 712 713 714 715 716 717 718 719 720 ~~721~~ 722 723 724 725 726 727 728 ~~729~~ 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 ~~749~~ 750 751 752 753 754 755 756 757 ~~758~~ 759 760 761 762 763 764 765	(runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns) #f)) ;; Register tests ;; ~~((not (hash-table-ref/default test-registry (~~runs:~~make-full~~-test~~-name test-name item-path) #f))~~ (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" ) ;; always do firm registration now in v1.60 and greater ;; (eq? transport-type 'fs) ;; no point in parallel registration if use fs (let register-loop ((numtries 15)) (rmt:general-call 'register-test run-id run-id test-name item-path) ~~(thread-sleep! 0.5)~~ (if (rmt:get-test-id run-id test-name item-path) ~~(hash-table-set! test-registry (~~runs:~~make-full~~-test~~-name test-name item-path) 'done)~~ (if (> numtries 0) (register-loop (- numtries 1)) (debug:print 0 "ERROR: failed to register test " (~~runs:~~make-full~~-test~~-name test-name item-path))))) (if (not (eq? (hash-table-ref/default test-registry (~~runs:~~make-full~~-test~~-name test-name "") #f) 'done)) (begin (rmt:general-call 'register-test run-id run-id test-name "") (if (rmt:get-test-id run-id test-name "") ~~(hash-table-set! test-registry (~~runs:~~make-full~~-test~~-name test-name "") 'done))))~~ (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) (if (and (null? tal)(null? reg)) (list hed tal (append reg (list hed)) reruns) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) ;; NB// Here we are building reg as we register tests ;; if regfull we must pop the front item off reg (if regfull (append (cdr reg) (list hed)) (append reg (list hed))) reruns))) ;; At this point hed test registration must be completed. ;; ~~((eq? (hash-table-ref/default test-registry (~~runs:~~make-full~~-test~~-name test-name item-path) #f)~~ 'start) (debug:print-info 0 "Waiting on test registration(s): " (string-intersperse (filter (lambda (x) (eq? (hash-table-ref/default test-registry x #f) 'start)) (hash-table-keys test-registry)) ", ")) ~~(thread-sleep! 0.1)~~ (list hed tal reg reruns)) ;; If no resources are available just kill time and loop again ;; ((not have-resources) ;; simply try again after waiting a second (if (runs:lownoise "no resources" 60) (debug:print-info 1 "no resources to run new tests, waiting ...")) ;; Have gone back and forth on this but db starvation is an issue. ;; wait one second before looking again to run jobs. (thread-sleep! 1) ;; could have done hed tal here but doing car/cdr of newtal to rotate tests (list (car newtal)(cdr newtal) reg reruns)) ;; This is the final stage, everything is in place so launch the test ;; ((and have-resources (or (null? prereqs-not-met) (and (eq? testmode 'toplevel) (null? non-completed)))) ~~;; (hash-table-delete! max-tries-hash (~~runs:~~make-full~~-test~~-name test-name item-path))~~ ;; we are going to reset all the counters for test retries by setting a new hash table ;; this means they will increment only when nothing can be run (set! max-tries-hash (make-hash-table)) ;; well, first lets see if cpu load throttling is enabled. If so wait around until the ;; average cpu load is under the threshold before continuing (if (configf:lookup configdat "jobtools" "maxload") ;; only gate if maxload is specified (common:wait-for-cpuload maxload numcpus waitdelay)) (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry) ~~(hash-table-set! test-registry (~~runs:~~make-full~~-test~~-name test-name item-path) 'running)~~ (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! global-delta) (if (or (not (null? tal))(not (null? reg))) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns)	\| < \| > > \| \| \| \| \| \| \| \|	687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769	(runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns) #f)) ;; Register tests ;; ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" ) ;; always do firm registration now in v1.60 and greater ;; (eq? transport-type 'fs) ;; no point in parallel registration if use fs (let register-loop ((numtries 15)) (rmt:general-call 'register-test run-id run-id test-name item-path) (if (rmt:get-test-id run-id test-name item-path) (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'done) (if (> numtries 0) (begin (thread-sleep! 0.5) (register-loop (- numtries 1))) (debug:print 0 "ERROR: failed to register test " (db:test-make-full-name test-name item-path))))) (if (not (eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f) 'done)) (begin (rmt:general-call 'register-test run-id run-id test-name "") (if (rmt:get-test-id run-id test-name "") (hash-table-set! test-registry (db:test-make-full-name test-name "") 'done)))) (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) (if (and (null? tal)(null? reg)) (list hed tal (append reg (list hed)) reruns) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) ;; NB// Here we are building reg as we register tests ;; if regfull we must pop the front item off reg (if regfull (append (cdr reg) (list hed)) (append reg (list hed))) reruns))) ;; At this point hed test registration must be completed. ;; ((eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f) 'start) (debug:print-info 0 "Waiting on test registration(s): " (string-intersperse (filter (lambda (x) (eq? (hash-table-ref/default test-registry x #f) 'start)) (hash-table-keys test-registry)) ", ")) (thread-sleep! 0.051) (list hed tal reg reruns)) ;; If no resources are available just kill time and loop again ;; ((not have-resources) ;; simply try again after waiting a second (if (runs:lownoise "no resources" 60) (debug:print-info 1 "no resources to run new tests, waiting ...")) ;; Have gone back and forth on this but db starvation is an issue. ;; wait one second before looking again to run jobs. (thread-sleep! 1) ;; could have done hed tal here but doing car/cdr of newtal to rotate tests (list (car newtal)(cdr newtal) reg reruns)) ;; This is the final stage, everything is in place so launch the test ;; ((and have-resources (or (null? prereqs-not-met) (and (eq? testmode 'toplevel) (null? non-completed)))) ;; (hash-table-delete! max-tries-hash (db:test-make-full-name test-name item-path)) ;; we are going to reset all the counters for test retries by setting a new hash table ;; this means they will increment only when nothing can be run (set! max-tries-hash (make-hash-table)) ;; well, first lets see if cpu load throttling is enabled. If so wait around until the ;; average cpu load is under the threshold before continuing (if (configf:lookup configdat "jobtools" "maxload") ;; only gate if maxload is specified (common:wait-for-cpuload maxload numcpus waitdelay)) (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry) (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running) (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! global-delta) (if (or (not (null? tal))(not (null? reg))) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns)
︙			︙
792 793 794 795 796 797 798 ~~799~~ 800 801 802 803 804 805 806	" from the launch list as it has prerequistes that are FAIL") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))) (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! global-delta) ;; This next is for the items (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) ~~(hash-table-set! test-registry (~~runs:~~make-full~~-test~~-name test-name item-path) 'removed)~~ (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns ;; WAS: (cons hed reruns) ;; but that makes no sense? )) (let ((nth-try (hash-table-ref/default test-registry hed 0))) (cond	\|	796 797 798 799 800 801 802 803 804 805 806 807 808 809 810	" from the launch list as it has prerequistes that are FAIL") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))) (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! global-delta) ;; This next is for the items (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns ;; WAS: (cons hed reruns) ;; but that makes no sense? )) (let ((nth-try (hash-table-ref/default test-registry hed 0))) (cond
︙			︙
889 890 891 892 893 894 895 ~~896~~ 897 898 899 900 901 902 903	(define (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests reglen-in all-tests-registry) ;; At this point the list of parent tests is expanded ;; NB// Should expand items here and then insert into the run queue. (debug:print 5 "test-records: " test-records ", flags: " (hash-table->alist flags)) ;; Do mark-and-find clean up of db before starting runing of quue ;; ~~;; (~~cdb:remote-run db~~:find-and-mark-incomplete #f)~~ (let ((run-info (rmt:get-run-info run-id)) (tests-info (mt:get-tests-for-run run-id #f '() '())) ;; qryvals: "id,testname,item_path")) (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) (test-registry (make-hash-table)) (registry-mutex (make-mutex)) (num-retries 0)	\|	893 894 895 896 897 898 899 900 901 902 903 904 905 906 907	(define (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests reglen-in all-tests-registry) ;; At this point the list of parent tests is expanded ;; NB// Should expand items here and then insert into the run queue. (debug:print 5 "test-records: " test-records ", flags: " (hash-table->alist flags)) ;; Do mark-and-find clean up of db before starting runing of quue ;; ;; (rmt:find-and-mark-incomplete) (let ((run-info (rmt:get-run-info run-id)) (tests-info (mt:get-tests-for-run run-id #f '() '())) ;; qryvals: "id,testname,item_path")) (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) (test-registry (make-hash-table)) (registry-mutex (make-mutex)) (num-retries 0)
︙			︙
915 916 917 918 919 920 921 ~~922~~ 923 924 925 926 927 928 929	;; convert state to symbol and use that as the hash value (for-each (lambda (trec) (let ((id (db:test-get-id trec)) (tn (db:test-get-testname trec)) (ip (db:test-get-item-path trec)) (st (db:test-get-state trec))) (if (not (equal? st "DELETED")) ~~(hash-table-set! test-registry (~~runs:~~make-full~~-test~~-name tn ip) (string->symbol st)))))~~ tests-info) (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) (let loop ((hed (car sorted-test-names)) (tal (cdr sorted-test-names)) (reg '()) ;; registered, put these at the head of tal (reruns '()))	\|	919 920 921 922 923 924 925 926 927 928 929 930 931 932 933	;; convert state to symbol and use that as the hash value (for-each (lambda (trec) (let ((id (db:test-get-id trec)) (tn (db:test-get-testname trec)) (ip (db:test-get-item-path trec)) (st (db:test-get-state trec))) (if (not (equal? st "DELETED")) (hash-table-set! test-registry (db:test-make-full-name tn ip) (string->symbol st))))) tests-info) (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) (let loop ((hed (car sorted-test-names)) (tal (cdr sorted-test-names)) (reg '()) ;; registered, put these at the head of tal (reruns '()))
︙			︙
948 949 950 951 952 953 954 ~~955~~ 956 957 958 959 960 961 962 963 964 965 966 967 ~~968~~ 969 970 971 972 973 ~~974~~ 975 976 ~~977~~ 978 979 980 981 982 983 984	(if m (map string->symbol (string-split m)) '(normal)))) (itemmap (configf:lookup tconfig "requirements" "itemmap")) (waitons (tests:testqueue-get-waitons test-record)) (priority (tests:testqueue-get-priority test-record)) (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f (items (tests:testqueue-get-items test-record)) (item-path (item-list->path itemdat)) ~~(tfullname (~~runs:~~make-full~~-test~~-name test-name item-path))~~ (newtal (append tal (list hed))) (regfull (>= (length reg) reglen)) (num-running (rmt:get-count-tests-running-for-run-id run-id))) ;; every couple minutes verify the server is there for this run (if (and (common:low-noise-print 60 "try start server" run-id) (tasks:need-server run-id)) (tasks:start-and-wait-for-server tdbdat run-id 10)) ;; NOTE: delay and wait is done under the hood (if (> num-running 0) (set! last-time-some-running (current-seconds))) ~~(if (> (current-seconds)(+ last-time-some-running ~~240~~))~~ (hash-table-set! max-tries-hash tfullname (+ (hash-table-ref/default max-tries-hash tfullname 0) 1))) ;; (debug:print 0 "max-tries-hash: " (hash-table->alist max-tries-hash)) ;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard ;; and it is clear they should have run but did not. ~~(if (not (hash-table-ref/default test-registry (~~runs:~~make-full~~-test~~-name test-name "") #f))~~ (begin (rmt:general-call 'register-test run-id run-id test-name "") ~~(hash-table-set! test-registry (~~runs:~~make-full~~-test~~-name test-name "") 'done)))~~ ;; Fast skip of tests that are already "COMPLETED" - NO! Cannot do that as the items may not have been expanded yet :( ;; (if (member (hash-table-ref/default test-registry tfullname #f) '(DONOTRUN removed)) ;; common:cant-run-states-sym) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) (begin (if (runs:lownoise (conc "been marked do not run " tfullname) 60)	\| \| \| \|	952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988	(if m (map string->symbol (string-split m)) '(normal)))) (itemmap (configf:lookup tconfig "requirements" "itemmap")) (waitons (tests:testqueue-get-waitons test-record)) (priority (tests:testqueue-get-priority test-record)) (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f (items (tests:testqueue-get-items test-record)) (item-path (item-list->path itemdat)) (tfullname (db:test-make-full-name test-name item-path)) (newtal (append tal (list hed))) (regfull (>= (length reg) reglen)) (num-running (rmt:get-count-tests-running-for-run-id run-id))) ;; every couple minutes verify the server is there for this run (if (and (common:low-noise-print 60 "try start server" run-id) (tasks:need-server run-id)) (tasks:start-and-wait-for-server tdbdat run-id 10)) ;; NOTE: delay and wait is done under the hood (if (> num-running 0) (set! last-time-some-running (current-seconds))) (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup configdat "setup" "give-up-waiting") 36000))) (hash-table-set! max-tries-hash tfullname (+ (hash-table-ref/default max-tries-hash tfullname 0) 1))) ;; (debug:print 0 "max-tries-hash: " (hash-table->alist max-tries-hash)) ;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard ;; and it is clear they should have run but did not. (if (not (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f)) (begin (rmt:general-call 'register-test run-id run-id test-name "") (hash-table-set! test-registry (db:test-make-full-name test-name "") 'done))) ;; Fast skip of tests that are already "COMPLETED" - NO! Cannot do that as the items may not have been expanded yet :( ;; (if (member (hash-table-ref/default test-registry tfullname #f) '(DONOTRUN removed)) ;; common:cant-run-states-sym) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) (begin (if (runs:lownoise (conc "been marked do not run " tfullname) 60)
︙			︙
1063 1064 1065 1066 1067 1068 1069 ~~1070~~ 1071 1072 1073 1074 1075 1076 1077	(for-each (lambda (my-itemdat) (let* ((new-test-record (let ((newrec (make-tests:testqueue))) (vector-copy! test-record newrec) newrec)) (my-item-path (item-list->path my-itemdat))) (if (tests:match test-patts hed my-item-path required: required-tests) ;; (patt-list-match my-item-path item-patts) ;; yes, we want to process this item, NOTE: Should not need this check here! ~~(let ((newtestname (~~runs:~~make-full~~-test~~-name hed my-item-path))) ;; test names are unique on testname/item-path~~ (tests:testqueue-set-items! new-test-record #f) (tests:testqueue-set-itemdat! new-test-record my-itemdat) (tests:testqueue-set-item_path! new-test-record my-item-path) (hash-table-set! test-records newtestname new-test-record) (set! tal (append tal (list newtestname))))))) ;; since these are itemized create new test names testname/itempath items)	\|	1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081	(for-each (lambda (my-itemdat) (let* ((new-test-record (let ((newrec (make-tests:testqueue))) (vector-copy! test-record newrec) newrec)) (my-item-path (item-list->path my-itemdat))) (if (tests:match test-patts hed my-item-path required: required-tests) ;; (patt-list-match my-item-path item-patts) ;; yes, we want to process this item, NOTE: Should not need this check here! (let ((newtestname (db:test-make-full-name hed my-item-path))) ;; test names are unique on testname/item-path (tests:testqueue-set-items! new-test-record #f) (tests:testqueue-set-itemdat! new-test-record my-itemdat) (tests:testqueue-set-item_path! new-test-record my-item-path) (hash-table-set! test-records newtestname new-test-record) (set! tal (append tal (list newtestname))))))) ;; since these are itemized create new test names testname/itempath items)
︙			︙
1189 1190 1191 1192 1193 1194 1195 ~~1196 1197 1198~~ 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 ~~1218~~ 1219 1220 1221 1222 1223 1224 1225	(define (runs:pretty-string lst) (map (lambda (t) (if (not (vector? t)) (conc t) (conc (db:test-get-testname t) ":" (db:test-get-state t) "/" (db:test-get-status t)))) lst)) ~~(define (runs:make-full-test-name testname itempath)~~ ~~(if (equal? itempath "") testname (conc testname "/" itempath)))~~ ;; parent-test is there as a placeholder for when parent-tests can be run as a setup step (define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry) ;; All these vars might be referenced by the testconfig file reader (let* ((test-name (tests:testqueue-get-testname test-record)) (test-waitons (tests:testqueue-get-waitons test-record)) (test-conf (tests:testqueue-get-testconfig test-record)) (itemdat (tests:testqueue-get-itemdat test-record)) (test-path (hash-table-ref all-tests-registry test-name)) ;; (conc toppath "/tests/" test-name)) ;; could use tests:get-testconfig here ... (force (hash-table-ref/default flags "-force" #f)) (rerun (hash-table-ref/default flags "-rerun" #f)) (keepgoing (hash-table-ref/default flags "-keepgoing" #f)) (incomplete-timeout (string->number (or (configf:lookup configdat "setup" "incomplete-timeout") "x"))) (item-path "") (db #f) (full-test-name #f)) ;; setting itemdat to a list if it is #f (if (not itemdat)(set! itemdat '())) (set! item-path (item-list->path itemdat)) ~~(set! full-test-name (~~runs:~~make-full~~-test~~-name test-name item-path))~~ (debug:print-info 4 "\nTESTNAME: " full-test-name "\n test-config: " (hash-table->alist test-conf) "\n itemdat: " itemdat ) (debug:print 2 "Attempting to launch test " full-test-name) (setenv "MT_TEST_NAME" test-name) ;;	< < < \|	1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226	(define (runs:pretty-string lst) (map (lambda (t) (if (not (vector? t)) (conc t) (conc (db:test-get-testname t) ":" (db:test-get-state t) "/" (db:test-get-status t)))) lst)) ;; parent-test is there as a placeholder for when parent-tests can be run as a setup step (define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry) ;; All these vars might be referenced by the testconfig file reader (let* ((test-name (tests:testqueue-get-testname test-record)) (test-waitons (tests:testqueue-get-waitons test-record)) (test-conf (tests:testqueue-get-testconfig test-record)) (itemdat (tests:testqueue-get-itemdat test-record)) (test-path (hash-table-ref all-tests-registry test-name)) ;; (conc toppath "/tests/" test-name)) ;; could use tests:get-testconfig here ... (force (hash-table-ref/default flags "-force" #f)) (rerun (hash-table-ref/default flags "-rerun" #f)) (keepgoing (hash-table-ref/default flags "-keepgoing" #f)) (incomplete-timeout (string->number (or (configf:lookup configdat "setup" "incomplete-timeout") "x"))) (item-path "") (db #f) (full-test-name #f)) ;; setting itemdat to a list if it is #f (if (not itemdat)(set! itemdat '())) (set! item-path (item-list->path itemdat)) (set! full-test-name (db:test-make-full-name test-name item-path)) (debug:print-info 4 "\nTESTNAME: " full-test-name "\n test-config: " (hash-table->alist test-conf) "\n itemdat: " itemdat ) (debug:print 2 "Attempting to launch test " full-test-name) (setenv "MT_TEST_NAME" test-name) ;;
︙			︙
1335 1336 1337 1338 1339 1340 1341 ~~1342~~ 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 ~~1354~~ 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 ~~1370~~ 1371 ~~1372~~ 1373 1374 1375 1376 1377 1378 1379	;; run-ids = #f means all runs (let ((running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f))) (if (not (null? running-tests)) ;; have to skip (set! skip-test "Skipping due to previous tests running")))) ((and skip-check (configf:lookup test-conf "skip" "fileexists")) (if (file-exists? (configf:lookup test-conf "skip" "fileexists")) ~~(set! skip-test (conc "Skipping due to existance of file " (configf:lookup test-conf "skip" "fileexists"))))))~~ (if skip-test (begin (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test) (debug:print-info 1 "SKIPPING Test " full-test-name " due to " skip-test)) (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags)) (begin (print "ERROR: Failed to launch the test. Exiting as soon as possible") (set! globalexitstatus 1) ;; (process-signal (current-process-id) signal/kill)))))))) ((KILLED) (debug:print 1 "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.") ~~(hash-table-set! test-registry (~~runs:~~make-full~~-test~~-name test-name test-path) 'DONOTRUN)) ;; KILLED))~~ ((LAUNCHED REMOTEHOSTSTART RUNNING) (debug:print 2 "NOTE: " test-name " is already running")) ;; (if (> (- (current-seconds)(+ (db:test-get-event_time testdat) ;; (db:test-get-run_duration testdat))) ;; (or incomplete-timeout ;; 6000)) ;; i.e. no update for more than 6000 seconds ;; (begin ;; (debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD") ;; (tests:test-set-status! run-id test-id "INCOMPLETE" "STUCK/DEAD" "" #f)) ;; ;; (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "" #f)) ;; (debug:print 2 "NOTE: " test-name " is already running"))) (else (debug:print 0 "ERROR: Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat)) (case (string->symbol (test:get-state testdat)) ((COMPLETED INCOMPLETE) ~~(hash-table-set! test-registry (~~runs:~~make-full~~-test~~-name test-name test-path) 'DONOTRUN))~~ (else ~~(hash-table-set! test-registry (~~runs:~~make-full~~-test~~-name test-name test-path) 'DONOTRUN))))))))~~ ;;====================================================================== ;; END OF NEW STUFF ;;====================================================================== (define (get-dir-up-n dir . params) (let ((dparts (string-split dir "/"))	\| > > > > > > > > > > > > > \| \| \|	1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393	;; run-ids = #f means all runs (let ((running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f))) (if (not (null? running-tests)) ;; have to skip (set! skip-test "Skipping due to previous tests running")))) ((and skip-check (configf:lookup test-conf "skip" "fileexists")) (if (file-exists? (configf:lookup test-conf "skip" "fileexists")) (set! skip-test (conc "Skipping due to existance of file " (configf:lookup test-conf "skip" "fileexists"))))) ((and skip-check (configf:lookup test-conf "skip" "rundelay")) ;; run-ids = #f means all runs (let* ((numseconds (common:hms-string->seconds (configf:lookup test-conf "skip" "rundelay"))) (running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f)) (completed-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("COMPLETED") '("PASS" "FAIL" "ABORT") #f)) (last-run-times (map db:mintest-get-event_time completed-tests)) (time-since-last (- (current-seconds) (if (null? last-run-times) 0 (apply max last-run-times))))) (if (or (not (null? running-tests)) ;; have to skip if test is running (> numseconds time-since-last)) (set! skip-test (conc "Skipping due to previous test run less than " (configf:lookup test-conf "skip" "rundelay") " ago")))))) (if skip-test (begin (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test) (debug:print-info 1 "SKIPPING Test " full-test-name " due to " skip-test)) (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags)) (begin (print "ERROR: Failed to launch the test. Exiting as soon as possible") (set! globalexitstatus 1) ;; (process-signal (current-process-id) signal/kill)))))))) ((KILLED) (debug:print 1 "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.") (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)) ;; KILLED)) ((LAUNCHED REMOTEHOSTSTART RUNNING) (debug:print 2 "NOTE: " test-name " is already running")) ;; (if (> (- (current-seconds)(+ (db:test-get-event_time testdat) ;; (db:test-get-run_duration testdat))) ;; (or incomplete-timeout ;; 6000)) ;; i.e. no update for more than 6000 seconds ;; (begin ;; (debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD") ;; (tests:test-set-status! run-id test-id "INCOMPLETE" "STUCK/DEAD" "" #f)) ;; ;; (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "" #f)) ;; (debug:print 2 "NOTE: " test-name " is already running"))) (else (debug:print 0 "ERROR: Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat)) (case (string->symbol (test:get-state testdat)) ((COMPLETED INCOMPLETE) (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)) (else (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)))))))) ;;====================================================================== ;; END OF NEW STUFF ;;====================================================================== (define (get-dir-up-n dir . params) (let ((dparts (string-split dir "/"))
︙			︙
1474 1475 1476 1477 1478 1479 1480 ~~1481~~ 1482 1483 1484 1485 1486 1487 1488	(debug:print 1 "Waiting for run " runkey ", run=" runnamepatt " to complete")) ((archive) (debug:print 1 "Archiving/restoring (" (args:get-arg "-archive") ") data for run: " runkey " " (db:get-value-by-header run header "runname")) (set! worker-thread (make-thread (lambda () (case (string->symbol (args:get-arg "-archive")) ((save save-remove keep-html)(archive:run-bup (args:get-arg "-archive") run-id run-name tests)) ((restore)(archive:bup-restore (args:get-arg "-archive") run-id run-name tests)) ~~~~(else~~ (debug:print 0 "ERROR: unrecognised sub command to -archive. Run \"megatest\" to see help"~~)))~~)~~ "archive-bup-thread")) (thread-start! worker-thread)) (else (debug:print-info 0 "action not recognised " action))) ;; actions that operate on one test at a time can be handled below ;;	> \| >	1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504	(debug:print 1 "Waiting for run " runkey ", run=" runnamepatt " to complete")) ((archive) (debug:print 1 "Archiving/restoring (" (args:get-arg "-archive") ") data for run: " runkey " " (db:get-value-by-header run header "runname")) (set! worker-thread (make-thread (lambda () (case (string->symbol (args:get-arg "-archive")) ((save save-remove keep-html)(archive:run-bup (args:get-arg "-archive") run-id run-name tests)) ((restore)(archive:bup-restore (args:get-arg "-archive") run-id run-name tests)) (else (debug:print 0 "ERROR: unrecognised sub command to -archive. Run \"megatest\" to see help") (exit)))) "archive-bup-thread")) (thread-start! worker-thread)) (else (debug:print-info 0 "action not recognised " action))) ;; actions that operate on one test at a time can be handled below ;;
︙			︙
1565 1566 1567 1568 1569 1570 1571 ~~1572~~ ~~1573 1574~~ ~~1575 1576~~ 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 ~~1593~~ 1594 1595 1596 1597 1598 1599 1600	(debug:print-info 2 "still waiting, " (length tests) " tests still running") (thread-sleep! 10) (let ((new-tests (proc-get-tests run-id))) (if (null? new-tests) (debug:print-info 1 "Run completed according to zero tests matching provided criteria.") (loop (car new-tests)(cdr new-tests))))) ((archive) ~~(if (not toplevel-with-children)~~ ~~(case (string->symbol (args:get-arg "-archive")) ((save save-remove keep-html)~~ ~~(debug:print-info 0 "Estimating disk space usage for " test-fulln) ~~(debug:print-info 0 " " (common:get-disk-space-used (conc run-dir "/"))))))~~~~ (if (not (null? tal)) (loop (car tal)(cdr tal)))) ))) ) (if worker-thread (thread-join! worker-thread)))))) ;; remove the run if zero tests remain (if (eq? action 'remove-runs) (let ((remtests (mt:get-tests-for-run (db:get-value-by-header run header "id") #f '("DELETED") '("n/a") not-in: #t))) (if (null? remtests) ;; no more tests remaining (let* ((dparts (string-split lasttpath "/")) (runpath (conc "/" (string-intersperse (take dparts (- (length dparts) 1)) "/")))) (debug:print 1 "Removing run: " runkey " " (db:get-value-by-header run header "runname") " and related record") (rmt:delete-run run-id) (rmt:delete-old-deleted-test-records) ~~;; (~~cdb:remote-run db~~:set-var db "DELETED_TESTS" (current-seconds))~~ ;; need to figure out the path to the run dir and remove it if empty ;; (if (null? (glob (conc runpath "/*"))) ;; (begin ;; (debug:print 1 "Removing run dir " runpath) ;; (system (conc "rmdir -p " runpath)))) ))))) ))	\| > \| \| > \| < \|	1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617	(debug:print-info 2 "still waiting, " (length tests) " tests still running") (thread-sleep! 10) (let ((new-tests (proc-get-tests run-id))) (if (null? new-tests) (debug:print-info 1 "Run completed according to zero tests matching provided criteria.") (loop (car new-tests)(cdr new-tests))))) ((archive) (if (and run-dir (not toplevel-with-children)) (let ((ddir (conc run-dir "/"))) (case (string->symbol (args:get-arg "-archive")) ((save save-remove keep-html) (if (file-exists? ddir) (debug:print-info 0 "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir))))))) (if (not (null? tal)) (loop (car tal)(cdr tal)))) ))) ) (if worker-thread (thread-join! worker-thread)))))) ;; remove the run if zero tests remain (if (eq? action 'remove-runs) (let ((remtests (mt:get-tests-for-run (db:get-value-by-header run header "id") #f '("DELETED") '("n/a") not-in: #t))) (if (null? remtests) ;; no more tests remaining (let* ((dparts (string-split lasttpath "/")) (runpath (conc "/" (string-intersperse (take dparts (- (length dparts) 1)) "/")))) (debug:print 1 "Removing run: " runkey " " (db:get-value-by-header run header "runname") " and related record") (rmt:delete-run run-id) (rmt:delete-old-deleted-test-records) ;; (rmt:set-var "DELETED_TESTS" (current-seconds)) ;; need to figure out the path to the run dir and remove it if empty ;; (if (null? (glob (conc runpath "/*"))) ;; (begin ;; (debug:print 1 "Removing run dir " runpath) ;; (system (conc "rmdir -p " runpath)))) ))))) ))
︙			︙
1669 1670 1671 1672 1673 1674 1675 ~~1676 1677~~ 1678 1679 1680 1681 1682 1683 1684	(let (;; (db #f) (keys #f)) (if (launch:setup-for-run) (launch:cache-config) (begin (debug:print 0 "Failed to setup, exiting") (exit 1))) ~~;; (if (args:get-arg "-server")~~ ~~;; (cdb:remote-run server:start db (args:get-arg "-server")))~~ (set! keys (keys:config-get-fields configdat)) ;; have enough to process -target or -reqtarg here (if (args:get-arg "-reqtarg") (let* ((runconfigf (conc toppath "/runconfigs.config")) ;; DO NOT EVALUATE ALL (runconfig (read-config runconfigf #f #t environ-patt: #f))) (if (hash-table-ref/default runconfig (args:get-arg "-reqtarg") #f) (keys:target-set-args keys (args:get-arg "-reqtarg") args:arg-hash)	< <	1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699	(let (;; (db #f) (keys #f)) (if (launch:setup-for-run) (launch:cache-config) (begin (debug:print 0 "Failed to setup, exiting") (exit 1))) (set! keys (keys:config-get-fields configdat)) ;; have enough to process -target or -reqtarg here (if (args:get-arg "-reqtarg") (let* ((runconfigf (conc toppath "/runconfigs.config")) ;; DO NOT EVALUATE ALL (runconfig (read-config runconfigf #f #t environ-patt: #f))) (if (hash-table-ref/default runconfig (args:get-arg "-reqtarg") #f) (keys:target-set-args keys (args:get-arg "-reqtarg") args:arg-hash)
︙			︙
1789 1790 1791 1792 1793 1794 1795 ~~1796~~ 1797 1798 1799 1800 1801 1802 1803	(conc "INSERT OR REPLACE INTO tests (run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment) " "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?);") new-run-id (cddr (vector->list testdat))) (set! new-testdat (car (mt:get-tests-for-run new-run-id (conc testname "/" item-path) '() '()))) (hash-table-set! curr-tests-hash full-name new-testdat) ;; this could be confusing, which record should go into the lookup table? ;; Now duplicate the test steps (debug:print 4 "Copying records in test_steps from test_id=" (db:test-get-id testdat) " to " (db:test-get-id new-testdat)) ~~(cdb:remote-run~~ (lambda () (sqlite3:execute db (conc "INSERT OR REPLACE INTO test_steps (test_id,stepname,state,status,event_time,comment) " "SELECT " (db:test-get-id new-testdat) ",stepname,state,status,event_time,comment FROM test_steps WHERE test_id=?;") (db:test-get-id testdat)) ;; Now duplicate the test data	\|	1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818	(conc "INSERT OR REPLACE INTO tests (run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment) " "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?);") new-run-id (cddr (vector->list testdat))) (set! new-testdat (car (mt:get-tests-for-run new-run-id (conc testname "/" item-path) '() '()))) (hash-table-set! curr-tests-hash full-name new-testdat) ;; this could be confusing, which record should go into the lookup table? ;; Now duplicate the test steps (debug:print 4 "Copying records in test_steps from test_id=" (db:test-get-id testdat) " to " (db:test-get-id new-testdat)) (cdb:remote-run ;; to be replaced, note: this routine is not used currently (lambda () (sqlite3:execute db (conc "INSERT OR REPLACE INTO test_steps (test_id,stepname,state,status,event_time,comment) " "SELECT " (db:test-get-id new-testdat) ",stepname,state,status,event_time,comment FROM test_steps WHERE test_id=?;") (db:test-get-id testdat)) ;; Now duplicate the test data
︙			︙