Megatest: Diff

Differences From Artifact [9a63a1bcac]:

File runs.scm — part of check-in [83635d0962] at 2013-11-16 21:12:33 on branch trunk — Merged minor fixes from v1.55 (user: matt, size: 74172) [annotate] [blame] [check-ins using] [more...]

To Artifact [c47d5b284a]:

File runs.scm — part of check-in [268a3235ad] at 2013-12-09 22:57:57 on branch inmem-per-run-db — Merged in fixes from v1.55, fixed couple queries related to jobgroups (user: matt, size: 74329) [annotate] [blame] [check-ins using] [more...]

︙			︙
17 18 19 20 21 22 23 24 25 26 27 28 29 30	(declare (uses db)) (declare (uses common)) (declare (uses items)) (declare (uses runconfig)) (declare (uses tests)) (declare (uses server)) (declare (uses mt)) (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") (include "run_records.scm") (include "test_records.scm")	>	17 18 19 20 21 22 23 24 25 26 27 28 29 30 31	(declare (uses db)) (declare (uses common)) (declare (uses items)) (declare (uses runconfig)) (declare (uses tests)) (declare (uses server)) (declare (uses mt)) ;; (declare (uses filedb)) (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") (include "run_records.scm") (include "test_records.scm")
︙			︙
55 56 57 58 59 60 61 ~~62 63 64 65 66~~ 67 68 69 70 71 72 73	(toppath toppath) (envdat keyvals) ;; initial values start with keyvals (runconfig #f) (serverdat (if (args:get-arg "-server") runremote #f)) ;; to be used later (transport (or (args:get-arg "-transport") 'http)) ~~(db (if (and mconfig~~ ~~(or (args:get-arg "-server")~~ ~~(eq? transport 'fs)))~~ ~~(open-db)~~ ~~#f))~~ (run-id #f)) ;; Set all the environment vars we know so far, start with keys (for-each (lambda (keyval) (setenv (car keyval)(cadr keyval))) keyvals) ;; Set up various and sundry known vars here (setenv "MT_RUN_AREA_HOME" toppath)	< < < < <	56 57 58 59 60 61 62 63 64 65 66 67 68 69	(toppath toppath) (envdat keyvals) ;; initial values start with keyvals (runconfig #f) (serverdat (if (args:get-arg "-server") runremote #f)) ;; to be used later (transport (or (args:get-arg "-transport") 'http)) (run-id #f)) ;; Set all the environment vars we know so far, start with keys (for-each (lambda (keyval) (setenv (car keyval)(cadr keyval))) keyvals) ;; Set up various and sundry known vars here (setenv "MT_RUN_AREA_HOME" toppath)
︙			︙
161 162 163 164 165 166 167 ~~168~~ 169 170 171 ~~172 173 174~~ 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 ~~195 196~~ 197 198 199 200 201 202 203	(currtime (current-seconds))) (if (> (- currtime lasttime) waitval) (begin (hash-table-set! runs:denoise key currtime) #t) #f))) ~~(define (runs:can-run-more-tests jobgroup max-concurrent-jobs)~~ (thread-sleep! (cond ((> runs:can-run-more-tests-count 20) 2);; obviously haven't had any work to do for a while (else 0))) (let* ((num-running (rmt:get-count-tests-running)) (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup jobgroup)) (job-group-limit (config-lookup configdat "jobgroups" jobgroup))) (if (> (+ num-running num-running-in-jobgroup) 0) (set! runs:can-run-more-tests-count (+ runs:can-run-more-tests-count 1))) (if (not (eq? last-num-running-tests num-running)) (begin (debug:print 2 "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running) (set! last-num-running-tests num-running))) (if (not (eq? 0 globalexitstatus)) (list #f num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit) (let ((can-not-run-more (cond ;; if max-concurrent-jobs is set and the number running is greater ;; than it than cannot run more jobs ((and max-concurrent-jobs (>= num-running max-concurrent-jobs)) (if (runs:lownoise "mcj msg" 60) (debug:print 0 "WARNING: Max running jobs exceeded, current number running: " num-running ", max_concurrent_jobs: " max-concurrent-jobs)) #t) ;; if job-group-limit is set and number of jobs in the group is greater ;; than the limit then cannot run more jobs of this kind ((and job-group-limit (>= num-running-in-jobgroup job-group-limit)) ~~(debug:print 1 "WARNING: number of jobs " num-running-in-jobgroup " in " jobgroup " exceed~~ed, will no~~t ~~run~~ " ~~(tests:testqueue~~-g~~et-testname test-record~~))~~ #t) (else #f)))) (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit))))) ;; test-names: Comma separated patterns same as test-patts but used in selection ;; of tests to run. The item portions are not respected. ;; FIXME: error out if /patt specified	\| \| \| \| > > > > \| \|	157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203	(currtime (current-seconds))) (if (> (- currtime lasttime) waitval) (begin (hash-table-set! runs:denoise key currtime) #t) #f))) (define (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) (thread-sleep! (cond ((> runs:can-run-more-tests-count 20) 2);; obviously haven't had any work to do for a while (else 0))) (let* ((num-running (rmt:get-count-tests-running run-id)) (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)) (job-group-limit (let ((jobg-count (config-lookup configdat "jobgroups" jobgroup))) (if (string? jobg-count) (string->number jobg-count) jobg-count)))) (if (> (+ num-running num-running-in-jobgroup) 0) (set! runs:can-run-more-tests-count (+ runs:can-run-more-tests-count 1))) (if (not (eq? last-num-running-tests num-running)) (begin (debug:print 2 "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running) (set! last-num-running-tests num-running))) (if (not (eq? 0 globalexitstatus)) (list #f num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit) (let ((can-not-run-more (cond ;; if max-concurrent-jobs is set and the number running is greater ;; than it than cannot run more jobs ((and max-concurrent-jobs (>= num-running max-concurrent-jobs)) (if (runs:lownoise "mcj msg" 60) (debug:print 0 "WARNING: Max running jobs exceeded, current number running: " num-running ", max_concurrent_jobs: " max-concurrent-jobs)) #t) ;; if job-group-limit is set and number of jobs in the group is greater ;; than the limit then cannot run more jobs of this kind ((and job-group-limit (>= num-running-in-jobgroup job-group-limit)) (if (runs:lownoise (conc "maxjobgroup " jobgroup) 60) (debug:print 1 "WARNING: number of jobs " num-running-in-jobgroup " in jobgroup \"" jobgroup "\" exceeds limit of " job-group-limit)) #t) (else #f)))) (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit))))) ;; test-names: Comma separated patterns same as test-patts but used in selection ;; of tests to run. The item portions are not respected. ;; FIXME: error out if /patt specified
︙			︙
211 212 213 214 215 216 217 ~~218 219 220~~ 221 222 223 224 225 226 227	(runconfigf (conc toppath "/runconfigs.config")) (required-tests '()) (test-records (make-hash-table)) (all-tests-registry (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names (all-test-names (hash-table-keys all-tests-registry)) (test-names (tests:filter-test-names all-test-names test-patts))) ~~;; Update the synchronous setting in the db based on the default or what is set by the user~~ ~~;; This is done once here on a call to run tests rather than on every call to open-db~~ (set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process (if (file-exists? runconfigf) (setup-env-defaults runconfigf run-id already-seen-runconfig-info keyvals "pre-launch-env-vars") (debug:print 0 "WARNING: You do not have a run config file: " runconfigf)) ;; look up all tests matching the comma separated list of globs in ;; test-patts (using % as wildcard)	< < <	211 212 213 214 215 216 217 218 219 220 221 222 223 224	(runconfigf (conc toppath "/runconfigs.config")) (required-tests '()) (test-records (make-hash-table)) (all-tests-registry (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names (all-test-names (hash-table-keys all-tests-registry)) (test-names (tests:filter-test-names all-test-names test-patts))) (set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process (if (file-exists? runconfigf) (setup-env-defaults runconfigf run-id already-seen-runconfig-info keyvals "pre-launch-env-vars") (debug:print 0 "WARNING: You do not have a run config file: " runconfigf)) ;; look up all tests matching the comma separated list of globs in ;; test-patts (using % as wildcard)
︙			︙
382 383 384 385 386 387 388 ~~389~~ 390 391 392 393 394 395 396	'() reg))) (define runs:nothing-left-in-queue-count 0) (define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records) (let* ((loop-list (list hed tal reg reruns)) ~~(prereqs-not-met (rmt:get-prereqs-not-met run-id waitons item-path ~~mode: testmode)) ;; (mt:lazy-get-prereqs-not-met run-id waitons item-path mode:~~ testmode))~~ (fails (runs:calc-fails prereqs-not-met)) (non-completed (runs:calc-not-completed prereqs-not-met))) (debug:print-info 4 "START OF INNER COND #2 " "\n can-run-more: " can-run-more "\n testname: " hed "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met) "\n non-completed: " (runs:pretty-string non-completed)	\|	379 380 381 382 383 384 385 386 387 388 389 390 391 392 393	'() reg))) (define runs:nothing-left-in-queue-count 0) (define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records) (let* ((loop-list (list hed tal reg reruns)) (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons item-path testmode)) (fails (runs:calc-fails prereqs-not-met)) (non-completed (runs:calc-not-completed prereqs-not-met))) (debug:print-info 4 "START OF INNER COND #2 " "\n can-run-more: " can-run-more "\n testname: " hed "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met) "\n non-completed: " (runs:pretty-string non-completed)
︙			︙
581 582 583 584 585 586 587 ~~588~~ 589 590 591 592 593 ~~594~~ 595 596 597 598 599 600 601	((string? t) t) (else (conc t)))) inlst)) (define (runs:process-expanded-tests hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry) ~~(let* ((run-limits-info (runs:can-run-more-tests jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running~~ (have-resources (car run-limits-info)) (num-running (list-ref run-limits-info 1)) (num-running-in-jobgroup (list-ref run-limits-info 2)) (max-concurrent-jobs (list-ref run-limits-info 3)) (job-group-limit (list-ref run-limits-info 4)) ~~(prereqs-not-met (rmt:get-prereqs-not-met run-id waitons item-path ~~mode: testmode)) ;; (mt:lazy-get-prereqs-not-met run-id waitons item-path mode:~~ testmode))~~ (fails (runs:calc-fails prereqs-not-met)) (non-completed (runs:calc-not-completed prereqs-not-met)) (loop-list (list hed tal reg reruns))) (debug:print-info 4 "have-resources: " have-resources " prereqs-not-met: (" (string-intersperse (map (lambda (t) (if (vector? t)	\| \|	578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598	((string? t) t) (else (conc t)))) inlst)) (define (runs:process-expanded-tests hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry) (let* ((run-limits-info (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running (have-resources (car run-limits-info)) (num-running (list-ref run-limits-info 1)) (num-running-in-jobgroup (list-ref run-limits-info 2)) (max-concurrent-jobs (list-ref run-limits-info 3)) (job-group-limit (list-ref run-limits-info 4)) (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons item-path testmode)) (fails (runs:calc-fails prereqs-not-met)) (non-completed (runs:calc-not-completed prereqs-not-met)) (loop-list (list hed tal reg reruns))) (debug:print-info 4 "have-resources: " have-resources " prereqs-not-met: (" (string-intersperse (map (lambda (t) (if (vector? t)
︙			︙
627 628 629 630 631 632 633 ~~634~~ 635 636 637 638 639 640 641 ~~642 643~~ 644 645 646 647 648 649 650	;; Register tests ;; ((not (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f)) (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" ) (if (eq? transport-type 'fs) ;; no point in parallel registration if use fs (begin ~~(rmt:general-call 'register-test run-id test-name item-path)~~ (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done)) (let ((th (make-thread (lambda () (mutex-lock! registry-mutex) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'start) (mutex-unlock! registry-mutex) ;; If haven't done it before register a top level test if this is an itemized test (if (not (eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f) 'done)) ~~(rmt:general-call 'register-test run-id test-name "")) (rmt:general-call 'register-test run-id test-name item-path)~~ (mutex-lock! registry-mutex) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done) (mutex-unlock! registry-mutex)) (conc test-name "/" item-path)))) (thread-start! th))) (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) (if (and (null? tal)(null? reg))	\| \| \|	624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647	;; Register tests ;; ((not (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f)) (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" ) (if (eq? transport-type 'fs) ;; no point in parallel registration if use fs (begin (rmt:general-call 'register-test run-id run-id test-name item-path) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done)) (let ((th (make-thread (lambda () (mutex-lock! registry-mutex) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'start) (mutex-unlock! registry-mutex) ;; If haven't done it before register a top level test if this is an itemized test (if (not (eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f) 'done)) (rmt:general-call 'register-test run-id run-id test-name "")) (rmt:general-call 'register-test run-id run-id test-name item-path) (mutex-lock! registry-mutex) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done) (mutex-unlock! registry-mutex)) (conc test-name "/" item-path)))) (thread-start! th))) (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) (if (and (null? tal)(null? reg))
︙			︙
795 796 797 798 799 800 801 ~~802~~ 803 804 805 806 807 808 809	;; (set! last-time-incomplete (current-seconds)) ;; (cdb:remote-run db:find-and-mark-incomplete #f))) ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) (let* ((test-record (hash-table-ref test-records hed)) (test-name (tests:testqueue-get-testname test-record)) (tconfig (tests:testqueue-get-testconfig test-record)) ~~(jobgroup (config-lookup tconfig "~~require~~me~~nts~~" "jobgroup"))~~ (testmode (let ((m (config-lookup tconfig "requirements" "mode"))) (if m (string->symbol m) 'normal))) (waitons (tests:testqueue-get-waitons test-record)) (priority (tests:testqueue-get-priority test-record)) (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f (items (tests:testqueue-get-items test-record)) (item-path (item-list->path itemdat))	\|	792 793 794 795 796 797 798 799 800 801 802 803 804 805 806	;; (set! last-time-incomplete (current-seconds)) ;; (cdb:remote-run db:find-and-mark-incomplete #f))) ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) (let* ((test-record (hash-table-ref test-records hed)) (test-name (tests:testqueue-get-testname test-record)) (tconfig (tests:testqueue-get-testconfig test-record)) (jobgroup (config-lookup tconfig "test_meta" "jobgroup")) (testmode (let ((m (config-lookup tconfig "requirements" "mode"))) (if m (string->symbol m) 'normal))) (waitons (tests:testqueue-get-waitons test-record)) (priority (tests:testqueue-get-priority test-record)) (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f (items (tests:testqueue-get-items test-record)) (item-path (item-list->path itemdat))
︙			︙
819 820 821 822 823 824 825 ~~826~~ 827 828 829 830 831 832 833	(hash-table-set! max-tries-hash tfullname (+ (hash-table-ref/default max-tries-hash tfullname 0) 1))) ;; (debug:print 0 "max-tries-hash: " (hash-table->alist max-tries-hash)) ;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard ;; and it is clear they should have run but did not. (if (not (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f)) (begin ~~(rmt:general-call 'register-test run-id test-name "")~~ (hash-table-set! test-registry (runs:make-full-test-name test-name "") 'done))) ;; Fast skip of tests that are already "COMPLETED" - NO! Cannot do that as the items may not have been expanded yet :( ;; (if (member (hash-table-ref/default test-registry tfullname #f) '(DONOTRUN removed)) ;; common:cant-run-states-sym) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) (begin	\|	816 817 818 819 820 821 822 823 824 825 826 827 828 829 830	(hash-table-set! max-tries-hash tfullname (+ (hash-table-ref/default max-tries-hash tfullname 0) 1))) ;; (debug:print 0 "max-tries-hash: " (hash-table->alist max-tries-hash)) ;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard ;; and it is clear they should have run but did not. (if (not (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f)) (begin (rmt:general-call 'register-test run-id run-id test-name "") (hash-table-set! test-registry (runs:make-full-test-name test-name "") 'done))) ;; Fast skip of tests that are already "COMPLETED" - NO! Cannot do that as the items may not have been expanded yet :( ;; (if (member (hash-table-ref/default test-registry tfullname #f) '(DONOTRUN removed)) ;; common:cant-run-states-sym) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) (begin
︙			︙
935 936 937 938 939 940 941 ~~942~~ 943 944 945 946 947 948 949	#f (loop (car tal)(cdr tal) reg reruns))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ;; EXPAND ITEMS ((or (procedure? items)(eq? items 'have-procedure)) ~~(let ((can-run-more (runs:can-run-more-tests jobgroup max-concurrent-jobs)))~~ (if (and (list? can-run-more) (car can-run-more)) (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records))) (if loop-list (apply loop loop-list))) ;; if can't run more just loop with next possible test (loop (car newtal)(cdr newtal) reg reruns))))	\|	932 933 934 935 936 937 938 939 940 941 942 943 944 945 946	#f (loop (car tal)(cdr tal) reg reruns))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ;; EXPAND ITEMS ((or (procedure? items)(eq? items 'have-procedure)) (let ((can-run-more (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs))) (if (and (list? can-run-more) (car can-run-more)) (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records))) (if loop-list (apply loop loop-list))) ;; if can't run more just loop with next possible test (loop (car newtal)(cdr newtal) reg reruns))))
︙			︙
1042 1043 1044 1045 1046 1047 1048 ~~1049~~ 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 ~~1064~~ 1065 1066 ~~1067~~ 1068 1069 1070 1071 1072 1073 1074	(begin (hash-table-set! test-meta-updated test-name #t) (runs:update-test_meta test-name test-conf))) ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer")) (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/")) (test-id (rmt:get-test-id run-id test-name item-path)) ~~(testdat (if test-id (rmt:get-test-info-by-id test-id) #f)))~~ (if (not testdat) (let loop () ;; ensure that the path exists before registering the test ;; NOPE: Cannot! Don't know yet which disk area will be assigned.... ;; (system (conc "mkdir -p " new-test-path)) ;; ;; (open-run-close tests:register-test db run-id test-name item-path) ;; ;; NB// for the above line. I want the test to be registered long before this routine gets called! ;; (if (not test-id)(set! test-id (rmt:get-test-id-cached run-id test-name item-path))) (if (not test-id) (begin (debug:print 2 "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id) ~~(rmt:general-call 'register-test run-id test-name item-path)~~ (set! test-id (rmt:get-test-id run-id test-name item-path)))) (debug:print-info 4 "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"") ~~(set! testdat (rmt:get-test-info-by-id test-id))~~ (if (not testdat) (begin (debug:print-info 0 "WARNING: server is overloaded, trying again in one second") (thread-sleep! 1) (loop))))) (if (not testdat) ;; should NOT happen (debug:print 0 "ERROR: failed to get test record for test-id " test-id))	\| \| \|	1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071	(begin (hash-table-set! test-meta-updated test-name #t) (runs:update-test_meta test-name test-conf))) ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer")) (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/")) (test-id (rmt:get-test-id run-id test-name item-path)) (testdat (if test-id (rmt:get-test-info-by-id run-id test-id) #f))) (if (not testdat) (let loop () ;; ensure that the path exists before registering the test ;; NOPE: Cannot! Don't know yet which disk area will be assigned.... ;; (system (conc "mkdir -p " new-test-path)) ;; ;; (open-run-close tests:register-test db run-id test-name item-path) ;; ;; NB// for the above line. I want the test to be registered long before this routine gets called! ;; (if (not test-id)(set! test-id (rmt:get-test-id-cached run-id test-name item-path))) (if (not test-id) (begin (debug:print 2 "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id) (rmt:general-call 'register-test run-id run-id test-name item-path) (set! test-id (rmt:get-test-id run-id test-name item-path)))) (debug:print-info 4 "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"") (set! testdat (rmt:get-test-info-by-id run-id test-id)) (if (not testdat) (begin (debug:print-info 0 "WARNING: server is overloaded, trying again in one second") (thread-sleep! 1) (loop))))) (if (not testdat) ;; should NOT happen (debug:print 0 "ERROR: failed to get test record for test-id " test-id))
︙			︙
1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 ~~1146~~ 1147 1148 1149 1150 1151 1152 1153	(let ((skip-test #f) (skip-check (configf:get-section test-conf "skip"))) (cond ;; Have to check for skip conditions. This one skips if there are same-named tests ;; currently running ((and skip-check (configf:lookup test-conf "skip" "prevrunning")) (let ((running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f))) (if (not (null? running-tests)) ;; have to skip (set! skip-test "Skipping due to previous tests running")))) ((and skip-check (configf:lookup test-conf "skip" "fileexists")) (if (file-exists? (configf:lookup test-conf "skip" "fileexists")) (set! skip-test (conc "Skipping due to existance of file " (configf:lookup test-conf "skip" "fileexists")))))) (if skip-test (begin ~~(mt:test-set-state-status-by-id test-id "COMPLETED" "SKIP" skip-test)~~ (debug:print-info 1 "SKIPPING Test " full-test-name " due to " skip-test)) (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags)) (begin (print "ERROR: Failed to launch the test. Exiting as soon as possible") (set! globalexitstatus 1) ;; (process-signal (current-process-id) signal/kill)))))))) ((KILLED)	> \|	1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151	(let ((skip-test #f) (skip-check (configf:get-section test-conf "skip"))) (cond ;; Have to check for skip conditions. This one skips if there are same-named tests ;; currently running ((and skip-check (configf:lookup test-conf "skip" "prevrunning")) ;; run-ids = #f means all runs (let ((running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f))) (if (not (null? running-tests)) ;; have to skip (set! skip-test "Skipping due to previous tests running")))) ((and skip-check (configf:lookup test-conf "skip" "fileexists")) (if (file-exists? (configf:lookup test-conf "skip" "fileexists")) (set! skip-test (conc "Skipping due to existance of file " (configf:lookup test-conf "skip" "fileexists")))))) (if skip-test (begin (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test) (debug:print-info 1 "SKIPPING Test " full-test-name " due to " skip-test)) (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags)) (begin (print "ERROR: Failed to launch the test. Exiting as soon as possible") (set! globalexitstatus 1) ;; (process-signal (current-process-id) signal/kill)))))))) ((KILLED)
︙			︙
1255 1256 1257 1258 1259 1260 1261 ~~1262~~ ~~1263~~ 1264 1265 1266 1267 1268 1269 1270 1271 ~~1272~~ 1273 1274 1275 1276 1277 1278 1279 ~~1280~~ 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 ~~1301~~ 1302 1303 ~~1304~~ 1305 1306 1307 1308 1309 1310 ~~1311~~ 1312 1313 1314 1315 1316 1317 1318	((print-run) (debug:print 1 "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header) action) ((run-wait) (debug:print 1 "Waiting for run " runkey ", run=" runnamepatt " to complete")) (else (debug:print-info 0 "action not recognised " action))) ~~(let ((sorted-tests (sort tests (lambda (a b)(let ((dira (db~~:test~~-get~~-rundir a))~~~~ ~~~~(dirb~~ (db:test-get-rundir b)))~~ (if (and (string? dira)(string? dirb)) (> (string-length dira)(string-length dirb)) #f))))) (test-retry-time (make-hash-table)) (allow-run-time 10)) ;; seconds to allow for killing tests before just brutally killing 'em (let loop ((test (car sorted-tests)) (tal (cdr sorted-tests))) (let* ((test-id (db:test-get-id test)) ~~(new-test-dat (rmt:get-test-info-by-id test-id)))~~ (if (not new-test-dat) (begin (debug:print 0 "ERROR: We have a test-id of " test-id " but no record was found. NOTE: No locking of records is done between processes, do not simultaneously remove the same run from two processes!") (if (not (null? tal)) (loop (car tal)(cdr tal)))) (let* ((item-path (db:test-get-item-path new-test-dat)) (test-name (db:test-get-testname new-test-dat)) ~~~~(run-dir~~ (db:test-get-rundir new-test-dat)) ;; run dir is from the link tree~~ (real-dir (if (file-exists? run-dir) (resolve-pathname run-dir) #f)) (test-state (db:test-get-state new-test-dat)) (test-fulln (db:test-get-fullname new-test-dat))) (case action ((remove-runs) (debug:print-info 0 "test: " test-name " itest-state: " test-state) (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) (begin (if (not (hash-table-ref/default test-retry-time test-fulln #f)) (begin ;; want to set to REMOVING BUT CANNOT do it here? (hash-table-set! test-retry-time test-fulln (current-seconds)))) (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give ;; up and blow it away. (begin (debug:print 0 "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing") ~~(mt:test-set-state-status-by-id (db:test-get-id test) "FAILEDKILL" "n/a" #f)~~ (thread-sleep! 1)) (begin ~~(mt:test-set-state-status-by-id (db:test-get-id test) "KILLREQ" "n/a" #f)~~ (thread-sleep! 1))) ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... (if (null? tal) (loop new-test-dat tal) (loop (car tal)(append tal (list new-test-dat))))) (begin ~~(mt:test-set-state-status-by-id (db:test-get-id test) "REMOVING" "LOCKED" #f)~~ (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) (if (and real-dir (> (string-length real-dir) 5) (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. (begin ;; let* ((realpath (resolve-pathname run-dir))) (debug:print-info 1 "Recursively removing " real-dir) (if (file-exists? real-dir)	\| > > \| \| > > \| \| \| \|	1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320	((print-run) (debug:print 1 "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header) action) ((run-wait) (debug:print 1 "Waiting for run " runkey ", run=" runnamepatt " to complete")) (else (debug:print-info 0 "action not recognised " action))) (let ((sorted-tests (sort tests (lambda (a b)(let ((dira ;; (rmt:sdb-qry 'getstr (db:test-get-rundir a)) ;; ) ;; (filedb:get-path fdb (db:test-get-rundir a))) (dirb ;; (rmt:sdb-qry 'getstr (db:test-get-rundir b))) ;; ) ;; ((filedb:get-path fdb (db:test-get-rundir b)))) (if (and (string? dira)(string? dirb)) (> (string-length dira)(string-length dirb)) #f))))) (test-retry-time (make-hash-table)) (allow-run-time 10)) ;; seconds to allow for killing tests before just brutally killing 'em (let loop ((test (car sorted-tests)) (tal (cdr sorted-tests))) (let* ((test-id (db:test-get-id test)) (new-test-dat (rmt:get-test-info-by-id run-id test-id))) (if (not new-test-dat) (begin (debug:print 0 "ERROR: We have a test-id of " test-id " but no record was found. NOTE: No locking of records is done between processes, do not simultaneously remove the same run from two processes!") (if (not (null? tal)) (loop (car tal)(cdr tal)))) (let* ((item-path (db:test-get-item-path new-test-dat)) (test-name (db:test-get-testname new-test-dat)) (run-dir ;;(filedb:get-path fdb ;; (rmt:sdb-qry 'getid (db:test-get-rundir new-test-dat)) ;; ) ;; run dir is from the link tree (real-dir (if (file-exists? run-dir) (resolve-pathname run-dir) #f)) (test-state (db:test-get-state new-test-dat)) (test-fulln (db:test-get-fullname new-test-dat))) (case action ((remove-runs) (debug:print-info 0 "test: " test-name " itest-state: " test-state) (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) (begin (if (not (hash-table-ref/default test-retry-time test-fulln #f)) (begin ;; want to set to REMOVING BUT CANNOT do it here? (hash-table-set! test-retry-time test-fulln (current-seconds)))) (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give ;; up and blow it away. (begin (debug:print 0 "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing") (mt:test-set-state-status-by-id run-id (db:test-get-id test) "FAILEDKILL" "n/a" #f) (thread-sleep! 1)) (begin (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f) (thread-sleep! 1))) ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... (if (null? tal) (loop new-test-dat tal) (loop (car tal)(append tal (list new-test-dat))))) (begin (mt:test-set-state-status-by-id run-id (db:test-get-id test) "REMOVING" "LOCKED" #f) (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) (if (and real-dir (> (string-length real-dir) 5) (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. (begin ;; let* ((realpath (resolve-pathname run-dir))) (debug:print-info 1 "Recursively removing " real-dir) (if (file-exists? real-dir)
︙			︙
1336 1337 1338 1339 1340 1341 1342 ~~1343~~ 1344 1345 1346 1347 ~~1348~~ 1349 1350 1351 1352 1353 1354 1355	(debug:print 0 "ERROR: Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") (delete-directory run-dir))) (if run-dir (debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink") (debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) )) ;; Only delete the records after removing the directory. If things fail we have a record ~~(rmt:delete-test-records (db:test-get-id test))~~ (if (not (null? tal)) (loop (car tal)(cdr tal)))))) ((set-state-status) (debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status)) ~~(mt:test-set-state-status-by-id (db:test-get-id test) (car state-status)(cadr state-status) #f)~~ (if (not (null? tal)) (loop (car tal)(cdr tal)))) ((run-wait) (debug:print-info 2 "still waiting, " (length tests) " tests still running") (thread-sleep! 10) (let ((new-tests (proc-get-tests run-id))) (if (null? new-tests)	\| \|	1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357	(debug:print 0 "ERROR: Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") (delete-directory run-dir))) (if run-dir (debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink") (debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) )) ;; Only delete the records after removing the directory. If things fail we have a record (rmt:delete-test-records (db:test-get-run_id test)(db:test-get-id test)) (if (not (null? tal)) (loop (car tal)(cdr tal)))))) ((set-state-status) (debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status)) (mt:test-set-state-status-by-id run-id (db:test-get-id test) (car state-status)(cadr state-status) #f) (if (not (null? tal)) (loop (car tal)(cdr tal)))) ((run-wait) (debug:print-info 2 "still waiting, " (length tests) " tests still running") (thread-sleep! 10) (let ((new-tests (proc-get-tests run-id))) (if (null? new-tests)
︙			︙
1457 1458 1459 1460 1461 1462 1463 ~~1464~~ 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 ~~1476~~ 1477 1478 1479 1480 1481 1482 1483	;;====================================================================== ;; Update the test_meta table for this test (define (runs:update-test_meta test-name test-conf) (let ((currrecord (rmt:testmeta-get-record test-name))) (if (not currrecord) (begin ~~(set! currrecord (make-vector 10 #f))~~ (rmt:testmeta-add-record test-name))) (for-each (lambda (key) (let* ((idx (cadr key)) (fld (car key)) (val (config-lookup test-conf "test_meta" fld))) ;; (debug:print 5 "idx: " idx " fld: " fld " val: " val) (if (and val (not (equal? (vector-ref currrecord idx) val))) (begin (print "Updating " test-name " " fld " to " val) (rmt:testmeta-update-field test-name fld val))))) ~~'(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)))))~~ ;; Update test_meta for all tests (define (runs:update-all-test_meta db) (let ((test-names (tests:get-all))) ;; (tests:get-valid-tests))) (for-each (lambda (test-name) (let* ((test-conf (mt:lazy-read-test-config test-name)))	\| \|	1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485	;;====================================================================== ;; Update the test_meta table for this test (define (runs:update-test_meta test-name test-conf) (let ((currrecord (rmt:testmeta-get-record test-name))) (if (not currrecord) (begin (set! currrecord (make-vector 11 #f)) (rmt:testmeta-add-record test-name))) (for-each (lambda (key) (let* ((idx (cadr key)) (fld (car key)) (val (config-lookup test-conf "test_meta" fld))) ;; (debug:print 5 "idx: " idx " fld: " fld " val: " val) (if (and val (not (equal? (vector-ref currrecord idx) val))) (begin (print "Updating " test-name " " fld " to " val) (rmt:testmeta-update-field test-name fld val))))) '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10))))) ;; Update test_meta for all tests (define (runs:update-all-test_meta db) (let ((test-names (tests:get-all))) ;; (tests:get-valid-tests))) (for-each (lambda (test-name) (let* ((test-conf (mt:lazy-read-test-config test-name)))
︙			︙