Megatest: Diff

Differences From Artifact [b746e7df77]:

File runs.scm — part of check-in [8c12404219] at 2014-04-10 16:27:53 on branch v1.55 — Added control over logfile for nbfake, improved bigrun2/3 tests, added itemmap parameter in requirements for mapping items for item wait. bumped version (user: mrwellan, size: 76023) [annotate] [blame] [check-ins using]

To Artifact [9cd1d4c354]:

File runs.scm — part of check-in [eecc0fad4c] at 2014-06-02 13:49:04 on branch v1.60 — Merged from 8c12 on v1.55 into v1.60 (user: mrwellan, size: 75571) [annotate] [blame] [check-ins using] [more...]

︙			︙
17 18 19 20 21 22 23 24 25 26 27 28 29 30	(declare (uses db)) (declare (uses common)) (declare (uses items)) (declare (uses runconfig)) (declare (uses tests)) (declare (uses server)) (declare (uses mt)) (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") (include "run_records.scm") (include "test_records.scm")	>	17 18 19 20 21 22 23 24 25 26 27 28 29 30 31	(declare (uses db)) (declare (uses common)) (declare (uses items)) (declare (uses runconfig)) (declare (uses tests)) (declare (uses server)) (declare (uses mt)) ;; (declare (uses filedb)) (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") (include "run_records.scm") (include "test_records.scm")
︙			︙
54 55 56 57 58 59 60 ~~61 62 63 64 65~~ 66 67 68 69 70 71 72	(toppath toppath) (envdat keyvals) ;; initial values start with keyvals (runconfig #f) (serverdat (if (args:get-arg "-server") runremote #f)) ;; to be used later (transport (or (args:get-arg "-transport") 'http)) ~~(db (if (and mconfig~~ ~~(or (args:get-arg "-server")~~ ~~(eq? transport 'fs)))~~ ~~(open-db)~~ ~~#f))~~ (run-id #f)) ;; Set all the environment vars we know so far, start with keys (for-each (lambda (keyval) (setenv (car keyval)(cadr keyval))) keyvals) ;; Set up various and sundry known vars here (setenv "MT_RUN_AREA_HOME" toppath)	< < < < <	55 56 57 58 59 60 61 62 63 64 65 66 67 68	(toppath toppath) (envdat keyvals) ;; initial values start with keyvals (runconfig #f) (serverdat (if (args:get-arg "-server") runremote #f)) ;; to be used later (transport (or (args:get-arg "-transport") 'http)) (run-id #f)) ;; Set all the environment vars we know so far, start with keys (for-each (lambda (keyval) (setenv (car keyval)(cadr keyval))) keyvals) ;; Set up various and sundry known vars here (setenv "MT_RUN_AREA_HOME" toppath)
︙			︙
85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 ~~100~~ 101 102 103 104 105 106 107 108 109 110 111 112 113 ~~114~~ 115 116 117 118 119 120 ~~121 122 123 124~~ 125 126 127 ~~128~~ 129 130 131 132 133 134 135 136 137 138 139 140 141 142 ~~143~~ 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 ~~166~~ 167 168 169 ~~170 171~~ 172 173 174 175 176 177 178	(debug:print 0 "ERROR: [" (args:get-arg "-reqtarg") "] not found in " runconfigf) (if db (sqlite3:finalize! db)) (exit 1))) ;; Now have runconfigs data loaded, set environment vars (for-each (lambda (section) (for-each (lambda (varval) (set! envdat (append envdat (list varval))) ~~(setenv (car varval)(cadr varval)))~~ (configf:get-section runconfig section))) (list "default" target)) (vector target runname testpatt keys keyvals envdat mconfig runconfig serverdat transport db toppath run-id))) (define (set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)) (let* ((target (or (common:args-get-target) (get-environment-variable "MT_TARGET"))) ~~(keys (if inkeys inkeys (~~cdb:remote-run db~~:get-keys #f)))~~ (keyvals (if inkeyvals inkeyvals (keys:target->keyval keys target))) (vals (hash-table-ref/default env-vars-by-run-id run-id #f)) (link-tree (configf:lookup configdat "setup" "linktree"))) ;; get the info from the db and put it in the cache (if link-tree (setenv "MT_LINKTREE" link-tree) (debug:print 0 "ERROR: linktree not set, should be set in megatest.config in [setup] section.")) (if (not vals) (let ((ht (make-hash-table))) (hash-table-set! env-vars-by-run-id run-id ht) (set! vals ht) (for-each (lambda (key) ~~(hash-table-set! vals (car key) (cadr key))) ~~;; (cdb:remote-run db:get-run-key-val #f run-id (car key))))~~~~ keyvals))) ;; from the cached data set the vars (hash-table-for-each vals (lambda (key val) (debug:print 2 "setenv " key " " val) ~~(if (and (string? key)~~ ~~(string? val))~~ (setenv key val) ~~(debug:print 0 "ERROR: Malformed environment variable definition: var=" var ", val=" val))))~~ (if (not (get-environment-variable "MT_TARGET"))(setenv "MT_TARGET" target)) (alist->env-vars (hash-table-ref/default configdat "env-override" '())) ;; Lets use this as an opportunity to put MT_RUNNAME in the environment ~~(setenv "MT_RUNNAME" ~~(if in~~runname ~~inrunname (cdb:remote-run db:get-run-name-from-id #f run-id))~~)~~ (setenv "MT_RUN_AREA_HOME" toppath))) (define (set-item-env-vars itemdat) (for-each (lambda (item) (debug:print 2 "setenv " (car item) " " (cadr item)) (setenv (car item) (cadr item))) itemdat)) ;; Every time can-run-more-tests is called increment the delay ;; ;; NOTE: We run this server-side!! Do not use this global except in the runs:can-run-more-tests routine ;; (define last-num-running-tests 0) (define runs:can-run-more-tests-count 0) ~~(define (runs:shrink-can-run-more-tests-count) ~~;; the db is a dummy var so we can use cdb:remote-run~~~~ (set! runs:can-run-more-tests-count 0)) ;; (/ runs:can-run-more-tests-count 2))) ;; Temporary globals. Move these into the logic or into common ;; (define seen-cant-run-tests (make-hash-table)) ;; use to track tests that we suspect cannot be run (define (runs:inc-cant-run-tests testname) (hash-table-set! seen-cant-run-tests testname (+ (hash-table-ref/default seen-cant-run-tests testname 0) 1))) (define (runs:can-keep-running? testname n) (< (hash-table-ref/default seen-cant-run-tests testname 0) n)) (define runs:denoise (make-hash-table)) ;; key => last-time-ran (define (runs:lownoise key waitval) (let ((lasttime (hash-table-ref/default runs:denoise key 0)) (currtime (current-seconds))) (if (> (- currtime lasttime) waitval) (begin (hash-table-set! runs:denoise key currtime) #t) #f))) ~~(define (runs:can-run-more-tests jobgroup max-concurrent-jobs)~~ (thread-sleep! (cond ((> runs:can-run-more-tests-count 20) 2);; obviously haven't had any work to do for a while (else 0))) ~~(let* ((num-running (~~cdb:remote-run db~~:get-count-tests-running #f)) (num-running-in-jobgroup (~~cdb:remote-run db~~:get-count-tests-running-in-jobgroup #f jobgroup))~~ (job-group-limit (let ((jobg-count (config-lookup configdat "jobgroups" jobgroup))) (if (string? jobg-count) (string->number jobg-count) jobg-count)))) (if (> (+ num-running num-running-in-jobgroup) 0) (set! runs:can-run-more-tests-count (+ runs:can-run-more-tests-count 1))) (if (not (eq? last-num-running-tests num-running))	\| \| \| < < \| < > > \| > \| > \| \| \|	81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175	(debug:print 0 "ERROR: [" (args:get-arg "-reqtarg") "] not found in " runconfigf) (if db (sqlite3:finalize! db)) (exit 1))) ;; Now have runconfigs data loaded, set environment vars (for-each (lambda (section) (for-each (lambda (varval) (set! envdat (append envdat (list varval))) (safe-setenv (car varval)(cadr varval))) (configf:get-section runconfig section))) (list "default" target)) (vector target runname testpatt keys keyvals envdat mconfig runconfig serverdat transport db toppath run-id))) (define (set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)) (let* ((target (or (common:args-get-target) (get-environment-variable "MT_TARGET"))) (keys (if inkeys inkeys (rmt:get-keys))) (keyvals (if inkeyvals inkeyvals (keys:target->keyval keys target))) (vals (hash-table-ref/default env-vars-by-run-id run-id #f)) (link-tree (configf:lookup configdat "setup" "linktree"))) ;; get the info from the db and put it in the cache (if link-tree (setenv "MT_LINKTREE" link-tree) (debug:print 0 "ERROR: linktree not set, should be set in megatest.config in [setup] section.")) (if (not vals) (let ((ht (make-hash-table))) (hash-table-set! env-vars-by-run-id run-id ht) (set! vals ht) (for-each (lambda (key) (hash-table-set! vals (car key) (cadr key))) keyvals))) ;; from the cached data set the vars (hash-table-for-each vals (lambda (key val) (debug:print 2 "setenv " key " " val) (safe-setenv key val))) (if (not (get-environment-variable "MT_TARGET"))(setenv "MT_TARGET" target)) (alist->env-vars (hash-table-ref/default configdat "env-override" '())) ;; Lets use this as an opportunity to put MT_RUNNAME in the environment (let ((runname (if inrunname inrunname (rmt:get-run-name-from-id run-id)))) (if runname (setenv "MT_RUNNAME" runname) (debug:print 0 "ERROR: no value for runname for id " run-id))) (setenv "MT_RUN_AREA_HOME" toppath))) (define (set-item-env-vars itemdat) (for-each (lambda (item) (debug:print 2 "setenv " (car item) " " (cadr item)) (setenv (car item) (cadr item))) itemdat)) ;; Every time can-run-more-tests is called increment the delay ;; ;; NOTE: We run this server-side!! Do not use this global except in the runs:can-run-more-tests routine ;; (define last-num-running-tests 0) (define runs:can-run-more-tests-count 0) (define (runs:shrink-can-run-more-tests-count) (set! runs:can-run-more-tests-count 0)) ;; (/ runs:can-run-more-tests-count 2))) ;; Temporary globals. Move these into the logic or into common ;; (define seen-cant-run-tests (make-hash-table)) ;; use to track tests that we suspect cannot be run (define (runs:inc-cant-run-tests testname) (hash-table-set! seen-cant-run-tests testname (+ (hash-table-ref/default seen-cant-run-tests testname 0) 1))) (define (runs:can-keep-running? testname n) (< (hash-table-ref/default seen-cant-run-tests testname 0) n)) (define runs:denoise (make-hash-table)) ;; key => last-time-ran (define (runs:lownoise key waitval) (let ((lasttime (hash-table-ref/default runs:denoise key 0)) (currtime (current-seconds))) (if (> (- currtime lasttime) waitval) (begin (hash-table-set! runs:denoise key currtime) #t) #f))) (define (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) (thread-sleep! (cond ((> runs:can-run-more-tests-count 20) 2);; obviously haven't had any work to do for a while (else 0))) (let* ((num-running (rmt:get-count-tests-running run-id)) (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)) (job-group-limit (let ((jobg-count (config-lookup configdat "jobgroups" jobgroup))) (if (string? jobg-count) (string->number jobg-count) jobg-count)))) (if (> (+ num-running num-running-in-jobgroup) 0) (set! runs:can-run-more-tests-count (+ runs:can-run-more-tests-count 1))) (if (not (eq? last-num-running-tests num-running))
︙			︙
201 202 203 204 205 206 207 ~~208~~ 209 210 ~~211~~ 212 213 214 215 216 217 218 219 ~~220 221 222 223~~ 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 ~~243 244~~ 245 246 247 248 249 250 251	(list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit))))) ;; test-names: Comma separated patterns same as test-patts but used in selection ;; of tests to run. The item portions are not respected. ;; FIXME: error out if /patt specified ;; (define (runs:run-tests target runname test-patts user flags) ;; test-names ~~(common:clear-caches) ;; clear all caches~~ (let* ((keys (keys:config-get-fields configdat)) (keyvals (keys:target->keyval keys target)) ~~(run-id (~~cdb:remote-run db~~:register-run #f keyvals runname "new" "n/a" user)) ;; test-name)))~~ (deferred '()) ;; delay running these since they have a waiton clause (runconfigf (conc toppath "/runconfigs.config")) (test-records (make-hash-table)) (all-tests-registry (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names (all-test-names (hash-table-keys all-tests-registry)) (test-names (tests:filter-test-names all-test-names test-patts)) (required-tests (lset-intersection equal? (string-split test-patts ",") test-names))) ;; test-names)) ;; Added test-names as initial for required-tests but that failed to work ~~;; Update the synchronous setting in the db based on the default or what is set by the user~~ ~~;; This is done once here on a call to run tests rather than on every call to open-db~~ ~~(cdb:remote-run db:set-sync #f)~~ (set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process (if (file-exists? runconfigf) (setup-env-defaults runconfigf run-id already-seen-runconfig-info keyvals "pre-launch-env-vars") (debug:print 0 "WARNING: You do not have a run config file: " runconfigf)) ;; look up all tests matching the comma separated list of globs in ;; test-patts (using % as wildcard) ;; (set! test-names (delete-duplicates (tests:get-valid-tests toppath test-patts))) (debug:print-info 0 "test names " test-names) ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if ;; -keepgoing is specified (if (eq? passnum 0) (begin ;; have to delete test records where NOT_STARTED since they can cause -keepgoing to ;; get stuck due to becoming inaccessible from a failed test. I.e. if test B depends ;; on test A but test B reached the point on being registered as NOT_STARTED and test ;; A failed for some reason then on re-run using -keepgoing the run can never complete. ~~(~~cdb:~~delete-tests-in-state ~~runremote~~ run-id "NOT_STARTED") (~~cdb:remote-run db~~:set-tests-state-status #f run-id test-names #f "FAIL" "NOT_STARTED" "FAIL")))~~ ;; Ensure all tests are registered in the test_meta table (runs:update-all-test_meta #f) ;; now add non-directly referenced dependencies (i.e. waiton) ;;====================================================================== ;; refactoring this block into tests:get-full-data	< \| < < < < \| \|	198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243	(list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit))))) ;; test-names: Comma separated patterns same as test-patts but used in selection ;; of tests to run. The item portions are not respected. ;; FIXME: error out if /patt specified ;; (define (runs:run-tests target runname test-patts user flags) ;; test-names (let* ((keys (keys:config-get-fields configdat)) (keyvals (keys:target->keyval keys target)) (run-id (rmt:register-run keyvals runname "new" "n/a" user)) ;; test-name))) (deferred '()) ;; delay running these since they have a waiton clause (runconfigf (conc toppath "/runconfigs.config")) (test-records (make-hash-table)) (all-tests-registry (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names (all-test-names (hash-table-keys all-tests-registry)) (test-names (tests:filter-test-names all-test-names test-patts)) (required-tests (lset-intersection equal? (string-split test-patts ",") test-names))) ;; test-names)) ;; Added test-names as initial for required-tests but that failed to work (set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process (if (file-exists? runconfigf) (setup-env-defaults runconfigf run-id already-seen-runconfig-info keyvals "pre-launch-env-vars") (debug:print 0 "WARNING: You do not have a run config file: " runconfigf)) ;; look up all tests matching the comma separated list of globs in ;; test-patts (using % as wildcard) ;; (set! test-names (delete-duplicates (tests:get-valid-tests toppath test-patts))) (debug:print-info 0 "test names " test-names) ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if ;; -keepgoing is specified (if (eq? passnum 0) (begin ;; have to delete test records where NOT_STARTED since they can cause -keepgoing to ;; get stuck due to becoming inaccessible from a failed test. I.e. if test B depends ;; on test A but test B reached the point on being registered as NOT_STARTED and test ;; A failed for some reason then on re-run using -keepgoing the run can never complete. (rmt:general-call 'delete-tests-in-state run-id "NOT_STARTED") (rmt:set-tests-state-status run-id test-names #f "FAIL" "NOT_STARTED" "FAIL"))) ;; Ensure all tests are registered in the test_meta table (runs:update-all-test_meta #f) ;; now add non-directly referenced dependencies (i.e. waiton) ;;====================================================================== ;; refactoring this block into tests:get-full-data
︙			︙
385 386 387 388 389 390 391 ~~392~~ 393 394 395 396 397 398 399	'() reg))) (define runs:nothing-left-in-queue-count 0) (define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmap) (let* ((loop-list (list hed tal reg reruns)) ~~(prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))~~ (fails (runs:calc-fails prereqs-not-met)) (non-completed (runs:calc-not-completed prereqs-not-met))) (debug:print-info 4 "START OF INNER COND #2 " "\n can-run-more: " can-run-more "\n testname: " hed "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met) "\n non-completed: " (runs:pretty-string non-completed)	> \|	377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392	'() reg))) (define runs:nothing-left-in-queue-count 0) (define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmap) (let* ((loop-list (list hed tal reg reruns)) (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons item-path testmode itemmap: itemmap)) ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) (fails (runs:calc-fails prereqs-not-met)) (non-completed (runs:calc-not-completed prereqs-not-met))) (debug:print-info 4 "START OF INNER COND #2 " "\n can-run-more: " can-run-more "\n testname: " hed "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met) "\n non-completed: " (runs:pretty-string non-completed)
︙			︙
474 475 476 477 478 479 480 ~~481~~ 482 483 484 485 486 487 488	(if (and give-up (not (and (null? tal)(null? reg)))) (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records)) (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records))) (debug:print 1 "WARNING: test " hed " has discarded prerequisites, removing it from the queue") ~~(let ((test-id (~~cdb:remote-run db~~:get-test-id~~-cached #f~~ run-id hed "")))~~ (mt:test-set-state-status-by-id test-id "DEQUEUED" "PREQ_FAIL" "Failed to run due to failed prerequisites")) (if (and (null? trimmed-tal) (null? trimmed-reg)) #f (list (runs:queue-next-hed trimmed-tal trimmed-reg reglen regfull) (runs:queue-next-tal trimmed-tal trimmed-reg reglen regfull)	\|	467 468 469 470 471 472 473 474 475 476 477 478 479 480 481	(if (and give-up (not (and (null? tal)(null? reg)))) (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records)) (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records))) (debug:print 1 "WARNING: test " hed " has discarded prerequisites, removing it from the queue") (let ((test-id (rmt:get-test-id run-id hed ""))) (mt:test-set-state-status-by-id test-id "DEQUEUED" "PREQ_FAIL" "Failed to run due to failed prerequisites")) (if (and (null? trimmed-tal) (null? trimmed-reg)) #f (list (runs:queue-next-hed trimmed-tal trimmed-reg reglen regfull) (runs:queue-next-tal trimmed-tal trimmed-reg reglen regfull)
︙			︙
594 595 596 597 598 599 600 ~~601~~ 602 603 604 605 606 ~~607~~ 608 609 610 611 612 613 614	((string? t) t) (else (conc t)))) inlst))) (define (runs:process-expanded-tests hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmap) ~~(let* ((run-limits-info (runs:can-run-more-tests jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running~~ (have-resources (car run-limits-info)) (num-running (list-ref run-limits-info 1)) (num-running-in-jobgroup (list-ref run-limits-info 2)) (max-concurrent-jobs (list-ref run-limits-info 3)) (job-group-limit (list-ref run-limits-info 4)) ~~(prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))~~ (fails (runs:calc-fails prereqs-not-met)) (non-completed (runs:calc-not-completed prereqs-not-met)) (loop-list (list hed tal reg reruns))) (debug:print-info 4 "have-resources: " have-resources " prereqs-not-met: (" (string-intersperse (map (lambda (t) (if (vector? t)	\| > \|	587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608	((string? t) t) (else (conc t)))) inlst))) (define (runs:process-expanded-tests hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmap) (let* ((run-limits-info (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running (have-resources (car run-limits-info)) (num-running (list-ref run-limits-info 1)) (num-running-in-jobgroup (list-ref run-limits-info 2)) (max-concurrent-jobs (list-ref run-limits-info 3)) (job-group-limit (list-ref run-limits-info 4)) (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons item-path testmode itemmap: itemmap)) ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) (fails (runs:calc-fails prereqs-not-met)) (non-completed (runs:calc-not-completed prereqs-not-met)) (loop-list (list hed tal reg reruns))) (debug:print-info 4 "have-resources: " have-resources " prereqs-not-met: (" (string-intersperse (map (lambda (t) (if (vector? t)
︙			︙
640 641 642 643 644 645 646 ~~647~~ 648 649 650 651 652 653 654 ~~655 656~~ 657 658 659 660 661 662 663	;; Register tests ;; ((not (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f)) (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" ) (if (eq? transport-type 'fs) ;; no point in parallel registration if use fs (begin ~~(c~~db:tests-~~register-test runremote run-id test-name item-path)~~ (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done)) (let ((th (make-thread (lambda () (mutex-lock! registry-mutex) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'start) (mutex-unlock! registry-mutex) ;; If haven't done it before register a top level test if this is an itemized test (if (not (eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f) 'done)) ~~(c~~db:tests-~~register-test runremote run-id test-name "")) (c~~db:tests-~~register-test runremote run-id test-name item-path)~~ (mutex-lock! registry-mutex) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done) (mutex-unlock! registry-mutex)) (conc test-name "/" item-path)))) (thread-start! th))) (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) (if (and (null? tal)(null? reg))	\| \| \|	634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657	;; Register tests ;; ((not (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f)) (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" ) (if (eq? transport-type 'fs) ;; no point in parallel registration if use fs (begin (rmt:general-call 'register-test run-id run-id test-name item-path) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done)) (let ((th (make-thread (lambda () (mutex-lock! registry-mutex) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'start) (mutex-unlock! registry-mutex) ;; If haven't done it before register a top level test if this is an itemized test (if (not (eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f) 'done)) (rmt:general-call 'register-test run-id run-id test-name "")) (rmt:general-call 'register-test run-id run-id test-name item-path) (mutex-lock! registry-mutex) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done) (mutex-unlock! registry-mutex)) (conc test-name "/" item-path)))) (thread-start! th))) (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) (if (and (null? tal)(null? reg))
︙			︙
778 779 780 781 782 783 784 ~~785~~ 786 787 788 789 790 791 792	;; NB// Should expand items here and then insert into the run queue. (debug:print 5 "test-records: " test-records ", flags: " (hash-table->alist flags)) ;; Do mark-and-find clean up of db before starting runing of quue ;; ;; (cdb:remote-run db:find-and-mark-incomplete #f) ~~(let ((run-info (~~cdb:remote-run db~~:get-run-info #f run-id))~~ (tests-info (mt:get-tests-for-run run-id #f '() '())) ;; qryvals: "id,testname,item_path")) (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) (test-registry (make-hash-table)) (registry-mutex (make-mutex)) (num-retries 0) (max-retries (config-lookup configdat "setup" "maxretries")) (max-concurrent-jobs (let ((mcj (config-lookup configdat "setup" "max_concurrent_jobs")))	\|	772 773 774 775 776 777 778 779 780 781 782 783 784 785 786	;; NB// Should expand items here and then insert into the run queue. (debug:print 5 "test-records: " test-records ", flags: " (hash-table->alist flags)) ;; Do mark-and-find clean up of db before starting runing of quue ;; ;; (cdb:remote-run db:find-and-mark-incomplete #f) (let ((run-info (rmt:get-run-info run-id)) (tests-info (mt:get-tests-for-run run-id #f '() '())) ;; qryvals: "id,testname,item_path")) (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) (test-registry (make-hash-table)) (registry-mutex (make-mutex)) (num-retries 0) (max-retries (config-lookup configdat "setup" "maxretries")) (max-concurrent-jobs (let ((mcj (config-lookup configdat "setup" "max_concurrent_jobs")))
︙			︙
833 834 835 836 837 838 839 ~~840~~ 841 842 843 844 845 846 847 848 849 850 851 852 ~~853~~ 854 855 856 857 858 859 860	(priority (tests:testqueue-get-priority test-record)) (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f (items (tests:testqueue-get-items test-record)) (item-path (item-list->path itemdat)) (tfullname (runs:make-full-test-name test-name item-path)) (newtal (append tal (list hed))) (regfull (>= (length reg) reglen)) ~~(num-running (~~cdb:remote-run db~~:get-count-tests-running-for-run-id #f run-id)))~~ (if (> num-running 0) (set! last-time-some-running (current-seconds))) (if (> (current-seconds)(+ last-time-some-running 60)) (hash-table-set! max-tries-hash tfullname (+ (hash-table-ref/default max-tries-hash tfullname 0) 1))) ;; (debug:print 0 "max-tries-hash: " (hash-table->alist max-tries-hash)) ;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard ;; and it is clear they should have run but did not. (if (not (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f)) (begin ~~(c~~db:tests-~~register-test runremote run-id test-name "")~~ (hash-table-set! test-registry (runs:make-full-test-name test-name "") 'done))) ;; Fast skip of tests that are already "COMPLETED" - NO! Cannot do that as the items may not have been expanded yet :( ;; (if (member (hash-table-ref/default test-registry tfullname #f) '(DONOTRUN removed)) ;; common:cant-run-states-sym) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) (begin	\| \|	827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854	(priority (tests:testqueue-get-priority test-record)) (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f (items (tests:testqueue-get-items test-record)) (item-path (item-list->path itemdat)) (tfullname (runs:make-full-test-name test-name item-path)) (newtal (append tal (list hed))) (regfull (>= (length reg) reglen)) (num-running (rmt:get-count-tests-running-for-run-id run-id))) (if (> num-running 0) (set! last-time-some-running (current-seconds))) (if (> (current-seconds)(+ last-time-some-running 60)) (hash-table-set! max-tries-hash tfullname (+ (hash-table-ref/default max-tries-hash tfullname 0) 1))) ;; (debug:print 0 "max-tries-hash: " (hash-table->alist max-tries-hash)) ;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard ;; and it is clear they should have run but did not. (if (not (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f)) (begin (rmt:general-call 'register-test run-id run-id test-name "") (hash-table-set! test-registry (runs:make-full-test-name test-name "") 'done))) ;; Fast skip of tests that are already "COMPLETED" - NO! Cannot do that as the items may not have been expanded yet :( ;; (if (member (hash-table-ref/default test-registry tfullname #f) '(DONOTRUN removed)) ;; common:cant-run-states-sym) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) (begin
︙			︙
962 963 964 965 966 967 968 ~~969~~ 970 971 972 973 974 975 976	#f (loop (car tal)(cdr tal) reg reruns))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ;; EXPAND ITEMS ((or (procedure? items)(eq? items 'have-procedure)) ~~(let ((can-run-more (runs:can-run-more-tests jobgroup max-concurrent-jobs)))~~ (if (and (list? can-run-more) (car can-run-more)) (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmap))) (if loop-list (apply loop loop-list))) ;; if can't run more just loop with next possible test (loop (car newtal)(cdr newtal) reg reruns))))	\|	956 957 958 959 960 961 962 963 964 965 966 967 968 969 970	#f (loop (car tal)(cdr tal) reg reruns))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ;; EXPAND ITEMS ((or (procedure? items)(eq? items 'have-procedure)) (let ((can-run-more (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs))) (if (and (list? can-run-more) (car can-run-more)) (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmap))) (if loop-list (apply loop loop-list))) ;; if can't run more just loop with next possible test (loop (car newtal)(cdr newtal) reg reruns))))
︙			︙
1068 1069 1070 1071 1072 1073 1074 ~~1075 1076~~ 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 ~~1087~~ 1088 1089 1090 ~~1091 1092~~ 1093 ~~1094~~ 1095 1096 1097 1098 1099 1100 1101	(if (not (hash-table-ref/default test-meta-updated test-name #f)) (begin (hash-table-set! test-meta-updated test-name #t) (runs:update-test_meta test-name test-conf))) ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer")) (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/")) ~~(test-id (~~cdb:remote-run db~~:get-test-id~~-cached #f~~ run-id test-name item-path)) (testdat (if test-id (~~cdb~~:get-test-info-by-id runremote test-id) #f)))~~ (if (not testdat) (let loop () ;; ensure that the path exists before registering the test ;; NOPE: Cannot! Don't know yet which disk area will be assigned.... ;; (system (conc "mkdir -p " new-test-path)) ;; ;; (open-run-close tests:register-test db run-id test-name item-path) ;; ;; NB// for the above line. I want the test to be registered long before this routine gets called! ;; ~~(if (not test-id)(set! test-id (~~cdb:remote-run db~~:get-test-id~~-cached #f~~ run-id test-name item-path)))~~ (if (not test-id) (begin (debug:print 2 "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id) ~~(c~~db:tests-~~register-test runremote run-id test-name item-path) (set! test-id (~~cdb:remote-run db~~:get-test-id~~-cached #f~~ run-id test-name item-path))))~~ (debug:print-info 4 "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"") ~~(set! testdat (~~cdb~~:get-test-info-by-id runremote test-id))~~ (if (not testdat) (begin (debug:print-info 0 "WARNING: server is overloaded, trying again in one second") (thread-sleep! 1) (loop))))) (if (not testdat) ;; should NOT happen (debug:print 0 "ERROR: failed to get test record for test-id " test-id))	\| \| \| \| \| \|	1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095	(if (not (hash-table-ref/default test-meta-updated test-name #f)) (begin (hash-table-set! test-meta-updated test-name #t) (runs:update-test_meta test-name test-conf))) ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer")) (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/")) (test-id (rmt:get-test-id run-id test-name item-path)) (testdat (if test-id (rmt:get-test-info-by-id run-id test-id) #f))) (if (not testdat) (let loop () ;; ensure that the path exists before registering the test ;; NOPE: Cannot! Don't know yet which disk area will be assigned.... ;; (system (conc "mkdir -p " new-test-path)) ;; ;; (open-run-close tests:register-test db run-id test-name item-path) ;; ;; NB// for the above line. I want the test to be registered long before this routine gets called! ;; (if (not test-id)(set! test-id (rmt:get-test-id run-id test-name item-path))) (if (not test-id) (begin (debug:print 2 "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id) (rmt:general-call 'register-test run-id run-id test-name item-path) (set! test-id (rmt:get-test-id run-id test-name item-path)))) (debug:print-info 4 "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"") (set! testdat (rmt:get-test-info-by-id run-id test-id)) (if (not testdat) (begin (debug:print-info 0 "WARNING: server is overloaded, trying again in one second") (thread-sleep! 1) (loop))))) (if (not testdat) ;; should NOT happen (debug:print 0 "ERROR: failed to get test record for test-id " test-id))
︙			︙
1157 1158 1159 1160 1161 1162 1163 ~~1164~~ 1165 1166 1167 1168 1169 1170 1171 1172 ~~1173~~ 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 ~~1189~~ 1190 1191 1192 1193 1194 1195 1196	(let ((skip-test #f) (skip-check (configf:get-section test-conf "skip"))) (cond ;; Have to check for skip conditions. This one skips if there are same-named tests ;; currently running ((and skip-check (configf:lookup test-conf "skip" "prevrunning")) ~~(let ((running-tests (~~cdb:remote-run db~~:get-tests-for-runs-mindata #f #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f)))~~ (if (not (null? running-tests)) ;; have to skip (set! skip-test "Skipping due to previous tests running")))) ((and skip-check (configf:lookup test-conf "skip" "fileexists")) (if (file-exists? (configf:lookup test-conf "skip" "fileexists")) (set! skip-test (conc "Skipping due to existance of file " (configf:lookup test-conf "skip" "fileexists")))))) (if skip-test (begin ~~(mt:test-set-state-status-by-id test-id "COMPLETED" "SKIP" skip-test)~~ (debug:print-info 1 "SKIPPING Test " full-test-name " due to " skip-test)) (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags)) (begin (print "ERROR: Failed to launch the test. Exiting as soon as possible") (set! globalexitstatus 1) ;; (process-signal (current-process-id) signal/kill)))))))) ((KILLED) (debug:print 1 "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.") (hash-table-set! test-registry (runs:make-full-test-name test-name test-path) 'DONOTRUN)) ;; KILLED)) ((LAUNCHED REMOTEHOSTSTART RUNNING) (if (> (- (current-seconds)(+ (db:test-get-event_time testdat) (db:test-get-run_duration testdat))) 600) ;; i.e. no update for more than 600 seconds (begin (debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD") ~~(tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "Test is stuck or dead" #f))~~ (debug:print 2 "NOTE: " test-name " is already running"))) (else (debug:print 0 "ERROR: Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat)) (case (string->symbol (test:get-state testdat)) ((COMPLETED INCOMPLETE) (hash-table-set! test-registry (runs:make-full-test-name test-name test-path) 'DONOTRUN)) (else	> \| \| \|	1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191	(let ((skip-test #f) (skip-check (configf:get-section test-conf "skip"))) (cond ;; Have to check for skip conditions. This one skips if there are same-named tests ;; currently running ((and skip-check (configf:lookup test-conf "skip" "prevrunning")) ;; run-ids = #f means all runs (let ((running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f))) (if (not (null? running-tests)) ;; have to skip (set! skip-test "Skipping due to previous tests running")))) ((and skip-check (configf:lookup test-conf "skip" "fileexists")) (if (file-exists? (configf:lookup test-conf "skip" "fileexists")) (set! skip-test (conc "Skipping due to existance of file " (configf:lookup test-conf "skip" "fileexists")))))) (if skip-test (begin (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test) (debug:print-info 1 "SKIPPING Test " full-test-name " due to " skip-test)) (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags)) (begin (print "ERROR: Failed to launch the test. Exiting as soon as possible") (set! globalexitstatus 1) ;; (process-signal (current-process-id) signal/kill)))))))) ((KILLED) (debug:print 1 "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.") (hash-table-set! test-registry (runs:make-full-test-name test-name test-path) 'DONOTRUN)) ;; KILLED)) ((LAUNCHED REMOTEHOSTSTART RUNNING) (if (> (- (current-seconds)(+ (db:test-get-event_time testdat) (db:test-get-run_duration testdat))) 600) ;; i.e. no update for more than 600 seconds (begin (debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD") (tests:test-set-status! run-id test-id "INCOMPLETE" "STUCK/DEAD" "Test is stuck or dead" #f)) (debug:print 2 "NOTE: " test-name " is already running"))) (else (debug:print 0 "ERROR: Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat)) (case (string->symbol (test:get-state testdat)) ((COMPLETED INCOMPLETE) (hash-table-set! test-registry (runs:make-full-test-name test-name test-path) 'DONOTRUN)) (else
︙			︙
1237 1238 1239 1240 1241 1242 1243 ~~1244~~ 1245 1246 1247 1248 1249 1250 1251	;; 'set-state-status ;; ;; NB// should pass in keys? ;; (define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)) (common:clear-caches) ;; clear all caches (let* ((db #f) ~~(keys (~~cdb:remote-run db~~:get-keys db))~~ (rundat (mt:get-runs-by-patt keys runnamepatt target)) (header (vector-ref rundat 0)) (runs (vector-ref rundat 1)) (states (if state (string-split state ",") '())) (statuses (if status (string-split status ",") '())) (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f)))) (debug:print-info 4 "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status)	\|	1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246	;; 'set-state-status ;; ;; NB// should pass in keys? ;; (define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)) (common:clear-caches) ;; clear all caches (let* ((db #f) (keys (rmt:get-keys)) (rundat (mt:get-runs-by-patt keys runnamepatt target)) (header (vector-ref rundat 0)) (runs (vector-ref rundat 1)) (states (if state (string-split state ",") '())) (statuses (if status (string-split status ",") '())) (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f)))) (debug:print-info 4 "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status)
︙			︙
1282 1283 1284 1285 1286 1287 1288 ~~1289~~ ~~1290~~ 1291 1292 1293 1294 1295 1296 1297 1298 ~~1299~~ 1300 1301 1302 1303 1304 1305 1306 ~~1307~~ 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 ~~1328~~ 1329 1330 ~~1331~~ 1332 1333 1334 1335 1336 1337 ~~1338~~ 1339 1340 1341 1342 1343 1344 1345	((print-run) (debug:print 1 "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header) action) ((run-wait) (debug:print 1 "Waiting for run " runkey ", run=" runnamepatt " to complete")) (else (debug:print-info 0 "action not recognised " action))) ~~(let ((sorted-tests (sort tests (lambda (a b)(let ((dira (db~~:test~~-get~~-rundir a))~~~~ ~~~~(dirb~~ (db:test-get-rundir b)))~~ (if (and (string? dira)(string? dirb)) (> (string-length dira)(string-length dirb)) #f))))) (test-retry-time (make-hash-table)) (allow-run-time 10)) ;; seconds to allow for killing tests before just brutally killing 'em (let loop ((test (car sorted-tests)) (tal (cdr sorted-tests))) (let* ((test-id (db:test-get-id test)) ~~(new-test-dat (~~cdb~~:get-test-info-by-id runremote test-id)))~~ (if (not new-test-dat) (begin (debug:print 0 "ERROR: We have a test-id of " test-id " but no record was found. NOTE: No locking of records is done between processes, do not simultaneously remove the same run from two processes!") (if (not (null? tal)) (loop (car tal)(cdr tal)))) (let* ((item-path (db:test-get-item-path new-test-dat)) (test-name (db:test-get-testname new-test-dat)) ~~~~(run-dir~~ (db:test-get-rundir new-test-dat)) ;; run dir is from the link tree~~ (real-dir (if (file-exists? run-dir) (resolve-pathname run-dir) #f)) (test-state (db:test-get-state new-test-dat)) (test-fulln (db:test-get-fullname new-test-dat))) (case action ((remove-runs) (debug:print-info 0 "test: " test-name " itest-state: " test-state) (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) (begin (if (not (hash-table-ref/default test-retry-time test-fulln #f)) (begin ;; want to set to REMOVING BUT CANNOT do it here? (hash-table-set! test-retry-time test-fulln (current-seconds)))) (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give ;; up and blow it away. (begin (debug:print 0 "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing") ~~(mt:test-set-state-status-by-id (db:test-get-id test) "FAILEDKILL" "n/a" #f)~~ (thread-sleep! 1)) (begin ~~(mt:test-set-state-status-by-id (db:test-get-id test) "KILLREQ" "n/a" #f)~~ (thread-sleep! 1))) ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... (if (null? tal) (loop new-test-dat tal) (loop (car tal)(append tal (list new-test-dat))))) (begin ~~(mt:test-set-state-status-by-id (db:test-get-id test) "REMOVING" "LOCKED" #f)~~ (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) (if (and real-dir (> (string-length real-dir) 5) (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. (begin ;; let* ((realpath (resolve-pathname run-dir))) (debug:print-info 1 "Recursively removing " real-dir) (if (file-exists? real-dir)	\| > > \| \| > > \| \| \| \|	1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344	((print-run) (debug:print 1 "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header) action) ((run-wait) (debug:print 1 "Waiting for run " runkey ", run=" runnamepatt " to complete")) (else (debug:print-info 0 "action not recognised " action))) (let ((sorted-tests (sort tests (lambda (a b)(let ((dira ;; (rmt:sdb-qry 'getstr (db:test-get-rundir a)) ;; ) ;; (filedb:get-path fdb (db:test-get-rundir a))) (dirb ;; (rmt:sdb-qry 'getstr (db:test-get-rundir b))) ;; ) ;; ((filedb:get-path fdb (db:test-get-rundir b)))) (if (and (string? dira)(string? dirb)) (> (string-length dira)(string-length dirb)) #f))))) (test-retry-time (make-hash-table)) (allow-run-time 10)) ;; seconds to allow for killing tests before just brutally killing 'em (let loop ((test (car sorted-tests)) (tal (cdr sorted-tests))) (let* ((test-id (db:test-get-id test)) (new-test-dat (rmt:get-test-info-by-id run-id test-id))) (if (not new-test-dat) (begin (debug:print 0 "ERROR: We have a test-id of " test-id " but no record was found. NOTE: No locking of records is done between processes, do not simultaneously remove the same run from two processes!") (if (not (null? tal)) (loop (car tal)(cdr tal)))) (let* ((item-path (db:test-get-item-path new-test-dat)) (test-name (db:test-get-testname new-test-dat)) (run-dir ;;(filedb:get-path fdb ;; (rmt:sdb-qry 'getid (db:test-get-rundir new-test-dat)) ;; ) ;; run dir is from the link tree (real-dir (if (file-exists? run-dir) (resolve-pathname run-dir) #f)) (test-state (db:test-get-state new-test-dat)) (test-fulln (db:test-get-fullname new-test-dat))) (case action ((remove-runs) (debug:print-info 0 "test: " test-name " itest-state: " test-state) (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) (begin (if (not (hash-table-ref/default test-retry-time test-fulln #f)) (begin ;; want to set to REMOVING BUT CANNOT do it here? (hash-table-set! test-retry-time test-fulln (current-seconds)))) (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give ;; up and blow it away. (begin (debug:print 0 "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing") (mt:test-set-state-status-by-id run-id (db:test-get-id test) "FAILEDKILL" "n/a" #f) (thread-sleep! 1)) (begin (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f) (thread-sleep! 1))) ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... (if (null? tal) (loop new-test-dat tal) (loop (car tal)(append tal (list new-test-dat))))) (begin (mt:test-set-state-status-by-id run-id (db:test-get-id test) "REMOVING" "LOCKED" #f) (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) (if (and real-dir (> (string-length real-dir) 5) (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. (begin ;; let* ((realpath (resolve-pathname run-dir))) (debug:print-info 1 "Recursively removing " real-dir) (if (file-exists? real-dir)
︙			︙
1363 1364 1365 1366 1367 1368 1369 ~~1370~~ 1371 1372 1373 1374 ~~1375~~ 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 ~~1395 1396 1397 1398 1399~~ 1400 1401 1402 1403 1404 1405 1406	(debug:print 0 "ERROR: Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") (delete-directory run-dir))) (if run-dir (debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink") (debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) )) ;; Only delete the records after removing the directory. If things fail we have a record ~~(~~cdb:remote-run db~~:delete-test-records db #f (db:test-get-id test))~~ (if (not (null? tal)) (loop (car tal)(cdr tal)))))) ((set-state-status) (debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status)) ~~(mt:test-set-state-status-by-id (db:test-get-id test) (car state-status)(cadr state-status) #f)~~ (if (not (null? tal)) (loop (car tal)(cdr tal)))) ((run-wait) (debug:print-info 2 "still waiting, " (length tests) " tests still running") (thread-sleep! 10) (let ((new-tests (proc-get-tests run-id))) (if (null? new-tests) (debug:print-info 1 "Run completed according to zero tests matching provided criteria.") (loop (car new-tests)(cdr new-tests)))))))) ))))) ;; remove the run if zero tests remain (if (eq? action 'remove-runs) (let ((remtests (mt:get-tests-for-run (db:get-value-by-header run header "id") #f '("DELETED") '("n/a") not-in: #t))) (if (null? remtests) ;; no more tests remaining (let* ((dparts (string-split lasttpath "/")) (runpath (conc "/" (string-intersperse (take dparts (- (length dparts) 1)) "/")))) (debug:print 1 "Removing run: " runkey " " (db:get-value-by-header run header "runname") " and related record") (~~cdb:remote-run db~~:delete-run db run-id) ~~;; This is a pretty good place to purge old DELETED tests~~ ~~(cdb:remote-run db:delete-tests-for-run db run-id)~~ (~~cdb:remote-run db~~:delete-old-deleted-test-records db) (cdb:remote-run db:set-var db "DELETED_TESTS" (current-seconds)) ;; need to figure out the path to the run dir and remove it if empty ;; (if (null? (glob (conc runpath "/*"))) ;; (begin ;; (debug:print 1 "Removing run dir " runpath) ;; (system (conc "rmdir -p " runpath)))) ))))) ))	\| \| \| < < \| \|	1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403	(debug:print 0 "ERROR: Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") (delete-directory run-dir))) (if run-dir (debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink") (debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) )) ;; Only delete the records after removing the directory. If things fail we have a record (rmt:delete-test-records (db:test-get-run_id test)(db:test-get-id test)) (if (not (null? tal)) (loop (car tal)(cdr tal)))))) ((set-state-status) (debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status)) (mt:test-set-state-status-by-id run-id (db:test-get-id test) (car state-status)(cadr state-status) #f) (if (not (null? tal)) (loop (car tal)(cdr tal)))) ((run-wait) (debug:print-info 2 "still waiting, " (length tests) " tests still running") (thread-sleep! 10) (let ((new-tests (proc-get-tests run-id))) (if (null? new-tests) (debug:print-info 1 "Run completed according to zero tests matching provided criteria.") (loop (car new-tests)(cdr new-tests)))))))) ))))) ;; remove the run if zero tests remain (if (eq? action 'remove-runs) (let ((remtests (mt:get-tests-for-run (db:get-value-by-header run header "id") #f '("DELETED") '("n/a") not-in: #t))) (if (null? remtests) ;; no more tests remaining (let* ((dparts (string-split lasttpath "/")) (runpath (conc "/" (string-intersperse (take dparts (- (length dparts) 1)) "/")))) (debug:print 1 "Removing run: " runkey " " (db:get-value-by-header run header "runname") " and related record") (rmt:delete-run run-id) (rmt:delete-old-deleted-test-records) ;; (cdb:remote-run db:set-var db "DELETED_TESTS" (current-seconds)) ;; need to figure out the path to the run dir and remove it if empty ;; (if (null? (glob (conc runpath "/*"))) ;; (begin ;; (debug:print 1 "Removing run dir " runpath) ;; (system (conc "rmdir -p " runpath)))) ))))) ))
︙			︙
1469 1470 1471 1472 1473 1474 1475 ~~1476~~ 1477 1478 1479 1480 1481 1482 1483 1484 ~~1485~~ 1486 1487 1488 ~~1489~~ 1490 1491 1492 1493 1494 1495 1496 1497 1498 ~~1499~~ 1500 1501 1502 1503 1504 1505 1506 1507 ~~1508~~ 1509 1510 1511 1512 1513 1514 1515 ~~1516 1517~~ 1518 1519 ~~1520~~ 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 ~~1538~~ 1539 1540 1541 1542 1543 1544 1545	(for-each (lambda (run) (let ((run-id (db:get-value-by-header run header "id"))) (if (or lock (and unlock (begin (print "Do you really wish to unlock run " run-id "?\n y/n: ") (equal? "y" (read-line))))) ~~(~~cdb:remote-run db~~:lock/unlock-run db run-id lock unlock user)~~ (debug:print-info 0 "Skipping lock/unlock on " run-id)))) runs))) ;;====================================================================== ;; Rollup runs ;;====================================================================== ;; Update the test_meta table for this test (define (runs:update-test_meta test-name test-conf) ~~(let ((currrecord (~~cdb:remote-run db~~:testmeta-get-record #f test-name)))~~ (if (not currrecord) (begin (set! currrecord (make-vector 11 #f)) ~~(~~cdb:remote-run db~~:testmeta-add-record #f test-name)))~~ (for-each (lambda (key) (let* ((idx (cadr key)) (fld (car key)) (val (config-lookup test-conf "test_meta" fld))) ;; (debug:print 5 "idx: " idx " fld: " fld " val: " val) (if (and val (not (equal? (vector-ref currrecord idx) val))) (begin (print "Updating " test-name " " fld " to " val) ~~(~~cdb:remote-run db~~:testmeta-update-field #f test-name fld val)))))~~ '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10))))) ;; Update test_meta for all tests (define (runs:update-all-test_meta db) (let ((test-names (tests:get-all))) ;; (tests:get-valid-tests))) (for-each (lambda (test-name) (let* ((test-conf (mt:lazy-read-test-config test-name))) ~~;; use the cdb:remote-run instead of passing in db~~ (if test-conf (runs:update-test_meta test-name test-conf)))) (hash-table-keys test-names)))) ;; This could probably be refactored into one complex query ... (define (runs:rollup-run keys runname user keyvals) (debug:print 4 "runs:rollup-run, keys: " keys " -runname " runname " user: " user) (let* ((db #f) ~~(new-run-id (~~cdb:remote-run db~~:register-run #f keyvals runname "new" "n/a" user)) (prev-tests (~~cdb:remote-run tes~~t:get-matching-previous-test-run-records db new-run-id "%" "%"))~~ (curr-tests (mt:get-tests-for-run new-run-id "%/%" '() '())) (curr-tests-hash (make-hash-table))) ~~(~~cdb:remote-run db~~:update-run-event_time db new-run-id)~~ ;; index the already saved tests by testname and itemdat in curr-tests-hash (for-each (lambda (testdat) (let* ((testname (db:test-get-testname testdat)) (item-path (db:test-get-item-path testdat)) (full-name (conc testname "/" item-path))) (hash-table-set! curr-tests-hash full-name testdat))) curr-tests) ;; NOPE: Non-optimal approach. Try this instead. ;; 1. tests are received in a list, most recent first ;; 2. replace the rollup test with the new always (for-each (lambda (testdat) (let* ((testname (db:test-get-testname testdat)) (item-path (db:test-get-item-path testdat)) (full-name (conc testname "/" item-path)) (prev-test-dat (hash-table-ref/default curr-tests-hash full-name #f)) ~~(test-steps (~~cdb:remote-run db~~:get-steps-for-test db (db:test-get-id testdat)))~~ (new-test-record #f)) ;; replace these with insert ... select (apply sqlite3:execute db (conc "INSERT OR REPLACE INTO tests (run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment) " "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?);") new-run-id (cddr (vector->list testdat)))	\| \| \| \| < > > > \| \| \| \|	1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544	(for-each (lambda (run) (let ((run-id (db:get-value-by-header run header "id"))) (if (or lock (and unlock (begin (print "Do you really wish to unlock run " run-id "?\n y/n: ") (equal? "y" (read-line))))) (rmt:lock/unlock-run run-id lock unlock user) (debug:print-info 0 "Skipping lock/unlock on " run-id)))) runs))) ;;====================================================================== ;; Rollup runs ;;====================================================================== ;; Update the test_meta table for this test (define (runs:update-test_meta test-name test-conf) (let ((currrecord (rmt:testmeta-get-record test-name))) (if (not currrecord) (begin (set! currrecord (make-vector 11 #f)) (rmt:testmeta-add-record test-name))) (for-each (lambda (key) (let* ((idx (cadr key)) (fld (car key)) (val (config-lookup test-conf "test_meta" fld))) ;; (debug:print 5 "idx: " idx " fld: " fld " val: " val) (if (and val (not (equal? (vector-ref currrecord idx) val))) (begin (print "Updating " test-name " " fld " to " val) (rmt:testmeta-update-field test-name fld val))))) '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10))))) ;; Update test_meta for all tests (define (runs:update-all-test_meta db) (let ((test-names (tests:get-all))) ;; (tests:get-valid-tests))) (for-each (lambda (test-name) (let* ((test-conf (mt:lazy-read-test-config test-name))) (if test-conf (runs:update-test_meta test-name test-conf)))) (hash-table-keys test-names)))) ;; This could probably be refactored into one complex query ... ;; NOT PORTED - DO NOT USE YET ;; (define (runs:rollup-run keys runname user keyvals) (debug:print 4 "runs:rollup-run, keys: " keys " -runname " runname " user: " user) (let* ((db #f) ;; register run operates on the main db (new-run-id (rmt:register-run keyvals runname "new" "n/a" user)) (prev-tests (rmt:get-matching-previous-test-run-records new-run-id "%" "%")) (curr-tests (mt:get-tests-for-run new-run-id "%/%" '() '())) (curr-tests-hash (make-hash-table))) (rmt:update-run-event_time new-run-id) ;; index the already saved tests by testname and itemdat in curr-tests-hash (for-each (lambda (testdat) (let* ((testname (db:test-get-testname testdat)) (item-path (db:test-get-item-path testdat)) (full-name (conc testname "/" item-path))) (hash-table-set! curr-tests-hash full-name testdat))) curr-tests) ;; NOPE: Non-optimal approach. Try this instead. ;; 1. tests are received in a list, most recent first ;; 2. replace the rollup test with the new always (for-each (lambda (testdat) (let* ((testname (db:test-get-testname testdat)) (item-path (db:test-get-item-path testdat)) (full-name (conc testname "/" item-path)) (prev-test-dat (hash-table-ref/default curr-tests-hash full-name #f)) (test-steps (rmt:get-steps-for-test (db:test-get-id testdat))) (new-test-record #f)) ;; replace these with insert ... select (apply sqlite3:execute db (conc "INSERT OR REPLACE INTO tests (run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment) " "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?);") new-run-id (cddr (vector->list testdat)))
︙			︙