Megatest: Changes On Branch a035fad97b7ad66a

Changes In Branch 1.65-subrun-ancilliary-usecases Through [a035fad97b] Excluding Merge-Ins

This is equivalent to a diff from 7cb9fcca30 to a035fad97b

2017-12-27
19:01		updated to work with keep-records and updated manual Leaf check-in: 70391eee14 user: bjbarcla tags: 1.65-subrun-ancilliary-usecases
18:12		parallelized removal of subruns check-in: a035fad97b user: bjbarcla tags: 1.65-subrun-ancilliary-usecases
16:24		subrun kill works but suboptimal (serial kill) check-in: e010ede9bd user: bjbarcla tags: 1.65-subrun-ancilliary-usecases
2017-12-20
17:53		wip check-in: ddb7261be3 user: bjbarcla tags: 1.65-subrun-ancilliary-usecases
2017-12-15
16:47		added css to static html genration check-in: 4a97fe2a81 user: pjhatwal tags: v1.65
2017-12-13
23:54		TODO: send email to notify admin contact listed in the config that the listener got killed check-in: 7cb9fcca30 user: pjhatwal tags: v1.65
23:49		added rmt get-steps-info-by-id and get-test-info-by-id to readonly list added static html generation; check-in: dabd344efb user: pjhatwal tags: v1.65

Modified Makefile from [f43b069198] to [c37f1ed514].

Modified common.scm from [933bab82d3] to [ebc2b450b4].

Modified dashboard-tests.scm from [c114ec0352] to [1155a25cab].

Modified launch.scm from [ab11d5875b] to [37186dba18].

Modified mtut.scm from [fc44892821] to [9183a72ed5].

Modified runs.scm from [b0c63a44c5] to [b511145c8a].

Added subrun.scm version [3571b59cfa].

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21	# make install CSCOPTS='-accumulate-profile -profile-name $(PWD)/profile-ww$(shell date +%V.%u)' # rm <files>.o ; make install CSCOPTS='-profile' ; ... ; chicken-profile \| less PREFIX=$(PWD) CSCOPTS= INSTALL=install SRCFILES = common.scm items.scm launch.scm \ ods.scm runconfig.scm server.scm configf.scm \ db.scm keys.scm margs.scm megatest-version.scm \ process.scm runs.scm tasks.scm tests.scm genexample.scm \ http-transport.scm filedb.scm tdb.scm \ client.scm daemon.scm mt.scm \ ezsteps.scm lock-queue.scm sdb.scm \ ~~rmt.scm api.scm \~~ portlogger.scm archive.scm env.scm diff-report.scm cgisetup/models/pgdb.scm # module source files MSRCFILES = ftail.scm # Eggs to install (straightforward ones) EGGS=matchable readline apropos base64 regex-literals format regex-case test coops trace csv \	\|	1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21	# make install CSCOPTS='-accumulate-profile -profile-name $(PWD)/profile-ww$(shell date +%V.%u)' # rm <files>.o ; make install CSCOPTS='-profile' ; ... ; chicken-profile \| less PREFIX=$(PWD) CSCOPTS= INSTALL=install SRCFILES = common.scm items.scm launch.scm \ ods.scm runconfig.scm server.scm configf.scm \ db.scm keys.scm margs.scm megatest-version.scm \ process.scm runs.scm tasks.scm tests.scm genexample.scm \ http-transport.scm filedb.scm tdb.scm \ client.scm daemon.scm mt.scm \ ezsteps.scm lock-queue.scm sdb.scm \ rmt.scm api.scm subrun.scm \ portlogger.scm archive.scm env.scm diff-report.scm cgisetup/models/pgdb.scm # module source files MSRCFILES = ftail.scm # Eggs to install (straightforward ones) EGGS=matchable readline apropos base64 regex-literals format regex-case test coops trace csv \
︙			︙
89 90 91 92 93 94 95 96 97 98 99 100 101 102	rpc-transport.o \ runconfig.o \ runs.o \ server.o \ tasks.o \ tdb.o \ tests.o \ tcmt : $(TCMTOBJS) tcmt.scm csc $(CSCOPTS) $(TCMTOBJS) tcmt.scm -o tcmt # install documentation to $(PREFIX)/docs # DOES NOT REBUILD DOCS	>	89 90 91 92 93 94 95 96 97 98 99 100 101 102 103	rpc-transport.o \ runconfig.o \ runs.o \ server.o \ tasks.o \ tdb.o \ tests.o \ subrun.o \ tcmt : $(TCMTOBJS) tcmt.scm csc $(CSCOPTS) $(TCMTOBJS) tcmt.scm -o tcmt # install documentation to $(PREFIX)/docs # DOES NOT REBUILD DOCS
︙			︙

︙			︙
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29	(use srfi-1 data-structures posix regex-case (prefix base64 base64:) format dot-locking csv-xml z3 ;; sql-de-lite hostinfo md5 message-digest typed-records directory-utils stack matchable regex posix (srfi 18) extras ;; tcp (prefix nanomsg nmsg:) (prefix sqlite3 sqlite3:) ~~pkts)~~ (declare (unit common)) (include "common_records.scm") ;; (require-library margs) ;; (include "margs.scm") ;; (define old-exit exit) ;; ;; (define (exit . code)	\| > >	11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31	(use srfi-1 data-structures posix regex-case (prefix base64 base64:) format dot-locking csv-xml z3 ;; sql-de-lite hostinfo md5 message-digest typed-records directory-utils stack matchable regex posix (srfi 18) extras ;; tcp (prefix nanomsg nmsg:) (prefix sqlite3 sqlite3:) pkts ) (declare (unit common)) (include "common_records.scm") ;; (require-library margs) ;; (include "margs.scm") ;; (define old-exit exit) ;; ;; (define (exit . code)
︙			︙
1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808	(string-search whitesp key) (string-search ":" key)) ;; internal only values to be skipped. "# export " "export ") key "=" delim (mungeval val) delim))) envvars))))) ;; set some env vars from an alist, return an alist with original values ;; (("VAR" "value") ...) ;; a value of #f means "unset this var" ;; (define (alist->env-vars lst) (if (list? lst) (let ((res '())) (for-each (lambda (p) (let* ((var (car p)) (val (cadr p)) (prv (get-environment-variable var))) (set! res (cons (list var prv) res)) (if val (safe-setenv var (->string val)) (unsetenv var)))) lst) res) '())) ;; clear vars matching pattern, run proc, set vars back ;; if proc is a string run that string as a command with ;; system. ;; (define (common:without-vars proc . var-patts) (let ((vars (make-hash-table)))	> > > > > > > > > > > > > > > > > > > > >	1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831	(string-search whitesp key) (string-search ":" key)) ;; internal only values to be skipped. "# export " "export ") key "=" delim (mungeval val) delim))) envvars))))) (define (common:get-param-mapping #!key (flavor #f)) "returns alist mapping string keys in testconfig/subrun to megatest command line switches; if flavor is switch-symbol, maps tcmt symbolic switches to megatest switches" (let ((default '(("tag-expr" . "-tagexpr") ("mode-patt" . "-modepatt") ("run-name" . "-runname") ("contour" . "-contour") ("mode-patt" . "-mode-patt") ("target" . "-target") ("test-patt" . "-testpatt") ("msg" . "-m") ("log" . "-log") ("start-dir" . "-start-dir") ("new" . "-set-state-status")))) (if (eq? flavor 'switch-symbol) (map (lambda (x) (cons (string->symbol (conc "-" (car x))) (cdr x))) default) default))) ;; set some env vars from an alist, return an alist with original values ;; (("VAR" "value") ...) ;; a value of #f means "unset this var" ;; (define (alist->env-vars lst) (if (list? lst) (let ((res '())) (for-each (lambda (p) (let* ((var (car p)) (val (cadr p)) (prv (get-environment-variable var))) (set! res (cons (list var prv) res)) (if val (safe-setenv var (->string val)) (unsetenv var)))) lst) res) '())) ;; clear vars matching pattern, run proc, set vars back ;; if proc is a string run that string as a command with ;; system. ;; (define (common:without-vars proc . var-patts) (let ((vars (make-hash-table)))
︙			︙
1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834	((string? proc)(system proc)) (proc (proc))) (hash-table-for-each vars (lambda (var val) (setenv var val))) vars)) (define (common:run-a-command cmd #!key (with-vars #f)) (let* ((pre-cmd (dtests:get-pre-command)) (post-cmd (dtests:get-post-command)) (fullcmd (if (or pre-cmd post-cmd) (conc pre-cmd cmd post-cmd) (conc "viewscreen " cmd))))	>	1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858	((string? proc)(system proc)) (proc (proc))) (hash-table-for-each vars (lambda (var val) (setenv var val))) vars)) (define (common:run-a-command cmd #!key (with-vars #f)) (let* ((pre-cmd (dtests:get-pre-command)) (post-cmd (dtests:get-post-command)) (fullcmd (if (or pre-cmd post-cmd) (conc pre-cmd cmd post-cmd) (conc "viewscreen " cmd))))
︙			︙
2603 2604 2605 2606 2607 2608 2609	((string? new-val) (setenv env-var new-val))) restore-thunk)) delta-env-alist)))) (let ((rv (thunk))) (for-each (lambda (x) (x)) restore-thunks) ;; restore env to original state rv)))	> > > > > >	2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639	((string? new-val) (setenv env-var new-val))) restore-thunk)) delta-env-alist)))) (let ((rv (thunk))) (for-each (lambda (x) (x)) restore-thunks) ;; restore env to original state rv))) (define (common:send-thunk-to-background-thread thunk #!key (name #f)) ;;(BB> "launched thread " name) (if name (thread-start! (make-thread thunk name)) (thread-start! (make-thread thunk))))

︙			︙
16 17 18 19 20 21 22 23 24 25 26 27 28 29	(use regex regex-case base64 sqlite3 srfi-18 directory-utils posix-extras z3 call-with-environment-variables csv) (use typed-records pathname-expand matchable) (import (prefix base64 base64:)) (import (prefix sqlite3 sqlite3:)) (declare (unit launch)) (declare (uses common)) (declare (uses configf)) (declare (uses db)) (include "common_records.scm") (include "key_records.scm") (include "db_records.scm")	>	16 17 18 19 20 21 22 23 24 25 26 27 28 29 30	(use regex regex-case base64 sqlite3 srfi-18 directory-utils posix-extras z3 call-with-environment-variables csv) (use typed-records pathname-expand matchable) (import (prefix base64 base64:)) (import (prefix sqlite3 sqlite3:)) (declare (unit launch)) (declare (uses subrun)) (declare (uses common)) (declare (uses configf)) (declare (uses db)) (include "common_records.scm") (include "key_records.scm") (include "db_records.scm")
︙			︙
291 292 293 294 295 296 297 ~~298~~ 299 300 301 302 303 304 305	(begin (thread-sleep! 2) (loop (+ i 1))) ))))) ;; then, if runscript ran ok (or did not get called) ;; do all the ezsteps (if any) (if (or ezsteps subrun) ~~(let* ((testconfig ;; (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here?~~ ;; NOTE: it is tempting to turn off force-create of testconfig but dynamic ;; ezstep names need a full re-eval here. (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs))) (ezstepslst (if (hash-table? testconfig) (hash-table-ref/default testconfig "ezsteps" '()) #f))) (if testconfig	> \|	292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307	(begin (thread-sleep! 2) (loop (+ i 1))) ))))) ;; then, if runscript ran ok (or did not get called) ;; do all the ezsteps (if any) (if (or ezsteps subrun) (let* ((test-run-dir (tests:get-test-path-from-environment)) (testconfig ;; (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here? ;; NOTE: it is tempting to turn off force-create of testconfig but dynamic ;; ezstep names need a full re-eval here. (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs))) (ezstepslst (if (hash-table? testconfig) (hash-table-ref/default testconfig "ezsteps" '()) #f))) (if testconfig
︙			︙
318 319 320 321 322 323 324 ~~325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364~~ 365 366 ~~367~~ ~~368 369 370 371 372~~ 373 374 375 376 377 378 379	;; 1. get section [runarun] ;; 2. unset MT_* vars ;; 3. fix target ;; 4. fix runname ;; 5. fix testpatt or calculate it from contour ;; 6. launch the run ;; 7. roll up the run result and or roll up the logpro processed result (if (configf:lookup testconfig "subrun" "runwait") ;; we use runwait as the flag that a subrun is requested ~~(let* ((runarea (let ((ra (configf:lookup testconfig "subrun" "runarea")))~~ ~~(if ra ;; when runarea is not set we default to toppath. However~~ ~~ra ;; we need to force the setting in the testconfig so it will~~ ~~(begin ;; be preserved in the testconfig.subrun file~~ ~~(configf:set-section-var testconfig "subrun" "runarea" toppath)~~ toppath)))) ~~(passfail (configf:lookup testconfig "subrun" "passfail"))~~ ~~(target (or (configf:lookup testconfig "subrun" "target") (get-environment-variable "MT_TARGET")))~~ ~~(runname (or (configf:lookup testconfig "subrun" "runname")(get-environment-variable "MT_RUNNAME")))~~ (~~con~~tou~~r (co~~n~~figf:lookup~~ testconfig ~~"sub~~run~~" "contour")~~) ~~(testpatt (configf:lookup testconfig "subrun" "testpatt"))~~ ~~(mode-patt (configf:lookup testconfig "subrun" "mode-patt"))~~ ~~(tag-expr (configf:lookup testconfig "subrun" "tag-expr"))~~ ~~(run-wait (configf:lookup testconfig "subrun" "runwait"))~~ ~~(logpro (configf:lookup testconfig "subrun" "logpro"))~~ ~~(compact-stem (string-substitute "[/]" "_" (conc target "-" runname "-" (or testpatt mode-patt tag-expr))))~~ ~~(log-file (conc compact-stem ".log"))~~ ~~(mt-cmd (conc "megatest -run -target " target~~ ~~" -runname " runname~~ ~~(conc " -start-dir " runarea) ;; (if runarea runarea toppath))~~ ~~(if testpatt (conc " -testpatt " testpatt) "")~~ ~~(if mode-patt (conc " -modepatt " mode-patt) "")~~ ~~(if tag-expr (conc " -tag-expr" tag-expr) "")~~ ~~(if (equal? run-wait "yes") " -run-wait " "")~~ ~~" -log " log-file)))~~ ~~;; change directory to runarea, create it if needed, we do NOT create the directory~~ ~~;; (if runarea~~ ~~;; (if (directory-exists? runarea)~~ ~~;; (change-directory runarea)~~ ~~;; (begin~~ ~~;; (debug:print 0 default-log-port* "ERROR: for sub-megatest run the runarea \"" runarea "\" does not exist! EXITING.")~~ ~~;; (exit 1))))~~ ~~;; (let ((subrun (conc toppath "/subrun") #t))~~ ~~;; (create-directory subrun)~~ ~~;; (change-directory subrun)))~~ ~~;; by this point we are in the right place to run the subrun and we have a Megatest command to run~~ ~~;; (filter (lambda (x)(string-match "MT_." (car x))) (get-environment-variables))~~ ~~;; (common:without-vars mt-cmd "^MT_.")~~ (debug:print-info 0 default-log-port "Subrun command is \"" mt-cmd "\"") (set! ezsteps #t) ;; set the needed flag ~~(set! ezstepslst ~~(append (or ezstepslst '())~~~~ (list (list "subrun" (conc "{subrun=true} " mt-cmd))))) ~~(configf:set-section-var testconfig "logpro" "subrun" logpro) ;; append the logpro rules to the logpro section as stepname subrun~~ ~~(if runarea (configf:set-section-var testconfig "setup" "submegatest" runarea))~~ ~~(configf:write-alist testconfig "testconfig.subrun")~~ )) ;; process the ezsteps (if ezsteps (begin (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps")) ;; if ezsteps was defined then we are sure to have at least one step but check anyway (if (not (> (length ezstepslst) 0))	\| < < < < < < < < < \| < < < < < < < < < < < < < < < < < < < < < < < < < \| < < < \| > \| < < < <	320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341	;; 1. get section [runarun] ;; 2. unset MT_* vars ;; 3. fix target ;; 4. fix runname ;; 5. fix testpatt or calculate it from contour ;; 6. launch the run ;; 7. roll up the run result and or roll up the logpro processed result (when (configf:lookup testconfig "subrun" "runwait") ;; we use runwait as the flag that a subrun is requested (subrun:initialize-toprun-test testconfig test-run-dir) (let* ((mt-cmd (subrun:launch-cmd test-run-dir))) (debug:print-info 0 default-log-port "Subrun command is \"" mt-cmd "\"") (set! ezsteps #t) ;; set the needed flag (set! ezstepslst (append (or ezstepslst '()) (list (list "subrun" (conc "{subrun=true} " mt-cmd))))))) ;; process the ezsteps (if ezsteps (begin (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps")) ;; if ezsteps was defined then we are sure to have at least one step but check anyway (if (not (> (length ezstepslst) 0))
︙			︙

︙			︙
251 252 253 254 255 256 257 ~~258~~ ~~259 260 261 262 263 264 265 266~~ 267 268 269 270 271 272 273	;;====================================================================== ;; U T I L S ;;====================================================================== ;; given a mtutil param, return the old megatest equivalent ;; ~~(define (param-tra~~nslate~~ param)~~ ~~(or~~ (alist-ref (string->symbol param) ~~'((-tag-expr . "-tagexpr")~~ ~~(-mode-patt . "-modepatt")~~ ~~(-run-name . "-runname")~~ ~~(-test-patt . "-testpatt")~~ ~~(-msg . "-m")~~ ~~(-new . "-set-state-status")))~~ param)) (define (val->alist val) (let ((val-list (string-split-fields ";\\s" val #:infix))) (if val-list (map (lambda (x) (let ((f (string-split-fields "\\s=\\s*" x #:infix))) (case (length f)	\| > \| < < < < < < \|	251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268	;;====================================================================== ;; U T I L S ;;====================================================================== ;; given a mtutil param, return the old megatest equivalent ;; (define (megatest-param->mtutil-param param) (let* ((mapping-alist (common:get-param-mapping flavor: 'switch-symbol))) (alist-ref (string->symbol param) mapping-alist eq? param) param)) (define (val->alist val) (let ((val-list (string-split-fields ";\\s" val #:infix))) (if val-list (map (lambda (x) (let ((f (string-split-fields "\\s=\\s*" x #:infix))) (case (length f)
︙			︙
961 962 963 964 965 966 967 ~~968 969 970 971~~ 972 973 974 975 976 977 978 ~~979~~ 980 981 982 983 984 985 986	runkeydats))) (let ((res (configf:get-section torun contour))) ;; each contour / target ;; (print "res=" res) res)))) (hash-table-keys torun))))))) (define (pkt->cmdline pkta) ~~(let* ((action (or (lookup-action-by-key (alist-ref 'A pkta)) "noaction")) (action-param (case (string->symbol action) ((-set-state-status) (conc (alist-ref 'l pkta) " ")) (else ""))))~~ (fold (lambda (a res) (let* ((key (car a)) ;; get the key name (val (cdr a)) (par (or (lookup-param-by-key key) ;; need to check also if it is a switch (lookup-param-by-key key inlst: switch-keys)))) ;; (print "key: " key " val: " val " par: " par) (if par ~~(conc res " " (param-t~~ranslate~~ par) " " val)~~ (if (alist-ref key additional-cards) ;; these cards do not translate to parameters or switches res (begin (print "ERROR: Unknown key in packet \"" key "\" with value \"" val "\"") res))))) (conc "megatest " (if (not (member action '("sync"))) (conc action " " action-param)	> \| \| \| \| \|	956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982	runkeydats))) (let ((res (configf:get-section torun contour))) ;; each contour / target ;; (print "res=" res) res)))) (hash-table-keys torun))))))) (define (pkt->cmdline pkta) (let* ((param-mapping-alist (common:get-param-mapping flavor: 'switch-symbol)) (action (or (lookup-action-by-key (alist-ref 'A pkta)) "noaction")) (action-param (case (string->symbol action) ((-set-state-status) (conc (alist-ref 'l pkta) " ")) (else "")))) (fold (lambda (a res) (let* ((key (car a)) ;; get the key name (val (cdr a)) (par (or (lookup-param-by-key key) ;; need to check also if it is a switch (lookup-param-by-key key inlst: switch-keys)))) ;; (print "key: " key " val: " val " par: " par) (if par (conc res " " (alist-ref (string->symbol par) param-mapping-alist eq? par) " " val) (if (alist-ref key additional-cards) ;; these cards do not translate to parameters or switches res (begin (print "ERROR: Unknown key in packet \"" key "\" with value \"" val "\"") res))))) (conc "megatest " (if (not (member action '("sync"))) (conc action " " action-param)
︙			︙

︙			︙
1148 1149 1150 1151 1152 1153 1154 ~~1155~~ 1156 1157 1158 1159 1160 1161 1162 ~~1163~~ 1164 1165 1166 1167 1168 1169 1170	(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f) ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug. (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL (list (if (null? tal)(car newtal)(car tal)) tal reg reruns))))) ~~;; ELSE: can't drop this - maybe running? Just keep trying~~ ;;(if (not (or (not (null? reg))(not (null? tal)))) ;; old experiment (let ((runable-tests (runs:runable-tests prereqs-not-met))) ;; SUSPICIOUS: Should look at more than just prereqs-not-met? (if (null? runable-tests) #f ;; I think we are truly done here (runs:loop-values newtal reg reglen regfull reruns))) ;;) ;;from old experiment ~~) ;; end if (or (not (null? reg))(not (null? tal)))~~ )))))) ;; scan a list of tests looking to see if any are potentially runnable ;; (define (runs:runable-tests tests) (filter (lambda (t) (if (not (vector? t))	\| \|	1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170	(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f) ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug. (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL (list (if (null? tal)(car newtal)(car tal)) tal reg reruns))))) ;; ELSE: can't drop this - maybe running? Just keep trying ;;(if (not (or (not (null? reg))(not (null? tal)))) ;; old experiment (let ((runable-tests (runs:runable-tests prereqs-not-met))) ;; SUSPICIOUS: Should look at more than just prereqs-not-met? (if (null? runable-tests) #f ;; I think we are truly done here (runs:loop-values newtal reg reglen regfull reruns))) ;;) ;;from old experiment ) ;; end if (or (not (null? reg))(not (null? tal))) )))))) ;; scan a list of tests looking to see if any are potentially runnable ;; (define (runs:runable-tests tests) (filter (lambda (t) (if (not (vector? t))
︙			︙
1190 1191 1192 1193 1194 1195 1196 ~~1197~~ 1198 1199 1200 1201 1202 1203 1204	inc-results: (make-hash-table) inc-results-last-update: 0 inc-results-fmt: "~12a~12a~20a~12a~40a\n" ;; state status time duration test-name item-path run-info: #f runname: #f target: #f ) ) (define (runs:incremental-print-results run-id) (let ((curr-sec (current-seconds))) (if (> (- curr-sec (runs:gendat-inc-results-last-update runs:general-data)) 5) ;; at least five seconds since last update (let* ((run-dat (or (runs:gendat-run-info runs:general-data)(rmt:get-run-info run-id))) (runname (or (runs:gendat-runname runs:general-data) (db:get-value-by-header (db:get-rows run-dat)	\|	1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204	inc-results: (make-hash-table) inc-results-last-update: 0 inc-results-fmt: "~12a~12a~20a~12a~40a\n" ;; state status time duration test-name item-path run-info: #f runname: #f target: #f ) ) (define (runs:incremental-print-results run-id) (let ((curr-sec (current-seconds))) (if (> (- curr-sec (runs:gendat-inc-results-last-update runs:general-data)) 5) ;; at least five seconds since last update (let* ((run-dat (or (runs:gendat-run-info runs:general-data)(rmt:get-run-info run-id))) (runname (or (runs:gendat-runname runs:general-data) (db:get-value-by-header (db:get-rows run-dat)
︙			︙
1378 1379 1380 1381 1382 1383 1384 ~~1385~~ 1386 1387 1388 1389 1390 1391 1392	;; (not (or (and runremote ;; (remote-server-url runremote) ;; (server:ping (remote-server-url runremote))) ;; (server:check-if-running toppath)))) ;; (server:kind-run toppath)) (if (> num-running 0) ~~(set! last-time-some-running (current-seconds)))~~ (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup configdat "setup" "give-up-waiting") 36000))) (hash-table-set! max-tries-hash tfullname (+ (hash-table-ref/default max-tries-hash tfullname 0) 1))) ;; (debug:print 0 default-log-port "max-tries-hash: " (hash-table->alist max-tries-hash)) ;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard ;; and it is clear they should have run but did not.	\|	1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392	;; (not (or (and runremote ;; (remote-server-url runremote) ;; (server:ping (remote-server-url runremote))) ;; (server:check-if-running toppath)))) ;; (server:kind-run toppath)) (if (> num-running 0) (set! last-time-some-running (current-seconds))) (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup configdat "setup" "give-up-waiting") 36000))) (hash-table-set! max-tries-hash tfullname (+ (hash-table-ref/default max-tries-hash tfullname 0) 1))) ;; (debug:print 0 default-log-port "max-tries-hash: " (hash-table->alist max-tries-hash)) ;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard ;; and it is clear they should have run but did not.
︙			︙
1403 1404 1405 1406 1407 1408 1409 ~~1410~~ 1411 1412 1413 1414 1415 1416 1417	(if (runs:lownoise (conc "been marked do not run " tfullname) 60) (debug:print-info 0 default-log-port "Skipping test " tfullname " as it has been marked do not run due to being completed or not runnable")) (if (or (not (null? tal))(not (null? reg))) (loop (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns)))) ~~;; (loop (car tal)(cdr tal) reg reruns))))~~ (runs:incremental-print-results run-id) (debug:print 4 default-log-port "TOP OF LOOP => " "test-name: " test-name "\n hed: " hed "\n tal: " tal "\n reg: " reg	\|	1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417	(if (runs:lownoise (conc "been marked do not run " tfullname) 60) (debug:print-info 0 default-log-port "Skipping test " tfullname " as it has been marked do not run due to being completed or not runnable")) (if (or (not (null? tal))(not (null? reg))) (loop (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns)))) ;; (loop (car tal)(cdr tal) reg reruns)))) (runs:incremental-print-results run-id) (debug:print 4 default-log-port "TOP OF LOOP => " "test-name: " test-name "\n hed: " hed "\n tal: " tal "\n reg: " reg
︙			︙
1460 1461 1462 1463 1464 1465 1466 ~~1467~~ 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 ~~1482 1483 1484 1485 1486 1487 1488~~ 1489 1490 1491 1492 1493 1494 1495	(debug:print-info 4 default-log-port "OUTER COND: (not items)") (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) (not (null? tal))) (loop (car tal)(cdr tal) reg reruns)) (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) (runs:dat-can-run-more-tests-set! runsdat (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running (let ((loop-list (runs:process-expanded-tests runsdat testdat))) ~~(if loop-list (apply loop loop-list))))~~ ;; items processed into a list but not came in as a list been processed ;; ((and (list? items) ;; thus we know our items are already calculated (not itemdat)) ;; and not yet expanded into the list of things to be done (debug:print-info 4 default-log-port "cond branch - " "rtq-3") (debug:print-info 4 default-log-port "OUTER COND: (and (list? items)(not itemdat))") ;; Must determine if the items list is valid. Discard the test if it is not. (if (and (list? items) (> (length items) 0) (and (list? (car items)) (> (length (car items)) 0)) (debug:debug-mode 1)) (debug:print 2 default-log-port (map (lambda (row) ~~(conc (string-intersperse (map (lambda (varval) (string-intersperse varval "=")) row) " ") "\n")) items)))~~ (let* ((items-in-testpatt (filter (lambda (my-itemdat) (tests:match test-patts hed (item-list->path my-itemdat) )) ;; was: (tests:match test-patts hed (item-list->path my-itemdat) required: required-tests)) items) ))	\| \| \| \| \| \| \| \|	1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495	(debug:print-info 4 default-log-port "OUTER COND: (not items)") (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) (not (null? tal))) (loop (car tal)(cdr tal) reg reruns)) (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) (runs:dat-can-run-more-tests-set! runsdat (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running (let ((loop-list (runs:process-expanded-tests runsdat testdat))) (if loop-list (apply loop loop-list)))) ;; items processed into a list but not came in as a list been processed ;; ((and (list? items) ;; thus we know our items are already calculated (not itemdat)) ;; and not yet expanded into the list of things to be done (debug:print-info 4 default-log-port "cond branch - " "rtq-3") (debug:print-info 4 default-log-port "OUTER COND: (and (list? items)(not itemdat))") ;; Must determine if the items list is valid. Discard the test if it is not. (if (and (list? items) (> (length items) 0) (and (list? (car items)) (> (length (car items)) 0)) (debug:debug-mode 1)) (debug:print 2 default-log-port (map (lambda (row) (conc (string-intersperse (map (lambda (varval) (string-intersperse varval "=")) row) " ") "\n")) items))) (let* ((items-in-testpatt (filter (lambda (my-itemdat) (tests:match test-patts hed (item-list->path my-itemdat) )) ;; was: (tests:match test-patts hed (item-list->path my-itemdat) required: required-tests)) items) ))
︙			︙
1503 1504 1505 1506 1507 1508 1509 ~~1510 1511 1512 1513~~ 1514 ~~1515 1516~~ 1517 1518 1519 1520 1521 1522 1523 1524 1525 ~~1526~~ 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 ~~1543~~ 1544 1545 1546 1547 1548 1549 1550	(let* ((new-test-record (let ((newrec (make-tests:testqueue))) (vector-copy! test-record newrec) newrec)) (my-item-path (item-list->path my-itemdat)) (newtestname (db:test-make-full-name hed my-item-path))) ;; test names are unique on testname/item-path (tests:testqueue-set-items! new-test-record #f) (tests:testqueue-set-itemdat! new-test-record my-itemdat) (tests:testqueue-set-item_path! new-test-record my-item-path) (hash-table-set! test-records newtestname new-test-record) (set! tal (append tal (list newtestname))))) ;; since these are itemized create new test names testname/itempath items-in-testpatt))) ;; At this point we have possibly added items to tal but all must be handed off to ;; INNER COND logic. I think loop without rotating the queue ;; (loop hed tal reg reruns)) ;; (let ((newtal (append tal (list hed)))) ;; We should discard hed as it has been expanded into it's items? Yes, but only if this is an itemized test ;; (loop (car newtal)(cdr newtal) reg reruns) (if (null? tal) #f (loop (car tal)(cdr tal) reg reruns))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ;; EXPAND ITEMS ((or (procedure? items)(eq? items 'have-procedure)) (debug:print-info 4 default-log-port "cond branch - " "rtq-4") (let ((can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) (if (and (list? can-run-more) (car can-run-more)) (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) ;; itemized test expanded here (if loop-list (apply loop loop-list) (debug:print-info 4 default-log-port " -- Can't expand hed="hed) ) ) ;; if can't run more just loop with next possible test (loop (car newtal)(cdr newtal) reg reruns)))) ;; this case should not happen, added to help catch any bugs ((and (list? items) itemdat) (debug:print-info 4 default-log-port "cond branch - " "rtq-5") (debug:print-error 0 default-log-port "Should not have a list of items in a test and the itemspath set - please report this") (exit 1)) ((not (null? reruns)) (debug:print-info 4 default-log-port "cond branch - " "rtq-6")	\| \| \| \| \| \| \| \|	1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550	(let* ((new-test-record (let ((newrec (make-tests:testqueue))) (vector-copy! test-record newrec) newrec)) (my-item-path (item-list->path my-itemdat)) (newtestname (db:test-make-full-name hed my-item-path))) ;; test names are unique on testname/item-path (tests:testqueue-set-items! new-test-record #f) (tests:testqueue-set-itemdat! new-test-record my-itemdat) (tests:testqueue-set-item_path! new-test-record my-item-path) (hash-table-set! test-records newtestname new-test-record) (set! tal (append tal (list newtestname))))) ;; since these are itemized create new test names testname/itempath items-in-testpatt))) ;; At this point we have possibly added items to tal but all must be handed off to ;; INNER COND logic. I think loop without rotating the queue ;; (loop hed tal reg reruns)) ;; (let ((newtal (append tal (list hed)))) ;; We should discard hed as it has been expanded into it's items? Yes, but only if this is an itemized test ;; (loop (car newtal)(cdr newtal) reg reruns) (if (null? tal) #f (loop (car tal)(cdr tal) reg reruns))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ;; EXPAND ITEMS ((or (procedure? items)(eq? items 'have-procedure)) (debug:print-info 4 default-log-port "cond branch - " "rtq-4") (let ((can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) (if (and (list? can-run-more) (car can-run-more)) (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) ;; itemized test expanded here (if loop-list (apply loop loop-list) (debug:print-info 4 default-log-port " -- Can't expand hed="hed) ) ) ;; if can't run more just loop with next possible test (loop (car newtal)(cdr newtal) reg reruns)))) ;; this case should not happen, added to help catch any bugs ((and (list? items) itemdat) (debug:print-info 4 default-log-port "cond branch - " "rtq-5") (debug:print-error 0 default-log-port "Should not have a list of items in a test and the itemspath set - please report this") (exit 1)) ((not (null? reruns)) (debug:print-info 4 default-log-port "cond branch - " "rtq-6")
︙			︙
1682 1683 1684 1685 1686 1687 1688 ~~1689 1690~~ 1691 1692 1693 1694 1695 1696 1697	;; ;; There is now a single call to runs:update-all-test_meta and this ;; per-test call is not needed. Given the delicacy of the move to ;; v1.55 this code is being left in place for the time being. ;; (if (not (hash-table-ref/default test-meta-updated test-name #f)) (begin ~~(hash-table-set! test-meta-updated test-name #t) (runs:update-test_meta test-name test-conf)))~~ ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer")) (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/")) (test-id (rmt:get-test-id run-id test-name item-path)) (testdat (if test-id (rmt:get-test-info-by-id run-id test-id) #f))) (if (not testdat) (let loop ()	\| \|	1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697	;; ;; There is now a single call to runs:update-all-test_meta and this ;; per-test call is not needed. Given the delicacy of the move to ;; v1.55 this code is being left in place for the time being. ;; (if (not (hash-table-ref/default test-meta-updated test-name #f)) (begin (hash-table-set! test-meta-updated test-name #t) (runs:update-test_meta test-name test-conf))) ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer")) (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/")) (test-id (rmt:get-test-id run-id test-name item-path)) (testdat (if test-id (rmt:get-test-info-by-id run-id test-id) #f))) (if (not testdat) (let loop ()
︙			︙
1927 1928 1929 1930 1931 1932 1933 ~~1934~~ 1935 1936 1937 1938 1939 1940 1941	" " (time->string (seconds->local-time (simple-run-event_time run)) "WW%V.%u %H:%M:%S") " " (if remove "REMOVE" ""))) ((remove-runs) (if remove (system (conc precmd " megatest -remove-runs -target " target " -runname " (simple-run-runname run) " -testpatt %")))) ((archive) (if remove (system (conc precmd " megatest -archive save-remove -target " target " -runname " (simple-run-runname run) " -testpatt %")))))) actions)))) ~~sorted)))~~ ;; (print "Sorted: " (map simple-run-event_time sorted)) ;; (print "Remove: " (map simple-run-event_time to-remove)))) (hash-table-keys runs-ht)) runs-ht)) ;; (define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep) ;; (let ((data (runs:get-all-but-most-recent-n-per-target target-patts runpatt num-to-keep)))	\|	1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941	" " (time->string (seconds->local-time (simple-run-event_time run)) "WW%V.%u %H:%M:%S") " " (if remove "REMOVE" ""))) ((remove-runs) (if remove (system (conc precmd " megatest -remove-runs -target " target " -runname " (simple-run-runname run) " -testpatt %")))) ((archive) (if remove (system (conc precmd " megatest -archive save-remove -target " target " -runname " (simple-run-runname run) " -testpatt %")))))) actions)))) sorted))) ;; (print "Sorted: " (map simple-run-event_time sorted)) ;; (print "Remove: " (map simple-run-event_time to-remove)))) (hash-table-keys runs-ht)) runs-ht)) ;; (define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep) ;; (let ((data (runs:get-all-but-most-recent-n-per-target target-patts runpatt num-to-keep)))
︙			︙
1986 1987 1988 1989 1990 1991 1992 ~~1993 1994 1995 1996 1997 1998~~ 1999 2000 2001 2002 2003 2004 2005	(exit))) (for-each (lambda (run) (let ((runkey (string-intersperse (map (lambda (k) (db:get-value-by-header run header k)) keys) "/")) (dirs-to-remove (make-hash-table)) (proc-get-tests (lambda (run-id) (mt:get-tests-for-run run-id testpatt states statuses not-in: #f sort-by: (case action ((remove-runs) 'rundir) (else 'event_time)))))) (let* ((run-id (db:get-value-by-header run header "id")) (run-state (db:get-value-by-header run header "state")) (run-name (db:get-value-by-header run header "runname")) (tests (if (not (equal? run-state "locked")) (proc-get-tests run-id) '())) (lasttpath "/does/not/exist/I/hope")	\| \| \| \| \| \|	1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005	(exit))) (for-each (lambda (run) (let ((runkey (string-intersperse (map (lambda (k) (db:get-value-by-header run header k)) keys) "/")) (dirs-to-remove (make-hash-table)) (proc-get-tests (lambda (run-id) (mt:get-tests-for-run run-id testpatt states statuses not-in: #f sort-by: (case action ((remove-runs) 'rundir) (else 'event_time)))))) (let* ((run-id (db:get-value-by-header run header "id")) (run-state (db:get-value-by-header run header "state")) (run-name (db:get-value-by-header run header "runname")) (tests (if (not (equal? run-state "locked")) (proc-get-tests run-id) '())) (lasttpath "/does/not/exist/I/hope")
︙			︙
2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 ~~2077 2078 2079 2080 2081 2082 2083 2084 2085~~ ~~2086~~ ~~2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112~~ 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133	(dirb ;; (rmt:sdb-qry 'getstr (db:test-get-rundir b))) ;; ) ;; ((filedb:get-path fdb (db:test-get-rundir b)))) (if (and (string? dira)(string? dirb)) (> (string-length dira)(string-length dirb)) #f)))))) (toplevel-retries (make-hash-table)) ;; try three times to loop through and remove top level tests (test-retry-time (make-hash-table)) (allow-run-time 10)) ;; seconds to allow for killing tests before just brutally killing 'em (let loop ((test (car sorted-tests)) (tal (cdr sorted-tests))) (let* ((test-id (db:test-get-id test)) (new-test-dat (rmt:get-test-info-by-id run-id test-id))) (if (not new-test-dat) (begin (debug:print-error 0 default-log-port "We have a test-id of " test-id " but no record was found. NOTE: No locking of records is done between processes, do not simultaneously remove the same run from two processes!") (if (not (null? tal)) (loop (car tal)(cdr tal)))) (let* ((item-path (db:test-get-item-path new-test-dat)) (test-name (db:test-get-testname new-test-dat)) (run-dir ;;(filedb:get-path fdb ;; (rmt:sdb-qry 'getid (db:test-get-rundir new-test-dat)) ;; ) ;; run dir is from the link tree (test-state (db:test-get-state new-test-dat)) (test-fulln (db:test-get-fullname new-test-dat)) (uname (db:test-get-uname new-test-dat)) (toplevel-with-children (and (db:test-get-is-toplevel test) (> (rmt:test-toplevel-num-items run-id test-name) 0)))) (case action ((remove-runs) ;; if the test is a toplevel-with-children issue an error and do not remove (if toplevel-with-children ~~(begin~~ (debug:print 0 default-log-port "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests") (hash-table-set! toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1)) (if (> (hash-table-ref toplevel-retries test-fulln) 3) (if (not (null? tal)) (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries (let ((newtal (append tal (list test)))) (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue ~~(begin~~ (debug:print-info 0 default-log-port "test: " test-name " itest-state: " test-state) (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) (begin (if (not (hash-table-ref/default test-retry-time test-fulln #f)) (begin ;; want to set to REMOVING BUT CANNOT do it here? (hash-table-set! test-retry-time test-fulln (current-seconds)))) (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give ;; up and blow it away. (begin (debug:print 0 default-log-port "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing") (mt:test-set-state-status-by-id run-id (db:test-get-id test) "FAILEDKILL" "n/a" #f) (thread-sleep! 1)) (begin (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f) (thread-sleep! 1))) ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... (if (null? tal) (loop new-test-dat tal) (loop (car tal)(append tal (list new-test-dat))))) (begin (runs:remove-test-directory new-test-dat mode) ;; 'remove-all) (if (not (null? tal)) (loop (car tal)(cdr tal))))))) (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) ((set-state-status) (debug:print-info 2 default-log-port "new state " (car state-status) ", new status " (cadr state-status)) (mt:test-set-state-status-by-id run-id (db:test-get-id test) (car state-status)(cadr state-status) #f) (if (not (null? tal)) (loop (car tal)(cdr tal)))) ((run-wait) (debug:print-info 2 default-log-port "still waiting, " (length tests) " tests still running") (thread-sleep! 10) (let ((new-tests (proc-get-tests run-id))) (if (null? new-tests) (debug:print-info 1 default-log-port "Run completed according to zero tests matching provided criteria.") (loop (car new-tests)(cdr new-tests))))) ((archive) (if (and run-dir (not toplevel-with-children)) (let ((ddir (conc run-dir "/"))) (case (string->symbol (args:get-arg "-archive")) ((save save-remove keep-html) (if (common:file-exists? ddir) (debug:print-info 0 default-log-port "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir))))))) (if (not (null? tal))	> > > > > > \| < \| \| \| \| \| \| \| > > > > > > > > > > > > > > > > > \| > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| > > >	2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192	(dirb ;; (rmt:sdb-qry 'getstr (db:test-get-rundir b))) ;; ) ;; ((filedb:get-path fdb (db:test-get-rundir b)))) (if (and (string? dira)(string? dirb)) (> (string-length dira)(string-length dirb)) #f)))))) (toplevel-retries (make-hash-table)) ;; try three times to loop through and remove top level tests (test-retry-time (make-hash-table)) (backgrounded-remove-status (make-hash-table)) (backgrounded-remove-last-visit (make-hash-table)) (backgrounded-remove-result (make-hash-table)) (allow-run-time 10)) ;; seconds to allow for killing tests before just brutally killing 'em (let loop ((test (car sorted-tests)) (tal (cdr sorted-tests))) (let* ((test-id (db:test-get-id test)) (new-test-dat (rmt:get-test-info-by-id run-id test-id))) (if (not new-test-dat) (begin (debug:print-error 0 default-log-port "We have a test-id of " test-id " but no record was found. NOTE: No locking of records is done between processes, do not simultaneously remove the same run from two processes!") (if (not (null? tal)) (loop (car tal)(cdr tal)))) (let* ((item-path (db:test-get-item-path new-test-dat)) (test-name (db:test-get-testname new-test-dat)) (run-dir ;;(filedb:get-path fdb ;; (rmt:sdb-qry 'getid (db:test-get-rundir new-test-dat)) ;; ) ;; run dir is from the link tree (has-subrun (and (subrun:subrun-test-initialized? run-dir) (not (subrun:subrun-removed? run-dir)))) (test-state (db:test-get-state new-test-dat)) (test-fulln (db:test-get-fullname new-test-dat)) (uname (db:test-get-uname new-test-dat)) (toplevel-with-children (and (db:test-get-is-toplevel test) (> (rmt:test-toplevel-num-items run-id test-name) 0)))) (case action ((remove-runs) ;; if the test is a toplevel-with-children issue an error and do not remove (cond (toplevel-with-children (debug:print 0 default-log-port "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests") (hash-table-set! toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1)) (if (> (hash-table-ref toplevel-retries test-fulln) 3) (if (not (null? tal)) (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries (let ((newtal (append tal (list test)))) (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue (has-subrun ;; (let ((last-visit (hash-table-ref/default backgrounded-remove-last-visit test-fulln 0)) (now (current-seconds)) (rem-status (hash-table-ref/default backgrounded-remove-status test-fulln 'not-started))) (case rem-status ((not-started) (debug:print 0 default-log-port "WARNING: postponing removal of " test-fulln " with run-id " run-id " as it has a subrun") (hash-table-set! backgrounded-remove-status test-fulln 'started) (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) (common:send-thunk-to-background-thread (lambda () (let* ((subrun-remove-succeeded (subrun:remove-subrun run-dir new-test-dat test-name item-path test-state test-fulln toplevel-with-children test))) (hash-table-set! backgrounded-remove-result test-fulln subrun-remove-succeeded) (hash-table-set! backgrounded-remove-status test-fulln 'done))) name: (conc "remove-subrun:"test-fulln)) ;; send to back of line, loop (let ((newtal (append tal (list test)))) (loop (car newtal)(cdr newtal))) ) ((started) ;; if last visit was within last second, sleep 1 second (if (< (- now last-visit) 1.0) (thread-sleep! 1.0)) (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) ;; send to back of line, loop (let ((newtal (append tal (list test)))) (loop (car newtal)(cdr newtal))) ) ((done) ;; drop this one; if remaining, loop, else finish (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) (let ((subrun-remove-succeeded (hash-table-ref/default backgrounded-remove-result test-fulln 'exception))) (cond ((eq? subrun-remove-succeeded 'exception) (let* ((logfile (subrun:get-log-path run-dir "remove"))) (debug:print 0 default-log-port "ERROR: removing subrun of of " test-fulln " with run-id " run-id " ; see logfile @ "logfile))) (subrun-remove-succeeded (debug:print 0 default-log-port "Now removing of " test-fulln " with run-id " run-id " since subrun was removed.") (runs:remove-test-directory new-test-dat mode)) (else (let* ((logfile (subrun:get-log-path run-dir "remove"))) (debug:print 0 default-log-port "WARNING: removal of subrun failed. Please check "logfile" for details.")))) (if (not (null? tal)) (loop (car tal)(cdr tal))))) ) ; end case rem-status ) ; end let ); end cond has-subrun (else (debug:print-info 0 default-log-port "test: " test-name " itest-state: " test-state) (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) (begin (if (not (hash-table-ref/default test-retry-time test-fulln #f)) (begin ;; want to set to REMOVING BUT CANNOT do it here? (hash-table-set! test-retry-time test-fulln (current-seconds)))) (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give ;; up and blow it away. (begin (debug:print 0 default-log-port "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing") (mt:test-set-state-status-by-id run-id (db:test-get-id test) "FAILEDKILL" "n/a" #f) (thread-sleep! 1)) (begin (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f) (thread-sleep! 1))) ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... (if (null? tal) (loop new-test-dat tal) (loop (car tal)(append tal (list new-test-dat))))) (begin (runs:remove-test-directory new-test-dat mode) ;; 'remove-all) (if (not (null? tal)) (loop (car tal)(cdr tal))))))) (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) ((set-state-status) ;; BB TODO - manage has-subrun case (debug:print-info 2 default-log-port "new state " (car state-status) ", new status " (cadr state-status)) (mt:test-set-state-status-by-id run-id (db:test-get-id test) (car state-status)(cadr state-status) #f) (if (not (null? tal)) (loop (car tal)(cdr tal)))) ((run-wait) ;; BB TODO - manage has-subrun case (debug:print-info 2 default-log-port "still waiting, " (length tests) " tests still running") (thread-sleep! 10) (let ((new-tests (proc-get-tests run-id))) (if (null? new-tests) (debug:print-info 1 default-log-port "Run completed according to zero tests matching provided criteria.") (loop (car new-tests)(cdr new-tests))))) ((archive) ;; BB TODO - manage has-subrun case (if (and run-dir (not toplevel-with-children)) (let ((ddir (conc run-dir "/"))) (case (string->symbol (args:get-arg "-archive")) ((save save-remove keep-html) (if (common:file-exists? ddir) (debug:print-info 0 default-log-port "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir))))))) (if (not (null? tal))
︙			︙