Megatest: Diff

Differences From Artifact [b272222fa5]:

File runs.scm — part of check-in [a91345f8e2] at 2017-12-20 17:15:36 on branch v1.65 — Changes to fix remove-keep (user: ritikaag, size: 126975) [annotate] [blame] [check-ins using]

To Artifact [4e15a438a3]:

File runs.scm — part of check-in [2e121e3655] at 2017-12-28 11:20:23 on branch v1.65 — enabled remove-run to propagate to subruns (user: bjbarcla, size: 134085) [annotate] [blame] [check-ins using] [more...]

︙			︙
1148 1149 1150 1151 1152 1153 1154 ~~1155~~ 1156 1157 1158 1159 1160 1161 1162 ~~1163~~ 1164 1165 1166 1167 1168 1169 1170	(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f) ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug. (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL (list (if (null? tal)(car newtal)(car tal)) tal reg reruns))))) ~~;; ELSE: can't drop this - maybe running? Just keep trying~~ ;;(if (not (or (not (null? reg))(not (null? tal)))) ;; old experiment (let ((runable-tests (runs:runable-tests prereqs-not-met))) ;; SUSPICIOUS: Should look at more than just prereqs-not-met? (if (null? runable-tests) #f ;; I think we are truly done here (runs:loop-values newtal reg reglen regfull reruns))) ;;) ;;from old experiment ~~) ;; end if (or (not (null? reg))(not (null? tal)))~~ )))))) ;; scan a list of tests looking to see if any are potentially runnable ;; (define (runs:runable-tests tests) (filter (lambda (t) (if (not (vector? t))	\| \|	1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170	(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f) ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug. (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL (list (if (null? tal)(car newtal)(car tal)) tal reg reruns))))) ;; ELSE: can't drop this - maybe running? Just keep trying ;;(if (not (or (not (null? reg))(not (null? tal)))) ;; old experiment (let ((runable-tests (runs:runable-tests prereqs-not-met))) ;; SUSPICIOUS: Should look at more than just prereqs-not-met? (if (null? runable-tests) #f ;; I think we are truly done here (runs:loop-values newtal reg reglen regfull reruns))) ;;) ;;from old experiment ) ;; end if (or (not (null? reg))(not (null? tal))) )))))) ;; scan a list of tests looking to see if any are potentially runnable ;; (define (runs:runable-tests tests) (filter (lambda (t) (if (not (vector? t))
︙			︙
1190 1191 1192 1193 1194 1195 1196 ~~1197~~ 1198 1199 1200 1201 1202 1203 1204	inc-results: (make-hash-table) inc-results-last-update: 0 inc-results-fmt: "~12a~12a~20a~12a~40a\n" ;; state status time duration test-name item-path run-info: #f runname: #f target: #f ) ) (define (runs:incremental-print-results run-id) (let ((curr-sec (current-seconds))) (if (> (- curr-sec (runs:gendat-inc-results-last-update runs:general-data)) 5) ;; at least five seconds since last update (let* ((run-dat (or (runs:gendat-run-info runs:general-data)(rmt:get-run-info run-id))) (runname (or (runs:gendat-runname runs:general-data) (db:get-value-by-header (db:get-rows run-dat)	\|	1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204	inc-results: (make-hash-table) inc-results-last-update: 0 inc-results-fmt: "~12a~12a~20a~12a~40a\n" ;; state status time duration test-name item-path run-info: #f runname: #f target: #f ) ) (define (runs:incremental-print-results run-id) (let ((curr-sec (current-seconds))) (if (> (- curr-sec (runs:gendat-inc-results-last-update runs:general-data)) 5) ;; at least five seconds since last update (let* ((run-dat (or (runs:gendat-run-info runs:general-data)(rmt:get-run-info run-id))) (runname (or (runs:gendat-runname runs:general-data) (db:get-value-by-header (db:get-rows run-dat)
︙			︙
1378 1379 1380 1381 1382 1383 1384 ~~1385~~ 1386 1387 1388 1389 1390 1391 1392	;; (not (or (and runremote ;; (remote-server-url runremote) ;; (server:ping (remote-server-url runremote))) ;; (server:check-if-running toppath)))) ;; (server:kind-run toppath)) (if (> num-running 0) ~~(set! last-time-some-running (current-seconds)))~~ (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup configdat "setup" "give-up-waiting") 36000))) (hash-table-set! max-tries-hash tfullname (+ (hash-table-ref/default max-tries-hash tfullname 0) 1))) ;; (debug:print 0 default-log-port "max-tries-hash: " (hash-table->alist max-tries-hash)) ;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard ;; and it is clear they should have run but did not.	\|	1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392	;; (not (or (and runremote ;; (remote-server-url runremote) ;; (server:ping (remote-server-url runremote))) ;; (server:check-if-running toppath)))) ;; (server:kind-run toppath)) (if (> num-running 0) (set! last-time-some-running (current-seconds))) (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup configdat "setup" "give-up-waiting") 36000))) (hash-table-set! max-tries-hash tfullname (+ (hash-table-ref/default max-tries-hash tfullname 0) 1))) ;; (debug:print 0 default-log-port "max-tries-hash: " (hash-table->alist max-tries-hash)) ;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard ;; and it is clear they should have run but did not.
︙			︙
1403 1404 1405 1406 1407 1408 1409 ~~1410~~ 1411 1412 1413 1414 1415 1416 1417	(if (runs:lownoise (conc "been marked do not run " tfullname) 60) (debug:print-info 0 default-log-port "Skipping test " tfullname " as it has been marked do not run due to being completed or not runnable")) (if (or (not (null? tal))(not (null? reg))) (loop (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns)))) ~~;; (loop (car tal)(cdr tal) reg reruns))))~~ (runs:incremental-print-results run-id) (debug:print 4 default-log-port "TOP OF LOOP => " "test-name: " test-name "\n hed: " hed "\n tal: " tal "\n reg: " reg	\|	1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417	(if (runs:lownoise (conc "been marked do not run " tfullname) 60) (debug:print-info 0 default-log-port "Skipping test " tfullname " as it has been marked do not run due to being completed or not runnable")) (if (or (not (null? tal))(not (null? reg))) (loop (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns)))) ;; (loop (car tal)(cdr tal) reg reruns)))) (runs:incremental-print-results run-id) (debug:print 4 default-log-port "TOP OF LOOP => " "test-name: " test-name "\n hed: " hed "\n tal: " tal "\n reg: " reg
︙			︙
1460 1461 1462 1463 1464 1465 1466 ~~1467~~ 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 ~~1482 1483 1484 1485 1486 1487 1488~~ 1489 1490 1491 1492 1493 1494 1495	(debug:print-info 4 default-log-port "OUTER COND: (not items)") (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) (not (null? tal))) (loop (car tal)(cdr tal) reg reruns)) (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) (runs:dat-can-run-more-tests-set! runsdat (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running (let ((loop-list (runs:process-expanded-tests runsdat testdat))) ~~(if loop-list (apply loop loop-list))))~~ ;; items processed into a list but not came in as a list been processed ;; ((and (list? items) ;; thus we know our items are already calculated (not itemdat)) ;; and not yet expanded into the list of things to be done (debug:print-info 4 default-log-port "cond branch - " "rtq-3") (debug:print-info 4 default-log-port "OUTER COND: (and (list? items)(not itemdat))") ;; Must determine if the items list is valid. Discard the test if it is not. (if (and (list? items) (> (length items) 0) (and (list? (car items)) (> (length (car items)) 0)) (debug:debug-mode 1)) (debug:print 2 default-log-port (map (lambda (row) ~~(conc (string-intersperse (map (lambda (varval) (string-intersperse varval "=")) row) " ") "\n")) items)))~~ (let* ((items-in-testpatt (filter (lambda (my-itemdat) (tests:match test-patts hed (item-list->path my-itemdat) )) ;; was: (tests:match test-patts hed (item-list->path my-itemdat) required: required-tests)) items) ))	\| \| \| \| \| \| \| \|	1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495	(debug:print-info 4 default-log-port "OUTER COND: (not items)") (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) (not (null? tal))) (loop (car tal)(cdr tal) reg reruns)) (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) (runs:dat-can-run-more-tests-set! runsdat (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running (let ((loop-list (runs:process-expanded-tests runsdat testdat))) (if loop-list (apply loop loop-list)))) ;; items processed into a list but not came in as a list been processed ;; ((and (list? items) ;; thus we know our items are already calculated (not itemdat)) ;; and not yet expanded into the list of things to be done (debug:print-info 4 default-log-port "cond branch - " "rtq-3") (debug:print-info 4 default-log-port "OUTER COND: (and (list? items)(not itemdat))") ;; Must determine if the items list is valid. Discard the test if it is not. (if (and (list? items) (> (length items) 0) (and (list? (car items)) (> (length (car items)) 0)) (debug:debug-mode 1)) (debug:print 2 default-log-port (map (lambda (row) (conc (string-intersperse (map (lambda (varval) (string-intersperse varval "=")) row) " ") "\n")) items))) (let* ((items-in-testpatt (filter (lambda (my-itemdat) (tests:match test-patts hed (item-list->path my-itemdat) )) ;; was: (tests:match test-patts hed (item-list->path my-itemdat) required: required-tests)) items) ))
︙			︙
1503 1504 1505 1506 1507 1508 1509 ~~1510 1511 1512 1513~~ 1514 ~~1515 1516~~ 1517 1518 1519 1520 1521 1522 1523 1524 1525 ~~1526~~ 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 ~~1543~~ 1544 1545 1546 1547 1548 1549 1550	(let* ((new-test-record (let ((newrec (make-tests:testqueue))) (vector-copy! test-record newrec) newrec)) (my-item-path (item-list->path my-itemdat)) (newtestname (db:test-make-full-name hed my-item-path))) ;; test names are unique on testname/item-path (tests:testqueue-set-items! new-test-record #f) (tests:testqueue-set-itemdat! new-test-record my-itemdat) (tests:testqueue-set-item_path! new-test-record my-item-path) (hash-table-set! test-records newtestname new-test-record) (set! tal (append tal (list newtestname))))) ;; since these are itemized create new test names testname/itempath items-in-testpatt))) ;; At this point we have possibly added items to tal but all must be handed off to ;; INNER COND logic. I think loop without rotating the queue ;; (loop hed tal reg reruns)) ;; (let ((newtal (append tal (list hed)))) ;; We should discard hed as it has been expanded into it's items? Yes, but only if this is an itemized test ;; (loop (car newtal)(cdr newtal) reg reruns) (if (null? tal) #f (loop (car tal)(cdr tal) reg reruns))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ;; EXPAND ITEMS ((or (procedure? items)(eq? items 'have-procedure)) (debug:print-info 4 default-log-port "cond branch - " "rtq-4") (let ((can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) (if (and (list? can-run-more) (car can-run-more)) (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) ;; itemized test expanded here (if loop-list (apply loop loop-list) (debug:print-info 4 default-log-port " -- Can't expand hed="hed) ) ) ;; if can't run more just loop with next possible test (loop (car newtal)(cdr newtal) reg reruns)))) ;; this case should not happen, added to help catch any bugs ((and (list? items) itemdat) (debug:print-info 4 default-log-port "cond branch - " "rtq-5") (debug:print-error 0 default-log-port "Should not have a list of items in a test and the itemspath set - please report this") (exit 1)) ((not (null? reruns)) (debug:print-info 4 default-log-port "cond branch - " "rtq-6")	\| \| \| \| \| \| \| \|	1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550	(let* ((new-test-record (let ((newrec (make-tests:testqueue))) (vector-copy! test-record newrec) newrec)) (my-item-path (item-list->path my-itemdat)) (newtestname (db:test-make-full-name hed my-item-path))) ;; test names are unique on testname/item-path (tests:testqueue-set-items! new-test-record #f) (tests:testqueue-set-itemdat! new-test-record my-itemdat) (tests:testqueue-set-item_path! new-test-record my-item-path) (hash-table-set! test-records newtestname new-test-record) (set! tal (append tal (list newtestname))))) ;; since these are itemized create new test names testname/itempath items-in-testpatt))) ;; At this point we have possibly added items to tal but all must be handed off to ;; INNER COND logic. I think loop without rotating the queue ;; (loop hed tal reg reruns)) ;; (let ((newtal (append tal (list hed)))) ;; We should discard hed as it has been expanded into it's items? Yes, but only if this is an itemized test ;; (loop (car newtal)(cdr newtal) reg reruns) (if (null? tal) #f (loop (car tal)(cdr tal) reg reruns))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ;; EXPAND ITEMS ((or (procedure? items)(eq? items 'have-procedure)) (debug:print-info 4 default-log-port "cond branch - " "rtq-4") (let ((can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) (if (and (list? can-run-more) (car can-run-more)) (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) ;; itemized test expanded here (if loop-list (apply loop loop-list) (debug:print-info 4 default-log-port " -- Can't expand hed="hed) ) ) ;; if can't run more just loop with next possible test (loop (car newtal)(cdr newtal) reg reruns)))) ;; this case should not happen, added to help catch any bugs ((and (list? items) itemdat) (debug:print-info 4 default-log-port "cond branch - " "rtq-5") (debug:print-error 0 default-log-port "Should not have a list of items in a test and the itemspath set - please report this") (exit 1)) ((not (null? reruns)) (debug:print-info 4 default-log-port "cond branch - " "rtq-6")
︙			︙
1682 1683 1684 1685 1686 1687 1688 ~~1689 1690~~ 1691 1692 1693 1694 1695 1696 1697	;; ;; There is now a single call to runs:update-all-test_meta and this ;; per-test call is not needed. Given the delicacy of the move to ;; v1.55 this code is being left in place for the time being. ;; (if (not (hash-table-ref/default test-meta-updated test-name #f)) (begin ~~(hash-table-set! test-meta-updated test-name #t) (runs:update-test_meta test-name test-conf)))~~ ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer")) (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/")) (test-id (rmt:get-test-id run-id test-name item-path)) (testdat (if test-id (rmt:get-test-info-by-id run-id test-id) #f))) (if (not testdat) (let loop ()	\| \|	1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697	;; ;; There is now a single call to runs:update-all-test_meta and this ;; per-test call is not needed. Given the delicacy of the move to ;; v1.55 this code is being left in place for the time being. ;; (if (not (hash-table-ref/default test-meta-updated test-name #f)) (begin (hash-table-set! test-meta-updated test-name #t) (runs:update-test_meta test-name test-conf))) ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer")) (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/")) (test-id (rmt:get-test-id run-id test-name item-path)) (testdat (if test-id (rmt:get-test-info-by-id run-id test-id) #f))) (if (not testdat) (let loop ()
︙			︙
1927 1928 1929 1930 1931 1932 1933 ~~1934~~ 1935 1936 1937 1938 1939 1940 1941	" " (time->string (seconds->local-time (simple-run-event_time run)) "WW%V.%u %H:%M:%S") " " (if remove "REMOVE" ""))) ((remove-runs) (if remove (system (conc precmd " megatest -remove-runs -target " target " -runname " (simple-run-runname run) " -testpatt %")))) ((archive) (if remove (system (conc precmd " megatest -archive save-remove -target " target " -runname " (simple-run-runname run) " -testpatt %")))))) actions)))) ~~sorted)))~~ ;; (print "Sorted: " (map simple-run-event_time sorted)) ;; (print "Remove: " (map simple-run-event_time to-remove)))) (hash-table-keys runs-ht)) runs-ht)) ;; (define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep) ;; (let ((data (runs:get-all-but-most-recent-n-per-target target-patts runpatt num-to-keep)))	\|	1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941	" " (time->string (seconds->local-time (simple-run-event_time run)) "WW%V.%u %H:%M:%S") " " (if remove "REMOVE" ""))) ((remove-runs) (if remove (system (conc precmd " megatest -remove-runs -target " target " -runname " (simple-run-runname run) " -testpatt %")))) ((archive) (if remove (system (conc precmd " megatest -archive save-remove -target " target " -runname " (simple-run-runname run) " -testpatt %")))))) actions)))) sorted))) ;; (print "Sorted: " (map simple-run-event_time sorted)) ;; (print "Remove: " (map simple-run-event_time to-remove)))) (hash-table-keys runs-ht)) runs-ht)) ;; (define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep) ;; (let ((data (runs:get-all-but-most-recent-n-per-target target-patts runpatt num-to-keep)))
︙			︙
1986 1987 1988 1989 1990 1991 1992 ~~1993 1994 1995 1996 1997 1998~~ 1999 2000 2001 2002 2003 2004 2005	(exit))) (for-each (lambda (run) (let ((runkey (string-intersperse (map (lambda (k) (db:get-value-by-header run header k)) keys) "/")) (dirs-to-remove (make-hash-table)) (proc-get-tests (lambda (run-id) (mt:get-tests-for-run run-id testpatt states statuses not-in: #f sort-by: (case action ((remove-runs) 'rundir) (else 'event_time)))))) (let* ((run-id (db:get-value-by-header run header "id")) (run-state (db:get-value-by-header run header "state")) (run-name (db:get-value-by-header run header "runname")) (tests (if (not (equal? run-state "locked")) (proc-get-tests run-id) '())) (lasttpath "/does/not/exist/I/hope")	\| \| \| \| \| \|	1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005	(exit))) (for-each (lambda (run) (let ((runkey (string-intersperse (map (lambda (k) (db:get-value-by-header run header k)) keys) "/")) (dirs-to-remove (make-hash-table)) (proc-get-tests (lambda (run-id) (mt:get-tests-for-run run-id testpatt states statuses not-in: #f sort-by: (case action ((remove-runs) 'rundir) (else 'event_time)))))) (let* ((run-id (db:get-value-by-header run header "id")) (run-state (db:get-value-by-header run header "state")) (run-name (db:get-value-by-header run header "runname")) (tests (if (not (equal? run-state "locked")) (proc-get-tests run-id) '())) (lasttpath "/does/not/exist/I/hope")
︙			︙
2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 ~~2077 2078 2079 2080 2081 2082 2083 2084 2085~~ ~~2086~~ ~~2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112~~ 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133	(dirb ;; (rmt:sdb-qry 'getstr (db:test-get-rundir b))) ;; ) ;; ((filedb:get-path fdb (db:test-get-rundir b)))) (if (and (string? dira)(string? dirb)) (> (string-length dira)(string-length dirb)) #f)))))) (toplevel-retries (make-hash-table)) ;; try three times to loop through and remove top level tests (test-retry-time (make-hash-table)) (allow-run-time 10)) ;; seconds to allow for killing tests before just brutally killing 'em (let loop ((test (car sorted-tests)) (tal (cdr sorted-tests))) (let* ((test-id (db:test-get-id test)) (new-test-dat (rmt:get-test-info-by-id run-id test-id))) (if (not new-test-dat) (begin (debug:print-error 0 default-log-port "We have a test-id of " test-id " but no record was found. NOTE: No locking of records is done between processes, do not simultaneously remove the same run from two processes!") (if (not (null? tal)) (loop (car tal)(cdr tal)))) (let* ((item-path (db:test-get-item-path new-test-dat)) (test-name (db:test-get-testname new-test-dat)) (run-dir ;;(filedb:get-path fdb ;; (rmt:sdb-qry 'getid (db:test-get-rundir new-test-dat)) ;; ) ;; run dir is from the link tree (test-state (db:test-get-state new-test-dat)) (test-fulln (db:test-get-fullname new-test-dat)) (uname (db:test-get-uname new-test-dat)) (toplevel-with-children (and (db:test-get-is-toplevel test) (> (rmt:test-toplevel-num-items run-id test-name) 0)))) (case action ((remove-runs) ;; if the test is a toplevel-with-children issue an error and do not remove (if toplevel-with-children ~~(begin~~ (debug:print 0 default-log-port "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests") (hash-table-set! toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1)) (if (> (hash-table-ref toplevel-retries test-fulln) 3) (if (not (null? tal)) (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries (let ((newtal (append tal (list test)))) (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue ~~(begin~~ (debug:print-info 0 default-log-port "test: " test-name " itest-state: " test-state) (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) (begin (if (not (hash-table-ref/default test-retry-time test-fulln #f)) (begin ;; want to set to REMOVING BUT CANNOT do it here? (hash-table-set! test-retry-time test-fulln (current-seconds)))) (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give ;; up and blow it away. (begin (debug:print 0 default-log-port "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing") (mt:test-set-state-status-by-id run-id (db:test-get-id test) "FAILEDKILL" "n/a" #f) (thread-sleep! 1)) (begin (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f) (thread-sleep! 1))) ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... (if (null? tal) (loop new-test-dat tal) (loop (car tal)(append tal (list new-test-dat))))) (begin (runs:remove-test-directory new-test-dat mode) ;; 'remove-all) (if (not (null? tal)) (loop (car tal)(cdr tal))))))) (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) ((set-state-status) (debug:print-info 2 default-log-port "new state " (car state-status) ", new status " (cadr state-status)) (mt:test-set-state-status-by-id run-id (db:test-get-id test) (car state-status)(cadr state-status) #f) (if (not (null? tal)) (loop (car tal)(cdr tal)))) ((run-wait) (debug:print-info 2 default-log-port "still waiting, " (length tests) " tests still running") (thread-sleep! 10) (let ((new-tests (proc-get-tests run-id))) (if (null? new-tests) (debug:print-info 1 default-log-port "Run completed according to zero tests matching provided criteria.") (loop (car new-tests)(cdr new-tests))))) ((archive) (if (and run-dir (not toplevel-with-children)) (let ((ddir (conc run-dir "/"))) (case (string->symbol (args:get-arg "-archive")) ((save save-remove keep-html) (if (common:file-exists? ddir) (debug:print-info 0 default-log-port "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir))))))) (if (not (null? tal))	> > > > > > \| < \| \| \| \| \| \| \| > > > > > > > > > > > > > > > > > \| > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| > > >	2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198	(dirb ;; (rmt:sdb-qry 'getstr (db:test-get-rundir b))) ;; ) ;; ((filedb:get-path fdb (db:test-get-rundir b)))) (if (and (string? dira)(string? dirb)) (> (string-length dira)(string-length dirb)) #f)))))) (toplevel-retries (make-hash-table)) ;; try three times to loop through and remove top level tests (test-retry-time (make-hash-table)) (backgrounded-remove-status (make-hash-table)) (backgrounded-remove-last-visit (make-hash-table)) (backgrounded-remove-result (make-hash-table)) (allow-run-time 10)) ;; seconds to allow for killing tests before just brutally killing 'em (let loop ((test (car sorted-tests)) (tal (cdr sorted-tests))) (let* ((test-id (db:test-get-id test)) (new-test-dat (rmt:get-test-info-by-id run-id test-id))) (if (not new-test-dat) (begin (debug:print-error 0 default-log-port "We have a test-id of " test-id " but no record was found. NOTE: No locking of records is done between processes, do not simultaneously remove the same run from two processes!") (if (not (null? tal)) (loop (car tal)(cdr tal)))) (let* ((item-path (db:test-get-item-path new-test-dat)) (test-name (db:test-get-testname new-test-dat)) (run-dir ;;(filedb:get-path fdb ;; (rmt:sdb-qry 'getid (db:test-get-rundir new-test-dat)) ;; ) ;; run dir is from the link tree (has-subrun (and (subrun:subrun-test-initialized? run-dir) (not (subrun:subrun-removed? run-dir)))) (test-state (db:test-get-state new-test-dat)) (test-fulln (db:test-get-fullname new-test-dat)) (uname (db:test-get-uname new-test-dat)) (toplevel-with-children (and (db:test-get-is-toplevel test) (> (rmt:test-toplevel-num-items run-id test-name) 0)))) (case action ((remove-runs) ;; if the test is a toplevel-with-children issue an error and do not remove (cond (toplevel-with-children (debug:print 0 default-log-port "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests") (hash-table-set! toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1)) (if (> (hash-table-ref toplevel-retries test-fulln) 3) (if (not (null? tal)) (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries (let ((newtal (append tal (list test)))) (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue (has-subrun ;; (let ((last-visit (hash-table-ref/default backgrounded-remove-last-visit test-fulln 0)) (now (current-seconds)) (rem-status (hash-table-ref/default backgrounded-remove-status test-fulln 'not-started))) (case rem-status ((not-started) (debug:print 0 default-log-port "WARNING: postponing removal of " test-fulln " with run-id " run-id " as it has a subrun") (hash-table-set! backgrounded-remove-status test-fulln 'started) (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) (common:send-thunk-to-background-thread (lambda () (let* ((subrun-remove-succeeded (subrun:remove-subrun run-dir keep-records))) (hash-table-set! backgrounded-remove-result test-fulln subrun-remove-succeeded) (hash-table-set! backgrounded-remove-status test-fulln 'done))) name: (conc "remove-subrun:"test-fulln)) ;; send to back of line, loop (let ((newtal (append tal (list test)))) (loop (car newtal)(cdr newtal))) ) ((started) ;; if last visit was within last second, sleep 1 second (if (< (- now last-visit) 1.0) (thread-sleep! 1.0)) (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) ;; send to back of line, loop (let ((newtal (append tal (list test)))) (loop (car newtal)(cdr newtal))) ) ((done) ;; drop this one; if remaining, loop, else finish (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) (let ((subrun-remove-succeeded (hash-table-ref/default backgrounded-remove-result test-fulln 'exception))) (cond ((eq? subrun-remove-succeeded 'exception) (let* ((logfile (subrun:get-log-path run-dir "remove"))) (debug:print 0 default-log-port "ERROR: removing subrun of of " test-fulln " with run-id " run-id " ; see logfile @ "logfile))) (subrun-remove-succeeded (debug:print 0 default-log-port "Now removing of " test-fulln " with run-id " run-id " since subrun was removed.") ;;(runs:remove-test-directory new-test-dat mode) ;; let normal case handle this. it will go thru loop again as non-subrun ) (else (let* ((logfile (subrun:get-log-path run-dir "remove"))) (debug:print 0 default-log-port "WARNING: removal of subrun failed. Please check "logfile" for details.")))) ;;(if (not (null? tal)) ;; (loop (car tal)(cdr tal))) ;; send to back of line, loop (will not match has-subrun next time through) (let ((newtal (append tal (list test)))) (loop (car newtal)(cdr newtal))) )) ) ; end case rem-status ) ; end let ); end cond has-subrun (else (debug:print-info 0 default-log-port "test: " test-name " itest-state: " test-state) (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) (begin (if (not (hash-table-ref/default test-retry-time test-fulln #f)) (begin ;; want to set to REMOVING BUT CANNOT do it here? (hash-table-set! test-retry-time test-fulln (current-seconds)))) (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give ;; up and blow it away. (begin (debug:print 0 default-log-port "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing") (mt:test-set-state-status-by-id run-id (db:test-get-id test) "FAILEDKILL" "n/a" #f) (thread-sleep! 1)) (begin (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f) (thread-sleep! 1))) ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... (if (null? tal) (loop new-test-dat tal) (loop (car tal)(append tal (list new-test-dat))))) (begin (runs:remove-test-directory new-test-dat mode) ;; 'remove-all) (if (not (null? tal)) (loop (car tal)(cdr tal))))))) (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) ((set-state-status) ;; BB TODO - manage has-subrun case (debug:print-info 2 default-log-port "new state " (car state-status) ", new status " (cadr state-status)) (mt:test-set-state-status-by-id run-id (db:test-get-id test) (car state-status)(cadr state-status) #f) (if (not (null? tal)) (loop (car tal)(cdr tal)))) ((run-wait) ;; BB TODO - manage has-subrun case (debug:print-info 2 default-log-port "still waiting, " (length tests) " tests still running") (thread-sleep! 10) (let ((new-tests (proc-get-tests run-id))) (if (null? new-tests) (debug:print-info 1 default-log-port "Run completed according to zero tests matching provided criteria.") (loop (car new-tests)(cdr new-tests))))) ((archive) ;; BB TODO - manage has-subrun case (if (and run-dir (not toplevel-with-children)) (let ((ddir (conc run-dir "/"))) (case (string->symbol (args:get-arg "-archive")) ((save save-remove keep-html) (if (common:file-exists? ddir) (debug:print-info 0 default-log-port "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir))))))) (if (not (null? tal))
︙			︙