Megatest: Diff

Differences From Artifact [97a8694689]:

File runs.scm — part of check-in [3d5659bc1c] at 2016-01-29 12:39:45 on branch re-refactor-vec2defstruct — Added scripts used for refactor and refactored db:tests from db_records (user: mrwellan, size: 93903) [annotate] [blame] [check-ins using]

To Artifact [c09835a467]:

File runs.scm — part of check-in [ad54a64cae] at 2016-01-29 15:17:59 on branch re-refactor-vec2defstruct — converted db:test -> dbr:test (user: mrwellan, size: 93977) [annotate] [blame] [check-ins using]

︙			︙
27 28 29 30 31 32 33 ~~34 35~~ 36 37 38 39 40 41 42	(include "common_records.scm") (include "key_records.scm") (include "db_records.scm") (include "run_records.scm") (include "test_records.scm") (define (runs:test-get-full-path test) ~~(let* ((testname (db:test-testname test)) (itempath (db:test-item-path test)))~~ (conc testname (if (equal? itempath "") "" (conc "(" itempath ")"))))) ;; This is the new methodology. One record to inform them and in the chaos, organise them. ;; ;; NOT YET UTILIZED ;; (define (runs:create-run-record)	\| \|	27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42	(include "common_records.scm") (include "key_records.scm") (include "db_records.scm") (include "run_records.scm") (include "test_records.scm") (define (runs:test-get-full-path test) (let* ((testname (dbr:test-testname test)) (itempath (dbr:test-item-path test))) (conc testname (if (equal? itempath "") "" (conc "(" itempath ")"))))) ;; This is the new methodology. One record to inform them and in the chaos, organise them. ;; ;; NOT YET UTILIZED ;; (define (runs:create-run-record)
︙			︙
609 610 611 612 613 614 615 ~~616~~ 617 618 ~~619~~ 620 621 622 623 624 625 626	(begin (debug:print 0 "ERROR: The proc from reading the items table did not yield a list - please report this") (exit 1)))))) ((and (null? fails) (null? prereq-fails) (not (null? non-completed))) ~~(let* ((allinqueue (map (lambda (x)(if (string? x) x (db:test-testname x)))~~ (append newtal reruns))) ;; prereqstrs is a list of test names as strings that are prereqs for hed ~~(prereqstrs (delete-duplicates (map (lambda (x)(if (string? x) x (db:test-testname x)))~~ prereqs-not-met))) ;; a prereq that is not found in allinqueue will be put in the notinqueue list ;; ;; (notinqueue (filter (lambda (x) ;; (not (member x allinqueue))) ;; prereqstrs)) (give-up #f))	\| \|	609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626	(begin (debug:print 0 "ERROR: The proc from reading the items table did not yield a list - please report this") (exit 1)))))) ((and (null? fails) (null? prereq-fails) (not (null? non-completed))) (let* ((allinqueue (map (lambda (x)(if (string? x) x (dbr:test-testname x))) (append newtal reruns))) ;; prereqstrs is a list of test names as strings that are prereqs for hed (prereqstrs (delete-duplicates (map (lambda (x)(if (string? x) x (dbr:test-testname x))) prereqs-not-met))) ;; a prereq that is not found in allinqueue will be put in the notinqueue list ;; ;; (notinqueue (filter (lambda (x) ;; (not (member x allinqueue))) ;; prereqstrs)) (give-up #f))
︙			︙
674 675 676 677 678 679 680 ~~681~~ 682 683 684 685 686 687 688	reruns)))) ((and (or (not (null? fails)) (not (null? prereq-fails))) (member 'normal testmode)) (debug:print-info 1 "test " hed " (mode=" testmode ") has failed prerequisite(s); " ~~(string-intersperse (map (lambda (t)(conc (db:test-testname t) ":" (db:test-state t)"/"(db:test-status t))) fails) ", ")~~ ", removing it from to-do list") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (if (not (null? prereq-fails)) (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites") (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))) (if (or (not (null? reg))(not (null? tal)))	\|	674 675 676 677 678 679 680 681 682 683 684 685 686 687 688	reruns)))) ((and (or (not (null? fails)) (not (null? prereq-fails))) (member 'normal testmode)) (debug:print-info 1 "test " hed " (mode=" testmode ") has failed prerequisite(s); " (string-intersperse (map (lambda (t)(conc (dbr:test-testname t) ":" (dbr:test-state t)"/"(dbr:test-status t))) fails) ", ") ", removing it from to-do list") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (if (not (null? prereq-fails)) (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites") (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))) (if (or (not (null? reg))(not (null? tal)))
︙			︙
709 710 711 712 713 714 715 ~~716 717 718 719~~ 720 721 722 723 724 725 726	(define (runs:mixed-list-testname-and-testrec->list-of-strings inlst) (if (null? inlst) '() (map (lambda (t) (cond ((vector? t) ~~(let ((test-name (db:test-testname t)) (item-path (db:test-item-path t)) (test-state (db:test-state t)) (test-status (db:test-status t)))~~ (conc test-name (if (equal? item-path "") "" "/") item-path ":" test-state "/" test-status))) ((string? t) t) (else (conc t)))) inlst)))	\| \| \| \|	709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726	(define (runs:mixed-list-testname-and-testrec->list-of-strings inlst) (if (null? inlst) '() (map (lambda (t) (cond ((vector? t) (let ((test-name (dbr:test-testname t)) (item-path (dbr:test-item-path t)) (test-state (dbr:test-state t)) (test-status (dbr:test-status t))) (conc test-name (if (equal? item-path "") "" "/") item-path ":" test-state "/" test-status))) ((string? t) t) (else (conc t)))) inlst)))
︙			︙
742 743 744 745 746 747 748 ~~749~~ 750 751 752 753 754 755 756	(numcpus (common:get-num-cpus)) (maxload (string->number (or (configf:lookup configdat "jobtools" "maxload") "3"))) (waitdelay (string->number (or (configf:lookup configdat "jobtools" "waitdelay") "60")))) (debug:print-info 4 "have-resources: " have-resources " prereqs-not-met: (" (string-intersperse (map (lambda (t) (if (vector? t) ~~(conc (db:test-state t) "/" (db:test-status t))~~ (conc " WARNING: t is not a vector=" t ))) prereqs-not-met) ", ") ") fails: " fails "\nregistered? " (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f))	\|	742 743 744 745 746 747 748 749 750 751 752 753 754 755 756	(numcpus (common:get-num-cpus)) (maxload (string->number (or (configf:lookup configdat "jobtools" "maxload") "3"))) (waitdelay (string->number (or (configf:lookup configdat "jobtools" "waitdelay") "60")))) (debug:print-info 4 "have-resources: " have-resources " prereqs-not-met: (" (string-intersperse (map (lambda (t) (if (vector? t) (conc (dbr:test-state t) "/" (dbr:test-status t)) (conc " WARNING: t is not a vector=" t ))) prereqs-not-met) ", ") ") fails: " fails "\nregistered? " (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f))
︙			︙
893 894 895 896 897 898 899 ~~900~~ 901 902 903 904 905 906 907	(list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns ;; WAS: (cons hed reruns) ;; but that makes no sense? )) (let ((nth-try (hash-table-ref/default test-registry hed 0))) (cond ~~((member "RUNNING" (map db:test-state prereqs-not-met))~~ (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60) (debug:print 0 "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet.")) (thread-sleep! 4) (list (runs:queue-next-hed newtal reg reglen regfull) (runs:queue-next-tal newtal reg reglen regfull) (runs:queue-next-reg newtal reg reglen regfull) reruns))	\|	893 894 895 896 897 898 899 900 901 902 903 904 905 906 907	(list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns ;; WAS: (cons hed reruns) ;; but that makes no sense? )) (let ((nth-try (hash-table-ref/default test-registry hed 0))) (cond ((member "RUNNING" (map dbr:test-state prereqs-not-met)) (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60) (debug:print 0 "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet.")) (thread-sleep! 4) (list (runs:queue-next-hed newtal reg reglen regfull) (runs:queue-next-tal newtal reg reglen regfull) (runs:queue-next-reg newtal reg reglen regfull) reruns))
︙			︙
958 959 960 961 962 963 964 ~~965 966~~ 967 968 969 970 971 972 973	reruns))))))))) ;; scan a list of tests looking to see if any are potentially runnable (define (runs:runable-tests tests) (filter (lambda (t) (if (not (vector? t)) t ~~(let ((state (db:test-state t)) (status (db:test-status t)))~~ (case (string->symbol state) ((COMPLETED INCOMPLETE) #f) ((NOT_STARTED) (if (member status '("TEN_STRIKES" "BLOCKED" "PREQ_FAIL" "ZERO_ITEMS" "PREQ_DISCARDED" "TIMED_OUT" )) #f t)) ((DELETED) #f)	\| \|	958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973	reruns))))))))) ;; scan a list of tests looking to see if any are potentially runnable (define (runs:runable-tests tests) (filter (lambda (t) (if (not (vector? t)) t (let ((state (dbr:test-state t)) (status (dbr:test-status t))) (case (string->symbol state) ((COMPLETED INCOMPLETE) #f) ((NOT_STARTED) (if (member status '("TEN_STRIKES" "BLOCKED" "PREQ_FAIL" "ZERO_ITEMS" "PREQ_DISCARDED" "TIMED_OUT" )) #f t)) ((DELETED) #f)
︙			︙
1004 1005 1006 1007 1008 1009 1010 ~~1011 1012 1013 1014~~ 1015 1016 1017 1018 1019 1020 1021	(last-time-incomplete (- (current-seconds) 900)) ;; force at least one clean up cycle (last-time-some-running (current-seconds)) (tdbdat (tasks:open-db))) ;; Initialize the test-registery hash with tests that already have a record ;; convert state to symbol and use that as the hash value (for-each (lambda (trec) ~~(let ((id (db:test-id trec)) (tn (db:test-testname trec)) (ip (db:test-item-path trec)) (st (db:test-state trec)))~~ (if (not (equal? st "DELETED")) (hash-table-set! test-registry (db:test-make-full-name tn ip) (string->symbol st))))) tests-info) (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) (let loop ((hed (car sorted-test-names)) (tal (cdr sorted-test-names))	\| \| \| \|	1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021	(last-time-incomplete (- (current-seconds) 900)) ;; force at least one clean up cycle (last-time-some-running (current-seconds)) (tdbdat (tasks:open-db))) ;; Initialize the test-registery hash with tests that already have a record ;; convert state to symbol and use that as the hash value (for-each (lambda (trec) (let ((id (dbr:test-id trec)) (tn (dbr:test-testname trec)) (ip (dbr:test-item-path trec)) (st (dbr:test-state trec))) (if (not (equal? st "DELETED")) (hash-table-set! test-registry (db:test-make-full-name tn ip) (string->symbol st))))) tests-info) (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) (let loop ((hed (car sorted-test-names)) (tal (cdr sorted-test-names))
︙			︙
1241 1242 1243 1244 1245 1246 1247 ~~1248 1249~~ 1250 1251 1252 1253 1254 1255 ~~1256 1257~~ 1258 1259 1260 1261 1262 1263 1264 ~~1265~~ 1266 1267 1268 1269 1270 1271 ~~1272~~ 1273 1274 1275 1276 1277 1278 ~~1279 1280~~ 1281 1282 1283 1284 1285 1286 1287 ~~1288~~ 1289 1290 1291 1292 1293 1294 1295	;; LET* ((test-record ;; we get here on "drop through". All done! (debug:print-info 1 "All tests launched"))) (define (runs:calc-fails prereqs-not-met) (filter (lambda (test) (and (vector? test) ;; not (string? test)) ~~(member (db:test-state test) '("INCOMPLETE" "COMPLETED")) (not (member (db:test-status test)~~ '("PASS" "WARN" "CHECK" "WAIVED" "SKIP"))))) prereqs-not-met)) (define (runs:calc-prereq-fail prereqs-not-met) (filter (lambda (test) (and (vector? test) ;; not (string? test)) ~~(equal? (db:test-state test) "NOT_STARTED") (not (member (db:test-status test)~~ '("n/a" "KEEP_TRYING"))))) prereqs-not-met)) (define (runs:calc-not-completed prereqs-not-met) (filter (lambda (t) (or (not (vector? t)) ~~(not (member (db:test-state t) '("INCOMPLETE" "COMPLETED")))))~~ prereqs-not-met)) ;; (define (runs:calc-not-completed prereqs-not-met) ;; (filter ;; (lambda (t) ;; (or (not (vector? t)) ~~;; (not (equal? "COMPLETED" (db:test-state t)))))~~ ;; prereqs-not-met)) (define (runs:calc-runnable prereqs-not-met) (filter (lambda (t) (or (not (vector? t)) ~~(and (equal? "NOT_STARTED" (db:test-state t)) (member (db:test-status t)~~ '("n/a" "KEEP_TRYING"))))) prereqs-not-met)) (define (runs:pretty-string lst) (map (lambda (t) (if (not (vector? t)) (conc t) ~~(conc (db:test-testname t) ":" (db:test-state t) "/" (db:test-status t))))~~ lst)) ;; parent-test is there as a placeholder for when parent-tests can be run as a setup step (define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry) ;; All these vars might be referenced by the testconfig file reader (let* ((test-name (tests:testqueue-get-testname test-record)) (test-waitons (tests:testqueue-get-waitons test-record))	\| \| \| \| \| \| \| \| \|	1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295	;; LET* ((test-record ;; we get here on "drop through". All done! (debug:print-info 1 "All tests launched"))) (define (runs:calc-fails prereqs-not-met) (filter (lambda (test) (and (vector? test) ;; not (string? test)) (member (dbr:test-state test) '("INCOMPLETE" "COMPLETED")) (not (member (dbr:test-status test) '("PASS" "WARN" "CHECK" "WAIVED" "SKIP"))))) prereqs-not-met)) (define (runs:calc-prereq-fail prereqs-not-met) (filter (lambda (test) (and (vector? test) ;; not (string? test)) (equal? (dbr:test-state test) "NOT_STARTED") (not (member (dbr:test-status test) '("n/a" "KEEP_TRYING"))))) prereqs-not-met)) (define (runs:calc-not-completed prereqs-not-met) (filter (lambda (t) (or (not (vector? t)) (not (member (dbr:test-state t) '("INCOMPLETE" "COMPLETED"))))) prereqs-not-met)) ;; (define (runs:calc-not-completed prereqs-not-met) ;; (filter ;; (lambda (t) ;; (or (not (vector? t)) ;; (not (equal? "COMPLETED" (dbr:test-state t))))) ;; prereqs-not-met)) (define (runs:calc-runnable prereqs-not-met) (filter (lambda (t) (or (not (vector? t)) (and (equal? "NOT_STARTED" (dbr:test-state t)) (member (dbr:test-status t) '("n/a" "KEEP_TRYING"))))) prereqs-not-met)) (define (runs:pretty-string lst) (map (lambda (t) (if (not (vector? t)) (conc t) (conc (dbr:test-testname t) ":" (dbr:test-state t) "/" (dbr:test-status t)))) lst)) ;; parent-test is there as a placeholder for when parent-tests can be run as a setup step (define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry) ;; All these vars might be referenced by the testconfig file reader (let* ((test-name (tests:testqueue-get-testname test-record)) (test-waitons (tests:testqueue-get-waitons test-record))
︙			︙
1357 1358 1359 1360 1361 1362 1363 ~~1364~~ 1365 1366 1367 1368 1369 1370 1371	(if (not testdat) (begin (debug:print-info 0 "WARNING: server is overloaded, trying again in one second") (thread-sleep! 1) (loop))))) (if (not testdat) ;; should NOT happen (debug:print 0 "ERROR: failed to get test record for test-id " test-id)) ~~(set! test-id (db:test-id testdat))~~ (if (file-exists? test-path) (change-directory test-path) (begin (debug:print "ERROR: test run path not created before attempting to run the test. Perhaps you are running -remove-runs at the same time?") (change-directory toppath))) (case (if force ;; (args:get-arg "-force") 'NOT_STARTED	\|	1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371	(if (not testdat) (begin (debug:print-info 0 "WARNING: server is overloaded, trying again in one second") (thread-sleep! 1) (loop))))) (if (not testdat) ;; should NOT happen (debug:print 0 "ERROR: failed to get test record for test-id " test-id)) (set! test-id (dbr:test-id testdat)) (if (file-exists? test-path) (change-directory test-path) (begin (debug:print "ERROR: test run path not created before attempting to run the test. Perhaps you are running -remove-runs at the same time?") (change-directory toppath))) (case (if force ;; (args:get-arg "-force") 'NOT_STARTED
︙			︙
1455 1456 1457 1458 1459 1460 1461 ~~1462 1463~~ 1464 1465 1466 1467 1468 1469 1470	(set! globalexitstatus 1) ;; (process-signal (current-process-id) signal/kill)))))))) ((KILLED) (debug:print 1 "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.") (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)) ;; KILLED)) ((LAUNCHED REMOTEHOSTSTART RUNNING) (debug:print 2 "NOTE: " test-name " is already running")) ~~;; (if (> (- (current-seconds)(+ (db:test-event_time testdat) ;; (db:test-run_duration testdat)))~~ ;; (or incomplete-timeout ;; 6000)) ;; i.e. no update for more than 6000 seconds ;; (begin ;; (debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD") ;; (tests:test-set-status! run-id test-id "INCOMPLETE" "STUCK/DEAD" "" #f)) ;; ;; (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "" #f)) ;; (debug:print 2 "NOTE: " test-name " is already running")))	\| \|	1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470	(set! globalexitstatus 1) ;; (process-signal (current-process-id) signal/kill)))))))) ((KILLED) (debug:print 1 "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.") (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)) ;; KILLED)) ((LAUNCHED REMOTEHOSTSTART RUNNING) (debug:print 2 "NOTE: " test-name " is already running")) ;; (if (> (- (current-seconds)(+ (dbr:test-event_time testdat) ;; (dbr:test-run_duration testdat))) ;; (or incomplete-timeout ;; 6000)) ;; i.e. no update for more than 6000 seconds ;; (begin ;; (debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD") ;; (tests:test-set-status! run-id test-id "INCOMPLETE" "STUCK/DEAD" "" #f)) ;; ;; (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "" #f)) ;; (debug:print 2 "NOTE: " test-name " is already running")))
︙			︙
1594 1595 1596 1597 1598 1599 1600 ~~1601~~ 1602 ~~1603~~ 1604 1605 1606 1607 1608 1609 1610 1611 ~~1612~~ 1613 1614 1615 1616 1617 1618 ~~1619 1620~~ 1621 1622 ~~1623 1624 1625 1626~~ 1627 1628 1629 1630 1631 1632 1633	(debug:print-info 0 "action not recognised " action))) ;; actions that operate on one test at a time can be handled below ;; (let ((sorted-tests (filter vector? (sort tests (lambda (a b)(let ((dira ;; (rmt:sdb-qry 'getstr ~~(db:test-rundir a)) ;; ) ;; (filedb:get-path fdb (db:test-rundir a)))~~ (dirb ;; (rmt:sdb-qry 'getstr ~~(db:test-rundir b))) ;; ) ;; ((filedb:get-path fdb (db:test-rundir b))))~~ (if (and (string? dira)(string? dirb)) (> (string-length dira)(string-length dirb)) #f)))))) (toplevel-retries (make-hash-table)) ;; try three times to loop through and remove top level tests (test-retry-time (make-hash-table)) (allow-run-time 10)) ;; seconds to allow for killing tests before just brutally killing 'em (let loop ((test (car sorted-tests)) (tal (cdr sorted-tests))) ~~(let* ((test-id (db:test-id test))~~ (new-test-dat (rmt:get-test-info-by-id run-id test-id))) (if (not new-test-dat) (begin (debug:print 0 "ERROR: We have a test-id of " test-id " but no record was found. NOTE: No locking of records is done between processes, do not simultaneously remove the same run from two processes!") (if (not (null? tal)) (loop (car tal)(cdr tal)))) ~~(let* ((item-path (db:test-item-path new-test-dat)) (test-name (db:test-testname new-test-dat))~~ (run-dir ;;(filedb:get-path fdb ;; (rmt:sdb-qry 'getid (db:test-rundir new-test-dat)) ;; ) ;; run dir is from the link tree (test-state (db:test-state new-test-dat)) (test-fulln (db:test-fullname new-test-dat)) (uname (db:test-uname new-test-dat)) (toplevel-with-children (and (db:test-is-toplevel test) (> (rmt:test-toplevel-num-items run-id test-name) 0)))) (case action ((remove-runs) ;; if the test is a toplevel-with-children issue an error and do not remove (if toplevel-with-children (begin	\| \| \| \| \| \| \| \| \|	1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633	(debug:print-info 0 "action not recognised " action))) ;; actions that operate on one test at a time can be handled below ;; (let ((sorted-tests (filter vector? (sort tests (lambda (a b)(let ((dira ;; (rmt:sdb-qry 'getstr (dbr:test-rundir a)) ;; ) ;; (filedb:get-path fdb (dbr:test-rundir a))) (dirb ;; (rmt:sdb-qry 'getstr (dbr:test-rundir b))) ;; ) ;; ((filedb:get-path fdb (dbr:test-rundir b)))) (if (and (string? dira)(string? dirb)) (> (string-length dira)(string-length dirb)) #f)))))) (toplevel-retries (make-hash-table)) ;; try three times to loop through and remove top level tests (test-retry-time (make-hash-table)) (allow-run-time 10)) ;; seconds to allow for killing tests before just brutally killing 'em (let loop ((test (car sorted-tests)) (tal (cdr sorted-tests))) (let* ((test-id (dbr:test-id test)) (new-test-dat (rmt:get-test-info-by-id run-id test-id))) (if (not new-test-dat) (begin (debug:print 0 "ERROR: We have a test-id of " test-id " but no record was found. NOTE: No locking of records is done between processes, do not simultaneously remove the same run from two processes!") (if (not (null? tal)) (loop (car tal)(cdr tal)))) (let* ((item-path (dbr:test-item-path new-test-dat)) (test-name (dbr:test-testname new-test-dat)) (run-dir ;;(filedb:get-path fdb ;; (rmt:sdb-qry 'getid (dbr:test-rundir new-test-dat)) ;; ) ;; run dir is from the link tree (test-state (dbr:test-state new-test-dat)) (test-fulln (dbr:test-fullname new-test-dat)) (uname (dbr:test-uname new-test-dat)) (toplevel-with-children (and (db:test-is-toplevel test) (> (rmt:test-toplevel-num-items run-id test-name) 0)))) (case action ((remove-runs) ;; if the test is a toplevel-with-children issue an error and do not remove (if toplevel-with-children (begin
︙			︙
1648 1649 1650 1651 1652 1653 1654 ~~1655~~ 1656 1657 ~~1658~~ 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 ~~1670~~ 1671 1672 1673 1674 1675 1676 1677	(hash-table-set! test-retry-time test-fulln (current-seconds)))) (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give ;; up and blow it away. (begin (debug:print 0 "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing") ~~(mt:test-set-state-status-by-id run-id (db:test-id test) "FAILEDKILL" "n/a" #f)~~ (thread-sleep! 1)) (begin ~~(mt:test-set-state-status-by-id run-id (db:test-id test) "KILLREQ" "n/a" #f)~~ (thread-sleep! 1))) ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... (if (null? tal) (loop new-test-dat tal) (loop (car tal)(append tal (list new-test-dat))))) (begin (runs:remove-test-directory new-test-dat mode) ;; 'remove-all) (if (not (null? tal)) (loop (car tal)(cdr tal)))))))) ((set-state-status) (debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status)) ~~(mt:test-set-state-status-by-id run-id (db:test-id test) (car state-status)(cadr state-status) #f)~~ (if (not (null? tal)) (loop (car tal)(cdr tal)))) ((run-wait) (debug:print-info 2 "still waiting, " (length tests) " tests still running") (thread-sleep! 10) (let ((new-tests (proc-get-tests run-id))) (if (null? new-tests)	\| \| \|	1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677	(hash-table-set! test-retry-time test-fulln (current-seconds)))) (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give ;; up and blow it away. (begin (debug:print 0 "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing") (mt:test-set-state-status-by-id run-id (dbr:test-id test) "FAILEDKILL" "n/a" #f) (thread-sleep! 1)) (begin (mt:test-set-state-status-by-id run-id (dbr:test-id test) "KILLREQ" "n/a" #f) (thread-sleep! 1))) ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... (if (null? tal) (loop new-test-dat tal) (loop (car tal)(append tal (list new-test-dat))))) (begin (runs:remove-test-directory new-test-dat mode) ;; 'remove-all) (if (not (null? tal)) (loop (car tal)(cdr tal)))))))) ((set-state-status) (debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status)) (mt:test-set-state-status-by-id run-id (dbr:test-id test) (car state-status)(cadr state-status) #f) (if (not (null? tal)) (loop (car tal)(cdr tal)))) ((run-wait) (debug:print-info 2 "still waiting, " (length tests) " tests still running") (thread-sleep! 10) (let ((new-tests (proc-get-tests run-id))) (if (null? new-tests)
︙			︙
1710 1711 1712 1713 1714 1715 1716 ~~1717~~ 1718 1719 1720 1721 ~~1722 1723 1724~~ 1725 1726 1727 1728 1729 1730 1731	)) runs) ;; (sqlite3:finalize! (db:delay-if-busy tdbdat)) ) #t) (define (runs:remove-test-directory test mode) ;; remove-data-only) ~~(let* ((run-dir (db:test-rundir test)) ;; run dir is from the link tree~~ (real-dir (if (file-exists? run-dir) (resolve-pathname run-dir) #f))) (case mode ((remove-data-only)(mt:test-set-state-status-by-id (db:test-run_id test)(db:test-id test) "CLEANING" "LOCKED" #f)) ((remove-all) (mt:test-set-state-status-by-id (db:test-run_id test)(db:test-id test) "REMOVING" "LOCKED" #f)) ((archive-remove) (mt:test-set-state-status-by-id (db:test-run_id test)(db:test-id test) "ARCHIVE_REMOVING" #f #f))) (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) (if (and real-dir (> (string-length real-dir) 5) (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. (begin ;; let* ((realpath (resolve-pathname run-dir))) (debug:print-info 1 "Recursively removing " real-dir) (if (file-exists? real-dir)	\| \| \| \|	1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731	)) runs) ;; (sqlite3:finalize! (db:delay-if-busy tdbdat)) ) #t) (define (runs:remove-test-directory test mode) ;; remove-data-only) (let* ((run-dir (dbr:test-rundir test)) ;; run dir is from the link tree (real-dir (if (file-exists? run-dir) (resolve-pathname run-dir) #f))) (case mode ((remove-data-only)(mt:test-set-state-status-by-id (dbr:test-run_id test)(dbr:test-id test) "CLEANING" "LOCKED" #f)) ((remove-all) (mt:test-set-state-status-by-id (dbr:test-run_id test)(dbr:test-id test) "REMOVING" "LOCKED" #f)) ((archive-remove) (mt:test-set-state-status-by-id (dbr:test-run_id test)(dbr:test-id test) "ARCHIVE_REMOVING" #f #f))) (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) (if (and real-dir (> (string-length real-dir) 5) (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. (begin ;; let* ((realpath (resolve-pathname run-dir))) (debug:print-info 1 "Recursively removing " real-dir) (if (file-exists? real-dir)
︙			︙
1751 1752 1753 1754 1755 1756 1757 ~~1758 1759 1760~~ 1761 1762 1763 1764 1765 1766 1767	(if (and run-dir (not (member run-dir (list "n/a" "/tmp/badname")))) (debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink") (debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) )) ;; Only delete the records after removing the directory. If things fail we have a record (case mode ((remove-data-only)(mt:test-set-state-status-by-id (db:test-run_id test)(db:test-id test) "NOT_STARTED" "n/a" #f)) ((archive-remove) (mt:test-set-state-status-by-id (db:test-run_id test)(db:test-id test) "ARCHIVED" #f #f)) (else (rmt:delete-test-records (db:test-run_id test) (db:test-id test)))))) ;;====================================================================== ;; Routines for manipulating runs ;;====================================================================== ;; Since many calls to a run require pretty much the same setup ;; this wrapper is used to reduce the replication of code	\| \| \|	1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767	(if (and run-dir (not (member run-dir (list "n/a" "/tmp/badname")))) (debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink") (debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) )) ;; Only delete the records after removing the directory. If things fail we have a record (case mode ((remove-data-only)(mt:test-set-state-status-by-id (dbr:test-run_id test)(dbr:test-id test) "NOT_STARTED" "n/a" #f)) ((archive-remove) (mt:test-set-state-status-by-id (dbr:test-run_id test)(dbr:test-id test) "ARCHIVED" #f #f)) (else (rmt:delete-test-records (dbr:test-run_id test) (dbr:test-id test)))))) ;;====================================================================== ;; Routines for manipulating runs ;;====================================================================== ;; Since many calls to a run require pretty much the same setup ;; this wrapper is used to reduce the replication of code
︙			︙
1871 1872 1873 1874 1875 1876 1877 ~~1878 1879~~ 1880 1881 1882 1883 1884 1885 1886 1887 ~~1888 1889~~ 1890 1891 ~~1892~~ 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 ~~1903~~ 1904 1905 1906 1907 1908 ~~1909 1910~~ 1911 ~~1912~~ 1913 1914 1915 ~~1916 1917~~ 1918 1919 1920 1921	(prev-tests (rmt:get-matching-previous-test-run-records new-run-id "%" "%")) (curr-tests (mt:get-tests-for-run new-run-id "%/%" '() '())) (curr-tests-hash (make-hash-table))) (rmt:update-run-event_time new-run-id) ;; index the already saved tests by testname and itemdat in curr-tests-hash (for-each (lambda (testdat) ~~(let* ((testname (db:test-testname testdat)) (item-path (db:test-item-path testdat))~~ (full-name (conc testname "/" item-path))) (hash-table-set! curr-tests-hash full-name testdat))) curr-tests) ;; NOPE: Non-optimal approach. Try this instead. ;; 1. tests are received in a list, most recent first ;; 2. replace the rollup test with the new always (for-each (lambda (testdat) ~~(let* ((testname (db:test-testname testdat)) (item-path (db:test-item-path testdat))~~ (full-name (conc testname "/" item-path)) (prev-test-dat (hash-table-ref/default curr-tests-hash full-name #f)) ~~(test-steps (rmt:get-steps-for-test (db:test-id testdat)))~~ (new-test-record #f)) ;; replace these with insert ... select (apply sqlite3:execute db (conc "INSERT OR REPLACE INTO tests (run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment) " "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?);") new-run-id (cddr (vector->list testdat))) (set! new-testdat (car (mt:get-tests-for-run new-run-id (conc testname "/" item-path) '() '()))) (hash-table-set! curr-tests-hash full-name new-testdat) ;; this could be confusing, which record should go into the lookup table? ;; Now duplicate the test steps ~~(debug:print 4 "Copying records in test_steps from test_id=" (db:test-id testdat) " to " (db:test-id new-testdat))~~ (cdb:remote-run ;; to be replaced, note: this routine is not used currently (lambda () (sqlite3:execute db (conc "INSERT OR REPLACE INTO test_steps (test_id,stepname,state,status,event_time,comment) " ~~"SELECT " (db:test-id new-testdat) ",stepname,state,status,event_time,comment FROM test_steps WHERE test_id=?;") (db:test-id testdat))~~ ;; Now duplicate the test data ~~(debug:print 4 "Copying records in test_data from test_id=" (db:test-id testdat) " to " (db:test-id new-testdat))~~ (sqlite3:execute db (conc "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment) " ~~"SELECT " (db:test-id new-testdat) ",category,variable,value,expected,tol,units,comment FROM test_data WHERE test_id=?;") (db:test-id testdat))))~~ )) prev-tests)))	\| \| \| \| \| \| \| \| \| \| \|	1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921	(prev-tests (rmt:get-matching-previous-test-run-records new-run-id "%" "%")) (curr-tests (mt:get-tests-for-run new-run-id "%/%" '() '())) (curr-tests-hash (make-hash-table))) (rmt:update-run-event_time new-run-id) ;; index the already saved tests by testname and itemdat in curr-tests-hash (for-each (lambda (testdat) (let* ((testname (dbr:test-testname testdat)) (item-path (dbr:test-item-path testdat)) (full-name (conc testname "/" item-path))) (hash-table-set! curr-tests-hash full-name testdat))) curr-tests) ;; NOPE: Non-optimal approach. Try this instead. ;; 1. tests are received in a list, most recent first ;; 2. replace the rollup test with the new always (for-each (lambda (testdat) (let* ((testname (dbr:test-testname testdat)) (item-path (dbr:test-item-path testdat)) (full-name (conc testname "/" item-path)) (prev-test-dat (hash-table-ref/default curr-tests-hash full-name #f)) (test-steps (rmt:get-steps-for-test (dbr:test-id testdat))) (new-test-record #f)) ;; replace these with insert ... select (apply sqlite3:execute db (conc "INSERT OR REPLACE INTO tests (run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment) " "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?);") new-run-id (cddr (vector->list testdat))) (set! new-testdat (car (mt:get-tests-for-run new-run-id (conc testname "/" item-path) '() '()))) (hash-table-set! curr-tests-hash full-name new-testdat) ;; this could be confusing, which record should go into the lookup table? ;; Now duplicate the test steps (debug:print 4 "Copying records in test_steps from test_id=" (dbr:test-id testdat) " to " (dbr:test-id new-testdat)) (cdb:remote-run ;; to be replaced, note: this routine is not used currently (lambda () (sqlite3:execute db (conc "INSERT OR REPLACE INTO test_steps (test_id,stepname,state,status,event_time,comment) " "SELECT " (dbr:test-id new-testdat) ",stepname,state,status,event_time,comment FROM test_steps WHERE test_id=?;") (dbr:test-id testdat)) ;; Now duplicate the test data (debug:print 4 "Copying records in test_data from test_id=" (dbr:test-id testdat) " to " (dbr:test-id new-testdat)) (sqlite3:execute db (conc "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment) " "SELECT " (dbr:test-id new-testdat) ",category,variable,value,expected,tol,units,comment FROM test_data WHERE test_id=?;") (dbr:test-id testdat)))) )) prev-tests)))