Megatest: Diff

Differences From Artifact [8c52752e36]:

File runs.scm — part of check-in [346da738c4] at 2017-06-25 22:03:26 on branch v1.65 — Syncing up with v1.64 (user: matt, size: 110835) [annotate] [blame] [check-ins using] [more...]

To Artifact [96d6ec20a5]:

File runs.scm — part of check-in [37803689fe] at 2017-09-20 21:58:45 on branch v1.65 — Merged run-utils branch to v1.65 (user: matt, size: 119268) [annotate] [blame] [check-ins using] [more...]

︙			︙
263 264 265 266 267 268 269 270 271 272 273 274 275 276	(debug:print 0 default-log-port "Message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 default-log-port "ERROR: failed to run pre-hook " run-pre-hook ", check the log " log-file)) (debug:print-info 0 default-log-port "running run-pre-hook: \"" run-pre-hook "\", log is " actual-logf) (system (conc run-pre-hook " >> " actual-logf " 2>&1")) (debug:print-info 0 default-log-port "pre-hook \"" run-pre-hook "\" took " (- (current-seconds) start-time) " seconds to run."))) (debug:print 0 default-log-port "Skipping pre-hook call \"" run-pre-hook "\" as there are existing tests for this run."))))) ;; test-names: Comma separated patterns same as test-patts but used in selection ;; of tests to run. The item portions are not respected. ;; FIXME: error out if /patt specified ;; (define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names (let* ((keys (keys:config-get-fields configdat)) (keyvals (keys:target->keyval keys target))	> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > >	263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315	(debug:print 0 default-log-port "Message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 default-log-port "ERROR: failed to run pre-hook " run-pre-hook ", check the log " log-file)) (debug:print-info 0 default-log-port "running run-pre-hook: \"" run-pre-hook "\", log is " actual-logf) (system (conc run-pre-hook " >> " actual-logf " 2>&1")) (debug:print-info 0 default-log-port "pre-hook \"" run-pre-hook "\" took " (- (current-seconds) start-time) " seconds to run."))) (debug:print 0 default-log-port "Skipping pre-hook call \"" run-pre-hook "\" as there are existing tests for this run."))))) (define (runs:run-post-hook run-id) (let* ((run-post-hook (configf:lookup configdat "runs" "post-hook")) (existing-tests (if run-post-hook (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses #f #f ;; offset limit #f ;; not-in #f ;; sort-by #f ;; sort-order #f ;; get full data (not 'shortlist) 0 ;; (runs:gendat-inc-results-last-update runs:general-data) ;; last update time 'dashboard) '())) (log-dir (conc toppath "/logs")) (log-file (conc "post-hook-" (string-translate (getenv "MT_TARGET") "/" "-") "-" (getenv "MT_RUNNAME") ".log")) (full-log-fname (conc log-dir "/" log-file))) (if run-post-hook ;; (if (null? existing-tests) ;; (debug:print 0 default-log-port "Skipping post-hook call \"" run-post-hook "\" as there are existing tests for this run."))))) (let* ((use-log-dir (if (not (directory-exists? log-dir)) (handle-exceptions exn (begin (debug:print 0 default-log-port "WARNING: Failed to create " log-dir) #f) (create-directory log-dir #t) #t) #t)) (start-time (current-seconds)) (actual-logf (if use-log-dir full-log-fname log-file))) (handle-exceptions exn (begin (print-call-chain default-log-port) (debug:print 0 default-log-port "Message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 default-log-port "ERROR: failed to run post-hook " run-post-hook ", check the log " log-file)) (debug:print-info 0 default-log-port "running run-post-hook: \"" run-post-hook "\", log is " actual-logf) (system (conc run-post-hook " >> " actual-logf " 2>&1")) (debug:print-info 0 default-log-port "post-hook \"" run-post-hook "\" took " (- (current-seconds) start-time) " seconds to run.")))))) ;; test-names: Comma separated patterns same as test-patts but used in selection ;; of tests to run. The item portions are not respected. ;; FIXME: error out if /patt specified ;; (define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names (let* ((keys (keys:config-get-fields configdat)) (keyvals (keys:target->keyval keys target))
︙			︙
323 324 325 326 327 328 329 ~~330 331 332~~ 333 334 335 336 337 338 339	(exit 4))))) (thread-start! th2) (thread-start! th1) (thread-join! th2))))) (set-signal-handler! signal/int sighand) (set-signal-handler! signal/term sighand)) ~~;; force the starting of a server (debug:print 0 default-log-port "waiting on server...") (server:start-and-wait toppath)~~ (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process (set! runconf (if (common:file-exists? runconfigf) (setup-env-defaults runconfigf run-id already-seen-runconfig-info keyvals target) (begin (debug:print 0 default-log-port "WARNING: You do not have a run config file: " runconfigf) #f)))	\| \| \|	362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378	(exit 4))))) (thread-start! th2) (thread-start! th1) (thread-join! th2))))) (set-signal-handler! signal/int sighand) (set-signal-handler! signal/term sighand)) ;; force the starting of a server -- removed BB 17ww28 - no longer needed. ;;(debug:print 0 default-log-port "waiting on server...") ;;(server:start-and-wait toppath) (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process (set! runconf (if (common:file-exists? runconfigf) (setup-env-defaults runconfigf run-id already-seen-runconfig-info keyvals target) (begin (debug:print 0 default-log-port "WARNING: You do not have a run config file: " runconfigf) #f)))
︙			︙
423 424 425 426 427 428 429 ~~430~~ 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 ~~448 449 450 451~~ 452 ~~453~~ 454 455 456 457 458 459 460 461 462 463 464 465 466 ~~467~~ 468 469 470 471 472 473 474	;;====================================================================== ;; refactoring this block into tests:get-full-data ;; ;; What happended, this code is now duplicated in tests!? ;; ;;====================================================================== ~~(if (not (null? test-names))~~ (let loop ((hed (car test-names)) ;; NOTE: This is the main loop that iterates over the test-names (tal (cdr test-names))) ;; 'return-procs tells the config reader to prep running system but return a proc (change-directory toppath) ;; PLEASE OPTIMIZE ME!!! I think this should be a no-op but there are several places where change-directories could be happening. (setenv "MT_TEST_NAME" hed) ;; (let-values (((waitons waitors config)(tests:get-waitons hed all-tests-registry))) (debug:print-info 8 default-log-port* "waitons: " waitons) ;; check for hed in waitons => this would be circular, remove it and issue an ;; error (if (or (member hed waitons) (member hed waitors)) (begin (debug:print-error 0 default-log-port "test " hed " has listed itself as a waiton or waitor, please correct this!") (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons)) (set! waitors (filter (lambda (x)(not (equal? x hed))) waitors)))) ;; (items (items:get-items-from-config config))) (if (not (hash-table-ref/default test-records hed #f)) ~~(hash-table-set! test-records hed (vector hed ;; 0 config ;; 1 waitons ;; 2~~ (config-lookup config "requirements" "priority") ;; priority 3 ~~(tests:get-items config) ;; expand the [items] and or [itemstable] into explict items~~ #f ;; itemsdat 5 #f ;; spare - used for item-path waitors ;; ))) (for-each (lambda (waiton) (if (and waiton (not (member waiton test-names))) (let* ((waiton-record (hash-table-ref/default test-records waiton #f)) (waiton-tconfig (if waiton-record (vector-ref waiton-record 1) #f)) (waiton-itemized (and waiton-tconfig (or (hash-table-ref/default waiton-tconfig "items" #f) (hash-table-ref/default waiton-tconfig "itemstable" #f)))) (itemmaps (tests:get-itemmaps config)) ;; (configf:lookup config "requirements" "itemmap")) ~~(new-test-patts (tests:extend-test-patts test-patts hed waiton itemmaps)))~~ (debug:print-info 0 default-log-port "Test " waiton " has " (if waiton-record "a" "no") " waiton-record and" (if waiton-itemized " " " no ") "items") ;; need to account for test-patt here, if I am test "a", selected with a test-patt of "hed/b%" ;; and we are waiting on "waiton" we need to add "waiton/,waiton/b%" to test-patt ;; is this satisfied by merely appending "/" to the waiton name added to the list? ;; ;; This approach causes all of the items in an upstream test to be run	\| \| \| \| \| \| \|	462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513	;;====================================================================== ;; refactoring this block into tests:get-full-data ;; ;; What happended, this code is now duplicated in tests!? ;; ;;====================================================================== (if (not (null? test-names)) ;; BEGIN test-names loop (let loop ((hed (car test-names)) ;; NOTE: This is the main loop that iterates over the test-names (tal (cdr test-names))) ;; 'return-procs tells the config reader to prep running system but return a proc (change-directory toppath) ;; PLEASE OPTIMIZE ME!!! I think this should be a no-op but there are several places where change-directories could be happening. (setenv "MT_TEST_NAME" hed) ;; (let-values (((waitons waitors config)(tests:get-waitons hed all-tests-registry))) (debug:print-info 8 default-log-port* "waitons: " waitons) ;; check for hed in waitons => this would be circular, remove it and issue an ;; error (if (or (member hed waitons) (member hed waitors)) (begin (debug:print-error 0 default-log-port "test " hed " has listed itself as a waiton or waitor, please correct this!") (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons)) (set! waitors (filter (lambda (x)(not (equal? x hed))) waitors)))) ;; (items (items:get-items-from-config config))) (if (not (hash-table-ref/default test-records hed #f)) (hash-table-set! test-records ;; BB: we are doing a manual make-tests:testqueue hed (vector hed ;; 0 ;; testname config ;; 1 waitons ;; 2 (config-lookup config "requirements" "priority") ;; priority 3 (tests:get-items config) ;; 4 ;; expand the [items] and or [itemstable] into explict items #f ;; itemsdat 5 #f ;; spare - used for item-path waitors ;; ))) (for-each (lambda (waiton) (if (and waiton (not (member waiton test-names))) (let* ((waiton-record (hash-table-ref/default test-records waiton #f)) (waiton-tconfig (if waiton-record (vector-ref waiton-record 1) #f)) (waiton-itemized (and waiton-tconfig (or (hash-table-ref/default waiton-tconfig "items" #f) (hash-table-ref/default waiton-tconfig "itemstable" #f)))) (itemmaps (tests:get-itemmaps config)) ;; (configf:lookup config "requirements" "itemmap")) (new-test-patts (tests:extend-test-patts test-patts hed waiton itemmaps))) ;; BB: items expanded here. (debug:print-info 0 default-log-port "Test " waiton " has " (if waiton-record "a" "no") " waiton-record and" (if waiton-itemized " " " no ") "items") ;; need to account for test-patt here, if I am test "a", selected with a test-patt of "hed/b%" ;; and we are waiting on "waiton" we need to add "waiton/,waiton/b%" to test-patt ;; is this satisfied by merely appending "/" to the waiton name added to the list? ;; ;; This approach causes all of the items in an upstream test to be run
︙			︙
500 501 502 503 504 505 506 ~~507~~ 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 ~~525 526 527 528 529 530 531 532 533 534~~ 535 ~~536~~ 537 538 539 540 541 542 543 ~~544~~ 545 546 547 548 549 550 551	;; (set! test-names (cons waiton test-names))))) ;; was an append, now a cons ))) (delete-duplicates (append waitons waitors))) (let ((remtests (delete-duplicates (append waitons tal)))) (if (not (null? remtests)) (begin ;; (debug:print-info 0 default-log-port "Preprocessing continues for " (string-intersperse remtests ", ")) ~~(loop (car remtests)(cdr remtests))))))))~~ (if (not (null? required-tests)) (debug:print-info 1 default-log-port "Adding \"" (string-intersperse required-tests " ") "\" to the run queue")) ;; NOTE: these are all parent tests, items are not expanded yet. (debug:print-info 4 default-log-port "test-records=" (hash-table->alist test-records)) (let ((reglen (configf:lookup configdat "setup" "runqueue"))) (if (> (length (hash-table-keys test-records)) 0) (let* ((keep-going #t) (run-queue-retries 5) (th1 (make-thread (lambda () (handle-exceptions exn (begin (print-call-chain) (print " message: " ((condition-property-accessor 'exn 'message) exn))) (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry))) ~~;; (handle-exceptions~~ ~~;; exn~~ ~~;; (begin~~ ~~;; (print-call-chain (current-error-port))~~ ~~;; (debug:print-error 0 default-log-port "failure in runs:run-tests-queue thread, error: " ((condition-property-accessor 'exn 'message) exn))~~ ~~;; (if (> run-queue-retries 0)~~ ~~;; (begin~~ ~~;; (set! run-queue-retries (- run-queue-retries 1))~~ ~~;; (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry))))~~ ~~;; (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry)))~~ "runs:run-tests-queue")) ~~(th2 (make-thread (lambda ()~~ ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ... (let ((run-ids (rmt:get-all-run-ids))) (for-each (lambda (run-id) (if keep-going (handle-exceptions exn (debug:print 0 default-log-port "error in calling find-and-mark-incomplete for run-id " run-id) ~~(rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime)))~~ run-ids))) "runs: mark-incompletes"))) (thread-start! th1) (thread-start! th2) (thread-join! th1) (set! keep-going #f) (thread-join! th2)	\| < < < < < < < < < < \| \|	539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580	;; (set! test-names (cons waiton test-names))))) ;; was an append, now a cons ))) (delete-duplicates (append waitons waitors))) (let ((remtests (delete-duplicates (append waitons tal)))) (if (not (null? remtests)) (begin ;; (debug:print-info 0 default-log-port "Preprocessing continues for " (string-intersperse remtests ", ")) (loop (car remtests)(cdr remtests)))))))) ;; END test-names loop (if (not (null? required-tests)) (debug:print-info 1 default-log-port "Adding \"" (string-intersperse required-tests " ") "\" to the run queue")) ;; NOTE: these are all parent tests, items are not expanded yet. (debug:print-info 4 default-log-port "test-records=" (hash-table->alist test-records)) (let ((reglen (configf:lookup configdat "setup" "runqueue"))) (if (> (length (hash-table-keys test-records)) 0) (let* ((keep-going #t) (run-queue-retries 5) (th1 (make-thread (lambda () (handle-exceptions exn (begin (print-call-chain) (print " message: " ((condition-property-accessor 'exn 'message) exn))) (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry))) "runs:run-tests-queue")) (th2 (make-thread (lambda () ;; BBQ: why are we visiting ALL runs here? ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ... (let ((run-ids (rmt:get-all-run-ids))) (for-each (lambda (run-id) (if keep-going (handle-exceptions exn (debug:print 0 default-log-port "error in calling find-and-mark-incomplete for run-id " run-id) (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27) run-ids))) "runs: mark-incompletes"))) (thread-start! th1) (thread-start! th2) (thread-join! th1) (set! keep-going #f) (thread-join! th2)
︙			︙
591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606	(define (runs:queue-next-reg tal reg n regfull) (if regfull (cdr reg) (if (null? tal) ;; if tal is null and reg not full then '() as reg contents moved to tal '() reg))) (define runs:nothing-left-in-queue-count 0) (define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps) (let* ((loop-list (list hed tal reg reruns)) (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))) (if (list? res) res (begin (debug:print 0 default-log-port	> > > > > > > > >	620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644	(define (runs:queue-next-reg tal reg n regfull) (if regfull (cdr reg) (if (null? tal) ;; if tal is null and reg not full then '() as reg contents moved to tal '() reg))) ;; this is the list of parameters to the named loop "loop" near the top of runs:run-tests-queue, look around line 1216 ;; (define (runs:loop-values tal reg reglen regfull reruns) (list (runs:queue-next-hed tal reg reglen regfull) ;; hed (runs:queue-next-tal tal reg reglen regfull) ;; tal (runs:queue-next-reg tal reg reglen regfull) ;; reg reruns)) ;; reruns (define runs:nothing-left-in-queue-count 0) ;; BB: for future reference - suspect target vars are not expanded to env vars at this point (item expansion using [items]\nwhatever [system echo $TARGETVAR] doesnt work right whereas [system echo #{targetvar}] does.. Tal and Randy have tix on this. on first pass, var not set, on second pass, ok. (define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps) (let* ((loop-list (list hed tal reg reruns)) (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))) (if (list? res) res (begin (debug:print 0 default-log-port
︙			︙
632 633 634 635 636 637 638 ~~639 640 641 642~~ 643 644 645 646 647 648 649	((and (not (member 'toplevel testmode)) (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a) '(DONOTRUN removed CANNOTRUN))) ;; common:cant-run-states-sym) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here (debug:print-info 1 default-log-port "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue") (if (or (not (null? tal)) (not (null? reg))) ~~(list (runs:queue-next-hed tal reg reglen regfull)~~ (runs:que~~ue-next-tal~~ tal reg reglen regfull) ~~(runs:queue-next-reg tal reg reglen regfull)~~ ~~reruns)~~ (begin (debug:print-info 0 default-log-port "Nothing left in the queue!") ;; If get here twice then we know we've tried to expand all items ;; since there must be a logic issue with the handling of loops in the ;; items expand phase we will brute force an exit here. (if (> runs:nothing-left-in-queue-count 2) (begin	< \| < <	670 671 672 673 674 675 676 677 678 679 680 681 682 683 684	((and (not (member 'toplevel testmode)) (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a) '(DONOTRUN removed CANNOTRUN))) ;; common:cant-run-states-sym) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here (debug:print-info 1 default-log-port "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue") (if (or (not (null? tal)) (not (null? reg))) (runs:loop-values tal reg reglen regfull reruns) (begin (debug:print-info 0 default-log-port "Nothing left in the queue!") ;; If get here twice then we know we've tried to expand all items ;; since there must be a logic issue with the handling of loops in the ;; items expand phase we will brute force an exit here. (if (> runs:nothing-left-in-queue-count 2) (begin
︙			︙
708 709 710 711 712 713 714 ~~715 716 717 718~~ 719 720 ~~721~~ 722 723 724 725 726 ~~727~~ 728 ~~729~~ 730 731 732 733 734 735 736 ~~737 738 739 740~~ 741 742 743 744 745 746 747 748 749 750 751 752 ~~753~~ 754 755 756 ~~757 758 759 760~~ 761 762 763 764 765 766 767 768 769 ~~770 771 772 773~~ 774 775 776 777 778 779 780	(let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites"))) (if (and (null? trimmed-tal) (null? trimmed-reg)) #f ~~(list (runs:queue-next-hed trimmed-tal trimmed-reg reglen regfull)~~ (runs:que~~ue-next-tal~~ trimmed-tal trimmed-reg reglen regfull) ~~(runs:queue-next-reg trimmed-tal trimmed-reg reglen regfull)~~ ~~reruns)~~)) (list (car newtal)(append (cdr newtal) reg) '() reruns)))) ~~((and (null? fails)~~ (null? prereq-fails) (null? non-completed)) (if (runs:can-keep-running? hed 20) (begin (runs:inc-cant-run-tests hed) ~~(debug:print-info 1 default-log-port "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default seen-cant-run-tests hed 0))~~ ;; getting here likely means the system is way overloaded, kill a full minute before continuing ~~(thread-sleep! 60)~~ ;; num-retries code was here ;; we use this opportunity to move contents of reg to tal (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met? (begin (debug:print-info 1 default-log-port "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while."))) ~~~~(list (runs:queue-next-hed tal reg reglen regfull)~~ (runs:que~~ue-next-tal~~ tal reg reglen regfull) ~~(runs:queue-next-reg tal reg reglen regfull)~~ ~~reruns)~~)))~~ ((and (or (not (null? fails)) (not (null? prereq-fails))) (member 'normal testmode)) (debug:print-info 1 default-log-port "test " hed " (mode=" testmode ") has failed prerequisite(s); " (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") ", removing it from to-do list") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (if (not (null? prereq-fails)) (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites") ~~(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))))~~ (if (or (not (null? reg))(not (null? tal))) (begin (hash-table-set! test-registry hed 'CANNOTRUN) ~~~~(list (runs:queue-next-hed tal reg reglen regfull)~~ ~~(runs:queue-next-tal tal reg reglen regfull)~~ ~~(runs:queue-next-reg tal reg reglen regfull)~~ (cons hed reruns)))~~ #f)) ;; #f flags do not loop ((and (not (null? fails))(member 'toplevel testmode)) (if (or (not (null? reg))(not (null? tal))) (list (car newtal)(append (cdr newtal) reg) '() reruns) #f)) ((null? runnables) #f) ;; if we get here and non-completed is null then it is all over. (else (debug:print 0 default-log-port "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now") ~~;; (list (runs:queue-next-hed tal reg reglen regfull)~~ ~~;; (runs:queue-next-tal tal reg reglen regfull)~~ ~~;; (runs:queue-next-reg tal reg reglen regfull)~~ ~~;; reruns)~~ (list (car newtal)(cdr newtal) reg reruns))))) (define (runs:mixed-list-testname-and-testrec->list-of-strings inlst) (if (null? inlst) '() (map (lambda (t) (cond	< \| < \| \| \| \| < \| < \| > > \| < < < \| > < < < <	743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807	(let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites"))) (if (and (null? trimmed-tal) (null? trimmed-reg)) #f (runs:loop-values trimmed-tal trimmed-reg reglen regfull reruns) )) (list (car newtal)(append (cdr newtal) reg) '() reruns)))) ((and (null? fails) ;; have not-started tests, but unable to run them. everything looks completed with no prospect of unsticking something that is stuck. we should mark hed as moribund and exit or continue if there are more tests to consider (null? prereq-fails) (null? non-completed)) (if (runs:can-keep-running? hed 20) (begin (runs:inc-cant-run-tests hed) (debug:print-info 0 default-log-port "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default seen-cant-run-tests hed 0)) ;; ;; getting here likely means the system is way overloaded, kill a full minute before continuing (thread-sleep! 60) ;; TODO: gate by normalized server load > 1.0 (maxload config thing) ;; num-retries code was here ;; we use this opportunity to move contents of reg to tal (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met? (begin (debug:print-info 1 default-log-port "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while."))) (runs:loop-values tal reg reglen regfull reruns) ))) ((and (or (not (null? fails)) (not (null? prereq-fails))) (member 'normal testmode)) (debug:print-info 1 default-log-port "test " hed " (mode=" testmode ") has failed prerequisite(s); " (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") ", removing it from to-do list") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (if (not (null? prereq-fails)) (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites") (begin (debug:print 4 default-log-port"BB> set PREQ_FAIL on "hed) (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))))) ;; BB: this works, btu equivalent for itemwait mode does not work. (if (or (not (null? reg))(not (null? tal))) (begin (hash-table-set! test-registry hed 'CANNOTRUN) (runs:loop-values tal reg reglen regfull (cons hed reruns)) ) #f)) ;; #f flags do not loop ((and (not (null? fails))(member 'toplevel testmode)) (if (or (not (null? reg))(not (null? tal))) (list (car newtal)(append (cdr newtal) reg) '() reruns) #f)) ((null? runnables) #f) ;; if we get here and non-completed is null then it is all over. (else (debug:print 0 default-log-port "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now") (list (car newtal)(cdr newtal) reg reruns))))) (define (runs:mixed-list-testname-and-testrec->list-of-strings inlst) (if (null? inlst) '() (map (lambda (t) (cond
︙			︙
790 791 792 793 794 795 796 797 798 799 800 801 802 803	(conc t)))) inlst))) ;; hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmaps) (define (runs:process-expanded-tests runsdat testdat) ;; unroll the contents of runsdat and testdat (due to ongoing refactoring). (let* ((hed (runs:testdat-hed testdat)) (tal (runs:testdat-tal testdat)) (reg (runs:testdat-reg testdat)) (reruns (runs:testdat-reruns testdat)) (test-name (runs:testdat-test-name testdat)) (item-path (runs:testdat-item-path testdat)) (jobgroup (runs:testdat-jobgroup testdat))	> > >	817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833	(conc t)))) inlst))) ;; hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmaps) (define (runs:process-expanded-tests runsdat testdat) ;; unroll the contents of runsdat and testdat (due to ongoing refactoring). (debug:print 2 default-log-port "runs:process-expanded-tests; testdat:" ) (debug:print 2 default-log-port (with-output-to-string (lambda () (pp (runs:testdat->alist testdat) )))) (let* ((hed (runs:testdat-hed testdat)) (tal (runs:testdat-tal testdat)) (reg (runs:testdat-reg testdat)) (reruns (runs:testdat-reruns testdat)) (test-name (runs:testdat-test-name testdat)) (item-path (runs:testdat-item-path testdat)) (jobgroup (runs:testdat-jobgroup testdat))
︙			︙
861 862 863 864 865 866 867 ~~868~~ 869 870 871 872 873 874 875 876 ~~877 878 879 880~~ 881 882 883 884 885 886 887	(runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) (debug:print-info 2 default-log-port "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", "))) ;; Don't know at this time if the test have been launched at some time in the past ;; i.e. is this a re-launch? (debug:print-info 4 default-log-port "run-limits-info = " run-limits-info) ~~(cond~~ ;; Check item path against item-patts, ;; ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run ;; else the run is stuck, temporarily or permanently ;; but should check if it is due to lack of resources vs. prerequisites (debug:print-info 1 default-log-port "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts) (if (or (not (null? tal))(not (null? reg))) ~~(list (runs:queue-next-hed tal reg reglen regfull)~~ (runs:que~~ue-next-tal~~ tal reg reglen regfull) ~~(runs:queue-next-reg tal reg reglen regfull)~~ ~~reruns)~~ #f)) ;; Register tests ;; ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) (debug:print-info 4 default-log-port "Pre-registering test " test-name "/" item-path " to create placeholder" ) ;; always do firm registration now in v1.60 and greater ;; (eq? transport-type 'fs) ;; no point in parallel registration if use fs	\| < \| < <	891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914	(runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) (debug:print-info 2 default-log-port "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", "))) ;; Don't know at this time if the test have been launched at some time in the past ;; i.e. is this a re-launch? (debug:print-info 4 default-log-port "run-limits-info = " run-limits-info) (cond ; cond 894- 1067 ;; Check item path against item-patts, ;; ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run ;; else the run is stuck, temporarily or permanently ;; but should check if it is due to lack of resources vs. prerequisites (debug:print-info 1 default-log-port "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts) (if (or (not (null? tal))(not (null? reg))) (runs:loop-values tal reg reglen regfull reruns) #f)) ;; Register tests ;; ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) (debug:print-info 4 default-log-port "Pre-registering test " test-name "/" item-path " to create placeholder" ) ;; always do firm registration now in v1.60 and greater ;; (eq? transport-type 'fs) ;; no point in parallel registration if use fs
︙			︙
898 899 900 901 902 903 904 ~~905~~ 906 907 908 909 910 911 912	(begin (rmt:register-test run-id test-name "") (if (rmt:get-test-id run-id test-name "") (hash-table-set! test-registry (db:test-make-full-name test-name "") 'done)))) (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) (if (and (null? tal)(null? reg)) (list hed tal (append reg (list hed)) reruns) ~~(list (runs:queue-next-hed tal reg reglen regfull)~~ (runs:queue-next-tal tal reg reglen regfull) ;; NB// Here we are building reg as we register tests ;; if regfull we must pop the front item off reg (if regfull (append (cdr reg) (list hed)) (append reg (list hed))) reruns)))	\|	925 926 927 928 929 930 931 932 933 934 935 936 937 938 939	(begin (rmt:register-test run-id test-name "") (if (rmt:get-test-id run-id test-name "") (hash-table-set! test-registry (db:test-make-full-name test-name "") 'done)))) (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) (if (and (null? tal)(null? reg)) (list hed tal (append reg (list hed)) reruns) (list (runs:queue-next-hed tal reg reglen regfull) ;; cannot replace with a call to runs:loop-values as the logic is different for reg (runs:queue-next-tal tal reg reglen regfull) ;; NB// Here we are building reg as we register tests ;; if regfull we must pop the front item off reg (if regfull (append (cdr reg) (list hed)) (append reg (list hed))) reruns)))
︙			︙
955 956 957 958 959 960 961 ~~962 963 964 965~~ 966 967 968 969 970 971 972	(run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry) (runs:incremental-print-results run-id) (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running) (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! global-delta) (if (or (not (null? tal))(not (null? reg))) ~~(list (runs:queue-next-hed tal reg reglen regfull)~~ (runs:que~~ue-next-tal~~ tal reg reglen regfull) ~~(runs:queue-next-reg tal reg reglen regfull)~~ ~~reruns)~~ #f)) ;; must be we have unmet prerequisites ;; (else (debug:print 4 default-log-port "FAILS: " fails) ;; If one or more of the prereqs-not-met are FAIL then we can issue	< \| < <	982 983 984 985 986 987 988 989 990 991 992 993 994 995 996	(run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry) (runs:incremental-print-results run-id) (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running) (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! global-delta) (if (or (not (null? tal))(not (null? reg))) (runs:loop-values tal reg reglen regfull reruns) #f)) ;; must be we have unmet prerequisites ;; (else (debug:print 4 default-log-port "FAILS: " fails) ;; If one or more of the prereqs-not-met are FAIL then we can issue
︙			︙
983 984 985 986 987 988 989 ~~990~~ 991 992 993 994 995 996 997 998 ~~999~~ 1000 ~~1001 1002 1003 1004 1005 1006~~ 1007 1008 1009 1010 1011 ~~1012 1013 1014 1015 1016~~ 1017 1018 1019 1020 1021 1022 1023 1024 1025 ~~1026 1027 1028 1029 1030 1031 1032 1033~~ 1034 1035 1036 1037 1038 1039 ~~1040~~ 1041 1042 ~~1043 1044 1045 1046~~ 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 ~~1060~~ 1061 1062 ~~1063 1064 1065 1066~~ 1067 1068 1069 1070 1071 1072 1073	;; couldn't run, take a breather (if (runs:lownoise "Waiting for more work to do..." 60) (debug:print-info 0 default-log-port "Waiting for more work to do...")) (thread-sleep! 1) (list (car newtal)(cdr newtal) reg reruns)) ;; the waiton is FAIL so no point in trying to run hed ever again (if (or (not (null? reg))(not (null? tal))) ~~(if (vector? hed)~~ (begin (debug:print 1 default-log-port "WARNING: Dropping test " test-name "/" item-path " from the launch list as it has prerequistes that are FAIL") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))) (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! global-delta) ;; This next is for the items ~~(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f)~~ (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed) ~~(list (runs:queue-next-hed tal reg reglen regfull)~~ (runs:que~~ue-next-tal~~ tal reg reglen regfull) ~~(runs:queue-next-reg tal reg reglen regfull)~~ ~~reruns ;; WAS: (cons hed reruns) ;; but that makes no sense?~~ )) (let ((nth-try (hash-table-ref/default test-registry hed 0))) (cond ((member "RUNNING" (map db:test-get-state prereqs-not-met)) (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60) (debug:print 0 default-log-port "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet.")) (thread-sleep! 4) ~~(list (runs:queue-next-hed newtal reg reglen regfull)~~ (runs:que~~ue-next-tal new~~tal reg reglen regfull) ~~(runs:queue-next-reg newtal reg reglen regfull)~~ ~~reruns))~~ ((or (not nth-try) (and (number? nth-try) (< nth-try 10))) (hash-table-set! test-registry hed (if (number? nth-try) (+ nth-try 1) 0)) (if (runs:lownoise (conc "not removing test " hed) 60) (debug:print 1 default-log-port "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites")) ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) ~~;; (list hed tal reg reruns)~~ ~~;; (list (car newtal)(cdr newtal) reg reruns)~~ ~~;; (hash-table-set! test-registry hed 'removed)~~ ~~(list (runs:queue-next-hed newtal reg reglen regfull)~~ (runs:que~~ue-next-tal~~ newtal reg reglen regfull) ~~(runs:queue-next-reg newtal reg reglen regfull)~~ ~~reruns))~~ ((symbol? nth-try) (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW (if (null? tal) #f ;; yes, really (list (car tal)(cdr tal) reg reruns)) (begin (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60) ~~(debug:print 0 default-log-port "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state " nth-try " will be overridden and we'll retry."))~~ (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f) (hash-table-set! test-registry hed 0) ~~(list (runs:queue-next-hed newtal reg reglen regfull)~~ (runs:que~~ue-next-tal~~ newtal reg reglen regfull) ~~(runs:queue-next-reg newtal reg reglen regfull)~~ ~~reruns))))~~ (else (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60) (debug:print 0 default-log-port "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now.")) ;; (debug:print 0 default-log-port " prereqs: " prereqs-not-met) (hash-table-set! test-registry hed 'removed) (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f) ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug. (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL (list (if (null? tal)(car newtal)(car tal)) tal reg reruns))))) ;; can't drop this - maybe running? Just keep trying ~~(let ((runable-tests (runs:runable-tests prereqs-not-met)))~~ (if (null? runable-tests) #f ;; I think we are truly done here ~~(list (runs:queue-next-hed newtal reg reglen regfull)~~ (runs:que~~ue-next-tal~~ newtal reg reglen regfull) ~~(runs:queue-next-reg newtal reg reglen regfull)~~ ~~reruns)))))))))~~ ;; scan a list of tests looking to see if any are potentially runnable ;; (define (runs:runable-tests tests) (filter (lambda (t) (if (not (vector? t)) t	\| > > > > \| > < \| < < < \| > < \| < < \| < < < < \| < < \| \| < \| < < \| < \| < <	1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084	;; couldn't run, take a breather (if (runs:lownoise "Waiting for more work to do..." 60) (debug:print-info 0 default-log-port "Waiting for more work to do...")) (thread-sleep! 1) (list (car newtal)(cdr newtal) reg reruns)) ;; the waiton is FAIL so no point in trying to run hed ever again (if (or (not (null? reg))(not (null? tal))) (if (or (vector? hed) (not (null? fails))) ;; BB: why do we need a vector? in my case, fails is populated (prereq failed), reg is not nul, and we really want to drop this one (begin (debug:print 1 default-log-port "WARNING: Dropping test " test-name "/" item-path " from the launch list as it has prerequistes that are FAIL") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))) (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! global-delta) ;; This next is for the items (if (not (null? fails)) ;;(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "PREQ_FAIL" #f) (rmt:set-state-status-and-roll-up-items run-id test-name item-path "NOT_STARTED" "PREQ_FAIL" #f) ;;(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) (rmt:set-state-status-and-roll-up-items run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) ) (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed) (runs:loop-values tal reg reglen regfull reruns)) (let ((nth-try (hash-table-ref/default test-registry hed 0))) ;; hed not a vector... (debug:print 2 default-log-port "nth-try("hed")="nth-try) (cond ((member "RUNNING" (map db:test-get-state prereqs-not-met)) (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60) (debug:print 0 default-log-port "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet.")) (thread-sleep! 4) (runs:loop-values tal reg reglen regfull reruns)) ((or (not nth-try) ;; BB: condition on subsequent tries, condition below fires on first try (and (number? nth-try) (< nth-try 10))) (hash-table-set! test-registry hed (if (number? nth-try) (+ nth-try 1) 0)) (if (runs:lownoise (conc "not removing test " hed) 60) (debug:print 1 default-log-port "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites")) ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) (runs:loop-values newtal reg reglen regfull reruns)) ((symbol? nth-try) ;; BB: 'done matches here in one case where prereq itemwait failed. This is first "try" (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW (if (null? tal) #f ;; yes, really (list (car tal)(cdr tal) reg reruns)) (begin (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60) (debug:print 0 default-log-port "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state >" nth-try "< will be overridden and we'll retry.")) (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f) (hash-table-set! test-registry hed 0) (runs:loop-values newtal reg reglen regfull)))) (else (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60) (debug:print 0 default-log-port "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now.")) ;; (debug:print 0 default-log-port " prereqs: " prereqs-not-met) (hash-table-set! test-registry hed 'removed) (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f) ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug. (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL (list (if (null? tal)(car newtal)(car tal)) tal reg reruns))))) ;; can't drop this - maybe running? Just keep trying (let ((runable-tests (runs:runable-tests prereqs-not-met))) ;; SUSPICIOUS: Should look at more than just prereqs-not-met? (if (null? runable-tests) #f ;; I think we are truly done here (runs:loop-values newtal reg reglen regfull reruns))))))))) ;; scan a list of tests looking to see if any are potentially runnable ;; (define (runs:runable-tests tests) (filter (lambda (t) (if (not (vector? t)) t
︙			︙
1163 1164 1165 1166 1167 1168 1169 ~~1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209~~ 1210 1211 1212 1213 1214 1215 1216	(debug:print 5 default-log-port "test-records: " test-records ", flags: " (hash-table->alist flags)) ;; Do mark-and-find clean up of db before starting runing of quue ;; ;; (rmt:find-and-mark-incomplete) (let* ((run-info (rmt:get-run-info run-id)) (tests-info (mt:get-tests-for-run run-id #f '() '())) ;; qryvals: "id,testname,item_path")) (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) (test-registry (make-hash-table)) (registry-mutex (make-mutex)) (num-retries 0) (max-retries (config-lookup configdat "setup" "maxretries")) (max-concurrent-jobs (configf:lookup-number configdat "setup" "max_concurrent_jobs" default: 50)) (reglen (if (number? reglen-in) reglen-in 1)) (last-time-incomplete (- (current-seconds) 900)) ;; force at least one clean up cycle (last-time-some-running (current-seconds)) ;; (tdbdat (tasks:open-db)) (runsdat (make-runs:dat ;; hed: hed ;; tal: tal ;; reg: reg ;; reruns: reruns reglen: reglen regfull: #f ;; regfull ;; test-record: test-record runname: runname ;; test-name: test-name ;; item-path: item-path ;; jobgroup: jobgroup max-concurrent-jobs: max-concurrent-jobs run-id: run-id ;; waitons: waitons ;; testmode: testmode test-patts: test-patts required-tests: required-tests test-registry: test-registry registry-mutex: registry-mutex flags: flags keyvals: keyvals run-info: run-info ;; newtal: newtal all-tests-registry: all-tests-registry ;; itemmaps: itemmaps ;; prereqs-not-met: (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps) ;; can-run-more-tests: (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) ;; look at the test jobgroup and tot jobs running ))) ;; Initialize the test-registery hash with tests that already have a record ;; convert state to symbol and use that as the hash value (for-each (lambda (trec) (let ((id (db:test-get-id trec)) (tn (db:test-get-testname trec)) (ip (db:test-get-item-path trec))	\| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \|	1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227	(debug:print 5 default-log-port "test-records: " test-records ", flags: " (hash-table->alist flags)) ;; Do mark-and-find clean up of db before starting runing of quue ;; ;; (rmt:find-and-mark-incomplete) (let* ((run-info (rmt:get-run-info run-id)) (tests-info (mt:get-tests-for-run run-id #f '() '())) ;; qryvals: "id,testname,item_path")) (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) (test-registry (make-hash-table)) (registry-mutex (make-mutex)) (num-retries 0) (max-retries (config-lookup configdat "setup" "maxretries")) (max-concurrent-jobs (configf:lookup-number configdat "setup" "max_concurrent_jobs" default: 50)) (reglen (if (number? reglen-in) reglen-in 1)) (last-time-incomplete (- (current-seconds) 900)) ;; force at least one clean up cycle (last-time-some-running (current-seconds)) ;; (tdbdat (tasks:open-db)) (runsdat (make-runs:dat ;; hed: hed ;; tal: tal ;; reg: reg ;; reruns: reruns reglen: reglen regfull: #f ;; regfull ;; test-record: test-record runname: runname ;; test-name: test-name ;; item-path: item-path ;; jobgroup: jobgroup max-concurrent-jobs: max-concurrent-jobs run-id: run-id ;; waitons: waitons ;; testmode: testmode test-patts: test-patts required-tests: required-tests test-registry: test-registry registry-mutex: registry-mutex flags: flags keyvals: keyvals run-info: run-info ;; newtal: newtal all-tests-registry: all-tests-registry ;; itemmaps: itemmaps ;; prereqs-not-met: (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps) ;; can-run-more-tests: (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) ;; look at the test jobgroup and tot jobs running ))) ;; Initialize the test-registery hash with tests that already have a record ;; convert state to symbol and use that as the hash value (for-each (lambda (trec) (let ((id (db:test-get-id trec)) (tn (db:test-get-testname trec)) (ip (db:test-get-item-path trec))
︙			︙
1268 1269 1270 1271 1272 1273 1274 1275 ~~1276 1277 1278 1279 1280 1281~~ 1282 1283 1284 1285 ~~1286 1287~~ 1288 1289 1290 1291 1292 1293 1294	testmode: testmode newtal: newtal itemmaps: itemmaps ;; prereqs-not-met: prereqs-not-met ))) (runs:dat-regfull-set! runsdat regfull) ;; every 15 minutes verify the server is there for this run (if (and (common:low-noise-print 240 "try start server" run-id) (not (or (and runremote (remote-server-url runremote) (server:ping (remote-server-url runremote))) (server:check-if-running toppath)))) (server:kind-run toppath)) (if (> num-running 0) (set! last-time-some-running (current-seconds))) (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup configdat "setup" "give-up-waiting") 36000))) (hash-table-set! max-tries-hash tfullname (+ (hash-table-ref/default max-tries-hash tfullname 0) 1))) ;; (debug:print 0 default-log-port "max-tries-hash: " (hash-table->alist max-tries-hash)) ;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard ;; and it is clear they should have run but did not. (if (not (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f)) (begin (rmt:register-test run-id test-name "")	> \| \| \| \| \| \| \| \|	1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306	testmode: testmode newtal: newtal itemmaps: itemmaps ;; prereqs-not-met: prereqs-not-met ))) (runs:dat-regfull-set! runsdat regfull) ;; -- removed BB 17ww28 - no longer needed. ;; every 15 minutes verify the server is there for this run ;; (if (and (common:low-noise-print 240 "try start server" run-id) ;; (not (or (and runremote ;; (remote-server-url runremote) ;; (server:ping (remote-server-url runremote))) ;; (server:check-if-running toppath)))) ;; (server:kind-run toppath)) (if (> num-running 0) (set! last-time-some-running (current-seconds))) (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup configdat "setup" "give-up-waiting") 36000))) (hash-table-set! max-tries-hash tfullname (+ (hash-table-ref/default max-tries-hash tfullname 0) 1))) ;; (debug:print 0 default-log-port "max-tries-hash: " (hash-table->alist max-tries-hash)) ;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard ;; and it is clear they should have run but did not. (if (not (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f)) (begin (rmt:register-test run-id test-name "")
︙			︙
1406 1407 1408 1409 1410 1411 1412 ~~1413~~ 1414 1415 1416 1417 1418 1419 1420	(if (null? tal) #f (loop (car tal)(cdr tal) reg reruns))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ;; EXPAND ITEMS ~~((or (procedure? items)(eq? items 'have-procedure))~~ (let ((can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) (if (and (list? can-run-more) (car can-run-more)) (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) (if loop-list (apply loop loop-list))) ;; if can't run more just loop with next possible test	\|	1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432	(if (null? tal) #f (loop (car tal)(cdr tal) reg reruns))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ;; EXPAND ITEMS ((or (procedure? items)(eq? items 'have-procedure)) ;; BB - target vars are env vars here? to allow expansion of [items]\nsomething [system echo $SOMETARGVAR], which is wonky (let ((can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) (if (and (list? can-run-more) (car can-run-more)) (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) (if loop-list (apply loop loop-list))) ;; if can't run more just loop with next possible test
︙			︙
1438 1439 1440 1441 1442 1443 1444 ~~1445~~ 1446 1447 1448 1449 1450 1451 1452	((not (null? tal)) (debug:print-info 4 default-log-port "I'm pretty sure I shouldn't get here.")) ((not (null? reg)) ;; could we get here with leftovers? (debug:print-info 0 default-log-port "Have leftovers!") (loop (car reg)(cdr reg) '() reruns)) (else (debug:print-info 4 default-log-port "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) ~~)))~~ ;; now if -run-wait we wait for all tests to be done ;; Now wait for any RUNNING tests to complete (if in run-wait mode) (thread-sleep! 5) ;; I think there is a race condition here. Let states/statuses settle (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id)) (prev-num-running 0)) ;; (debug:print-info 13 default-log-port "num-running=" num-running ", prev-num-running=" prev-num-running) (if (and (or (args:get-arg "-run-wait")	> \|	1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465	((not (null? tal)) (debug:print-info 4 default-log-port "I'm pretty sure I shouldn't get here.")) ((not (null? reg)) ;; could we get here with leftovers? (debug:print-info 0 default-log-port "Have leftovers!") (loop (car reg)(cdr reg) '() reruns)) (else (debug:print-info 4 default-log-port "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) ))) ;; end loop on sorted test names ;; now if -run-wait we wait for all tests to be done ;; Now wait for any RUNNING tests to complete (if in run-wait mode) (thread-sleep! 5) ;; I think there is a race condition here. Let states/statuses settle (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id)) (prev-num-running 0)) ;; (debug:print-info 13 default-log-port "num-running=" num-running ", prev-num-running=" prev-num-running) (if (and (or (args:get-arg "-run-wait")
︙			︙
1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476	(if (not (eq? num-running prev-num-running)) (debug:print-info 0 default-log-port "run-wait specified, waiting on " num-running " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " (time->string (seconds->local-time (current-seconds))))) (thread-sleep! 5) ;; (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) ;; LET* ((test-record ;; we get here on "drop through". All done! (debug:print-info 1 default-log-port "All tests launched"))) (define (runs:calc-fails prereqs-not-met) (filter (lambda (test) (and (vector? test) ;; not (string? test)) (member (db:test-get-state test) '("INCOMPLETE" "COMPLETED")) (not (member (db:test-get-status test)	>	1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490	(if (not (eq? num-running prev-num-running)) (debug:print-info 0 default-log-port "run-wait specified, waiting on " num-running " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " (time->string (seconds->local-time (current-seconds))))) (thread-sleep! 5) ;; (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) ;; LET* ((test-record ;; we get here on "drop through". All done! (runs:run-post-hook run-id) (debug:print-info 1 default-log-port "All tests launched"))) (define (runs:calc-fails prereqs-not-met) (filter (lambda (test) (and (vector? test) ;; not (string? test)) (member (db:test-get-state test) '("INCOMPLETE" "COMPLETED")) (not (member (db:test-get-status test)
︙			︙
1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 ~~1753~~ 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 ~~1765~~ 1766 1767 1768 1769 1770 1771 1772	(if (not (string-search (regexp "testdat.db") f)) (runs:recursive-delete-with-error-msg fullname))) (+ 1 x)) 0 real-dir) ;; then the entire directory (runs:recursive-delete-with-error-msg real-dir)) ;; Remove runs ;; fields are passing in through ;; action: ;; 'remove-runs ;; 'set-state-status ;; ;; NB// should pass in keys? ;; ~~(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(mode ~~'remove-all~~)(options '()))~~ (common:clear-caches) ;; clear all caches (let* ((db #f) ;; (tdbdat (tasks:open-db)) (keys (rmt:get-keys)) (rundat (mt:get-runs-by-patt keys runnamepatt target)) (header (vector-ref rundat 0)) (runs (vector-ref rundat 1)) (states (if state (string-split state ",") '())) (statuses (if status (string-split status ",") '())) (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f))) (rp-mutex (make-mutex)) ~~(bup-mutex (make-mutex)))~~ (let* ((write-access-actions '(remove-runs set-state-status archive run-wait)) (dbfile (conc toppath "/megatest.db")) (readonly-mode (not (file-write-access? dbfile)))) (when (and readonly-mode (member action write-access-actions)) (debug:print-error 0 default-log-port "megatest.db is readonly. Cannot proceed with action ["action"] in which write-access isrequired .")	> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > \| \| >	1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865	(if (not (string-search (regexp "testdat.db") f)) (runs:recursive-delete-with-error-msg fullname))) (+ 1 x)) 0 real-dir) ;; then the entire directory (runs:recursive-delete-with-error-msg real-dir)) ;; cleanup often needs to remove all but the last N runs per target ;; ;; target-patts a1/b1/c1,a2/b2/c2 ... ;; ;; This will fail if called with empty target or a bad target (i.e. missing or extra fields) ;; (define (runs:get-hash-by-target target-patts runpatt) (let* ((targets (string-split target-patts ",")) (keys (rmt:get-keys)) (res-ht (make-hash-table))) ;; target -> ( runrecord1 runrecord2 ... ) (for-each (lambda (target-patt) (let ((runs (rmt:simple-get-runs runpatt #f #f target-patt))) (for-each (lambda (run) (let ((target (simple-run-target run))) (hash-table-set! res-ht target (cons run (hash-table-ref/default res-ht target '()))))) runs))) targets) res-ht)) ;; delete runs older than X (weeks, days, months years etc.) ;; delete redundant runs within a target - N is the input ;; delete redundant runs within a target IFF older than given date/time AND keep at least N ;; (define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep #!key (actions '(print))) (let* ((runs-ht (runs:get-hash-by-target target-patts runpatt)) (age (if (args:get-arg "-age")(common:hms-string->seconds (args:get-arg "-age")) #f)) (age-mark (if age (- (current-seconds) age) (+ (current-seconds) 86400))) (precmd (or (args:get-arg "-precmd") ""))) (print "Actions: " actions) (for-each (lambda (target) (let* ((runs (hash-table-ref runs-ht target)) (sorted (sort runs (lambda (a b)(> (simple-run-event_time a)(simple-run-event_time b))))) (to-remove (let* ((len (length sorted)) (trim-amt (- len num-to-keep))) (if (> trim-amt 0) (take sorted trim-amt) '())))) (hash-table-set! runs-ht target to-remove) (print target ":") (for-each (lambda (run) (let ((remove (member run to-remove (lambda (a b) (eq? (simple-run-id a) (simple-run-id b)))))) (if (and age (> (simple-run-event_time run) age-mark)) (print "Skipping handling of " target "/" (simple-run-runname run) " as it is younger than " (args:get-arg "-age")) (for-each (lambda (action) (case action ((print) (print " " (simple-run-runname run) " " (time->string (seconds->local-time (simple-run-event_time run)) "WW%V.%u %H:%M:%S") " " (if remove "REMOVE" ""))) ((remove-runs) (if remove (system (conc precmd " megatest -remove-runs -target " target " -runname " (simple-run-runname run) " -testpatt %")))) ((archive) (if remove (system (conc precmd " megatest -archive save-remove -target " target " -runname " (simple-run-runname run) " -testpatt %")))))) actions)))) sorted))) ;; (print "Sorted: " (map simple-run-event_time sorted)) ;; (print "Remove: " (map simple-run-event_time to-remove)))) (hash-table-keys runs-ht)) runs-ht)) ;; (define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep) ;; (let ((data (runs:get-all-but-most-recent-n-per-target target-patts runpatt num-to-keep))) ;; (for-each ;; (lambda (target) ;; (let ((runs-to-remove (hash-table-ref data target ))) ;; (for-each ;; (lambda (run) ;; (print "megatest -remove-runs -target " target " -runname " (simple-run-runname run) " -testpatt %")) ;; runs-to-remove))) ;; (hash-table-keys data)))) ;; Remove runs ;; fields are passing in through ;; action: ;; 'remove-runs ;; 'set-state-status ;; ;; NB// should pass in keys? ;; (define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(mode #f)(options '())) (common:clear-caches) ;; clear all caches (let* ((db #f) ;; (tdbdat (tasks:open-db)) (keys (rmt:get-keys)) (rundat (mt:get-runs-by-patt keys runnamepatt target)) (header (vector-ref rundat 0)) (runs (vector-ref rundat 1)) (states (if state (string-split state ",") '())) (statuses (if status (string-split status ",") '())) (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f))) (rp-mutex (make-mutex)) (bup-mutex (make-mutex)) (keep-records (args:get-arg "-keep-records"))) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode". (let* ((write-access-actions '(remove-runs set-state-status archive run-wait)) (dbfile (conc toppath "/megatest.db")) (readonly-mode (not (file-write-access? dbfile)))) (when (and readonly-mode (member action write-access-actions)) (debug:print-error 0 default-log-port "megatest.db is readonly. Cannot proceed with action ["action"] in which write-access isrequired .")
︙			︙
1934 1935 1936 1937 1938 1939 1940 ~~1941 1942 1943~~ 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 ~~1961~~ ~~1962~~ 1963 1964 1965 1966 1967 1968 1969	(remtests (mt:get-tests-for-run run-id #f '("DELETED") '("n/a") not-in: #t))) (if (null? remtests) ;; no more tests remaining (let* ((dparts (string-split lasttpath "/")) (runpath (conc "/" (string-intersperse (take dparts (- (length dparts) 1)) "/")))) (debug:print 1 default-log-port "Removing run: " runkey " " (db:get-value-by-header run header "runname") " and related record") ~~(rmt:delete-run run-id) (rmt:delete-old-deleted-test-records) ;; (rmt:set-var "DELETED_TESTS" (current-seconds))~~ ;; need to figure out the path to the run dir and remove it if empty ;; (if (null? (glob (conc runpath "/"))) ;; (begin ;; (debug:print 1 default-log-port* "Removing run dir " runpath) ;; (system (conc "rmdir -p " runpath)))) ))))) )) runs) ;; (sqlite3:finalize! (db:delay-if-busy tdbdat)) ) #t) (define (runs:remove-test-directory test mode) ;; remove-data-only) (let* ((run-dir (db:test-get-rundir test)) ;; run dir is from the link tree (real-dir (if (common:file-exists? run-dir) ;; (resolve-pathname run-dir) (common:nice-path run-dir) ~~#f)))~~ ~~(case mode~~ ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f)) ((remove-all) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f)) ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f))) (debug:print-info 1 default-log-port "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) (if (and real-dir (> (string-length real-dir) 5) (common:file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc.	> > \| \| \| \| > > > > > > > > > > > > > > > > \|	2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080	(remtests (mt:get-tests-for-run run-id #f '("DELETED") '("n/a") not-in: #t))) (if (null? remtests) ;; no more tests remaining (let* ((dparts (string-split lasttpath "/")) (runpath (conc "/" (string-intersperse (take dparts (- (length dparts) 1)) "/")))) (debug:print 1 default-log-port "Removing run: " runkey " " (db:get-value-by-header run header "runname") " and related record") (if (not keep-records) (begin (rmt:delete-run run-id) (rmt:delete-old-deleted-test-records))) ;; (rmt:set-var "DELETED_TESTS" (current-seconds)) ;; need to figure out the path to the run dir and remove it if empty ;; (if (null? (glob (conc runpath "/"))) ;; (begin ;; (debug:print 1 default-log-port* "Removing run dir " runpath) ;; (system (conc "rmdir -p " runpath)))) ))))) )) runs) ;; (sqlite3:finalize! (db:delay-if-busy tdbdat)) ) #t) (define (runs:remove-test-directory test mode) ;; remove-data-only) (let* ((run-dir (db:test-get-rundir test)) ;; run dir is from the link tree (real-dir (if (common:file-exists? run-dir) ;; (resolve-pathname run-dir) (common:nice-path run-dir) #f)) (clean-mode (or mode 'remove-all)) (test-id (db:test-get-id test)) ;; (lock-key (conc "test-" test-id)) ;; (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) ;; (expire-time (+ (current-seconds) 30))) ;; give up on getting the lock and steal it after 15 seconds ;; (if (car lock) ;; #t ;; (if (> (current-seconds) expire-time) ;; (begin ;; (debug:print-info 0 default-log-port "Timed out waiting for a lock to clean test with id " test-id) ;; (rmt:no-sync-del! lock-key) ;; destroy the lock ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; ;; (begin ;; (thread-sleep! 1) ;; (loop (rmt:no-sync-get-lock lock-key) expire-time))))))) ) (case clean-mode ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f)) ((remove-all) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f)) ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f))) (debug:print-info 1 default-log-port "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) (if (and real-dir (> (string-length real-dir) 5) (common:file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc.
︙			︙
1991 1992 1993 1994 1995 1996 1997 ~~1998 1999~~ 2000 ~~2001~~ 2002 2003 2004 2005 2006 2007 2008	(delete-directory run-dir))) (if (and run-dir (not (member run-dir (list "n/a" "/tmp/badname")))) (debug:print 0 default-log-port "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink") (debug:print 0 default-log-port "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) )) ;; Only delete the records after removing the directory. If things fail we have a record ~~(case mode ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) ~~"NOT_STARTED"~~ ~~"n/a"~~ #f))~~ ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f)) ~~(else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test))))))~~ ;;====================================================================== ;; Routines for manipulating runs ;;====================================================================== ;; Since many calls to a run require pretty much the same setup ;; this wrapper is used to reduce the replication of code	\| \| \| > >	2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121	(delete-directory run-dir))) (if (and run-dir (not (member run-dir (list "n/a" "/tmp/badname")))) (debug:print 0 default-log-port "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink") (debug:print 0 default-log-port "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) )) ;; Only delete the records after removing the directory. If things fail we have a record (case clean-mode ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) (db:test-get-state test)(db:test-get-status test) #f)) ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f)) (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test)))) ;; (rmt:no-sync-del! lock-key) )) ;;====================================================================== ;; Routines for manipulating runs ;;====================================================================== ;; Since many calls to a run require pretty much the same setup ;; this wrapper is used to reduce the replication of code