Megatest

Check-in [3c92e0ef5f]
Login
Overview
Comment:Partial work on fixing rerun From: b5b72d675da2eba5c01850ea653e0451706a04c2 User: mrwellan
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.65-rerun-fixes-cm1
Files: files | file ages | folders
SHA1: 3c92e0ef5f7ba541c5498f6f2012722389a4b221
User & Date: matt on 2021-02-26 07:37:44
Other Links: branch diff | manifest | tags
Context
2021-02-26
07:37
Missed commit - message on how to turn on profiling From: 4b3e88463bfcce6890022adb304df18446e9e5e5 User: matt check-in: 0312a7dc52 user: matt tags: v1.65-rerun-fixes-cm1 (unpublished)
07:37
Partial work on fixing rerun From: b5b72d675da2eba5c01850ea653e0451706a04c2 User: mrwellan check-in: 3c92e0ef5f user: matt tags: v1.65-rerun-fixes-cm1 (unpublished)
2021-02-25
15:46
Missing dep. check-in: 80a01976f7 user: matt tags: v1.65-real
Changes

Modified runs.scm from [2583922f1c] to [c46f8d2ea8].

498
499
500
501
502
503
504


505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
;;======================================================================
;; runs:run-tests is called from megatest.scm and itself
;;======================================================================
;;
;;  test-names: Comma separated patterns same as test-patts but used in selection 
;;              of tests to run. The item portions are not respected.
;;              FIXME: error out if /patt specified


;;            
(define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names
  (let* ((keys               (keys:config-get-fields *configdat*))
	 (keyvals            (keys:target->keyval keys target))
	 (run-id             (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour")))  ;;  test-name)))
	 ;; (deferred          '()) ;; delay running these since they have a waiton clause
	 (runconfigf         (conc  *toppath* "/runconfigs.config"))
         (dbfile             (conc  *toppath* "/megatest.db"))
         (readonly-mode      (not (file-write-access? dbfile)))
	 (test-records       (make-hash-table))
	 ;; need to process runconfigs before generating these lists
	 (all-tests-registry #f)  ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names
	 (all-test-names     #f)  ;; (hash-table-keys all-tests-registry))
	 (test-names         #f)  ;; Generated by a call to (tests:filter-test-names all-test-names test-patts))
	 (required-tests     #f)  ;; Put fully qualified test/testpath names in this list to be done
         (waitors-upon       (make-hash-table)) ;; given a test, return list of tests waiting upon this test.
	 (task-key           (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id)))
	 ;; (tdbdat             (tasks:open-db))
	 (config-reruns      (let ((x (configf:lookup *configdat* "setup" "reruns")))
			       (if x (string->number x) #f)))
	 (allowed-tests      #f)
	 (runconf            #f))

    ;; check if readonly
    (when readonly-mode
      (debug:print-error 0 *default-log-port* "megatest.db is readonly.  Cannot proceed.")







>
>


















|







498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
;;======================================================================
;; runs:run-tests is called from megatest.scm and itself
;;======================================================================
;;
;;  test-names: Comma separated patterns same as test-patts but used in selection 
;;              of tests to run. The item portions are not respected.
;;              FIXME: error out if /patt specified
;;
;; this calls itself with run-count incremented up to the [setup]->runqueue number or 5
;;            
(define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names
  (let* ((keys               (keys:config-get-fields *configdat*))
	 (keyvals            (keys:target->keyval keys target))
	 (run-id             (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour")))  ;;  test-name)))
	 ;; (deferred          '()) ;; delay running these since they have a waiton clause
	 (runconfigf         (conc  *toppath* "/runconfigs.config"))
         (dbfile             (conc  *toppath* "/megatest.db"))
         (readonly-mode      (not (file-write-access? dbfile)))
	 (test-records       (make-hash-table))
	 ;; need to process runconfigs before generating these lists
	 (all-tests-registry #f)  ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names
	 (all-test-names     #f)  ;; (hash-table-keys all-tests-registry))
	 (test-names         #f)  ;; Generated by a call to (tests:filter-test-names all-test-names test-patts))
	 (required-tests     #f)  ;; Put fully qualified test/testpath names in this list to be done
         (waitors-upon       (make-hash-table)) ;; given a test, return list of tests waiting upon this test.
	 (task-key           (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id)))
	 ;; (tdbdat             (tasks:open-db))
	 (config-reruns      (let ((x (configf:lookup *configdat* "setup" "reruns"))) ;; run tests up to this many times if status is in -rerun list or [setup]->allow-auto-rerun list
			       (if x (string->number x) #f)))
	 (allowed-tests      #f)
	 (runconf            #f))

    ;; check if readonly
    (when readonly-mode
      (debug:print-error 0 *default-log-port* "megatest.db is readonly.  Cannot proceed.")
819
820
821
822
823
824
825
826
827

828
829
830
831
832
833
834
	    (thread-join! th2)
	    ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD
	    (if (> run-count 0) ;; handle reruns
		(begin
		  (if (not (hash-table-ref/default flags "-preclean" #f))
		      (hash-table-set! flags "-preclean" #t))
		  (if (not (hash-table-ref/default flags "-rerun" #f))
		      (hash-table-set! flags "-rerun" "ABORT,STUCK/DEAD,n/a,ZERO_ITEMS"))
		  ;; recursive call to self

      (runs:run-tests target runname test-patts user flags run-count: (- run-count 1)))
                  (launch:end-of-run-check run-id)))
	  (debug:print-info 0 *default-log-port* "No tests to run")))
    (debug:print-info 4 *default-log-port* "All done by here")
    ;; TODO: try putting post hook call here
      
    ;  (debug:print-info 2 *default-log-port* " run-count " run-count)







|

>







821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
	    (thread-join! th2)
	    ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD
	    (if (> run-count 0) ;; handle reruns
		(begin
		  (if (not (hash-table-ref/default flags "-preclean" #f))
		      (hash-table-set! flags "-preclean" #t))
		  (if (not (hash-table-ref/default flags "-rerun" #f))
		      (hash-table-set! flags "-rerun" "ABORT,DEAD,STUCK/DEAD,n/a,ZERO_ITEMS"))
		  ;; recursive call to self
		  (debug:print-info 0 *default-log-port* "Re-running tests with status " (hash-table-ref/default flags "-rerun" ""))
      (runs:run-tests target runname test-patts user flags run-count: (- run-count 1)))
                  (launch:end-of-run-check run-id)))
	  (debug:print-info 0 *default-log-port* "No tests to run")))
    (debug:print-info 4 *default-log-port* "All done by here")
    ;; TODO: try putting post hook call here
      
    ;  (debug:print-info 2 *default-log-port* " run-count " run-count)
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871


1872
1873
1874
1875
1876
1877
1878
1879
1880
		(loop (car newtal)(cdr newtal) reg reruns))))
         
	 ;; this case should not happen, added to help catch any bugs
	 ((and (list? items) itemdat)
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-5")
	  (debug:print-error 0 *default-log-port* "Should not have a list of items in a test and the itemspath set - please report this")
	  (exit 1))
	 ((not (null? reruns))
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-6")
	  (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED,
		 (junked (lset-difference equal? tal newlst)))
	    (debug:print-info 4 *default-log-port* "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal)
	    (if (< num-retries max-retries)
		(set! newlst (append reruns newlst)))
	    (set! num-retries (+ num-retries 1))
	    ;; (thread-sleep! (+ 1 *global-delta*))
	    (if (not (null? newlst))


		;; since reruns have been tacked on to newlst create new reruns from junked
		(loop (car newlst)(cdr newlst) reg (delete-duplicates junked)))))
	 ((not (null? tal))
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-7")
	  (debug:print-info 4 *default-log-port* "I'm pretty sure I shouldn't get here."))
	 ((not (null? reg)) ;; could we get here with leftovers?
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-8")
	  (debug:print-info 0 *default-log-port* "Have leftovers!")
	  (loop (car reg)(cdr reg) '() reruns))







|

|







>
>
|
|







1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
		(loop (car newtal)(cdr newtal) reg reruns))))
         
	 ;; this case should not happen, added to help catch any bugs
	 ((and (list? items) itemdat)
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-5")
	  (debug:print-error 0 *default-log-port* "Should not have a list of items in a test and the itemspath set - please report this")
	  (exit 1))
	 ((not (null? reruns)) ;; PROCESS THE RERUNS HERE
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-6")
	  (let* ((newlst (tests:filter-non-runnable run-id tal test-records )) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED,
		 (junked (lset-difference equal? tal newlst)))
	    (debug:print-info 4 *default-log-port* "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal)
	    (if (< num-retries max-retries)
		(set! newlst (append reruns newlst)))
	    (set! num-retries (+ num-retries 1))
	    ;; (thread-sleep! (+ 1 *global-delta*))
	    (if (not (null? newlst))
		(begin
		  (debug:print-info 0 *default-log-port* "Re-running tests " (string-intersperse newlst " "))
		  ;; since reruns have been tacked on to newlst create new reruns from junked
		  (loop (car newlst)(cdr newlst) reg (delete-duplicates junked))))))
	 ((not (null? tal))
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-7")
	  (debug:print-info 4 *default-log-port* "I'm pretty sure I shouldn't get here."))
	 ((not (null? reg)) ;; could we get here with leftovers?
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-8")
	  (debug:print-info 0 *default-log-port* "Have leftovers!")
	  (loop (car reg)(cdr reg) '() reruns))
2088
2089
2090
2091
2092
2093
2094


2095
2096
2097
2098
2099
2100
2101
			 (must-rerun (member (test:get-status testdat) rerunlst)))
		    (debug:print-info 3 *default-log-port* "-rerun list: " rerun ", test-status: " (test:get-status testdat)", must-rerun: " must-rerun)
		    must-rerun))
	     (debug:print-info 2 *default-log-port* "Rerun forced for test " test-name "/" item-path)
	     (set! runflag #t)
             (debug:print-info 2 *default-log-port* "Calling rerun hook")
             (runs:rerun-hook test-id new-test-path testdat rerun)


             )
             

	    
            ;; -keepgoing, do not rerun FAIL
	    ((and keepgoing
		  (member (test:get-status testdat) '("FAIL")))







>
>







2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
			 (must-rerun (member (test:get-status testdat) rerunlst)))
		    (debug:print-info 3 *default-log-port* "-rerun list: " rerun ", test-status: " (test:get-status testdat)", must-rerun: " must-rerun)
		    must-rerun))
	     (debug:print-info 2 *default-log-port* "Rerun forced for test " test-name "/" item-path)
	     (set! runflag #t)
             (debug:print-info 2 *default-log-port* "Calling rerun hook")
             (runs:rerun-hook test-id new-test-path testdat rerun)
	     ;; set the test up to be re-run by changing to NOT_STARTED
	     (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" (conc "RERUN_NEEDED_" config-reruns) "Test can be re-run")
             )
             

	    
            ;; -keepgoing, do not rerun FAIL
	    ((and keepgoing
		  (member (test:get-status testdat) '("FAIL")))

Modified tests.scm from [58a365a2ab] to [e7cdcdeb06].

1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814


1815
1816
1817
1818
1819
1820
1821
	  (with-input-from-file fname
	    (lambda ()
	      (read-lines)))))))
	  

;; for each test:
;;   
(define (tests:filter-non-runnable run-id testkeynames testrecordshash)
  (let ((runnables '()))
    (for-each
     (lambda (testkeyname)
       (let* ((test-record (hash-table-ref testrecordshash testkeyname))
	      (test-name   (tests:testqueue-get-testname  test-record))
	      (itemdat     (tests:testqueue-get-itemdat   test-record))
	      (item-path   (tests:testqueue-get-item_path test-record))
	      (waitons     (tests:testqueue-get-waitons   test-record))
	      (keep-test   #t)
	      (test-id     (rmt:get-test-id run-id test-name item-path))
	      (tdat        (rmt:get-testinfo-state-status run-id test-id))) ;; (cdb:get-test-info-by-id *runremote* test-id)))
	 (if tdat
	     (begin
	       ;; Look at the test state and status
	       (if (or (and (member (db:test-get-status tdat) 
				    '("PASS" "WARN" "WAIVED" "CHECK" "SKIP"))


			    (equal? (db:test-get-state tdat) "COMPLETED"))
		       (member (db:test-get-state tdat)
				    '("INCOMPLETE" "KILLED")))
		   (set! keep-test #f))

	       ;; examine waitons for any fails. If it is FAIL or INCOMPLETE then eliminate this test
	       ;; from the runnable list







|
















>
>







1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
	  (with-input-from-file fname
	    (lambda ()
	      (read-lines)))))))
	  

;; for each test:
;;   
(define (tests:filter-non-runnable run-id testkeynames testrecordshash #!optional (override-statuses '()))
  (let ((runnables '()))
    (for-each
     (lambda (testkeyname)
       (let* ((test-record (hash-table-ref testrecordshash testkeyname))
	      (test-name   (tests:testqueue-get-testname  test-record))
	      (itemdat     (tests:testqueue-get-itemdat   test-record))
	      (item-path   (tests:testqueue-get-item_path test-record))
	      (waitons     (tests:testqueue-get-waitons   test-record))
	      (keep-test   #t)
	      (test-id     (rmt:get-test-id run-id test-name item-path))
	      (tdat        (rmt:get-testinfo-state-status run-id test-id))) ;; (cdb:get-test-info-by-id *runremote* test-id)))
	 (if tdat
	     (begin
	       ;; Look at the test state and status
	       (if (or (and (member (db:test-get-status tdat) 
				    '("PASS" "WARN" "WAIVED" "CHECK" "SKIP"))
			    (not (member (db:test-get-status tdat)
					 override-status))
			    (equal? (db:test-get-state tdat) "COMPLETED"))
		       (member (db:test-get-state tdat)
				    '("INCOMPLETE" "KILLED")))
		   (set! keep-test #f))

	       ;; examine waitons for any fails. If it is FAIL or INCOMPLETE then eliminate this test
	       ;; from the runnable list