Megatest

Changes On Branch b5b72d675da2eba5
Login

Changes In Branch v1.65-rerun-fixes Through [b5b72d675d] Excluding Merge-Ins

This is equivalent to a diff from 5adcfa3594 to b5b72d675d

2021-01-07
04:04
Missed commit - message on how to turn on profiling check-in: 4b3e88463b user: matt tags: v1.65-rerun-fixes
2020-12-12
15:58
changed irerun logfile names and tarfile names to matchi each other. added .so files to support dashboard on sles12. Changed version to v16578. check-in: 305d72c734 user: mmgraham tags: v1.65
2020-12-11
14:21
Partial work on fixing rerun check-in: b5b72d675d user: mrwellan tags: v1.65-rerun-fixes
2020-12-06
23:19
Working on ulex again check-in: 1db1be496d user: matt tags: v1.65-ulex-try-again
15:22
added -rerun to megatest calls from mtut, added archival of rerun data check-in: 5adcfa3594 user: mmgraham tags: v1.65
2020-11-25
20:11
Added minor defence against situation where http transport returned bad data (good candidate for reversal) check-in: 8618fbca95 user: mrwellan tags: v1.65

Modified runs.scm from [22193dc2d3] to [93bdd33d36].

522
523
524
525
526
527
528


529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
;;======================================================================
;; runs:run-tests is called from megatest.scm and itself
;;======================================================================
;;
;;  test-names: Comma separated patterns same as test-patts but used in selection 
;;              of tests to run. The item portions are not respected.
;;              FIXME: error out if /patt specified


;;            
(define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names
  (let* ((keys               (keys:config-get-fields *configdat*))
	 (keyvals            (keys:target->keyval keys target))
	 (run-id             (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour")))  ;;  test-name)))
	 ;; (deferred          '()) ;; delay running these since they have a waiton clause
	 (runconfigf         (conc  *toppath* "/runconfigs.config"))
         (dbfile             (conc  *toppath* "/megatest.db"))
         (readonly-mode      (not (file-write-access? dbfile)))
	 (test-records       (make-hash-table))
	 ;; need to process runconfigs before generating these lists
	 (all-tests-registry #f)  ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names
	 (all-test-names     #f)  ;; (hash-table-keys all-tests-registry))
	 (test-names         #f)  ;; Generated by a call to (tests:filter-test-names all-test-names test-patts))
	 (required-tests     #f)  ;; Put fully qualified test/testpath names in this list to be done
         (waitors-upon       (make-hash-table)) ;; given a test, return list of tests waiting upon this test.
	 (task-key           (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id)))
	 ;; (tdbdat             (tasks:open-db))
	 (config-reruns      (let ((x (configf:lookup *configdat* "setup" "reruns")))
			       (if x (string->number x) #f)))
	 (allowed-tests      #f)
	 (runconf            #f)
	 (cache-files        (launch:get-cache-file-paths #f (common:get-toppath *toppath* ) target))
	 (runstart-time      (current-seconds)))

    ;; check if readonly







>
>


















|







522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
;;======================================================================
;; runs:run-tests is called from megatest.scm and itself
;;======================================================================
;;
;;  test-names: Comma separated patterns same as test-patts but used in selection 
;;              of tests to run. The item portions are not respected.
;;              FIXME: error out if /patt specified
;;
;; this calls itself with run-count incremented up to the [setup]->runqueue number or 5
;;            
(define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names
  (let* ((keys               (keys:config-get-fields *configdat*))
	 (keyvals            (keys:target->keyval keys target))
	 (run-id             (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour")))  ;;  test-name)))
	 ;; (deferred          '()) ;; delay running these since they have a waiton clause
	 (runconfigf         (conc  *toppath* "/runconfigs.config"))
         (dbfile             (conc  *toppath* "/megatest.db"))
         (readonly-mode      (not (file-write-access? dbfile)))
	 (test-records       (make-hash-table))
	 ;; need to process runconfigs before generating these lists
	 (all-tests-registry #f)  ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names
	 (all-test-names     #f)  ;; (hash-table-keys all-tests-registry))
	 (test-names         #f)  ;; Generated by a call to (tests:filter-test-names all-test-names test-patts))
	 (required-tests     #f)  ;; Put fully qualified test/testpath names in this list to be done
         (waitors-upon       (make-hash-table)) ;; given a test, return list of tests waiting upon this test.
	 (task-key           (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id)))
	 ;; (tdbdat             (tasks:open-db))
	 (config-reruns      (let ((x (configf:lookup *configdat* "setup" "reruns"))) ;; run tests up to this many times if status is in -rerun list or [setup]->allow-auto-rerun list
			       (if x (string->number x) #f)))
	 (allowed-tests      #f)
	 (runconf            #f)
	 (cache-files        (launch:get-cache-file-paths #f (common:get-toppath *toppath* ) target))
	 (runstart-time      (current-seconds)))

    ;; check if readonly
843
844
845
846
847
848
849
850
851

852
853
854
855
856
857
858
	    
	    ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD
	    (if (> run-count 0) ;; handle reruns
		(begin
		  (if (not (hash-table-ref/default flags "-preclean" #f))
		      (hash-table-set! flags "-preclean" #t))
		  (if (not (hash-table-ref/default flags "-rerun" #f))
		      (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS"))
		  ;; recursive call to self

		  (runs:run-tests target runname test-patts user flags run-count: (- run-count 1)))
                (launch:end-of-run-check run-id)))
	  (debug:print-info 0 *default-log-port* "No tests to run")))
    (debug:print-info 4 *default-log-port* "All done by here")
    ;; TODO: try putting post hook call here
      
    ;  (debug:print-info 2 *default-log-port* " run-count " run-count)







|

>







845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
	    
	    ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD
	    (if (> run-count 0) ;; handle reruns
		(begin
		  (if (not (hash-table-ref/default flags "-preclean" #f))
		      (hash-table-set! flags "-preclean" #t))
		  (if (not (hash-table-ref/default flags "-rerun" #f))
		      (hash-table-set! flags "-rerun" "ABORT,DEAD,STUCK/DEAD,n/a,ZERO_ITEMS"))
		  ;; recursive call to self
		  (debug:print-info 0 *default-log-port* "Re-running tests with status " (hash-table-ref/default flags "-rerun" ""))
		  (runs:run-tests target runname test-patts user flags run-count: (- run-count 1)))
                (launch:end-of-run-check run-id)))
	  (debug:print-info 0 *default-log-port* "No tests to run")))
    (debug:print-info 4 *default-log-port* "All done by here")
    ;; TODO: try putting post hook call here
      
    ;  (debug:print-info 2 *default-log-port* " run-count " run-count)
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905


1906
1907
1908
1909
1910
1911
1912
1913
1914
		(loop (car newtal)(cdr newtal) reg reruns))))
         
	 ;; this case should not happen, added to help catch any bugs
	 ((and (list? items) itemdat)
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-5")
	  (debug:print-error 0 *default-log-port* "Should not have a list of items in a test and the itemspath set - please report this")
	  (exit 1))
	 ((not (null? reruns))
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-6")
	  (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED,
		 (junked (lset-difference equal? tal newlst)))
	    (debug:print-info 4 *default-log-port* "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal)
	    (if (< num-retries max-retries)
		(set! newlst (append reruns newlst)))
	    (set! num-retries (+ num-retries 1))
	    ;; (thread-sleep! (+ 1 *global-delta*))
	    (if (not (null? newlst))


		;; since reruns have been tacked on to newlst create new reruns from junked
		(loop (car newlst)(cdr newlst) reg (delete-duplicates junked)))))
	 ((not (null? tal))
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-7")
	  (debug:print-info 4 *default-log-port* "I'm pretty sure I shouldn't get here."))
	 ((not (null? reg)) ;; could we get here with leftovers?
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-8")
	  (debug:print-info 0 *default-log-port* "Have leftovers!")
	  (loop (car reg)(cdr reg) '() reruns))







|

|







>
>
|
|







1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
		(loop (car newtal)(cdr newtal) reg reruns))))
         
	 ;; this case should not happen, added to help catch any bugs
	 ((and (list? items) itemdat)
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-5")
	  (debug:print-error 0 *default-log-port* "Should not have a list of items in a test and the itemspath set - please report this")
	  (exit 1))
	 ((not (null? reruns)) ;; PROCESS THE RERUNS HERE
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-6")
	  (let* ((newlst (tests:filter-non-runnable run-id tal test-records )) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED,
		 (junked (lset-difference equal? tal newlst)))
	    (debug:print-info 4 *default-log-port* "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal)
	    (if (< num-retries max-retries)
		(set! newlst (append reruns newlst)))
	    (set! num-retries (+ num-retries 1))
	    ;; (thread-sleep! (+ 1 *global-delta*))
	    (if (not (null? newlst))
		(begin
		  (debug:print-info 0 *default-log-port* "Re-running tests " (string-intersperse newlst " "))
		  ;; since reruns have been tacked on to newlst create new reruns from junked
		  (loop (car newlst)(cdr newlst) reg (delete-duplicates junked))))))
	 ((not (null? tal))
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-7")
	  (debug:print-info 4 *default-log-port* "I'm pretty sure I shouldn't get here."))
	 ((not (null? reg)) ;; could we get here with leftovers?
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-8")
	  (debug:print-info 0 *default-log-port* "Have leftovers!")
	  (loop (car reg)(cdr reg) '() reruns))
2124
2125
2126
2127
2128
2129
2130


2131
2132
2133
2134
2135
2136
2137
			 (must-rerun (member (test:get-status testdat) rerunlst)))
		    (debug:print-info 3 *default-log-port* "-rerun list: " rerun ", test-status: " (test:get-status testdat)", must-rerun: " must-rerun)
		    must-rerun))
	     (debug:print-info 2 *default-log-port* "Rerun forced for test " test-name "/" item-path)
	     (set! runflag #t)
             (debug:print-info 2 *default-log-port* "Calling rerun hook")
             (runs:rerun-hook test-id new-test-path testdat rerun)


             )
             

	    
            ;; -keepgoing, do not rerun FAIL
	    ((and keepgoing
		  (member (test:get-status testdat) '("FAIL")))







>
>







2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
			 (must-rerun (member (test:get-status testdat) rerunlst)))
		    (debug:print-info 3 *default-log-port* "-rerun list: " rerun ", test-status: " (test:get-status testdat)", must-rerun: " must-rerun)
		    must-rerun))
	     (debug:print-info 2 *default-log-port* "Rerun forced for test " test-name "/" item-path)
	     (set! runflag #t)
             (debug:print-info 2 *default-log-port* "Calling rerun hook")
             (runs:rerun-hook test-id new-test-path testdat rerun)
	     ;; set the test up to be re-run by changing to NOT_STARTED
	     (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" (conc "RERUN_NEEDED_" config-reruns) "Test can be re-run")
             )
             

	    
            ;; -keepgoing, do not rerun FAIL
	    ((and keepgoing
		  (member (test:get-status testdat) '("FAIL")))

Modified tests.scm from [698654fba2] to [f147b4d13d].

1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842


1843
1844
1845
1846
1847
1848
1849
		 (map (lambda (s)
			(string-substitute "\"" "" s #t))
		      (string-split inl)))
	       data)))))

;; for each test:
;;   
(define (tests:filter-non-runnable run-id testkeynames testrecordshash)
  (let ((runnables '()))
    (for-each
     (lambda (testkeyname)
       (let* ((test-record (hash-table-ref testrecordshash testkeyname))
	      (test-name   (tests:testqueue-get-testname  test-record))
	      (itemdat     (tests:testqueue-get-itemdat   test-record))
	      (item-path   (tests:testqueue-get-item_path test-record))
	      (waitons     (tests:testqueue-get-waitons   test-record))
	      (keep-test   #t)
	      (test-id     (rmt:get-test-id run-id test-name item-path))
	      (tdat        (rmt:get-testinfo-state-status run-id test-id))) ;; (cdb:get-test-info-by-id *runremote* test-id)))
	 (if tdat
	     (begin
	       ;; Look at the test state and status
	       (if (or (and (member (db:test-get-status tdat) 
				    '("PASS" "WARN" "WAIVED" "CHECK" "SKIP"))


			    (equal? (db:test-get-state tdat) "COMPLETED"))
		       (member (db:test-get-state tdat)
				    '("INCOMPLETE" "KILLED")))
		   (set! keep-test #f))

	       ;; examine waitons for any fails. If it is FAIL or INCOMPLETE then eliminate this test
	       ;; from the runnable list







|
















>
>







1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
		 (map (lambda (s)
			(string-substitute "\"" "" s #t))
		      (string-split inl)))
	       data)))))

;; for each test:
;;   
(define (tests:filter-non-runnable run-id testkeynames testrecordshash #!optional (override-statuses '()))
  (let ((runnables '()))
    (for-each
     (lambda (testkeyname)
       (let* ((test-record (hash-table-ref testrecordshash testkeyname))
	      (test-name   (tests:testqueue-get-testname  test-record))
	      (itemdat     (tests:testqueue-get-itemdat   test-record))
	      (item-path   (tests:testqueue-get-item_path test-record))
	      (waitons     (tests:testqueue-get-waitons   test-record))
	      (keep-test   #t)
	      (test-id     (rmt:get-test-id run-id test-name item-path))
	      (tdat        (rmt:get-testinfo-state-status run-id test-id))) ;; (cdb:get-test-info-by-id *runremote* test-id)))
	 (if tdat
	     (begin
	       ;; Look at the test state and status
	       (if (or (and (member (db:test-get-status tdat) 
				    '("PASS" "WARN" "WAIVED" "CHECK" "SKIP"))
			    (not (member (db:test-get-status tdat)
					 override-status))
			    (equal? (db:test-get-state tdat) "COMPLETED"))
		       (member (db:test-get-state tdat)
				    '("INCOMPLETE" "KILLED")))
		   (set! keep-test #f))

	       ;; examine waitons for any fails. If it is FAIL or INCOMPLETE then eliminate this test
	       ;; from the runnable list