Megatest

Check-in [bc27cf381e]
Login
Overview
Comment:Fixed itemwait with itempatt and issues in the run queue
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.55
Files: files | file ages | folders
SHA1: bc27cf381e484f3b8c57cabc5c0969e10ecdad85
User & Date: matt on 2014-04-22 00:53:35
Other Links: branch diff | manifest | tags
Context
2014-04-22
10:47
Trimmed redundant messages. Caught another corner case with useless attempts to retry check-in: 46bed86eba user: mrwellan tags: v1.55, v1.5518
00:53
Fixed itemwait with itempatt and issues in the run queue check-in: bc27cf381e user: matt tags: v1.55
2014-04-21
01:30
Incremental improvement with itemwait and run queue logic. check-in: 65e8ba3d33 user: matt tags: v1.55
Changes

Modified db.scm from [ab6ebaa799] to [c6dd5bf332].

1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
(define (cdb:update-pass-fail-counts serverdat run-id test-name)
  (cdb:client-call serverdat 'update-fail-pass-counts #t *default-numtries* run-id test-name run-id test-name run-id test-name))

(define (cdb:top-test-set-running serverdat run-id test-name)
  (cdb:client-call serverdat 'top-test-set-running #t *default-numtries* run-id test-name))

(define (cdb:top-test-set-per-pf-counts serverdat run-id test-name)
  (cdb:client-call serverdat 'top-test-set-per-pf-counts #t *default-numtries* run-id test-name run-id test-name run-id test-name))

;;=

(define (cdb:flush-queue serverdat)
  (cdb:client-call serverdat 'flush #f *default-numtries*))

(define (cdb:kill-server serverdat pid)







|







1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
(define (cdb:update-pass-fail-counts serverdat run-id test-name)
  (cdb:client-call serverdat 'update-fail-pass-counts #t *default-numtries* run-id test-name run-id test-name run-id test-name))

(define (cdb:top-test-set-running serverdat run-id test-name)
  (cdb:client-call serverdat 'top-test-set-running #t *default-numtries* run-id test-name))

(define (cdb:top-test-set-per-pf-counts serverdat run-id test-name)
  (cdb:client-call serverdat 'top-test-set-per-pf-counts #t *default-numtries* run-id test-name run-id test-name run-id test-name run-id test-name))

;;=

(define (cdb:flush-queue serverdat)
  (cdb:client-call serverdat 'flush #f *default-numtries*))

(define (cdb:kill-server serverdat pid)
1786
1787
1788
1789
1790
1791
1792

1793
1794
1795




1796
1797
1798
1799
1800
1801
1802
             WHERE run_id=? AND testname=? AND item_path='';")
	'(top-test-set-running  "UPDATE tests SET state='RUNNING' WHERE run_id=? AND testname=? AND item_path='';")
	'(top-test-set-per-pf-counts "UPDATE tests
                       SET state=CASE 
                                   WHEN (SELECT count(id) FROM tests 
                                                WHERE run_id=? AND testname=?
                                                     AND item_path != '' 

                                                     AND state in ('RUNNING','NOT_STARTED','LAUNCHED','REMOTEHOSTSTART')) > 0 THEN 'RUNNING'
                                   ELSE 'COMPLETED' END,
                            status=CASE 




                                  WHEN fail_count > 0 THEN 'FAIL' 
                                  WHEN pass_count > 0 AND fail_count=0 THEN 'PASS' 
                                  WHEN (SELECT count(id) FROM tests
                                         WHERE run_id=? AND testname=?
                                              AND item_path != ''
                                              AND status = 'SKIP') > 0 THEN 'SKIP'
                                  ELSE 'UNKNOWN' END







>



>
>
>
>







1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
             WHERE run_id=? AND testname=? AND item_path='';")
	'(top-test-set-running  "UPDATE tests SET state='RUNNING' WHERE run_id=? AND testname=? AND item_path='';")
	'(top-test-set-per-pf-counts "UPDATE tests
                       SET state=CASE 
                                   WHEN (SELECT count(id) FROM tests 
                                                WHERE run_id=? AND testname=?
                                                     AND item_path != '' 
                                                     AND status NOT IN ('TEN_STRIKES','BLOCKED')
                                                     AND state in ('RUNNING','NOT_STARTED','LAUNCHED','REMOTEHOSTSTART')) > 0 THEN 'RUNNING'
                                   ELSE 'COMPLETED' END,
                            status=CASE 
                                  WHEN (SELECT count(id) FROM tests
                                         WHERE run_id=? AND testname=?
                                              AND item_path != ''
                                              AND state IN ('NOT_STARTED','BLOCKED')) > 0 THEN 'FAIL'
                                  WHEN fail_count > 0 THEN 'FAIL' 
                                  WHEN pass_count > 0 AND fail_count=0 THEN 'PASS' 
                                  WHEN (SELECT count(id) FROM tests
                                         WHERE run_id=? AND testname=?
                                              AND item_path != ''
                                              AND status = 'SKIP') > 0 THEN 'SKIP'
                                  ELSE 'UNKNOWN' END

Modified mt.scm from [049126ee50] to [bbcabbe252].

182
183
184
185
186
187
188




189
190
191
192
193
194
195
    (cdb:client-call *runremote* 'state-status #t *default-numtries* newstate newstatus test-id))
   (else
    (if newstate   (cdb:client-call *runremote* 'set-test-state #t *default-numtries* newstate test-id))
    (if newstatus  (cdb:client-call *runremote* 'set-test-status #t *default-numtries* newstatus test-id))
    (if newcomment (cdb:client-call *runremote* 'set-test-comment #t *default-numtries* newcomment test-id))))
   (mt:process-triggers test-id newstate newstatus)
   #t)





(define (mt:lazy-get-test-info-by-id test-id)
  (let* ((tdat (hash-table-ref/default *test-info* test-id #f)))
    (if (and tdat 
	     (< (current-seconds)(+ (vector-ref tdat 0) 10)))
	(vector-ref tdat 1)
	;; no need to update *test-info* as that is done in cdb:get-test-info-by-id







>
>
>
>







182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
    (cdb:client-call *runremote* 'state-status #t *default-numtries* newstate newstatus test-id))
   (else
    (if newstate   (cdb:client-call *runremote* 'set-test-state #t *default-numtries* newstate test-id))
    (if newstatus  (cdb:client-call *runremote* 'set-test-status #t *default-numtries* newstatus test-id))
    (if newcomment (cdb:client-call *runremote* 'set-test-comment #t *default-numtries* newcomment test-id))))
   (mt:process-triggers test-id newstate newstatus)
   #t)

(define (mt:test-set-state-status-by-testname run-id test-name item-path new-state new-status new-comment)
  (let ((test-id (cdb:remote-run db:get-test-id-cached #f run-id test-name item-path)))
    (mt:test-set-state-status-by-id test-id new-state new-status new-comment)))

(define (mt:lazy-get-test-info-by-id test-id)
  (let* ((tdat (hash-table-ref/default *test-info* test-id #f)))
    (if (and tdat 
	     (< (current-seconds)(+ (vector-ref tdat 0) 10)))
	(vector-ref tdat 1)
	;; no need to update *test-info* as that is done in cdb:get-test-info-by-id

Modified runs.scm from [dcd06d6df4] to [7aa7a850d0].

574
575
576
577
578
579
580
581
582
583
584

585
586
587
588
589
590
591
592



593

594
595
596
597
598
599
600
	  #f)) ;; #f flags do not loop

     ((and (not (null? fails))(member 'toplevel testmode))
      (if (or (not (null? reg))(not (null? tal)))
	   (list (car newtal)(append (cdr newtal) reg) '() reruns)
	  #f)) 
     (else
      (debug:print 1 "WARNING: FAILS or incomplete tests are preventing completion of this run. Dropping test " hed " from the run queue")
      (list (runs:queue-next-hed tal reg reglen regfull)
		(runs:queue-next-tal tal reg reglen regfull)
		(runs:queue-next-reg tal reg reglen regfull)

		reruns))))) ;; (list (car newtal)(cdr newtal) reg reruns)))))

(define (runs:mixed-list-testname-and-testrec->list-of-strings inlst)
  (if (null? inlst)
      '()
      (map (lambda (t)
	     (cond
	      ((vector? t)



	       (conc (db:test-get-state t) "/" (db:test-get-status t)))

	      ((string? t)
	       t)
	      (else 
	       (conc t))))
	   inlst)))

(define (runs:process-expanded-tests hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmap)







|
|
|
|
>
|







>
>
>
|
>







574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
	  #f)) ;; #f flags do not loop

     ((and (not (null? fails))(member 'toplevel testmode))
      (if (or (not (null? reg))(not (null? tal)))
	   (list (car newtal)(append (cdr newtal) reg) '() reruns)
	  #f)) 
     (else
      (debug:print 0 "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now")
      ;; (list (runs:queue-next-hed tal reg reglen regfull)
      ;;   	(runs:queue-next-tal tal reg reglen regfull)
      ;;   	(runs:queue-next-reg tal reg reglen regfull)
      ;;   	reruns)
      (list (car newtal)(cdr newtal) reg reruns)))))

(define (runs:mixed-list-testname-and-testrec->list-of-strings inlst)
  (if (null? inlst)
      '()
      (map (lambda (t)
	     (cond
	      ((vector? t)
	       (let ((test-name (db:test-get-testname t))
		     (item-path (db:test-get-item-path t))
		     (test-state (db:test-get-state t))
		     (test-status (db:test-get-status t)))
		 (conc test-name (if (equal? item-path "") "" "/") item-path ":" test-state "/" test-status)))
	      ((string? t)
	       t)
	      (else 
	       (conc t))))
	   inlst)))

(define (runs:process-expanded-tests hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmap)
732
733
734
735
736
737
738
739
740
741
742
743

744
745
746
747
748
749
750
	    ;; couldn't run, take a breather
	    (debug:print-info 0 "Waiting for more work to do...")
	    (thread-sleep! 1)
	    (list (car newtal)(cdr newtal) reg reruns))
	  ;; the waiton is FAIL so no point in trying to run hed ever again
	  (if (or (not (null? reg))(not (null? tal)))
	      (if (vector? hed)
		  (begin 
		    (debug:print 1 "WARNING: Dropping test " (db:test-get-testname hed) "/" (db:test-get-item-path hed)
				 " from the launch list as it has prerequistes that are FAIL")
		    (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?)
		    ;; (thread-sleep! *global-delta*)

		    (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'removed)
		    (list (runs:queue-next-hed tal reg reglen regfull)
			  (runs:queue-next-tal tal reg reglen regfull)
			  (runs:queue-next-reg tal reg reglen regfull)
			  reruns ;; WAS: (cons hed reruns) ;; but that makes no sense?
			  ))
		  (let ((nth-try (hash-table-ref/default test-registry hed 0)))







|
|



>







737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
	    ;; couldn't run, take a breather
	    (debug:print-info 0 "Waiting for more work to do...")
	    (thread-sleep! 1)
	    (list (car newtal)(cdr newtal) reg reruns))
	  ;; the waiton is FAIL so no point in trying to run hed ever again
	  (if (or (not (null? reg))(not (null? tal)))
	      (if (vector? hed)
		  (begin
		    (debug:print 1 "WARNING: Dropping test " test-name "/" item-path
				 " from the launch list as it has prerequistes that are FAIL")
		    (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?)
		    ;; (thread-sleep! *global-delta*)
		    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f)
		    (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'removed)
		    (list (runs:queue-next-hed tal reg reglen regfull)
			  (runs:queue-next-tal tal reg reglen regfull)
			  (runs:queue-next-reg tal reg reglen regfull)
			  reruns ;; WAS: (cons hed reruns) ;; but that makes no sense?
			  ))
		  (let ((nth-try (hash-table-ref/default test-registry hed 0)))
770
771
772
773
774
775
776

777
778
779
780
781
782
783
784
785


786
787
788
789
790



791
792
793
794

















795
796
797
798
799
800
801
		      ;; (hash-table-set! test-registry hed 'removed)
		      (list (runs:queue-next-hed newtal reg reglen regfull)
			    (runs:queue-next-tal newtal reg reglen regfull)
			    (runs:queue-next-reg newtal reg reglen regfull)
			    reruns))
		     ((symbol? nth-try)
		      (debug:print 0 "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state " nth-try " will be overridden and we'll retry.")

		      (hash-table-set! test-registry hed 0)
		      (list (runs:queue-next-hed newtal reg reglen regfull)
			    (runs:queue-next-tal newtal reg reglen regfull)
			    (runs:queue-next-reg newtal reg reglen regfull)
			    reruns))
		     (else
		      (debug:print 0 "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now.")
		      ;; (debug:print 0 "         prereqs: " prereqs-not-met)
		      (hash-table-set! test-registry hed 'removed)


		      (list (if (null? tal)(car newtal)(car tal))
			    tal
			    reg
			    reruns)))))
	      ;; can't drop this - maybe running? Just keep trying



	      (list hed
		    tal
		    reg
		    reruns)))))))


















;; every time though the loop increment the test/itempatt val.
;; when the min is > max-allowed and none running then force exit
;;
(define *max-tries-hash* (make-hash-table))

;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... >







>









>
>





>
>
>
|
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
		      ;; (hash-table-set! test-registry hed 'removed)
		      (list (runs:queue-next-hed newtal reg reglen regfull)
			    (runs:queue-next-tal newtal reg reglen regfull)
			    (runs:queue-next-reg newtal reg reglen regfull)
			    reruns))
		     ((symbol? nth-try)
		      (debug:print 0 "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state " nth-try " will be overridden and we'll retry.")
		      (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f)
		      (hash-table-set! test-registry hed 0)
		      (list (runs:queue-next-hed newtal reg reglen regfull)
			    (runs:queue-next-tal newtal reg reglen regfull)
			    (runs:queue-next-reg newtal reg reglen regfull)
			    reruns))
		     (else
		      (debug:print 0 "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now.")
		      ;; (debug:print 0 "         prereqs: " prereqs-not-met)
		      (hash-table-set! test-registry hed 'removed)
		      (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f)
		      (mt:roll-up-pass-fail-counts run-id test-name item-path "FAIL") ;; treat as FAIL
		      (list (if (null? tal)(car newtal)(car tal))
			    tal
			    reg
			    reruns)))))
	      ;; can't drop this - maybe running? Just keep trying
	      (let ((runable-tests (runs:runable-tests prereqs-not-met)))
		(if (null? runable-tests)
		    #f   ;; I think we are truly done here
		    (list (runs:queue-next-hed newtal reg reglen regfull)
			    (runs:queue-next-tal newtal reg reglen regfull)
			    (runs:queue-next-reg newtal reg reglen regfull)
			    reruns)))))))))

;; scan a list of tests looking to see if any are potentially runnable
(define (runs:runable-tests tests)
  (filter (lambda (t)
	    (if (not (vector? t))
		t
		(let ((state  (db:test-get-state t))
		      (status (db:test-get-status t)))
		  (case (string->symbol state)
		    ((COMPLETED) #f)
		    ((NOT_STARTED)
		     (if (member status '("TEN_STRIKES" "BLOCKED"))
			 #f
			 t))
		    ((DELETED) #f)
		    (else t)))))
	  tests))

;; every time though the loop increment the test/itempatt val.
;; when the min is > max-allowed and none running then force exit
;;
(define *max-tries-hash* (make-hash-table))

;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... >