Megatest

Check-in [40ce199020]
Login
Overview
Comment:Merged in lastest changes from v1.60
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | multi-area
Files: files | file ages | folders
SHA1: 40ce1990203264ed44c63caa9193c12f2d328a98
User & Date: matt on 2015-05-24 20:31:49
Other Links: branch diff | manifest | tags
Context
2015-06-02
22:31
merged check-in: 902972c7ce user: matt tags: multi-area
2015-05-24
20:31
Merged in lastest changes from v1.60 check-in: 40ce199020 user: matt tags: multi-area
2015-05-21
23:29
Fixed toplevel mode tests not running check-in: 3d3e19cd4d user: matt tags: v1.60
2015-05-20
09:53
Merged from v1.60 check-in: f63922410b user: mrwellan tags: multi-area
Changes

Modified megatest-version.scm from [456f37ff9a] to [efcc00779e].

1
2
3
4
5
6
7
;; Always use two or four digit decimal
;; 1.01, 1.02...1.10,1.11,1,1101 ... 1.99,2.00..

(declare (unit megatest-version))

(define megatest-version 1.6012)






|

1
2
3
4
5
6
7
;; Always use two or four digit decimal
;; 1.01, 1.02...1.10,1.11,1,1101 ... 1.99,2.00..

(declare (unit megatest-version))

(define megatest-version 1.6013)

Modified runs.scm from [eba5a1b9e0] to [2be427f72c].

226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251




252
253
254
255
256
257

258
259
260

261
262
263
264
265
266
267
	 (task-key           (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id)))
	 (tdbdat             (tasks:open-db area-dat)))

    (if (tasks:need-server run-id area-dat)(tasks:start-and-wait-for-server tdbdat run-id 10))

    (set-signal-handler! signal/int
			 (lambda (signum)
			   (signal-mask! signum)
			   (print "Received signal " signum ", cleaning up before exit. Please wait...")
			   (let ((tdbdat (tasks:open-db area-dat)))
			     (rmt:tasks-set-state-given-param-key task-key "killed"))
			   (print "Killed by signal " signum ". Exiting")
			   (exit)))

    ;; register this run in monitor.db
    (rmt:tasks-add "run-tests" user target runname test-patts task-key area-dat) ;; params)
    (rmt:tasks-set-state-given-param-key task-key "running" area-dat)
    (runs:set-megatest-env-vars run-id area-dat inkeys: keys inrunname: runname) ;; these may be needed by the launching process
    (if (file-exists? runconfigf)
	(setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target)
	(debug:print 0 "WARNING: You do not have a run config file: " runconfigf))

    ;; Now generate all the tests lists
    (set! all-tests-registry (tests:get-all area-dat))
    (set! all-test-names     (hash-table-keys all-tests-registry))
    (set! test-names         (tests:filter-test-names all-test-names test-patts))




    (set! required-tests     (lset-intersection equal? (string-split test-patts ",") test-names))
    
    ;; look up all tests matching the comma separated list of globs in
    ;; test-patts (using % as wildcard)

    ;; (set! test-names (delete-duplicates (tests:get-valid-tests toppath test-patts)))

    (debug:print-info 0 "tests search path: " (tests:get-tests-search-path configdat area-dat))
    (debug:print-info 0 "all tests:  " (string-intersperse (sort all-test-names string<) " "))
    (debug:print-info 0 "test names: " (string-intersperse (sort test-names string<) " "))


    ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if
    ;; -keepgoing is specified
    (if (eq? *passnum* 0)
	(begin
	  ;; Is this still necessary? I think not. Unreachable tests are marked as such and 
	  ;; should not cause problems here.







|


















>
>
>
>
|





>
|
|
|
>







226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
	 (task-key           (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id)))
	 (tdbdat             (tasks:open-db area-dat)))

    (if (tasks:need-server run-id area-dat)(tasks:start-and-wait-for-server tdbdat run-id 10))

    (set-signal-handler! signal/int
			 (lambda (signum)
			   ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
			   (print "Received signal " signum ", cleaning up before exit. Please wait...")
			   (let ((tdbdat (tasks:open-db area-dat)))
			     (rmt:tasks-set-state-given-param-key task-key "killed"))
			   (print "Killed by signal " signum ". Exiting")
			   (exit)))

    ;; register this run in monitor.db
    (rmt:tasks-add "run-tests" user target runname test-patts task-key area-dat) ;; params)
    (rmt:tasks-set-state-given-param-key task-key "running" area-dat)
    (runs:set-megatest-env-vars run-id area-dat inkeys: keys inrunname: runname) ;; these may be needed by the launching process
    (if (file-exists? runconfigf)
	(setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target)
	(debug:print 0 "WARNING: You do not have a run config file: " runconfigf))

    ;; Now generate all the tests lists
    (set! all-tests-registry (tests:get-all area-dat))
    (set! all-test-names     (hash-table-keys all-tests-registry))
    (set! test-names         (tests:filter-test-names all-test-names test-patts))

    ;; I think seeding required-tests with all test-names makes sense but lack analysis to back that up.
    ;;
    ;; (set! required-tests     (lset-intersection equal? (string-split test-patts ",") all-test-names))
    (set! required-tests     (lset-intersection equal? test-names all-test-names))
    
    ;; look up all tests matching the comma separated list of globs in
    ;; test-patts (using % as wildcard)

    ;; (set! test-names (delete-duplicates (tests:get-valid-tests toppath test-patts)))
    (debug:print-info 0 "tests search path: " (string-intersperse (tests:get-tests-search-path configdat area-dat) " "))
    ;; (debug:print-info 0 "tests search path: " (string-intersperse (tests:get-tests-search-path *configdat*) " "))
    (debug:print-info 0 "all tests:         " (string-intersperse (sort all-test-names string<) " "))
    (debug:print-info 0 "test names:        " (string-intersperse (sort test-names string<) " "))
    (debug:print-info 0 "required tests:    " (string-intersperse (sort required-tests string<) " "))

    ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if
    ;; -keepgoing is specified
    (if (eq? *passnum* 0)
	(begin
	  ;; Is this still necessary? I think not. Unreachable tests are marked as such and 
	  ;; should not cause problems here.
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
             ;;    		   (not (member x allinqueue)))
             ;;    		 prereqstrs))
	     (give-up    #f))

	;; We can get here when a prereq has not been run due to *it* having a prereq that failed.
	;; We need to use this to dequeue this item as CANNOTRUN
	;; 
	(if (member testmode '(toplevel))
	    (for-each (lambda (prereq)
			(if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN)
			    (set! give-up #t)))
		      prereqstrs))

	(if (and give-up
		 (not (and (null? tal)(null? reg))))







|







552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
             ;;    		   (not (member x allinqueue)))
             ;;    		 prereqstrs))
	     (give-up    #f))

	;; We can get here when a prereq has not been run due to *it* having a prereq that failed.
	;; We need to use this to dequeue this item as CANNOTRUN
	;; 
	(if (member 'toplevel testmode) ;; '(toplevel)) ;; NOTE: this probably should be (member 'toplevel testmode)
	    (for-each (lambda (prereq)
			(if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN)
			    (set! give-up #t)))
		      prereqstrs))

	(if (and give-up
		 (not (and (null? tal)(null? reg))))
654
655
656
657
658
659
660


661
662
663
664
665
666
667
668
669
670
671
672
673




674
675
676
677
678
679
680
	 (num-running             (list-ref run-limits-info 1))
	 (num-running-in-jobgroup (list-ref run-limits-info 2)) 
	 (max-concurrent-jobs     (list-ref run-limits-info 3))
	 (job-group-limit         (list-ref run-limits-info 4))
	 (prereqs-not-met         (rmt:get-prereqs-not-met run-id waitons item-path testmode area-dat itemmap: itemmap))
	 ;; (prereqs-not-met         (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
	 (fails                   (runs:calc-fails prereqs-not-met))


	 (non-completed           (runs:calc-not-completed prereqs-not-met))
	 (loop-list               (list hed tal reg reruns))
	 ;; configure the load runner
	 (numcpus                 (common:get-num-cpus))
	 (maxload                 (string->number (or (configf:lookup configdat "jobtools" "maxload") "3")))
	 (waitdelay               (string->number (or (configf:lookup configdat "jobtools" "waitdelay") "60"))))
    (debug:print-info 4 "have-resources: " have-resources " prereqs-not-met: (" 
		      (string-intersperse 
		       (map (lambda (t)
			      (if (vector? t)
				  (conc (db:test-get-state t) "/" (db:test-get-status t))
				  (conc " WARNING: t is not a vector=" t )))
			    prereqs-not-met) ", ") ") fails: " fails)




    
    (if (and (not (null? prereqs-not-met))
	     (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
	(debug:print-info 2 "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", ")))

    ;; Don't know at this time if the test have been launched at some time in the past
    ;; i.e. is this a re-launch?







>
>
|











|
>
>
>
>







660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
	 (num-running             (list-ref run-limits-info 1))
	 (num-running-in-jobgroup (list-ref run-limits-info 2)) 
	 (max-concurrent-jobs     (list-ref run-limits-info 3))
	 (job-group-limit         (list-ref run-limits-info 4))
	 (prereqs-not-met         (rmt:get-prereqs-not-met run-id waitons item-path testmode area-dat itemmap: itemmap))
	 ;; (prereqs-not-met         (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
	 (fails                   (runs:calc-fails prereqs-not-met))
	 (non-completed           (filter (lambda (x)             ;; remove hed from not completed list, duh, of course it is not completed!
					    (not (equal? x hed)))
					  (runs:calc-not-completed prereqs-not-met)))
	 (loop-list               (list hed tal reg reruns))
	 ;; configure the load runner
	 (numcpus                 (common:get-num-cpus))
	 (maxload                 (string->number (or (configf:lookup configdat "jobtools" "maxload") "3")))
	 (waitdelay               (string->number (or (configf:lookup configdat "jobtools" "waitdelay") "60"))))
    (debug:print-info 4 "have-resources: " have-resources " prereqs-not-met: (" 
		      (string-intersperse 
		       (map (lambda (t)
			      (if (vector? t)
				  (conc (db:test-get-state t) "/" (db:test-get-status t))
				  (conc " WARNING: t is not a vector=" t )))
			    prereqs-not-met)
		       ", ") ") fails: " fails
		       "\nregistered? " (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f))
			    

    
    (if (and (not (null? prereqs-not-met))
	     (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
	(debug:print-info 2 "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", ")))

    ;; Don't know at this time if the test have been launched at some time in the past
    ;; i.e. is this a re-launch?
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
      ;; could have done hed tal here but doing car/cdr of newtal to rotate tests
      (list (car newtal)(cdr newtal) reg reruns))
     
     ;; This is the final stage, everything is in place so launch the test
     ;;
     ((and have-resources
	   (or (null? prereqs-not-met)
	       (and (eq? testmode 'toplevel)
		    (null? non-completed))))
      ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path))
      ;; we are going to reset all the counters for test retries by setting a new hash table
      ;; this means they will increment only when nothing can be run
      (set! *max-tries-hash* (make-hash-table))
      ;; well, first lets see if cpu load throttling is enabled. If so wait around until the
      ;; average cpu load is under the threshold before continuing







|







762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
      ;; could have done hed tal here but doing car/cdr of newtal to rotate tests
      (list (car newtal)(cdr newtal) reg reruns))
     
     ;; This is the final stage, everything is in place so launch the test
     ;;
     ((and have-resources
	   (or (null? prereqs-not-met)
	       (and (member 'toplevel testmode) ;;  'toplevel)
		    (null? non-completed))))
      ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path))
      ;; we are going to reset all the counters for test retries by setting a new hash table
      ;; this means they will increment only when nothing can be run
      (set! *max-tries-hash* (make-hash-table))
      ;; well, first lets see if cpu load throttling is enabled. If so wait around until the
      ;; average cpu load is under the threshold before continuing
783
784
785
786
787
788
789
790

791
792
793
794
795
796
797
      ;; a message and drop hed from the items to be processed.
      ;; (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met)
      (if (and (not (null? prereqs-not-met))
	       (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
	  (debug:print-info 1 "waiting on tests; " (string-intersperse 
						    (runs:mixed-list-testname-and-testrec->list-of-strings 
						     prereqs-not-met) ", ")))
      (if (null? fails)

	  (begin
	    ;; couldn't run, take a breather
	    (if  (runs:lownoise "Waiting for more work to do..." 60)
		 (debug:print-info 0 "Waiting for more work to do..."))
	    (thread-sleep! 1)
	    (list (car newtal)(cdr newtal) reg reruns))
	  ;; the waiton is FAIL so no point in trying to run hed ever again







|
>







795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
      ;; a message and drop hed from the items to be processed.
      ;; (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met)
      (if (and (not (null? prereqs-not-met))
	       (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
	  (debug:print-info 1 "waiting on tests; " (string-intersperse 
						    (runs:mixed-list-testname-and-testrec->list-of-strings 
						     prereqs-not-met) ", ")))
      (if (or (null? fails)
	      (member 'toplevel testmode))
	  (begin
	    ;; couldn't run, take a breather
	    (if  (runs:lownoise "Waiting for more work to do..." 60)
		 (debug:print-info 0 "Waiting for more work to do..."))
	    (thread-sleep! 1)
	    (list (car newtal)(cdr newtal) reg reruns))
	  ;; the waiton is FAIL so no point in trying to run hed ever again
855
856
857
858
859
860
861

862
863
864
865
866
867
868
869
				  reruns))))
		     (else
		      (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
			  (debug:print 0 "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))
		      ;; (debug:print 0 "         prereqs: " prereqs-not-met)
		      (hash-table-set! test-registry hed 'removed)
		      (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f)

		      (mt:roll-up-pass-fail-counts run-id test-name item-path "FAIL") ;; treat as FAIL
		      (list (if (null? tal)(car newtal)(car tal))
			    tal
			    reg
			    reruns)))))
	      ;; can't drop this - maybe running? Just keep trying
	      (let ((runable-tests (runs:runable-tests prereqs-not-met)))
		(if (null? runable-tests)







>
|







868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
				  reruns))))
		     (else
		      (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
			  (debug:print 0 "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))
		      ;; (debug:print 0 "         prereqs: " prereqs-not-met)
		      (hash-table-set! test-registry hed 'removed)
		      (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f)
		      ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug.
		      (rmt:roll-up-pass-fail-counts run-id test-name item-path "FAIL") ;; treat as FAIL
		      (list (if (null? tal)(car newtal)(car tal))
			    tal
			    reg
			    reruns)))))
	      ;; can't drop this - maybe running? Just keep trying
	      (let ((runable-tests (runs:runable-tests prereqs-not-met)))
		(if (null? runable-tests)

Modified tests/fullrun/tests/all_toplevel/testconfig from [4c397d46e3] to [c99d8b6dbc].

1
2
3
4
5
6
7
8
9
10
11
12
13
[ezsteps]
calcresults megatest -list-runs $MT_RUNNAME -target $MT_TARGET

[requirements]
waiton all_toplevel         exit_0 exit_1  ez_exit2_fail  ez_fail        ez_pass              ezlog_fail \
       ezlog_fail_then_pass ezlog_pass     ezlog_warn     lineitem_fail  lineitem_pass        logpro_required_fail \
       manual_example       neverrun       priority_1     priority_10    priority_10_waiton_1 priority_2 \
       priority_3           priority_4     priority_5     priority_6     priority_7           priority_8 \
       priority_9           runfirst       singletest     singletest2    sqlitespeed          test_mt_vars \
       ez_fail_quick        test1                test2          special        blocktestxz

# This is a "toplevel" test, it does not require waitons to be non-FAIL to run
mode toplevel




|

|


|



1
2
3
4
5
6
7
8
9
10
11
12
13
[ezsteps]
calcresults megatest -list-runs $MT_RUNNAME -target $MT_TARGET

[requirements]
waiton                      exit_0 exit_1  ez_exit2_fail  ez_fail        ez_pass              ezlog_fail \
       ezlog_fail_then_pass ezlog_pass     ezlog_warn     lineitem_fail  lineitem_pass        logpro_required_fail \
       manual_example       neverrun       priority_1     priority_10    priority_10_waiton_1 \
       priority_3           priority_4     priority_5     priority_6     priority_7           priority_8 \
       priority_9           runfirst       singletest     singletest2    sqlitespeed          test_mt_vars \
       ez_fail_quick        test1          test2

# This is a "toplevel" test, it does not require waitons to be non-FAIL to run
mode toplevel