Megatest

Check-in [3d3e19cd4d]
Login
Overview
Comment:Fixed toplevel mode tests not running
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.60
Files: files | file ages | folders
SHA1: 3d3e19cd4d952a5160f5f93f8877d8b9b60a5452
User & Date: matt on 2015-05-21 23:29:23
Other Links: branch diff | manifest | tags
Context
2015-05-26
22:29
Improved exit handling slightly check-in: b589024130 user: matt tags: v1.60
2015-05-24
20:31
Merged in lastest changes from v1.60 check-in: 40ce199020 user: matt tags: multi-area
2015-05-21
23:29
Fixed toplevel mode tests not running check-in: 3d3e19cd4d user: matt tags: v1.60
01:44
Cherrypicked 9b7efa961f and 66ff347255, passes test4 check-in: e1b983e8cf user: matt tags: v1.60
Changes

Modified megatest-version.scm from [456f37ff9a] to [efcc00779e].

1
2
3
4
5
6
7
;; Always use two or four digit decimal
;; 1.01, 1.02...1.10,1.11,1,1101 ... 1.99,2.00..

(declare (unit megatest-version))

(define megatest-version 1.6012)






|

1
2
3
4
5
6
7
;; Always use two or four digit decimal
;; 1.01, 1.02...1.10,1.11,1,1101 ... 1.99,2.00..

(declare (unit megatest-version))

(define megatest-version 1.6013)

Modified runs.scm from [eb38f67113] to [c0fe0ada27].

222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247




248
249
250
251
252
253
254
255
256

257
258
259
260
261
262
263
	 (task-key           (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id)))
	 (tdbdat             (tasks:open-db)))

    (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))

    (set-signal-handler! signal/int
			 (lambda (signum)
			   (signal-mask! signum)
			   (print "Received signal " signum ", cleaning up before exit. Please wait...")
			   (let ((tdbdat (tasks:open-db)))
			     (rmt:tasks-set-state-given-param-key task-key "killed"))
			   (print "Killed by signal " signum ". Exiting")
			   (exit)))

    ;; register this run in monitor.db
    (rmt:tasks-add "run-tests" user target runname test-patts task-key) ;; params)
    (rmt:tasks-set-state-given-param-key task-key "running")
    (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process
    (if (file-exists? runconfigf)
	(setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target)
	(debug:print 0 "WARNING: You do not have a run config file: " runconfigf))

    ;; Now generate all the tests lists
    (set! all-tests-registry (tests:get-all))
    (set! all-test-names     (hash-table-keys all-tests-registry))
    (set! test-names         (tests:filter-test-names all-test-names test-patts))




    (set! required-tests     (lset-intersection equal? (string-split test-patts ",") test-names))
    
    ;; look up all tests matching the comma separated list of globs in
    ;; test-patts (using % as wildcard)

    ;; (set! test-names (delete-duplicates (tests:get-valid-tests *toppath* test-patts)))
    (debug:print-info 0 "tests search path: " (tests:get-tests-search-path *configdat*))
    (debug:print-info 0 "all tests:  " (string-intersperse (sort all-test-names string<) " "))
    (debug:print-info 0 "test names: " (string-intersperse (sort test-names string<) " "))


    ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if
    ;; -keepgoing is specified
    (if (eq? *passnum* 0)
	(begin
	  ;; Is this still necessary? I think not. Unreachable tests are marked as such and 
	  ;; should not cause problems here.







|


















>
>
>
>
|





|
|
|
>







222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
	 (task-key           (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id)))
	 (tdbdat             (tasks:open-db)))

    (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))

    (set-signal-handler! signal/int
			 (lambda (signum)
			   ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
			   (print "Received signal " signum ", cleaning up before exit. Please wait...")
			   (let ((tdbdat (tasks:open-db)))
			     (rmt:tasks-set-state-given-param-key task-key "killed"))
			   (print "Killed by signal " signum ". Exiting")
			   (exit)))

    ;; register this run in monitor.db
    (rmt:tasks-add "run-tests" user target runname test-patts task-key) ;; params)
    (rmt:tasks-set-state-given-param-key task-key "running")
    (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process
    (if (file-exists? runconfigf)
	(setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target)
	(debug:print 0 "WARNING: You do not have a run config file: " runconfigf))

    ;; Now generate all the tests lists
    (set! all-tests-registry (tests:get-all))
    (set! all-test-names     (hash-table-keys all-tests-registry))
    (set! test-names         (tests:filter-test-names all-test-names test-patts))

    ;; I think seeding required-tests with all test-names makes sense but lack analysis to back that up.
    ;;
    ;; (set! required-tests     (lset-intersection equal? (string-split test-patts ",") all-test-names))
    (set! required-tests     (lset-intersection equal? test-names all-test-names))
    
    ;; look up all tests matching the comma separated list of globs in
    ;; test-patts (using % as wildcard)

    ;; (set! test-names (delete-duplicates (tests:get-valid-tests *toppath* test-patts)))
    (debug:print-info 0 "tests search path: " (string-intersperse (tests:get-tests-search-path *configdat*) " "))
    (debug:print-info 0 "all tests:         " (string-intersperse (sort all-test-names string<) " "))
    (debug:print-info 0 "test names:        " (string-intersperse (sort test-names string<) " "))
    (debug:print-info 0 "required tests:    " (string-intersperse (sort required-tests string<) " "))

    ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if
    ;; -keepgoing is specified
    (if (eq? *passnum* 0)
	(begin
	  ;; Is this still necessary? I think not. Unreachable tests are marked as such and 
	  ;; should not cause problems here.
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
             ;;    		   (not (member x allinqueue)))
             ;;    		 prereqstrs))
	     (give-up    #f))

	;; We can get here when a prereq has not been run due to *it* having a prereq that failed.
	;; We need to use this to dequeue this item as CANNOTRUN
	;; 
	(if (member testmode '(toplevel))
	    (for-each (lambda (prereq)
			(if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN)
			    (set! give-up #t)))
		      prereqstrs))

	(if (and give-up
		 (not (and (null? tal)(null? reg))))







|







547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
             ;;    		   (not (member x allinqueue)))
             ;;    		 prereqstrs))
	     (give-up    #f))

	;; We can get here when a prereq has not been run due to *it* having a prereq that failed.
	;; We need to use this to dequeue this item as CANNOTRUN
	;; 
	(if (member 'toplevel testmode) ;; '(toplevel)) ;; NOTE: this probably should be (member 'toplevel testmode)
	    (for-each (lambda (prereq)
			(if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN)
			    (set! give-up #t)))
		      prereqstrs))

	(if (and give-up
		 (not (and (null? tal)(null? reg))))
648
649
650
651
652
653
654


655
656
657
658
659
660
661
662
663
664
665
666
667




668
669
670
671
672
673
674
	 (num-running             (list-ref run-limits-info 1))
	 (num-running-in-jobgroup (list-ref run-limits-info 2)) 
	 (max-concurrent-jobs     (list-ref run-limits-info 3))
	 (job-group-limit         (list-ref run-limits-info 4))
	 (prereqs-not-met         (rmt:get-prereqs-not-met run-id waitons item-path testmode itemmap: itemmap))
	 ;; (prereqs-not-met         (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
	 (fails                   (runs:calc-fails prereqs-not-met))


	 (non-completed           (runs:calc-not-completed prereqs-not-met))
	 (loop-list               (list hed tal reg reruns))
	 ;; configure the load runner
	 (numcpus                 (common:get-num-cpus))
	 (maxload                 (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3")))
	 (waitdelay               (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60"))))
    (debug:print-info 4 "have-resources: " have-resources " prereqs-not-met: (" 
		      (string-intersperse 
		       (map (lambda (t)
			      (if (vector? t)
				  (conc (db:test-get-state t) "/" (db:test-get-status t))
				  (conc " WARNING: t is not a vector=" t )))
			    prereqs-not-met) ", ") ") fails: " fails)




    
    (if (and (not (null? prereqs-not-met))
	     (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
	(debug:print-info 2 "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", ")))

    ;; Don't know at this time if the test have been launched at some time in the past
    ;; i.e. is this a re-launch?







>
>
|











|
>
>
>
>







653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
	 (num-running             (list-ref run-limits-info 1))
	 (num-running-in-jobgroup (list-ref run-limits-info 2)) 
	 (max-concurrent-jobs     (list-ref run-limits-info 3))
	 (job-group-limit         (list-ref run-limits-info 4))
	 (prereqs-not-met         (rmt:get-prereqs-not-met run-id waitons item-path testmode itemmap: itemmap))
	 ;; (prereqs-not-met         (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
	 (fails                   (runs:calc-fails prereqs-not-met))
	 (non-completed           (filter (lambda (x)             ;; remove hed from not completed list, duh, of course it is not completed!
					    (not (equal? x hed)))
					  (runs:calc-not-completed prereqs-not-met)))
	 (loop-list               (list hed tal reg reruns))
	 ;; configure the load runner
	 (numcpus                 (common:get-num-cpus))
	 (maxload                 (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3")))
	 (waitdelay               (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60"))))
    (debug:print-info 4 "have-resources: " have-resources " prereqs-not-met: (" 
		      (string-intersperse 
		       (map (lambda (t)
			      (if (vector? t)
				  (conc (db:test-get-state t) "/" (db:test-get-status t))
				  (conc " WARNING: t is not a vector=" t )))
			    prereqs-not-met)
		       ", ") ") fails: " fails
		       "\nregistered? " (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f))
			    

    
    (if (and (not (null? prereqs-not-met))
	     (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
	(debug:print-info 2 "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", ")))

    ;; Don't know at this time if the test have been launched at some time in the past
    ;; i.e. is this a re-launch?
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
      ;; could have done hed tal here but doing car/cdr of newtal to rotate tests
      (list (car newtal)(cdr newtal) reg reruns))
     
     ;; This is the final stage, everything is in place so launch the test
     ;;
     ((and have-resources
	   (or (null? prereqs-not-met)
	       (and (eq? testmode 'toplevel)
		    (null? non-completed))))
      ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path))
      ;; we are going to reset all the counters for test retries by setting a new hash table
      ;; this means they will increment only when nothing can be run
      (set! *max-tries-hash* (make-hash-table))
      ;; well, first lets see if cpu load throttling is enabled. If so wait around until the
      ;; average cpu load is under the threshold before continuing







|







755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
      ;; could have done hed tal here but doing car/cdr of newtal to rotate tests
      (list (car newtal)(cdr newtal) reg reruns))
     
     ;; This is the final stage, everything is in place so launch the test
     ;;
     ((and have-resources
	   (or (null? prereqs-not-met)
	       (and (member 'toplevel testmode) ;;  'toplevel)
		    (null? non-completed))))
      ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path))
      ;; we are going to reset all the counters for test retries by setting a new hash table
      ;; this means they will increment only when nothing can be run
      (set! *max-tries-hash* (make-hash-table))
      ;; well, first lets see if cpu load throttling is enabled. If so wait around until the
      ;; average cpu load is under the threshold before continuing
777
778
779
780
781
782
783
784

785
786
787
788
789
790
791
      ;; a message and drop hed from the items to be processed.
      ;; (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met)
      (if (and (not (null? prereqs-not-met))
	       (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
	  (debug:print-info 1 "waiting on tests; " (string-intersperse 
						    (runs:mixed-list-testname-and-testrec->list-of-strings 
						     prereqs-not-met) ", ")))
      (if (null? fails)

	  (begin
	    ;; couldn't run, take a breather
	    (if  (runs:lownoise "Waiting for more work to do..." 60)
		 (debug:print-info 0 "Waiting for more work to do..."))
	    (thread-sleep! 1)
	    (list (car newtal)(cdr newtal) reg reruns))
	  ;; the waiton is FAIL so no point in trying to run hed ever again







|
>







788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
      ;; a message and drop hed from the items to be processed.
      ;; (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met)
      (if (and (not (null? prereqs-not-met))
	       (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
	  (debug:print-info 1 "waiting on tests; " (string-intersperse 
						    (runs:mixed-list-testname-and-testrec->list-of-strings 
						     prereqs-not-met) ", ")))
      (if (or (null? fails)
	      (member 'toplevel testmode))
	  (begin
	    ;; couldn't run, take a breather
	    (if  (runs:lownoise "Waiting for more work to do..." 60)
		 (debug:print-info 0 "Waiting for more work to do..."))
	    (thread-sleep! 1)
	    (list (car newtal)(cdr newtal) reg reruns))
	  ;; the waiton is FAIL so no point in trying to run hed ever again

Modified tests/fullrun/tests/all_toplevel/testconfig from [4c397d46e3] to [c99d8b6dbc].

1
2
3
4
5
6
7
8
9
10
11
12
13
[ezsteps]
calcresults megatest -list-runs $MT_RUNNAME -target $MT_TARGET

[requirements]
waiton all_toplevel         exit_0 exit_1  ez_exit2_fail  ez_fail        ez_pass              ezlog_fail \
       ezlog_fail_then_pass ezlog_pass     ezlog_warn     lineitem_fail  lineitem_pass        logpro_required_fail \
       manual_example       neverrun       priority_1     priority_10    priority_10_waiton_1 priority_2 \
       priority_3           priority_4     priority_5     priority_6     priority_7           priority_8 \
       priority_9           runfirst       singletest     singletest2    sqlitespeed          test_mt_vars \
       ez_fail_quick        test1                test2          special        blocktestxz

# This is a "toplevel" test, it does not require waitons to be non-FAIL to run
mode toplevel




|

|


|



1
2
3
4
5
6
7
8
9
10
11
12
13
[ezsteps]
calcresults megatest -list-runs $MT_RUNNAME -target $MT_TARGET

[requirements]
waiton                      exit_0 exit_1  ez_exit2_fail  ez_fail        ez_pass              ezlog_fail \
       ezlog_fail_then_pass ezlog_pass     ezlog_warn     lineitem_fail  lineitem_pass        logpro_required_fail \
       manual_example       neverrun       priority_1     priority_10    priority_10_waiton_1 \
       priority_3           priority_4     priority_5     priority_6     priority_7           priority_8 \
       priority_9           runfirst       singletest     singletest2    sqlitespeed          test_mt_vars \
       ez_fail_quick        test1          test2

# This is a "toplevel" test, it does not require waitons to be non-FAIL to run
mode toplevel