Megatest

Check-in [fd20d22153]
Login
Overview
Comment:Added exec to nbfake. Changed test4 to do launchwait. Added forced registration of top level tests as not having them is a path to possible escape on silent failure
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.55
Files: files | file ages | folders
SHA1: fd20d22153a1099ddde103c281a266cba4847dca
User & Date: matt on 2013-09-12 23:38:27
Other Links: branch diff | manifest | tags
Context
2013-09-12
23:42
Fixed typo check-in: cbc9328c04 user: matt tags: v1.55
23:38
Added exec to nbfake. Changed test4 to do launchwait. Added forced registration of top level tests as not having them is a path to possible escape on silent failure check-in: fd20d22153 user: matt tags: v1.55
2013-09-11
23:36
Added child reaper based on post to #chicken by andyjpg, it seems to work well check-in: 2886acdd2f user: matt tags: v1.55
Changes

Modified runs.scm from [250a158829] to [0825d9ad63].

689
690
691
692
693
694
695

696
697
698
699
700
701
702
703
704

705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720







721
722
723
724
725
726
727
    ;; Initialize the test-registry hash with tests that already have a record
    ;; convert state to symbol and use that as the hash value
    (for-each (lambda (trec)
		(let ((id (db:test-get-id        trec))
		      (tn (db:test-get-testname  trec))
		      (ip (db:test-get-item-path trec))
		      (st (db:test-get-state     trec)))

		  (hash-table-set! test-registry (runs:make-full-test-name tn ip) (string->symbol st))))
	      tests-info)
    (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100))

    (let loop ((hed         (car sorted-test-names))
	       (tal         (cdr sorted-test-names))
	       (reg         '()) ;; registered, put these at the head of tal 
	       (reruns      '()))
      (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns))

      ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns)
      (let* ((test-record (hash-table-ref test-records hed))
	     (test-name   (tests:testqueue-get-testname test-record))
	     (tconfig     (tests:testqueue-get-testconfig test-record))
	     (jobgroup    (config-lookup tconfig "requirements" "jobgroup"))
	     (testmode    (let ((m (config-lookup tconfig "requirements" "mode")))
			    (if m (string->symbol m) 'normal)))
	     (waitons     (tests:testqueue-get-waitons    test-record))
	     (priority    (tests:testqueue-get-priority   test-record))
	     (itemdat     (tests:testqueue-get-itemdat    test-record)) ;; itemdat can be a string, list or #f
	     (items       (tests:testqueue-get-items      test-record))
	     (item-path   (item-list->path itemdat))
	     (tfullname   (runs:make-full-test-name test-name item-path))
	     (newtal      (append tal (list hed)))
	     (regfull     (>= (length reg) reglen)))








	;; Fast skip of tests that are already "COMPLETED" - NO! Cannot do that as the items may not have been expanded yet :(
	;;
	(if (member (hash-table-ref/default test-registry tfullname #f) 
		    '(DONOTRUN removed)) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE))
	    (begin
	      (debug:print-info 0 "Skipping test " tfullname " as it has been marked do not run due to being completed or not runnable")
	      (if (or (not (null? tal))(not (null? reg)))







>
|








>
















>
>
>
>
>
>
>







689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
    ;; Initialize the test-registry hash with tests that already have a record
    ;; convert state to symbol and use that as the hash value
    (for-each (lambda (trec)
		(let ((id (db:test-get-id        trec))
		      (tn (db:test-get-testname  trec))
		      (ip (db:test-get-item-path trec))
		      (st (db:test-get-state     trec)))
		  (if (not (equal? st "DELETED"))
		      (hash-table-set! test-registry (runs:make-full-test-name tn ip) (string->symbol st)))))
	      tests-info)
    (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100))

    (let loop ((hed         (car sorted-test-names))
	       (tal         (cdr sorted-test-names))
	       (reg         '()) ;; registered, put these at the head of tal 
	       (reruns      '()))
      (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns))

      ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns)
      (let* ((test-record (hash-table-ref test-records hed))
	     (test-name   (tests:testqueue-get-testname test-record))
	     (tconfig     (tests:testqueue-get-testconfig test-record))
	     (jobgroup    (config-lookup tconfig "requirements" "jobgroup"))
	     (testmode    (let ((m (config-lookup tconfig "requirements" "mode")))
			    (if m (string->symbol m) 'normal)))
	     (waitons     (tests:testqueue-get-waitons    test-record))
	     (priority    (tests:testqueue-get-priority   test-record))
	     (itemdat     (tests:testqueue-get-itemdat    test-record)) ;; itemdat can be a string, list or #f
	     (items       (tests:testqueue-get-items      test-record))
	     (item-path   (item-list->path itemdat))
	     (tfullname   (runs:make-full-test-name test-name item-path))
	     (newtal      (append tal (list hed)))
	     (regfull     (>= (length reg) reglen)))

	;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard
	;; and it is clear they *should* have run but did not.
	(if (not (hash-table-ref/default test-registry (runs:make-full-test-name hed "") #f))
	    (begin
	      (cdb:tests-register-test *runremote* run-id hed "")
	      (hash-table-set! test-registry (runs:make-full-test-name hed "") 'done)))
	
	;; Fast skip of tests that are already "COMPLETED" - NO! Cannot do that as the items may not have been expanded yet :(
	;;
	(if (member (hash-table-ref/default test-registry tfullname #f) 
		    '(DONOTRUN removed)) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE))
	    (begin
	      (debug:print-info 0 "Skipping test " tfullname " as it has been marked do not run due to being completed or not runnable")
	      (if (or (not (null? tal))(not (null? reg)))

Modified tests/fullrun/config/mt_include_1.config from [6243d15a3d] to [32c08be131].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
[setup]
# executable /path/to/megatest
max_concurrent_jobs 25

linktree #{getenv MT_RUN_AREA_HOME}/tmp/mt_links

[jobtools]
useshell yes
# ## launcher launches jobs, the job is managed on the target host
## by megatest, comment out launcher to run local
# workhosts localhost hermes
launcher nbfake
# launcher nodanggood

## use "xterm -e csi -- " as a launcher to examine the launch environment.
## exit with (exit)
## get a shell with (system "bash")
# launcher xterm -e csi --











|






1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
[setup]
# executable /path/to/megatest
max_concurrent_jobs 25

linktree #{getenv MT_RUN_AREA_HOME}/tmp/mt_links

[jobtools]
useshell yes
# ## launcher launches jobs, the job is managed on the target host
## by megatest, comment out launcher to run local
# workhosts localhost hermes
launcher exec nbfake
# launcher nodanggood

## use "xterm -e csi -- " as a launcher to examine the launch environment.
## exit with (exit)
## get a shell with (system "bash")
# launcher xterm -e csi --

Modified tests/fullrun/megatest.config from [ddb8ea0c45] to [438c5f6345].

15
16
17
18
19
20
21

22
23
24
25
26
27
28
29
30
31
32
parent #{shell readlink -f $MT_RUN_AREA_HOME/..}

[tests-paths]
1 #{get misc parent}/simplerun/tests

[setup]
# Set launchwait to no to use the more aggressive code that does not wait for the launch to complete before proceeding

launchwait no

# Use http instead of direct filesystem access
transport http

# If set to "default" the old code is used. Otherwise defaults to 200 or uses
# numeric value given.
#
runqueue 20









>
|


|







15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
parent #{shell readlink -f $MT_RUN_AREA_HOME/..}

[tests-paths]
1 #{get misc parent}/simplerun/tests

[setup]
# Set launchwait to no to use the more aggressive code that does not wait for the launch to complete before proceeding
# this may save a few milliseconds on launching tests
# launchwait no

# Use http instead of direct filesystem access
# transport http

# If set to "default" the old code is used. Otherwise defaults to 200 or uses
# numeric value given.
#
runqueue 20


Modified utils/nbfake from [455975d5ec] to [8a5014b010].

1
2
3
4
5
6
7
8
9
10
11
12
#!/bin/bash

# nbfake: launch the command given as arguments in the background under
# nohup, sending its stdout and stderr to a file named NBFAKE-<timestamp>.
# The command runs locally when TARGETHOST is empty or unset, otherwise on
# $TARGETHOST via ssh. TARGETHOST is not assigned in this script, so it is
# taken from the caller's environment.
#
# NOTE: $* is expanded inside a double-quoted string, so all arguments are
# joined with spaces into a single command line; the quoting of individual
# arguments is not preserved.

# ssh localhost "nohup $* > nbfake.log 2> nbfake.err < /dev/null"

# Can't always trust $PWD
CURRWD=`pwd`

if [[ $TARGETHOST == ""  ]]; then
  # Local launch: sh runs as a child of this script and puts the nohup'd
  # command in the background (trailing &). DISPLAY and PATH are re-exported
  # with the values they have in this script. The backquoted date is expanded
  # by this script, before sh starts, so the log name is fixed at launch time
  # (ISO year, literal "WW", ISO week, day of week, HH:MM:SS).
  sh -c "cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* > NBFAKE-`date +%GWW%V.%u_%T` 2>&1 &"
else
  # Remote launch: same command line, run through ssh on $TARGETHOST.
  # -n redirects ssh's stdin from /dev/null and -f makes ssh go to the
  # background before the command executes. $CURRWD, $DISPLAY, $PATH and the
  # date stamp are all expanded locally, so the remote side receives this
  # host's values (this presumably relies on the same directory path existing
  # on the target host - confirm).
  ssh -n -f $TARGETHOST "sh -c \"cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* > NBFAKE-`date +%GWW%V.%u_%T` 2>&1 &\""
fi








|

|

1
2
3
4
5
6
7
8
9
10
11
12
#!/bin/bash

# nbfake: launch the command given as arguments in the background under
# nohup, sending its stdout and stderr to a file named NBFAKE-<timestamp>.
# The command runs locally when TARGETHOST is empty or unset, otherwise on
# $TARGETHOST via ssh. TARGETHOST is not assigned in this script, so it is
# taken from the caller's environment.
#
# NOTE: $* is expanded inside a double-quoted string, so all arguments are
# joined with spaces into a single command line; the quoting of individual
# arguments is not preserved.

# ssh localhost "nohup $* > nbfake.log 2> nbfake.err < /dev/null"

# Can't always trust $PWD
CURRWD=`pwd`

if [[ $TARGETHOST == ""  ]]; then
  # Local launch: exec replaces this bash process with sh instead of running
  # sh as a child. sh then puts the nohup'd command in the background
  # (trailing &). DISPLAY and PATH are re-exported with the values they have
  # in this script. The backquoted date is expanded by this script, before sh
  # starts, so the log name is fixed at launch time (ISO year, literal "WW",
  # ISO week, day of week, HH:MM:SS).
  exec sh -c "cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* > NBFAKE-`date +%GWW%V.%u_%T` 2>&1 &"
else
  # Remote launch: exec replaces this bash process with ssh, which runs the
  # same command line on $TARGETHOST. -n redirects ssh's stdin from /dev/null
  # and -f makes ssh go to the background before the command executes.
  # $CURRWD, $DISPLAY, $PATH and the date stamp are all expanded locally, so
  # the remote side receives this host's values (this presumably relies on
  # the same directory path existing on the target host - confirm).
  exec ssh -n -f $TARGETHOST "sh -c \"cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* > NBFAKE-`date +%GWW%V.%u_%T` 2>&1 &\""
fi