Megatest

Changes On Branch v1.60_ezsteps_tcsh_fix
Login

Changes In Branch v1.60_ezsteps_tcsh_fix Excluding Merge-Ins

This is equivalent to a diff from fb5d8213b7 to 66ff347255

2015-05-21
01:28
cherrypicked a064 check-in: 50cc57039a user: matt tags: v1.60
01:03
Partial fix of false PASS on ezsteps Closed-Leaf check-in: 66ff347255 user: matt tags: v1.60_ezsteps_tcsh_fix
2015-05-20
23:38
put more output in mt_launch.log check-in: 157b6d1b21 user: matt tags: v1.60_ezsteps_tcsh_fix
2015-05-11
17:06
Fixed ezstep command execution where system default shell is tcsh check-in: 9b7efa961f user: mrwellan tags: v1.60_ezsteps_tcsh_fix
05:13
Added generic pool of cpus to batchsim check-in: fb5d8213b7 user: matt tags: v1.60
2015-05-07
18:09
Capture missing fix check-in: a9aad76712 user: mrwellan tags: v1.60

Modified db.scm from [bd9d70e94b] to [272f710720].

2258
2259
2260
2261
2262
2263
2264
2265

2266
2267
2268
2269
2270
2271
2272
2258
2259
2260
2261
2262
2263
2264

2265
2266
2267
2268
2269
2270
2271
2272







-
+







	     #f
	     (lambda (db)
	       (sqlite3:first-result
		db
		(conc "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND testname in ('"
		      (string-intersperse testnames "','")
		      "') AND NOT (uname = 'n/a' AND item_path='');")) ;; should this include the (uname = 'n/a' ...) ???
	       0)))))))
	       )))))))
             ;; DEBUG FIXME - need to merge this v.155 query correctly   
             ;; AND testname in (SELECT testname FROM test_meta WHERE jobgroup=?)
             ;; AND NOT (uname = 'n/a' AND item_path = '');"

;; done with run when:
;;   0 tests in LAUNCHED, NOT_STARTED, REMOTEHOSTSTART, RUNNING
(define (db:estimated-tests-remaining dbstruct run-id)

Modified launch.scm from [0044077627] to [4f41513d7a].

79
80
81
82
83
84
85
86

87
88

89
90
91
92
93
94
95
79
80
81
82
83
84
85

86
87

88
89
90
91
92
93
94
95







-
+

-
+







    ;; (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd))
    
    (debug:print 4 "script: " script)
    (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f)
    ;; now launch the actual process
    (call-with-environment-variables 
     (list (cons "PATH" (conc (get-environment-variable "PATH") ":.")))
     (lambda ()
     (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1")
       (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 
	      (pid (process-run cmd)))
	      (pid (process-run "/bin/bash" (list "-c" cmd))))
	 (rmt:test-set-top-process-pid run-id test-id pid)
	 (let processloop ((i 0))
	   (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
		       (mutex-lock! m)
		       (vector-set! exit-info 0 pid)
		       (vector-set! exit-info 1 exit-status)
		       (vector-set! exit-info 2 exit-code)
198
199
200
201
202
203
204
205


206
207


208
209

210
211
212
213
214
215
216
198
199
200
201
202
203
204

205
206
207
208
209
210
211

212
213
214
215
216
217
218
219







-
+
+


+
+

-
+







                                              runscript))))) ;; assume it is on the path
	       ;; (rollup-status 0)
	       )
	  (change-directory top-path)

	  ;; (set-signal-handler! signal/int (lambda ()
					    
	  ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,
	  ;; WAS: Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,
	  ;; NOW: Do not run the test unless state is LAUNCHED
	  ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
	  ;;
	  ;; This is flawed. It should be a single transaction that tests for NOT_STARTED and updates to REMOTEHOSTSTART
	  ;;
	  (let ((test-info (rmt:get-testinfo-state-status run-id test-id)))
	    (if (not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
	    (if (equal? (db:test-get-state test-info) "LAUNCHED") ;; '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
		(tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
		(begin
		  (debug:print 0 "ERROR: test state is " (db:test-get-state test-info) ", cannot proceed")
		  (exit))))
	  
	  (debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name))
	  (set! keys       (rmt:get-keys))
322
323
324
325
326
327
328
329

330
331
332
333
334
335
336
325
326
327
328
329
330
331

332
333
334
335
336
337
338
339







-
+







				 ;; (thread-sleep! 0.3)
				 (tests:test-force-state-status! run-id test-id "RUNNING" "n/a")
				 (rmt:roll-up-pass-fail-counts run-id test-name item-path "RUNNING")
				 ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here

				 ;; if there is a runscript do it first
				 (if fullrunscript
				     (let ((pid (process-run fullrunscript)))
				     (let ((pid (process-run "/bin/bash" (list "-c" (conc fullrunscript " >> " work-area "/mt_launch.log 2>&1")))))
				       (rmt:test-set-top-process-pid run-id test-id pid)
				       (let loop ((i 0))
					 (let-values
					  (((pid-val exit-status exit-code) (process-wait pid #t)))
					  (mutex-lock! m)
					  (vector-set! exit-info 0 pid)
					  (vector-set! exit-info 1 exit-status)
880
881
882
883
884
885
886



887






888
889
890
891
892
893
894
883
884
885
886
887
888
889
890
891
892

893
894
895
896
897
898
899
900
901
902
903
904
905







+
+
+
-
+
+
+
+
+
+







				      (list 'env-ovrd  (hash-table-ref/default *configdat* "env-override" '())) 
				      (list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
				      (list 'runname   runname)
				      (list 'mt-bindir-path mt-bindir-path))))))))

    ;; clean out step records from previous run if they exist
    ;; (rmt:delete-test-step-records run-id test-id)
    
    ;; Moving launch logs to MT_RUN_AREA_HOME/logs 
    ;;
    (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir
    (let ((launchdir (configf:lookup *configdat* "setup" "launchdir"))) ;; (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir
      (if (not launchdir) ;; default
	  (change-directory (conc *toppath* "/logs")) ;; can assume this exists
	  (case (string->symbol launchdir)
	    ((legacy)(change-directory work-area))
	    (else    (change-directory launchdir)))))
    (cond
     ((and launcher hosts) ;; must be using ssh hostname
      (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms) debug-param)))
     ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
     (launcher
      (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms) debug-param)))
     ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms))))
918
919
920
921
922
923
924
925

926
927
928
929
930
931
932

933
934
935
936
937
938
939
929
930
931
932
933
934
935

936
937
938
939
940
941
942

943
944
945
946
947
948
949
950







-
+






-
+







	   (launch-results (apply (if launchwait
				      cmd-run-with-stderr->list
				      process-run)
				  (if useshell
				      (let ((cmdstr (string-intersperse fullcmd " ")))
					(if launchwait
					    cmdstr
					    (conc cmdstr " >> mt_launch.log 2>&1")))
					    (conc cmdstr " >> " work-area "/mt_launch.log 2>&1")))
				      (car fullcmd))
				  (if useshell
				      '()
				      (cdr fullcmd)))))
      (if (not launchwait) ;; give the OS a little time to allow the process to start
	  (thread-sleep! 0.01))
      (with-output-to-file "mt_launch.log"
      (with-output-to-file (conc work-area "/mt_launch.log")
	(lambda ()
	  (if (list? launch-results)
	      (apply print launch-results)
	      (print "NOTE: launched \"" fullcmd "\"\n  but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n  if you have problems with this"))
	  #:append))
      (debug:print 2 "Launching completed, updating db")
      (debug:print 2 "Launch results: " launch-results)

Modified runs.scm from [e4e58cdff9] to [eb38f67113].

849
850
851
852
853
854
855

856

857
858
859
860
861
862
863
849
850
851
852
853
854
855
856

857
858
859
860
861
862
863
864







+
-
+







				  reruns))))
		     (else
		      (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
			  (debug:print 0 "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))
		      ;; (debug:print 0 "         prereqs: " prereqs-not-met)
		      (hash-table-set! test-registry hed 'removed)
		      (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f)
		      ;; I'm unclear on whether this roll-up is needed - it may be the root cause of the "all set to FAIL" bug.
		      (mt:roll-up-pass-fail-counts run-id test-name item-path "FAIL") ;; treat as FAIL
		      (rmt:roll-up-pass-fail-counts run-id test-name item-path "FAIL") ;; treat as FAIL
		      (list (if (null? tal)(car newtal)(car tal))
			    tal
			    reg
			    reruns)))))
	      ;; can't drop this - maybe running? Just keep trying
	      (let ((runable-tests (runs:runable-tests prereqs-not-met)))
		(if (null? runable-tests)

Modified tests/fullrun/megatest.config from [e5113ba78d] to [5c36f01faf].

25
26
27
28
29
30
31
32

33
34
35
36
37
38
39



40
41
42


43
44
45
46
47
48
49
25
26
27
28
29
30
31

32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54







-
+







+
+
+



+
+







[setup]

# turn off faststart, put monitor.db in MT_RUN_AREA_HOME/db
# and set the dbdir to /var/tmp/$USER/mt_db to enable keeping
# the raw db in /var/tmp/$USER
#
faststart  no
monitordir #{getenv MT_RUN_AREA_HOME}/db
monitordir #{scheme (conc *toppath*)}/db
dbdir      /var/tmp/#{getenv USER}/mt_db

# Set launchwait to no to use the more aggressive code that does not wait for the launch to complete before proceeding;
# this may save a few milliseconds when launching tests
# launchwait no
waivercommentpatt ^WW\d+ [a-z].*
incomplete-timeout 1

# set the dbdir, default is linktree
dbdir #{getenv MT_RUN_AREA_HOME}/db/

# Wait for runs to finish completely. "yes" enables this; any other value means no
run-wait yes



# If set to "default" the old code is used. Otherwise defaults to 200 or uses
# numeric value given.
#
runqueue 20

# Default runtimelim 1d 1h 1m 10s

Modified tests/fullrun/tests/all_toplevel/testconfig from [4c397d46e3] to [471c210c60].

1
2
3
4


































5
6
7
8
9
10






11
12
13



1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38






39
40
41
42
43
44
45
46
47
48
49
50




+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
+
+
+
+
+
+



+
+
+
[ezsteps]
calcresults megatest -list-runs $MT_RUNNAME -target $MT_TARGET

[requirements]
waiton \
exit_0		     \
exit_1		     \
ez_exit2_fail	     \
ez_fail		     \
ez_fail_quick	     \
ezlog_fail	     \
ezlog_fail_then_pass \
ezlog_pass	     \
ezlog_warn	     \
ez_pass		     \
lineitem_fail	     \
lineitem_pass	     \
logpro_required_fail \
manual_example	     \
neverrun	     \
priority_1	     \
priority_10	     \
priority_10_waiton_1 \
priority_3	     \
priority_4	     \
priority_5	     \
priority_6	     \
priority_7	     \
priority_8	     \
priority_9	     \
runfirst	     \
singletest	     \
singletest2	     \
special		     \
sqlitespeed	     \
test1		     \
test2

waiton all_toplevel         exit_0 exit_1  ez_exit2_fail  ez_fail        ez_pass              ezlog_fail \
       ezlog_fail_then_pass ezlog_pass     ezlog_warn     lineitem_fail  lineitem_pass        logpro_required_fail \
       manual_example       neverrun       priority_1     priority_10    priority_10_waiton_1 priority_2 \
       priority_3           priority_4     priority_5     priority_6     priority_7           priority_8 \
       priority_9           runfirst       singletest     singletest2    sqlitespeed          test_mt_vars \
       ez_fail_quick        test1                test2          special        blocktestxz
#        exit_0 exit_1  ez_exit2_fail  ez_fail        ez_pass              ezlog_fail \
#        ezlog_fail_then_pass ezlog_pass     ezlog_warn     lineitem_fail  lineitem_pass        logpro_required_fail \
#        manual_example       neverrun       priority_1     priority_10    priority_10_waiton_1 priority_2 \
#        priority_3           priority_4     priority_5     priority_6     priority_7           priority_8 \
#        priority_9           runfirst       singletest     singletest2    sqlitespeed          test_mt_vars \
#        ez_fail_quick        test1                test2          special        blocktestxz

# This is a "toplevel" test; it does not require waitons to be non-FAIL to run
mode toplevel


# matt@xena:~/ $ $MT_MEGATEST -list-runs $MT_RUNNAME -target $MT_TARGET|grep Test:|grep 'State: COMPL'|awk '{print $2}'|cut -d\( -f1|sort -u

Modified tests/simplerun/tests/test1/step1.sh from [c71fbc7484] to [a96d5c2635].

1
2
3
4
5
1
2
3
4





-
#!/usr/bin/env bash

# Run your step here
echo Got here!

Modified tests/simplerun/tests/test1/step2.sh from [97ecbea6c6] to [b3e19b3724].

1
2
3
4
5
6
1
2
3
4

5




-

#!/usr/bin/env bash

# Run your step here
echo Got here eh!