Megatest

Check-in [f63922410b]
Login
Overview
Comment:Merged from v1.60
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | multi-area
Files: files | file ages | folders
SHA1: f63922410bdc69f02e5876b3ab927e92a80a3dec
User & Date: mrwellan on 2015-05-20 09:53:34
Other Links: branch diff | manifest | tags
Context
2015-05-24
20:31
Merged in lastest changes from v1.60 check-in: 40ce199020 user: matt tags: multi-area
2015-05-20
09:53
Merged from v1.60 check-in: f63922410b user: mrwellan tags: multi-area
2015-05-16
21:23
Yes, I do want the db dir in mtrah check-in: 0d58c56f22 user: matt tags: v1.60_ezsteps_tcsh_fix
2015-05-06
20:51
Merged latest fixes from v1.60 to multi-area check-in: 2d67113627 user: matt tags: multi-area
Changes

Modified Makefile from [9328fc1f07] to [9ec3222d9a].

34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# BASH_MACHTYPE=$(shell bash -c "echo \$$MACHTYPE")
# ARCHSTR=$(BASH_MACHTYPE)_$(shell lsb_release -sr)
ARCHSTR=$(shell lsb_release -sr)
# ARCHSTR=$(shell bash -c "echo \$$MACHTYPE")

all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard 

mtest: $(OFILES) megatest.o readline-fix.scm
	csc $(CSCOPTS) $(OFILES) megatest.o -o mtest

dboard : $(OFILES) $(GOFILES) dashboard.scm
	csc $(OFILES) dashboard.scm $(GOFILES) -o dboard

odboard : olddashboard.scm $(OFILES) $(GOFILES)
	csc $(OFILES) $(GOFILES) olddashboard.scm -o odboard







|







34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# BASH_MACHTYPE=$(shell bash -c "echo \$$MACHTYPE")
# ARCHSTR=$(BASH_MACHTYPE)_$(shell lsb_release -sr)
# ARCHSTR is the output of `lsb_release -sr`; it forms the name of the
# $(PREFIX)/bin/.$(ARCHSTR) prerequisite of `all`.  Simple (:=) assignment so
# lsb_release runs once at parse time rather than on every expansion.
ARCHSTR := $(shell lsb_release -sr)
# ARCHSTR=$(shell bash -c "echo \$$MACHTYPE")

# Build the per-release dot-file under $(PREFIX)/bin plus both executables.
all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard

# Command-line executable.  readline-fix.scm is a prerequisite only: it
# triggers a relink when it changes but is not passed to csc.
mtest: $(OFILES) readline-fix.scm megatest.o
	csc $(CSCOPTS) $(OFILES) megatest.o -o $@

# Dashboard executable, built from the shared objects, the $(GOFILES)
# objects and dashboard.scm (compiled by csc as part of the link).
dboard : $(OFILES) $(GOFILES) dashboard.scm
	csc $(OFILES) dashboard.scm $(GOFILES) -o $@

# Old dashboard executable; not part of `all`, build it explicitly.
odboard : olddashboard.scm $(OFILES) $(GOFILES)
	csc $(OFILES) $(GOFILES) olddashboard.scm -o $@

Modified archive.scm from [51cad9b9b7] to [221e1469ab].

133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
	      (test-name         (db:test-get-testname  test-dat))
	      (test-id           (db:test-get-id        test-dat))
	      (run-id            (db:test-get-run_id    test-dat))
	      (target            (string-intersperse (map cadr (rmt:get-key-val-pairs run-id)) "/"))
	      
	      (toplevel/children (and (db:test-get-is-toplevel test-dat)
				      (> (rmt:test-toplevel-num-items run-id test-name) 0)))
	      (test-partial-path (conc target "/" run-name "/" (runs:make-full-test-name test-name item-path)))
	      ;; note the trailing slash to get the dir inspite of it being a link
	      (test-path         (conc linktree "/" test-partial-path))
	      (test-physical-path (if (file-exists? test-path) (read-symbolic-link test-path #t) #f))
	      (partial-path-index (if test-physical-path (substring-index test-partial-path test-physical-path) #f))
	      (test-base         (if (and partial-path-index 
					  test-physical-path )
				     (substring test-physical-path







|







133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
	      (test-name         (db:test-get-testname  test-dat))
	      (test-id           (db:test-get-id        test-dat))
	      (run-id            (db:test-get-run_id    test-dat))
	      (target            (string-intersperse (map cadr (rmt:get-key-val-pairs run-id)) "/"))
	      
	      (toplevel/children (and (db:test-get-is-toplevel test-dat)
				      (> (rmt:test-toplevel-num-items run-id test-name) 0)))
	      (test-partial-path (conc target "/" run-name "/" (db:test-make-full-name test-name item-path)))
	      ;; note the trailing slash to get the dir inspite of it being a link
	      (test-path         (conc linktree "/" test-partial-path))
	      (test-physical-path (if (file-exists? test-path) (read-symbolic-link test-path #t) #f))
	      (partial-path-index (if test-physical-path (substring-index test-partial-path test-physical-path) #f))
	      (test-base         (if (and partial-path-index 
					  test-physical-path )
				     (substring test-physical-path
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
	      (test-id           (db:test-get-id        test-dat))
	      (run-id            (db:test-get-run_id    test-dat))
	      (keyvals           (rmt:get-key-val-pairs run-id))
	      (target            (string-intersperse (map cadr keyvals) "/"))
	      
	      (toplevel/children (and (db:test-get-is-toplevel test-dat)
				      (> (rmt:test-toplevel-num-items run-id test-name) 0)))
	      (test-partial-path (conc target "/" run-name "/" (runs:make-full-test-name test-name item-path)))
	      ;; note the trailing slash to get the dir inspite of it being a link
	      (test-path         (conc linktree "/" test-partial-path))
	      ;; if the old path was not deleted then prev-test-physical-path will end up pointing to a real directory
	      (prev-test-physical-path (if (file-exists? test-path) (read-symbolic-link test-path #t) #f))

	      (new-test-physical-path  (conc best-disk "/" test-partial-path))
	      (archive-block-id        (db:test-get-archived test-dat))







|







218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
	      (test-id           (db:test-get-id        test-dat))
	      (run-id            (db:test-get-run_id    test-dat))
	      (keyvals           (rmt:get-key-val-pairs run-id))
	      (target            (string-intersperse (map cadr keyvals) "/"))
	      
	      (toplevel/children (and (db:test-get-is-toplevel test-dat)
				      (> (rmt:test-toplevel-num-items run-id test-name) 0)))
	      (test-partial-path (conc target "/" run-name "/" (db:test-make-full-name test-name item-path)))
	      ;; note the trailing slash to get the dir inspite of it being a link
	      (test-path         (conc linktree "/" test-partial-path))
	      ;; if the old path was not deleted then prev-test-physical-path will end up pointing to a real directory
	      (prev-test-physical-path (if (file-exists? test-path) (read-symbolic-link test-path #t) #f))

	      (new-test-physical-path  (conc best-disk "/" test-partial-path))
	      (archive-block-id        (db:test-get-archived test-dat))

Modified batchsim/Makefile from [cb23d858e9] to [23dda389e9].


1
2
3
4
5
6
7


all : batchsim
	./batchsim

batchsim : batchsim.scm
	csc batchsim.scm

>


|




1
2
3
4
5
6
7
8
RUN=default.scm

all : batchsim
	./batchsim $(RUN)

batchsim : batchsim.scm
	csc batchsim.scm

Modified batchsim/batchsim.scm from [5b100bed93] to [d5cdd008ec].

59
60
61
62
63
64
65
































































66
67
68
69
70
71
72
   300 ;; start-y
   300 ;; delta-y how far to next queue
   15  ;; height
   400 ;; length
   ))
(define *use-log* #f)
(define *job-log-scale* 10)

































































;;======================================================================
;; Users
;;======================================================================

(define *user-colors* (make-hash-table))








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
   300 ;; start-y
   300 ;; delta-y how far to next queue
   15  ;; height
   400 ;; length
   ))
(define *use-log* #f)
(define *job-log-scale* 10)

;;======================================================================
;; CPU
;;======================================================================

;; cpu record.  Fields: name, num-cores and mem are supplied by the caller;
;; job, x and y start as #f when created by pool:add-cpu.  pool:draw sets
;; x and y to the origin of the box it draws for the cpu.
;; NOTE(review): job is presumably the job currently assigned to the cpu;
;; it is not read or written in the code shown here -- confirm.
(define-record cpu name num-cores mem job x y)

;;======================================================================
;; CPU Pool
;;======================================================================

;; pool record: a named grid of cpu slots.
;;   name         label drawn by pool:draw
;;   x y          origin of the pool rectangle
;;   w h          size of the rectangle, computed by new-pool
;;   gap boxw     spacing between boxes and side length of each box
;;   cpus         vector of nrows*ncols slots, each a cpu record or #f
;;   delta        gap + boxw, the step from one box to the next
;;   nrows ncols  grid dimensions
;;   cpunum       index of the next slot pool:add-cpu will fill
(define-record pool name x y w h gap boxw cpus delta nrows ncols cpunum)

;; Construct a pool record called `name` with its origin at (x, y).
;;
;; The grid has nrows * ncols slots, all initially #f.  The box pitch is
;; gap + boxw; the stored width is gap + nrows * pitch and the stored height
;; is gap + ncols * pitch.  The next-free-slot counter starts at 0.
;; Returns the new pool.
(define (new-pool name x y nrows ncols gap boxw)
  (let* ((pitch  (+ gap boxw))
         (width  (+ gap (* nrows pitch)))
         (height (+ gap (* ncols pitch)))
         (slots  (make-vector (* nrows ncols) #f)))
    (make-pool name x y width height gap boxw slots pitch nrows ncols 0)))

;; Create a cpu (name, num-cores, mem; job/x/y all #f), store it in the
;; pool's next free slot, advance the pool's slot counter by one and
;; return the new cpu.
(define (pool:add-cpu pool name num-cores mem)
  (let ((slot    (pool-cpunum pool))
        (new-cpu (make-cpu name num-cores mem #f #f #f)))
    (vector-set! (pool-cpus pool) slot new-cpu)
    (pool-cpunum-set! pool (+ slot 1))
    new-cpu))

;; Draw `pool` on layer 1 of the ezx handle `ezx`, then redraw.
;;
;; Writes the pool name at (x, y), outlines the rectangle from (x, y) to
;; (x+w, y+h), then draws one boxw-sized grey square per slot of the cpus
;; vector.  Slots are visited with `col` varying fastest; `row` steps the
;; x coordinate and `col` steps the y coordinate, each by `delta`.
;;
;; Side effects on the pool: each cpu record found in a slot gets its x/y
;; fields set to the origin of its box, and each empty (#f) slot is filled
;; with a placeholder cpu named after the slot index (1 core, 1 mem, no job).
;;
;; A pool with no slots gets only its label and outline; the guard below
;; avoids reading slot 0 of an empty vector.
(define (pool:draw ezx pool)
  (let ((nrows (pool-nrows pool))
        (ncols (pool-ncols pool))
        (x     (pool-x     pool))
        (y     (pool-y     pool))
        (w     (pool-w     pool))
        (h     (pool-h     pool))
        (gap   (pool-gap   pool))
        (boxw  (pool-boxw  pool))
        (delta (pool-delta pool))
        (cpus  (pool-cpus  pool)))
    (ezx-select-layer ezx 1)
    ;(ezx-wipe-layer   ezx 1)
    ;; label the pool with its name at its origin, then outline it
    (ezx-str-2d ezx x y (pool-name pool) *black*)
    (ezx-rect-2d ezx x y (+ x w)(+ y h) *black* 1)
    ;; an empty slot vector has no slot 0, so skip the grid walk entirely
    (if (> (vector-length cpus) 0)
        (let loop ((row    0)
                   (col    0)
                   (cpunum 0))
          (let* ((cpu  (vector-ref cpus cpunum))
                 (xval (+ x gap (* row delta)))
                 (yval (+ y gap (* col delta))))
            (if cpu
                (begin
                  ;; remember where this cpu's box is drawn
                  (cpu-x-set! cpu xval)
                  (cpu-y-set! cpu yval))
                ;; empty slot: fill it with a placeholder cpu at this position
                (vector-set! cpus cpunum (make-cpu (conc cpunum) 1 1 #f xval yval)))
            ;; (print "box at " xval ", " yval)
            (ezx-rect-2d ezx xval yval (+ xval boxw) (+ yval boxw) *grey* 1)
            ;; walk down the current column of boxes, then move to the next row
            (if (< col (- ncols 1))
                (loop row (+ col 1)(+ cpunum 1))
                (if (< row (- nrows 1))
                    (loop (+ row 1) 0 (+ cpunum 1)))))))
    (ezx-redraw ezx)))
	       

;;======================================================================
;; Users
;;======================================================================

(define *user-colors* (make-hash-table))

Modified batchsim/default.scm from [9a8a9b1e46] to [6d3b9494d2].

1
2
3
4
5
6
7
8
9
10
11








12
13
14
15
16
17
18
;; run sim for four hours
;;
(define *end-time* (* 60 50))

;; create the cpus
;;
(let loop ((count 200))
  (add-cpu (conc "cpu_" count) 1 1)
  (if (>= count 0)(loop (- count 1))))

(draw-cpus)









;; init the queues
;;
(hash-table-set! *queues* "normal" '())
(hash-table-set! *queues* "quick"  '())
(draw-queues)












>
>
>
>
>
>
>
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
;; Simulation end time.
;; NOTE(review): this was described as "run sim for four hours", but
;; (* 60 50) is 3000 -- 50 minutes if sim time is counted in seconds.
;; Confirm whether the description or the value is the intended one.
;;
(define *end-time* (* 60 50))

;; create the cpus
;;
;; The loop calls add-cpu for count = 200 down to -1 inclusive, because the
;; (>= count 0) test runs after the call: 202 cpus, the last named "cpu_-1".
;; NOTE(review): looks like an off-by-one -- confirm the intended cpu count.
(let loop ((count 200))
  (add-cpu (conc "cpu_" count) 1 1)
  (if (>= count 0)(loop (- count 1))))

(draw-cpus)

;; Pool "generic" at (100, 100): 100 x 100 slots, gap 2, box width 10.
(define *pool1* (new-pool "generic" 100 100 100 100 2 10))
;; Adds cpus named "10" down to "0" (11 cpus) to the pool.
(let loop ((count 10))
  (pool:add-cpu *pool1* (conc count) 1 1)
  (if (> count 0)
      (loop (- count 1))))

(pool:draw *ezx* *pool1*)

;; init the queues: start "normal" and "quick" as empty lists
;;
(hash-table-set! *queues* "normal" '())
(hash-table-set! *queues* "quick"  '())
(draw-queues)

Added batchsim/testing.scm version [c6005591aa].















































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
;; Simulation end time.
;; NOTE(review): this was described as "run sim for four hours", but
;; (* 60 50) is 3000 -- 50 minutes if sim time is counted in seconds.
;; Confirm whether the description or the value is the intended one.
;;
(define *end-time* (* 60 50))

;; create the cpus
;;
;; The loop calls add-cpu for count = 200 down to -1 inclusive, because the
;; (>= count 0) test runs after the call: 202 cpus, the last named "cpu_-1".
;; NOTE(review): looks like an off-by-one -- confirm the intended cpu count.
(let loop ((count 200))
  (add-cpu (conc "cpu_" count) 1 1)
  (if (>= count 0)(loop (- count 1))))

;; (draw-cpus)

;; Pool "generic" at (20, 20): 12 x 80 slots, gap 2, box width 4.
(define *pool1* (new-pool "generic" 20 20 12 80 2 4))
;; Adds cpus named "10" down to "0" (11 cpus) to the pool.
(let loop ((count 10))
  (pool:add-cpu *pool1* (conc count) 1 1)
  (if (> count 0)
      (loop (- count 1))))

(pool:draw *ezx* *pool1*)

;; ;; init the queues
;; ;;
;; (hash-table-set! *queues* "normal" '())
;; (hash-table-set! *queues* "quick"  '())
;; (draw-queues)
;; 
;; ;; user k adds 200 jobs at time zero
;; ;;
;; (event *start-time*
;;        (lambda ()
;; 	 (let loop ((count 300)) ;; add 500 jobs
;; 	   (add-job "normal" "k" 600 1 1)
;; 	   (if (>= count 0)(loop (- count 1))))))
;; 
;; ;; one minute in user m runs ten jobs
;; ;;
;; (event (+ 600 *start-time*)
;;        (lambda ()
;; 	 (let loop ((count 300)) ;; add 100 jobs
;; 	   (add-job "normal" "m" 600 1 1)
;; 	   (if (> count 0)(loop (- count 1))))))
;; 
;; ;; every minute user j runs ten jobs
;; ;;
;; (define *user-j-jobs* 300)
;; (event (+ 600 *start-time*)
;;        (lambda ()
;; 	 (let f ()
;; 	   (schedule 60)
;; 	   (if (> *user-j-jobs* 0)
;; 	       (begin
;; 		 (let loop ((count 5)) ;; add 100 jobs
;; 		   (add-job "quick" "j" 600 1 1)
;; 		   (if (> count 0)(loop (- count 1))))
;; 		 (set! *user-j-jobs* (- *user-j-jobs* 5))))
;; 	   (if (and (not *done*)
;; 		    (> *user-j-jobs* 0))
;; 	       (f))))) ;; Megatest user running 200 jobs
;; 
;; ;; every minute user j runs ten jobs
;; ;;
;; (define *user-j-jobs* 300)
;; (event (+ 630 *start-time*)
;;        (lambda ()
;; 	 (let f ()
;; 	   (schedule 60)
;; 	   (if (> *user-j-jobs* 0)
;; 	       (begin
;; 		 (let loop ((count 5)) ;; add 100 jobs
;; 		   (add-job "quick" "n" 600 1 1)
;; 		   (if (> count 0)(loop (- count 1))))
;; 		 (set! *user-j-jobs* (- *user-j-jobs* 5))))
;; 	   (if (and (not *done*)
;; 		    (> *user-j-jobs* 0))
;; 	       (f))))) ;; Megatest user running 200 jobs
;; 
;; ;; ;;
;; ;; (event *start-time*
;; ;;        (lambda ()
;; ;; 	 (let f ((count 200))
;; ;; 	   (schedule 10)
;; ;; 	   (add-job "normal" "t" 60 1 1)
;; ;; 	   (if (and (not *done*)
;; ;; 		    (>= count 0))
;; ;; 	       (f (- count 1))))))
;; 
;; ;; every 3 seconds check for available machines and launch a job
;; ;;
;; (event *start-time*
;;        (lambda ()
;; 	 (let f ()
;; 	   (schedule 3)
;; 	   (let ((queue-names (random-sort (hash-table-keys *queues*))))
;; 	     (let loop ((cpu   (get-cpu))
;; 			(count (+ (length queue-names) 4))
;; 			(qname (car queue-names))
;; 			(remq  (cdr queue-names)))
;; 	       (if (and cpu
;; 			(> count 0))
;; 		   (begin
;; 		     (if (peek-job qname) ;; any jobs to do in normal queue
;; 			 (let ((job (take-job qname)))
;; 			   (run-job cpu job)))
;; 		     (loop (get-cpu)
;; 			   (- count 1)
;; 			   (if (null? remq)
;; 			       (car queue-names)
;; 			       (car remq))
;; 			   (if (null? remq)
;; 			       (cdr queue-names)
;; 			       (cdr remq)))))))
;; 	   (if (not *done*)(f)))))
;; 
;; ;; screen updates
;; ;;
;; Screen-update event, registered at *start-time*: each pass schedules 60,
;; redraws *pool1*, waits for the next draw time and repeats until *done*
;; is set.
(event *start-time* (lambda ()
		      (let f ()
			(schedule 60) ;; update the screen every 60 seconds of sim time
			;; (draw-cpus) ;; (print "Now: " *now* " queue: " (hash-table->alist *queues*))
			(pool:draw *ezx* *pool1*)

			(wait-for-next-draw-time)
			(if (not *done*) (f)))))
;; 
;; 
;; ;; end the simulation
;; ;;
;; End-of-simulation event, registered at *end-time*: empties *event-list*
;; and sets *done* so the update loop above stops.
(event *end-time*
       (lambda () 
	 (set! *event-list* '())
	 (set! *done* #t)))
;; 
;; Run the simulation with the events registered above.
(start)
;; ;; (exit 0)
;; 

Modified db.scm from [01a5a1f4f0] to [8451ddfac6].

2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
	     #f
	     (lambda (db)
	       (sqlite3:first-result
		db
		(conc "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND testname in ('"
		      (string-intersperse testnames "','")
		      "') AND NOT (uname = 'n/a' AND item_path='');")) ;; should this include the (uname = 'n/a' ...) ???
	       0)))))))
             ;; DEBUG FIXME - need to merge this v.155 query correctly   
             ;; AND testname in (SELECT testname FROM test_meta WHERE jobgroup=?)
             ;; AND NOT (uname = 'n/a' AND item_path = '');"

;; done with run when:
;;   0 tests in LAUNCHED, NOT_STARTED, REMOTEHOSTSTART, RUNNING
(define (db:estimated-tests-remaining dbstruct area-dat run-id)







|







2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
	     #f
	     (lambda (db)
	       (sqlite3:first-result
		db
		(conc "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND testname in ('"
		      (string-intersperse testnames "','")
		      "') AND NOT (uname = 'n/a' AND item_path='');")) ;; should this include the (uname = 'n/a' ...) ???
	       )))))))
             ;; DEBUG FIXME - need to merge this v.155 query correctly   
             ;; AND testname in (SELECT testname FROM test_meta WHERE jobgroup=?)
             ;; AND NOT (uname = 'n/a' AND item_path = '');"

;; done with run when:
;;   0 tests in LAUNCHED, NOT_STARTED, REMOTEHOSTSTART, RUNNING
(define (db:estimated-tests-remaining dbstruct area-dat run-id)

Modified launch.scm from [411494ad38] to [7b605ded00].

79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
    ;; (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd))
    
    (debug:print 4 "script: " script)
    (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f area-dat)
    ;; now launch the actual process
    (call-with-environment-variables 
     (list (cons "PATH" (conc (get-environment-variable "PATH") ":.")))
     (lambda ()
       (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 
	      (pid (process-run cmd)))
	 (rmt:test-set-top-process-pid run-id test-id pid area-dat)
	 (let processloop ((i 0))
	   (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
		       (mutex-lock! m)
		       (vector-set! exit-info 0 pid)
		       (vector-set! exit-info 1 exit-status)
		       (vector-set! exit-info 2 exit-code)







|
|
|







79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
    ;; (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd))
    
    (debug:print 4 "script: " script)
    (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f area-dat)
    ;; now launch the actual process
    (call-with-environment-variables 
     (list (cons "PATH" (conc (get-environment-variable "PATH") ":.")))
     (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1")
       (let* ((cmd (conc "exec " stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 
	      (pid (process-run "/bin/bash" (list "-c" cmd))))
	 (rmt:test-set-top-process-pid run-id test-id pid area-dat)
	 (let processloop ((i 0))
	   (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
		       (mutex-lock! m)
		       (vector-set! exit-info 0 pid)
		       (vector-set! exit-info 1 exit-status)
		       (vector-set! exit-info 2 exit-code)
198
199
200
201
202
203
204
205

206
207

208

209
210
211
212
213
214
215
216
                                              runscript))))) ;; assume it is on the path
	       ;; (rollup-status 0)
	       )
	  (change-directory top-path)

	  ;; (set-signal-handler! signal/int (lambda ()
					    
	  ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,

	  ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
	  ;;

	  (let ((test-info (rmt:get-testinfo-state-status run-id test-id area-dat)))

	    (if (not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
		(tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
		(begin
		  (debug:print 0 "ERROR: test state is " (db:test-get-state test-info) ", cannot proceed")
		  (exit))))
	  
	  (debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name))
	  (set! keys       (rmt:get-keys area-dat))







|
>


>

>
|







198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
                                              runscript))))) ;; assume it is on the path
	       ;; (rollup-status 0)
	       )
	  (change-directory top-path)

	  ;; (set-signal-handler! signal/int (lambda ()
					    
	  ;; WAS: Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,
	  ;; NOW: Do not run the test unless state is LAUNCHED
	  ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
	  ;;
	  ;; This is flawed. It should be a single transaction that tests for NOT_STARTED and updates to REMOTEHOSTSTART
	  (let ((test-info (rmt:get-testinfo-state-status run-id test-id area-dat)))
	  ;;
	    (if (equal? (db:test-get-state test-info) "LAUNCHED") ;; '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
		(tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
		(begin
		  (debug:print 0 "ERROR: test state is " (db:test-get-state test-info) ", cannot proceed")
		  (exit))))
	  
	  (debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name))
	  (set! keys       (rmt:get-keys area-dat))
892
893
894
895
896
897
898



899





900
901
902
903
904
905
906
				      (list 'env-ovrd  (hash-table-ref/default configdat "env-override" '())) 
				      (list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
				      (list 'runname   runname)
				      (list 'mt-bindir-path mt-bindir-path))))))))

    ;; clean out step records from previous run if they exist
    ;; (rmt:delete-test-step-records run-id test-id)



    (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir





    (cond
     ((and launcher hosts) ;; must be using ssh hostname
      (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms) debug-param)))
     ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
     (launcher
      (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms) debug-param)))
     ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms))))







>
>
>
|
>
>
>
>
>







895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
				      (list 'env-ovrd  (hash-table-ref/default configdat "env-override" '())) 
				      (list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
				      (list 'runname   runname)
				      (list 'mt-bindir-path mt-bindir-path))))))))

    ;; clean out step records from previous run if they exist
    ;; (rmt:delete-test-step-records run-id test-id)
    
    ;; Moving launch logs to MT_RUN_AREA_HOME/logs 
    ;;
    (let ((launchdir (configf:lookup *configdat* "setup" "launchdir"))) ;; (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir
      (if (not launchdir) ;; default
	  (change-directory (conc *toppath* "/logs")) ;; can assume this exists
	  (case (string->symbol launchdir)
	    ((legacy)(change-directory work-area))
	    (else    (change-directory launchdir)))))
    (cond
     ((and launcher hosts) ;; must be using ssh hostname
      (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms) debug-param)))
     ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
     (launcher
      (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms) debug-param)))
     ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms))))
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
	   (launch-results (apply (if launchwait
				      cmd-run-with-stderr->list
				      process-run)
				  (if useshell
				      (let ((cmdstr (string-intersperse fullcmd " ")))
					(if launchwait
					    cmdstr
					    (conc cmdstr " >> mt_launch.log 2>&1")))
				      (car fullcmd))
				  (if useshell
				      '()
				      (cdr fullcmd)))))
      (if (not launchwait) ;; give the OS a little time to allow the process to start
	  (thread-sleep! 0.01))
      (with-output-to-file "mt_launch.log"
	(lambda ()
	  (if (list? launch-results)
	      (apply print launch-results)
	      (print "NOTE: launched \"" fullcmd "\"\n  but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n  if you have problems with this"))
	  #:append))
      (debug:print 2 "Launching completed, updating db")
      (debug:print 2 "Launch results: " launch-results)







|






|







941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
	   (launch-results (apply (if launchwait
				      cmd-run-with-stderr->list
				      process-run)
				  (if useshell
				      (let ((cmdstr (string-intersperse fullcmd " ")))
					(if launchwait
					    cmdstr
					    (conc cmdstr " >> " work-area "/mt_launch.log 2>&1")))
				      (car fullcmd))
				  (if useshell
				      '()
				      (cdr fullcmd)))))
      (if (not launchwait) ;; give the OS a little time to allow the process to start
	  (thread-sleep! 0.01))
      (with-output-to-file (conc work-area "/mt_launch.log")
	(lambda ()
	  (if (list? launch-results)
	      (apply print launch-results)
	      (print "NOTE: launched \"" fullcmd "\"\n  but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n  if you have problems with this"))
	  #:append))
      (debug:print 2 "Launching completed, updating db")
      (debug:print 2 "Launch results: " launch-results)

Modified runs.scm from [df890d5bab] to [eba5a1b9e0].

1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
		  ((and skip-check
			(configf:lookup test-conf "skip" "rundelay"))
		   ;; run-ids = #f means *all* runs
		   (let* ((numseconds      (common:hms-string->seconds (configf:lookup test-conf "skip" "rundelay")))
			  (running-tests   (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f))
			  (completed-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("COMPLETED") '("PASS" "FAIL" "ABORT") #f))
			  (last-run-times  (map db:mintest-get-event_time completed-tests))
			  (time-since-last (- (current-seconds) (apply max last-run-times))))
		     (if (or (not (null? running-tests)) ;; have to skip if test is running
			     (> numseconds time-since-last))
			 (set! skip-test (conc "Skipping due to previous test run less than " (configf:lookup test-conf "skip" "rundelay") " ago"))))))
		 
		 (if skip-test
		     (begin
		       (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test)







|







1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
		  ((and skip-check
			(configf:lookup test-conf "skip" "rundelay"))
		   ;; run-ids = #f means *all* runs
		   (let* ((numseconds      (common:hms-string->seconds (configf:lookup test-conf "skip" "rundelay")))
			  (running-tests   (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f))
			  (completed-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("COMPLETED") '("PASS" "FAIL" "ABORT") #f))
			  (last-run-times  (map db:mintest-get-event_time completed-tests))
			  (time-since-last (- (current-seconds) (if (null? last-run-times) 0 (apply max last-run-times)))))
		     (if (or (not (null? running-tests)) ;; have to skip if test is running
			     (> numseconds time-since-last))
			 (set! skip-test (conc "Skipping due to previous test run less than " (configf:lookup test-conf "skip" "rundelay") " ago"))))))
		 
		 (if skip-test
		     (begin
		       (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test)
1590
1591
1592
1593
1594
1595
1596
1597

1598
1599

1600
1601
1602
1603
1604
1605
1606
1607
1608
				(debug:print-info 2 "still waiting, " (length tests) " tests still running")
				(thread-sleep! 10)
				(let ((new-tests (proc-get-tests run-id)))
				  (if (null? new-tests)
				      (debug:print-info 1 "Run completed according to zero tests matching provided criteria.")
				      (loop (car new-tests)(cdr new-tests)))))
			       ((archive)
				(if (not toplevel-with-children)

				    (case (string->symbol (args:get-arg "-archive"))
				      ((save save-remove keep-html)

				       (debug:print-info 0 "Estimating disk space usage for " test-fulln)
				       (debug:print-info 0 "   " (common:get-disk-space-used (conc run-dir "/"))))))
				(if (not (null? tal))
				    (loop (car tal)(cdr tal))))
			       )))
		       )
		     (if worker-thread (thread-join! worker-thread))))))
	   ;; remove the run if zero tests remain
	   (if (eq? action 'remove-runs)







|
>
|
|
>
|
<







1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602

1603
1604
1605
1606
1607
1608
1609
				(debug:print-info 2 "still waiting, " (length tests) " tests still running")
				(thread-sleep! 10)
				(let ((new-tests (proc-get-tests run-id)))
				  (if (null? new-tests)
				      (debug:print-info 1 "Run completed according to zero tests matching provided criteria.")
				      (loop (car new-tests)(cdr new-tests)))))
			       ((archive)
				(if (and run-dir (not toplevel-with-children))
				    (let ((ddir (conc run-dir "/")))
				      (case (string->symbol (args:get-arg "-archive"))
					((save save-remove keep-html)
					 (if (file-exists? ddir)
					     (debug:print-info 0 "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir)))))))

				(if (not (null? tal))
				    (loop (car tal)(cdr tal))))
			       )))
		       )
		     (if worker-thread (thread-join! worker-thread))))))
	   ;; remove the run if zero tests remain
	   (if (eq? action 'remove-runs)

Modified tests/fullrun/megatest.config from [e5113ba78d] to [28073f7970].

33
34
35
36
37
38
39



40
41
42


43
44
45
46
47
48
49
dbdir      /var/tmp/#{getenv USER}/mt_db

# Set launchwait to no to use the more agressive code that does not wait for the launch to complete before proceeding
# this may save a few milliseconds on launching tests
# launchwait no
waivercommentpatt ^WW\d+ [a-z].*
incomplete-timeout 1




# wait for runs to completely complete. yes, anything else is no
run-wait yes



# If set to "default" the old code is used. Otherwise defaults to 200 or uses
# numeric value given.
#
runqueue 20

# Default runtimelim 1d 1h 1m 10s







>
>
>



>
>







33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
dbdir      /var/tmp/#{getenv USER}/mt_db

# Set launchwait to no to use the more aggressive code that does not wait for the launch to complete before proceeding
# this may save a few milliseconds on launching tests
# launchwait no
waivercommentpatt ^WW\d+ [a-z].*
incomplete-timeout 1

# set the dbdir, default is linktree
dbdir #{getenv MT_RUN_AREA_HOME}/db/

# wait for runs to completely complete. yes, anything else is no
run-wait yes



# If set to "default" the old code is used. Otherwise defaults to 200 or uses
# numeric value given.
#
runqueue 20

# Default runtimelim 1d 1h 1m 10s

Modified tests/simplerun/tests/test1/step2.sh from [97ecbea6c6] to [b3e19b3724].

1
2
3
4
5
6
#!/usr/bin/env bash

# Run your step here
echo Got here eh!







<
1
2
3
4
5

#!/usr/bin/env bash

# Step body: print a marker line showing that this step was reached
printf '%s\n' 'Got here eh!'