Megatest

Check-in [c075ebd51b]
Login
Overview
Comment:Added -keepgoing, removed the calls to run-queue, fixes to job limits, -runstep, and killreq (now will do signal/kill after first trying signal/term)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: c075ebd51bcdb34062a39b7c46642be877740a6b
User & Date: mrwellan on 2011-06-15 18:11:59
Other Links: manifest | tags
Context
2011-06-15
18:31
For max_concurrent_jobs if there are trailing spaces the string->number fails. Added quotes to make this occurance more obvious check-in: 194bedd45c user: mrwellan tags: trunk
18:11
Added -keepgoing, removed the calls to run-queue, fixes to job limits, -runstep, and killreq (now will do signal/kill after first trying signal/term) check-in: c075ebd51b user: mrwellan tags: trunk
2011-06-14
23:04
Added filters on test and items to dashboard. Not even close to having real scrolling but it'll have to do for now check-in: a76b6398d6 user: mrwellan tags: trunk
Changes

Modified dashboard.scm from [78e2859a5d] to [d72b4fee0f].

179
180
181
182
183
184
185
186

187
188
189
190
191
192
193
179
180
181
182
183
184
185

186
187
188
189
190
191
192
193







-
+







					 (iup:button "Cancel and close"
						     #:expand "YES"
						     #:action (lambda (x)
								(hash-table-delete! *examine-test-dat* testkey)
								(iup:destroy! self))))
			       )))
		  (iup:hbox ;; the test steps are tracked here
		   (let ((stepsdat (iup:label "Test steps ......................................" #:expand "YES")))
		   (let ((stepsdat (iup:label "Test steps ........................................." #:expand "YES")))
		     (hash-table-set! widgets "Test Steps" stepsdat)
		     stepsdat)
		   ))))
	  (iup:show self)
	  ))))

(define (colors-similar? color1 color2)
207
208
209
210
211
212
213
214

215
216
217
218
219
220
221
207
208
209
210
211
212
213

214
215
216
217
218
219
220
221







-
+







		       (tests    (db-get-tests-for-run *db* run-id testnamepatt itemnamepatt))
		       (key-vals (get-key-vals *db* run-id)))
		  (if (> (length tests) maxtests)
		      (set! maxtests (length tests)))
		  (set! result (cons (vector run tests key-vals) result))))
	      runs)
    (set! *header*  header)
    (set! *allruns* (reverse result))
    (set! *allruns* result)
    maxtests))

(define (update-labels uidat)
  (let* ((rown    0)
	 (lftcol (vector-ref uidat 0))
	 (maxn   (- (vector-length lftcol) 1)))
    (let loop ((i 0))
240
241
242
243
244
245
246
247

248
249
250
251
252
253
254
240
241
242
243
244
245
246

247
248
249
250
251
252
253
254







-
+







    (update-labels uidat)
    (for-each 
     (lambda (popup)
       (let* ((test-id  (car popup))
	      (widgets  (hash-table-ref *examine-test-dat* popup))
	      (stepslbl (hash-table-ref/default widgets "Test Steps" #f)))
	 (if stepslbl
	     (let* ((fmtstr  "~15a~8a~8a~17a")
	     (let* ((fmtstr  "~15a~8a~8a~20a")
		    (newtxt  (string-intersperse 
			      (append
			       (list 
				(format #f fmtstr "Stepname" "State" "Status" "Event Time")
				(format #f fmtstr "========" "=====" "======" "=========="))
			       (map (lambda (x)
				      ;; take advantage of the \n on time->string

Modified db.scm from [e509962d69] to [6176618730].

219
220
221
222
223
224
225











226
227
228
229
230
231
232
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243







+
+
+
+
+
+
+
+
+
+
+







  (let ((res 0))
    (sqlite3:for-each-row
     (lambda (count)
       (set! res count))
     db
     "SELECT count(id) FROM tests WHERE state = 'RUNNING' OR state = 'LAUNCHED' OR state = 'REMOTEHOSTSTART';")
    res))

;; done with run when:
;;   0 tests in LAUNCHED, NOT_STARTED, REMOTEHOSTSTART, RUNNING
(define (db:estimated-tests-remaining db run-id)
  (let ((res 0))
    (sqlite3:for-each-row
     (lambda (count)
       (set! res count))
     db
     "SELECT count(id) FROM tests WHERE state in ('LAUNCHED','NOT_STARTED','REMOTEHOSTSTART','RUNNING') AND run_id=?;" run-id)
    res))

;; NB// Sync this with runs:get-test-info
(define (db:get-test-info db run-id testname item-path)
  (let ((res '()))
    (sqlite3:for-each-row
     (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir item-path run_duration final_logf comment)
       (set! res (vector id run-id testname state status event-time host cpuload diskfree uname rundir item-path run_duration final_logf comment)))
270
271
272
273
274
275
276
277

278
279

280
281
282
283
284
285
286
281
282
283
284
285
286
287

288
289

290
291
292
293
294
295
296
297







-
+

-
+








(define (db-get-test-steps-for-run db test-id)
  (let ((res '()))
    (sqlite3:for-each-row 
     (lambda (id test-id stepname state status event-time)
       (set! res (cons (vector id test-id stepname state status event-time) res)))
     db
     "SELECT id,test_id,stepname,state,status,event_time FROM test_steps WHERE test_id=? ORDER BY event_time DESC;"
     "SELECT id,test_id,stepname,state,status,event_time FROM test_steps WHERE test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC;
     test-id)
    res))
    (reverse res)))

;; check that *all* the prereqs are "COMPLETED"
(define (db-get-prereqs-met db run-id waiton)
  (let ((res          #f)
	(not-complete 0)
	(tests        (db-get-tests-for-run db run-id)))
    (for-each

Modified megatest.scm from [7e0b50a664] to [69c49cdbf1].

1
2
3
4
5
6
7
8
9
10
11

12
13
14
15
16
17
18
1
2
3
4
5
6
7
8
9
10

11
12
13
14
15
16
17
18










-
+







;; Copyright 2006-2011, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.

(include "common.scm")
(define megatest-version 1.11)
(define megatest-version 1.12)

(define help (conc "
Megatest, documentation at http://www.kiatoa.com/fossils/megatest
  version " megatest-version "
  license GPL, Copyright Matt Welland 2006-2011

Usage: megatest [options]
46
47
48
49
50
51
52


53
54
55
56
57
58
59
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61







+
+








Misc 
  -force                  : override some checks
  -xterm                  : start an xterm instead of launching the test
  -remove-runs            : remove the data for a run, requires fields, :runname 
                            and -testpatt
  -testpatt patt          : remove tests matching patt (requires -remove-runs)
  -keepgoing              : continue running until no jobs are \"LAUNCHED\" or
                            \"NOT_STARTED\"

Helpers
  -runstep stepname  ...  : take remaining params as comand and execute as stepname
                            log will be in stepname.log. Best to put command in quotes
  -logpro file            : with -exec apply logpro file to stepname.log, creates
                            stepname.html and sets log to same
                            If using make use stepname_logpro.log as your target
88
89
90
91
92
93
94

95
96
97
98
99
100
101
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104







+







		        "-force"
		        "-xterm"
		        "-showkeys"
		        "-test-status"
		        "-gui"
			"-runall"    ;; run all tests
			"-remove-runs"
			"-keepgoing"
		       )
		 args:arg-hash
		 0))

(if (args:get-arg "-h")
    (begin
      (print help)
252
253
254
255
256
257
258
259

260
261
262
263
264
265
266
255
256
257
258
259
260
261

262
263
264
265
266
267
268
269







-
+







		(print "ERROR: Attempted to run a test but run area config file not found")
		(exit 1))
	      ;; put test parameters into convenient variables
	      (let* ((test-names (get-all-legal-tests))) ;; "PROD" is ignored for now
		(print "INFO: Attempting to start the following tests...")
		(print "     " (string-intersperse test-names ","))
		(run-tests db test-names)))
	  (run-waiting-tests db)
	  ;; (run-waiting-tests db)
	  (sqlite3:finalize! db)
	  (set! *didsomething* #t))))

;;======================================================================
;; run one test
;;======================================================================

293
294
295
296
297
298
299
300

301
302
303
304
305
306
307
296
297
298
299
300
301
302

303
304
305
306
307
308
309
310







-
+







	      (print "ERROR: Attempted to run a test but run area config file not found")
	      (exit 1))
	    ;; put test parameters into convenient variables
	    (let* ((test-names   (string-split (args:get-arg "-runtests") ",")))
	      (run-tests db test-names)))
	;; run-waiting-tests db)
	(sqlite3:finalize! db)
	(run-waiting-tests #f)
	;; (run-waiting-tests #f)
	(set! *didsomething* #t))))
	  
(if (args:get-arg "-runtests")
    (runtests))

;;======================================================================
;; execute the test
389
390
391
392
393
394
395


396






397
398
399
400
401
402
403
392
393
394
395
396
397
398
399
400

401
402
403
404
405
406
407
408
409
410
411
412
413







+
+
-
+
+
+
+
+
+







							     (- 
							      (current-seconds) 
							      start-seconds))))))
				     (let loop ((minutes   (calc-minutes)))
				       (let ((db    (open-db)))
					 (set! kill-job? (test-get-kill-request db run-id test-name itemdat))
					 (test-update-meta-info db run-id test-name itemdat minutes)
					 (if kill-job? 
					     (begin 
					 (if kill-job? (process-signal (vector-ref exit-info 0) signal/term))
					       (process-signal (vector-ref exit-info 0) signal/term)
					       (sleep 2)
					       (handle-exceptions
						exn
						(print "ERROR: Problem killing process " (vector-ref exit-info 0))
						(process-signal (vector-ref exit-info 0) signal/kill))))
					 (sqlite3:finalize! db)
					 (thread-sleep! (+ 8 (random 4))) ;; add some jitter to the call home time to spread out the db accesses
					 (loop (calc-minutes)))))))
		   (th1          (make-thread monitorjob))
		   (th2          (make-thread runit)))
	      (thread-start! th1)
	      (thread-start! th2)
500
501
502
503
504
505
506
507

508
509
510
511
512
513
514
510
511
512
513
514
515
516

517
518
519
520
521
522
523
524







-
+







			 (redir      (case (string->symbol shell)
				       ((tcsh csh ksh)    ">&")
				       ((zsh bash sh ash) "2>&1 >")))
			 (fullcmd    (conc "(" (string-intersperse 
						(cons cmd params) " ")
					   ") " redir " " logfile)))
		    ;; mark the start of the test
		    (test-set-status! db run-id test-name "start" "n/a" itemdat (args:get-arg "-m"))
		    (teststep-set-status! db run-id test-name stepname "start" "n/a" itemdat (args:get-arg "-m"))
		    ;; close the db
		    (sqlite3:finalize! db)
		    ;; run the test step
		    (print "INFO: Running \"" fullcmd "\"")
		    (change-directory startingdir)
		    (set! exitstat (system fullcmd)) ;; cmd params))
		    (set! *globalexitstatus* exitstat)

Modified runs.scm from [a13e1910a3] to [4be3851e9e].

248
249
250
251
252
253
254

255
256
257
258
259
260
261
262
263
264
265
266
267
268





















269
270
271
272
273
274
275
248
249
250
251
252
253
254
255














256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283







+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







	      tests)
    res))

(define (run-tests db test-names)
  (let* ((keys        (db-get-keys db))
	 (keyvallst   (keys->vallist keys #t))
	 (run-id      (register-run db keys))) ;;  test-name)))
    (let loop ((numtimes 0))
    (for-each 
     (lambda (test-name)
       (let ((num-running (db:get-count-tests-running db))
	     (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs")))
	 (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	 (if (or (not max-concurrent-jobs)
		 (and max-concurrent-jobs
		      (string->number max-concurrent-jobs)
		      (not (> num-running (string->number max-concurrent-jobs)))))
	     (run-one-test db run-id test-name keyvallst)
	     (print "WARNING: Max running jobs exceeded, current number running: " num-running 
		    ", max_concurrent_jobs: " max-concurrent-jobs))))
     test-names)))

      (for-each 
       (lambda (test-name)
	 (let ((num-running (db:get-count-tests-running db))
	       (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs")))
	   (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	   (if (or (not max-concurrent-jobs)
		   (and max-concurrent-jobs
			(string->number max-concurrent-jobs)
			(not (>= num-running (string->number max-concurrent-jobs)))))
	       (run-one-test db run-id test-name keyvallst)
	       (print "WARNING: Max running jobs exceeded, current number running: " num-running 
		      ", max_concurrent_jobs: " max-concurrent-jobs))))
       test-names)
      (if (args:get-arg "-keepgoing")
	  (let ((estrem (db:estimated-tests-remaining db run-id)))
	    (if (> estrem 0)
		(begin
		  (print "Keep going, estimated " estrem " tests remaining to run, will continue in 10 seconds ...")
		  (sleep 10)
		  (loop (+ numtimes 1)))))))))
	   
;; VERY INEFFICIENT! Move stuff that should be done once up to calling proc
(define (run-one-test db run-id test-name keyvallst)
  (print "Launching test " test-name)
  ;; All these vars might be referenced by the testconfig file reader
  (setenv "MT_TEST_NAME" test-name) ;; 
  (setenv "MT_RUNNAME"   (args:get-arg ":runname"))
  (set-megatest-env-vars db run-id) ;; these may be needed by the launching process
305
306
307
308
309
310
311
312

313
314
315
316
317
318
319
313
314
315
316
317
318
319

320
321
322
323
324
325
326
327







-
+







		   (test-status   #f)
		   (num-running (db:get-count-tests-running db))
		   (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs")))
	      (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	      (if (not (or (not max-concurrent-jobs)
			   (and max-concurrent-jobs
				(string->number max-concurrent-jobs)
				(not (> num-running (string->number max-concurrent-jobs))))))
				(not (>= num-running (string->number max-concurrent-jobs))))))
		  (print "WARNING: Max running jobs exceeded, current number running: " num-running 
			 ", max_concurrent_jobs: " max-concurrent-jobs)
		  (begin
		    (let loop2 ((ts #f)
				(ct 0))
		      (if (and (not ts)
			       (< ct 10))

Modified tests/Makefile from [4c07feade7] to [0b961aa417].

1
2
3
4
5
6
7

8
9
10
11
12
13
14
1
2
3
4
5
6

7
8
9
10
11
12
13
14






-
+







# run some tests

MEGATEST=$(shell realpath ../megatest)

runall :
	cd ../;make 
	$(MEGATEST) -runall :sysname ubuntu :fsname nfs :datapath none :runname `date +%GWW%V.%u` -m "This is a comment specific to a run"
	$(MEGATEST) -keepgoing -runall :sysname ubuntu :fsname nfs :datapath none :runname `date +%GWW%V.%u` -m "This is a comment specific to a run"

test :
	cd ../;make test
	make runall

dashboard :
	cd ../;make dashboard

Modified tests/megatest.config from [b996a05181] to [7cce1c9833].

1
2
3
4
5
6
7
8

9
10
11
12
13
14
15
1
2
3
4
5
6
7

8
9
10
11
12
13
14
15







-
+







[fields]
sysname TEXT
fsname TEXT
datapath TEXT

[setup]
# exectutable /path/to/megatest
max_concurrent_jobs 405
max_concurrent_jobs 8
runsdir /tmp/runs

[jobtools]
# ## launcher launches jobs, the job is managed on the target host
## by megatest, comment out launcher to run local
# workhosts localhost hermes
launcher nbfake