Megatest

Check-in [c075ebd51b]
Login
Overview
Comment:Added -keepgoing, removed the calls to run-queue, fixes to job limits, -runstep, and killreq (now will do signal/kill after first trying signal/term)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: c075ebd51bcdb34062a39b7c46642be877740a6b
User & Date: mrwellan on 2011-06-15 18:11:59
Other Links: manifest | tags
Context
2011-06-15
18:31
For max_concurrent_jobs if there are trailing spaces the string->number fails. Added quotes to make this occurance more obvious check-in: 194bedd45c user: mrwellan tags: trunk
18:11
Added -keepgoing, removed the calls to run-queue, fixes to job limits, -runstep, and killreq (now will do signal/kill after first trying signal/term) check-in: c075ebd51b user: mrwellan tags: trunk
2011-06-14
23:04
Added filters on test and items to dashboard. Not even close to having real scrolling but it'll have to do for now check-in: a76b6398d6 user: mrwellan tags: trunk
Changes

Modified dashboard.scm from [78e2859a5d] to [d72b4fee0f].

179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
					 (iup:button "Cancel and close"
						     #:expand "YES"
						     #:action (lambda (x)
								(hash-table-delete! *examine-test-dat* testkey)
								(iup:destroy! self))))
			       )))
		  (iup:hbox ;; the test steps are tracked here
		   (let ((stepsdat (iup:label "Test steps ......................................" #:expand "YES")))
		     (hash-table-set! widgets "Test Steps" stepsdat)
		     stepsdat)
		   ))))
	  (iup:show self)
	  ))))

(define (colors-similar? color1 color2)







|







179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
					 (iup:button "Cancel and close"
						     #:expand "YES"
						     #:action (lambda (x)
								(hash-table-delete! *examine-test-dat* testkey)
								(iup:destroy! self))))
			       )))
		  (iup:hbox ;; the test steps are tracked here
		   (let ((stepsdat (iup:label "Test steps ........................................." #:expand "YES")))
		     (hash-table-set! widgets "Test Steps" stepsdat)
		     stepsdat)
		   ))))
	  (iup:show self)
	  ))))

(define (colors-similar? color1 color2)
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
		       (tests    (db-get-tests-for-run *db* run-id testnamepatt itemnamepatt))
		       (key-vals (get-key-vals *db* run-id)))
		  (if (> (length tests) maxtests)
		      (set! maxtests (length tests)))
		  (set! result (cons (vector run tests key-vals) result))))
	      runs)
    (set! *header*  header)
    (set! *allruns* (reverse result))
    maxtests))

(define (update-labels uidat)
  (let* ((rown    0)
	 (lftcol (vector-ref uidat 0))
	 (maxn   (- (vector-length lftcol) 1)))
    (let loop ((i 0))







|







207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
		       (tests    (db-get-tests-for-run *db* run-id testnamepatt itemnamepatt))
		       (key-vals (get-key-vals *db* run-id)))
		  (if (> (length tests) maxtests)
		      (set! maxtests (length tests)))
		  (set! result (cons (vector run tests key-vals) result))))
	      runs)
    (set! *header*  header)
    (set! *allruns* result)
    maxtests))

(define (update-labels uidat)
  (let* ((rown    0)
	 (lftcol (vector-ref uidat 0))
	 (maxn   (- (vector-length lftcol) 1)))
    (let loop ((i 0))
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
    (update-labels uidat)
    (for-each 
     (lambda (popup)
       (let* ((test-id  (car popup))
	      (widgets  (hash-table-ref *examine-test-dat* popup))
	      (stepslbl (hash-table-ref/default widgets "Test Steps" #f)))
	 (if stepslbl
	     (let* ((fmtstr  "~15a~8a~8a~17a")
		    (newtxt  (string-intersperse 
			      (append
			       (list 
				(format #f fmtstr "Stepname" "State" "Status" "Event Time")
				(format #f fmtstr "========" "=====" "======" "=========="))
			       (map (lambda (x)
				      ;; take advantage of the \n on time->string







|







240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
    (update-labels uidat)
    (for-each 
     (lambda (popup)
       (let* ((test-id  (car popup))
	      (widgets  (hash-table-ref *examine-test-dat* popup))
	      (stepslbl (hash-table-ref/default widgets "Test Steps" #f)))
	 (if stepslbl
	     (let* ((fmtstr  "~15a~8a~8a~20a")
		    (newtxt  (string-intersperse 
			      (append
			       (list 
				(format #f fmtstr "Stepname" "State" "Status" "Event Time")
				(format #f fmtstr "========" "=====" "======" "=========="))
			       (map (lambda (x)
				      ;; take advantage of the \n on time->string

Modified db.scm from [e509962d69] to [6176618730].

219
220
221
222
223
224
225











226
227
228
229
230
231
232
  (let ((res 0))
    (sqlite3:for-each-row
     (lambda (count)
       (set! res count))
     db
     "SELECT count(id) FROM tests WHERE state = 'RUNNING' OR state = 'LAUNCHED' OR state = 'REMOTEHOSTSTART';")
    res))












;; NB// Sync this with runs:get-test-info
(define (db:get-test-info db run-id testname item-path)
  (let ((res '()))
    (sqlite3:for-each-row
     (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir item-path run_duration final_logf comment)
       (set! res (vector id run-id testname state status event-time host cpuload diskfree uname rundir item-path run_duration final_logf comment)))







>
>
>
>
>
>
>
>
>
>
>







219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
  (let ((res 0))
    (sqlite3:for-each-row
     (lambda (count)
       (set! res count))
     db
     "SELECT count(id) FROM tests WHERE state = 'RUNNING' OR state = 'LAUNCHED' OR state = 'REMOTEHOSTSTART';")
    res))

;; done with run when:
;;   0 tests in LAUNCHED, NOT_STARTED, REMOTEHOSTSTART, RUNNING
(define (db:estimated-tests-remaining db run-id)
  (let ((res 0))
    (sqlite3:for-each-row
     (lambda (count)
       (set! res count))
     db
     "SELECT count(id) FROM tests WHERE state in ('LAUNCHED','NOT_STARTED','REMOTEHOSTSTART','RUNNING') AND run_id=?;" run-id)
    res))

;; NB// Sync this with runs:get-test-info
(define (db:get-test-info db run-id testname item-path)
  (let ((res '()))
    (sqlite3:for-each-row
     (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir item-path run_duration final_logf comment)
       (set! res (vector id run-id testname state status event-time host cpuload diskfree uname rundir item-path run_duration final_logf comment)))
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286

(define (db-get-test-steps-for-run db test-id)
  (let ((res '()))
    (sqlite3:for-each-row 
     (lambda (id test-id stepname state status event-time)
       (set! res (cons (vector id test-id stepname state status event-time) res)))
     db
     "SELECT id,test_id,stepname,state,status,event_time FROM test_steps WHERE test_id=? ORDER BY event_time DESC;"
     test-id)
    res))

;; check that *all* the prereqs are "COMPLETED"
(define (db-get-prereqs-met db run-id waiton)
  (let ((res          #f)
	(not-complete 0)
	(tests        (db-get-tests-for-run db run-id)))
    (for-each







|

|







281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297

(define (db-get-test-steps-for-run db test-id)
  (let ((res '()))
    (sqlite3:for-each-row 
     (lambda (id test-id stepname state status event-time)
       (set! res (cons (vector id test-id stepname state status event-time) res)))
     db
     "SELECT id,test_id,stepname,state,status,event_time FROM test_steps WHERE test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC;
     test-id)
    (reverse res)))

;; check that *all* the prereqs are "COMPLETED"
(define (db-get-prereqs-met db run-id waiton)
  (let ((res          #f)
	(not-complete 0)
	(tests        (db-get-tests-for-run db run-id)))
    (for-each

Modified megatest.scm from [7e0b50a664] to [69c49cdbf1].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
;; Copyright 2006-2011, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.

(include "common.scm")
(define megatest-version 1.11)

(define help (conc "
Megatest, documentation at http://www.kiatoa.com/fossils/megatest
  version " megatest-version "
  license GPL, Copyright Matt Welland 2006-2011

Usage: megatest [options]










|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
;; Copyright 2006-2011, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.

(include "common.scm")
(define megatest-version 1.12)

(define help (conc "
Megatest, documentation at http://www.kiatoa.com/fossils/megatest
  version " megatest-version "
  license GPL, Copyright Matt Welland 2006-2011

Usage: megatest [options]
46
47
48
49
50
51
52


53
54
55
56
57
58
59

Misc 
  -force                  : override some checks
  -xterm                  : start an xterm instead of launching the test
  -remove-runs            : remove the data for a run, requires fields, :runname 
                            and -testpatt
  -testpatt patt          : remove tests matching patt (requires -remove-runs)



Helpers
  -runstep stepname  ...  : take remaining params as comand and execute as stepname
                            log will be in stepname.log. Best to put command in quotes
  -logpro file            : with -exec apply logpro file to stepname.log, creates
                            stepname.html and sets log to same
                            If using make use stepname_logpro.log as your target







>
>







46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61

Misc 
  -force                  : override some checks
  -xterm                  : start an xterm instead of launching the test
  -remove-runs            : remove the data for a run, requires fields, :runname 
                            and -testpatt
  -testpatt patt          : remove tests matching patt (requires -remove-runs)
  -keepgoing              : continue running until no jobs are \"LAUNCHED\" or
                            \"NOT_STARTED\"

Helpers
  -runstep stepname  ...  : take remaining params as comand and execute as stepname
                            log will be in stepname.log. Best to put command in quotes
  -logpro file            : with -exec apply logpro file to stepname.log, creates
                            stepname.html and sets log to same
                            If using make use stepname_logpro.log as your target
88
89
90
91
92
93
94

95
96
97
98
99
100
101
		        "-force"
		        "-xterm"
		        "-showkeys"
		        "-test-status"
		        "-gui"
			"-runall"    ;; run all tests
			"-remove-runs"

		       )
		 args:arg-hash
		 0))

(if (args:get-arg "-h")
    (begin
      (print help)







>







90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
		        "-force"
		        "-xterm"
		        "-showkeys"
		        "-test-status"
		        "-gui"
			"-runall"    ;; run all tests
			"-remove-runs"
			"-keepgoing"
		       )
		 args:arg-hash
		 0))

(if (args:get-arg "-h")
    (begin
      (print help)
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
		(print "ERROR: Attempted to run a test but run area config file not found")
		(exit 1))
	      ;; put test parameters into convenient variables
	      (let* ((test-names (get-all-legal-tests))) ;; "PROD" is ignored for now
		(print "INFO: Attempting to start the following tests...")
		(print "     " (string-intersperse test-names ","))
		(run-tests db test-names)))
	  (run-waiting-tests db)
	  (sqlite3:finalize! db)
	  (set! *didsomething* #t))))

;;======================================================================
;; run one test
;;======================================================================








|







255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
		(print "ERROR: Attempted to run a test but run area config file not found")
		(exit 1))
	      ;; put test parameters into convenient variables
	      (let* ((test-names (get-all-legal-tests))) ;; "PROD" is ignored for now
		(print "INFO: Attempting to start the following tests...")
		(print "     " (string-intersperse test-names ","))
		(run-tests db test-names)))
	  ;; (run-waiting-tests db)
	  (sqlite3:finalize! db)
	  (set! *didsomething* #t))))

;;======================================================================
;; run one test
;;======================================================================

293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
	      (print "ERROR: Attempted to run a test but run area config file not found")
	      (exit 1))
	    ;; put test parameters into convenient variables
	    (let* ((test-names   (string-split (args:get-arg "-runtests") ",")))
	      (run-tests db test-names)))
	;; run-waiting-tests db)
	(sqlite3:finalize! db)
	(run-waiting-tests #f)
	(set! *didsomething* #t))))
	  
(if (args:get-arg "-runtests")
    (runtests))

;;======================================================================
;; execute the test







|







296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
	      (print "ERROR: Attempted to run a test but run area config file not found")
	      (exit 1))
	    ;; put test parameters into convenient variables
	    (let* ((test-names   (string-split (args:get-arg "-runtests") ",")))
	      (run-tests db test-names)))
	;; run-waiting-tests db)
	(sqlite3:finalize! db)
	;; (run-waiting-tests #f)
	(set! *didsomething* #t))))
	  
(if (args:get-arg "-runtests")
    (runtests))

;;======================================================================
;; execute the test
389
390
391
392
393
394
395


396





397
398
399
400
401
402
403
							     (- 
							      (current-seconds) 
							      start-seconds))))))
				     (let loop ((minutes   (calc-minutes)))
				       (let ((db    (open-db)))
					 (set! kill-job? (test-get-kill-request db run-id test-name itemdat))
					 (test-update-meta-info db run-id test-name itemdat minutes)


					 (if kill-job? (process-signal (vector-ref exit-info 0) signal/term))





					 (sqlite3:finalize! db)
					 (thread-sleep! (+ 8 (random 4))) ;; add some jitter to the call home time to spread out the db accesses
					 (loop (calc-minutes)))))))
		   (th1          (make-thread monitorjob))
		   (th2          (make-thread runit)))
	      (thread-start! th1)
	      (thread-start! th2)







>
>
|
>
>
>
>
>







392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
							     (- 
							      (current-seconds) 
							      start-seconds))))))
				     (let loop ((minutes   (calc-minutes)))
				       (let ((db    (open-db)))
					 (set! kill-job? (test-get-kill-request db run-id test-name itemdat))
					 (test-update-meta-info db run-id test-name itemdat minutes)
					 (if kill-job? 
					     (begin 
					       (process-signal (vector-ref exit-info 0) signal/term)
					       (sleep 2)
					       (handle-exceptions
						exn
						(print "ERROR: Problem killing process " (vector-ref exit-info 0))
						(process-signal (vector-ref exit-info 0) signal/kill))))
					 (sqlite3:finalize! db)
					 (thread-sleep! (+ 8 (random 4))) ;; add some jitter to the call home time to spread out the db accesses
					 (loop (calc-minutes)))))))
		   (th1          (make-thread monitorjob))
		   (th2          (make-thread runit)))
	      (thread-start! th1)
	      (thread-start! th2)
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
			 (redir      (case (string->symbol shell)
				       ((tcsh csh ksh)    ">&")
				       ((zsh bash sh ash) "2>&1 >")))
			 (fullcmd    (conc "(" (string-intersperse 
						(cons cmd params) " ")
					   ") " redir " " logfile)))
		    ;; mark the start of the test
		    (test-set-status! db run-id test-name "start" "n/a" itemdat (args:get-arg "-m"))
		    ;; close the db
		    (sqlite3:finalize! db)
		    ;; run the test step
		    (print "INFO: Running \"" fullcmd "\"")
		    (change-directory startingdir)
		    (set! exitstat (system fullcmd)) ;; cmd params))
		    (set! *globalexitstatus* exitstat)







|







510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
			 (redir      (case (string->symbol shell)
				       ((tcsh csh ksh)    ">&")
				       ((zsh bash sh ash) "2>&1 >")))
			 (fullcmd    (conc "(" (string-intersperse 
						(cons cmd params) " ")
					   ") " redir " " logfile)))
		    ;; mark the start of the test
		    (teststep-set-status! db run-id test-name stepname "start" "n/a" itemdat (args:get-arg "-m"))
		    ;; close the db
		    (sqlite3:finalize! db)
		    ;; run the test step
		    (print "INFO: Running \"" fullcmd "\"")
		    (change-directory startingdir)
		    (set! exitstat (system fullcmd)) ;; cmd params))
		    (set! *globalexitstatus* exitstat)

Modified runs.scm from [a13e1910a3] to [4be3851e9e].

248
249
250
251
252
253
254

255
256
257
258
259
260
261
262
263
264
265
266
267







268
269
270
271
272
273
274
275
	      tests)
    res))

(define (run-tests db test-names)
  (let* ((keys        (db-get-keys db))
	 (keyvallst   (keys->vallist keys #t))
	 (run-id      (register-run db keys))) ;;  test-name)))

    (for-each 
     (lambda (test-name)
       (let ((num-running (db:get-count-tests-running db))
	     (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs")))
	 (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	 (if (or (not max-concurrent-jobs)
		 (and max-concurrent-jobs
		      (string->number max-concurrent-jobs)
		      (not (> num-running (string->number max-concurrent-jobs)))))
	     (run-one-test db run-id test-name keyvallst)
	     (print "WARNING: Max running jobs exceeded, current number running: " num-running 
		    ", max_concurrent_jobs: " max-concurrent-jobs))))
     test-names)))








;; VERY INEFFICIENT! Move stuff that should be done once up to calling proc
(define (run-one-test db run-id test-name keyvallst)
  (print "Launching test " test-name)
  ;; All these vars might be referenced by the testconfig file reader
  (setenv "MT_TEST_NAME" test-name) ;; 
  (setenv "MT_RUNNAME"   (args:get-arg ":runname"))
  (set-megatest-env-vars db run-id) ;; these may be needed by the launching process







>
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>
>
>
>
>
>
|







248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
	      tests)
    res))

(define (run-tests db test-names)
  (let* ((keys        (db-get-keys db))
	 (keyvallst   (keys->vallist keys #t))
	 (run-id      (register-run db keys))) ;;  test-name)))
    (let loop ((numtimes 0))
      (for-each 
       (lambda (test-name)
	 (let ((num-running (db:get-count-tests-running db))
	       (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs")))
	   (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	   (if (or (not max-concurrent-jobs)
		   (and max-concurrent-jobs
			(string->number max-concurrent-jobs)
			(not (>= num-running (string->number max-concurrent-jobs)))))
	       (run-one-test db run-id test-name keyvallst)
	       (print "WARNING: Max running jobs exceeded, current number running: " num-running 
		      ", max_concurrent_jobs: " max-concurrent-jobs))))
       test-names)
      (if (args:get-arg "-keepgoing")
	  (let ((estrem (db:estimated-tests-remaining db run-id)))
	    (if (> estrem 0)
		(begin
		  (print "Keep going, estimated " estrem " tests remaining to run, will continue in 10 seconds ...")
		  (sleep 10)
		  (loop (+ numtimes 1)))))))))
	   
;; VERY INEFFICIENT! Move stuff that should be done once up to calling proc
(define (run-one-test db run-id test-name keyvallst)
  (print "Launching test " test-name)
  ;; All these vars might be referenced by the testconfig file reader
  (setenv "MT_TEST_NAME" test-name) ;; 
  (setenv "MT_RUNNAME"   (args:get-arg ":runname"))
  (set-megatest-env-vars db run-id) ;; these may be needed by the launching process
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
		   (test-status   #f)
		   (num-running (db:get-count-tests-running db))
		   (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs")))
	      (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	      (if (not (or (not max-concurrent-jobs)
			   (and max-concurrent-jobs
				(string->number max-concurrent-jobs)
				(not (> num-running (string->number max-concurrent-jobs))))))
		  (print "WARNING: Max running jobs exceeded, current number running: " num-running 
			 ", max_concurrent_jobs: " max-concurrent-jobs)
		  (begin
		    (let loop2 ((ts #f)
				(ct 0))
		      (if (and (not ts)
			       (< ct 10))







|







313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
		   (test-status   #f)
		   (num-running (db:get-count-tests-running db))
		   (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs")))
	      (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	      (if (not (or (not max-concurrent-jobs)
			   (and max-concurrent-jobs
				(string->number max-concurrent-jobs)
				(not (>= num-running (string->number max-concurrent-jobs))))))
		  (print "WARNING: Max running jobs exceeded, current number running: " num-running 
			 ", max_concurrent_jobs: " max-concurrent-jobs)
		  (begin
		    (let loop2 ((ts #f)
				(ct 0))
		      (if (and (not ts)
			       (< ct 10))

Modified tests/Makefile from [4c07feade7] to [0b961aa417].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
# run some tests

MEGATEST=$(shell realpath ../megatest)

runall :
	cd ../;make 
	$(MEGATEST) -runall :sysname ubuntu :fsname nfs :datapath none :runname `date +%GWW%V.%u` -m "This is a comment specific to a run"

test :
	cd ../;make test
	make runall

dashboard :
	cd ../;make dashboard






|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
# run some tests

MEGATEST=$(shell realpath ../megatest)

runall :
	cd ../;make 
	$(MEGATEST) -keepgoing -runall :sysname ubuntu :fsname nfs :datapath none :runname `date +%GWW%V.%u` -m "This is a comment specific to a run"

test :
	cd ../;make test
	make runall

dashboard :
	cd ../;make dashboard

Modified tests/megatest.config from [b996a05181] to [7cce1c9833].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
[fields]
sysname TEXT
fsname TEXT
datapath TEXT

[setup]
# exectutable /path/to/megatest
max_concurrent_jobs 405
runsdir /tmp/runs

[jobtools]
# ## launcher launches jobs, the job is managed on the target host
## by megatest, comment out launcher to run local
# workhosts localhost hermes
launcher nbfake







|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
[fields]
sysname TEXT
fsname TEXT
datapath TEXT

[setup]
# exectutable /path/to/megatest
max_concurrent_jobs 8
runsdir /tmp/runs

[jobtools]
# ## launcher launches jobs, the job is managed on the target host
## by megatest, comment out launcher to run local
# workhosts localhost hermes
launcher nbfake