Check-in [c075ebd51b]
Not logged in
Overview
SHA1 Hash:c075ebd51bcdb34062a39b7c46642be877740a6b
Date: 2011-06-16 00:11:59
User: mrwellan
Comment:Added -keepgoing, removed the calls to run-queue, fixes to job limits, -runstep, and killreq (now will do signal/kill after first trying signal/term)
Timelines: family | ancestors | descendants | both | trunk
Downloads: Tarball | ZIP archive
Other Links: files | file ages | folders | manifest
Tags And Properties
Changes

Modified dashboard.scm from [78e2859a5dd5f60b] to [d72b4fee0f77d68f].

179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
...
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
...
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
					 (iup:button "Cancel and close"
						     #:expand "YES"
						     #:action (lambda (x)
								(hash-table-delete! *examine-test-dat* testkey)
								(iup:destroy! self))))
			       )))
		  (iup:hbox ;; the test steps are tracked here
		   (let ((stepsdat (iup:label "Test steps ......................................" #:expand "YES")))
		     (hash-table-set! widgets "Test Steps" stepsdat)
		     stepsdat)
		   ))))
	  (iup:show self)
	  ))))

(define (colors-similar? color1 color2)
................................................................................
		       (tests    (db-get-tests-for-run *db* run-id testnamepatt itemnamepatt))
		       (key-vals (get-key-vals *db* run-id)))
		  (if (> (length tests) maxtests)
		      (set! maxtests (length tests)))
		  (set! result (cons (vector run tests key-vals) result))))
	      runs)
    (set! *header*  header)
    (set! *allruns* (reverse result))
    maxtests))

(define (update-labels uidat)
  (let* ((rown    0)
	 (lftcol (vector-ref uidat 0))
	 (maxn   (- (vector-length lftcol) 1)))
    (let loop ((i 0))
................................................................................
    (update-labels uidat)
    (for-each 
     (lambda (popup)
       (let* ((test-id  (car popup))
	      (widgets  (hash-table-ref *examine-test-dat* popup))
	      (stepslbl (hash-table-ref/default widgets "Test Steps" #f)))
	 (if stepslbl
	     (let* ((fmtstr  "~15a~8a~8a~17a")
		    (newtxt  (string-intersperse 
			      (append
			       (list 
				(format #f fmtstr "Stepname" "State" "Status" "Event Time")
				(format #f fmtstr "========" "=====" "======" "=========="))
			       (map (lambda (x)
				      ;; take advantage of the \n on time->string







|







 







|







 







|







179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
...
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
...
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
					 (iup:button "Cancel and close"
						     #:expand "YES"
						     #:action (lambda (x)
								(hash-table-delete! *examine-test-dat* testkey)
								(iup:destroy! self))))
			       )))
		  (iup:hbox ;; the test steps are tracked here
		   (let ((stepsdat (iup:label "Test steps ........................................." #:expand "YES")))
		     (hash-table-set! widgets "Test Steps" stepsdat)
		     stepsdat)
		   ))))
	  (iup:show self)
	  ))))

(define (colors-similar? color1 color2)
................................................................................
		       (tests    (db-get-tests-for-run *db* run-id testnamepatt itemnamepatt))
		       (key-vals (get-key-vals *db* run-id)))
		  (if (> (length tests) maxtests)
		      (set! maxtests (length tests)))
		  (set! result (cons (vector run tests key-vals) result))))
	      runs)
    (set! *header*  header)
    (set! *allruns* result)
    maxtests))

(define (update-labels uidat)
  (let* ((rown    0)
	 (lftcol (vector-ref uidat 0))
	 (maxn   (- (vector-length lftcol) 1)))
    (let loop ((i 0))
................................................................................
    (update-labels uidat)
    (for-each 
     (lambda (popup)
       (let* ((test-id  (car popup))
	      (widgets  (hash-table-ref *examine-test-dat* popup))
	      (stepslbl (hash-table-ref/default widgets "Test Steps" #f)))
	 (if stepslbl
	     (let* ((fmtstr  "~15a~8a~8a~20a")
		    (newtxt  (string-intersperse 
			      (append
			       (list 
				(format #f fmtstr "Stepname" "State" "Status" "Event Time")
				(format #f fmtstr "========" "=====" "======" "=========="))
			       (map (lambda (x)
				      ;; take advantage of the \n on time->string

Modified db.scm from [e509962d692c4e15] to [617661873021dd04].

219
220
221
222
223
224
225











226
227
228
229
230
231
232
...
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
  (let ((res 0))
    (sqlite3:for-each-row
     (lambda (count)
       (set! res count))
     db
     "SELECT count(id) FROM tests WHERE state = 'RUNNING' OR state = 'LAUNCHED' OR state = 'REMOTEHOSTSTART';")
    res))












;; NB// Sync this with runs:get-test-info
(define (db:get-test-info db run-id testname item-path)
  (let ((res '()))
    (sqlite3:for-each-row
     (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir item-path run_duration final_logf comment)
       (set! res (vector id run-id testname state status event-time host cpuload diskfree uname rundir item-path run_duration final_logf comment)))
................................................................................

(define (db-get-test-steps-for-run db test-id)
  (let ((res '()))
    (sqlite3:for-each-row 
     (lambda (id test-id stepname state status event-time)
       (set! res (cons (vector id test-id stepname state status event-time) res)))
     db
     "SELECT id,test_id,stepname,state,status,event_time FROM test_steps WHERE test_id=? ORDER BY event_time DESC;"
     test-id)
    res))

;; check that *all* the prereqs are "COMPLETED"
(define (db-get-prereqs-met db run-id waiton)
  (let ((res          #f)
	(not-complete 0)
	(tests        (db-get-tests-for-run db run-id)))
    (for-each







>
>
>
>
>
>
>
>
>
>
>







 







|

|







219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
...
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
  (let ((res 0))
    (sqlite3:for-each-row
     (lambda (count)
       (set! res count))
     db
     "SELECT count(id) FROM tests WHERE state = 'RUNNING' OR state = 'LAUNCHED' OR state = 'REMOTEHOSTSTART';")
    res))

;; done with run when:
;;   0 tests in LAUNCHED, NOT_STARTED, REMOTEHOSTSTART, RUNNING
(define (db:estimated-tests-remaining db run-id)
  (let ((res 0))
    (sqlite3:for-each-row
     (lambda (count)
       (set! res count))
     db
     "SELECT count(id) FROM tests WHERE state in ('LAUNCHED','NOT_STARTED','REMOTEHOSTSTART','RUNNING') AND run_id=?;" run-id)
    res))

;; NB// Sync this with runs:get-test-info
(define (db:get-test-info db run-id testname item-path)
  (let ((res '()))
    (sqlite3:for-each-row
     (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir item-path run_duration final_logf comment)
       (set! res (vector id run-id testname state status event-time host cpuload diskfree uname rundir item-path run_duration final_logf comment)))
................................................................................

(define (db-get-test-steps-for-run db test-id)
  (let ((res '()))
    (sqlite3:for-each-row 
     (lambda (id test-id stepname state status event-time)
       (set! res (cons (vector id test-id stepname state status event-time) res)))
     db
     "SELECT id,test_id,stepname,state,status,event_time FROM test_steps WHERE test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC;
     test-id)
    (reverse res)))

;; check that *all* the prereqs are "COMPLETED"
(define (db-get-prereqs-met db run-id waiton)
  (let ((res          #f)
	(not-complete 0)
	(tests        (db-get-tests-for-run db run-id)))
    (for-each

Modified megatest.scm from [7e0b50a664905c41] to [69c49cdbf1ae7a69].

4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
..
46
47
48
49
50
51
52


53
54
55
56
57
58
59
..
88
89
90
91
92
93
94

95
96
97
98
99
100
101
...
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
...
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
...
389
390
391
392
393
394
395


396





397
398
399
400
401
402
403
...
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.

(include "common.scm")
(define megatest-version 1.11)

(define help (conc "
Megatest, documentation at http://www.kiatoa.com/fossils/megatest
  version " megatest-version "
  license GPL, Copyright Matt Welland 2006-2011

Usage: megatest [options]
................................................................................

Misc 
  -force                  : override some checks
  -xterm                  : start an xterm instead of launching the test
  -remove-runs            : remove the data for a run, requires fields, :runname 
                            and -testpatt
  -testpatt patt          : remove tests matching patt (requires -remove-runs)



Helpers
  -runstep stepname  ...  : take remaining params as comand and execute as stepname
                            log will be in stepname.log. Best to put command in quotes
  -logpro file            : with -exec apply logpro file to stepname.log, creates
                            stepname.html and sets log to same
                            If using make use stepname_logpro.log as your target
................................................................................
		        "-force"
		        "-xterm"
		        "-showkeys"
		        "-test-status"
		        "-gui"
			"-runall"    ;; run all tests
			"-remove-runs"

		       )
		 args:arg-hash
		 0))

(if (args:get-arg "-h")
    (begin
      (print help)
................................................................................
		(print "ERROR: Attempted to run a test but run area config file not found")
		(exit 1))
	      ;; put test parameters into convenient variables
	      (let* ((test-names (get-all-legal-tests))) ;; "PROD" is ignored for now
		(print "INFO: Attempting to start the following tests...")
		(print "     " (string-intersperse test-names ","))
		(run-tests db test-names)))
	  (run-waiting-tests db)
	  (sqlite3:finalize! db)
	  (set! *didsomething* #t))))

;;======================================================================
;; run one test
;;======================================================================

................................................................................
	      (print "ERROR: Attempted to run a test but run area config file not found")
	      (exit 1))
	    ;; put test parameters into convenient variables
	    (let* ((test-names   (string-split (args:get-arg "-runtests") ",")))
	      (run-tests db test-names)))
	;; run-waiting-tests db)
	(sqlite3:finalize! db)
	(run-waiting-tests #f)
	(set! *didsomething* #t))))
	  
(if (args:get-arg "-runtests")
    (runtests))

;;======================================================================
;; execute the test
................................................................................
							     (- 
							      (current-seconds) 
							      start-seconds))))))
				     (let loop ((minutes   (calc-minutes)))
				       (let ((db    (open-db)))
					 (set! kill-job? (test-get-kill-request db run-id test-name itemdat))
					 (test-update-meta-info db run-id test-name itemdat minutes)


					 (if kill-job? (process-signal (vector-ref exit-info 0) signal/term))





					 (sqlite3:finalize! db)
					 (thread-sleep! (+ 8 (random 4))) ;; add some jitter to the call home time to spread out the db accesses
					 (loop (calc-minutes)))))))
		   (th1          (make-thread monitorjob))
		   (th2          (make-thread runit)))
	      (thread-start! th1)
	      (thread-start! th2)
................................................................................
			 (redir      (case (string->symbol shell)
				       ((tcsh csh ksh)    ">&")
				       ((zsh bash sh ash) "2>&1 >")))
			 (fullcmd    (conc "(" (string-intersperse 
						(cons cmd params) " ")
					   ") " redir " " logfile)))
		    ;; mark the start of the test
		    (test-set-status! db run-id test-name "start" "n/a" itemdat (args:get-arg "-m"))
		    ;; close the db
		    (sqlite3:finalize! db)
		    ;; run the test step
		    (print "INFO: Running \"" fullcmd "\"")
		    (change-directory startingdir)
		    (set! exitstat (system fullcmd)) ;; cmd params))
		    (set! *globalexitstatus* exitstat)







|







 







>
>







 







>







 







|







 







|







 







>
>
|
>
>
>
>
>







 







|







4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
..
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
..
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
...
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
...
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
...
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
...
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.

(include "common.scm")
(define megatest-version 1.12)

(define help (conc "
Megatest, documentation at http://www.kiatoa.com/fossils/megatest
  version " megatest-version "
  license GPL, Copyright Matt Welland 2006-2011

Usage: megatest [options]
................................................................................

Misc 
  -force                  : override some checks
  -xterm                  : start an xterm instead of launching the test
  -remove-runs            : remove the data for a run, requires fields, :runname 
                            and -testpatt
  -testpatt patt          : remove tests matching patt (requires -remove-runs)
  -keepgoing              : continue running until no jobs are \"LAUNCHED\" or
                            \"NOT_STARTED\"

Helpers
  -runstep stepname  ...  : take remaining params as comand and execute as stepname
                            log will be in stepname.log. Best to put command in quotes
  -logpro file            : with -exec apply logpro file to stepname.log, creates
                            stepname.html and sets log to same
                            If using make use stepname_logpro.log as your target
................................................................................
		        "-force"
		        "-xterm"
		        "-showkeys"
		        "-test-status"
		        "-gui"
			"-runall"    ;; run all tests
			"-remove-runs"
			"-keepgoing"
		       )
		 args:arg-hash
		 0))

(if (args:get-arg "-h")
    (begin
      (print help)
................................................................................
		(print "ERROR: Attempted to run a test but run area config file not found")
		(exit 1))
	      ;; put test parameters into convenient variables
	      (let* ((test-names (get-all-legal-tests))) ;; "PROD" is ignored for now
		(print "INFO: Attempting to start the following tests...")
		(print "     " (string-intersperse test-names ","))
		(run-tests db test-names)))
	  ;; (run-waiting-tests db)
	  (sqlite3:finalize! db)
	  (set! *didsomething* #t))))

;;======================================================================
;; run one test
;;======================================================================

................................................................................
	      (print "ERROR: Attempted to run a test but run area config file not found")
	      (exit 1))
	    ;; put test parameters into convenient variables
	    (let* ((test-names   (string-split (args:get-arg "-runtests") ",")))
	      (run-tests db test-names)))
	;; run-waiting-tests db)
	(sqlite3:finalize! db)
	;; (run-waiting-tests #f)
	(set! *didsomething* #t))))
	  
(if (args:get-arg "-runtests")
    (runtests))

;;======================================================================
;; execute the test
................................................................................
							     (- 
							      (current-seconds) 
							      start-seconds))))))
				     (let loop ((minutes   (calc-minutes)))
				       (let ((db    (open-db)))
					 (set! kill-job? (test-get-kill-request db run-id test-name itemdat))
					 (test-update-meta-info db run-id test-name itemdat minutes)
					 (if kill-job? 
					     (begin 
					       (process-signal (vector-ref exit-info 0) signal/term)
					       (sleep 2)
					       (handle-exceptions
						exn
						(print "ERROR: Problem killing process " (vector-ref exit-info 0))
						(process-signal (vector-ref exit-info 0) signal/kill))))
					 (sqlite3:finalize! db)
					 (thread-sleep! (+ 8 (random 4))) ;; add some jitter to the call home time to spread out the db accesses
					 (loop (calc-minutes)))))))
		   (th1          (make-thread monitorjob))
		   (th2          (make-thread runit)))
	      (thread-start! th1)
	      (thread-start! th2)
................................................................................
			 (redir      (case (string->symbol shell)
				       ((tcsh csh ksh)    ">&")
				       ((zsh bash sh ash) "2>&1 >")))
			 (fullcmd    (conc "(" (string-intersperse 
						(cons cmd params) " ")
					   ") " redir " " logfile)))
		    ;; mark the start of the test
		    (teststep-set-status! db run-id test-name stepname "start" "n/a" itemdat (args:get-arg "-m"))
		    ;; close the db
		    (sqlite3:finalize! db)
		    ;; run the test step
		    (print "INFO: Running \"" fullcmd "\"")
		    (change-directory startingdir)
		    (set! exitstat (system fullcmd)) ;; cmd params))
		    (set! *globalexitstatus* exitstat)

Modified runs.scm from [a13e1910a3378aca] to [4be3851e9e3a8974].

248
249
250
251
252
253
254

255
256
257
258
259
260
261
262
263
264
265
266
267







268
269
270
271
272
273
274
275
...
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
	      tests)
    res))

(define (run-tests db test-names)
  (let* ((keys        (db-get-keys db))
	 (keyvallst   (keys->vallist keys #t))
	 (run-id      (register-run db keys))) ;;  test-name)))

    (for-each 
     (lambda (test-name)
       (let ((num-running (db:get-count-tests-running db))
	     (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs")))
	 (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	 (if (or (not max-concurrent-jobs)
		 (and max-concurrent-jobs
		      (string->number max-concurrent-jobs)
		      (not (> num-running (string->number max-concurrent-jobs)))))
	     (run-one-test db run-id test-name keyvallst)
	     (print "WARNING: Max running jobs exceeded, current number running: " num-running 
		    ", max_concurrent_jobs: " max-concurrent-jobs))))
     test-names)))








;; VERY INEFFICIENT! Move stuff that should be done once up to calling proc
(define (run-one-test db run-id test-name keyvallst)
  (print "Launching test " test-name)
  ;; All these vars might be referenced by the testconfig file reader
  (setenv "MT_TEST_NAME" test-name) ;; 
  (setenv "MT_RUNNAME"   (args:get-arg ":runname"))
  (set-megatest-env-vars db run-id) ;; these may be needed by the launching process
................................................................................
		   (test-status   #f)
		   (num-running (db:get-count-tests-running db))
		   (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs")))
	      (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	      (if (not (or (not max-concurrent-jobs)
			   (and max-concurrent-jobs
				(string->number max-concurrent-jobs)
				(not (> num-running (string->number max-concurrent-jobs))))))
		  (print "WARNING: Max running jobs exceeded, current number running: " num-running 
			 ", max_concurrent_jobs: " max-concurrent-jobs)
		  (begin
		    (let loop2 ((ts #f)
				(ct 0))
		      (if (and (not ts)
			       (< ct 10))







>
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>
>
>
>
>
>
|







 







|







248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
...
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
	      tests)
    res))

(define (run-tests db test-names)
  (let* ((keys        (db-get-keys db))
	 (keyvallst   (keys->vallist keys #t))
	 (run-id      (register-run db keys))) ;;  test-name)))
    (let loop ((numtimes 0))
      (for-each 
       (lambda (test-name)
	 (let ((num-running (db:get-count-tests-running db))
	       (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs")))
	   (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	   (if (or (not max-concurrent-jobs)
		   (and max-concurrent-jobs
			(string->number max-concurrent-jobs)
			(not (>= num-running (string->number max-concurrent-jobs)))))
	       (run-one-test db run-id test-name keyvallst)
	       (print "WARNING: Max running jobs exceeded, current number running: " num-running 
		      ", max_concurrent_jobs: " max-concurrent-jobs))))
       test-names)
      (if (args:get-arg "-keepgoing")
	  (let ((estrem (db:estimated-tests-remaining db run-id)))
	    (if (> estrem 0)
		(begin
		  (print "Keep going, estimated " estrem " tests remaining to run, will continue in 10 seconds ...")
		  (sleep 10)
		  (loop (+ numtimes 1)))))))))
	   
;; VERY INEFFICIENT! Move stuff that should be done once up to calling proc
(define (run-one-test db run-id test-name keyvallst)
  (print "Launching test " test-name)
  ;; All these vars might be referenced by the testconfig file reader
  (setenv "MT_TEST_NAME" test-name) ;; 
  (setenv "MT_RUNNAME"   (args:get-arg ":runname"))
  (set-megatest-env-vars db run-id) ;; these may be needed by the launching process
................................................................................
		   (test-status   #f)
		   (num-running (db:get-count-tests-running db))
		   (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs")))
	      (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	      (if (not (or (not max-concurrent-jobs)
			   (and max-concurrent-jobs
				(string->number max-concurrent-jobs)
				(not (>= num-running (string->number max-concurrent-jobs))))))
		  (print "WARNING: Max running jobs exceeded, current number running: " num-running 
			 ", max_concurrent_jobs: " max-concurrent-jobs)
		  (begin
		    (let loop2 ((ts #f)
				(ct 0))
		      (if (and (not ts)
			       (< ct 10))

Modified tests/Makefile from [4c07feade78035be] to [0b961aa417fcf512].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
# run some tests

MEGATEST=$(shell realpath ../megatest)

runall :
	cd ../;make 
	$(MEGATEST) -runall :sysname ubuntu :fsname nfs :datapath none :runname `date +%GWW%V.%u` -m "This is a comment specific to a run"

test :
	cd ../;make test
	make runall

dashboard :
	cd ../;make dashboard






|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
# run some tests

MEGATEST=$(shell realpath ../megatest)

runall :
	cd ../;make 
	$(MEGATEST) -keepgoing -runall :sysname ubuntu :fsname nfs :datapath none :runname `date +%GWW%V.%u` -m "This is a comment specific to a run"

test :
	cd ../;make test
	make runall

dashboard :
	cd ../;make dashboard

Modified tests/megatest.config from [b996a05181a3c85b] to [7cce1c983316acca].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
[fields]
sysname TEXT
fsname TEXT
datapath TEXT

[setup]
# exectutable /path/to/megatest
max_concurrent_jobs 405
runsdir /tmp/runs

[jobtools]
# ## launcher launches jobs, the job is managed on the target host
## by megatest, comment out launcher to run local
# workhosts localhost hermes
launcher nbfake







|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
[fields]
sysname TEXT
fsname TEXT
datapath TEXT

[setup]
# exectutable /path/to/megatest
max_concurrent_jobs 8
runsdir /tmp/runs

[jobtools]
# ## launcher launches jobs, the job is managed on the target host
## by megatest, comment out launcher to run local
# workhosts localhost hermes
launcher nbfake