Check-in [c075ebd51b]
Not logged in
Overview
SHA1 Hash:c075ebd51bcdb34062a39b7c46642be877740a6b
Date: 2011-06-15 18:11:59
User: mrwellan
Comment:Added -keepgoing, removed the calls to run-queue, fixes to job limits, -runstep, and killreq (now will do signal/kill after first trying signal/term)
Timelines: family | ancestors | descendants | both | trunk
Downloads: Tarball | ZIP archive
Other Links: files | file ages | manifest
Tags And Properties
Changes

Modified dashboard.scm from [78e2859a5dd5f60b] to [d72b4fee0f77d68f].

179 179 (iup:button "Cancel and close" 180 180 #:expand "YES" 181 181 #:action (lambda (x) 182 182 (hash-table-delete! *examine-test-dat* testkey) 183 183 (iup:destroy! self)))) 184 184 ))) 185 185 (iup:hbox ;; the test steps are tracked here 186 - (let ((stepsdat (iup:label "Test steps ......................................" #:expand "YES"))) 186 + (let ((stepsdat (iup:label "Test steps ........................................." #:expand "YES"))) 187 187 (hash-table-set! widgets "Test Steps" stepsdat) 188 188 stepsdat) 189 189 )))) 190 190 (iup:show self) 191 191 )))) 192 192 193 193 (define (colors-similar? color1 color2) ................................................................................ 207 207 (tests (db-get-tests-for-run *db* run-id testnamepatt itemnamepatt)) 208 208 (key-vals (get-key-vals *db* run-id))) 209 209 (if (> (length tests) maxtests) 210 210 (set! maxtests (length tests))) 211 211 (set! result (cons (vector run tests key-vals) result)))) 212 212 runs) 213 213 (set! *header* header) 214 - (set! *allruns* (reverse result)) 214 + (set! *allruns* result) 215 215 maxtests)) 216 216 217 217 (define (update-labels uidat) 218 218 (let* ((rown 0) 219 219 (lftcol (vector-ref uidat 0)) 220 220 (maxn (- (vector-length lftcol) 1))) 221 221 (let loop ((i 0)) ................................................................................ 240 240 (update-labels uidat) 241 241 (for-each 242 242 (lambda (popup) 243 243 (let* ((test-id (car popup)) 244 244 (widgets (hash-table-ref *examine-test-dat* popup)) 245 245 (stepslbl (hash-table-ref/default widgets "Test Steps" #f))) 246 246 (if stepslbl 247 - (let* ((fmtstr "~15a~8a~8a~17a") 247 + (let* ((fmtstr "~15a~8a~8a~20a") 248 248 (newtxt (string-intersperse 249 249 (append 250 250 (list 251 251 (format #f fmtstr "Stepname" "State" "Status" "Event Time") 252 252 (format #f fmtstr "========" "=====" "======" "==========")) 253 253 (map (lambda (x) 254 254 ;; take advantage of the \n on time->string

Modified db.scm from [e509962d692c4e15] to [617661873021dd04].

219 219 (let ((res 0)) 220 220 (sqlite3:for-each-row 221 221 (lambda (count) 222 222 (set! res count)) 223 223 db 224 224 "SELECT count(id) FROM tests WHERE state = 'RUNNING' OR state = 'LAUNCHED' OR state = 'REMOTEHOSTSTART';") 225 225 res)) 226 + 227 +;; done with run when: 228 +;; 0 tests in LAUNCHED, NOT_STARTED, REMOTEHOSTSTART, RUNNING 229 +(define (db:estimated-tests-remaining db run-id) 230 + (let ((res 0)) 231 + (sqlite3:for-each-row 232 + (lambda (count) 233 + (set! res count)) 234 + db 235 + "SELECT count(id) FROM tests WHERE state in ('LAUNCHED','NOT_STARTED','REMOTEHOSTSTART','RUNNING') AND run_id=?;" run-id) 236 + res)) 226 237 227 238 ;; NB// Sync this with runs:get-test-info 228 239 (define (db:get-test-info db run-id testname item-path) 229 240 (let ((res '())) 230 241 (sqlite3:for-each-row 231 242 (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir item-path run_duration final_logf comment) 232 243 (set! res (vector id run-id testname state status event-time host cpuload diskfree uname rundir item-path run_duration final_logf comment))) ................................................................................ 270 281 271 282 (define (db-get-test-steps-for-run db test-id) 272 283 (let ((res '())) 273 284 (sqlite3:for-each-row 274 285 (lambda (id test-id stepname state status event-time) 275 286 (set! res (cons (vector id test-id stepname state status event-time) res))) 276 287 db 277 - "SELECT id,test_id,stepname,state,status,event_time FROM test_steps WHERE test_id=? ORDER BY event_time DESC;" 288 + "SELECT id,test_id,stepname,state,status,event_time FROM test_steps WHERE test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; 278 289 test-id) 279 - res)) 290 + (reverse res))) 280 291 281 292 ;; check that *all* the prereqs are "COMPLETED" 282 293 (define (db-get-prereqs-met db run-id waiton) 283 294 (let ((res #f) 284 295 (not-complete 0) 285 296 (tests (db-get-tests-for-run db run-id))) 286 297 (for-each

Modified megatest.scm from [7e0b50a664905c41] to [69c49cdbf1ae7a69].

4 4 ;; greater. See the accompanying file COPYING for details. 5 5 ;; 6 6 ;; This program is distributed WITHOUT ANY WARRANTY; without even the 7 7 ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 8 8 ;; PURPOSE. 9 9 10 10 (include "common.scm") 11 -(define megatest-version 1.11) 11 +(define megatest-version 1.12) 12 12 13 13 (define help (conc " 14 14 Megatest, documentation at http://www.kiatoa.com/fossils/megatest 15 15 version " megatest-version " 16 16 license GPL, Copyright Matt Welland 2006-2011 17 17 18 18 Usage: megatest [options] ................................................................................ 46 46 47 47 Misc 48 48 -force : override some checks 49 49 -xterm : start an xterm instead of launching the test 50 50 -remove-runs : remove the data for a run, requires fields, :runname 51 51 and -testpatt 52 52 -testpatt patt : remove tests matching patt (requires -remove-runs) 53 + -keepgoing : continue running until no jobs are \"LAUNCHED\" or 54 + \"NOT_STARTED\" 53 55 54 56 Helpers 55 57 -runstep stepname ... : take remaining params as comand and execute as stepname 56 58 log will be in stepname.log. Best to put command in quotes 57 59 -logpro file : with -exec apply logpro file to stepname.log, creates 58 60 stepname.html and sets log to same 59 61 If using make use stepname_logpro.log as your target ................................................................................ 88 90 "-force" 89 91 "-xterm" 90 92 "-showkeys" 91 93 "-test-status" 92 94 "-gui" 93 95 "-runall" ;; run all tests 94 96 "-remove-runs" 97 + "-keepgoing" 95 98 ) 96 99 args:arg-hash 97 100 0)) 98 101 99 102 (if (args:get-arg "-h") 100 103 (begin 101 104 (print help) ................................................................................ 252 255 (print "ERROR: Attempted to run a test but run area config file not found") 253 256 (exit 1)) 254 257 ;; put test parameters into convenient variables 255 258 (let* ((test-names (get-all-legal-tests))) ;; "PROD" is ignored for now 256 259 (print "INFO: Attempting to start the following tests...") 257 260 (print " " (string-intersperse test-names ",")) 258 261 (run-tests db test-names))) 259 - (run-waiting-tests db) 262 + ;; (run-waiting-tests db) 260 263 (sqlite3:finalize! db) 261 264 (set! *didsomething* #t)))) 262 265 263 266 ;;====================================================================== 264 267 ;; run one test 265 268 ;;====================================================================== 266 269 ................................................................................ 293 296 (print "ERROR: Attempted to run a test but run area config file not found") 294 297 (exit 1)) 295 298 ;; put test parameters into convenient variables 296 299 (let* ((test-names (string-split (args:get-arg "-runtests") ","))) 297 300 (run-tests db test-names))) 298 301 ;; run-waiting-tests db) 299 302 (sqlite3:finalize! db) 300 - (run-waiting-tests #f) 303 + ;; (run-waiting-tests #f) 301 304 (set! *didsomething* #t)))) 302 305 303 306 (if (args:get-arg "-runtests") 304 307 (runtests)) 305 308 306 309 ;;====================================================================== 307 310 ;; execute the test ................................................................................ 389 392 (- 390 393 (current-seconds) 391 394 start-seconds)))))) 392 395 (let loop ((minutes (calc-minutes))) 393 396 (let ((db (open-db))) 394 397 (set! kill-job? (test-get-kill-request db run-id test-name itemdat)) 395 398 (test-update-meta-info db run-id test-name itemdat minutes) 396 - (if kill-job? (process-signal (vector-ref exit-info 0) signal/term)) 399 + (if kill-job? 400 + (begin 401 + (process-signal (vector-ref exit-info 0) signal/term) 402 + (sleep 2) 403 + (handle-exceptions 404 + exn 405 + (print "ERROR: Problem killing process " (vector-ref exit-info 0)) 406 + (process-signal (vector-ref exit-info 0) signal/kill)))) 397 407 (sqlite3:finalize! db) 398 408 (thread-sleep! (+ 8 (random 4))) ;; add some jitter to the call home time to spread out the db accesses 399 409 (loop (calc-minutes))))))) 400 410 (th1 (make-thread monitorjob)) 401 411 (th2 (make-thread runit))) 402 412 (thread-start! th1) 403 413 (thread-start! th2) ................................................................................ 500 510 (redir (case (string->symbol shell) 501 511 ((tcsh csh ksh) ">&") 502 512 ((zsh bash sh ash) "2>&1 >"))) 503 513 (fullcmd (conc "(" (string-intersperse 504 514 (cons cmd params) " ") 505 515 ") " redir " " logfile))) 506 516 ;; mark the start of the test 507 - (test-set-status! db run-id test-name "start" "n/a" itemdat (args:get-arg "-m")) 517 + (teststep-set-status! db run-id test-name stepname "start" "n/a" itemdat (args:get-arg "-m")) 508 518 ;; close the db 509 519 (sqlite3:finalize! db) 510 520 ;; run the test step 511 521 (print "INFO: Running \"" fullcmd "\"") 512 522 (change-directory startingdir) 513 523 (set! exitstat (system fullcmd)) ;; cmd params)) 514 524 (set! *globalexitstatus* exitstat)

Modified runs.scm from [a13e1910a3378aca] to [4be3851e9e3a8974].

248 248 tests) 249 249 res)) 250 250 251 251 (define (run-tests db test-names) 252 252 (let* ((keys (db-get-keys db)) 253 253 (keyvallst (keys->vallist keys #t)) 254 254 (run-id (register-run db keys))) ;; test-name))) 255 - (for-each 256 - (lambda (test-name) 257 - (let ((num-running (db:get-count-tests-running db)) 258 - (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs"))) 259 - (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running) 260 - (if (or (not max-concurrent-jobs) 261 - (and max-concurrent-jobs 262 - (string->number max-concurrent-jobs) 263 - (not (> num-running (string->number max-concurrent-jobs))))) 264 - (run-one-test db run-id test-name keyvallst) 265 - (print "WARNING: Max running jobs exceeded, current number running: " num-running 266 - ", max_concurrent_jobs: " max-concurrent-jobs)))) 267 - test-names))) 255 + (let loop ((numtimes 0)) 256 + (for-each 257 + (lambda (test-name) 258 + (let ((num-running (db:get-count-tests-running db)) 259 + (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs"))) 260 + (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running) 261 + (if (or (not max-concurrent-jobs) 262 + (and max-concurrent-jobs 263 + (string->number max-concurrent-jobs) 264 + (not (>= num-running (string->number max-concurrent-jobs))))) 265 + (run-one-test db run-id test-name keyvallst) 266 + (print "WARNING: Max running jobs exceeded, current number running: " num-running 267 + ", max_concurrent_jobs: " max-concurrent-jobs)))) 268 + test-names) 269 + (if (args:get-arg "-keepgoing") 270 + (let ((estrem (db:estimated-tests-remaining db run-id))) 271 + (if (> estrem 0) 272 + (begin 273 + (print "Keep going, estimated " estrem " tests remaining to run, will continue in 10 seconds ...") 274 + (sleep 10) 275 + (loop (+ numtimes 1))))))))) 268 276 269 277 ;; VERY INEFFICIENT! Move stuff that should be done once up to calling proc 270 278 (define (run-one-test db run-id test-name keyvallst) 271 279 (print "Launching test " test-name) 272 280 ;; All these vars might be referenced by the testconfig file reader 273 281 (setenv "MT_TEST_NAME" test-name) ;; 274 282 (setenv "MT_RUNNAME" (args:get-arg ":runname")) ................................................................................ 305 313 (test-status #f) 306 314 (num-running (db:get-count-tests-running db)) 307 315 (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs"))) 308 316 (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running) 309 317 (if (not (or (not max-concurrent-jobs) 310 318 (and max-concurrent-jobs 311 319 (string->number max-concurrent-jobs) 312 - (not (> num-running (string->number max-concurrent-jobs)))))) 320 + (not (>= num-running (string->number max-concurrent-jobs)))))) 313 321 (print "WARNING: Max running jobs exceeded, current number running: " num-running 314 322 ", max_concurrent_jobs: " max-concurrent-jobs) 315 323 (begin 316 324 (let loop2 ((ts #f) 317 325 (ct 0)) 318 326 (if (and (not ts) 319 327 (< ct 10))

Modified tests/Makefile from [4c07feade78035be] to [0b961aa417fcf512].

1 1 # run some tests 2 2 3 3 MEGATEST=$(shell realpath ../megatest) 4 4 5 5 runall : 6 6 cd ../;make 7 - $(MEGATEST) -runall :sysname ubuntu :fsname nfs :datapath none :runname `date +%GWW%V.%u` -m "This is a comment specific to a run" 7 + $(MEGATEST) -keepgoing -runall :sysname ubuntu :fsname nfs :datapath none :runname `date +%GWW%V.%u` -m "This is a comment specific to a run" 8 8 9 9 test : 10 10 cd ../;make test 11 11 make runall 12 12 13 13 dashboard : 14 14 cd ../;make dashboard

Modified tests/megatest.config from [b996a05181a3c85b] to [7cce1c983316acca].

1 1 [fields] 2 2 sysname TEXT 3 3 fsname TEXT 4 4 datapath TEXT 5 5 6 6 [setup] 7 7 # exectutable /path/to/megatest 8 -max_concurrent_jobs 405 8 +max_concurrent_jobs 8 9 9 runsdir /tmp/runs 10 10 11 11 [jobtools] 12 12 # ## launcher launches jobs, the job is managed on the target host 13 13 ## by megatest, comment out launcher to run local 14 14 # workhosts localhost hermes 15 15 launcher nbfake