Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -181,11 +181,11 @@ #:action (lambda (x) (hash-table-delete! *examine-test-dat* testkey) (iup:destroy! self)))) ))) (iup:hbox ;; the test steps are tracked here - (let ((stepsdat (iup:label "Test steps ......................................" #:expand "YES"))) + (let ((stepsdat (iup:label "Test steps ........................................." #:expand "YES"))) (hash-table-set! widgets "Test Steps" stepsdat) stepsdat) )))) (iup:show self) )))) @@ -209,11 +209,11 @@ (if (> (length tests) maxtests) (set! maxtests (length tests))) (set! result (cons (vector run tests key-vals) result)))) runs) (set! *header* header) - (set! *allruns* (reverse result)) + (set! *allruns* result) maxtests)) (define (update-labels uidat) (let* ((rown 0) (lftcol (vector-ref uidat 0)) @@ -242,11 +242,11 @@ (lambda (popup) (let* ((test-id (car popup)) (widgets (hash-table-ref *examine-test-dat* popup)) (stepslbl (hash-table-ref/default widgets "Test Steps" #f))) (if stepslbl - (let* ((fmtstr "~15a~8a~8a~17a") + (let* ((fmtstr "~15a~8a~8a~20a") (newtxt (string-intersperse (append (list (format #f fmtstr "Stepname" "State" "Status" "Event Time") (format #f fmtstr "========" "=====" "======" "==========")) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -221,10 +221,21 @@ (lambda (count) (set! res count)) db "SELECT count(id) FROM tests WHERE state = 'RUNNING' OR state = 'LAUNCHED' OR state = 'REMOTEHOSTSTART';") res)) + +;; done with run when: +;; 0 tests in LAUNCHED, NOT_STARTED, REMOTEHOSTSTART, RUNNING +(define (db:estimated-tests-remaining db run-id) + (let ((res 0)) + (sqlite3:for-each-row + (lambda (count) + (set! res count)) + db + "SELECT count(id) FROM tests WHERE state in ('LAUNCHED','NOT_STARTED','REMOTEHOSTSTART','RUNNING') AND run_id=?;" run-id) + res)) ;; NB// Sync this with runs:get-test-info (define (db:get-test-info db run-id testname item-path) (let ((res '())) (sqlite3:for-each-row @@ -272,13 +283,13 @@ (let ((res '())) (sqlite3:for-each-row (lambda (id test-id stepname state status event-time) (set! res (cons (vector id test-id stepname state status event-time) res))) db - "SELECT id,test_id,stepname,state,status,event_time FROM test_steps WHERE test_id=? ORDER BY event_time DESC;" + "SELECT id,test_id,stepname,state,status,event_time FROM test_steps WHERE test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; test-id) - res)) + (reverse res))) ;; check that *all* the prereqs are "COMPLETED" (define (db-get-prereqs-met db run-id waiton) (let ((res #f) (not-complete 0) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -6,11 +6,11 @@ ;; This program is distributed WITHOUT ANY WARRANTY; without even the ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. (include "common.scm") -(define megatest-version 1.11) +(define megatest-version 1.12) (define help (conc " Megatest, documentation at http://www.kiatoa.com/fossils/megatest version " megatest-version " license GPL, Copyright Matt Welland 2006-2011 @@ -48,10 +48,12 @@ -force : override some checks -xterm : start an xterm instead of launching the test -remove-runs : remove the data for a run, requires fields, :runname and -testpatt -testpatt patt : remove tests matching patt (requires -remove-runs) + -keepgoing : continue running until no jobs are \"LAUNCHED\" or + \"NOT_STARTED\" Helpers -runstep stepname ... : take remaining params as comand and execute as stepname log will be in stepname.log. Best to put command in quotes -logpro file : with -exec apply logpro file to stepname.log, creates @@ -90,10 +92,11 @@ "-showkeys" "-test-status" "-gui" "-runall" ;; run all tests "-remove-runs" + "-keepgoing" ) args:arg-hash 0)) (if (args:get-arg "-h") @@ -254,11 +257,11 @@ ;; put test parameters into convenient variables (let* ((test-names (get-all-legal-tests))) ;; "PROD" is ignored for now (print "INFO: Attempting to start the following tests...") (print " " (string-intersperse test-names ",")) (run-tests db test-names))) - (run-waiting-tests db) + ;; (run-waiting-tests db) (sqlite3:finalize! db) (set! *didsomething* #t)))) ;;====================================================================== ;; run one test @@ -295,11 +298,11 @@ ;; put test parameters into convenient variables (let* ((test-names (string-split (args:get-arg "-runtests") ","))) (run-tests db test-names))) ;; run-waiting-tests db) (sqlite3:finalize! db) - (run-waiting-tests #f) + ;; (run-waiting-tests #f) (set! *didsomething* #t)))) (if (args:get-arg "-runtests") (runtests)) @@ -391,11 +394,18 @@ start-seconds)))))) (let loop ((minutes (calc-minutes))) (let ((db (open-db))) (set! kill-job? (test-get-kill-request db run-id test-name itemdat)) (test-update-meta-info db run-id test-name itemdat minutes) - (if kill-job? (process-signal (vector-ref exit-info 0) signal/term)) + (if kill-job? + (begin + (process-signal (vector-ref exit-info 0) signal/term) + (sleep 2) + (handle-exceptions + exn + (print "ERROR: Problem killing process " (vector-ref exit-info 0)) + (process-signal (vector-ref exit-info 0) signal/kill)))) (sqlite3:finalize! db) (thread-sleep! (+ 8 (random 4))) ;; add some jitter to the call home time to spread out the db accesses (loop (calc-minutes))))))) (th1 (make-thread monitorjob)) (th2 (make-thread runit))) @@ -502,11 +512,11 @@ ((zsh bash sh ash) "2>&1 >"))) (fullcmd (conc "(" (string-intersperse (cons cmd params) " ") ") " redir " " logfile))) ;; mark the start of the test - (test-set-status! db run-id test-name "start" "n/a" itemdat (args:get-arg "-m")) + (teststep-set-status! db run-id test-name stepname "start" "n/a" itemdat (args:get-arg "-m")) ;; close the db (sqlite3:finalize! db) ;; run the test step (print "INFO: Running \"" fullcmd "\"") (change-directory startingdir) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -250,24 +250,32 @@ (define (run-tests db test-names) (let* ((keys (db-get-keys db)) (keyvallst (keys->vallist keys #t)) (run-id (register-run db keys))) ;; test-name))) - (for-each - (lambda (test-name) - (let ((num-running (db:get-count-tests-running db)) - (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs"))) - (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running) - (if (or (not max-concurrent-jobs) - (and max-concurrent-jobs - (string->number max-concurrent-jobs) - (not (> num-running (string->number max-concurrent-jobs))))) - (run-one-test db run-id test-name keyvallst) - (print "WARNING: Max running jobs exceeded, current number running: " num-running - ", max_concurrent_jobs: " max-concurrent-jobs)))) - test-names))) - + (let loop ((numtimes 0)) + (for-each + (lambda (test-name) + (let ((num-running (db:get-count-tests-running db)) + (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs"))) + (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running) + (if (or (not max-concurrent-jobs) + (and max-concurrent-jobs + (string->number max-concurrent-jobs) + (not (>= num-running (string->number max-concurrent-jobs))))) + (run-one-test db run-id test-name keyvallst) + (print "WARNING: Max running jobs exceeded, current number running: " num-running + ", max_concurrent_jobs: " max-concurrent-jobs)))) + test-names) + (if (args:get-arg "-keepgoing") + (let ((estrem (db:estimated-tests-remaining db run-id))) + (if (> estrem 0) + (begin + (print "Keep going, estimated " estrem " tests remaining to run, will continue in 10 seconds ...") + (sleep 10) + (loop (+ numtimes 1))))))))) + ;; VERY INEFFICIENT! Move stuff that should be done once up to calling proc (define (run-one-test db run-id test-name keyvallst) (print "Launching test " test-name) ;; All these vars might be referenced by the testconfig file reader (setenv "MT_TEST_NAME" test-name) ;; @@ -307,11 +315,11 @@ (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs"))) (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running) (if (not (or (not max-concurrent-jobs) (and max-concurrent-jobs (string->number max-concurrent-jobs) - (not (> num-running (string->number max-concurrent-jobs)))))) + (not (>= num-running (string->number max-concurrent-jobs)))))) (print "WARNING: Max running jobs exceeded, current number running: " num-running ", max_concurrent_jobs: " max-concurrent-jobs) (begin (let loop2 ((ts #f) (ct 0)) Index: tests/Makefile ================================================================== --- tests/Makefile +++ tests/Makefile @@ -2,11 +2,11 @@ MEGATEST=$(shell realpath ../megatest) runall : cd ../;make - $(MEGATEST) -runall :sysname ubuntu :fsname nfs :datapath none :runname `date +%GWW%V.%u` -m "This is a comment specific to a run" + $(MEGATEST) -keepgoing -runall :sysname ubuntu :fsname nfs :datapath none :runname `date +%GWW%V.%u` -m "This is a comment specific to a run" test : cd ../;make test make runall Index: tests/megatest.config ================================================================== --- tests/megatest.config +++ tests/megatest.config @@ -3,11 +3,11 @@ fsname TEXT datapath TEXT [setup] # exectutable /path/to/megatest -max_concurrent_jobs 405 +max_concurrent_jobs 8 runsdir /tmp/runs [jobtools] # ## launcher launches jobs, the job is managed on the target host ## by megatest, comment out launcher to run local