Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -11,11 +11,11 @@ OFILES = $(SRCFILES:%.scm=%.o) GOFILES = $(GUISRCF:%.scm=%.o) HELPERS=$(addprefix $(PREFIX)/bin/,mt_laststep mt_runstep mt_ezstep) -MTESTHASH=$(shell fsl info|grep checkout:| awk '{print $$2}') +MTESTHASH=$(shell fossil info|grep checkout:| awk '{print $$2}') all : mtest dboard mtest: $(OFILES) megatest.o csc $(CSCOPTS) $(OFILES) megatest.o -o mtest Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -755,11 +755,11 @@ (let ((res 0)) (sqlite3:for-each-row (lambda (count) (set! res count)) db - "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART','NOT_STARTED');") + "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART');") res)) (define (db:get-count-tests-running-in-jobgroup db jobgroup) (if (not jobgroup) 0 ;; @@ -1054,11 +1054,11 @@ ;;====================================================================== (define (db:updater) (debug:print 4 "INFO: Starting cache processing") (let loop ((start-time (current-time))) - (thread-sleep! 5) ;; move save time around to minimize regular collisions? + (thread-sleep! 10) ;; move save time around to minimize regular collisions? (db:write-cached-data) (loop start-time))) (define (cdb:test-set-status-state test-id status state msg) (debug:print 4 "INFO: cdb:test-set-status-state test-id=" test-id ", status=" status ", state=" state ", msg=" msg) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -525,11 +525,12 @@ (local-megatest (let* ((lm (car (argv))) (dir (pathname-directory lm)) (exe (pathname-strip-directory lm))) (conc (if dir (conc dir "/") "") (case (string->symbol exe) - ((dboard) "megatest") + ((dboard) "megatest") + ((mtest) "megatest") ((dashboard) "megatest") (else exe))))) (test-sig (conc test-name ":" (item-list->path itemdat))) ;; test-path is the full path including the item-path (work-area #f) (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -355,11 +355,12 @@ (let* ((host:port (db:get-var db "SERVER")) ;; this doen't support multiple servers BUG!!!! (th2 (server:start db (args:get-arg "-server"))) (th3 (make-thread (lambda () (server:keep-running db host:port))))) (thread-start! th3) - (thread-join! th3)) + (thread-join! th3) + (set! *didsomething* #t)) (debug:print 0 "ERROR: Failed to setup for megatest")))) ;;====================================================================== ;; full run ;;====================================================================== Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -115,24 +115,25 @@ (define (runs:can-run-more-tests db test-record) (let* ((tconfig (tests:testqueue-get-testconfig test-record)) (jobgroup (config-lookup tconfig "requirements" "jobgroup")) (num-running (db:get-count-tests-running db)) (num-running-in-jobgroup (db:get-count-tests-running-in-jobgroup db jobgroup)) - (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs")) + (max-concurrent-jobs (let ((mcj (config-lookup *configdat* "setup" "max_concurrent_jobs"))) + (if (and mcj (string->number mcj)) + (string->number mcj) + #f))) (job-group-limit (config-lookup *configdat* "jobgroups" jobgroup))) (if (not (eq? *last-num-running-tests* num-running)) (begin (debug:print 2 "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running) (set! *last-num-running-tests* num-running))) (if (not (eq? 0 *globalexitstatus*)) - #f + (list #f num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit) (let ((can-not-run-more (cond ;; if max-concurrent-jobs is set and the number running is greater ;; than it than cannot run more jobs - ((and max-concurrent-jobs - (string->number max-concurrent-jobs) - (>= num-running (string->number max-concurrent-jobs))) + ((and max-concurrent-jobs (>= num-running max-concurrent-jobs)) (debug:print 0 "WARNING: Max running jobs exceeded, current number running: " num-running ", max_concurrent_jobs: " max-concurrent-jobs) #t) ;; if job-group-limit is set and number of jobs in the group is greater ;; than the limit then cannot run more jobs of this kind @@ -140,11 +141,11 @@ (>= num-running-in-jobgroup job-group-limit)) (debug:print 1 "WARNING: number of jobs " num-running-in-jobgroup " in " jobgroup " exceeded, will not run " (tests:testqueue-get-testname test-record)) #t) (else #f)))) - (not can-not-run-more))))) + (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit))))) ;;====================================================================== ;; New methodology. These routines will replace the above in time. For ;; now the code is duplicated. This stuff is initially used in the monitor ;; based code. @@ -375,14 +376,19 @@ (debug:print 0 "ERROR: test " test-name " has listed itself as a waiton, please correct this!") (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) (cond ;; OUTER COND ((not items) ;; when false the test is ok to be handed off to launch (but not before) - (let* ((have-resources (open-run-close runs:can-run-more-tests #f test-record)) ;; look at the test jobgroup and tot jobs running - (prereqs-not-met (open-run-close db:get-prereqs-not-met #f run-id waitons item-path mode: testmode)) - (fails (runs:calc-fails prereqs-not-met)) - (non-completed (runs:calc-not-completed prereqs-not-met))) + (let* ((run-limits-info (open-run-close runs:can-run-more-tests #f test-record)) ;; look at the test jobgroup and tot jobs running + (have-resources (car run-limits-info)) + (num-running (list-ref run-limits-info 1)) + (num-running-in-jobgroup (list-ref run-limits-info 2)) + (max-concurrent-jobs (list-ref run-limits-info 3)) + (job-group-limit (list-ref run-limits-info 4)) + (prereqs-not-met (open-run-close db:get-prereqs-not-met #f run-id waitons item-path mode: testmode)) + (fails (runs:calc-fails prereqs-not-met)) + (non-completed (runs:calc-not-completed prereqs-not-met))) (debug:print 8 "INFO: have-resources: " have-resources " prereqs-not-met: " (string-intersperse (map (lambda (t) (if (vector? t) (conc (db:test-get-state t) "/" (db:test-get-status t)) @@ -390,11 +396,11 @@ prereqs-not-met) ", ") " fails: " fails) (debug:print 4 "INFO: hed=" hed) ;; Don't know at this time if the test have been launched at some time in the past ;; i.e. is this a re-launch? - + (debug:print 4 "INFO: run-limits-info = " run-limits-info) (cond ;; INNER COND #1 for a launchable test ;; Check item path against item-patts ((and (not (patt-list-match item-path item-patts)) (not (equal? item-path ""))) ;; else the run is stuck, temporarily or permanently @@ -401,11 +407,12 @@ ;; but should check if it is due to lack of resources vs. prerequisites (debug:print 1 "INFO: Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " item-patts) (thread-sleep! *global-delta*) (if (not (null? tal)) (loop (car tal)(cdr tal) reruns))) - ((not (hash-table-ref/default test-registery (runs:make-full-test-name test-name item-path) #f)) + ((and (not (hash-table-ref/default test-registery (runs:make-full-test-name test-name item-path) #f)) + (and max-concurrent-jobs (> (- max-concurrent-jobs num-running) 5))) (open-run-close db:tests-register-test #f run-id test-name item-path) (hash-table-set! test-registery (runs:make-full-test-name test-name item-path) #t) (thread-sleep! *global-delta*) (loop (car newtal)(cdr newtal) reruns)) ((not have-resources) ;; simply try again after waiting a second @@ -627,17 +634,17 @@ (if testdat (string->symbol (test:get-state testdat)) 'failed-to-insert)) ((failed-to-insert) (debug:print 0 "ERROR: Failed to insert the record into the db")) - ((NOT_STARTED COMPLETED) + ((NOT_STARTED COMPLETED DELETED) (let ((runflag #f)) (cond ;; -force, run no matter what (force (set! runflag #t)) ;; NOT_STARTED, run no matter what - ((equal? (test:get-state testdat) "NOT_STARTED")(set! runflag #t)) + ((member (test:get-state testdat) '("DELETED" "NOT_STARTED"))(set! runflag #t)) ;; not -rerun and PASS, WARN or CHECK, do no run ((and (or (not rerun) keepgoing) ;; Require to force re-run for COMPLETED or *anything* + PASS,WARN or CHECK (or (member (test:get-status testdat) '("PASS" "WARN" "CHECK")) Index: tests/fullrun/config/mt_include_1.config ================================================================== --- tests/fullrun/config/mt_include_1.config +++ tests/fullrun/config/mt_include_1.config @@ -1,8 +1,8 @@ [setup] # exectutable /path/to/megatest -max_concurrent_jobs 200 +max_concurrent_jobs 50 linktree /tmp/mt_links [jobtools] useshell yes # ## launcher launches jobs, the job is managed on the target host Index: utils/nbfake ================================================================== --- utils/nbfake +++ utils/nbfake @@ -1,12 +1,12 @@ #!/bin/bash # ssh localhost "nohup $* > nbfake.log 2> nbfake.err < /dev/null" -if [[ $TARGETHOST == "" ]]; then - TARGETHOST=localhost -fi - # Can't always trust $PWD CURRWD=`pwd` -ssh -n -f $TARGETHOST "sh -c \"cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* > NBFAKE-`date +%GWW%V.%u_%T` 2>&1 &\"" +if [[ $TARGETHOST == "" ]]; then + sh -c "cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* > NBFAKE-`date +%GWW%V.%u_%T` 2>&1 &" +else + ssh -n -f $TARGETHOST "sh -c \"cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* > NBFAKE-`date +%GWW%V.%u_%T` 2>&1 &\"" +fi