Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -136,11 +136,12 @@ (define *tests-sort-options* (vector (vector "Sort +a" 'testname "ASC") (vector "Sort -a" 'testname "DESC") (vector "Sort +t" 'event_time "ASC") (vector "Sort -t" 'event_time "DESC") (vector "Sort +s" 'statestatus "ASC") - (vector "Sort -s" 'statestatus "DESC"))) + (vector "Sort -s" 'statestatus "DESC") + (vector "Sort +a" 'testname "ASC"))) (define *tests-sort-type-index* '(("+testname" 0) ("-testname" 1) ("+event_time" 2) ("-event_time" 3) @@ -477,11 +478,11 @@ ;;(teststart (db:test-get-event_time test)) (runtime (db:test-get-run_duration test)) (buttontxt (cond ((equal? teststate "COMPLETED") teststatus) ((and (equal? teststate "NOT_STARTED") - (member teststatus '("ZERO_ITEMS" "BLOCKED" "PREQ_FAIL" "PREQ_DISCARDED" "KEEP_TRYING" "TEN_STRIKES"))) + (member teststatus '("ZERO_ITEMS" "BLOCKED" "PREQ_FAIL" "PREQ_DISCARDED" "TIMED_OUT" "KEEP_TRYING" "TEN_STRIKES"))) teststatus) (else teststate))) (button (vector-ref columndat rown)) (color (car (gutils:get-color-for-state-status teststate teststatus))) ADDED docs/megatest-state-status.dot Index: docs/megatest-state-status.dot ================================================================== --- /dev/null +++ docs/megatest-state-status.dot @@ -0,0 +1,47 @@ +digraph megatest_state_status { + ranksep=0.05 + // rankdir=LR + +node [shape=box,style=filled]; + +// subgraph cluster_notstarted { +// label="Not started"; + + "NOT_STARTED FAILS" [ + label = "{ NOT_STARTED/FAILS |{ NO_ITEMS | FAIL_PREREQ | FAIL_TIMEOUT }}"; + shape= "record"; + ] + +"NOT_STARTED n/a" -> "LAUNCHED n/a" [label=" launch"]; +"NOT_STARTED WAIT" -> "LAUNCHED n/a" + + "NOT_STARTED n/a"; + "NOT_STARTED WAIT" [ + label = "{NOT_STARTED WAIT|{ NO_SLOTS | WAIT_PREREQ}}"; + shape = "record"; +] + +// struct3 [shape=record,label="hello\nworld |{ b |{c| d|e}| f}| g | h"]; + + "NOT_STARTED n/a" -> "NOT_STARTED FAILS"; + "NOT_STARTED n/a" -> "NOT_STARTED WAIT"; + + "RUNNING" [ + shape="record"; + label="{RUNNING|{n/a| PASS | FAIL}}"; + ] + + "COMPLETED" [ + shape="record"; + label = "{COMPLETED|{PASS | FAIL | CHECK| SKIP}}"; + ] + + +"RUNNING" -> "COMPLETED"; +"RUNNING" -> "INCOMPLETE" [label="test dead for > 24hrs"]; + + +"LAUNCHED n/a" -> "REMOTEHOSTSTART n/a" -> "RUNNING"; + +} + Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -364,14 +364,14 @@ (debug:print 0 "Killing " (cadr parts) "; kill -9 " p-id) ;; (process-signal pid signal/kill))))) ;; (system (conc "kill -9 " p-id)))))) (car processes))) (system (conc "kill -9 -" pid)) - (tests:test-set-status! test-id "KILLED" "FAIL" (args:get-arg "-m") #f))) + (tests:test-set-status! test-id "KILLED" "KILLED" (args:get-arg "-m") #f))) (begin (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process") -;; (tests:test-set-status! run-id test-id "KILLED" "FAIL" + (tests:test-set-status! test-id "KILLED" "KILLED" (args:get-arg "-m") #f) (tests:test-set-status! run-id test-id "KILLED" "FAIL" (args:get-arg "-m") #f) (exit 1) ;; IS THIS NECESSARY OR WISE??? ))) (set! kill-tries (+ 1 kill-tries)) (mutex-unlock! m))) Index: lock-queue.scm ================================================================== --- lock-queue.scm +++ lock-queue.scm @@ -21,33 +21,40 @@ ;;====================================================================== ;; attempt to prevent overlapping updates of rollup files by queueing ;; update requests in an sqlite db ;;====================================================================== -(define (lock-queue:open-db fname) +(define (lock-queue:open-db fname #!key (count 10)) (let* ((actualfname (conc fname ".lockdb")) (dbexists (file-exists? actualfname)) (db (sqlite3:open-database actualfname)) (handler (make-busy-timeout 136000))) (if dbexists db (begin - (sqlite3:execute - db - "CREATE TABLE IF NOT EXISTS queue ( + (handle-exceptions + exn + (begin + (thread-sleep! 10) + (if (> count 0) + (lock-queue:open-db fname count: (- count 1)) + db)) + (sqlite3:execute + db + "CREATE TABLE IF NOT EXISTS queue ( id INTEGER PRIMARY KEY, test_id INTEGER, start_time INTEGER, state TEXT, CONSTRAINT queue_constraint UNIQUE (test_id));") - (sqlite3:execute - db - "CREATE TABLE IF NOT EXISTS runlocks ( + (sqlite3:execute + db + "CREATE TABLE IF NOT EXISTS runlocks ( id INTEGER PRIMARY KEY, test_id INTEGER, run_lock TEXT, - CONSTRAINT runlock_constraint UNIQUE (run_lock));"))) + CONSTRAINT runlock_constraint UNIQUE (run_lock));")))) (sqlite3:set-busy-handler! db handler) db)) (define (lock-queue:set-state db test-id newstate #!key (remtries 10)) (handle-exceptions @@ -83,18 +90,22 @@ (set! res tid))) db "SELECT test_id FROM queue WHERE start_time > ?;" mystart) res))) -(define (lock-queue:get-lock db test-id) +(define (lock-queue:get-lock db test-id #!key (count 10)) (let ((res #f) (lckqry (sqlite3:prepare db "SELECT test_id,run_lock FROM runlocks WHERE run_lock='locked';")) (mklckqry (sqlite3:prepare db "INSERT INTO runlocks (test_id,run_lock) VALUES (?,'locked');"))) (let ((result (handle-exceptions exn - #f + (begin + (thread-sleep! 10) + (if (> count 0) + (lock-queue:get-lock db test-id count: (- count 1))) + #f) (sqlite3:with-transaction db (lambda () (sqlite3:for-each-row (lambda (tid lockstate) (set! res (list tid lockstate))) @@ -109,46 +120,67 @@ #t))))))) (sqlite3:finalize! lckqry) (sqlite3:finalize! mklckqry) result))) -(define (lock-queue:release-lock fname test-id) +(define (lock-queue:release-lock fname test-id #!key (count 10)) (let ((db (lock-queue:open-db fname))) - (sqlite3:execute db "DELETE FROM runlocks WHERE test_id=?;" test-id) - (sqlite3:finalize! db))) + (handle-exceptions + exn + (begin + (thread-sleep! 10) + (if (> count 0) + (lock-queue:release-lock fname test-id count: (- count 1)) + #f)) + (sqlite3:execute db "DELETE FROM runlocks WHERE test_id=?;" test-id) + (sqlite3:finalize! db)))) -(define (lock-queue:steal-lock db test-id) - (sqlite3:execute db "DELETE FROM runlocks WHERE run_lock='locked';") +(define (lock-queue:steal-lock db test-id #!key (count 10)) + (handle-exceptions + exn + (begin + (thread-sleep! 10) + (if (> count 0) + (lock-queue:steal-lock db test-id count: (- count 1)) + #f)) + (sqlite3:execute db "DELETE FROM runlocks WHERE run_lock='locked';")) (lock-queue:get-lock db test-it)) ;; returns #f if ok to skip the task ;; returns #t if ok to proceed with task ;; otherwise waits ;; -(define (lock-queue:wait-turn fname test-id) +(define (lock-queue:wait-turn fname test-id #!key (count 10)) (let ((db (lock-queue:open-db fname)) (mystart (current-seconds))) - (sqlite3:execute - db - "INSERT OR REPLACE INTO queue (test_id,start_time,state) VALUES (?,?,'waiting');" - test-id mystart) - (thread-sleep! 1) ;; give other tests a chance to register - (let ((result - (let loop ((younger-waiting (lock-queue:any-younger? db mystart test-id))) - (if younger-waiting - (begin - ;; no need for us to wait. mark in the lock queue db as skipping - (lock-queue:set-state db test-id "skipping") - #f) ;; let the calling process know that nothing needs to be done - (if (lock-queue:get-lock db test-id) - #t - (if (> (- (current-seconds) mystart) 36000) ;; waited too long, steal the lock - (lock-queue:steal-lock db test-id) - (begin - (thread-sleep! 1) - (loop (lock-queue:any-younger? db mystart test-id))))))))) - (sqlite3:finalize! db) - result))) + (handle-exceptions + exn + (begin + (thread-sleep! 10) + (if (> count 0) + (lock-queue:wait-turn fname test-id count: (- count 1)) + #f)) + (sqlite3:execute + db + "INSERT OR REPLACE INTO queue (test_id,start_time,state) VALUES (?,?,'waiting');" + test-id mystart) + (thread-sleep! 1) ;; give other tests a chance to register + (let ((result + (let loop ((younger-waiting (lock-queue:any-younger? db mystart test-id))) + (if younger-waiting + (begin + ;; no need for us to wait. mark in the lock queue db as skipping + (lock-queue:set-state db test-id "skipping") + #f) ;; let the calling process know that nothing needs to be done + (if (lock-queue:get-lock db test-id) + #t + (if (> (- (current-seconds) mystart) 36000) ;; waited too long, steal the lock + (lock-queue:steal-lock db test-id) + (begin + (thread-sleep! 1) + (loop (lock-queue:any-younger? db mystart test-id))))))))) + (sqlite3:finalize! db) + result)))) ;; (use trace) ;; (trace lock-queue:get-lock lock-queue:release-lock lock-queue:wait-turn lock-queue:any-younger? lock-queue:set-state) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -391,16 +391,19 @@ (define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmap) (let* ((loop-list (list hed tal reg reruns)) (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons item-path testmode itemmap: itemmap)) ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) (fails (runs:calc-fails prereqs-not-met)) - (non-completed (runs:calc-not-completed prereqs-not-met))) + (prereq-fails (runs:calc-prereq-fail prereqs-not-met)) + (non-completed (runs:calc-not-completed prereqs-not-met)) + (runnables (runs:calc-runnable prereqs-not-met))) (debug:print-info 4 "START OF INNER COND #2 " "\n can-run-more: " can-run-more "\n testname: " hed "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met) "\n non-completed: " (runs:pretty-string non-completed) + "\n prereq-fails: " (runs:pretty-string prereq-fails) "\n fails: " (runs:pretty-string fails) "\n testmode: " testmode "\n (member 'toplevel testmode): " (member 'toplevel testmode) "\n (null? non-completed): " (null? non-completed) "\n reruns: " reruns @@ -453,10 +456,11 @@ (begin (debug:print 0 "ERROR: The proc from reading the items table did not yield a list - please report this") (exit 1)))))) ((and (null? fails) + (null? prereq-fails) (not (null? non-completed))) (let* ((allinqueue (map (lambda (x)(if (string? x) x (db:test-get-testname x))) (append newtal reruns))) ;; prereqstrs is a list of test names as strings that are prereqs for hed (prereqstrs (delete-duplicates (map (lambda (x)(if (string? x) x (db:test-get-testname x))) @@ -493,65 +497,12 @@ (runs:queue-next-tal trimmed-tal trimmed-reg reglen regfull) (runs:queue-next-reg trimmed-tal trimmed-reg reglen regfull) reruns))) (list (car newtal)(append (cdr newtal) reg) '() reruns)))) - ;; (debug:print-info 1 "allinqueue: " allinqueue) - ;; (debug:print-info 1 "prereqstrs: " prereqstrs) - ;; (debug:print-info 1 "notinqueue: " notinqueue) - ;; (debug:print-info 1 "tal: " tal) - ;; (debug:print-info 1 "newtal: " newtal) - ;; (debug:print-info 1 "reg: " reg) - -;; == == ;; num-retries code was here -;; == == ;; we use this opportunity to move contents of reg to tal -;; == == ;; but also lets check that the prerequisites are all in the newtal or reruns lists -;; == == -;; == == (let* ((allinqueue (map (lambda (x)(if (string? x) x (db:test-get-testname x))) -;; == == (append newtal reruns))) -;; == == ;; prereqstrs is a list of test names as strings that are prereqs for hed -;; == == (prereqstrs (map (lambda (x)(if (string? x) x (db:test-get-testname x))) -;; == == prereqs-not-met)) -;; == == ;; a prereq that is not found in allinqueue will be put in the notinqueue list -;; == == ;; -;; == == (notinqueue (filter (lambda (x) -;; == == (not (member x allinqueue))) -;; == == prereqstrs))) -;; == == (if (not (null? notinqueue)) -;; == == (if (runs:can-keep-running? hed 5) ;; try five times -;; == == (begin -;; == == (debug:print-info 4 "increment cant-run-tests for " hed) -;; == == (runs:inc-cant-run-tests hed) -;; == == (list (car newtal)(append (cdr newtal) reg) '() reruns)) -;; == == (begin -;; == == -;; == == (if (runs:lownoise (conc "no fails prereq, null notinqueue " hed) 30) -;; == == (begin -;; == == (debug:print 1 "WARNING: test " hed " has no failed prerequisites but does have prerequistes that are NOT in the queue: " (string-intersperse notinqueue ", ")) -;; == == (debug:print-info 4 "allinqueue: " allinqueue) -;; == == (debug:print-info 4 "prereqstrs: " prereqstrs) -;; == == (debug:print-info 4 "notinqueue: " notinqueue))) -;; == == (if (and (null? tal)(null? reg)) -;; == == (list (car newtal)(append (cdr newtal) reg) '() reruns) -;; == == (list (runs:queue-next-hed tal reg reglen regfull) -;; == == (runs:queue-next-tal tal reg reglen regfull) -;; == == (runs:queue-next-reg tal reg reglen regfull) -;; == == reruns)))) -;; == == ;; have prereqs in queue, keep going. -;; == == (begin -;; == == (if (runs:lownoise (conc "no fails prereq " hed) 30) -;; == == (debug:print-info 1 "no fails in prerequisites for " hed ", waiting on tests; " -;; == == (string-intersperse (map (lambda (x) -;; == == (if (string? x) -;; == == x -;; == == (runs:make-full-test-name (db:test-get-testname x) -;; == == (db:test-get-item-path x)))) -;; == == non-completed) ", ") -;; == == ". Delaying launch of " hed ".")) -;; == == (list (car newtal)(append (cdr newtal) reg) '() reruns))))) ;; an issue with prereqs not yet met? - ((and (null? fails) + (null? prereq-fails) (null? non-completed)) (if (runs:can-keep-running? hed 5) (begin (runs:inc-cant-run-tests hed) (debug:print-info 1 "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0)) @@ -559,22 +510,27 @@ ;; we use this opportunity to move contents of reg to tal (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met? (begin (debug:print-info 1 "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue") (let ((test-id (rmt:get-test-id run-id hed ""))) - (mt:test-set-state-status-by-id run-id test-id "DEQUEDED" "TIMED_OUT" "Nothing seen running in a while.")) + (mt:test-set-state-status-by-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while.")) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns)))) - ((and (not (null? fails))(member 'normal testmode)) + ((and + (or (not (null? fails)) + (not (null? prereq-fails))) + (member 'normal testmode)) (debug:print-info 1 "test " hed " (mode=" testmode ") has failed prerequisite(s); " (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") ", removing it from to-do list") (let ((test-id (rmt:get-test-id run-id hed ""))) - (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")) + (if (not (null? prereq-fails)) + (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites") + (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))) (if (or (not (null? reg))(not (null? tal))) (begin (hash-table-set! test-registry hed 'CANNOTRUN) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) @@ -584,10 +540,11 @@ ((and (not (null? fails))(member 'toplevel testmode)) (if (or (not (null? reg))(not (null? tal))) (list (car newtal)(append (cdr newtal) reg) '() reruns) #f)) + ((null? runnables) #f) ;; if we get here and non-completed is null the it's all over. (else (debug:print 0 "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now") ;; (list (runs:queue-next-hed tal reg reglen regfull) ;; (runs:queue-next-tal tal reg reglen regfull) ;; (runs:queue-next-reg tal reg reglen regfull) @@ -838,11 +795,11 @@ (let ((state (db:test-get-state t)) (status (db:test-get-status t))) (case (string->symbol state) ((COMPLETED) #f) ((NOT_STARTED) - (if (member status '("TEN_STRIKES" "BLOCKED")) + (if (member status '("TEN_STRIKES" "BLOCKED" "PREQ_FAIL" "ZERO_ITEMS" "PREQ_DISCARDED" "TIMED_OUT" )) #f t)) ((DELETED) #f) (else t))))) tests)) @@ -1089,17 +1046,41 @@ (and (vector? test) ;; not (string? test)) (equal? (db:test-get-state test) "COMPLETED") (not (member (db:test-get-status test) '("PASS" "WARN" "CHECK" "WAIVED" "SKIP"))))) prereqs-not-met)) + +(define (runs:calc-prereq-fail prereqs-not-met) + (filter (lambda (test) + (and (vector? test) ;; not (string? test)) + (equal? (db:test-get-state test) "NOT_STARTED") + (not (member (db:test-get-status test) + '("n/a" "KEEP_TRYING"))))) + prereqs-not-met)) + +(define (runs:calc-not-completed prereqs-not-met) + (filter + (lambda (t) + (or (not (vector? t)) + (not (equal? "COMPLETED" (db:test-get-state t))))) + prereqs-not-met)) (define (runs:calc-not-completed prereqs-not-met) (filter (lambda (t) (or (not (vector? t)) (not (equal? "COMPLETED" (db:test-get-state t))))) prereqs-not-met)) + +(define (runs:calc-runnable prereqs-not-met) + (filter + (lambda (t) + (or (not (vector? t)) + (and (equal? "NOT_STARTED" (db:test-get-state t)) + (member (db:test-get-status t) + '("n/a" "KEEP_TRYING"))))) + prereqs-not-met)) (define (runs:pretty-string lst) (map (lambda (t) (if (not (vector? t)) (conc t) @@ -1192,17 +1173,17 @@ (if testdat (string->symbol (test:get-state testdat)) 'failed-to-insert)) ((failed-to-insert) (debug:print 0 "ERROR: Failed to insert the record into the db")) - ((NOT_STARTED COMPLETED DELETED) + ((NOT_STARTED COMPLETED DELETED INCOMPLETE) (let ((runflag #f)) (cond ;; -force, run no matter what (force (set! runflag #t)) ;; NOT_STARTED, run no matter what - ((member (test:get-state testdat) '("DELETED" "NOT_STARTED"))(set! runflag #t)) + ((member (test:get-state testdat) '("DELETED" "NOT_STARTED" "INCOMPLETE"))(set! runflag #t)) ;; not -rerun and PASS, WARN or CHECK, do no run ((and (or (not rerun) keepgoing) ;; Require to force re-run for COMPLETED or *anything* + PASS,WARN or CHECK (or (member (test:get-status testdat) '("PASS" "WARN" "CHECK" "SKIP" "WAIVED")) @@ -1481,11 +1462,11 @@ (let* ((run-dir (db:test-get-rundir test)) ;; run dir is from the link tree (real-dir (if (file-exists? run-dir) (resolve-pathname run-dir) #f))) (if (not remove-data-only) - (mt:test-set-state-status-by-id (db:test-get-run-id test)(db:test-get-id test) "REMOVING" "LOCKED" #f)) + (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f)) (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) (if (and real-dir (> (string-length real-dir) 5) (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. (begin ;; let* ((realpath (resolve-pathname run-dir))) Index: tests/Makefile ================================================================== --- tests/Makefile +++ tests/Makefile @@ -83,10 +83,11 @@ cd fullrun;$(MEGATEST) -preclean -runtests runfirst -testpatt %blahha% -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -debug 10 cd fullrun;$(MEGATEST) -rollup :runname newrun -target ubuntu/nfs/none -debug 10 test7: @echo Only a/c testname c should remain. If there is a run a/b/c then there is a cache issue. + cd simplerun;$(DASHBOARD) & (cd simplerun; \ $(MEGATEST) -server - -daemonize; \ $(MEGATEST) -remove-runs -target %/% :runname % -testpatt %; \ $(MEGATEST) -preclean -runtests % -target a/b :runname c; sleep 5; \ $(MEGATEST) -remove-runs -target a/c :runname c; \ @@ -110,10 +111,11 @@ test9 : minsetup test9a test9b test9c test9d test9e test9a : @echo Run super-simple mintest e, no waitons. + cd mintest;$(DASHBOARD)& cd mintest;$(MEGATEST) -preclean -runtests e -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG) test9b : @echo Run simple mintest d with one waiton c cd mintest;$(MEGATEST) -preclean -runtests d -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG) Index: tests/fdktestqa/fdk.config ================================================================== --- tests/fdktestqa/fdk.config +++ tests/fdktestqa/fdk.config @@ -2,10 +2,11 @@ SYSTEM TEXT RELEASE TEXT [setup] # Adjust max_concurrent_jobs to limit how much you load your machines +# max_concurrent_jobs 150 max_concurrent_jobs 500 # This is your link path, you can move it but it is generally better to keep it stable linktree #{shell readlink -f #{getenv PWD}/../simplelinks} Index: tests/fullrun/megatest.config ================================================================== --- tests/fullrun/megatest.config +++ tests/fullrun/megatest.config @@ -109,10 +109,11 @@ WRAPPEDVAR This var should have the work blah thrice: \ blah \ blah +MAX_ALLOWED_LOAD 200 # XTERM [system xterm] # RUNDEAD [system exit 56] [server] Index: utils/loadrunner ================================================================== --- utils/loadrunner +++ utils/loadrunner @@ -14,14 +14,14 @@ # Can't always trust $PWD CURRWD=`pwd` if [[ $TARGETHOST_LOGF == "" ]]; then TARGETHOST_LOGF=NBFAKE-`date +%GWW%V.%u_%T` fi - echo "#======================================================================" - echo "# NBFAKE Running command:" - echo "# \"$*\"" - echo "#======================================================================" + # echo "#======================================================================" + # echo "# NBFAKE Running command:" + # echo "# \"$*\"" + # echo "#======================================================================" if [[ $TARGETHOST == "" ]]; then sh -c "cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* > $TARGETHOST_LOGF 2>&1 &" else ssh -n -f $TARGETHOST "sh -c \"cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* > $TARGETHOST_LOGF 2>&1 &\"" @@ -45,32 +45,33 @@ # NB// max_load is in units of percent. # lperc=$(echo "100 * $load / $numcpu"|bc) lperc2=$(echo "100 * $load2 / $numcpu"|bc) +let lperc2adj="$lperc2 + 0.5" if [[ "x$MAX_ALLOWED_LOAD" == "x" ]]; then max_load=100 else max_load=$MAX_ALLOWED_LOAD fi lfile=/tmp/loadrunner-$USER.lockfile -lockfile -r 5 -l 60 $lfile +lockfile -r 5 -l 60 $lfile &> /dev/null if [[ $lperc -lt $max_load ]];then - if [[ $lperc -le $lperc2 ]];then - echo "Load acceptable: lperc=$lperc %, max_load=$max_load %, load=$load, numcpu=$numcpu, MAX_ALLOWED_LOAD=$MAX_ALLOWED_LOAD % and $lperc2 < $lperc" - echo "Starting command: \"$@\"" + if [[ $lperc -le $lperc2adj ]];then + # echo "Load acceptable: lperc=$lperc %, max_load=$max_load %, load=$load, numcpu=$numcpu, MAX_ALLOWED_LOAD=$MAX_ALLOWED_LOAD % and $lperc2 < $lperc" + # echo "Starting command: \"$@\"" launchjob "$@" # we sleep ten seconds here to keep the lock a little longer and give time for # the uptime to show a response sleep 10 else - echo "$LOADRUNNER $@" | at now + 2 minutes 2> /dev/null + echo "$LOADRUNNER $@" | at now + 2 minutes &> /dev/null fi else # echo "Load too high: lperc=$lperc, max_load=$max_load, waiting two minutes before trying to run command: \"$@\"" - echo "$LOADRUNNER $@" | at now + 2 minutes 2> /dev/null + echo "$LOADRUNNER $@" | at now + 2 minutes &> /dev/null fi sleep $(get_delay_time 10) rm -f $lfile