Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -36,11 +36,11 @@ ARCHSTR=$(shell lsb_release -sr) # ARCHSTR=$(shell bash -c "echo \$$MACHTYPE") all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard -mtest: $(OFILES) megatest.o readline-fix.scm +mtest: $(OFILES) readline-fix.scm megatest.o csc $(CSCOPTS) $(OFILES) megatest.o -o mtest dboard : $(OFILES) $(GOFILES) dashboard.scm csc $(OFILES) dashboard.scm $(GOFILES) -o dboard Index: archive.scm ================================================================== --- archive.scm +++ archive.scm @@ -135,11 +135,11 @@ (run-id (db:test-get-run_id test-dat)) (target (string-intersperse (map cadr (rmt:get-key-val-pairs run-id)) "/")) (toplevel/children (and (db:test-get-is-toplevel test-dat) (> (rmt:test-toplevel-num-items run-id test-name) 0))) - (test-partial-path (conc target "/" run-name "/" (runs:make-full-test-name test-name item-path))) + (test-partial-path (conc target "/" run-name "/" (db:test-make-full-name test-name item-path))) ;; note the trailing slash to get the dir inspite of it being a link (test-path (conc linktree "/" test-partial-path)) (test-physical-path (if (file-exists? test-path) (read-symbolic-link test-path #t) #f)) (partial-path-index (if test-physical-path (substring-index test-partial-path test-physical-path) #f)) (test-base (if (and partial-path-index @@ -220,11 +220,11 @@ (keyvals (rmt:get-key-val-pairs run-id)) (target (string-intersperse (map cadr keyvals) "/")) (toplevel/children (and (db:test-get-is-toplevel test-dat) (> (rmt:test-toplevel-num-items run-id test-name) 0))) - (test-partial-path (conc target "/" run-name "/" (runs:make-full-test-name test-name item-path))) + (test-partial-path (conc target "/" run-name "/" (db:test-make-full-name test-name item-path))) ;; note the trailing slash to get the dir inspite of it being a link (test-path (conc linktree "/" test-partial-path)) ;; if the old path was not deleted then prev-test-physical-path will end up pointing to a real directory (prev-test-physical-path (if (file-exists? test-path) (read-symbolic-link test-path #t) #f)) Index: batchsim/Makefile ================================================================== --- batchsim/Makefile +++ batchsim/Makefile @@ -1,7 +1,8 @@ +RUN=default.scm all : batchsim - ./batchsim + ./batchsim $(RUN) batchsim : batchsim.scm csc batchsim.scm Index: batchsim/batchsim.scm ================================================================== --- batchsim/batchsim.scm +++ batchsim/batchsim.scm @@ -61,10 +61,74 @@ 15 ;; height 400 ;; length )) (define *use-log* #f) (define *job-log-scale* 10) + +;;====================================================================== +;; CPU +;;====================================================================== + +(define-record cpu name num-cores mem job x y) + +;;====================================================================== +;; CPU Pool +;;====================================================================== + +(define-record pool name x y w h gap boxw cpus delta nrows ncols cpunum) + +(define (new-pool name x y nrows ncols gap boxw) + (let* ((delta (+ gap boxw)) + ;; (nrows (quotient h (+ gap delta))) + ;; (ncols (quotient w (+ gap delta))) + (w (+ gap (* nrows delta))) + (h (+ gap (* ncols delta))) + (cpus (make-vector (* nrows ncols) #f)) + (npool (make-pool name x y w h gap boxw cpus delta nrows ncols 0))) + npool)) + +(define (pool:add-cpu pool name num-cores mem) + (let* ((cpu (make-cpu name num-cores mem #f #f #f))) + (vector-set! (pool-cpus pool)(pool-cpunum pool) cpu) + (pool-cpunum-set! pool (+ 1 (pool-cpunum pool))) + cpu)) + +(define (pool:draw ezx pool) + (let ((nrows (pool-nrows pool)) + (ncols (pool-ncols pool)) + (x (pool-x pool)) + (y (pool-y pool)) + (w (pool-w pool)) + (h (pool-h pool)) + (gap (pool-gap pool)) + (boxw (pool-boxw pool)) + (delta (pool-delta pool)) + (cpus (pool-cpus pool))) + (ezx-select-layer ezx 1) + ;(ezx-wipe-layer ezx 1) + ;; draw time at upper right + (ezx-str-2d ezx x y (pool-name pool) *black*) + (ezx-rect-2d ezx x y (+ x w)(+ y h) *black* 1) + (let loop ((row 0) + (col 0) + (cpunum 0)) + (let* ((cpu (vector-ref cpus cpunum)) + (xval (+ x gap (* row delta))) + (yval (+ y gap (* col delta)))) + (if cpu + (begin + (cpu-x-set! cpu xval) + (cpu-y-set! cpu yval)) + (vector-set! cpus cpunum (make-cpu (conc cpunum) 1 1 #f xval yval))) + ;; (print "box at " xval ", " yval) + (ezx-rect-2d ezx xval yval (+ xval boxw) (+ yval boxw) *grey* 1) + (if (< col (- ncols 1)) + (loop row (+ col 1)(+ cpunum 1)) + (if (< row (- nrows 1)) + (loop (+ row 1) 0 (+ cpunum 1)))))) + (ezx-redraw ezx))) + ;;====================================================================== ;; Users ;;====================================================================== Index: batchsim/default.scm ================================================================== --- batchsim/default.scm +++ batchsim/default.scm @@ -7,10 +7,18 @@ (let loop ((count 200)) (add-cpu (conc "cpu_" count) 1 1) (if (>= count 0)(loop (- count 1)))) (draw-cpus) + +(define *pool1* (new-pool "generic" 100 100 100 100 2 10)) +(let loop ((count 10)) + (pool:add-cpu *pool1* (conc count) 1 1) + (if (> count 0) + (loop (- count 1)))) + +(pool:draw *ezx* *pool1*) ;; init the queues ;; (hash-table-set! *queues* "normal" '()) (hash-table-set! *queues* "quick" '()) ADDED batchsim/testing.scm Index: batchsim/testing.scm ================================================================== --- /dev/null +++ batchsim/testing.scm @@ -0,0 +1,135 @@ +;; run sim for four hours +;; +(define *end-time* (* 60 50)) + +;; create the cpus +;; +(let loop ((count 200)) + (add-cpu (conc "cpu_" count) 1 1) + (if (>= count 0)(loop (- count 1)))) + +;; (draw-cpus) + +(define *pool1* (new-pool "generic" 20 20 12 80 2 4)) +(let loop ((count 10)) + (pool:add-cpu *pool1* (conc count) 1 1) + (if (> count 0) + (loop (- count 1)))) + +(pool:draw *ezx* *pool1*) + +;; ;; init the queues +;; ;; +;; (hash-table-set! *queues* "normal" '()) +;; (hash-table-set! *queues* "quick" '()) +;; (draw-queues) +;; +;; ;; user k adds 200 jobs at time zero +;; ;; +;; (event *start-time* +;; (lambda () +;; (let loop ((count 300)) ;; add 500 jobs +;; (add-job "normal" "k" 600 1 1) +;; (if (>= count 0)(loop (- count 1)))))) +;; +;; ;; one minute in user m runs ten jobs +;; ;; +;; (event (+ 600 *start-time*) +;; (lambda () +;; (let loop ((count 300)) ;; add 100 jobs +;; (add-job "normal" "m" 600 1 1) +;; (if (> count 0)(loop (- count 1)))))) +;; +;; ;; every minute user j runs ten jobs +;; ;; +;; (define *user-j-jobs* 300) +;; (event (+ 600 *start-time*) +;; (lambda () +;; (let f () +;; (schedule 60) +;; (if (> *user-j-jobs* 0) +;; (begin +;; (let loop ((count 5)) ;; add 100 jobs +;; (add-job "quick" "j" 600 1 1) +;; (if (> count 0)(loop (- count 1)))) +;; (set! *user-j-jobs* (- *user-j-jobs* 5)))) +;; (if (and (not *done*) +;; (> *user-j-jobs* 0)) +;; (f))))) ;; Megatest user running 200 jobs +;; +;; ;; every minute user j runs ten jobs +;; ;; +;; (define *user-j-jobs* 300) +;; (event (+ 630 *start-time*) +;; (lambda () +;; (let f () +;; (schedule 60) +;; (if (> *user-j-jobs* 0) +;; (begin +;; (let loop ((count 5)) ;; add 100 jobs +;; (add-job "quick" "n" 600 1 1) +;; (if (> count 0)(loop (- count 1)))) +;; (set! *user-j-jobs* (- *user-j-jobs* 5)))) +;; (if (and (not *done*) +;; (> *user-j-jobs* 0)) +;; (f))))) ;; Megatest user running 200 jobs +;; +;; ;; ;; +;; ;; (event *start-time* +;; ;; (lambda () +;; ;; (let f ((count 200)) +;; ;; (schedule 10) +;; ;; (add-job "normal" "t" 60 1 1) +;; ;; (if (and (not *done*) +;; ;; (>= count 0)) +;; ;; (f (- count 1)))))) +;; +;; ;; every 3 seconds check for available machines and launch a job +;; ;; +;; (event *start-time* +;; (lambda () +;; (let f () +;; (schedule 3) +;; (let ((queue-names (random-sort (hash-table-keys *queues*)))) +;; (let loop ((cpu (get-cpu)) +;; (count (+ (length queue-names) 4)) +;; (qname (car queue-names)) +;; (remq (cdr queue-names))) +;; (if (and cpu +;; (> count 0)) +;; (begin +;; (if (peek-job qname) ;; any jobs to do in normal queue +;; (let ((job (take-job qname))) +;; (run-job cpu job))) +;; (loop (get-cpu) +;; (- count 1) +;; (if (null? remq) +;; (car queue-names) +;; (car remq)) +;; (if (null? remq) +;; (cdr queue-names) +;; (cdr remq))))))) +;; (if (not *done*)(f))))) +;; +;; ;; screen updates +;; ;; +(event *start-time* (lambda () + (let f () + (schedule 60) ;; update the screen every 60 seconds of sim time + ;; (draw-cpus) ;; (print "Now: " *now* " queue: " (hash-table->alist *queues*)) + (pool:draw *ezx* *pool1*) + + (wait-for-next-draw-time) + (if (not *done*) (f))))) +;; +;; +;; ;; end the simulation +;; ;; +(event *end-time* + (lambda () + (set! *event-list* '()) + (set! *done* #t))) +;; +(start) +;; ;; (exit 0) +;; Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -2270,11 +2270,11 @@ (sqlite3:first-result db (conc "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND testname in ('" (string-intersperse testnames "','") "') AND NOT (uname = 'n/a' AND item_path='');")) ;; should this include the (uname = 'n/a' ...) ??? - 0))))))) + ))))))) ;; DEBUG FIXME - need to merge this v.155 query correctly ;; AND testname in (SELECT testname FROM test_meta WHERE jobgroup=?) ;; AND NOT (uname = 'n/a' AND item_path = '');" ;; done with run when: Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -81,13 +81,13 @@ (debug:print 4 "script: " script) (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f area-dat) ;; now launch the actual process (call-with-environment-variables (list (cons "PATH" (conc (get-environment-variable "PATH") ":."))) - (lambda () - (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 - (pid (process-run cmd))) + (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1") + (let* ((cmd (conc "exec " stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 + (pid (process-run "/bin/bash" (list "-c" cmd)))) (rmt:test-set-top-process-pid run-id test-id pid area-dat) (let processloop ((i 0)) (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) (mutex-lock! m) (vector-set! exit-info 0 pid) @@ -200,15 +200,18 @@ ) (change-directory top-path) ;; (set-signal-handler! signal/int (lambda () - ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART, + ;; WAS: Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART, + ;; NOW: Do not run test test unless state is LAUNCHED ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY* ;; + ;; This is flawed. It should be a single transaction that tests for NOT_STARTED and updates to REMOTEHOSTSTART (let ((test-info (rmt:get-testinfo-state-status run-id test-id area-dat))) - (if (not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))) + ;; + (if (equal? (db:test-get-state test-info) "LAUNCHED") ;; '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))) (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") (begin (debug:print 0 "ERROR: test state is " (db:test-get-state test-info) ", cannot proceed") (exit)))) @@ -894,11 +897,19 @@ (list 'runname runname) (list 'mt-bindir-path mt-bindir-path)))))))) ;; clean out step records from previous run if they exist ;; (rmt:delete-test-step-records run-id test-id) - (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir + + ;; Moving launch logs to MT_RUN_AREA_HOME/logs + ;; + (let ((launchdir (configf:lookup *configdat* "setup" "launchdir"))) ;; (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir + (if (not launchdir) ;; default + (change-directory (conc *toppath* "/logs")) ;; can assume this exists + (case (string->symbol launchdir) + ((legacy)(change-directory work-area)) + (else (change-directory launchdir))))) (cond ((and launcher hosts) ;; must be using ssh hostname (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms) debug-param))) ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms)))) (launcher @@ -932,18 +943,18 @@ process-run) (if useshell (let ((cmdstr (string-intersperse fullcmd " "))) (if launchwait cmdstr - (conc cmdstr " >> mt_launch.log 2>&1"))) + (conc cmdstr " >> " work-area "/mt_launch.log 2>&1"))) (car fullcmd)) (if useshell '() (cdr fullcmd))))) (if (not launchwait) ;; give the OS a little time to allow the process to start (thread-sleep! 0.01)) - (with-output-to-file "mt_launch.log" + (with-output-to-file (conc work-area "/mt_launch.log") (lambda () (if (list? launch-results) (apply print launch-results) (print "NOTE: launched \"" fullcmd "\"\n but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n if you have problems with this")) #:append)) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -1356,11 +1356,11 @@ ;; run-ids = #f means *all* runs (let* ((numseconds (common:hms-string->seconds (configf:lookup test-conf "skip" "rundelay"))) (running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f)) (completed-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("COMPLETED") '("PASS" "FAIL" "ABORT") #f)) (last-run-times (map db:mintest-get-event_time completed-tests)) - (time-since-last (- (current-seconds) (apply max last-run-times)))) + (time-since-last (- (current-seconds) (if (null? last-run-times) 0 (apply max last-run-times))))) (if (or (not (null? running-tests)) ;; have to skip if test is running (> numseconds time-since-last)) (set! skip-test (conc "Skipping due to previous test run less than " (configf:lookup test-conf "skip" "rundelay") " ago")))))) (if skip-test @@ -1592,15 +1592,16 @@ (let ((new-tests (proc-get-tests run-id))) (if (null? new-tests) (debug:print-info 1 "Run completed according to zero tests matching provided criteria.") (loop (car new-tests)(cdr new-tests))))) ((archive) - (if (not toplevel-with-children) - (case (string->symbol (args:get-arg "-archive")) - ((save save-remove keep-html) - (debug:print-info 0 "Estimating disk space usage for " test-fulln) - (debug:print-info 0 " " (common:get-disk-space-used (conc run-dir "/")))))) + (if (and run-dir (not toplevel-with-children)) + (let ((ddir (conc run-dir "/"))) + (case (string->symbol (args:get-arg "-archive")) + ((save save-remove keep-html) + (if (file-exists? ddir) + (debug:print-info 0 "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir))))))) (if (not (null? tal)) (loop (car tal)(cdr tal)))) ))) ) (if worker-thread (thread-join! worker-thread)))))) Index: tests/fullrun/megatest.config ================================================================== --- tests/fullrun/megatest.config +++ tests/fullrun/megatest.config @@ -35,13 +35,18 @@ # Set launchwait to no to use the more agressive code that does not wait for the launch to complete before proceeding # this may save a few milliseconds on launching tests # launchwait no waivercommentpatt ^WW\d+ [a-z].* incomplete-timeout 1 + +# set the dbdir, default is linktree +dbdir #{getenv MT_RUN_AREA_HOME}/db/ # wait for runs to completely complete. yes, anything else is no run-wait yes + + # If set to "default" the old code is used. Otherwise defaults to 200 or uses # numeric value given. # runqueue 20 Index: tests/simplerun/tests/test1/step2.sh ================================================================== --- tests/simplerun/tests/test1/step2.sh +++ tests/simplerun/tests/test1/step2.sh @@ -1,6 +1,5 @@ #!/usr/bin/env bash # Run your step here echo Got here eh! -