Index: commonmod.scm ================================================================== --- commonmod.scm +++ commonmod.scm @@ -1881,15 +1881,15 @@ ;;====================================================================== ;; get cpu load by reading from /proc/loadavg, return all three values ;; (define (common:get-cpu-load remote-host) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "failed to ssh or read loadavg from host " remote-host ", exn=" exn) - '(-99 -99 -99)) + ;; (handle-exceptions + ;; exn + ;; (begin + ;; (debug:print 0 *default-log-port* "failed to ssh or read loadavg from host " remote-host ", exn=" exn) + ;; '(-99 -99 -99)) (let* ((actual-hostname (or remote-host (get-host-name) "localhost"))) (or (common:get-cached-info actual-hostname "cpu-load") (let ((result (if remote-host (map (lambda (res) (if (eof-object? res) 9e99 res)) @@ -1906,11 +1906,11 @@ (number? l3)) (begin (common:write-cached-info actual-hostname "cpu-load" result) result) '(-1 -1 -1))) ;; -1 is bad result - (else '(-2 -2 -2)))))))) + (else '(-2 -2 -2))))))) ;; ) ;;====================================================================== ;; get normalized cpu load by reading from /proc/loadavg and /proc/cpuinfo return all three values and the number of real cpus and the number of threads ;; returns alist '((adj-cpu-load . normalized-proc-load) ... etc. ;; keys: adj-proc-load, adj-core-load, 1m-load, 5m-load, 15m-load @@ -2307,10 +2307,12 @@ ("mode-patt" . "-modepatt") ("run-name" . "-runname") ("contour" . "-contour") ("target" . "-target") ("test-patt" . "-testpatt") + ("rerun" . "-rerun") + ("setvars" . "-setvars") ("msg" . "-m") ("log" . "-log") ("start-dir" . "-start-dir") ("new" . "-set-state-status")))) (if (eq? flavor 'switch-symbol) @@ -2612,11 +2614,13 @@ (string-intersperse (map cadr itemdat) "/") "")) (define (launch:is-test-alive host pid) (if (and host pid (not (equal? host "n/a"))) - (let* ((cmd (conc "ssh " host " pstree -A " pid)) + (let* ((is-local (equal? host (get-host-name))) + (ssh-cmd (if is-local " " (conc "ssh " host " "))) + (cmd (conc ssh-cmd "pstree -A " pid)) (output (with-input-from-pipe cmd read-lines))) (debug:print 2 *default-log-port* "Running " cmd " received " output) (if (eq? (length output) 0) #f #t)) Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -270,18 +270,33 @@ ;; if there is a submegatest create a button to launch dashboard in that area ;; (define (submegatest-panel dbstruct keydat testdat runname testconfig) (let* ((test-run-dir (db:test-get-rundir testdat)) (subarea (subrun:get-runarea test-run-dir)) - (area-exists (and subarea (common:file-exists? subarea silent: #t)))) - (if subarea + (area-exists (and subarea (common:file-exists? subarea silent: #t))) + (target #f) + (runname #f) + (cmd-parts-file (conc test-run-dir "/subrun-command-parts.sexp"))) + (if (file-exists? cmd-parts-file) ;; existance of this file is sufficient to *try* opening a dashboard + (let* ((cmd-parts (if (file-exists? cmd-parts-file) + (with-input-from-file cmd-parts-file + read) + '())) + (target (alist-ref "-target" cmd-parts equal?)) + (runname (alist-ref "-runname" cmd-parts equal?)) + (run-area (alist-ref "-startdir" cmd-parts equal?))) (iup:frame #:title "Megatest Run Info" ; #:expand "YES" + (iup:vbox (iup:button "Launch Dashboard" #:action (lambda (obj) - (subrun:launch-dashboard test-run-dir)))) + (subrun:launch-dashboard test-run-dir))) + (iup:button + "Launch Dashboard+Filter" + #:action (lambda (obj) + (subrun:launch-dashboard test-run-dir target: target runname: runname)))))) (iup:vbox)))) ;; use a global for setting the buttons colors ;; state status teststeps (define *state-status* (vector #f #f #f)) Index: dbmod.scm ================================================================== --- dbmod.scm +++ dbmod.scm @@ -947,11 +947,11 @@ (lambda (dat) (let ((tblname (car dat)) (count (cdr dat))) (set! tot-count (+ tot-count count)) (if (> count 0) ;; concs are there to prevent "Error: (sprintf) illegal format-string character: #\1" which I don't understand. - (if should-print (debug:print 0 *default-log-port* (format #f " ~10a ~5a" (conc tblname)(conc count))))))) + (if should-print (debug:print 0 *default-log-port* (format " "(conc tblname)" "(conc count))))))) ;; ~10a ~5a" (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b)))))) tot-count)))) (define db:trigger-list (list (list "update_runs_trigger" "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs Index: ezsteps.scm ================================================================== --- ezsteps.scm +++ ezsteps.scm @@ -47,13 +47,27 @@ ;;(rmt:get-test-info-by-id run-id test-id) -> testdat ;; TODO: deprecate me in favor of ezsteps.scm ;; -(define (launch:runstep ezstep run-id test-id exit-info m tal testconfig all-steps-dat) +(define (launch:runstep ezstep run-id test-id exit-info m tal testconfig all-steps-dat prevstepname envdbf) (let* ((stepname (car ezstep)) ;; do stuff to run the step + (stepmode-n (ezsteps:step-name->mode stepname)) (stepinfo (cadr ezstep)) + (shellmode (ezsteps:get-ezpropvars (configf:lookup testconfig "setup" "ezpropvars"))) ;; returns '(csh|sh . "/path/to/shell") + (stepmode (if stepmode-n ;; the .sh or .csh always wins + stepmode-n + (if shellmode + (car shellmode) + #f))) + (shellexe (if stepmode-n + (case stepmode + ((csh) "/bin/csh") + (else "/bin/bash")) + (if shellmode + (cdr shellmode) + "/bin/bash"))) ;; (let ((info (cadr ezstep))) ;; (if (proc? info) "" info))) ;; (stepproc (let ((info (cadr ezstep))) ;; (if (proc? info) info #f))) (stepparts (string-match (regexp "^(\\{([^\\}\\{]*)\\}\\s*|)(.*)$") stepinfo)) @@ -89,12 +103,14 @@ ";;") (print tconfig-logpro))) (set! logpro-used #t))) ;; NB// can safely assume we are in test-area directory - (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts + (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo + " stepparts: " stepparts " stepparams: " stepparams " stepcmd: " stepcmd) + (if stepmode (ezsteps:create-step-script envdbf stepname prevstepname stepmode stepcmd shellexe)) ;; ;; first source the previous environment ;; (let ((prev-env (conc ".ezsteps/" prevstep (if (string-search (regexp "csh") ;; (get-environment-variable "SHELL")) ".csh" ".sh")))) ;; (if (and prevstep (common:file-exists? prev-env)) @@ -107,18 +123,21 @@ (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f) ;; now launch the actual process (call-with-environment-variables (list (cons "PATH" (conc (get-environment-variable "PATH") ":."))) (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1") - (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 + (let* ((cmd (conc (if stepmode + (conc "ez_" stepname) + stepcmd) + " > " stepname ".log 2>&1")) ;; >outfile 2>&1 (pid #f)) (let ((proc (lambda () (set! pid (process-run "/bin/bash" (list "-c" cmd)))))) (if subrun (begin (debug:print-info 0 *default-log-port* "Running without MT_.* environment variables.") - (common:without-vars proc "^MT_.*")) + (common:propogate-mt-vars-to-subrun proc '("MT_TARGET" "MT_LINKTREE" "MT_RUNNAME"))) (proc))) (with-output-to-file "Makefile.ezsteps" (lambda () (print stepname ".log :") Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -177,22 +177,28 @@ (append (or ezstepslst '()) (list (list "subrun" (conc "{subrun=true} " mt-cmd))))))) ;; process the ezsteps (if ezsteps - (let* ((all-steps-dat (make-hash-table))) ;; keep all the info around as stepname ==> alist; where 'params is the params list (add other stuff as needed) + (let* ((envdbf (conc "/tmp/."(current-user-name)"-"(current-process-id)"-"run-id"-"test-id".db")) + (all-steps-dat (make-hash-table))) ;; keep all the info around as stepname ==> alist; + ;;; where 'params is the params list (add other + ;;; stuff as needed) (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps")) ;; if ezsteps was defined then we are sure to have at least one step but check anyway (if (not (> (length ezstepslst) 0)) (debug:print-error 0 *default-log-port* "ezsteps defined but ezstepslst is zero length") + (let ((all-step-names (map car ezstepslst))) + (setenv "MT_STEP_NAMES" (string-intersperse all-step-names " ")) (let loop ((ezstep (car ezstepslst)) (tal (cdr ezstepslst)) (prevstep #f)) (debug:print-info 0 *default-log-port* "Processing ezstep \"" (string-intersperse ezstep " ") "\"") ;; check exit-info (vector-ref exit-info 1) (if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1) - (let* ((logpro-used (launch:runstep ezstep run-id test-id exit-info m tal testconfig all-steps-dat)) + (let* ((logpro-used (launch:runstep ezstep run-id test-id exit-info m + tal testconfig all-steps-dat prevstep envdbf)) (stepname (car ezstep)) (stepparms (hash-table-ref all-steps-dat stepname))) (setenv "MT_STEP_NAME" stepname) (pp (hash-table->alist all-steps-dat)) ;; if logpro-used read in the stepname.dat file @@ -200,11 +206,12 @@ (launch:load-logpro-dat run-id test-id stepname)) (if (steprun-good? logpro-used (launch:einf-exit-code exit-info) stepparms) (if (not (null? tal)) (loop (car tal) (cdr tal) stepname)) (debug:print 0 *default-log-port* "WARNING: step " (car ezstep) " failed. Stopping"))) - (debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep))))))))) + (debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep)) + )))))))) (define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags) (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "30"))) (start-seconds (current-seconds)) (calc-minutes (lambda () Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -437,11 +437,11 @@ ;; (rmt:update-db-stats run-id cmd params duration) ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it (if qry-is-write (let ((start-time (current-seconds))) (mutex-lock! *db-multi-sync-mutex*) -/ (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client) + (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client) (mutex-unlock! *db-multi-sync-mutex*))))) res)) (define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params) (let* ((run-id (if run-id run-id 0)) @@ -711,11 +711,13 @@ (define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f)) (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps))) (define (rmt:get-count-tests-running-for-run-id run-id) - (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id))) + (if (number? run-id) + (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id)) + 0)) (define (rmt:get-not-completed-cnt run-id) (rmt:send-receive 'get-not-completed-cnt run-id (list run-id))) @@ -818,12 +820,11 @@ (define (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-runs-update #!key (sort-order "asc")) ;; fields of #f uses default (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit fields last-runs-update sort-order))) (define (rmt:find-and-mark-incomplete run-id ovr-deadtime) ;; (if (rmt:send-receive 'have-incompletes? run-id (list run-id ovr-deadtime)) - (rmt:send-receive 'mark-incomplete run-id (list run-id ovr-deadtime)) - ) ;; ) + (rmt:send-receive 'mark-incomplete run-id (list run-id ovr-deadtime))) ;; ) (define (rmt:get-main-run-stats run-id) (rmt:send-receive 'get-main-run-stats #f (list run-id))) (define (rmt:get-var varname) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -1283,11 +1283,11 @@ ((not have-resources) ;; simply try again after waiting a second (if (runs:lownoise "no resources" 60) (debug:print-info 1 *default-log-port* "no resources to run new tests, waiting ...")) ;; Have gone back and forth on this but db starvation is an issue. ;; wait one second before looking again to run jobs. - (thread-sleep! 0.25) + (thread-sleep! 1) ;; changed back to 1 from 0.25 ;; could have done hed tal here but doing car/cdr of newtal to rotate tests (list (car newtal)(cdr newtal) reg reruns)) ;; This is the final stage, everything is in place so launch the test ;; Index: subrun.scm ================================================================== --- subrun.scm +++ subrun.scm @@ -52,15 +52,17 @@ (if (and (common:file-exists? (conc test-run-dir "/subrun-area") ) (common:file-exists? (conc test-run-dir "/testconfig.subrun") )) #t #f)) -(define (subrun:launch-dashboard test-run-dir) +(define (subrun:launch-dashboard test-run-dir #!key (target #f)(runname #f)) (if (subrun:subrun-test-initialized? test-run-dir) - (let* ((subarea (subrun:get-runarea test-run-dir))) + (let* ((subarea (subrun:get-runarea test-run-dir)) + (params (conc (if target (conc " -target " target) "") + (if runname (conc " -runname " runname) "")))) (if (and subarea (common:file-exists? subarea)) - (system (conc "cd " subarea ";env -i PATH=$PATH DISPLAY=$DISPLAY HOME=$HOME USER=$USER dashboard &")))))) + (system (conc "cd " subarea ";env -i PATH=$PATH DISPLAY=$DISPLAY HOME=$HOME USER=$USER nbfake dashboard " params)))))) (define (subrun:subrun-removed? test-run-dir) (if (subrun:subrun-test-initialized? test-run-dir) (let ((flagfile (conc test-run-dir "/subrun.removed"))) (if (common:file-exists? flagfile) @@ -219,10 +221,13 @@ (map (lambda (item) (if (equal? (car item) "-testpatt") (cons "-testpatt" testpatt) item)) switch-alist-pre)))) + (with-output-to-file "subrun-command-parts.sexp" + (lambda () + (pp switch-alist))) switch-alist)) ;; note - get precmd from subrun section ;; apply to submegatest commands (define (subrun:get-log-path test-run-dir log-prefix) Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -1633,15 +1633,17 @@ (format temp-port " size=4,8\n") ;; (format temp-port " splines=none\n") (for-each (lambda (testname) (let* ((testrec (hash-table-ref test-records testname)) - (waitons (or (tests:testqueue-get-waitons testrec) '()))) + (waitons (or (tests:testqueue-get-waitons testrec) '())) + (my-mt-waitons (tests:get-mt-waitons testname #t))) + ;; (print "my-mt-waitons=" my-mt-waitons) (for-each (lambda (waiton) (format temp-port (conc " " waiton " -> " testname " [splines=ortho]\n"))) - waitons))) + (append waitons my-mt-waitons)))) all-testnames) (format temp-port "}\n") (close-output-port temp-port) (with-input-from-pipe (conc "env -i PATH=$PATH dot -T" outtype " < " temp-path) @@ -1666,17 +1668,19 @@ (conc " size=\"" (or sizex 11) "," (or sizey 11) "\";") " ratio=0.95;" ))) (let* ((testrec (hash-table-ref test-records hed)) (waitons (or (tests:testqueue-get-waitons testrec) '())) + (my-mt-waitons (tests:get-mt-waitons hed #t)) + (all-waitons (delete-duplicates (append waitons my-mt-waitons))) (newres (append res - (if (null? waitons) + (if (null? all-waitons) (list (conc " \"" hed "\" [shape=box];")) (map (lambda (waiton) (conc " \"" waiton "\" -> \"" hed "\" [shape=box];")) - waitons) - )))) + all-waitons))))) + ;; (debug:print 0 *default-log-port* "For test "hed" got "all-waitons) (if (null? tal) (append newres (list "}")) (loop (car tal)(cdr tal) newres) ))))))