Index: TODO ================================================================== --- TODO +++ TODO @@ -16,10 +16,17 @@ # along with Megatest. If not, see . TODO ==== +WW38 +. Add test_rundat to no-sync ==> correction, put in /.meta/test-run.dat +. Add STATE/STATUS transitions to .meta/test-run.dat or similar +. Swizzle update-test-rundat to operate on no-sync +. Swizzle update-run-duration, -uname-host and cpuload-diskfree to no-sync +. On state/status change update tests table with duration + WW15 . fill newview matrix with data, filter pipeline gui elements . improve [script], especially indent handling WW16 @@ -35,11 +42,10 @@ . break command line into sections; all, run control, queries, utilities etc. . pull in ftfplan (not integrated, just code pulled in) WW20 . ./configure => ubuntu, sles11, sles12, rh7 -. Jenkins junit XML support . Add output flushing in teamcity support . Switch to using simple runs query everywhere . Add end_time to runs and add a rollup call that sets state, status and end_time Future Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -142,11 +142,11 @@ ;; (define (api:execute-requests dbstruct dat) (handle-exceptions exn (let ((call-chain (get-call-chain))) - (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat) + (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat ", exn=" exn) (print-call-chain (current-error-port)) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens (cond ((not (vector? dat)) ;; it is an error to not receive a vector Index: archive.scm ================================================================== --- archive.scm +++ archive.scm @@ -90,11 +90,11 @@ (pscript-cmd (conc pscript " " testsuite-name " " target " " run-name " " test-name)) (apath (if pscript (handle-exceptions exn (begin - (debug:print 0 *default-log-port* "ERROR: script \"" pscript-cmd "\" failed to run properly.") + (debug:print 0 *default-log-port* "ERROR: script \"" pscript-cmd "\" failed to run properly. exn=" exn) (exit 1)) (with-input-from-pipe pscript-cmd read-line)) #f)) ;; this is the user-calculated archive path Index: chicken.makefile ================================================================== --- chicken.makefile +++ chicken.makefile @@ -23,11 +23,11 @@ # CHICKEN_BIN_DIR=$(shell dirname $(shell which csi)) # if have csi on path use that, else use default # CSIPATH=$(shell which csi) # CKPATH=$(shell dirname $(shell dirname $(CSIPATH))) -sCHICKEN_PREFIX=$(or $(CKPATH),$(PREFIX)/bin/.$(ARCHSTR)) +CHICKEN_PREFIX=$(or $(CKPATH),$(PREFIX)/bin/.$(ARCHSTR)) whatever : @echo "CHICKEN_PREFIX=$(CHICKEN_PREFIX)" tgz-$(USER)/postgresql-9.6.4.tar.gz : @@ -66,10 +66,11 @@ cd tgz-$(USER)/nanomsg-1.0.0/build-$(USER); make; make install $(CHICKEN_PREFIX)/chicken-4.13.0/LICENSE : tgz-$(USER)/chicken-4.13.0.tar.gz mkdir -p build-$(USER)/eggs-installed cd build-$(USER);tar xf ../tgz-$(USER)/chicken-4.13.0.tar.gz + if [[ -e $(CHICKEN_PREFIX)/chicken-4.13.0/LICENSE ]];then touch $(CHICKEN_PREFIX)/chicken-4.13.0/LICENSE;fi tgz-$(USER)/opensrc.fossil : cd tgz-$(USER); fossil clone http://www.kiatoa.com/fossils/opensrc opensrc.fossil mkdir tgz-$(USER)/opensrc cd tgz-$(USER)/opensrc; fossil open --nested ../opensrc.fossil; fossil up; fossil uv sync Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -49,11 +49,11 @@ ;; arguments - thunk, message (define (common:fail-safe thunk warning-message-on-exception) (handle-exceptions exn (begin - (debug:print-info 0 *default-log-port* "notable but nonfatal condition - "warning-message-on-exception) + (debug:print-info 0 *default-log-port* "notable but nonfatal condition - "warning-message-on-exception", exn=" exn) (debug:print-info 0 *default-log-port* (string-substitute "\n?Error:" "nonfatal condition:" (with-output-to-string (lambda () (print-error-message exn) )))) @@ -61,17 +61,19 @@ #f) (thunk))) (define getenv get-environment-variable) (define (safe-setenv key val) - (if (or (substring-index "!" key) (substring-index ":" key)) ;; variables containing : are for internal use and cannot be environment variables. + (if (or (substring-index "!" key) + (substring-index ":" key) ;; variables containing : are for internal use and cannot be environment variables. + (substring-index "." key)) ;; periods are not allowed in environment variables (debug:print-error 4 *default-log-port* "skip setting internal use only variables containing \":\" or starting with \"!\"") (if (and (string? val) (string? key)) (handle-exceptions exn - (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val) + (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val ", exn=" exn) (setenv key val)) (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val)))) (define home (getenv "HOME")) (define user (getenv "USER")) @@ -506,11 +508,11 @@ (directory-fold (lambda (file rem) (handle-exceptions exn (begin - (debug:print-info 2 *default-log-port* "unable to rotate log " file ", probably handled by another process, this is safe to ignore.") + (debug:print-info 2 *default-log-port* "unable to rotate log " file ", probably handled by another process, this is safe to ignore. exn=" exn) (debug:print 2 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) ;; (print-call-chain (current-error-port)) ;; ) (let* ((fullname (conc "logs/" file)) (mod-time (file-modification-time fullname)) @@ -541,11 +543,11 @@ (handle-exceptions exn #f (if (directory? fullname) (begin - (debug:print-error 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") + (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") (inc-stat "directories")) (begin (delete-file* fullname) (inc-stat "deleted"))) (hash-table-delete! all-files file))))))) @@ -565,14 +567,14 @@ (- num-logs max-allowed)))) (for-each (lambda (file) (let* ((fullname (conc "logs/" file))) (if (directory? fullname) - (debug:print-error 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") + (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") (handle-exceptions exn - (debug:print-error 0 *default-log-port* "failed to remove " fullname) + (debug:print-error 0 *default-log-port* "failed to remove " fullname ", exn=" exn) (delete-file* fullname))))) files) (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files.")))))) ;; Force a megatest cleanup-db if version is changed and skip-version-check not specified @@ -595,11 +597,11 @@ (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db (debug:print 0 *default-log-port* " I see you are the owner of megatest.config, attempting to cleanup and reset to new version") (handle-exceptions exn (begin - (debug:print 0 *default-log-port* "Failed to switch versions.") + (debug:print 0 *default-log-port* "Failed to switch versions. exn=" exn) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) (print-call-chain (current-error-port)) (exit 1)) (common:cleanup-db dbstruct))) ((not (common:file-exists? mtconf)) @@ -706,11 +708,11 @@ (handle-exceptions exn (handle-exceptions exn (begin - (debug:print-error 0 *default-log-port* "received bad encoded string \"" instr "\", message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print-error 0 *default-log-port* "received bad encoded string \"" instr "\", message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) (print-call-chain (current-error-port)) #f) (read (open-input-string (base64:base64-decode instr)))) (read (open-input-string (z3:decode-buffer (base64:base64-decode instr)))))) @@ -944,11 +946,11 @@ *db-cache-path* (if *toppath* ;; common:get-create-writeable-dir (handle-exceptions exn (begin - (debug:print-error 0 *default-log-port* "Couldn't create path to " *db-cache-path*) + (debug:print-error 0 *default-log-port* "Couldn't create path to " *db-cache-path* ", exn=" exn) (exit 1)) (let ((dbpath (common:get-create-writeable-dir (list (conc "/tmp/" (current-user-name) "/megatest_localdb/" (common:get-testsuite-name) "/" @@ -1189,11 +1191,12 @@ (file-write-access? hed) hed) (handle-exceptions exn (begin - (debug:print-info 0 *default-log-port* "could not create " hed ", this might cause problems down the road.") + (debug:print-info 0 *default-log-port* "could not create " hed + ", this might cause problems down the road. exn=" exn) #f) (create-directory hed #t))))) (if (and (string? res) (directory? res)) res @@ -1333,12 +1336,14 @@ ;; ;; returns the directory or #f ;; (define (common:directory-writable? path-string) (handle-exceptions - exn - #f + exn + (begin + (debug:print 0 *default-log-port* "Failed to identify access to " path-string ", exn=" exn) + #f) (if (and (directory-exists? path-string) (file-write-access? path-string)) path-string #f))) @@ -1427,16 +1432,20 @@ (handle-exceptions exn (if (> trynum 0) (let ((delay-time (* (- 5 trynum) 5))) (mutex-unlock! *homehost-mutex*) - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying " delay-time " seconds and trying again, message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying " + delay-time " seconds and trying again, message: " ((condition-property-accessor 'exn 'message) exn) + ", exn=" exn) (thread-sleep! delay-time) (common:get-homehost trynum: (- trynum 1))) (begin (mutex-unlock! *homehost-mutex*) - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file after trying five times. Giving up and exiting, message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time) + "] Failed to read .homehost file after trying five times. Giving up and exiting, message: " + ((condition-property-accessor 'exn 'message) exn)) (exit 1))) (let ((hhf (conc *toppath* "/.homehost"))) (if (common:file-exists? hhf) (with-input-from-file hhf read-line) (if (file-write-access? *toppath*) @@ -1687,26 +1696,32 @@ ;; lazy-safe get file mod time. on any error (file not existing etc.) return 0 ;; (define (common:lazy-modification-time fpath) (handle-exceptions exn - 0 - (file-modification-time fpath))) + (begin + (debug:print 0 *default-log-port* "Failed to get modifcation time for " fpath ", treating it as zero. exn=" exn) + 0) + (if (file-exists? fpath) + (file-modification-time fpath) + 0))) ;; find timestamp of newest file associated with a sqlite db file (define (common:lazy-sqlite-db-modification-time fpath) (let* ((glob-list (handle-exceptions exn - `(,(conc "/no/such/file, message: " ((condition-property-accessor 'exn 'message) exn))) + (begin + (debug:print 0 *default-log-port* "Failed to glob " fpath "*, exn=" exn) + `(,(conc "/no/such/file, message: " ((condition-property-accessor 'exn 'message) exn)))) (glob (conc fpath "*")))) (file-list (if (eq? 0 (length glob-list)) '("/no/such/file") glob-list))) (apply max - (map - common:lazy-modification-time - file-list)))) + (map + common:lazy-modification-time + file-list)))) ;; return a nice clean pathname made absolute (define (common:nice-path dir) (let ((match (string-match "^(~[^\\/]*)(\\/.*|)$" dir))) (if match ;; using ~ for home? @@ -1720,11 +1735,11 @@ (define (common:read-link-f path) (handle-exceptions exn (begin - (debug:print-error 0 *default-log-port* "command \"/bin/readlink -f " path "\" failed.") + (debug:print-error 0 *default-log-port* "command \"/bin/readlink -f " path "\" failed. exn=" exn) path) ;; just give up (with-input-from-pipe (conc "/bin/readlink -f " path) (lambda () (read-line))))) @@ -1763,11 +1778,12 @@ (debug:print-info 1 *default-log-port* " removing bad file " fullpath) (delete-file* fullpath) #f) (with-input-from-file fullpath read)) (begin - (debug:print-info 2 *default-log-port* "file " fullpath " is too old (" real-age" seconds) to trust, skipping reading it") + (debug:print-info 2 *default-log-port* "file " fullpath + " is too old (" real-age" seconds) to trust, skipping reading it") #f)))) (begin (debug:print 2 *default-log-port* "not reading file " fullpath) #f))) #f)) @@ -1776,50 +1792,56 @@ (if *toppath* (let* ((fulldir (conc *toppath* "/.sysdata")) (fullpath (conc fulldir "/" key "-" dtype ".log"))) (if (not (file-exists? fulldir))(create-directory fulldir #t)) (handle-exceptions - exn - #f - (with-output-to-file fullpath (lambda ()(pp dat))))) + exn + (begin + (debug:print 0 *default-log-path* "failed to write file " fullpath ", exn=" exn) + #f) + (with-output-to-file fullpath (lambda ()(pp dat))))) #f)) (define (common:raw-get-remote-host-load remote-host) (handle-exceptions - exn - #f ;; more specific handling of errors needed - (with-input-from-pipe - (conc "ssh " remote-host " cat /proc/loadavg") - (lambda ()(list (read)(read)(read)))))) + exn + (begin + (debug:print 0 *default-log-port* "failed to ssh to " remote-host " and get loadavg. exn=" exn) + #f) ;; more specific handling of errors needed + (with-input-from-pipe + (conc "ssh " remote-host " cat /proc/loadavg") + (lambda ()(list (read)(read)(read)))))) ;; get cpu load by reading from /proc/loadavg, return all three values ;; (define (common:get-cpu-load remote-host) (handle-exceptions - exn - '(-99 -99 -99) - (let* ((actual-hostname (or remote-host (get-host-name) "localhost"))) - (or (common:get-cached-info actual-hostname "cpu-load") - (let ((result (if remote-host - (map (lambda (res) - (if (eof-object? res) 9e99 res)) - (with-input-from-pipe - (conc "ssh " remote-host " cat /proc/loadavg") - (lambda ()(list (read)(read)(read))))) - (with-input-from-file "/proc/loadavg" - (lambda ()(list (read)(read)(read))))))) - (match - result - ((l1 l2 l3) - (if (and (number? l1) + exn + (begin + (debug:print 0 *default-log-port* "failed to ssh or read loadavg from host " remote-host ", exn=" exn) + '(-99 -99 -99)) + (let* ((actual-hostname (or remote-host (get-host-name) "localhost"))) + (or (common:get-cached-info actual-hostname "cpu-load") + (let ((result (if remote-host + (map (lambda (res) + (if (eof-object? res) 9e99 res)) + (with-input-from-pipe + (conc "ssh " remote-host " cat /proc/loadavg") + (lambda ()(list (read)(read)(read))))) + (with-input-from-file "/proc/loadavg" + (lambda ()(list (read)(read)(read))))))) + (match + result + ((l1 l2 l3) + (if (and (number? l1) (number? l2) (number? l3)) - (begin - (common:write-cached-info actual-hostname "cpu-load" result) - result) - '(-1 -1 -1))) ;; -1 is bad result - (else '(-2 -2 -2)))))))) + (begin + (common:write-cached-info actual-hostname "cpu-load" result) + result) + '(-1 -1 -1))) ;; -1 is bad result + (else '(-2 -2 -2)))))))) ;; get normalized cpu load by reading from /proc/loadavg and /proc/cpuinfo return all three values and the number of real cpus and the number of threads ;; returns alist '((adj-cpu-load . normalized-proc-load) ... etc. ;; keys: adj-proc-load, adj-core-load, 1m-load, 5m-load, 15m-load ;; @@ -2097,11 +2119,12 @@ (debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload ", load: " first ", adjmaxload: " adjmaxload ", loadjmp: " loadjmp)) (cond ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable (> num-tries 0)) - (debug:print 0 *default-log-port* "WARNING: received bad data from get-cpu-load " first ", we'll sleep 10s and try " num-tries " more times.") + (debug:print 0 *default-log-port* "WARNING: received bad data from get-cpu-load " + first ", we'll sleep 10s and try " num-tries " more times.") (thread-sleep! 10) (common:wait-for-cpuload maxload-in numcpus-in waitdelay count: count remote-host: remote-host force-maxload: force-maxload num-tries: (- num-tries 1))) ((and (> first adjmaxload) (> count 0)) @@ -2433,10 +2456,12 @@ ("mode-patt" . "-modepatt") ("run-name" . "-runname") ("contour" . "-contour") ("target" . "-target") ("test-patt" . "-testpatt") + ("rerun" . "-rerun") + ("setvars" . "-setvars") ("msg" . "-m") ("log" . "-log") ("start-dir" . "-start-dir") ("new" . "-set-state-status")))) (if (eq? flavor 'switch-symbol) @@ -3018,10 +3043,11 @@ (let host-loop ((targ-host (common:get-least-loaded-host (cdr launcher-parts) host-type configdat)) (count 100)) (if targ-host (conc "remrun " targ-host) (if (> count 0) + (begin (debug:print 0 *default-log-port* "INFO: Waiting for a host for host-type " host-type) (thread-sleep! (- 101 count)) (host-loop (common:get-least-loaded-host (cdr launcher-parts) host-type configdat) (- count 1))) @@ -3195,11 +3221,11 @@ (pktsdir (car pktsdirs))) ;; assume it is there (hash-table-set! *pkts-info* 'pkts-dir pktsdir) pktsdir)))) (handle-exceptions exn - (debug:print-info 0 "failed to write out packet to " pktsdir) ;; don't care if this failed for now but MUST FIX - BUG!! + (debug:print-info 0 "failed to write out packet to " pktsdir ", exn=" exn) ;; don't care if this failed for now but MUST FIX - BUG!! (if (not (file-exists? pktsdir)) (create-directory pktsdir #t)) (with-output-to-file (conc pktsdir "/" uuid ".pkt") (lambda () @@ -3342,13 +3368,15 @@ (for-each (lambda (thread-name) (let* ((thread (hash-table-ref/default *common:thread-punchlist* thread-name #f))) (if thread (handle-exceptions - exn - #t ;; just ignore it, it might have died in the meantime so joining it will throw an exception - (thread-join! thread)) + exn + (begin + (debug:print 0 *default-log-port* "joining threads failed. exn=" exn) + #t) ;; just ignore it, it might have died in the meantime so joining it will throw an exception + (thread-join! thread)) ))) (hash-table-keys *common:thread-punchlist*))) (define *common:telemetry-log-state* 'startup) (define *common:telemetry-log-socket* #f) Index: common_records.scm ================================================================== --- common_records.scm +++ common_records.scm @@ -39,10 +39,11 @@ ;; (define-syntax common:handle-exceptions ;; (syntax-rules () ;; ((_ exn-in errstmt ...)(handle-exceptions exn-in errstmt ...)))) +;; this works, why didn't I use it more? (define-syntax common:debug-handle-exceptions (syntax-rules () ((_ debug exn errstmt body ...) (if debug (begin body ...) Index: configf.scm ================================================================== --- configf.scm +++ configf.scm @@ -65,11 +65,11 @@ ;; (equal? "!" (substring str 0 1))) ;; null string or starts with ! are preserved but NOT set in the environment str (handle-exceptions exn (begin - (debug:print-error 0 *default-log-port* "problem evaluating \"" str "\" in the shell environment") + (debug:print-error 0 *default-log-port* "problem evaluating \"" str "\" in the shell environment, exn=" exn) #f) (let ((cmdres (process:cmd-run->list (conc "echo " str)))) (if (null? cmdres) "" (caar cmdres))))) ;; ) @@ -130,11 +130,11 @@ (else "(lambda (ht)(print \"ERROR\") \"ERROR\")")))) ;; (print "fullcmd=" fullcmd) (handle-exceptions exn (begin - (debug:print 0 *default-log-port* "WARNING: failed to process config input \"" l "\"") + (debug:print 0 *default-log-port* "WARNING: failed to process config input \"" l "\", exn=" exn) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) ;; (print "exn=" (condition->list exn)) (set! result (conc "#{( " cmdtype ") " cmd "}, full expansion: " fullcmd))) (if (or allow-system (not (member cmdtype '("system" "shell" "sh")))) @@ -334,11 +334,15 @@ (common:nice-path (conc (if curr-conf-dir curr-conf-dir ".") "/" include-file))))) - (let ((all-matches (sort (handle-exceptions exn (list) (glob full-conf)) string<=?))) + (let ((all-matches (sort (handle-exceptions exn + (begin + (debug:print '(2 9) *default-log-port* "glob of " full-conf " gave no match. , exn=" exn) + (list)) + (glob full-conf)) string<=?))) (if (null? all-matches) (begin (debug:print '(2 9) #f "INFO: include file(s) matching " include-file " not found (called from " path ")") (debug:print 2 *default-log-port* " " full-conf)) (for-each @@ -775,14 +779,16 @@ ht)) ;; if (define (configf:read-alist fname) (handle-exceptions - exn - #f - (configf:alist->config - (with-input-from-file fname read)))) + exn + (begin + (debug:print-info 0 *default-log-port* "unable to read alist " fname ". exn=" exn) + #f) + (configf:alist->config + (with-input-from-file fname read)))) (define (configf:write-alist cdat fname) (if (not (common:faux-lock fname)) (debug:print 0 *default-log-port* "INFO: Could not get lock on " fname)) (let* ((dat (configf:config->alist cdat)) @@ -795,14 +801,16 @@ (if (common:file-exists? fname) ;; now verify it is readable (if (configf:read-alist fname) #t ;; data is good. (begin (handle-exceptions - exn - #f - (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.") - (delete-file fname)) + exn + (begin + (debug:print 0 *default-log-port* "deleting " fname " failed, exn=" exn) + #f) + (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.") + (delete-file fname)) #f)) #f)))) (common:faux-unlock fname) res)) Index: configure ================================================================== --- configure +++ configure @@ -71,11 +71,11 @@ ARCHSTR=$(/usr/bin/sw_vers -productVersion) else ARCHSTR=$(lsb_release -sr) fi -echo "CHICKEN_PREFIX=$PREFIX/.$ARCHSTR" >> makefile.inc +echo "CKPATH=$PREFIX/.$ARCHSTR" >> makefile.inc CHICKEN_PREFIX=$PREFIX/bin/.$ARCHSTR if [[ ! $(type csi) ]];then echo "Chicken build needed." echo "BUILD_CHICKEN=yes" >> makefile.inc Index: dashboard-context-menu.scm ================================================================== --- dashboard-context-menu.scm +++ dashboard-context-menu.scm @@ -319,11 +319,11 @@ (lambda () (if scheme-match (begin (handle-exceptions exn - (print "error with custom menu scheme") + (print "error with custom menu scheme, exn=" exn) (begin ;;(BB> "gonna eval it!") (eval (with-input-from-string (cadr scheme-match) read))))) (common:run-a-command command-line with-vars: #t)))))))) #f))) Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -513,21 +513,25 @@ ;; this next block was added to fix a bug where variables were ;; needed. Revisit this. (runconfig (let ((runconfigf (conc *toppath* "/runconfigs.config"))) ;; no rush but it would be good to convert this call to use runconfig:read (if (common:file-exists? runconfigf) (handle-exceptions - exn - #f ;; do nothing, just keep on trucking .... + exn + (begin + (debug:print 0 *default-log-port* "failed to set up environment for " runconfigf ", exn=" exn) + #f) ;; do nothing, just keep on trucking .... (setup-env-defaults runconfigf run-id (make-hash-table) keydat environ-patt: keystring)) (make-hash-table)))) (testconfig (begin ;; (runs:set-megatest-env-vars run-id inrunname: runname testname: test-name itempath: item-path) (runs:set-megatest-env-vars run-id inkeyvals: keydat inrunname: runname intarget: keystring testname: testname itempath: item-path) ;; these may be needed by the launching process (handle-exceptions - exn ;; NOTE: I've no idea why this was written this way. Research, study and fix needed! - (tests:get-testconfig (db:test-get-testname testdat) (db:test-get-item-path testdat) test-registry #f allow-write-cache: #f) - (tests:get-testconfig (db:test-get-testname testdat) item-path test-registry #t allow-write-cache: #f)))) + exn ;; NOTE: I've no idea why this was written this way. Research, study and fix needed! + (begin + (debug:print 0 *default-log-port* "testconfig load using " item-path " failed, trying " (db:test-get-item-path testdat) ", exn=" exn) + (tests:get-testconfig (db:test-get-testname testdat) (db:test-get-item-path testdat) test-registry #f allow-write-cache: #f)) + (tests:get-testconfig (db:test-get-testname testdat) item-path test-registry #t allow-write-cache: #f)))) (viewlog (lambda (x) (if (common:file-exists? logfile) ;(system (conc "firefox " logfile "&")) (dcommon:run-html-viewer logfile) (message-window (conc "File " logfile " not found"))))) @@ -561,13 +565,16 @@ (> (current-milliseconds)(+ last-update 10000)) ;; force update even 10 seconds request-update)) (newtestdat (if need-update ;; NOTE: BUG HIDER, try to eliminate this exception handler (handle-exceptions - exn - (debug:print-info 0 *default-log-port* "test db access issue in examine test for run-id " run-id ", test-id " test-id ": " ((condition-property-accessor 'exn 'message) exn)) - (rmt:get-test-info-by-id run-id test-id ))))) + exn + (begin + (debug:print-info 0 *default-log-port* "test db access issue in examine test for run-id " run-id + ", test-id " test-id ": " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) + #f) + (rmt:get-test-info-by-id run-id test-id))))) ;; (print "INFO: need-update= " need-update " curr-mod-time = " curr-mod-time) (cond ((and need-update newtestdat) (set! testdat newtestdat) (set! teststeps (augment-teststeps (tests:get-compressed-steps run-id test-id))) Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -2054,11 +2054,11 @@ (iup:attribute-set! run-matrix key (cadr value)) (iup:attribute-set! run-matrix (conc "BGCOLOR" key) (car value)))))) matrix-content) ;; Col labels - do after setting Cell contents so they are accounted for in the size calc. - + (for-each (lambda (ind) (let* ((name (car ind)) (num (cadr ind)) (key (conc "0:" num))) (if (not (equal? (iup:attribute run-matrix key) name)) @@ -2127,11 +2127,11 @@ (file-read-access? source)) (handle-exceptions exn (begin (print-call-chain) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) (debug:print 0 *default-log-port* "ERROR: failed to load " source ", try loading in the repl: megatest -repl") (set! success #f)) (load source)) (begin (debug:print 0 *default-log-port* "ERROR: cannot find file to load: \"" source "\" for user view " view-name))) @@ -2139,11 +2139,11 @@ (if success (handle-exceptions exn (begin (print-call-chain) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) (debug:print 0 *default-log-port* "ERROR: failed call procedure " viewgen ", with; tab-num=" tab-num ", view-name=" view-name ", and views-cfgdat and megatest configdat as parameters. To debug try loading in the repl: megatest -repl") (set! success #f)) (print "Adding tab " view-name " with proc " viewgen) @@ -2156,11 +2156,11 @@ (lambda () (handle-exceptions exn (begin (print-call-chain) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) (debug:print 0 *default-log-port* "ERROR: failed call procedure \"" updater "\", with; tabnum=" tab-num ", view-name=" view-name ", and views-cfgdat and megatest configdat as parameters. To debug try loading in the repl: megatest -repl") (set! success #f)) (debug:print 4 *default-log-port* "Running updater for tab " view-name " with proc " updater " and tab-num: " tab-num) @@ -3044,11 +3044,12 @@ (define (dashboard:get-youngest-run-db-mod-time dbdir) (handle-exceptions exn (begin - (debug:print 2 *default-log-port* "WARNING: error in accessing databases in get-youngest-run-db-mod-time: " ((condition-property-accessor 'exn 'message) exn) " db-dir="dbdir) + (debug:print 2 *default-log-port* "WARNING: error in accessing databases in get-youngest-run-db-mod-time: " + ((condition-property-accessor 'exn 'message) exn) " db-dir="dbdir ", exn=" exn) (current-seconds)) ;; something went wrong - just print an error and return current-seconds (common:max (map (lambda (filen) (file-modification-time filen)) (glob (conc dbdir "/*.db*")))))) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -124,11 +124,11 @@ (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn) (if (eq? err-status 'done) default (begin - (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) (print-call-chain (current-error-port)) default))) (apply sqlite3:first-result db stmt params))) ;; Get/open a database @@ -355,11 +355,10 @@ (dbr:dbstruct-mtdb-set! dbstruct mtdb) (dbr:dbstruct-tmpdb-set! dbstruct tmpdb) (dbr:dbstruct-dbstack-set! dbstruct (make-stack)) ;; BB: why a stack? Why would the number of db's be indeterminate? Is this a legacy of 1.db 2.db .. ? (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb) ;; olddb is already a (cons db path) (dbr:dbstruct-refndb-set! dbstruct refndb) - ;; (mutex-unlock! *rundb-mutex*) (if (and (or (not dbfexists) (and modtimedelta (> modtimedelta max-stale-tmp))) ;; if db in tmp is over ten seconds older than the file in MTRA then do a sync back do-sync) (begin @@ -471,11 +470,11 @@ (define (db:close-all dbstruct) (if (dbr:dbstruct? dbstruct) (handle-exceptions exn (begin - (debug:print 0 *default-log-port* "WARNING: Finalizing failed, " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* "WARNING: Finalizing failed, " ((condition-property-accessor 'exn 'message) exn) ", note - exn=" exn) (print-call-chain *default-log-port*)) ;; (db:sync-touched dbstruct 0 force-sync: #t) ;; NO. Do not do this here. Instead we rely on a server to be started when there are writes, even if the server itself is not going to be used as a server. (let ((tdbs (map db:dbdat-get-db (stack->list (dbr:dbstruct-dbstack dbstruct)))) (mdb (db:dbdat-get-db (dbr:dbstruct-mtdb dbstruct))) @@ -639,10 +638,11 @@ ;; (else ;; ((equal? fname "megatest.db") ;; this file can be regenerated if needed (handle-exceptions exn (begin + (print "Problems trying to repair the db, exn=" exn) ;; (db:move-and-recreate-db dbdat) (if (> numtries 0) (db:repair-db dbdat numtries: (- numtries 1)) #f) (debug:print 0 *default-log-port* "FATAL: file " dbpath " was found corrupted, an attempt to fix has been made but you must start over.") @@ -1223,29 +1223,29 @@ FOR EACH ROW BEGIN UPDATE runs SET last_update=(strftime('%s','now')) WHERE id=old.id; END;" ) - (list "update_run_stats_trigger" "CREATE TRIGGER IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats + (list "update_run_stats_trigger" "CREATE TRIGGER IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats FOR EACH ROW BEGIN UPDATE run_stats SET last_update=(strftime('%s','now')) WHERE id=old.id; END;" ) - (list "update_tests_trigger" "CREATE TRIGGER IF NOT EXISTS update_tests_trigger AFTER UPDATE ON tests + (list "update_tests_trigger" "CREATE TRIGGER IF NOT EXISTS update_tests_trigger AFTER UPDATE ON tests FOR EACH ROW BEGIN UPDATE tests SET last_update=(strftime('%s','now')) WHERE id=old.id; END;" ) - (list "update_teststeps_trigger" "CREATE TRIGGER IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps + (list "update_teststeps_trigger" "CREATE TRIGGER IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps FOR EACH ROW BEGIN UPDATE test_steps SET last_update=(strftime('%s','now')) WHERE id=old.id; END;" ) - (list "update_test_data_trigger" "CREATE TRIGGER IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data + (list "update_test_data_trigger" "CREATE TRIGGER IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data FOR EACH ROW BEGIN UPDATE test_data SET last_update=(strftime('%s','now')) WHERE id=old.id; END;" ))) @@ -1260,14 +1260,14 @@ (for-each (lambda (key) (sqlite3:execute db (cadr key))) db:trigger-list)) (define (db:drop-all-triggers dbstruct) -(db:with-db + (db:with-db dbstruct #f #f (lambda (db) -(db:drop-triggers db)))) + (db:drop-triggers db)))) (define (db:is-trigger-dropped db tbl-name) (let* ((trigger-name (if (equal? tbl-name "test_steps") "update_teststeps_trigger" (conc "update_" tbl-name "_trigger")))) @@ -1529,11 +1529,10 @@ state TEXT DEFAULT 'new', status TEXT DEFAULT 'n/a', archive_type TEXT DEFAULT 'bup', du INTEGER, archive_path TEXT);"))) - (print "creating trigges from init") (db:create-triggers db) db)) ;; ) ;;====================================================================== ;; A R C H I V E S @@ -2241,11 +2240,12 @@ (n 0)) (if (equal? hed field) (handle-exceptions exn (begin - (debug:print 0 *default-log-port* "WARNING: attempt to read non-existant field, row=" row " header=" header " field=" field) + (debug:print 0 *default-log-port* "WARNING: attempt to read non-existant field, row=" + row " header=" header " field=" field ", exn=" exn) #f) (vector-ref row n)) (if (null? tal) #f (loop (car tal)(cdr tal)(+ n 1))))))) ;; Accessors for the header/data structure @@ -3463,11 +3463,11 @@ (lambda (run-id) (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id))) (db:prep-megatest.db-adj-test-ids (db:dbdat-get-db mtdb) run-id testrecs))) run-ids))) -;; Get test data using test_id, run-id is not used +;; Get test data using test_id, run-id is not used - but it will be! ;; (define (db:get-test-info-by-id dbstruct run-id test-id) (db:with-db dbstruct #f ;; run-id @@ -4481,11 +4481,11 @@ (db (db:dbdat-get-db dbdat)) ;; we'll return this so (db:delay--if-busy can be called inline (dbfj (conc dbpath "-journal"))) (if (handle-exceptions exn (begin - (debug:print-info 0 *default-log-port* "WARNING: failed to test for existance of " dbfj) + (debug:print-info 0 *default-log-port* "WARNING: failed to test for existance of " dbfj ", exn=" exn) (thread-sleep! 1) (db:delay-if-busy count (- count 1))) (common:file-exists? dbfj)) (case count ((6) @@ -4646,11 +4646,11 @@ (begin (handle-exceptions exn (begin (debug:print 0 *default-log-port* - "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl) + "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl ", exn=" exn) res) (string-substitute patt repl res)) ) Index: dcommon.scm ================================================================== --- dcommon.scm +++ dcommon.scm @@ -1434,11 +1434,12 @@ (define (dashboard:get-youngest-run-db-mod-time dbdir) (handle-exceptions exn (begin - (debug:print 2 *default-log-port* "WARNING: error in accessing databases in get-youngest-run-db-mod-time: " ((condition-property-accessor 'exn 'message) exn) " db-dir="dbdir) + (debug:print 2 *default-log-port* "WARNING: error in accessing databases in get-youngest-run-db-mod-time: " ((condition-property-accessor 'exn 'message) exn) + " db-dir="dbdir ", exn=" exn) (current-seconds)) ;; something went wrong - just print an error and return current-seconds (common:max (map (lambda (filen) (file-modification-time filen)) (glob (conc dbdir "/*.db*")))))) Index: docs/manual/megatest_manual.txt ================================================================== --- docs/manual/megatest_manual.txt +++ docs/manual/megatest_manual.txt @@ -116,10 +116,12 @@ include::writing_tests.txt[] include::howto.txt[] include::reference.txt[] + +include::testplan.txt[] Megatest Internals ------------------ ["graphviz", "server.png"] Index: docs/manual/reference.txt ================================================================== --- docs/manual/reference.txt +++ docs/manual/reference.txt @@ -205,10 +205,27 @@ [setup] # this will automatically kill the test if it runs for more than 1h 2m and 3s runtimelim 1h 2m 3s ----------------- +Post Run Hook ++++++++++++++ + +This runs script to-run.sh after all tests have been completed. It is +not necessary to use -run-wait as each test will check for other +running tests on completion and if there are none it will call the +post run hook. + +Note that the output from the script call will be placed in a log file +in the logs directory with a file name derived by replacing / with _ +in post-hook--.log. + +------------------- +[runs] +post-hook /path/to/script/to-run.sh +------------------- + Tests browser view ~~~~~~~~~~~~~~~~~~ The tests browser (see the Run Control tab on the dashboard) has two views for displaying the tests. @@ -707,10 +724,16 @@ --------------------------- Ezsteps ~~~~~~~ +Ezsteps is the recommended way to implement tests and automation in +Megatest. + +NOTE: Each ezstep must be a single line. Use the [scripts] mechanism +to create multiline scripts (see example below). + .Example ezsteps with logpro rules ----------------- [ezsteps] lookittmp ls /tmp @@ -719,16 +742,68 @@ ;; a blank line indicates the end of the block of text (expect:required in "LogFileBody" > 0 "A file name that should never exist!" #/This is a awfully stupid file name that should never be found in the temp dir/) ----------------- -To transfer the environment to the next step you can do the following: +Automatic environment propagation with Ezsteps +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.Propagate environment to next step ----------------------------- -$MT_MEGATEST -env2file .ezsteps/${stepname} ----------------------------- +Turn on ezpropvars and environment variables will be propagated from +step to step. Use this to source script files that modify the +envionment where the modifications are needed in subsequent steps. + +NOTE: aliases and variables with strange whitespace or characters will +not propagate correctly. Put in a ticket on the +http://www.kiatoa.com/fossils/megatest site if you need support for a +specific strange character combination. + +.Turn on auto propagate for bash +--------------------------- +[setup] +ezpropvars sh +--------------------------- + +.Write your ezsteps. The loadenv.csh step will use /bin/csh as its shell, other steps will use bash. +--------------------------- +[ezsteps] +loadenv.csh source $REF/ourenviron.csh +compile make +install make install +--------------------------- + +Bash and csh are supported. You can override the shell binary location +from the default /bin/bash and /bin/csh if needed. + +.Turn on auto propagate for csh +--------------------------- +[setup] +ezpropvars csh /bin/csh +--------------------------- + +.Example of auto propagation using extensions +--------------------------- +[ezsteps] +step1.sh export SOMEVAR=$(ps -def | wc -l);ls /tmp +# The next step will get the value of $SOMEVAR from step1.sh +step2.sh echo $SOMEVAR +--------------------------- + +.Example of multi-line script +--------------------------- +[scripts] +tarresults tar cfvz $DEST/srcdir1.tar.gz srcdir1 + tar cfvz $DEST/srcdir2.tar.gz srcdir2 + +[setup] +ezpropvars sh + +[ezsteps] +step1 DEST=/tmp/targz;source tarresults +--------------------------- + +The above example will result in files; tarresults and ez_step1 being +created in the test dir. Scripts ~~~~~~~ .Specifying scripts inline (best used for only simple scripts) Index: ezsteps.scm ================================================================== --- ezsteps.scm +++ ezsteps.scm @@ -80,11 +80,12 @@ ";;") (print tconfig-logpro))) (set! logpro-used #t))) ;; NB// can safely assume we are in test-area directory - (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts + (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo + " stepparts: " stepparts " stepparams: " stepparams " stepcmd: " stepcmd) ;; ;; first source the previous environment ;; (let ((prev-env (conc ".ezsteps/" prevstep (if (string-search (regexp "csh") ;; (get-environment-variable "SHELL")) ".csh" ".sh")))) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -132,11 +132,14 @@ ((equal? (uri-path (request-uri (current-request))) '(/ "dashboard")) (send-response body: (http-transport:html-dboard $) headers: '((content-type text/HTML)))) (else (continue)))))))) - (with-output-to-file start-file (lambda ()(print (current-process-id)))) + (handle-exceptions + exn + (debug:print 0 *default-log-port* "Failed to create file " start-file ", exn=" exn) + (with-output-to-file start-file (lambda ()(print (current-process-id))))) (http-transport:try-start-server ipaddrstr start-port))) ;; This is recursively run by http-transport:run until sucessful ;; (define (http-transport:try-start-server ipaddrstr portnum) @@ -269,11 +272,11 @@ (set! success #f) (if (debug:debug-mode 1) (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...") (begin (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".") - (debug:print 0 *default-log-port* " message: " msg) + (debug:print 0 *default-log-port* " message: " msg ", exn=" exn) (debug:print 0 *default-log-port* " cmd: " cmd " params: " params) (debug:print 0 *default-log-port* " call-chain: " call-chain))) (if runremote (remote-conndat-set! runremote #f)) ;; Killing associated server to allow clean retry.") @@ -332,11 +335,11 @@ (let ((api-dat (http-transport:server-dat-get-api-uri server-dat))) (handle-exceptions exn (begin (print-call-chain *default-log-port*) - (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn))) + (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) (close-connection! api-dat) ;;(close-idle-connections!) #t)) #f))) @@ -431,11 +434,14 @@ (last-access 0) (server-timeout (server:expiration-timeout)) (server-going #f) (server-log-file (args:get-arg "-log"))) ;; always set when we are a server - (with-output-to-file started-file (lambda ()(print (current-process-id)))) + (handle-exceptions + exn + (debug:print 0 *default-log-port* "Failed to create " started-file ", exn=" exn) + (with-output-to-file started-file (lambda ()(print (current-process-id))))) (let loop ((count 0) (server-state 'available) (bad-sync-count 0) (start-time (current-milliseconds))) @@ -494,11 +500,11 @@ (if (common:low-noise-print 120 "server continuing") (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) (let ((curr-time (current-seconds))) (handle-exceptions exn - (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk?") + (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk? exn=" exn) (if (not *server-overloaded*) (change-file-times server-log-file curr-time curr-time))))) (loop 0 server-state bad-sync-count (current-milliseconds))) (else (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -274,11 +274,11 @@ (lambda (pid) (handle-exceptions exn (begin (debug:print-info 0 *default-log-port* "Unable to kill process with pid " pid ", possibly already killed.") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) (debug:print 0 *default-log-port* "WARNING: Request received to kill job " pid) ;; " (attempt # " kill-tries ")") (debug:print-info 0 *default-log-port* "Signal mask=" (signal-mask)) ;; (if (process:alive? pid) ;; (begin (map (lambda (pid-num) @@ -286,13 +286,15 @@ (process:get-sub-pids pid)) (thread-sleep! 5) ;; (if (process:process-alive? pid) (map (lambda (pid-num) (handle-exceptions - exn - #f - (process-signal pid-num signal/kill))) + exn + (begin + (debug:print 0 *default-log-port* " .... had trouble sending kill to " pid-num ", exn=" exn) + #f) + (process-signal pid-num signal/kill))) (process:get-sub-pids pid)))) ;; (debug:print-info 0 *default-log-port* "not killing process " pid " as it is not alive")))) pids) ;; BB: question to Matt -- does the tests:test-state-status! encompass rollup to toplevel? If not, should it? (tests:test-set-status! run-id test-id "KILLED" "KILLED" (conc (args:get-arg "-m")" "kill-reason) #f)) ;; BB ADDED kill-reason -- confirm OK with Matt @@ -396,11 +398,10 @@ ;; one more time, change to the work-area directory (change-directory work-area))) ) ;; let* (if contour (setenv "MT_CONTOUR" contour)) - ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ... ;; (setenv "MT_TESTSUITENAME" areaname) (setenv "MT_RUN_AREA_HOME" top-path) (set! *toppath* top-path) @@ -767,11 +768,13 @@ (if (not (null? tal)) (loop (car tal) (cdr tal))))))))))) (define (launch:is-test-alive host pid) (if (and host pid (not (equal? host "n/a"))) - (let* ((cmd (conc "ssh " host " pstree -A " pid)) + (let* ((is-local (equal? host (get-host-name))) + (ssh-cmd (if is-local " " (conc "ssh " host " "))) + (cmd (conc ssh-cmd "pstree -A " pid)) (output (with-input-from-pipe cmd read-lines))) (debug:print 2 *default-log-port* "Running " cmd " received " output) (if (eq? (length output) 0) #f #t)) @@ -1039,18 +1042,18 @@ (begin (handle-exceptions exn (begin (debug:print-error 0 *default-log-port* "Something went wrong when trying to create linktree dir at " linktree) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) (exit 1)) (create-directory linktree #t)))) (handle-exceptions exn (begin (debug:print-error 0 *default-log-port* "Something went wrong when trying to create link to linktree at " *toppath*) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) (let ((tlink (conc *toppath* "/lt"))) (if (not (common:file-exists? tlink)) (create-symbolic-link linktree tlink))))) (begin (debug:print-error 0 *default-log-port* "linktree not defined in [setup] section of megatest.config") @@ -1111,11 +1114,15 @@ ;;(exit 1) (if (null? disks) (cons 1 (conc *toppath* "/runs")) (let ((paths (sort disks (lambda (x y) (> (string-length (cadr x)) (string-length (cadr y))))))) (let loop ((head (car paths)) (tail (cdr paths))) - (let ((result (handle-exceptions exn #f (create-directory (cadr head) #t)))) + (let ((result (handle-exceptions exn + (begin + (debug:print 0 *default-log-port* "failed to create dir " (cadr head) ", exn=" exn) + #f) + (create-directory (cadr head) #t)))) (if result result (if (null? tail) (cons 1 (conc *toppath* "/runs")) (loop (car tail) (cdr tail))))))))))) @@ -1207,11 +1214,11 @@ (let ((success (if (and (not (common:directory-exists? lnkbase)) (not (common:file-exists? lnkbase))) (handle-exceptions exn (begin - (debug:print-error 0 *default-log-port* "Problem creating linktree base at " lnkbase) + (debug:print-error 0 *default-log-port* "Problem creating linktree base at " lnkbase ", exn=" exn) (print-error-message exn (current-error-port)) #t) (create-directory lnkbase #t) #f)))) (if (and (not success)(> done 0)) @@ -1230,28 +1237,31 @@ (let ((iterated-parent (pathname-directory (conc lnkpath "/" item-path)))) (debug:print-info 2 *default-log-port* "Creating iterated parent " iterated-parent) (handle-exceptions exn (begin - (debug:print-error 0 *default-log-port* " Failed to create directory " iterated-parent ((condition-property-accessor 'exn 'message) exn) ", continuing but link tree may be corrupted") + (debug:print-error 0 *default-log-port* " Failed to create directory " iterated-parent ((condition-property-accessor 'exn 'message) exn) + ", continuing but link tree may be corrupted, exn=" exn) #;(exit 1)) (create-directory iterated-parent #t)))) (if (symbolic-link? lnkpath) (handle-exceptions exn (begin - (debug:print-error 0 *default-log-port* " Failed to remove symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", continuing but link tree may be corrupted.") + (debug:print-error 0 *default-log-port* " Failed to remove symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) + ", continuing but link tree may be corrupted. exn=" exn) #;(exit 1)) (delete-file lnkpath))) (if (not (or (common:file-exists? lnkpath) (symbolic-link? lnkpath))) (handle-exceptions exn (begin - (debug:print-error 0 *default-log-port* " Failed to create symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", continuing but link tree may be corrupted.") + (debug:print-error 0 *default-log-port* " Failed to create symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) + ", continuing but link tree may be corrupted. exn=" exn) #;(exit 1)) (create-symbolic-link toptest-path lnkpath))) ;; NB - This was not working right - some top tests are not getting the path set!!! ;; @@ -1278,12 +1288,14 @@ (if (or (not curr-test-path) (not (directory-exists? toptest-path))) (begin (debug:print-info 2 *default-log-port* "Creating " toptest-path " and link " lnkpath) (handle-exceptions - exn - #f ;; don't care to catch and deal with errors here for now. + exn + (begin + (debug:print 0 *default-log-port* "failed to create directory " toptest-path ", exn=" exn) + #f) (create-directory toptest-path #t)) (hash-table-set! *toptest-paths* testname toptest-path))))) ;; The toptest path has been created, the link to the test in the linktree has ;; been created. Now, if this is an iterated test the real test dir must be created @@ -1292,11 +1304,12 @@ (debug:print 2 *default-log-port* "Setting up sub test run area") (debug:print 2 *default-log-port* " - creating run area in " test-path) (handle-exceptions exn (begin - (debug:print-error 0 *default-log-port* " Failed to create directory " test-path ((condition-property-accessor 'exn 'message) exn) ", exiting") + (debug:print-error 0 *default-log-port* " Failed to create directory " test-path ((condition-property-accessor 'exn 'message) exn) + ", exiting, exn=" exn) (exit 1)) (create-directory test-path #t)) (debug:print 2 *default-log-port* " - creating link from: " test-path "\n" " to: " lnktarget) @@ -1303,11 +1316,11 @@ ;; If there is already a symlink delete it and recreate it. (handle-exceptions exn (begin - (debug:print-error 0 *default-log-port* " Failed to re-create link " lnktarget ((condition-property-accessor 'exn 'message) exn) ", exiting") + (debug:print-error 0 *default-log-port* " Failed to re-create link " lnktarget ((condition-property-accessor 'exn 'message) exn) ", exiting, exn=" exn) (exit)) (if (symbolic-link? lnktarget) (delete-file lnktarget)) (if (not (common:file-exists? lnktarget)) (create-symbolic-link test-path lnktarget))))) (if (not (directory? test-path)) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -498,11 +498,11 @@ (lambda () (handle-exceptions exn (begin (print-call-chain) - (print " message: " ((condition-property-accessor 'exn 'message) exn))) + (print " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) (common:watchdog))) "Watchdog thread")) ;;(if (not (args:get-arg "-server")) ;; (thread-start! *watchdog*)) ;; if starting a server; wait till we get to running state before kicking off watchdog @@ -552,11 +552,11 @@ ;; (if (or (args:get-arg "-log")(args:get-arg "-server")) ;; redirect the log always when a server (handle-exceptions exn (begin - (print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn)) + (print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) ) (let* ((tl (or (args:get-arg "-log")(launch:setup))) ;; run launch:setup if -server, ensure we do NOT run launch:setup if -log specified (logf (or (args:get-arg "-log") ;; use -log unless we are a server, then craft a logfile name (conc tl "/logs/server-" (current-process-id) "-" (get-host-name) ".log"))) (oup (open-logfile logf))) @@ -602,12 +602,14 @@ (printf "Preparing to exit with exit code ~A ...\n" exit-code) (for-each (lambda (pid) (handle-exceptions - exn - #t + exn + (begin + (printf "process reap failed. exn=~A\n" exn) + #t) (let-values (((pid-val exit-status exit-code) (process-wait pid #t))) (if (or (eq? pid-val pid) (eq? pid-val 0)) (begin (printf "Sending signal/term to ~A\n" pid) Index: mt.scm ================================================================== --- mt.scm +++ mt.scm @@ -190,11 +190,11 @@ ;; (mutex-lock! *triggers-mutex*) (handle-exceptions exn (begin (debug:print-error 0 *default-log-port* " Exception in mt:process-triggers for run-id="run-id" test-id="test-id" newstate="newstate" newstatus="newstatus - "\n error: " ((condition-property-accessor 'exn 'message) exn) + "\n error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn "\n test-rundir="test-rundir "\n test-name="test-name "\n item-path="item-path "\n state="state "\n status="status Index: process.scm ================================================================== --- process.scm +++ process.scm @@ -196,12 +196,14 @@ (equal? rpid pid))))) (define (process:alive-on-host? host pid) (let ((cmd (conc "ssh " host " ps -o pid= -p " pid))) (handle-exceptions - exn - #f ;; anything goes wrong - assume the process in NOT running. + exn + (begin + (debug:print 0 *default-log-port* "failed to identify if process " pid ", on host " host " is alive. exn=" exn) + #f) ;; anything goes wrong - assume the process in NOT running. (with-input-from-pipe cmd (lambda () (let loop ((inl (read-line))) (if (eof-object? inl) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -415,13 +415,15 @@ res)) (define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params) (let* ((run-id (if run-id run-id 0)) (res (handle-exceptions - exn - #f - (http-transport:client-api-send-receive run-id connection-info cmd params)))) + exn + (begin + (print "transport failed. exn=" exn) + #f) + (http-transport:client-api-send-receive run-id connection-info cmd params)))) (if (and res (vector-ref res 0)) (vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!! #f))) ;; ;; Wrap json library for strings (why the ports crap in the first place?) @@ -476,13 +478,10 @@ ;; given a hostname, return a pair of cpu load and update time representing latest intelligence from tests running on that host (define (rmt:get-latest-host-load hostname) (rmt:send-receive 'get-latest-host-load 0 (list hostname))) -;; (define (rmt:sync-inmem->db run-id) -;; (rmt:send-receive 'sync-inmem->db run-id '())) - (define (rmt:sdb-qry qry val run-id) ;; add caching if qry is 'getid or 'getstr (rmt:send-receive 'sdb-qry run-id (list qry val))) ;; NOT COMPLETED @@ -556,11 +555,11 @@ (rmt:general-call 'register-test run-id run-id test-name item-path)) (define (rmt:get-test-id run-id testname item-path) (rmt:send-receive 'get-test-id run-id (list run-id testname item-path))) -;; run-id is NOT used +;; run-id is NOT used - but it will be! ;; (define (rmt:get-test-info-by-id run-id test-id) (if (number? test-id) (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id)) (begin @@ -647,15 +646,10 @@ ;; run-id-list)))) (define (rmt:delete-test-records run-id test-id) (rmt:send-receive 'delete-test-records run-id (list run-id test-id))) -;; This is not needed as test steps are deleted on test delete call -;; -;; (define (rmt:delete-test-step-records run-id test-id) -;; (rmt:send-receive 'delete-test-step-records run-id (list run-id test-id))) - (define (rmt:test-set-state-status run-id test-id state status msg) (rmt:send-receive 'test-set-state-status run-id (list run-id test-id state status msg))) (define (rmt:test-toplevel-num-items run-id test-name) (rmt:send-receive 'test-toplevel-num-items run-id (list run-id test-name))) @@ -694,13 +688,10 @@ (apply append (map (lambda (run-id) (rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list run-id keynames target res testpatt statepatt statuspatt runname))) run-ids)))) -;; (define (rmt:get-run-ids-matching keynames target res) -;; (rmt:send-receive #f 'get-run-ids-matching (list keynames target res))) - (define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f)) (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps))) (define (rmt:get-count-tests-running-for-run-id run-id fastmode) (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id fastmode))) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -16,11 +16,12 @@ ;; along with Megatest. If not, see . ;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') (use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (srfi 18) - posix-extras directory-utils pathname-expand typed-records format sxml-serializer sxml-modifications) + posix-extras directory-utils pathname-expand typed-records format sxml-serializer + sxml-modifications matchable) (declare (unit runs)) (declare (uses db)) (declare (uses common)) (declare (uses items)) @@ -213,15 +214,17 @@ exn (let ((call-chain (get-call-chain)) (msg ((condition-property-accessor 'exn 'message) exn))) (if (< count 5) (begin ;; this call is colliding, do some crude stuff to fix it. - (debug:print 0 *default-log-port* "ERROR: *configdat* was inaccessible! This should never happen. Retry #" count) + (debug:print 0 *default-log-port* "ERROR: *configdat* was inaccessible! This should never happen. Retry #" count + ", exn=" exn) (launch:setup force-reread: #t) (fatal-loop (+ count 1))) (begin - (debug:print 0 *default-log-port* "FATAL: *configdat* was inaccessible! This should never happen. Retried " count " times. Message: " msg) + (debug:print 0 *default-log-port* "FATAL: *configdat* was inaccessible! This should never happen. Retried " count + " times. Message: " msg) (debug:print 0 *default-log-port* "Call chain:") (with-output-to-port *default-log-port* (lambda () (print "*configdat* is >>"*configdat*"<<") @@ -378,11 +381,11 @@ (if (null? existing-tests) (let* ((use-log-dir (if (not (directory-exists? log-dir)) (handle-exceptions exn (begin - (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir) + (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir ", exn=" exn) #f) (create-directory log-dir #t) #t) #t)) (start-time (current-seconds)) @@ -389,11 +392,11 @@ (actual-logf (if use-log-dir full-log-fname log-file))) (handle-exceptions exn (begin (print-call-chain *default-log-port*) - (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) (debug:print 0 *default-log-port* "ERROR: failed to run pre-hook " run-pre-hook ", check the log " log-file)) (debug:print-info 0 *default-log-port* "running run-pre-hook: \"" run-pre-hook "\", log is " actual-logf) (system (conc run-pre-hook " >> " actual-logf " 2>&1")) (debug:print-info 0 *default-log-port* "pre-hook \"" run-pre-hook "\" took " (- (current-seconds) start-time) " seconds to run."))) (debug:print 0 *default-log-port* "Skipping pre-hook call \"" run-pre-hook "\" as there are existing tests for this run."))))) @@ -418,11 +421,11 @@ ;; (debug:print 0 *default-log-port* "Skipping post-hook call \"" run-post-hook "\" as there are existing tests for this run."))))) (let* ((use-log-dir (if (not (directory-exists? log-dir)) (handle-exceptions exn (begin - (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir) + (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir ", exn=" exn) #f) (create-directory log-dir #t) #t) #t)) (start-time (current-seconds)) @@ -429,11 +432,11 @@ (actual-logf (if use-log-dir full-log-fname log-file))) (handle-exceptions exn (begin (print-call-chain *default-log-port*) - (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) (debug:print 0 *default-log-port* "ERROR: failed to run post-hook " run-post-hook ", check the log " log-file)) (debug:print-info 0 *default-log-port* "running run-post-hook: \"" run-post-hook "\", log is " actual-logf) (system (conc run-post-hook " >> " actual-logf " 2>&1")) (debug:print-info 0 *default-log-port* "post-hook \"" run-post-hook "\" took " (- (current-seconds) start-time) " seconds to run.")))))) @@ -609,17 +612,17 @@ (runs:run-pre-hook run-id) ;; mark all test launced flag as false in the meta table (rmt:set-var (conc "lunch-complete-" run-id) "no") (debug:print-info 1 *default-log-port* "Setting end-of-run to no") (let* ((config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns"))) - (if x (string->number x) #f))) - (config-rerun-cnt (if config-reruns - config-reruns - 1))) - (if (eq? config-rerun-cnt run-count) - (rmt:set-var (conc "end-of-run-" run-id) "no"))) - + (if x (string->number x) #f))) + (config-rerun-cnt (if config-reruns + config-reruns + 1))) + (if (eq? config-rerun-cnt run-count) + (rmt:set-var (conc "end-of-run-" run-id) "no"))) + (rmt:set-run-state-status run-id "new" "n/a") ;; now add non-directly referenced dependencies (i.e. waiton) ;;====================================================================== ;; refactoring this block into tests:get-full-data ;; @@ -1573,19 +1576,10 @@ itemmaps: itemmaps ;; prereqs-not-met: prereqs-not-met ))) (runs:dat-regfull-set! runsdat regfull) - ;; -- removed BB 17ww28 - no longer needed. - ;; every 15 minutes verify the server is there for this run - ;; (if (and (common:low-noise-print 240 "try start server" run-id) - ;; (not (or (and *runremote* - ;; (remote-server-url *runremote*) - ;; (server:ping (remote-server-url *runremote*))) - ;; (server:check-if-running *toppath*)))) - ;; (server:kind-run *toppath*)) - (if (> num-running 0) (set! last-time-some-running (current-seconds))) (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000))) (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1))) @@ -2189,29 +2183,10 @@ path-out ) ) -;; (define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep) -;; (let ((data (runs:get-all-but-most-recent-n-per-target target-patts runpatt num-to-keep))) -;; (for-each -;; (lambda (target) -;; (let ((runs-to-remove (hash-table-ref data target ))) -;; (for-each -;; (lambda (run) -;; (print "megatest -remove-runs -target " target " -runname " (simple-run-runname run) " -testpatt %")) -;; runs-to-remove))) -;; (hash-table-keys data)))) - -;; Remove runs -;; fields are passing in through -;; action: -;; 'remove-runs -;; 'set-state-status -;; -;; NB// should pass in keys? -;; (define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(mode #f)(options '())) (common:clear-caches) ;; clear all caches (let* ((db #f) ;; (tdbdat (tasks:open-db)) (keys (rmt:get-keys)) @@ -2628,18 +2603,18 @@ (if (symbolic-link? run-dir) (begin (debug:print-info 1 *default-log-port* "Removing symlink " run-dir) (handle-exceptions exn - (debug:print-error 0 *default-log-port* " Failed to remove symlink " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") + (debug:print-error 0 *default-log-port* " Failed to remove symlink " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue, exn=" exn) (delete-file run-dir))) (if (directory? run-dir) (if (> (directory-fold (lambda (f x)(+ 1 x)) 0 run-dir) 0) (debug:print 0 *default-log-port* "WARNING: refusing to remove " run-dir " as it is not empty") (handle-exceptions exn - (debug:print-error 0 *default-log-port* " Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") + (debug:print-error 0 *default-log-port* " Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue, exn=" exn) (delete-directory run-dir))) (if (and run-dir (not (member run-dir (list "n/a" "/tmp/badname")))) (debug:print 0 *default-log-port* "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink") (debug:print 0 *default-log-port* "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) @@ -2912,11 +2887,11 @@ (let* ((final-doc ((sxml-modify `("testsuite" insert-into (@ (name ,xml-ts-name) (tests ,tests-count) (errors ,error-cnt) (failures ,fail-cnt)))) new-doc))) (debug:print 0 *default-log-port* "modify attrib error=" error-cnt " fail= " fail-cnt) (handle-exceptions exn (let* ((msg ((condition-property-accessor 'exn 'message) exn))) - (debug:print 0 *default-log-port* (conc "WARNING: Failed to update file" xml-path". Message:" msg))) + (debug:print 0 *default-log-port* (conc "WARNING: Failed to update file" xml-path". Message:" msg ", exn=" exn))) (if (not (file-exists? xml-dir)) (create-directory xml-dir #t)) (if (not (rmt:no-sync-get/default keyname #f)) (begin @@ -2946,10 +2921,10 @@ (debug:print-info 0 *default-log-port* "Removing cached files:\n " (string-intersperse files "\n ")) (for-each (lambda (f) (handle-exceptions exn - (debug:print 0 *default-log-port* "WARNING: Failed to remove file " f) + (debug:print 0 *default-log-port* "WARNING: Failed to remove file " f ", exn=" exn) (delete-file f))) files)))) (debug:print-error 0 *default-log-port* "-clean-cache requires -runname.")) (debug:print-error 0 *default-log-port* "-clean-cache requires -target or -reqtarg"))) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -135,11 +135,11 @@ "") ;; " -log " logfile " -m testsuite:" testsuite " " profile-mode )) ;; (conc " >> " logfile " 2>&1 &"))))) - (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) + (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) ;; why are we rotating logs here? This is a sensitive location with a lot going on!? (load-limit (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0))) ;; we want the remote server to start in *toppath* so push there (push-directory areapath) (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...") (thread-start! log-rotate) @@ -200,13 +200,13 @@ (if (if (directory-exists? (conc areapath "/logs")) '() (if (file-write-access? areapath) (begin (condition-case - (create-directory (conc areapath "/logs") #t) - (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs"))) - (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list."))) + (create-directory (conc areapath "/logs") #t) + (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs"))) + (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list. exn=" exn))) (directory-exists? (conc areapath "/logs"))) '())) (let* ((server-logs (glob (conc areapath "/logs/server-*.log"))) (num-serv-logs (length server-logs))) (if (null? server-logs) @@ -213,13 +213,15 @@ '() (let loop ((hed (car server-logs)) (tal (cdr server-logs)) (res '())) (let* ((mod-time (handle-exceptions - exn - (current-seconds) ;; 0 - (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted + exn + (begin + (debug:print 0 *default-log-port* "failed to get modification time on " hed ", exn=" exn) + (current-seconds)) ;; 0 + (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted (down-time (- (current-seconds) mod-time)) (serv-dat (if (or (< num-serv-logs 10) (< down-time 900)) ;; day-seconds)) (server:logf-get-start-info hed) '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at @@ -227,16 +229,16 @@ (fmatch (string-match fname-rx hed)) (pid (if fmatch (string->number (list-ref fmatch 2)) #f)) (new-res (if (null? serv-dat) res (cons (append serv-rec (list pid)) res)))) - (if (null? tal) - (if (and limit - (> (length new-res) limit)) - new-res ;; (take new-res limit) <= need intelligent sorting before this will work - new-res) - (loop (car tal)(cdr tal) new-res))))))))) + (if (null? tal) + (if (and limit + (> (length new-res) limit)) + new-res ;; (take new-res limit) <= need intelligent sorting before this will work + new-res) + (loop (car tal)(cdr tal) new-res))))))))) (define (server:get-num-alive srvlst) (let ((num-alive 0)) (for-each (lambda (server) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -73,11 +73,11 @@ (configf:lookup *configdat* "setup" "dbdir") (conc (common:get-linktree) "/.db")))) (handle-exceptions exn (begin - (debug:print-error 0 *default-log-port* "Couldn't create path to " dbdir) + (debug:print-error 0 *default-log-port* "Couldn't create path to " dbdir ", exn=" exn) (exit 1)) (if (not (directory? dbdir))(create-directory dbdir #t))) dbdir)) ;; If file exists AND @@ -443,23 +443,23 @@ (db:with-db dbstruct #f #t (lambda (db) (sqlite3:execute db (conc "DELETE FROM tasks_queue WHERE id IN (" task-ids ");"))))) -#;(define (tasks:process-queue dbstruct) - (let* ((task (tasks:snag-a-task dbstruct)) - (action (if task (tasks:task-get-action task) #f))) - (if action (print "tasks:process-queue task: " task)) - (if action - (case (string->symbol action) - ((run) (tasks:start-run dbstruct task)) - ((remove) (tasks:remove-runs dbstruct task)) - ((lock) (tasks:lock-runs dbstruct task)) - ;; ((monitor) (tasks:start-monitor db task)) - #;((rollup) (tasks:rollup-runs dbstruct task)) - ((updatemeta)(tasks:update-meta dbstruct task)) - #;((kill) (tasks:kill-monitors dbstruct task)))))) +;; (define (tasks:process-queue dbstruct) +;; (let* ((task (tasks:snag-a-task dbstruct)) +;; (action (if task (tasks:task-get-action task) #f))) +;; (if action (print "tasks:process-queue task: " task)) +;; (if action +;; (case (string->symbol action) +;; ((run) (tasks:start-run dbstruct task)) +;; ((remove) (tasks:remove-runs dbstruct task)) +;; ((lock) (tasks:lock-runs dbstruct task)) +;; ;; ((monitor) (tasks:start-monitor db task)) +;; #;((rollup) (tasks:rollup-runs dbstruct task)) +;; ((updatemeta)(tasks:update-meta dbstruct task)) +;; #;((kill) (tasks:kill-monitors dbstruct task)))))) (define (tasks:tasks->text tasks) (let ((fmtstr "~10a~10a~10a~12a~20a~12a~12a~10a")) (conc (format #f fmtstr "id" "action" "owner" "state" "target" "runname" "testpatts" "params") "\n" (string-intersperse @@ -742,11 +742,11 @@ ;; (define (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time) (let* ((runs-ht (hash-table-ref cached-info 'runs)) (runinf (hash-table-ref/default runs-ht run-id #f)) (area-id (vector-ref area-info 0))) - (if runinf + (if runinf runinf ;; already cached (let* ((run-dat (rmt:get-run-info run-id)) ;; NOTE: get-run-info returns a vector < row header > (run-name (rmt:get-run-name-from-id run-id)) (row (db:get-rows run-dat)) ;; yes, this returns a single row (header (db:get-header run-dat)) @@ -755,55 +755,55 @@ (owner (db:get-value-by-header row header "owner")) (event-time (db:get-value-by-header row header "event_time")) (comment (db:get-value-by-header row header "comment")) (fail-count (db:get-value-by-header row header "fail_count")) (pass-count (db:get-value-by-header row header "pass_count")) - (db-contour (db:get-value-by-header row header "contour")) + (db-contour (db:get-value-by-header row header "contour")) (contour (if (args:get-arg "-prepend-contour") - (if (and db-contour (not (equal? db-contour "")) (string? db-contour )) - (begin - (debug:print-info 1 *default-log-port* "db-contour") - db-contour) - (args:get-arg "-contour")))) - (run-tag (if (args:get-arg "-run-tag") + (if (and db-contour (not (equal? db-contour "")) (string? db-contour )) + (begin + (debug:print-info 1 *default-log-port* "db-contour") + db-contour) + (args:get-arg "-contour")))) + (run-tag (if (args:get-arg "-run-tag") (args:get-arg "-run-tag") - "")) - (last-update (db:get-value-by-header row header "last_update")) + "")) + (last-update (db:get-value-by-header row header "last_update")) (keytarg (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target")) - (conc "MT_CONTOUR/MT_AREA/" (string-intersperse (rmt:get-keys) "/")) (string-intersperse (rmt:get-keys) "/"))) ;; e.g. version/iteration/platform + (conc "MT_CONTOUR/MT_AREA/" (string-intersperse (rmt:get-keys) "/")) (string-intersperse (rmt:get-keys) "/"))) ;; e.g. version/iteration/platform (target (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target")) - (conc (or (args:get-arg "-prefix-target") (conc contour "/" (common:get-area-name) "/")) (rmt:get-target run-id)) (rmt:get-target run-id))) ;; e.g. v1.63/a3e1/ubuntu + (conc (or (args:get-arg "-prefix-target") (conc contour "/" (common:get-area-name) "/")) (rmt:get-target run-id)) (rmt:get-target run-id))) ;; e.g. v1.63/a3e1/ubuntu (spec-id (pgdb:get-ttype dbh keytarg)) (publish-time (if (args:get-arg "-cp-eventtime-to-publishtime") - event-time - (current-seconds))) + event-time + (current-seconds))) (new-run-id (pgdb:get-run-id dbh spec-id target run-name area-id))) - (if new-run-id - (begin ;; let ((run-record (pgdb:get-run-info dbh new-run-id)) - (hash-table-set! runs-ht run-id new-run-id) + (if new-run-id + (begin ;; let ((run-record (pgdb:get-run-info dbh new-run-id)) + (hash-table-set! runs-ht run-id new-run-id) ;; ensure key fields are up to date - ;; if last_update == pgdb_last_update do not update smallest-last-update-time - (let* ((pgdb-last-update (pgdb:get-run-last-update dbh new-run-id)) - (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))) - (if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) - (hash-table-set! smallest-last-update-time "smallest-time" last-update))) + ;; if last_update == pgdb_last_update do not update smallest-last-update-time + (let* ((pgdb-last-update (pgdb:get-run-last-update dbh new-run-id)) + (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))) + (if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) + (hash-table-set! smallest-last-update-time "smallest-time" last-update))) (pgdb:refresh-run-info dbh new-run-id state status owner event-time comment fail-count pass-count area-id last-update publish-time) - (debug:print-info 0 *default-log-port* "Working on run-id " run-id " pgdb-id " new-run-id ) - (if (not (equal? run-tag "")) - (task:add-run-tag dbh new-run-id run-tag)) + (debug:print-info 0 *default-log-port* "Working on run-id " run-id " pgdb-id " new-run-id ) + (if (not (equal? run-tag "")) + (task:add-run-tag dbh new-run-id run-tag)) new-run-id) - + (if (equal? state "deleted") - (begin - (debug:print-info 1 *default-log-port* "Warning: Run with id " run-id " was created after previous sync and deleted before the sync") #f) - (if (handle-exceptions - exn - (begin (print-call-chain) - (print ((condition-property-accessor 'exn 'message) exn)) + (begin + (debug:print-info 1 *default-log-port* "Warning: Run with id " run-id " was created after previous sync and deleted before the sync") #f) + (if (handle-exceptions + exn + (begin (print-call-chain) + (print ((condition-property-accessor 'exn 'message) exn)) #f) (pgdb:insert-run dbh spec-id target run-name state status owner event-time comment fail-count pass-count area-id last-update publish-time)) @@ -1015,11 +1015,11 @@ (define (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time) (for-each (lambda (run-id) (debug:print-info 1 *default-log-port* "Check if run with " run-id " needs to be synced" ) (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)) -run-ids)) + run-ids)) ;; get runs changed since last sync ;; (define (tasks:sync-test-data dbh cached-info area-info) ;; (let* (( Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -552,11 +552,13 @@ (tests:test-set-toplog! run-id test-name outputfilename)) ;; didn't get the lock, check to see if current update started later than this ;; update, if so we can exit without doing any work (if (> my-start-time (handle-exceptions exn - 0 + (begin + (debug:print 0 *default-log-port* "failed to get mod time on " lockf ", exn=" exn) + 0) (file-modification-time lockf))) ;; we started since current re-gen in flight, delay a little and try again (begin (debug:print-info 1 *default-log-port* "Waiting to update " outputfilename ", another test currently updating it") (thread-sleep! (+ 5 (random 5))) ;; delay between 5 and 10 seconds @@ -1504,13 +1506,15 @@ (map (lambda (p) (if (directory-exists? p) (let ((glob-query (conc p "/" fnamepatt))) (handle-exceptions exn + (begin + (print "built-in glob on " glob-query ", failed, try using the shell. exn=" exn) (with-input-from-pipe - (conc "echo " glob-query) - read-lines) ;; we aren't going to try too hard. If glob breaks it is likely because someone tried to do */*/*.log or similar + (conc "echo " glob-query) + read-lines)) ;; we aren't going to try too hard. If glob breaks it is likely because someone tried to do */*/*.log or similar (glob glob-query))) '())) paths-from-db)) paths-from-db))) @@ -1559,13 +1563,15 @@ (common:file-exists? cache-file))) (cached-dat (if (and (not force-create) cache-exists use-cache) (handle-exceptions - exn - #f ;; any issues, just give up with the cached version and re-read - (configf:read-alist cache-file)) + exn + (begin + (debug:print 0 *default-log-port* "failed to read " cache-file ", exn=" exn) + #f) ;; any issues, just give up with the cached version and re-read + (configf:read-alist cache-file)) #f)) (test-full-name (if (and item-path (not (string-null? item-path))) (conc test-name "/" item-path) test-name))) (if cached-dat Index: tests/unittests/all-api.scm ================================================================== --- tests/unittests/all-api.scm +++ tests/unittests/all-api.scm @@ -116,11 +116,11 @@ (test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'delete-run (list 2))) 0)) ;; delete a non-existant run (test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'update-run-stats (list 1 '()))) 0)) (test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'get-main-run-stats (list 1 ))) 0)) (test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'delete-old-deleted-test-records '())) 0)) (test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'get-runs (list "%" 10 0 keypatts))) 0)) -(test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'simple-get-runs (list "%" 10 0 keypatts))) 0)) +(test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'simple-get-runs (list "%" 10 0 keypatts 0))) 0)) (test #f #(#t (1))(api:execute-requests my-dbstruct (vector 'get-all-run-ids '()))) (test #f #(#t ()) (api:execute-requests my-dbstruct (vector 'get-prev-run-ids '(1)))) (test #f #(#t "JUSTFINE") (api:execute-requests my-dbstruct (vector 'get-run-status '(1)))) (test #f #(#t "NEW") (api:execute-requests my-dbstruct (vector 'get-run-state '(1)))) (test #f #(#t (("Totals" "UNKNOWN" 1) ("bar" "UNKNOWN" 1))) (api:execute-requests my-dbstruct (vector 'get-run-stats '()))) Index: tests/unittests/all-rmt.scm ================================================================== --- tests/unittests/all-rmt.scm +++ tests/unittests/all-rmt.scm @@ -38,12 +38,12 @@ (test #f #f (server:check-if-running toppath)) ;; these are used by server:start-and-wait (test #f #t (list? (server:get-list toppath))) (test #f '() (server:get-best '())) (test #f #t (common:simple-file-lock-and-wait "test.lock" expire-time: 15)) (test #f "test.lock" (common:simple-file-release-lock "test.lock")) -(test #f #t (server:get-best-guess-address (get-host-name))) -(test #f #t (string? (common:get-homehost))) +(test #f #t (string? (server:get-best-guess-address (get-host-name)))) +(test #f #t (string? (car (common:get-homehost)))) ;; clean out any old running servers ;; (let ((servers (server:get-list toppath))) (print "Known servers: " servers) @@ -68,11 +68,11 @@ (thread-sleep! 2) ;; (test #f #t (string? (server:start-and-wait *toppath*))) (test "setup for run" #t (begin (launch:setup) (string? (getenv "MT_RUN_AREA_HOME")))) -(test #f #t (client:setup-http toppath)) +(test #f #t (vector? (client:setup-http toppath))) (test #f #t (vector? (client:setup toppath))) (test #f #t (vector? (rmt:get-connection-info toppath))) ;; TODO: push areapath down. (test #f #t (string? (server:check-if-running "."))) ;; DEF (test #f #f (rmt:send-receive-no-auto-client-setup *runremote* 'get-keys #f '()))