Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -268,11 +268,11 @@ $(PREFIX)/bin/nbfind : utils/nbfind $(INSTALL) $< $@ chmod a+x $@ -$(PREFIX)/bin/loadrunner : utils/loadrunner +$(PREFIX)/bin/mtrunner : utils/mtrunner $(INSTALL) $< $@ chmod a+x $@ # $(PREFIX)/bin/refdb : refdb # $(INSTALL) $< $@ @@ -302,11 +302,11 @@ $(INSTALL) dboard $(PREFIX)/bin/.$(ARCHSTR)/dboard install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \ $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \ $(PREFIX)/bin/.$(ARCHSTR)/mtexec $(PREFIX)/bin/mtexec $(PREFIX)/bin/serialize-env \ - $(PREFIX)/bin/nbfind $(PREFIX)/bin/loadrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \ + $(PREFIX)/bin/nbfind $(PREFIX)/bin/mtrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \ $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun \ $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \ $(PREFIX)/bin/tcmt $(PREFIX)/share/db/mt-pg.sql \ $(PREFIX)/share/js/jquery-3.1.0.slim.min.js # $(PREFIX)/bin/.$(ARCHSTR)/ndboard Index: NOTES ================================================================== --- NOTES +++ NOTES @@ -158,5 +158,9 @@ INFO: (0) Server shutdown complete. 
Exiting Start: 0 at Sun Apr 28 22:18:25 MST 2013 Max: 52 at Sun Apr 28 23:06:59 MST 2013 End: 6 at Sun Apr 28 23:47:51 MST 2013 + +======================================================================== + + Index: archive.scm ================================================================== --- archive.scm +++ archive.scm @@ -90,11 +90,11 @@ (pscript-cmd (conc pscript " " testsuite-name " " target " " run-name " " test-name)) (apath (if pscript (handle-exceptions exn (begin - (debug:print 0 "ERROR: script \"" pscript-cmd "\" failed to run properly.") + (debug:print 0 *default-log-port* "ERROR: script \"" pscript-cmd "\" failed to run properly.") (exit 1)) (with-input-from-pipe pscript-cmd read-line)) #f)) ;; this is the user-calculated archive path @@ -116,13 +116,16 @@ ;; (allocation-id (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name area-key))) (if block-id ;; (and block-id allocation-id) (let ((res (cons block-id archive-path))) (hash-table-set! blockid-cache key res) res) - #f)) - #f)) ;; no best disk found - ))) + (begin + (debug:print 0 *default-log-port* "WARNING: no disk found for " target ", " run-name ", " test-name ", archive-path=" archive-path) + #f))) + (begin + (debug:print 0 *default-log-port* "WARNING: no disk found for " target ", " run-name ", " test-name ", block-id=" block-id) + #f)))))) ;; no best disk found ;; archive - run bup ;; ;; 1. create the bup dir if not exists ;; 2. start the du of each directory @@ -182,13 +185,11 @@ partial-path-index) #f)) ;; we need our archive dir checked for every test to enable folks who want to store other ways. 
(archive-info (archive:allocate-new-archive-block blockid-cache *toppath* tsname min-space target run-name test-name)) (archive-dir (if archive-info (cdr archive-info) #f)) - (archive-id (if archive-info (car archive-info) -1)) - - ) + (archive-id (if archive-info (car archive-info) -1))) (if (not archive-dir) ;; no archive disk found, this is fatal (begin (debug:print 0 *default-log-port* "FATAL: No archive disks found. Please add disks with at least " min-space " MB space to the [archive-disks] section of megatest.config") @@ -246,11 +247,20 @@ (arch-group (hash-table-ref arch-groups test-base)) (arch-info (car arch-group)) ;; don't know yet how this will work, can I get more than one possibility? (archive-id (car arch-info)) (archive-dir (cdr arch-info))) (debug:print 0 *default-log-port* "Processing disk-group " test-base) - (let* ((test-paths (hash-table-ref disk-groups test-base))) + (let* ((test-paths-in (hash-table-ref disk-groups test-base)) + (test-paths (if (args:get-arg "-include") + (let ((subpaths (string-split (args:get-arg "-include") ","))) + (apply append + (map (lambda (p) + (map (lambda (subp) + (conc p "/" subp)) + subpaths)) + test-paths-in))) + test-paths-in))) (if (not (common:file-exists? 
archive-dir)) (create-directory archive-dir #t)) (case archiver ((bup) ;; Archive using bup (let* ((bup-init-params (list "-d" archive-dir "init")) @@ -300,12 +310,14 @@ (for-each (lambda (test-dat) (let ((test-id (db:test-get-id test-dat)) (run-id (db:test-get-run_id test-dat))) (rmt:test-set-archive-block-id run-id test-id archive-id) - (if (member archive-command '("save-remove")) - (runs:remove-test-directory test-dat 'archive-remove)))) + (if (member (symbol->string archive-command) '("save-remove")) + (begin + (debug:print-info 0 *default-log-port* "remove testdat") + (runs:remove-test-directory test-dat 'archive-remove))))) (hash-table-ref test-groups test-base))))) (hash-table-keys disk-groups)) #t)) (define (archive:bup-restore archive-command run-id run-name tests rp-mutex bup-mutex) ;; move the getting of archive space down into the below block so that a single run can @@ -343,11 +355,14 @@ (archive-block-id (db:test-get-archived test-dat)) (archive-block-info (rmt:test-get-archive-block-info archive-block-id)) (archive-path (if (vector? archive-block-info) (vector-ref archive-block-info 2) ;; look in db.scm for test-get-archive-block-info for the vector record info #f)) ;; no archive found? - (archive-internal-path (conc (common:get-testsuite-name) "-" run-id "/latest/" test-partial-path))) + (archive-internal-path (conc (common:get-testsuite-name) "-" run-id "/latest/" test-partial-path)) + (include-paths (args:get-arg "-include")) + (exclude-pattern (args:get-arg "-exclude-rx")) + (exclude-file (args:get-arg "-exclude-rx-from"))) ;; some sanity checks, move an existing path out of the way - iif it is not a toplevel with children ;; (if (and (not toplevel/children) ;; special handling needed for toplevel with children prev-test-physical-path @@ -386,6 +401,90 @@ (run-n-wait bup-exe params: bup-restore-params print-cmd: #f) ;; (mutex-unlock! 
bup-mutex) (mt:test-set-state-status-by-id run-id test-id "COMPLETED" #f #f))) (debug:print-error 0 *default-log-port* "No archive path in the record for run-id=" run-id " test-id=" test-id)))) (filter vector? tests)))) - + +(define (common:get-youngest-test tests) + (if (null? tests) + #f + (let ((res #f)) + (for-each + (lambda (test-dat) + (let ((event-time (db:test-get-event_time test-dat))) + (if (or (not res) + (> event-time (db:test-get-event_time res))) + (set! res test-dat)))) + tests) + res))) + +;; from an archive get a specific path - works ONLY with bup for now +;; +(define (archive:bup-get-data archive-command run-id-in run-name-in tests rp-mutex bup-mutex) + (if (null? tests) + (debug:print-info 0 *default-log-port* "get-data called with no matching tests to operate on.") + + (let* ((bup-exe (or (configf:lookup *configdat* "archive" "bup") "bup")) + (linktree (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))) + ;; (test-dat (common:get-youngest-test tests)) + (destpath (args:get-arg "-dest"))) + (cond + ((null? tests) + (debug:print-error 0 *default-log-port* + "No test matching provided target, runname pattern and test pattern found.")) + ((file-exists? destpath) + (debug:print-error 0 *default-log-port* + "Destination path alread exists! 
Please remove it before running get.")) + (else + (let loop ((rem-tests tests)) + (let* ((test-dat (common:get-youngest-test rem-tests)) + (item-path (db:test-get-item-path test-dat)) + (test-name (db:test-get-testname test-dat)) + (test-id (db:test-get-id test-dat)) + (run-id (db:test-get-run_id test-dat)) + (run-name (rmt:get-run-name-from-id run-id)) + (keyvals (rmt:get-key-val-pairs run-id)) + (target (string-intersperse (map cadr keyvals) "/")) + + (toplevel/children (and (db:test-get-is-toplevel test-dat) + (> (rmt:test-toplevel-num-items run-id test-name) 0))) + (test-partial-path (conc target "/" run-name "/" + (db:test-make-full-name test-name item-path))) + ;; note the trailing slash to get the dir inspite of it being a link + (test-path (conc linktree "/" test-partial-path)) + (archive-block-id (db:test-get-archived test-dat)) + (archive-block-info (rmt:test-get-archive-block-info archive-block-id)) + (archive-path (if (vector? archive-block-info) + (vector-ref archive-block-info 2) + #f)) + (archive-internal-path (conc (common:get-testsuite-name) "-" run-id + "/latest/" test-partial-path)) + (include-paths (args:get-arg "-include")) + (exclude-pattern (args:get-arg "-exclude-rx")) + (exclude-file (args:get-arg "-exclude-rx-from"))) + + (if (and archive-path ;; no point in proceeding if there is no actual archive + (not toplevel/children)) + (begin + (let* ((bup-restore-params (append (list "-d" archive-path "restore" "-C" (or destpath "data")) + ;; " " ;; What is the empty string for? + (if include-paths + (map (lambda (p) + (conc archive-internal-path "/" p)) + (string-split include-paths ",")) + (list archive-internal-path))))) + (debug:print-info 0 *default-log-port* "Restoring archived data to " (or destpath "data") + " from archive in " archive-path " ... " archive-internal-path) + (run-n-wait bup-exe params: bup-restore-params print-cmd: #t))) + (let ((new-rem-tests (filter (lambda (tdat) + (or (not (eq? (db:test-get-id tdat) test-id)) + (not (eq? 
(db:test-get-run_id tdat) run-id)))) + rem-tests) )) + (debug:print-info 0 *default-log-port* + "No archive path in the record for run-id=" run-id + " test-id=" test-id ", skipping.") + (if (null? new-rem-tests) + (begin + (debug:print-info 0 *default-log-port* "No archives found for " target "/" run-name "...") + #f) + (loop new-rem-tests))))))))))) + ADDED autostuff/.mtutil.scm Index: autostuff/.mtutil.scm ================================================================== --- /dev/null +++ autostuff/.mtutil.scm @@ -0,0 +1,88 @@ +;; Copyright 2006-2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +(use json) +(use ducttape-lib) + +(define (get-last-runname area-path target) + (let* ((run-data (with-input-from-pipe (conc "megatest -list-runs % -target " target " -fields runs:runname,event_time -dumpmode sexpr -start-dir " area-path) + read))) + (if (or (not run-data) + (null? run-data)) + #f + (let* ((name-time (let ((dat (map cdadr (alist-ref target run-data equal?)))) ;; (("runname" . "2017w07.0-0047") ("event_time" . "1487490424")) + ;; (print "dat=" dat) + (map (lambda (item) + (cons (alist-ref "runname" item equal?) + (string->number (alist-ref "event_time" item equal?)))) + dat))) + (sorted (sort name-time (lambda (a b)(> (cdr a)(cdr b))))) + (last-name (if (null? 
sorted) + #f + (caar sorted)))) + last-name)))) + +(define (str-first-char->number str) + (char->integer (string-ref str 0))) + +;; example of how to set up and write target mappers +;; NOTE: maps a *list* of targets! +;; +;; (? target run-name area area-path reason contour mode-patt) +;; +(add-target-mapper 'prefix-contour + (lambda (runkey area contour) + (print "target: " runkey) + (list (conc contour "/" runkey)))) +#;(add-target-mapper 'prefix-area-contour + (lambda (target run-name area area-path reason contour mode-patt) + (conc area "/" contour "/" target))) + +(add-runname-mapper 'corporate-ww + (lambda (target run-name area area-path reason contour mode-patt) + (print "corporate-ww called with: target=" target " run-name=" run-name " area=" area " area-path=" area-path " reason=" reason " contour=" contour " mode-patt=" mode-patt) + (let* ((last-name (get-last-runname area-path target)) + (last-letter (let* ((ch (if (string? last-name) + (let ((len (string-length last-name))) + (substring last-name (- len 1) len)) + "a")) + (chnum (str-first-char->number ch)) + (a (str-first-char->number "a")) + (z (str-first-char->number "z"))) + (if (and (>= chnum a)(<= chnum z)) + chnum + #f))) + (next-letter (if last-letter + (list->string + (list + (integer->char + (+ last-letter 1)))) ;; surely there is an easier way? 
+ "a"))) + ;; (print "last-name: " last-name " last-letter: " last-letter " next-letter: " next-letter) + (conc (seconds->wwdate (current-seconds)) next-letter)))) + +(add-runname-mapper 'auto + (lambda (target run-name area area-path reason contour mode-patt) + "auto-eh")) + +;; run only areas where first letter of area name is "a" +;; +(add-area-checker 'first-letter-a + (lambda (area target contour) + (string-match "^a.*$" area))) + + ADDED autostuff/megatest.config Index: autostuff/megatest.config ================================================================== --- /dev/null +++ autostuff/megatest.config @@ -0,0 +1,85 @@ +# Copyright 2006-2017, Matthew Welland. +# +# This file is part of Megatest. +# +# Megatest is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Megatest is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Megatest. If not, see . 
+ +## commented out due to a bug in v1.6501 in mtutil +[fields] +a text +b text +c text + +[default] +# usercode .mtutil.scm +# areafilter area-to-run +# targtrans generic-target-translator +# runtrans generic-runname-translator +usercode .mtutil.scm +# areafilter area-to-run +targtrans prefix-contour-broken +# runtrans generic-runname-translator + +[setup] +pktsdirs /mfs/home/matt/orion_automation/pkts + +[areas] + +# path-to-area map-target-script(future, optional) +# someqa path=../megatestqa/someqa; targtrans=somefunc; areafilter=area-to-run +# targtrans is name of scheme proc stored in .mtutil.scm, which lives in PWD where mtutil is run +# the target translator can return: a/target OR (list/of targets/to apply/run) +# OR #f i.e. run nothing + +# ext-tests path=ext-tests; targtrans=prefix-contour; + + +ext path=/mfs/home/matt/automation_areas/megatest/ext-tests; targtrans=prefix-contour + +[contours] +# selector=tag-expr/mode-patt +quick areas=ext; selector=/QUICKPATT +# quick2 areafn=check-area; selector=/QUICKPATT +full areas=ext +# quick areas=fullrun,ext-tests; selector=QUICKPATT/quick +# full areas=fullrun,ext-tests; selector=MAXPATT/ +# short areas=fullrun,ext-tests; selector=MAXPATT/ +# all areas=fullrun,ext-tests +# snazy selector=QUICKPATT/ + +[nopurpose] + +[access] +ext matt:admin mattw:owner + +[accesstypes] +admin run rerun resume remove set-ss rerun-clean +owner run rerun resume remove rerun-all +badguy set-ss + +[setup] +maxload 1.2 + +[listeners] +localhost:12345 contact=matt@kiatoa.com +localhost:54321 contact=matt@kiatoa.com + +[listener] +script nbfake echo + + +[server] +timeout 1 + +[include local.config] ADDED autostuff/runconfigs.config Index: autostuff/runconfigs.config ================================================================== --- /dev/null +++ autostuff/runconfigs.config @@ -0,0 +1,112 @@ +# Copyright 2006-2017, Matthew Welland. +# +# This file is part of Megatest. 
+# +# Megatest is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Megatest is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Megatest. If not, see . + +# To get emacs font highlighing in the various megatest configs do this: +# +# Install emacs-goodies-el: +# sudo apt install emacs-goodies-el +# Add to your ~/.emacs file: +# (add-to-list 'auto-mode-alist '("config\\'" . conf-space-mode)) +# + +# example of a cron entry to run sync using db spec pgdb, with pgdb setting in file local.config +# +[a/b/c] +# all:scheduled:sync cron= 0/5 * * * *;dbdest=pgdb;appendconf=/nfs/phoebe/disk1/home/mfs/matt/.sysmaint/local.config +# quick:scheduled:sync cron= 0/5 * * * *;dbdest=pgdb;appendconf=/nfs/phoebe/disk1/home/mfs/matt/.sysmaint/local.config +# fast:scheduled:sync-prepend cron= 0/1 * * * *;dbdest=pgdb;appendconf=/mfs/matt/.sysmaint/local.config + +# [scriptinc ./gentargets.sh #{getenv USER}] +# [v1.23/45/67] + +# tip will be replaced with hashkey? 
+ +# [%/%/%] doesn't work + +[/.*/] + +[v1.65/tip/dev] +# file: files changes since last run trigger new run +# script: script is called with unix seconds as last parameter (other parameters are preserved) +# +# contour:sensetype:action params data +# commented out for debug + +quick:file:run runtrans=auto; glob=/nfs/orion/disk1/mfs_home/home/matt/automation_areas/megatest/*.scm foo.touchme +# snazy:file:run runtrans=corporate-ww; glob=/home/matt/data/megatest/*.scm +# short:file:run runtrans=short; glob=/home/matt/data/megatest/*.scm + +# script returns change-time (unix epoch), new-target-name, run-name +# +# quick:script:run checkfossil = http://www.kiatoa.com/fossils/megatest v1.63;\ +# checkfossil = http://www.kiatoa.com/fossils/megatest_qa trunk + +# # fossil based trigger +# # +quick:fossil:run http://www.kiatoa.com/fossils/megatest=v1.65;\ + http://www.kiatoa.com/fossils/megatest_qa=trunk + +# field allowed values +# ----- -------------- +# minute 0-59 +# hour 0-23 +# day of month 1-31 +# month 1-12 (or names, future development) +# day of week 0-7 (0 or 7 is Sun, or, future development, use names) + +# actions: +# run - run a testsuite +# clean - clear out runs +# archive - archive runs + +# quick:scheduled:run cron=47 * * * * ;run-name=auto +# quick:scheduled:archive cron=15 20 * * * ;run-name=%;target=%/%/% + +# [%] +# # every friday at midnight clean "all" tests over 7d +# all:scheduled:clean cron= 0 0 0 0 5;run-name=%;age=7d + +[v1.65/tip/dev] +# # file: files changes since last run trigger new run +# # script: script is called with unix seconds as last parameter (other parameters are preserved) +# # +# # contour:sensetype:action params data +# quick:file:run run-name=auto;glob=*.scm +# quick:file:clean run-name=auto; +# quick:script:run run-name=auto;script=checkfossil.sh v1.63 +# +# # field allowed values +# # ----- -------------- +# # minute 0-59 +# # hour 0-23 +# # day of month 1-31 +# # month 1-12 (or names, future development) +# # day of week 
0-7 (0 or 7 is Sun, or, future development, use names) +# +# # actions: +# # run - run a testsuite +# # clean - clear out runs +# # archive - archive runs +# +quick:scheduled:run cron=47 * * * * ;run-name=auto +# quick:scheduled:archive cron=15 20 * * * ;run-name=% ; +# + +[%/%/%] +# # every friday at midnight clean "all" tests over 7d +all:scheduled:clean cron= 0 0 0 0 5;run-name=%;age=7d +# ADDED autostuff/setup.sh Index: autostuff/setup.sh ================================================================== --- /dev/null +++ autostuff/setup.sh @@ -0,0 +1,2 @@ +source /opt/chicken/4.13.0_18.04_WW45/setup-chicken4x.sh +export PATH=/mfs/home/matt/orion_automation/bin:$PATH Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -484,13 +484,14 @@ (directory-fold (lambda (file rem) (handle-exceptions exn (begin - (debug:print-info 0 *default-log-port* "unable to rotate log " file ", probably handled by another process.") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (print-call-chain (current-error-port))) + (debug:print-info 2 *default-log-port* "unable to rotate log " file ", probably handled by another process, this is safe to ignore.") + (debug:print 2 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + ;; (print-call-chain (current-error-port)) ;; + ) (let* ((fullname (conc "logs/" file)) (mod-time (file-modification-time fullname)) (file-age (- (current-seconds) mod-time))) (hash-table-set! all-files file mod-time) (if (or (and (string-match "^.*.log" file) @@ -691,14 +692,18 @@ ;; dot-locking egg seems not to work, using this for now ;; if lock is older than expire-time then remove it and try again ;; to get the lock ;; (define (common:simple-file-lock fname #!key (expire-time 300)) + (let ((fmod-time (handle-exceptions + ext + (current-seconds) + (file-modification-time fname)))) (if (common:file-exists? 
fname) - (if (> (- (current-seconds)(file-modification-time fname)) expire-time) + (if (> (- (current-seconds) fmod-time) expire-time) (begin - (handle-exceptions exn #f (delete-file* fname)) + (handle-exceptions exn #f (delete-file* fname)) (common:simple-file-lock fname expire-time: expire-time)) #f) (let ((key-string (conc (get-host-name) "-" (current-process-id)))) (with-output-to-file fname (lambda () @@ -708,11 +713,11 @@ (handle-exceptions exn #f (with-input-from-file fname (lambda () (equal? key-string (read-line))))) - #f)))) + #f))))) (define (common:simple-file-lock-and-wait fname #!key (expire-time 300)) (let ((end-time (+ expire-time (current-seconds)))) (let loop ((got-lock (common:simple-file-lock fname expire-time: expire-time))) (if got-lock @@ -879,10 +884,35 @@ (configf:lookup *configdat* "setup" "testsuite" ) (getenv "MT_TESTSUITE_NAME") (if (string? *toppath* ) (pathname-file *toppath*) #f))) ;; (pathname-file (current-directory))))) + +;; safe getting of toppath +(define (common:get-toppath areapath) + (or *toppath* + (if areapath + (begin + (set! *toppath* areapath) + (setenv "MT_RUN_AREA_HOME" areapath) + areapath) + #f) + (if (getenv "MT_RUN_AREA_HOME") + (begin + (set! *toppath* (getenv "MT_RUN_AREA_HOME")) + *toppath*) + #f) + ;; last resort, look for megatest.config + (let loop ((thepath (realpath "."))) + (if (file-exists? (conc thepath "/megatest.config")) + thepath + (if (equal? thepath "/") + (begin + (debug:print-error 0 *default-log-port* "Unable to find megatest home directory.") + #f) + (loop (pathname-directory thepath))))) + )) (define common:get-area-name common:get-testsuite-name) (define (common:get-db-tmp-area . 
junk) (if *db-cache-path* @@ -1172,11 +1202,23 @@ ;; (define (common:bash-glob instr) (string-split (with-input-from-pipe (conc "/bin/bash -c \"echo " instr "\"") - read-line))) + read-line))) + +;;====================================================================== +;; Some safety net stuff +;;====================================================================== + +;; return input if it is a list or return null +(define (common:list-or-null inlst #!key (ovrd #f)(message #f)) + (if (list? inlst) + inlst + (begin + (if message (debug:print-error 0 *default-log-port* message)) + (or ovrd '())))) ;;====================================================================== ;; T A R G E T S , S T A T E , S T A T U S , ;; R U N N A M E A N D T E S T P A T T ;;====================================================================== @@ -1277,13 +1319,18 @@ (define (common:get-linktree) (or (getenv "MT_LINKTREE") (if *configdat* (configf:lookup *configdat* "setup" "linktree") - (if *toppath* - (conc *toppath* "/lt") - #f)))) + #f) + (if (or *toppath* (getenv "MT_RUN_AREA_HOME")) + (conc (or *toppath* (getenv "MT_RUN_AREA_HOME")) "/lt") + #f) + (let* ((tp (common:get-toppath #f)) + (lt (conc tp "/lt"))) + (if (not tp)(debug:print 0 *default-log-port* "WARNING: cannot calculate best path for linktree, using " lt)) + lt))) (define (common:args-get-runname) (let ((res (or (args:get-arg "-runname") (args:get-arg ":runname") (getenv "MT_RUNNAME")))) @@ -1672,37 +1719,47 @@ ;; cpu-load)) ;; get values from cached info from dropping file in logs dir ;; e.g. key is host and dtype is normalized-load ;; -(define (common:get-cached-info key dtype #!key (age 5)) - (let* ((fullpath (conc *toppath* "/.sysdata/" key "-" dtype ".log"))) - (if (and (file-exists? fullpath) - (file-read-access? 
fullpath)) - (handle-exceptions - exn - #f - (debug:print 2 *default-log-port* "reading file " fullpath) - (let ((real-age (- (current-seconds)(file-change-time fullpath)))) - (if (< real-age age) - (with-input-from-file fullpath read) - (begin - (debug:print 1 *default-log-port* "file " fullpath " is too old (" real-age" seconds) to trust, skipping reading it") - #f)))) - (begin - (debug:print 2 *default-log-port* "not reading file " fullpath) - #f)))) - -(define (common:write-cached-info key dtype dat) - (let* ((fulldir (conc *toppath* "/.sysdata")) - (fullpath (conc fulldir "/" key "-" dtype ".log"))) - (if (not (file-exists? fulldir))(create-directory fulldir #t)) - (handle-exceptions - exn - #f - (with-output-to-file fullpath (lambda ()(pp dat)))))) - +(define (common:get-cached-info key dtype #!key (age 10)) + (if *toppath* + (let* ((fullpath (conc *toppath* "/.sysdata/" key "-" dtype ".log"))) + (if (and (file-exists? fullpath) + (file-read-access? fullpath)) + (handle-exceptions + exn + #f + (debug:print 2 *default-log-port* "reading file " fullpath) + (let ((real-age (- (current-seconds)(file-change-time fullpath)))) + (if (< real-age age) + (handle-exceptions + exn + (begin + (debug:print-info 1 *default-log-port* " removing bad file " fullpath) + (delete-file* fullpath) + #f) + (with-input-from-file fullpath read)) + (begin + (debug:print-info 2 *default-log-port* "file " fullpath " is too old (" real-age" seconds) to trust, skipping reading it") + #f)))) + (begin + (debug:print 2 *default-log-port* "not reading file " fullpath) + #f))) + #f)) + +(define (common:write-cached-info key dtype dat) + (if *toppath* + (let* ((fulldir (conc *toppath* "/.sysdata")) + (fullpath (conc fulldir "/" key "-" dtype ".log"))) + (if (not (file-exists? 
fulldir))(create-directory fulldir #t)) + (handle-exceptions + exn + #f + (with-output-to-file fullpath (lambda ()(pp dat))))) + #f)) + ;; get cpu load by reading from /proc/loadavg, return all three values ;; (define (common:get-cpu-load remote-host) (handle-exceptions exn @@ -1935,30 +1992,38 @@ (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (random 3600))) ;; hosts had better not be changing the number of cpus too often! (let* ((proc (lambda () (let loop ((numcpu 0) (inl (read-line))) (if (eof-object? inl) - (begin - (common:write-cached-info actual-host "num-cpus" numcpu) - numcpu) + (if (> numcpu 0) + numcpu + #f) ;; if zero return #f so caller knows that things are not working (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl) (+ numcpu 1) numcpu) (read-line)))))) (result (if remote-host (with-input-from-pipe (conc "ssh " remote-host " cat /proc/cpuinfo") proc) (with-input-from-file "/proc/cpuinfo" proc)))) - (common:write-cached-info actual-host "num-cpus" result) + (if (and (number? result) + (> result 0)) + (common:write-cached-info actual-host "num-cpus" result)) result)))) ;; wait for normalized cpu load to drop below maxload ;; -(define (common:wait-for-normalized-load maxload msg remote-host) +(define (common:wait-for-normalized-load maxload msg remote-host #!optional (rem-tries 5)) (let ((num-cpus (common:get-num-cpus remote-host))) - (common:wait-for-cpuload maxload num-cpus 15 msg: msg remote-host: remote-host))) + (if num-cpus + (common:wait-for-cpuload maxload num-cpus 15 msg: msg remote-host: remote-host) + (begin + (thread-sleep! (random 60)) ;; we failed to get num cpus. wait a bit and try again + (if (> rem-tries 0) + (common:wait-for-normalized-load maxload msg remote-host (- rem-tries 1)) + #f))))) ;; DO NOT CALL THIS DIRECTLY. 
It is called from common:wait-for-normalized-load ;; (define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)(force-maxload #f)) (let* ((loadavg (common:get-cpu-load remote-host)) @@ -1965,18 +2030,22 @@ (numcpus (if (<= 1 numcpus-in) ;; not possible to have zero. If we get 1, it's possible that we got the previous default, and we should check again (common:get-num-cpus remote-host) numcpus-in)) (maxload (if force-maxload maxload-in - (max maxload-in 0.5))) ;; so maxload must be greater than 0.5 for now BUG - FIXME? + (if (number? maxload-in) + (max maxload-in 0.5) + 0.5))) ;; so maxload must be greater than 0.5 for now BUG - FIXME? (first (car loadavg)) (next (cadr loadavg)) (adjload (* maxload (max 1 numcpus))) ;; possible bug where numcpus (or could be maxload) is zero, crude fallback is to at least use 1 (loadjmp (- first next)) (adjwait (min (+ 300 (random 10)) (abs (* (+ (random 10)(/ (- 1000 count) 10) waitdelay) (- first adjload) )) )));; add some randomness to the time to break any alignment where netbatch dumps many jobs to machines simultaneously - (debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload - ", load: " first ", adjload: " adjload ", loadjmp: " loadjmp) + ;; let's let the user know once in a long while that load checking is happening but not constantly report it + (if (> (random 100) 75) ;; about 25% of the time + (debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload + ", load: " first ", adjload: " adjload ", loadjmp: " loadjmp)) (cond ((and (> first adjload) (> count 0)) (debug:print-info 0 *default-log-port* "server start delayed " adjwait " seconds due to load " first " exceeding max of " adjload " on server " (or remote-host (get-host-name)) " (normalized load-limit: " maxload ") " (if msg msg "")) (thread-sleep! 
adjwait) @@ -2084,12 +2153,13 @@ ;; check space in dbdir and in megatest dir ;; returns: ok/not dbspace required-space ;; (define (common:check-db-dir-space) (let* ((required (string->number + ;; default is 1GB (or actually a billion bytes) This is the number of 1 kB blocks. (or (configf:lookup *configdat* "setup" "dbdir-space-required") - "100000"))) + "1000000"))) (dbdir (common:get-db-tmp-area)) ;; (db:get-dbdir)) (tdbspace (common:check-space-in-dir dbdir required)) (mdbspace (common:check-space-in-dir *toppath* required))) (sort (list tdbspace mdbspace) (lambda (a b) (< (cadr a)(cadr b)))))) @@ -2108,13 +2178,16 @@ (exit 1))))) ;; paths is list of lists ((name path) ... ) ;; (define (common:get-disk-with-most-free-space disks minsize) - (let ((best #f) + (let* ((best #f) (bestsize 0) - (min-inodes (or (string->number (if (configf:lookup *configdat* "setup" "min_inodes") (configf:lookup *configdat* "setup" "min_inodes") "0")) 0))) + (default-min-inodes-string "1000000") + (default-min-inodes (string->number default-min-inodes-string)) + (min-inodes (or (string->number (if (configf:lookup *configdat* "setup" "min_inodes") (configf:lookup *configdat* "setup" "min_inodes") default-min-inodes-string)) default-min-inodes))) + (for-each (lambda (disk-num) (let* ((dirpath (cadr (assoc disk-num disks))) (freespc (cond ((not (directory? dirpath)) @@ -2146,10 +2219,11 @@ -1) (else (get-free-inodes dirpath)))) ;;(free-inodes (get-free-inodes dirpath)) ) + (debug:print 2 *default-log-port* "INFO: disk " disk-num " path " dirpath " free space " freespc " free inodes " free-inodes) (if (and (> freespc bestsize)(> free-inodes min-inodes )) (begin (set! best (cons disk-num dirpath)) (set! 
bestsize freespc))) ;;(print "Processing: " disk-num " bestsize: " bestsize " best: " best " freespc: " freespc " min-inodes: " min-inodes " free-inodes: " free-inodes) Index: configf.scm ================================================================== --- configf.scm +++ configf.scm @@ -20,11 +20,11 @@ ;;====================================================================== ;; Config file handling ;;====================================================================== -(use regex regex-case) ;; directory-utils) +(use regex regex-case matchable) ;; directory-utils) (declare (unit configf)) (declare (uses process)) (declare (uses env)) (declare (uses keys)) @@ -118,14 +118,15 @@ " (let ((extra \"" cmd "\"))" " (conc (or *toppath* (get-environment-variable \"MT_RUN_AREA_HOME\"))" " (if (string-null? extra) \"\" \"/\")" " extra)))")) ((get g) - (let* ((parts (string-split cmd)) - (sect (car parts)) - (var (cadr parts))) - (conc "(lambda (ht)(configf:lookup ht \"" sect "\" \"" var "\"))"))) + (match (string-split cmd) + ((sect var)(conc "(lambda (ht)(configf:lookup ht \"" sect "\" \"" var "\"))")) + (else + (debug:print-error 0 *default-log-port* "#{get ...} used with only one parameter, \"" cmd "\", two needed.") + "(lambda (ht) #f)"))) ((runconfigs-get rget) (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))")) ;; ((rget) (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))")) (else "(lambda (ht)(print \"ERROR\") \"ERROR\")")))) ;; (print "fullcmd=" fullcmd) (handle-exceptions Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -1464,20 +1464,18 @@ (set! res id)) db "SELECT id FROM archive_blocks WHERE archive_disk_id=? 
AND disk_path=?;" bdisk-id archive-path) (if res ;; record exists, update du if applicable and return res - (begin - (if du (sqlite3:execute db "UPDATE archive_blocks SET last_du=?,last_du_time=(strftime('%s','now')) + (if du (sqlite3:execute db "UPDATE archive_blocks SET last_du=?,last_du_time=(strftime('%s','now')) WHERE archive_disk_id=? AND disk_path=?;" - bdisk-id archive-path du)) - res) + bdisk-id archive-path du)) (begin (sqlite3:execute db "INSERT OR REPLACE INTO archive_blocks (archive_disk_id,disk_path,last_du) VALUES (?,?,?);" bdisk-id archive-path (or du 0)) - (db:archive-register-block-name dbstruct bdisk-id archive-path du: du))) + (set! res (db:archive-register-block-name dbstruct bdisk-id archive-path du: du)))) (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) res)) ;; The "archived" field in tests is overloaded; 0 = not archived, > 0 archived in block with given id @@ -1614,10 +1612,30 @@ ;; given a launch delay (minimum time from last launch) return amount of time to wait ;; ;; (define (db:launch-delay-left dbstruct run-id launch-delay) + + +(define (db:get-status-from-final-status-file run-dir) + (let ( + (infile (conc run-dir "/.final-status"))) + + ;; first verify we are able to write the output file + (if (not (file-read-access? infile)) + (begin + (debug:print 0 *default-log-port* "ERROR: cannot read " infile) + (debug:print 0 *default-log-port* "ERROR: run-dir is " run-dir) + #f + ) + (with-input-from-file infile read-lines) + ) + ) +) + + + ;; select end_time-now from ;; (select testname,item_path,event_time+run_duration as ;; end_time,strftime('%s','now') as now from tests where state in ;; ('RUNNING','REMOTEHOSTSTART','LAUNCHED')); @@ -1624,10 +1642,12 @@ (define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime) (let* ((incompleted '()) (oldlaunched '()) (toplevels '()) + ;; The default running-deadtime is 720 seconds = 12 minutes. 
+ ;; "(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))" = 200 + (2 * (200 + 30 + 30)) (deadtime-trim (or ovr-deadtime (configf:lookup-number *configdat* "setup" "deadtime"))) (server-start-allowance 200) (server-overloaded-budget 200) (launch-monitor-off-time (or (configf:lookup-number *configdat* "setup" "test-stats-update-period") 30)) (launch-monitor-on-time-budget 30) @@ -1635,10 +1655,13 @@ (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30)) (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default)) (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period))) (running-deadtime (or deadtime-trim running-deadtime-default)) ;; two minutes (30 seconds between updates, this leaves 3x grace period) ) + (debug:print-info 4 *default-log-port* "running-deadtime = " running-deadtime) + (debug:print-info 4 *default-log-port* "deadtime-trim = " deadtime-trim) + (db:with-db dbstruct #f #f (lambda (db) ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes ;; @@ -1657,12 +1680,13 @@ (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) (begin (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)) (debug:print-info 0 *default-log-port* "Found old test in RUNNING state, test-id=" test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time" run-duration="run-duration)))) db + "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING');" - run-id running-deadtime) + run-id running-deadtime) ;; default time 720 seconds (sqlite3:for-each-row (lambda (test-id run-dir uname testname item-path event-time run-duration) (if (and (equal? 
uname "n/a") @@ -1674,11 +1698,11 @@ (begin (debug:print-info 0 *default-log-port* "Found old test in REMOTEHOSTSTART state, test-id=" test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time" run-duration="run-duration) (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))) db "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('REMOTEHOSTSTART');" - run-id remotehoststart-deadtime) + run-id remotehoststart-deadtime) ;; default time 230 seconds ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config ;; ;; (db:delay-if-busy dbdat) (sqlite3:for-each-row @@ -1708,16 +1732,37 @@ ;; incompleted)) (min-incompleted-ids (map car incompleted)) ;; do 'em all (all-ids (append min-incompleted-ids (map car oldlaunched)))) (if (> (length all-ids) 0) (begin + ;; (launch:is-test-alive "localhost" 435) (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as DEAD") (for-each - (lambda (test-id) - (db:set-state-status-and-roll-up-items dbstruct run-id test-id 'foo "COMPLETED" "DEAD" "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead.")) - ;;(db:test-set-state-status dbstruct run-id test-id "COMPLETED" "DEAD" "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead.")) ;; fix for one aspect of Randy's ticket 1405717332 ;; TODO - fix problem where test goes to COMPLETED/DEAD while in progress, only later to go to COMPLETED/PASS. 
ref ticket 220546828 - all-ids)))))))) + (lambda (test-id) + (let* (;; (run-dir (db:test-get-rundir-from-test-id dbstruct run-id test-id)) + (tinfo (db:get-test-info-by-id dbstruct run-id test-id)) + (run-dir (db:test-get-rundir tinfo)) + (host (db:test-get-host tinfo)) + (pid (db:test-get-process_id tinfo)) + (result (db:get-status-from-final-status-file run-dir))) + (if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result))) + (begin + (debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD") + (db:set-state-status-and-roll-up-items dbstruct run-id test-id 'foo "COMPLETED" "PASS" + "Test stopped responding but it has PASSED; marking it PASS in the DB.")) + (let ((is-alive (launch:is-test-alive host pid))) + (if is-alive + (debug:print 0 *default-log-port* "INFO: test " test-id " on host " host " has a process on pid " pid ", NOT setting to DEAD.") + (begin + (debug:print 0 *default-log-port* "INFO: test " test-id " final state/status is not COMPLETED/PASS. It is " result) + (db:set-state-status-and-roll-up-items dbstruct run-id test-id 'foo "COMPLETED" "DEAD" + "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead."))))))) + all-ids) + ;;call end of eud of run detection for posthook + (launch:end-of-run-check run-id) + ))))))) + ;; ALL REPLACED BY THE BLOCK ABOVE ;; ;; (sqlite3:execute ;; db Index: docs/manual/howto.txt ================================================================== --- docs/manual/howto.txt +++ docs/manual/howto.txt @@ -13,196 +13,213 @@ // You should have received a copy of the GNU General Public License // along with Megatest. If not, see . // // Copyright 2006-2012, Matthew Welland. -How To Do Things ----------------- - -Process Runs -~~~~~~~~~~~~ - -Remove Runs -^^^^^^^^^^^ - -From the dashboard click on the button (PASS/FAIL...) for one of the tests. From the test control panel that -comes up push the clean test button. 
The command field will be prefilled with a template command for removing -that test. You can edit the command, for example change the argument to -testpatt to "%" to remove all tests. - -.Remove the test diskperf and all it's items ----------------- -megatest -remove-runs -target ubuntu/nfs/none -runname ww28.1a -testpatt diskperf/% -v ----------------- - -.Remove all tests for all runs and all targets ----------------- -megatest -remove-runs -target %/%/% -runname % -testpatt % -v ----------------- - -Archive Runs -^^^^^^^^^^^^ - -Megatest supports using the bup backup tool (https://bup.github.io/) to archive your tests for efficient storage -and retrieval. Archived data can be rapidly retrieved if needed. The metadata for the run (PASS/FAIL status, run -durations, time stamps etc.) are all preserved in the megatest database. - -For setup information see the Archiving topic in the reference section of this manual. - -To Archive -++++++++++ - -Hint: use the test control panel to create a template command by pushing the "Archive Tests" button. - -.Archive a full run ----------------- -megatest -target ubuntu/nfs/none -runname ww28.1a -archive save-remove -testpatt % ----------------- - -To Restore -++++++++++ - -.Retrieve a single test ----------------- -megatest -target ubuntu/nfs/none -runname ww28.1a -archive restore -testpatt diskperf/% ----------------- - -Hint: You can browse the archive using bup commands directly. 
- ----------------- -bup -d /path/to/bup/archive ftp ----------------- - -Submit jobs to Host Types based on Test Name -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.In megatest.config ------------------------- -[host-types] -general ssh #{getbgesthost general} -nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo - -[hosts] -general cubian xena - -[launchers] -envsetup general -xor/%/n 4C16G -% nbgeneral - -[jobtools] -launcher bsub -# if defined and not "no" flexi-launcher will bypass launcher unless there is no -# match. -flexi-launcher yes ------------------------- - -Tricks ------- - -This section is a compendium of a various useful tricks for debugging, -configuring and generally getting the most out of Megatest. - -Limiting your running jobs -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The following example will limit a test in the jobgroup "group1" to no more than 10 tests simultaneously. - -In your testconfig: - ----------------- -[test_meta] -jobgroup group1 ----------------- - -In your megatest.config: - ---------------- -[jobgroups] -group1 10 -custdes 4 ---------------- - -Debugging Tricks ----------------- - -Examining The Environment -~~~~~~~~~~~~~~~~~~~~~~~~~ - -Test Control Panel - xterm -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -From the dashboard click on a test PASS/FAIL button. This brings up a test control panel. Aproximately near the center left of the -window there is a button "Start Xterm". Push this to get an xterm with the full context and environment loaded for that test. You can run -scripts or ezsteps by copying from the testconfig (hint, load up the testconfig in a separate gvim or emacs window). This is the easiest way -to debug your tests. - -During Config File Processing -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -It is often helpful to know the content of variables in various -contexts as Megatest does the actions needed to run your tests. 
A handy technique is to force the startup of an xterm in the context being examined. - -For example, if an item list is not being generated as expected you -can inject the startup of an xterm as if it were an item: - -.Original items table ------------------ -[items] -CELLNAME [system getcellname.sh] ------------------ - -.Items table modified for debug ------------------ -[items] -DEBUG [system xterm] -CELLNAME [system getcellnames.sh] ------------------ - -When this test is run an xterm will pop up. In that xterm the -environment is exactly that in which the script "getcellnames.sh" -would run. You can now debug the script to find out why it isn't -working as expected. - -Organising Your Tests and Tasks -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The default location "tests" for storing tests can be extended by -adding to your tests-paths section. - ----------------------------- -[misc] -parent #{shell dirname $(readlink -f .)} - -[tests-paths] -1 #{get misc parent}/simplerun/tests ----------------------------- - -The above example shows how you can use addition sections in your -config file to do complex processing. By putting results of relatively -slow operations into variables the processing of your configs can be -kept fast. - -Alternative Method for Running your Job Script -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.Directly running job in testconfig -------------------- -[setup] -runscript main.csh -------------------- - -The runscript method is essentially a brute force way to run scripts where the -user is responsible for setting STATE and STATUS and managing the details of running a test. - -Debugging Server Problems -~~~~~~~~~~~~~~~~~~~~~~~~~ - -Some handy Unix commands to track down issues with servers not -communicating with your test manager processes. Please put in tickets -at https://www.kiatoa.com/fossils/megatest if you have problems with -servers getting stuck. 
- ----------------- -sudo lsof -i -sudo netstat -lptu -sudo netstat -tulpn +How To Do Things +---------------- + +Process Runs +~~~~~~~~~~~~ + +Remove Runs +^^^^^^^^^^^ + +From the dashboard click on the button (PASS/FAIL...) for one of the tests. From the test control panel that +comes up push the clean test button. The command field will be prefilled with a template command for removing +that test. You can edit the command, for example change the argument to -testpatt to "%" to remove all tests. + +.Remove the test diskperf and all it's items +---------------- +megatest -remove-runs -target ubuntu/nfs/none -runname ww28.1a -testpatt diskperf/% -v +---------------- + +.Remove all tests for all runs and all targets +---------------- +megatest -remove-runs -target %/%/% -runname % -testpatt % -v +---------------- + +Archive Runs +^^^^^^^^^^^^ + +Megatest supports using the bup backup tool (https://bup.github.io/) to archive your tests for efficient storage +and retrieval. Archived data can be rapidly retrieved if needed. The metadata for the run (PASS/FAIL status, run +durations, time stamps etc.) are all preserved in the megatest database. + +For setup information see the Archiving topic in the reference section of this manual. + +To Archive +++++++++++ + +Hint: use the test control panel to create a template command by pushing the "Archive Tests" button. + +.Archive a full run +---------------- +megatest -target ubuntu/nfs/none -runname ww28.1a -archive save-remove -testpatt % +---------------- + +To Restore +++++++++++ + +.Retrieve a single test +---------------- +megatest -target ubuntu/nfs/none -runname ww28.1a -archive restore -testpatt diskperf/% +---------------- + +Hint: You can browse the archive using bup commands directly. 
+ +---------------- +bup -d /path/to/bup/archive ftp +---------------- + +Pass Data from Test to Test +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.To save the data call archive save within your test: +---------------- +megatest -archive save +---------------- + +.To retrieve the data call archive get using patterns as needed +---------------- +# Put the retrieved data into /tmp +DESTPATH=/tmp/$USER/$MT_TARGET/$MT_RUN_NAME/$MT_TESTNAME/$MT_ITEMPATH/my_data +mkdir -p $DESTPATH +megatest -archive get -runname % -dest $DESTPATH +---------------- + + +Submit jobs to Host Types based on Test Name +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.In megatest.config +------------------------ +[host-types] +general ssh #{getbgesthost general} +nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo + +[hosts] +general cubian xena + +[launchers] +envsetup general +xor/%/n 4C16G +% nbgeneral + +[jobtools] +launcher bsub +# if defined and not "no" flexi-launcher will bypass launcher unless there is no +# match. +flexi-launcher yes +------------------------ + +Tricks +------ + +This section is a compendium of a various useful tricks for debugging, +configuring and generally getting the most out of Megatest. + +Limiting your running jobs +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The following example will limit a test in the jobgroup "group1" to no more than 10 tests simultaneously. + +In your testconfig: + +---------------- +[test_meta] +jobgroup group1 +---------------- + +In your megatest.config: + +--------------- +[jobgroups] +group1 10 +custdes 4 +--------------- + +Debugging Tricks +---------------- + +Examining The Environment +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Test Control Panel - xterm +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +From the dashboard click on a test PASS/FAIL button. This brings up a test control panel. Aproximately near the center left of the +window there is a button "Start Xterm". 
Push this to get an xterm with the full context and environment loaded for that test. You can run +scripts or ezsteps by copying from the testconfig (hint, load up the testconfig in a separate gvim or emacs window). This is the easiest way +to debug your tests. + +During Config File Processing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +It is often helpful to know the content of variables in various +contexts as Megatest does the actions needed to run your tests. A handy technique is to force the startup of an xterm in the context being examined. + +For example, if an item list is not being generated as expected you +can inject the startup of an xterm as if it were an item: + +.Original items table +----------------- +[items] +CELLNAME [system getcellname.sh] +----------------- + +.Items table modified for debug +----------------- +[items] +DEBUG [system xterm] +CELLNAME [system getcellnames.sh] +----------------- + +When this test is run an xterm will pop up. In that xterm the +environment is exactly that in which the script "getcellnames.sh" +would run. You can now debug the script to find out why it isn't +working as expected. + +Organising Your Tests and Tasks +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The default location "tests" for storing tests can be extended by +adding to your tests-paths section. + +---------------------------- +[misc] +parent #{shell dirname $(readlink -f .)} + +[tests-paths] +1 #{get misc parent}/simplerun/tests +---------------------------- + +The above example shows how you can use addition sections in your +config file to do complex processing. By putting results of relatively +slow operations into variables the processing of your configs can be +kept fast. 
+ +Alternative Method for Running your Job Script +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.Directly running job in testconfig +------------------- +[setup] +runscript main.csh +------------------- + +The runscript method is essentially a brute force way to run scripts where the +user is responsible for setting STATE and STATUS and managing the details of running a test. + +Debugging Server Problems +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some handy Unix commands to track down issues with servers not +communicating with your test manager processes. Please put in tickets +at https://www.kiatoa.com/fossils/megatest if you have problems with +servers getting stuck. + +---------------- +sudo lsof -i +sudo netstat -lptu +sudo netstat -tulpn ---------------- Index: docs/manual/megatest_manual.html ================================================================== --- docs/manual/megatest_manual.html +++ docs/manual/megatest_manual.html @@ -900,10 +900,69 @@ sqlite3 database. Megatest has been used with the Intel Netbatch and lsf (also known as openlava) batch systems and it should be straightforward to use it with other similar systems.

+ +
+

Overview

+
+
+

Stand-alone Megatest Area

+

A single, stand-alone, Megatest-based testsuite or "area" is +sufficient for most validation, automation and build problems.

+
+
+Static +
+
+

Megatest is designed as a distributed or decoupled system. This means +you can run the areas stand-alone with no additional +infrastructure. I.e. there are no databases, web servers or other +centralized resources needed. However as your needs grow you can +integrate multiple areas into a bigger system.

+
+

Component Descriptions

+
    +
  1. +

    +Multi-area dashboard and xterm. A GUI (the dashboard) is usually the + best option for controlling and launching runs, but all operations + can also be done from the command line. Note: The not-yet-released + multi-area dashboard replaces the old dashboard for browsing and + controlling runs, but for managing a single area the old dashboard + works very well. +

    +
  2. +
  3. +

    +Area/testsuite. This is your testsuite or automation definition and + consists of the information in megatest.config, runconfigs.config + and your testconfigs along with any custom scripting that can’t be + done with the native Megatest features. +

    +
  4. +
  5. +

    +If your testsuite or build automation is too large to run on a + single instance you can distribute your jobs into a compute server + pool. The only current requirements are password-less ssh access and + a network filesystem. +

    +
  6. +
+
+
+
+

Full System Architecture

+
+
+Static +
+
+
+

Installation

@@ -1393,10 +1452,26 @@
bup -d /path/to/bup/archive ftp
+ +
+

Pass Data from Test to Test

+
+
To save the data, call archive save within your test:
+
+
megatest -archive save
+
+
+
To retrieve the data, call archive get, using patterns as needed
+
+
# Put the retrieved data into /tmp
+DESTPATH=/tmp/$USER/$MT_TARGET/$MT_RUNNAME/$MT_TESTNAME/$MT_ITEMPATH/my_data
+mkdir -p $DESTPATH
+megatest -archive get -runname % -dest $DESTPATH
+

Submit jobs to Host Types based on Test Name

In megatest.config
@@ -1973,10 +2048,32 @@ A x y B 1 2 # Yields x/1 y/2
+
+
Or use files
+
+
[itemopts]
+slash path/to/file/with/items
+# or
+space path/to/file/with/items
+
+
+
File format for / delimited
+
+
key1/key2/key3
+val1/val2/val2
+...
+
+
+
File format for space delimited
+
+
key1 key2 key3
+val1 val2 val2
+...
+

Requirements section

Header
@@ -2307,10 +2404,114 @@
Propagate environment to next step
$MT_MEGATEST -env2file .ezsteps/${stepname}
+
+
+

Scripts

+
+
Specifying scripts inline (best used only for simple scripts)
+
+
[scripts]
+loaddb #!/bin/bash
+  sqlite3 $1 <<EOF
+  .mode tabs
+  .import $2 data
+  .q
+  EOF
+
+

The above snippet results in the creation of an executable script +called "loaddb" in the test directory. NOTE: every line in the script +must be prefixed with the exact same number of spaces. Lines beginning +with a # will not work as expected. Currently you cannot indent +intermediate lines.

+
+
Full example with ezsteps, logpro rules, scripts etc.
+
+
# You can include a common file
+#
+[include #{getenv MT_RUN_AREA_HOME}/global-testconfig.inc]
+
+# Use "var" for a scratch pad
+#
+[var]
+dumpsql select * from data;
+sepstr .....................................
+
+# NOT IMPLEMENTED YET!
+#
+[ezsteps-addendum]
+prescript something.sh
+postscript something2.sh
+
+# Add additional steps here. Format is "stepname script"
+[ezsteps]
+importdb loaddb prod.db prod.sql
+dumpprod dumpdata prod.db "#{get var dumpsql}"
+diff (echo "prod#{get var sepstr}test";diff --side-by-side \
+     dumpprod.log reference.log ;echo DIFFDONE)
+
+[scripts]
+loaddb #!/bin/bash
+  sqlite3 $1 <<EOF
+  .mode tabs
+  .import $2 data
+  .q
+  EOF
+
+dumpdata #!/bin/bash
+  sqlite3 $1 <<EOF
+  .separator ,
+  $2
+  .q
+  EOF
+
+# Test requirements are specified here
+[requirements]
+waiton setup
+priority 0
+
+# Iteration for your test is controlled by the items section
+# The complicated if is needed to allow processing of the config for the dashboard when there are no actual runs.
+[items]
+THINGNAME [system generatethings.sh | sort -u]
+
+# Logpro rules for each step can be captured here in the testconfig
+# note: The ;; after the stepname and the leading whitespace are required
+#
+[logpro]
+inputdb ;;
+  (expect:ignore   in "LogFileBody"  < 99 "Ignore error in comments"      #/^\/\/.*error/)
+  (expect:warning  in "LogFileBody"  = 0 "Any warning"                    #/warn/)
+  (expect:required in "LogFileBody"  > 0 "Some data found"                #/^[a-z]{3,4}[0-9]+_r.*/)
+
+diff ;;
+  (expect:ignore   in "LogFileBody"  < 99 "Ignore error in comments"      #/^\/\/.*error/)
+  (expect:warning  in "LogFileBody"  = 0 "Any warning"                    #/warn/)
+  (expect:error    in "LogFileBody"  = 0 "< or > indicate missing entry"  (list #/(<|>)/   #/error/i))
+  (expect:error    in "LogFileBody"  = 0 "Difference in data"             (list #/\s+\|\s+/ #/error/i))
+  (expect:required in "LogFileBody"  > 0 "DIFFDONE Marker found"          #/DIFFDONE/)
+  (expect:required in "LogFileBody"  > 0 "Some things found"              #/^[a-z]{3,4}[0-9]+_r.*/)
+
+# NOT IMPLEMENTED YET!
+#
+## Also: enhance logpro to take list of command files: file1,file2...
+[waivers]
+createprod{target=%78/%/%/%} ;;
+  (disable:required "DIFFDONE Marker found")
+  (disable:error    "Some error")
+  (expect:waive  in "LogFileBody" < 99 "Waive if failed due to version" #/\w+3\.6.*/)
+
+# test_meta is a section for storing additional data on your test
+[test_meta]
+author matt
+owner  matt
+description Compare things
+tags tagone,tagtwo
+reviewed never
+

Triggers

In your testconfig or megatest.config triggers can be specified

@@ -2990,10 +3191,10 @@

ADDED docs/manual/overview.txt Index: docs/manual/overview.txt ================================================================== --- /dev/null +++ docs/manual/overview.txt @@ -0,0 +1,43 @@ + +Overview +-------- + +Stand-alone Megatest Area +~~~~~~~~~~~~~~~~~~~~~~~~~ + +A single, stand-alone, Megatest based testsuite or "area" is +sufficient for most validation, automation and build problems. + +image::megatest-stand-alone-area.png[Static,300] + +Megatest is designed as a distributed or decoupled system. This means +you can run the areas stand-alone with no additional +infrastructure. I.e. there are no databases, web servers or other +centralized resources needed. However as your needs grow you can +integrate multiple areas into a bigger system. + +Component Descriptions +^^^^^^^^^^^^^^^^^^^^^^ + +. Multi-area dashboard and xterm. A gui (the dashboard) is usually the + best option for controlling and launching runs but all operations + can also be done from the commandline. Note: The not yet released + multi-area dashboard replaces the old dashboard for browsing and + controlling runs but for managing a single area the old dashboard + works very well. + +. Area/testsuite. This is your testsuite or automation definition and + consists of the information in megatest.config, runconfigs.config + and your testconfigs along with any custom scripting that can't be + done with the native Megatest features. + +. If your testsuite or build automation is too large to run on a + single instance you can distribute your jobs into a compute server + pool. The only current requirements are password-less ssh access and + a network filesystem. 
+ +Full System Architecture +~~~~~~~~~~~~~~~~~~~~~~~~ + +image::megatest-system-architecture.png[Static,300] + Index: docs/manual/reference.txt ================================================================== --- docs/manual/reference.txt +++ docs/manual/reference.txt @@ -317,10 +317,31 @@ B 1 2 # Yields x/1 y/2 ------------------ +.Or use files +------------------ +[itemopts] +slash path/to/file/with/items +# or +space path/to/file/with/items +------------------ + +.File format for / delimited +------------------ +key1/key2/key3 +val1/val2/val2 +... +------------------ + +.File format for space delimited +------------------ +key1 key2 key3 +val1 val2 val2 +... +------------------ Requirements section ~~~~~~~~~~~~~~~~~~~~ .Header @@ -464,14 +485,12 @@ Complex mapping example ~~~~~~~~~~~~~~~~~~~~~~~ - // image::itemmap.png[] image::complex-itemmap.png[] - We accomplish this by configuring the testconfigs of our tests C D and E as follows: .Testconfig for Test E has ---------------------- @@ -514,14 +533,10 @@ itemstable ~~~~~~~~~~ An alternative to defining items is the itemstable section. This lets you define the itempath in a table format rather than specifying components and relying on getting all permutations of those components. 
- - - - Dynamic Flow Dependency Tree ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .Autogeneration waiton list for dynamic flow dependency trees @@ -652,10 +667,115 @@ .Propagate environment to next step ---------------------------- $MT_MEGATEST -env2file .ezsteps/${stepname} ---------------------------- +Scripts +~~~~~~~ + +.Specifying scripts inline (best used for only simple scripts) +---------------------------- +[scripts] +loaddb #!/bin/bash + sqlite3 $1 < 0 "Some data found" #/^[a-z]{3,4}[0-9]+_r.*/) + +diff ;; + (expect:ignore in "LogFileBody" < 99 "Ignore error in comments" #/^\/\/.*error/) + (expect:warning in "LogFileBody" = 0 "Any warning" #/warn/) + (expect:error in "LogFileBody" = 0 "< or > indicate missing entry" (list #/(<|>)/ #/error/i)) + (expect:error in "LogFileBody" = 0 "Difference in data" (list #/\s+\|\s+/ #/error/i)) + (expect:required in "LogFileBody" > 0 "DIFFDONE Marker found" #/DIFFDONE/) + (expect:required in "LogFileBody" > 0 "Some things found" #/^[a-z]{3,4}[0-9]+_r.*/) + +# NOT IMPLEMENTED YET! +# +## Also: enhance logpro to take list of command files: file1,file2... +[waivers] +createprod{target=%78/%/%/%} ;; + (disable:required "DIFFDONE Marker found") + (disable:error "Some error") + (expect:waive in "LogFileBody" < 99 "Waive if failed due to version" #/\w+3\.6.*/) + +# test_meta is a section for storing additional data on your test +[test_meta] +author matt +owner matt +description Compare things +tags tagone,tagtwo +reviewed never +----------------- + Triggers ~~~~~~~~ In your testconfig or megatest.config triggers can be specified Index: genexample.scm ================================================================== --- genexample.scm +++ genexample.scm @@ -156,18 +156,18 @@ (print "") (print "[setup]") (print "# Adjust max_concurrent_jobs to limit how much you load your machines") (print "max_concurrent_jobs 50\n") (print "# This is your link path. 
Avoid moving it once set.") - (print "linktree " (common:real-path lntree)) + (print "linktree " lntree) ;; (common:real-path lntree)) (print "\n# Job tools are more advanced ways to control how your jobs are launched") (print "[jobtools]\nuseshell yes\nlauncher nbfake\nmaxload 1.5\n") (print "# You can override environment variables for all your tests here") (print "[env-override]\nEXAMPLE_VAR example value\n") (print "# As you run more tests you may need to add additional disks, the names are arbitrary but must be unique") - (print "[disks]\ndisk0 " (common:real-path firstd)))) - + (print "[disks]\ndisk0 " firstd))) ;; (common:real-path firstd)))) + (print "================== I'm now creating a runconfigs.config file for you with a default section. You can use this file to set variables for your tests based on the \"target\" (the combination @@ -195,11 +195,11 @@ ;; Now create a test and logpro file (print "================== You now have the basic common files for your megatest setup. Next run -\"megatest -gen-test\" to create a test. +\"megatest -create-test \" to create a test. Thank you for using Megatest. 
You can edit your config files and create tests in the " path " directory Index: items.scm ================================================================== --- items.scm +++ items.scm @@ -123,13 +123,63 @@ (if s (string-split s) #f)))) (if valid-values (if (member item valid-values) item #f) item))) + +;; '(("k1" "k2" "k3") +;; ("a" "b" "c") +;; ("d" "e" "f")) +;; +;; => '((("k1" "a")("k2" "b")("k3" "c")) +;; (("k1" "d")("k2" "e")("k3" "f"))) +;; +(define (items:first-row-intersperse data) + (if (< (length data) 2) + '() + (let ((header (car data)) + (rows (cdr data))) + (map (lambda (row) + (map list header row)) + rows)))) + +;; k1/k2/k3 +;; a/b/c +;; d/e/f +;; => '(("k1" "k2" "k3") +;; ("a" "b" "c") +;; ("d" "e" "f")) +;; +;; => '((("k1" "a")("k2" "b")("k3" "c")) +;; (("k1" "d")("k2" "e")("k3" "f"))) +;; +(define (items:read-items-file fname ftype) ;; 'sxml 'slash 'space + (if (and fname (file-exists? fname)) + (items:first-row-intersperse (case ftype + ((slash space) + (let ((splitter (case ftype + ((slash) (lambda (x)(string-split x "/"))) + (else string-split)))) + (debug:print 0 *default-log-port* "Reading " fname " of type " ftype) + (with-input-from-file fname + (lambda () + (let loop ((inl (read-line)) + (res '())) + (if (eof-object? inl) + res + (loop (read-line)(cons (splitter inl) res)))))))) + ((sxml)(with-input-from-file fname read)) + (else (debug:print 0 *default-log-port* "items file type " ftype " not recognised")))) + (begin + (if fname (debug:print 0 *default-log-port* "no items file " fname " found")) + '()))) (define (items:get-items-from-config tconfig) - (let* ((have-items (hash-table-ref/default tconfig "items" #f)) + (let* ((slashf (configf:lookup tconfig "itemopts" "slash")) ;; a/b/c\nd/e/f\n ... + (sxmlf (configf:lookup tconfig "itemopts" "sxml")) ;; '(("a" "b" "c")("d" "e" "f") ...) + (spacef (configf:lookup tconfig "itemopts" "space")) ;; a b c\nd e f\n ... 
+ (have-items (hash-table-ref/default tconfig "items" #f)) (have-itable (hash-table-ref/default tconfig "itemstable" #f)) (items (hash-table-ref/default tconfig "items" '())) (itemstable (hash-table-ref/default tconfig "itemstable" '()))) (debug:print 5 *default-log-port* "items: " items " itemstable: " itemstable) (set! items (map (lambda (item) @@ -142,14 +192,21 @@ (list (car item)((cadr item))) ;; evaluate the proc item)) itemstable)) (if (and have-items (null? items)) (debug:print 0 *default-log-port* "WARNING:[items] section in testconfig but no entries defined")) (if (and have-itable (null? itemstable))(debug:print 0 *default-log-port* "WARNNG:[itemstable] section in testconfig but no entries defined")) - (if (or (not (null? items))(not (null? itemstable))) + (if (or (not (null? items)) + (not (null? itemstable)) + slashf + sxmlf + spacef) (append (item-assoc->item-list items) - (item-table->item-list itemstable)) + (item-table->item-list itemstable) + (items:read-items-file slashf 'slash) + (items:read-items-file sxmlf 'sxml) + (items:read-items-file spacef 'space)) '(())))) ;; (pp (item-assoc->item-list itemdat)) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -93,16 +93,22 @@ ;; (let ((info (cadr ezstep))) ;; (if (proc? info) "" info))) ;; (stepproc (let ((info (cadr ezstep))) ;; (if (proc? info) info #f))) (stepparts (string-match (regexp "^(\\{([^\\}\\{]*)\\}\\s*|)(.*)$") stepinfo)) - (stepparams (list-ref stepparts 2)) ;; for future use, {VAR=1,2,3}, run step for each + (stepparams (if (and (list? stepparts) + (> (length stepparts) 1)) + (list-ref stepparts 2) + #f)) ;; for future use, {VAR=1,2,3}, run step for each (paramparts (if (string? stepparams) (map (lambda (x)(string-split x "=")) (string-split-fields "[^;]*=[^;]*" stepparams)) '())) (subrun (alist-ref "subrun" paramparts equal?)) - (stepcmd (list-ref stepparts 3)) + (stepcmd (if (and (list? 
stepparts) + (> (length stepparts) 2)) + (list-ref stepparts 3) + (conc "# error, no command for step "stepname))) (script "") ; "#!/bin/bash\n") ;; yep, we depend on bin/bash FIXME!!!\ (logpro-file (conc stepname ".logpro")) (html-file (conc stepname ".html")) (dat-file (conc stepname ".dat")) (tconfig-logpro (configf:lookup testconfig "logpro" stepname)) @@ -617,11 +623,19 @@ ) ;; (set-signal-handler! signal/stop sighand) ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART, ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY* ;; - (let* ((test-info (rmt:get-test-info-by-id run-id test-id)) + (let* ((test-info (let loop ((tries 0)) + (let ((tinfo (rmt:get-test-info-by-id run-id test-id))) + (if tinfo + tinfo + (if (> tries 5) + #f + (begin + (thread-sleep! (+ 1 (* tries 10))) + (loop (+ tries 1)))))))) (test-host (if test-info (db:test-get-host test-info) (begin (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.") (exit)))) @@ -782,13 +796,29 @@ ;; any previous runs ;; (db:test-remove-steps db run-id testname itemdat) ;; now is also a good time to write the .testconfig file (let* ((tconfig-fname (conc work-area "/.testconfig")) (tconfig-tmpfile (conc tconfig-fname ".tmp")) - (tconfig (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t))) ;; 'return-procs))) + (tconfig (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs))) + (scripts (configf:get-section tconfig "scripts"))) + ;; create .testconfig file (configf:write-alist tconfig tconfig-tmpfile) - (file-move tconfig-tmpfile tconfig-fname #t)) + (file-move tconfig-tmpfile tconfig-fname #t) + (delete-file* ".final-status") + + ;; extract scripts from testconfig and write them to files in test run dir + (for-each + (lambda (scriptdat) + (match scriptdat + ((name content) + (with-output-to-file name + (lambda () + (print content) + (change-file-mode name (bitwise-ior perm/irwxg 
perm/irwxu))))) + (else + (debug:print-info 0 "Invalid script definiton found in [scripts] section of testconfig. \"" scriptdat "\"")))) + scripts)) ;; (let* ((m (make-mutex)) (kill-job? #f) (exit-info (make-launch:einf pid: #t exit-status: #t exit-code: #t rollup-status: 0)) ;; pid exit-status exit-code (i.e. process was successfully run) rollup-status (job-thread #f) @@ -832,10 +862,14 @@ ((eq? (launch:einf-rollup-status exit-info) 4) "WAIVED") ((eq? (launch:einf-rollup-status exit-info) 5) "ABORT") ((eq? (launch:einf-rollup-status exit-info) 6) "SKIP") (else "FAIL")))) ;; (db:test-get-status testinfo))) (debug:print-info 1 *default-log-port* "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (launch:einf-exit-status exit-info) " and rollup-status of " (launch:einf-rollup-status exit-info)) + + ;; Leave a .final-status file for each sub-test + (tests:save-final-status run-id test-id) + (tests:test-set-status! run-id test-id new-state new-status (args:get-arg "-m") #f) @@ -842,12 +876,14 @@ ;; need to update the top test record if PASS or FAIL and this is a subtest ;; NO NEED TO CALL set-state-status-and-roll-up-items HERE, THIS IS DONE IN set-state-status-and-roll-up-items called by tests:test-set-status! )) ;; for automated creation of the rollup html file this is a good place... (if (not (equal? item-path "")) - (tests:summarize-items run-id test-id test-name #f)) + (tests:summarize-items run-id test-id test-name #f)) (tests:summarize-test run-id test-id) ;; don't force - just update if no + ;; Leave a .final-status file for the top level test + (tests:save-final-status run-id test-id) (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) (mutex-unlock! 
m) (launch:end-of-run-check run-id ) (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n") @@ -873,17 +909,25 @@ (running-cnt (rmt:get-count-tests-running-for-run-id run-id)) (all-test-launched (rmt:get-var (conc "lunch-complete-" run-id))) (current-state (rmt:get-run-state run-id)) (current-status (rmt:get-run-status run-id))) ;;get-vars run-id to query metadata table to check if all completed. if all-test-launched = yes then only not-completed-cnt = 0 means everyting is completed if no entry found in the table do nothing - (debug:print 0 *default-log-port* "rollup run state/status") + (debug:print 0 *default-log-port* "Running test cnt :" running-cnt) (rmt:set-state-status-and-roll-up-run run-id current-state current-status) - + (runs:update-junit-test-reporter-xml run-id) (cond ((and all-test-launched (eq? not-completed-cnt 0) (equal? all-test-launched "yes" )) - (debug:print 0 *default-log-port* "look for post hook.") - (runs:run-post-hook run-id)) + (if (and (equal? (rmt:get-var (conc "end-of-run-" run-id)) "no") (common:simple-lock (conc "endOfRun" run-id))) + (begin + (debug:print 4 *default-log-port* "look for post hook. currseconds: " (current-seconds) " EOR " (rmt:get-var (conc "end-of-run-" run-id))) + (debug:print 0 *default-log-port* "End of Run Detected.") + (rmt:set-var (conc "end-of-run-" run-id) "yes") + ;(thread-sleep! 10) + (runs:run-post-hook run-id) + (debug:print 4 *default-log-port* "currseconds: " (current-seconds)" eor: " (rmt:get-var (conc "end-of-run-" run-id))) + (common:simple-unlock (conc "endOfRun" run-id))) + (debug:print 0 *default-log-port* "End of Run Detected but not running post hook. This should happen when eor is set to yes. This will happen only when 2 tests exit at smae time. 
eor= " (rmt:get-var (conc "end-of-run-" run-id))))) ((> running-cnt 3) (debug:print 0 *default-log-port* "There are " running-cnt " tests running." )) ((> running-cnt 0) (debug:print 0 *default-log-port* "running cnt > 0 but <= 3 kill-running-tests-if-dead" ) (let ((kill-cnt (launch:kill-tests-if-dead run-id))) @@ -899,18 +943,18 @@ (debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed") (if (not (null? tal)) (loop (car tal) (cdr tal))))))))))) (define (launch:is-test-alive host pid) -(if (and host pid (not (equal? host "n/a"))) -(let* ((cmd (conc "ssh " host " pstree -A " pid)) - (output (with-input-from-pipe cmd read-lines))) - (print "cmd: " cmd "\n op: " output ) - (if(eq? (length output) 0) - #f - #t)) -#t)) + (if (and host pid (not (equal? host "n/a"))) + (let* ((cmd (conc "ssh " host " pstree -A " pid)) + (output (with-input-from-pipe cmd read-lines))) + (debug:print 2 *default-log-port* "Running " cmd " received " output) + (if (eq? (length output) 0) + #f + #t)) + #t)) (define (launch:kill-tests-if-dead run-id) (let* ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f))) (let loop ((running-test (car running-tests)) (tal (cdr running-tests)) @@ -1029,11 +1073,11 @@ (if (and (eq? *configstatus* 'fulldata) *toppath* (not force-reread)) ;; no need to reprocess *toppath* ;; return toppath (let* ((use-cache (common:use-cache?)) ;; BB- use-cache checks *configdat* for use-cache setting. We do not have *configdat*. Bootstrapping problem here. 
- (toppath (or *toppath* areapath (getenv "MT_RUN_AREA_HOME"))) ;; preserve toppath + (toppath (common:get-toppath areapath)) (target (common:args-get-target)) (sections (if target (list "default" target) #f)) ;; for runconfigs (mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) ;; checking for null cachefiles should not be necessary, I was seeing error car of '(), might be a chicken bug or a red herring ... @@ -1096,11 +1140,12 @@ (begin (debug:print-error 0 *default-log-port* "you are not in a megatest area!") (exit 1))) (setenv "MT_RUN_AREA_HOME" *toppath*) ;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it - (let* ((keys (rmt:get-keys)) + (let* ((keys (common:list-or-null (rmt:get-keys) + message: "Failed to retrieve keys in launch.scm. Please report this to the developers.")) (key-vals (keys:target->keyval keys target)) (linktree (common:get-linktree)) ;; (or (getenv "MT_LINKTREE")(if *configdat* (configf:lookup *configdat* "setup" "linktree") #f))) ; (if *configdat* ; (configf:lookup *configdat* "setup" "linktree") ; (conc *toppath* "/lt")))) @@ -1224,10 +1269,11 @@ (let ((cfname (args:get-arg "-append-config"))) (if (and cfname (file-read-access? cfname)) (read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special. *toppath*))) + (define (get-best-disk confdat testconfig) (let* ((disks (or (and testconfig (hash-table-ref/default testconfig "disks" #f)) (hash-table-ref/default confdat "disks" #f))) (minspace (let ((m (configf:lookup confdat "setup" "minspace"))) @@ -1234,11 +1280,11 @@ (string->number (or m "10000"))))) (if disks (let ((res (common:get-disk-with-most-free-space disks minspace))) ;; min size of 1000, seems tad dumb (if res (cdr res) - (begin + (begin ;; DEAD CODE PATH - REVISIT! 
;; (if (common:low-noise-print 20 "No valid disks or no disk with enough space") ;; (debug:print-error 0 *default-log-port* "No valid disks found in megatest.config. Please add some to your [disks] section and ensure the directory exists and has enough space!\n You can change minspace in the [setup] section of megatest.config. Current setting is: " minspace)) ;;(exit 1) (if (null? disks) (cons 1 (conc *toppath* "/runs")) @@ -1247,12 +1293,21 @@ (let ((result (handle-exceptions exn #f (create-directory (cadr head) #t)))) (if result result (if (null? tail) (cons 1 (conc *toppath* "/runs")) - (loop (car tail) (cdr tail)))))))))))))) ;; the code creates the necessary directories if it does not exist and returns the path. - + (loop (car tail) (cdr tail))))))))))) + ;; no disks definition - use mtrah/runs, fall back to currdir/runs + (let* ((toppath (or *toppath* + (common:get-toppath *toppath*) + (begin + (debug:print-error 0 *default-log-port* "Creating runs dir in current directory, this is probably not what you wanted. Please check your setup.") + (current-directory)))) + (runsdir (conc toppath "/runs"))) + (if (not (file-exists? runsdir))(create-directory runsdir)) + runsdir) + ))) ;; the code creates the necessary directories if it does not exist and returns the path. 
(define (launch:test-copy test-src-path test-path) (let* ((ovrcmd (let ((cmd (configf:lookup *configdat* "setup" "testcopycmd"))) (if cmd ;; substitute the TEST_SRC_PATH and TEST_TARG_PATH @@ -1352,29 +1407,29 @@ (let ((iterated-parent (pathname-directory (conc lnkpath "/" item-path)))) (debug:print-info 2 *default-log-port* "Creating iterated parent " iterated-parent) (handle-exceptions exn (begin - (debug:print-error 0 *default-log-port* " Failed to create directory " iterated-parent ((condition-property-accessor 'exn 'message) exn) ", exiting") - (exit 1)) + (debug:print-error 0 *default-log-port* " Failed to create directory " iterated-parent ((condition-property-accessor 'exn 'message) exn) ", continuing but link tree may be corrupted") + #;(exit 1)) (create-directory iterated-parent #t)))) (if (symbolic-link? lnkpath) (handle-exceptions exn (begin - (debug:print-error 0 *default-log-port* " Failed to remove symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting") - (exit 1)) + (debug:print-error 0 *default-log-port* " Failed to remove symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", continuing but link tree may be corrupted.") + #;(exit 1)) (delete-file lnkpath))) (if (not (or (common:file-exists? lnkpath) (symbolic-link? lnkpath))) (handle-exceptions exn (begin - (debug:print-error 0 *default-log-port* " Failed to create symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting") - (exit 1)) + (debug:print-error 0 *default-log-port* " Failed to create symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", continuing but link tree may be corrupted.") + #;(exit 1)) (create-symbolic-link toptest-path lnkpath))) ;; NB - This was not working right - some top tests are not getting the path set!!! 
;; ;; Do the setting of this record after the paths are created so that the shortdir can Index: megatest-version.scm ================================================================== --- megatest-version.scm +++ megatest-version.scm @@ -18,6 +18,6 @@ ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00.. (declare (unit megatest-version)) -(define megatest-version 1.6543) +(define megatest-version 1.6558) Index: megatest.config ================================================================== --- megatest.config +++ megatest.config @@ -48,11 +48,11 @@ # snazy selector=QUICKPATT/ [nopurpose] [access] -ext matt:admin mattw:owner +ext #{getenv USER}:admin matt:admin mattw:owner [accesstypes] admin run rerun resume remove set-ss rerun-clean owner run rerun resume remove rerun-all badguy set-ss Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -226,11 +226,12 @@ will substitute %s for the sheet name in generating multiple sheets) -o : output file for refdb2dat (defaults to stdout) -archive cmd : archive runs specified by selectors to one of disks specified in the [archive-disks] section. - cmd: keep-html, restore, save, save-remove + cmd: keep-html, restore, save, save-remove, get (use + -dest to set destination), -include path1,path2... to get or save specific files -generate-html : create a simple html dashboard for browsing your runs -generate-html-structure : create a top level html veiw to list targets/runs and a Run view within each run directory. -list-run-time : list time requered to complete runs. It supports following switches -run-patt -target-patt -dumpmode -list-test-time : list time requered to complete each test in a run. 
It following following arguments @@ -299,10 +300,11 @@ "-rerun" "-days" "-rename-run" "-to" + "-dest" ;; values and messages ":category" ":variable" ":value" ":expected" @@ -329,13 +331,18 @@ ;; move runs stuff here "-remove-keep" "-set-run-status" "-age" + + ;; archive "-archive" "-actions" "-precmd" + "-include" + "-exclude-rx" + "-exclude-rx-from" "-debug" ;; for *verbosity* > 2 "-create-test" "-override-timeout" "-test-files" ;; -test-paths is for listing all @@ -505,13 +512,19 @@ (if start-watchdog (thread-start! *watchdog*))) ;; bracket open-output-file with code to make leading directory if it does not exist and handle exceptions -(define (open-logfile logpath) +(define (open-logfile logpath-in) (condition-case - (let* ((log-dir (or (pathname-directory logpath) "."))) + (let* ((log-dir (or (pathname-directory logpath-in) ".")) + (fname (pathname-strip-directory logpath-in)) + (logpath (if (> (string-length fname) 250) + (let ((newlogf (conc log-dir "/" (common:get-signature fname) ".log"))) + (debug:print 0 *default-log-port* "WARNING: log file " logpath-in " path too long, converted to " newlogf) + newlogf) + logpath-in))) (if (not (directory-exists? 
log-dir)) (system (conc "mkdir -p " log-dir))) (open-output-file logpath)) (exn () (debug:print-error 0 *default-log-port* "Could not open log file for write: "logpath) @@ -1041,23 +1054,34 @@ ;; Remove old run(s) ;;====================================================================== ;; since several actions can be specified on the command line the removal ;; is done first -(define (operate-on action #!key (mode #f)) ;; #f is "use default" +(define (operate-on action #!key (mode #f)(target-in #f)(runname-in #f)(keys-in #f)(keyvals-in #f)) ;; #f is "use default" (let* ((runrec (runs:runrec-make-record)) - (target (common:args-get-target))) + (target (or target-in (common:args-get-target))) ;; eventually get rid of the call to common:args-get-target + (runname (or runname-in + (args:get-arg "-runname"))) ;; eventually get rid of the get-arg calls + (testpatt (or (args:get-arg "-testpatt") + (and (eq? action 'archive) ;; if it is an archive command fallback to MT_TEST_NAME and MT_ITEMPATH + (common:get-full-test-name)) + (and (eq? action 'kill-runs) + "%/%") ;; I'm just guessing that this is correct :( + (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt"))) + ))) ;; (cond ((not target) - (debug:print-error 0 *default-log-port* "Missing required parameter for " action ", you must specify -target or -reqtarg") + (debug:print-error 0 *default-log-port* "Missing required parameter for " + action ", you must specify -target or -reqtarg") (exit 1)) - ((not (or (args:get-arg ":runname") - (args:get-arg "-runname"))) - (debug:print-error 0 *default-log-port* "Missing required parameter for " action ", you must specify the run name pattern with -runname patt") + ((not runname) + (debug:print-error 0 *default-log-port* "Missing required parameter for " + action ", you must specify the run name pattern with -runname patt") (exit 2)) - ((not (or (args:get-arg "-testpatt") (eq? 
action 'kill-runs))) - (debug:print-error 0 *default-log-port* "Missing required parameter for " action ", you must specify the test pattern with -testpatt") + ((not testpatt) + (debug:print-error 0 *default-log-port* "Missing required parameter for " + action ", you must specify the test pattern with -testpatt") (exit 3)) (else (if (not (car *configinfo*)) (begin (debug:print-error 0 *default-log-port* "Attempted " action "on test(s) but run area config file not found") @@ -1066,13 +1090,13 @@ (begin ;; check for correct version, exit with message if not correct (common:exit-on-version-changed) (runs:operate-on action target - (common:args-get-runname) ;; (or (args:get-arg "-runname")(args:get-arg ":runname")) - (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt") - state: (common:args-get-state) + runname + testpatt + state: (common:args-get-state) status: (common:args-get-status) new-state-status: (args:get-arg "-set-state-status") mode: mode))) (set! *didsomething* #t))))) @@ -1652,10 +1676,69 @@ ;;====================================================================== ;; full run ;;====================================================================== + +(define (handle-run-requests target runname keys keyvals need-clean) + (if (or (args:get-arg "-kill-rerun") (args:get-arg "-rerun-clean")) ;; first set states/statuses correct + ;; For rerun-clean do we or do we not support the testpatt? + (let ((states (or (configf:lookup *configdat* "validvalues" "cleanrerun-states") + "KILLREQ,KILLED,UNKNOWN,INCOMPLETE,STUCK,NOT_STARTED")) + (statuses (or (configf:lookup *configdat* "validvalues" "cleanrerun-statuses") + "FAIL,INCOMPLETE,ABORT,CHECK,DEAD,PREQ_FAIL,PREQ_DISCARDED"))) + (hash-table-set! 
args:arg-hash "-preclean" #t) + (runs:operate-on 'set-state-status + target + (common:args-get-runname) ;; (or (args:get-arg "-runname")(args:get-arg ":runname")) + ;; "%" ;; (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt") + (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt") + state: states + ;; status: statuses + new-state-status: "NOT_STARTED,n/a") + (runs:clean-cache target runname *toppath*) + (runs:operate-on 'set-state-status + target + (common:args-get-runname) ;; (or (args:get-arg "-runname")(args:get-arg ":runname")) + ;; "%" ;; (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt") + (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt") + ;; state: states + status: statuses + new-state-status: "NOT_STARTED,n/a"))) + ;; RERUN ALL + (if (args:get-arg "-rerun-all") ;; first set states/statuses correct + (let* ((rconfig (full-runconfigs-read))) + (hash-table-set! args:arg-hash "-preclean" #t) + (runs:operate-on 'set-state-status + target + (common:args-get-runname) ;; (or (args:get-arg "-runname")(args:get-arg ":runname")) + (common:args-get-testpatt rconfig) ;; (args:get-arg "-testpatt") + state: #f + ;; status: statuses + new-state-status: "NOT_STARTED,n/a") + (runs:clean-cache target runname *toppath*) + (runs:operate-on 'set-state-status + target + (common:args-get-runname) ;; (or (args:get-arg "-runname")(args:get-arg ":runname")) + (common:args-get-testpatt rconfig) ;; (args:get-arg "-testpatt") + ;; state: states + status: #f + new-state-status: "NOT_STARTED,n/a"))) + (let* ((config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns"))) + (if x (string->number x) #f))) + (rerun-cnt (if config-reruns + config-reruns + 1))) + + (runs:run-tests target + runname + #f ;; (common:args-get-testpatt #f) + ;; (or (args:get-arg "-testpatt") + ;; "%") + user + args:arg-hash + run-count: rerun-cnt))) ;; get lock in db for full run for this directory ;; for all tests with deps ;; walk tree of tests to find head tasks ;; add 
head tasks to task queue @@ -1675,72 +1758,36 @@ (args:get-arg "-rerun-clean") (args:get-arg "-rerun-all") (args:get-arg "-runtests") (args:get-arg "-kill-rerun")) (let ((need-clean (or (args:get-arg "-rerun-clean") - (args:get-arg "-rerun-all")))) + (args:get-arg "-rerun-all"))) + (orig-cmdline (string-intersperse (argv) " "))) (general-run-call "-runall" "run all tests" (lambda (target runname keys keyvals) - (if (or (args:get-arg "-kill-rerun") (args:get-arg "-rerun-clean")) ;; first set states/statuses correct - ;; For rerun-clean do we or do we not support the testpatt? - (let ((states (or (configf:lookup *configdat* "validvalues" "cleanrerun-states") - "KILLREQ,KILLED,UNKNOWN,INCOMPLETE,STUCK,NOT_STARTED")) - (statuses (or (configf:lookup *configdat* "validvalues" "cleanrerun-statuses") - "FAIL,INCOMPLETE,ABORT,CHECK,DEAD,PREQ_FAIL,PREQ_DISCARDED"))) - (hash-table-set! args:arg-hash "-preclean" #t) - (runs:operate-on 'set-state-status - target - (common:args-get-runname) ;; (or (args:get-arg "-runname")(args:get-arg ":runname")) - ;; "%" ;; (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt") - (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt") - state: states - ;; status: statuses - new-state-status: "NOT_STARTED,n/a") - (runs:clean-cache target runname *toppath*) - (runs:operate-on 'set-state-status - target - (common:args-get-runname) ;; (or (args:get-arg "-runname")(args:get-arg ":runname")) - ;; "%" ;; (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt") - (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt") - ;; state: states - status: statuses - new-state-status: "NOT_STARTED,n/a"))) - ;; RERUN ALL - (if (args:get-arg "-rerun-all") ;; first set states/statuses correct - (let* ((rconfig (full-runconfigs-read))) - (hash-table-set! 
args:arg-hash "-preclean" #t) - (runs:operate-on 'set-state-status - target - (common:args-get-runname) ;; (or (args:get-arg "-runname")(args:get-arg ":runname")) - (common:args-get-testpatt rconfig) ;; (args:get-arg "-testpatt") - state: #f - ;; status: statuses - new-state-status: "NOT_STARTED,n/a") - (runs:clean-cache target runname *toppath*) - (runs:operate-on 'set-state-status - target - (common:args-get-runname) ;; (or (args:get-arg "-runname")(args:get-arg ":runname")) - (common:args-get-testpatt rconfig) ;; (args:get-arg "-testpatt") - ;; state: states - status: #f - new-state-status: "NOT_STARTED,n/a"))) - (let* ((config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns"))) - (if x (string->number x) #f))) - (rerun-cnt (if config-reruns - config-reruns - 1))) - - (runs:run-tests target - runname - #f ;; (common:args-get-testpatt #f) - ;; (or (args:get-arg "-testpatt") - ;; "%") - user - args:arg-hash - run-count: rerun-cnt)))))) + (if (or (string-search "%" target) + (string-search "%" runname)) ;; we are being asked to re-run multiple runs + (let* ((run-specs (rmt:simple-get-runs runname #f #f target #f))) ;; list of simple-run records + (debug:print-info 0 *default-log-port* "Pattern supplied for target or runname with " + (length run-specs) " matches round. Running each in turn.") + (if (null? 
run-specs) + (debug:print 0 *default-log-port* "WARNING: No runs match target " target " and runname " runname)) + (for-each (lambda (spec) + (let* ((newcmdline (string-substitute + (conc "target " target) + (conc "target " (simple-run-target spec)) + (string-substitute + (conc "runname " runname) + (conc "runname " (simple-run-runname spec)) + orig-cmdline)))) + (debug:print 0 *default-log-port* "ORIG: " orig-cmdline) + (debug:print 0 *default-log-port* "NEW: " newcmdline) + (system newcmdline))) + run-specs)) + (handle-run-requests target runname keys keyvals need-clean)))))) ;;====================================================================== ;; run one test ;;====================================================================== @@ -1864,15 +1911,22 @@ ;; Archive tests ;;====================================================================== ;; Archive tests matching target, runname, and testpatt (if (args:get-arg "-archive") ;; else do a general-run-call - (general-run-call - "-archive" - "Archive" - (lambda (target runname keys keyvals) - (operate-on 'archive)))) + (begin + ;; for the archive get we need to preserve the starting dir as part of the target path + (if (and (args:get-arg "-dest") + (not (equal? (substring (args:get-arg "-dest") 0 1) "/"))) + (let ((newpath (conc (current-directory) "/" (args:get-arg "-dest")))) + (debug:print-info 1 *default-log-port* "Preserving original path to destination, was " (args:get-arg "-dest") ", now " newpath) + (hash-table-set! 
args:arg-hash "-dest" newpath))) + (general-run-call + "-archive" + "Archive" + (lambda (target runname keys keyvals) + (operate-on 'archive target-in: target runname-in: runname ))))) ;;====================================================================== ;; Extract a spreadsheet from the runs database ;;====================================================================== Index: mtut.scm ================================================================== --- mtut.scm +++ mtut.scm @@ -23,10 +23,11 @@ (define (toplevel-command . a) #f) (use srfi-1 posix srfi-69 readline ;; regex regex-case srfi-69 apropos json http-client directory-utils rpc typed-records;; (srfi 18) extras) srfi-19 srfi-18 extras format pkts regex regex-case (prefix dbi dbi:) + (prefix sqlite3 sqlite3:) nanomsg) (declare (uses common)) (declare (uses megatest-version)) (declare (uses margs)) @@ -230,10 +231,11 @@ ("-msg" . M) ("-start-dir" . S) ("-set-vars" . v) ("-config" . h) ("-time-out" . u) + ("-archive" . b) )) (define *switch-keys* '( ("-h" . #f) ("-help" . #f) @@ -257,11 +259,11 @@ (kill-run . "-kill-runs") (kill-rerun . "-kill-rerun") (lock . "-lock") (unlock . "-unlock") (sync . "") - (archive . "-archive") + (archive . "") (set-ss . "-set-state-status") (remove . "-remove-runs"))) ;; manually keep this list updated from the keys to ;; the case *action* near the end of this file. @@ -841,12 +843,11 @@ (contours (configf:get-section mtconf "contours")) (torun (make-hash-table)) ;; target => ( ... info ... 
) (rgentargs (hash-table-keys rgconf))) ;; these are the targets registered for automatically triggering ;;(print "rgentargs: " rgentargs) - - (for-each + (for-each (lambda (runkey) (let* ((keydats (configf:get-section rgconf runkey))) (for-each (lambda (sense) ;; these are the sense rules (let* ((key (car sense)) @@ -1429,10 +1430,11 @@ (areas (configf:get-section mtconf "areas")) (contours (configf:get-section mtconf "contours")) (pkts (find-pkts pdb '(cmd) '())) (torun (make-hash-table)) ;; target => ( ... info ... ) (rgentargs (hash-table-keys rgconf))) ;; these are the targets registered for automatically triggering + (sqlite3:set-busy-handler! (dbi:db-conn pdb) (sqlite3:make-busy-timeout 10000)) (for-each (lambda (pktdat) (let* ((pkta (alist-ref 'apkt pktdat)) (action (alist-ref 'A pkta)) (cmdline (pkt->cmdline pkta)) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -70,11 +70,11 @@ (cond ((> attemptnum 2) (thread-sleep! 0.05)) ((> attemptnum 10) (thread-sleep! 0.5)) ((> attemptnum 20) (thread-sleep! 1))) (if (and (> attemptnum 5) (= 0 (modulo attemptnum 15))) - (begin (rmt:start-server rid) (thread-sleep! 3))) + (begin (server:run *toppath*) (thread-sleep! 3))) ;;DOT digraph megatest_state_status { ;;DOT ranksep=0; ;;DOT // rankdir=LR; @@ -253,11 +253,11 @@ (define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid) ;; (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9") ;; (mutex-lock! *rmt-mutex*) (let* ((conninfo (remote-conndat runremote)) - (dat (case (remote-transport runremote) + (dat-in (case (remote-transport runremote) ((http) (condition-case ;; handling here has ;; caused a lot of ;; problems. 
However it ;; is needed to deal with ;; attemtped @@ -268,10 +268,37 @@ ((commfail)(vector #f "communications fail")) ((exn)(vector #f "other fail" (print-call-chain))))) (else (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported") (exit)))) + +;; No Title +;; Error: (vector-ref) out of range +;; #(# (#("db.scm:3740: regex#regexp" #f #f) #("db.scm:3739: regex#string-substitute" #f #f) #("db.scm:3738: base64#base64-decode" #f #f) #("db.scm:3737: z3#z3:decode-buffer" #f #f) #("db.scm:3736: with-input-from-string" #f #f) #("db.scm:3741: s11n#deserialize" #f #f) #("api.scm:374: api:execute-requests" #f #f) #("api.scm:139: call-with-current-continuation" #f #f) #("api.scm:139: with-exception-handler" #f #f) #("api.scm:139: ##sys#call-with-values" #f #f) #("api.scm:158: string->symbol" #f #f) #("api.scm:160: current-milliseconds" #f #f) #("api.scm:161: dbr:dbstruct-read-only" #f #f) #("api.scm:139: k15" #f #f) #("api.scm:139: g19" #f #f) #("api.scm:142: get-call-chain" #f #f)) #("get-test-info-by-id" (1102 507299))) +;; 6 +;; +;; Call history: +;; +;; http-transport.scm:306: thread-terminate! +;; http-transport.scm:307: debug:print-info +;; common_records.scm:235: debug:debug-mode +;; rmt.scm:259: k587 +;; rmt.scm:259: g591 +;; rmt.scm:276: http-transport:server-dat-update-last-access +;; http-transport.scm:364: current-seconds +;; rmt.scm:282: debug:print-info +;; common_records.scm:235: debug:debug-mode +;; rmt.scm:283: mutex-unlock! +;; rmt.scm:287: extras-transport-succeded <-- +;; +-----------------------------------------------------------------------------+ +;; | Exit Status : 70 +;; + + (dat (if (and (vector? dat-in) ;; ... check it is a correct size + (> (vector-length dat-in) 1)) + dat-in + (vector #f (conc "communications fail (type 2), dat-in=" dat-in)))) (success (if (vector? dat) (vector-ref dat 0) #f)) (res (if (vector? dat) (vector-ref dat 1) #f))) (if (and (vector? 
conninfo) (< 5 (vector-length conninfo))) (http-transport:server-dat-update-last-access conninfo) ;; refresh access time (begin Index: runconfigs.config ================================================================== --- runconfigs.config +++ runconfigs.config @@ -23,11 +23,11 @@ # (add-to-list 'auto-mode-alist '("config\\'" . conf-space-mode)) # # example of a cron entry to run sync using db spec pgdb, with pgdb setting in file local.config # -[a/b/c] +[a/b/c/d] all:scheduled:sync cron= 0/5 * * * *;dbdest=pgdb;appendconf=/nfs/phoebe/disk1/home/mfs/matt/.sysmaint/local.config # quick:scheduled:sync cron= 0/5 * * * *;dbdest=pgdb;appendconf=/nfs/phoebe/disk1/home/mfs/matt/.sysmaint/local.config # fast:scheduled:sync-prepend cron= 0/1 * * * *;dbdest=pgdb;appendconf=/mfs/matt/.sysmaint/local.config [scriptinc ./gentargets.sh #{getenv USER}] Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -16,11 +16,11 @@ ;; along with Megatest. If not, see . ;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') (use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (srfi 18) - posix-extras directory-utils pathname-expand typed-records format) + posix-extras directory-utils pathname-expand typed-records format sxml-serializer sxml-modifications) (declare (unit runs)) (declare (uses db)) (declare (uses common)) (declare (uses items)) @@ -237,16 +237,21 @@ ;; Take advantage of a good place to exit if running the one-pass methodology (if (and (> (runs:dat-can-run-more-tests-count runsdat) 20) (args:get-arg "-one-pass")) (exit 0)) - (thread-sleep! (cond ;; BB: check with Matt. Should this sleep move to cond clauses below where we determine we have too many jobs running rather than each time the and condition above is true (which seems like always)? - ((> (runs:dat-can-run-more-tests-count runsdat) 20) + (thread-sleep! (cond ;; BB: check with Matt. 
Should this sleep move + ;; to cond clauses below where we determine we + ;; have too many jobs running rather than each + ;; time the and condition above is true (which + ;; seems like always)? + ((> (runs:dat-can-run-more-tests-count runsdat) 20) ;; original intent was - save cycles, wait a long time (if (runs:lownoise "waiting on tasks" 60)(debug:print-info 2 *default-log-port* "waiting for tasks to complete, sleeping briefly ...")) - (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.1) ;; was 2 - );; obviously haven't had any work to do for a while - (else 0))) + 10) ;; obviously haven't had any work to do for a while + (else + ;; if have a number for inter-test-delay, use it, else don't delay much, maybe even zero? + (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.01)))) (let* ((num-running (rmt:get-count-tests-running run-id)) (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)) (job-group-limit (let ((jobg-count (configf:lookup *configdat* "jobgroups" jobgroup))) (if (string? jobg-count) @@ -527,10 +532,19 @@ ;; run the run prehook if there are no tests yet run for this run: ;; (runs:run-pre-hook run-id) ;; mark all test launced flag as false in the meta table (rmt:set-var (conc "lunch-complete-" run-id) "no") + (debug:print-info 1 *default-log-port* "Setting end-of-run to no") + (let* ((config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns"))) + (if x (string->number x) #f))) + (config-rerun-cnt (if config-reruns + config-reruns + 1))) + (if (eq? config-rerun-cnt run-count) + (rmt:set-var (conc "end-of-run-" run-id) "no"))) + (rmt:set-run-state-status run-id "new" "n/a") ;; now add non-directly referenced dependencies (i.e. waiton) ;;====================================================================== ;; refactoring this block into tests:get-full-data ;; @@ -680,21 +694,21 @@ (if (not (hash-table-ref/default flags "-preclean" #f)) (hash-table-set! 
flags "-preclean" #t)) (if (not (hash-table-ref/default flags "-rerun" #f)) (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS")) ;; recursive call to self - (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))))) + (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))) + (launch:end-of-run-check run-id))) (debug:print-info 0 *default-log-port* "No tests to run"))) (debug:print-info 4 *default-log-port* "All done by here") ;; TODO: try putting post hook call here - ;(if (eq? run-count 0) - ; (begin - ; (debug:print-info 0 *default-log-port* "Calling Post Hook") + ; (debug:print-info 2 *default-log-port* " run-count " run-count) ; (runs:run-post-hook run-id)) ; (debug:print-info 2 *default-log-port* "Not calling post hook runcount = " run-count )) (rmt:tasks-set-state-given-param-key task-key "done") + ;; (sqlite3:finalize! tasks-db) )) ;; loop logic. These are used in runs:run-tests-queue to make it a bit more readable. @@ -1281,18 +1295,26 @@ (let* ((run-dat (or (runs:gendat-run-info *runs:general-data*)(rmt:get-run-info run-id))) (runname (or (runs:gendat-runname *runs:general-data*) (db:get-value-by-header (db:get-rows run-dat) (db:get-header run-dat) "runname"))) (target (or (runs:gendat-target *runs:general-data*)(rmt:get-target run-id))) - (testsdat (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses - #f #f ;; offset limit - #f ;; not-in - #f ;; sort-by - #f ;; sort-order - #f ;; get full data (not 'shortlist) - (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time - 'dashboard))) + (testsdat (let ((res (rmt:get-tests-for-run + run-id "%" '() '() ;; run-id testpatt states statuses + #f #f ;; offset limit + #f ;; not-in + #f ;; sort-by + #f ;; sort-order + #f ;; get full data (not 'shortlist) + (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time + 'dashboard))) + (if (list? 
res) + res + (begin + (debug:print-error + 0 *default-log-port* + "FAILED TO GET DATA using rmt:get-tests-for-run. Notify developers if you see this. result: " res) + '()))))) (if (not (runs:gendat-run-info *runs:general-data*)) (runs:gendat-run-info-set! *runs:general-data* run-dat)) (if (not (runs:gendat-runname *runs:general-data*)) (runs:gendat-runname-set! *runs:general-data* runname)) (if (not (runs:gendat-target *runs:general-data*)) @@ -1665,12 +1687,12 @@ (else (debug:print-info 4 *default-log-port* "cond branch - " "rtq-9") (debug:print-info 4 *default-log-port* "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) ))) ;; end loop on sorted test names ;; this is the point where everything is launched and now you can mark the run in metadata table as all launched - (rmt:set-var (conc "lunch-complete-" run-id) "yes") - + (rmt:set-var (conc "lunch-complete-" run-id) "yes") + ;; now *if* -run-wait we wait for all tests to be done ;; Now wait for any RUNNING tests to complete (if in run-wait mode) (thread-sleep! 10) ;; I think there is a race condition here. 
Let states/statuses settle (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id)) (prev-num-running 0)) @@ -1996,11 +2018,11 @@ (lambda (f x) (let ((fullname (conc real-dir "/" f))) (if (not (string-search (regexp "testdat.db") f)) (runs:recursive-delete-with-error-msg fullname))) (+ 1 x)) - 0 real-dir) + 0 real-dir #t) ;; then the entire directory (runs:recursive-delete-with-error-msg real-dir)) ;; cleanup often needs to remove all but the last N runs per target ;; @@ -2072,10 +2094,19 @@ sorted))) ;; (print "Sorted: " (map simple-run-event_time sorted)) ;; (print "Remove: " (map simple-run-event_time to-remove)))) (hash-table-keys runs-ht)) runs-ht)) + +(define (remove-last-path-directory path-in) + (let* ((dparts (string-split path-in "/")) + (path-out (conc "/" (string-intersperse (take dparts (- (length dparts) 1)) "/"))) + ) + path-out + ) +) + ;; (define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep) ;; (let ((data (runs:get-all-but-most-recent-n-per-target target-patts runpatt num-to-keep))) ;; (for-each ;; (lambda (target) @@ -2105,11 +2136,12 @@ (states (if state (string-split state ",") '())) (statuses (if status (string-split status ",") '())) (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f))) (rp-mutex (make-mutex)) (bup-mutex (make-mutex)) - (keep-records (args:get-arg "-keep-records"))) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode". + (keep-records (args:get-arg "-keep-records")) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode". + (test-records '())) ;; for tasks that we wish to operate on all tests in one fell swoop (let* ((write-access-actions '(remove-runs set-state-status archive run-wait kill-runs)) (dbfile (conc *toppath* "/megatest.db")) (readonly-mode (not (file-write-access? 
dbfile)))) (when (and readonly-mode @@ -2139,10 +2171,13 @@ (run-name (db:get-value-by-header run header "runname")) (tests (if (not (equal? run-state "locked")) (proc-get-tests run-id) '())) (lasttpath "/does/not/exist/I/hope") + (lastrealpath "/does/not/exist/I/hope") + ;; there may be a number of different disks used in the same run. + (run-paths-hash (make-hash-table)) (worker-thread #f)) (debug:print-info 4 *default-log-port* "runs:operate-on run=" run ", header=" header) (if (not (null? tests)) (begin (case action @@ -2165,23 +2200,29 @@ action) ((run-wait) (debug:print 1 *default-log-port* "Waiting for run " runkey ", run=" runnamepatt " to complete")) ((archive) (debug:print 1 *default-log-port* "Archiving/restoring (" (args:get-arg "-archive") ") data for run: " runkey " " (db:get-value-by-header run header "runname")) - (set! worker-thread - (make-thread - (lambda () - (case (string->symbol (args:get-arg "-archive")) - ((save save-remove keep-html) - (archive:run-bup (args:get-arg "-archive") run-id run-name tests rp-mutex bup-mutex)) - ((restore) - (archive:bup-restore (args:get-arg "-archive") run-id run-name tests rp-mutex bup-mutex)) - (else - (debug:print-error 0 *default-log-port* "unrecognised sub command to -archive. Run \"megatest\" to see help") - (exit)))) - "archive-bup-thread")) - (thread-start! worker-thread)) + (let ((op (string->symbol (args:get-arg "-archive")))) + (set! worker-thread + (make-thread + (lambda () + (case op + ((save save-remove keep-html) + (archive:run-bup op run-id run-name tests rp-mutex bup-mutex)) + ((restore) + (archive:bup-restore op run-id run-name tests rp-mutex bup-mutex)) + ((get) ;;; NOTE: This is a special case. We wish to operate on ALL tests in one go + (set! test-records (append tests test-records))) + (else + (debug:print-error 0 *default-log-port* "unrecognised sub command " op " for -archive. Run \"megatest\" to see help") + (exit)))) + "archive-bup-thread")) + (thread-start! worker-thread) + (if (eq? 
op 'get) + (thread-join! worker-thread)) ;; we need the test-records set to not overlap + )) (else (debug:print-info 0 *default-log-port* "action not recognised " action))) ;; actions that operate on one test at a time can be handled below ;; @@ -2315,13 +2356,40 @@ ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... (if (null? tal) (loop new-test-dat tal) (loop (car tal)(append tal (list new-test-dat))))) (begin - (runs:remove-test-directory new-test-dat mode) ;; 'remove-all) - (if (not (null? tal)) - (loop (car tal)(cdr tal))))))) + (let ((rundir (db:test-get-rundir new-test-dat))) + (if (and (not (string= rundir "/tmp/badname")) + (file-exists? rundir) + (substring-index run-name rundir) + (substring-index target rundir) + ) + (begin + (set! lasttpath (db:test-get-rundir new-test-dat)) ;; remember this path for run removal + (set! lastrealpath (remove-last-path-directory (resolve-pathname lasttpath))) + (hash-table-set! run-paths-hash lastrealpath 1) + (runs:remove-test-directory new-test-dat mode) ;; 'remove-all) + ) + (begin + (debug:print 2 *default-log-port* "Not removing directory " rundir " because either it doesn't exist or has a bad name") + (debug:print 2 *default-log-port* "Is /tmp/badname: " (string= rundir "/tmp/badname")) + (debug:print 2 *default-log-port* "Exists: " (file-exists? 
rundir)) + (debug:print 2 *default-log-port* "Has run-name: " (substring-index run-name rundir)) + (debug:print 2 *default-log-port* "Has target: " (substring-index target rundir)) + ;;PJH remove record from db no need to cleanup directory + (case mode + ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) (db:test-get-state test)(db:test-get-status test) #f)) + ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f)) + (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test)))) + + ) + ) + ) + + (if (not (null? tal)) + (loop (car tal)(cdr tal))))))) (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) ((kill-runs) ;; RUNNING -> KILLREQ ;; LAUNCHED,RUNNING,REMOTEHOSTSTART -> NOT STARTED (cond @@ -2387,36 +2455,49 @@ (loop (car tal)(cdr tal)))) ))) ) (if worker-thread (thread-join! worker-thread))) (common:join-backgrounded-threads)))) + ;; remove the run if zero tests remain (if (eq? action 'remove-runs) (let* ((run-id (db:get-value-by-header run header "id")) ;; NB// masks run-id from above? (remtests (mt:get-tests-for-run run-id #f '("DELETED") '("n/a") not-in: #t))) (if (null? remtests) ;; no more tests remaining - (let* ((dparts (string-split lasttpath "/")) - (runpath (conc "/" (string-intersperse - (take dparts (- (length dparts) 1)) - "/")))) - (debug:print 1 *default-log-port* "Removing run: " runkey " " (db:get-value-by-header run header "runname") " and related record") - (if (not keep-records) - (begin - (rmt:delete-run run-id) - (rmt:delete-old-deleted-test-records))) - ;; (rmt:set-var "DELETED_TESTS" (current-seconds)) - ;; need to figure out the path to the run dir and remove it if empty - ;; (if (null? 
(glob (conc runpath "/*"))) - ;; (begin - ;; (debug:print 1 *default-log-port* "Removing run dir " runpath) - ;; (system (conc "rmdir -p " runpath)))) + (let* ((linkspath (remove-last-path-directory lasttpath)) + (runpaths (hash-table-keys run-paths-hash)) + ) + + (debug:print 2 *default-log-port* "run-paths-hash: " (hash-table-keys run-paths-hash)) + + (debug:print 1 *default-log-port* "Removing target " target "run: " run-name) + (if (not keep-records) + (begin + (debug:print 1 *default-log-port* "Removing DB records for the run.") + (rmt:delete-run run-id) + (rmt:delete-old-deleted-test-records)) + ) + (if (not (equal? linkspath "/does/not/exist/I")) + (begin + (debug:print 1 *default-log-port* "Recursively removing links dir " linkspath) + (runs:recursive-delete-with-error-msg linkspath))) + + (for-each (lambda(runpath) + (debug:print 1 *default-log-port* "Recursively removing runs dir " runpath) + (runs:recursive-delete-with-error-msg runpath) + ) + runpaths + ) ))))) )) runs) - ;; (sqlite3:finalize! (db:delay-if-busy tdbdat)) + ;; special case - archive get + (if (equal? (args:get-arg "-archive") "get") + (archive:bup-get-data "get" #f #f test-records rp-mutex bup-mutex)) ) - #t) + #t + ) (define (runs:remove-test-directory test mode) ;; remove-data-only) (let* ((run-dir (db:test-get-rundir test)) ;; run dir is from the link tree (real-dir (if (common:file-exists? run-dir) ;; (resolve-pathname run-dir) @@ -2444,15 +2525,15 @@ ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f))) (debug:print-info 1 *default-log-port* "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) (if (and real-dir (> (string-length real-dir) 5) (common:file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. 
- (begin ;; let* ((realpath (resolve-pathname run-dir))) - (debug:print-info 1 *default-log-port* "Recursively removing " real-dir) - (if (common:file-exists? real-dir) - (runs:safe-delete-test-dir real-dir) - (debug:print 0 *default-log-port* "WARNING: test dir " real-dir " appears to not exist or is not readable"))) + (let* ((realpath (resolve-pathname run-dir))) + (debug:print-info 1 *default-log-port* "Recursively removing " realpath) + (if (common:file-exists? realpath) + (runs:safe-delete-test-dir realpath) + (debug:print 0 *default-log-port* "WARNING: test dir " realpath " appears to not exist or is not readable"))) (if real-dir (debug:print 0 *default-log-port* "WARNING: directory " real-dir " does not exist") (debug:print 0 *default-log-port* "WARNING: no real directory corrosponding to link " run-dir ", nothing done"))) (if (symbolic-link? run-dir) (begin @@ -2486,11 +2567,11 @@ ;;====================================================================== ;; Since many calls to a run require pretty much the same setup ;; this wrapper is used to reduce the replication of code (define (general-run-call switchname action-desc proc) - (let ((runname (or (args:get-arg "-runname")(args:get-arg ":runname"))) + (let ((runname (common:args-get-runname)) (target (common:args-get-target))) (cond ((not target) (debug:print-error 0 *default-log-port* "Missing required parameter for " switchname ", you must specify the target with -target") (exit 3)) @@ -2663,10 +2744,102 @@ (conc "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment) " "SELECT " (db:test-get-id new-testdat) ",category,variable,value,expected,tol,units,comment FROM test_data WHERE test_id=?;") (db:test-get-id testdat)))) )) prev-tests))) + +(define doc-template + '(*TOP* + (*PI* xml "version='1.0'") + (testsuite))) + +(define (runs:update-junit-test-reporter-xml run-id) + (let* ( + (junit-test-reporter (configf:lookup *configdat* "runs" "junit-test-reporter-xml")) + 
(junit-test-report-dir (configf:lookup *configdat* "runs" "junit-test-report-dir")) + (xml-dir (if (and junit-test-reporter (equal? junit-test-reporter "yes" )) + (if junit-test-report-dir + junit-test-report-dir + (conc (getenv "MT_LINKTREE") "/" (getenv "MT_TARGET") "/" (getenv "MT_RUNNAME"))) + #f)) + (xml-ts-name (if xml-dir + (conc (getenv "MT_TESTSUITENAME")"."(string-translate (getenv "MT_TARGET") "/" ".") "." (getenv "MT_RUNNAME")) + #f)) + (keyname (if xml-ts-name (common:get-signature xml-ts-name) #f)) + (xml-path (if xml-dir + (conc xml-dir "/" keyname ".xml") + #f)) + + (test-data (if xml-dir + (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses + #f #f ;; offset limit + #f ;; not-in + #f ;; sort-by + #f ;; sort-order + #f ;; get full data (not 'shortlist) + 0 ;; (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time + #f) + '())) + (tests-count (if xml-dir (length test-data) #f))) + (if (and junit-test-reporter (equal? junit-test-reporter "yes" )) + (begin + ;((sxml-modify! `("testsuite" insert-into (@ (name ,xml-ts-name) (tests ,tests-count)))) doc) + + (let loop ((test (car test-data)) + (tail (cdr test-data)) + (doc doc-template) + (fail-cnt 0) + (error-cnt 0)) + (let* ((test-name (vector-ref test 2)) + (test-itempath (vector-ref test 11)) + (tc-name (conc test-name (if (and test-itempath (not (equal? test-itempath ""))) (conc "." (string-translate test-itempath "/" "." 
)) ""))) + (test-state (vector-ref test 3)) + (comment (vector-ref test 14)) + (test-status (vector-ref test 4)) + (exc-msg (conc "No bucket for State " test-state " Status " test-status)) + (new-doc (cond + ((member test-state (list "RUNNING" )) + ((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (inProgress)))) doc)) + ((member test-state (list "LAUNCHED" "REMOTEHOSTSTART" "NOT_STARTED")) + ((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (inQueue)))) doc)) + ((member test-status (list "PASS" "WARN" "WAIVED")) + ((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name))))) doc)) + ((member test-status (list "FAIL" "CHECK")) + ((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (failure (@ (message ,comment) (type "failure")))))) doc)) + ((member test-status (list "DEAD" "KILLED" "ABORT" "PREQ_FAIL" "PREQ_DISCARDED")) + ((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (failure (@ (message ,comment) (type "error")))))) doc)) + ((member test-status (list "SKIP")) + ((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (skipped (@ (type "skipped")))))) doc)) + (else + (debug:print 0 *default-log-port* (conc "What do I do with State " test-state " Status " test-status)) + ((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (failure (@ (message ,exc-msg) (type "error")))))) doc)))) + (new-error-cnt (if (member test-status (list "DEAD" "KILLED" "ABORT" "PREQ_FAIL" "PREQ_DISCARDED")) + (+ error-cnt 1) + error-cnt)) + (new-fail-cnt (if (member test-status (list "FAIL" "CHECK")) + (+ fail-cnt 1) + fail-cnt))) + (if (null? 
tail) + (let* ((final-doc ((sxml-modify `("testsuite" insert-into (@ (name ,xml-ts-name) (tests ,tests-count) (errors ,error-cnt) (failures ,fail-cnt)))) new-doc))) + (debug:print 0 *default-log-port* "modify attrib error=" error-cnt " fail= " fail-cnt) + (handle-exceptions + exn + (let* ((msg ((condition-property-accessor 'exn 'message) exn))) + (debug:print 0 *default-log-port* (conc "WARNING: Failed to update file" xml-path". Message:" msg))) + + (if (not (file-exists? xml-dir)) + (create-directory xml-dir #t)) + (if (not (rmt:no-sync-get/default keyname #f)) + (begin + (rmt:no-sync-set keyname "on") + (debug:print 0 *default-log-port* "creating xml at " xml-path) + (with-output-to-file xml-path + (lambda () + (print (sxml-serializer#serialize-sxml final-doc ns-prefixes: (list (cons 'gnm "http://foo")))))) + (rmt:no-sync-del! keyname)) + (debug:print 0 *default-log-port* "Could not get the lock. Skip writing the xml file.")))) + (loop (car tail) (cdr tail) new-doc new-fail-cnt new-error-cnt)))))))) ;; clean cache files (define (runs:clean-cache target runname toppath) (if target Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -1401,10 +1401,29 @@ ;; (string