Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -314,19 +314,40 @@ # install dashboard as dboard so wrapper script can be called dashboard $(PREFIX)/bin/.$(ARCHSTR)/dboard : dboard $(FILES) utils/mk_wrapper utils/mk_wrapper $(PREFIX) dboard $(PREFIX)/bin/dashboard chmod a+x $(PREFIX)/bin/dashboard $(INSTALL) dboard $(PREFIX)/bin/.$(ARCHSTR)/dboard + +$(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so : lib/libpangox-1.0.so + if [[ $(ARCHSTR) == 12.5 ]]; then \ + mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib; \ + $(INSTALL) lib/libpangox-1.0.so $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so; \ + fi + +$(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0 : lib/libpangox-1.0.so.0 + if [[ $(ARCHSTR) == 12.5 ]]; then \ + mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib; \ + $(INSTALL) lib/libpangox-1.0.so.0 $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0; \ + fi + +$(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0 : lib/libxcb-xlib.so.0 + if [[ $(ARCHSTR) == 12.5 ]]; then \ + mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib; \ + $(INSTALL) lib/libxcb-xlib.so.0 $(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0; \ + fi install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \ $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \ $(PREFIX)/bin/.$(ARCHSTR)/mtexec $(PREFIX)/bin/mtexec $(PREFIX)/bin/serialize-env \ $(PREFIX)/bin/nbfind $(PREFIX)/bin/mtrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \ $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun \ $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \ $(PREFIX)/bin/tcmt $(PREFIX)/share/db/mt-pg.sql \ - $(PREFIX)/share/js/jquery-3.1.0.slim.min.js + $(PREFIX)/share/js/jquery-3.1.0.slim.min.js \ + $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so \ + $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0 \ + $(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0 # $(PREFIX)/bin/.$(ARCHSTR)/ndboard # $(PREFIX)/bin/newdashboard $(PREFIX)/bin/.$(ARCHSTR) : Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -76,11 +76,11 @@ get-run-ids-matching-target get-runs-by-patt get-steps-data get-steps-for-test read-test-data - read-test-data* + read-test-data-varpatt login tasks-get-last testmeta-get-record have-incompletes? ;; synchash-get @@ -165,11 +165,11 @@ (params (vector-ref dat 1)) (start-t (current-milliseconds)) (readonly-mode (dbr:dbstruct-read-only dbstruct)) (readonly-command (member cmd api:read-only-queries)) (writecmd-in-readonly-mode (and readonly-mode (not readonly-command))) - (foo (begin + #;(foo (begin (common:telemetry-log (conc "api-in:"(->string cmd)) payload: `((params . ,params))) #t)) (res @@ -329,11 +329,11 @@ ((get-steps-for-test) (apply db:get-steps-for-test dbstruct params)) ((get-steps-info-by-id) (apply db:get-steps-info-by-id dbstruct params)) ;; TEST DATA ((read-test-data) (apply db:read-test-data dbstruct params)) - ((read-test-data*) (apply db:read-test-data* dbstruct params)) + ((read-test-data-varpatt) (apply db:read-test-data-varpatt dbstruct params)) ((get-data-info-by-id) (apply db:get-data-info-by-id dbstruct params)) ;; MISC ((get-latest-host-load) (apply db:get-latest-host-load dbstruct params)) ((have-incompletes?) (apply db:have-incompletes? dbstruct params)) @@ -361,16 +361,16 @@ start-t))) (hash-table-set! *db-api-call-time* cmd (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '())))) (if writecmd-in-readonly-mode (begin - (common:telemetry-log (conc "api-out:"(->string cmd)) + #;(common:telemetry-log (conc "api-out:"(->string cmd)) payload: `((params . ,params) (ok-res . #t))) (vector #f res)) (begin - (common:telemetry-log (conc "api-out:"(->string cmd)) + #;(common:telemetry-log (conc "api-out:"(->string cmd)) payload: `((params . ,params) (ok-res . #f))) (vector #t res)))))))) ;; http-server send-response @@ -383,18 +383,18 @@ (debug:print 4 *default-log-port* "server-id:" *server-id*) (let* ((cmd ($ 'cmd)) (paramsj ($ 'params)) (key ($ 'key)) (params (db:string->obj paramsj transport: 'http))) ;; incoming data from the POST (or is it a GET?) - (debug:print 0 *default-log-port* "cmd:" cmd " with params " params "key " key) + (debug:print 4 *default-log-port* "cmd:" cmd " with params " params "key " key) (if (equal? key *server-id*) (begin (set! *api-process-request-count* (+ *api-process-request-count* 1)) (let* ((resdat (api:execute-requests dbstruct (vector cmd params))) ;; process the request, resdat = #( flag result ) (success (vector-ref resdat 0)) (res (vector-ref resdat 1))) ;; (vector flag payload), get the payload, ignore the flag (why?) - (debug:print 0 *default-log-port* "res:" res) + (debug:print 4 *default-log-port* "res:" res) (if (not success) (debug:print 0 *default-log-port* "ERROR: success flag is #f for " cmd " with params " params)) (if (> *api-process-request-count* *max-api-process-requests*) (set! *max-api-process-requests* *api-process-request-count*)) (set! *api-process-request-count* (- *api-process-request-count* 1)) Index: apimod.scm ================================================================== --- apimod.scm +++ apimod.scm @@ -29,9 +29,6 @@ (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18) (import commonmod) (import (prefix ulex ulex:)) -(define (api:execute-requests params) - #f) - ) Index: archive.scm ================================================================== --- archive.scm +++ archive.scm @@ -120,11 +120,11 @@ res) (begin (debug:print 0 *default-log-port* "WARNING: no disk found for " target ", " run-name ", " test-name ", archive-path=" archive-path) #f))) (begin - (debug:print 0 *default-log-port* "WARNING: no disk found for " target ", " run-name ", " test-name ", block-id=" block-id) + (debug:print 0 *default-log-port* "WARNING: no disk found for " target ", " run-name ", " test-name ) #f)))))) ;; no best disk found ;; archive - run bup ;; ;; 1. create the bup dir if not exists @@ -224,11 +224,11 @@ " as it is a toplevel test with children")) ((not (common:file-exists? test-path)) (debug:print 0 *default-log-port* "WARNING: Cannot archive " test-name "/" item-path " as path " test-path " does not exist")) (else - (debug:print 0 *default-log-port* + (debug:print 2 *default-log-port* "From test-dat=" test-dat " derived the following:\n" "test-partial-path = " test-partial-path "\n" "test-path = " test-path "\n" "test-physical-path = " test-physical-path "\n" "partial-path-index = " partial-path-index "\n" @@ -268,27 +268,39 @@ ((bup) ;; Archive using bup (let* ((bup-init-params (list "-d" archive-dir "init")) (bup-index-params (append (list "-d" archive-dir "index") test-paths)) (bup-save-params (append (list "-d" archive-dir "save" ;; (conc "--strip-path=" linktree) (conc "-" compress) ;; or (conc "--compress=" compress) - "-n" (conc (common:get-testsuite-name) "-" run-id) - (conc "--strip-path=" test-base) ;; if we push to the directory do we need this? + "-n" (conc (common:get-testsuite-name) "-"(string-substitute "/" "-" target " ")) + (conc "--strip-path=" (conc test-base target "/" )) ;; if we push to the directory do we need this? ) test-paths))) (if (not (common:file-exists? (conc archive-dir "/HEAD"))) (begin ;; replace this with jobrunner stuff enventually - (debug:print-info 0 *default-log-port* "Init bup in " archive-dir) + (debug:print-info 2 *default-log-port* "Init bup in " archive-dir) ;; (mutex-lock! bup-mutex) - (run-n-wait bup-exe params: bup-init-params print-cmd: print-prefix) + (let-values (((pid-val exit-status exit-code) (run-n-wait bup-exe params: bup-init-params print-cmd: print-prefix))) + (if (not (eq? exit-code 0)) + (begin + (debug:print-error 0 *default-log-port* "There was an error initializing bup. Archive failed.") + (exit 1)))) ;; (mutex-unlock! bup-mutex) )) - (debug:print-info 0 *default-log-port* "Indexing data to be archived") + (debug:print-info 2 *default-log-port* "Indexing data to be archived") ;; (mutex-lock! bup-mutex) - (run-n-wait bup-exe params: bup-index-params print-cmd: print-prefix) - (debug:print-info 0 *default-log-port* "Archiving data with bup") - (run-n-wait bup-exe params: bup-save-params print-cmd: print-prefix))) + (let-values (((pid-val exit-status exit-code) (run-n-wait bup-exe params: bup-index-params print-cmd: print-prefix))) + (if (not (eq? exit-code 0)) + (begin + (debug:print-error 0 *default-log-port* "There was an error Indexing bup. Archive failed.") + (exit 1)))) + (debug:print-info 2 *default-log-port* "Archiving data with bup") + (let-values (((pid-val exit-status exit-code) (run-n-wait bup-exe params: bup-save-params print-cmd: print-prefix))) + (if (not (eq? exit-code 0)) + (begin + (debug:print-error 0 *default-log-port* "There was an archiving data with bup. Archive failed.") + (exit 1)))))) ((7z tar) (for-each (lambda (test-dat) (let* ((test-id (db:test-get-id test-dat)) (test-name (db:test-get-testname test-dat)) @@ -336,11 +348,11 @@ (archive-info (archive:allocate-new-archive-block blockid-cache *toppath* tsname min-space target-patt run-patt "megatest-db")) (archive-dir (if archive-info (cdr archive-info) #f)) (archive-id (if archive-info (car archive-info) -1)) (home-host (common:get-homehost)) (archive-time (seconds->std-time-str (current-seconds))) - (archive-staging-db (conc *toppath* "/logs/archive_" archive-time)) + (archive-staging-db (conc *toppath* "/.db-snapshot/archive_" archive-time)) (tmp-db-path (conc (common:get-db-tmp-area) "/megatest.db")) (dbfile (conc archive-staging-db "/megatest.db"))) (create-directory archive-staging-db #t) (let-values (((pid-val exit-status exit-code) (run-n-wait rsync-exe params: (list "-v" (conc (car home-host) ":"tmp-db-path) archive-staging-db) print-cmd: print-prefix))) (if (eq? exit-code 0) @@ -354,36 +366,49 @@ (conc "--strip-path=" archive-staging-db ) ;; if we push to the directory do we need this? dbfile))) (if (not (common:file-exists? (conc archive-dir "/HEAD"))) (begin ;; replace this with jobrunner stuff enventually - (debug:print-info 0 *default-log-port* "Init bup in " archive-dir) - (run-n-wait bup-exe params: bup-init-params print-cmd: print-prefix))) - (debug:print-info 0 *default-log-port* "Indexing data to be archived") - (run-n-wait bup-exe params: bup-index-params print-cmd: print-prefix) - (debug:print-info 0 *default-log-port* "Archiving data with bup") - (run-n-wait bup-exe params: bup-save-params print-cmd: print-prefix))) + (debug:print-info 2 *default-log-port* "Init bup in " archive-dir) + (let-values (((pid-val exit-status exit-code)(run-n-wait bup-exe params: bup-init-params print-cmd: print-prefix))) + (if (not (eq? exit-code 0)) + (begin + (debug:print-error 0 *default-log-port* "There was an error initializing bup. Archive failed.") + (exit 1)))))) + (debug:print-info 2 *default-log-port* "Indexing data to be archived") + (let-values (((pid-val exit-status exit-code) (run-n-wait bup-exe params: bup-index-params print-cmd: print-prefix))) + (if (not (eq? exit-code 0)) + (begin + (debug:print-error 0 *default-log-port* "There was an error Indexing bup. Archive failed.") + (exit 1)))) + (debug:print-info 2 *default-log-port* "Archiving data with bup") + (let-values (((pid-val exit-status exit-code) (run-n-wait bup-exe params: bup-save-params print-cmd: print-prefix))) + (if (not (eq? exit-code 0)) + (begin + (debug:print-error 0 *default-log-port* "There was an error archiving data with bup. Archive failed.") + (exit 1)) + (debug:print-info 0 *default-log-port* "To restore megatest.db run megatest -archive replicacte-db -source archive-dir -time-stamp . Current timestamp: " (seconds->std-time-str (current-seconds))))))) (else (debug:print-info 0 *default-log-port* "No support for databse archiving with " archiver))) (debug:print-error 0 *default-log-port* "There was an error rsyncing tmp database"))))) (define (archive:restore-db archive-path ts) (let* ((bup-exe (or (configf:lookup *configdat* "archive" "bup") "bup")) (archive-internal-path (conc (common:get-testsuite-name) "-megatest-db/" ts "/megatest.db" )) (bup-restore-params (list "-d" archive-path "restore" "-C" *toppath* archive-internal-path))) - (debug:print-info 0 *default-log-port* "Restoring archived data to " *toppath* " from archive in " archive-path " ... " archive-internal-path) - (run-n-wait bup-exe params: bup-restore-params print-cmd: #f)) + (debug:print-info 2 *default-log-port* "Restoring archived data to " *toppath* " from archive in " archive-path " ... " archive-internal-path) + (run-n-wait bup-exe params: bup-restore-params print-cmd: "Running:")) + (sleep 2) (db:multi-db-sync (db:setup #f) 'killservers ;'dejunk ;'adj-testids 'old2new ) - (debug:print-info 1 *default-log-port* "dropping trigerrs to update linktree") + (debug:print-info 1 *default-log-port* "dropping triggers to update linktree") (rmt:drop-all-triggers) - (let* ((linktree (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))) (src-archive-linktree (rmt:get-var "src-archive-linktree"))) (if (not (equal? src-archive-linktree linktree)) (rmt:update-tesdata-on-repilcate-db src-archive-linktree linktree)) (debug:print-info 1 *default-log-port* "creating triggers after updating linktree") @@ -412,13 +437,13 @@ (time->string (seconds->local-time sec) "%Y-%m-%d-%H%M%S")) -(define (archive:get-timestamp-dir bup-exe archive-dir testsuite-name run-id test-partial-path test-last-update) +(define (archive:get-timestamp-dir bup-exe archive-dir testsuite-name target test-partial-path test-last-update) (debug:print-info 0 *default-log-port* "Test last update time:" (seconds->std-time-str test-last-update)) - (let* ((internal-path (conc testsuite-name "-" run-id)) + (let* ((internal-path (conc testsuite-name "-" target)) (archive-update-delay (string->number (or (configf:lookup *configdat* "archive" "test-update-delay") "900" ))) (ts-list (archive:ls->list bup-exe archive-dir internal-path)) (ds-flag (vector-ref (seconds->local-time) 8))) (let loop ((hed (car ts-list)) (tail (cdr ts-list))) @@ -457,11 +482,11 @@ (keyvals (rmt:get-key-val-pairs run-id)) (target (string-intersperse (map cadr keyvals) "/")) (toplevel/children (and (db:test-get-is-toplevel test-dat) (> (rmt:test-toplevel-num-items run-id test-name) 0))) - (test-partial-path (conc target "/" run-name "/" (db:test-make-full-name test-name item-path))) + (test-partial-path (conc run-name "/" (db:test-make-full-name test-name item-path))) ;; note the trailing slash to get the dir inspite of it being a link (test-path (conc linktree "/" test-partial-path)) ;; if the old path was not deleted then prev-test-physical-path will end up pointing to a real directory (mutex-lock! rp-mutex) (prev-test-physical-path (if (common:file-exists? test-path) @@ -474,12 +499,12 @@ (test-last-update (db:test-get-last_update test-dat)) (archive-block-info (rmt:test-get-archive-block-info archive-block-id)) (archive-path (if (vector? archive-block-info) (vector-ref archive-block-info 2) ;; look in db.scm for test-get-archive-block-info for the vector record info #f)) ;; no archive found? - (archive-timestamp-dir (if archive-path (archive:get-timestamp-dir bup-exe archive-path (common:get-testsuite-name) run-id test-partial-path test-last-update) #f)) - (archive-internal-path (conc (common:get-testsuite-name) "-" run-id "/" archive-timestamp-dir "/" test-partial-path)) + (archive-timestamp-dir (if archive-path (archive:get-timestamp-dir bup-exe archive-path (common:get-testsuite-name) (string-substitute "/" "-" target " ") test-partial-path test-last-update) #f)) + (archive-internal-path (conc (common:get-testsuite-name) "-" (string-substitute "/" "-" target " ") "/" archive-timestamp-dir "/" test-partial-path)) (include-paths (args:get-arg "-include")) (exclude-pattern (args:get-arg "-exclude-rx")) (exclude-file (args:get-arg "-exclude-rx-from"))) (if (not archive-timestamp-dir) (debug:print-error 0 *default-log-port* "Archive not found for testsuite" (common:get-testsuite-name) " run/test/itempath" test-partial-path) ADDED bin/.11/lib/libpangox-1.0.so Index: bin/.11/lib/libpangox-1.0.so ================================================================== --- /dev/null +++ bin/.11/lib/libpangox-1.0.so cannot compute difference between binary files ADDED bin/.11/lib/libpangox-1.0.so.0 Index: bin/.11/lib/libpangox-1.0.so.0 ================================================================== --- /dev/null +++ bin/.11/lib/libpangox-1.0.so.0 cannot compute difference between binary files ADDED bin/.11/lib/libxcb-xlib.so.0 Index: bin/.11/lib/libxcb-xlib.so.0 ================================================================== --- /dev/null +++ bin/.11/lib/libxcb-xlib.so.0 cannot compute difference between binary files Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -197,10 +197,11 @@ (define *launch-setup-mutex* (make-mutex)) ;; need to be able to call launch:setup often so mutex it and re-call the real deal only if *toppath* not set (define *homehost-mutex* (make-mutex)) ;; Miscellaneous (define *triggers-mutex* (make-mutex)) ;; block overlapping processing of triggers +(define *numcpus-cache* (make-hash-table)) (use posix-extras pathname-expand files) ;; this plugs a hole in posix-extras in recent chicken versions > 4.9) (let-values (( (chicken-release-number chicken-major-version) @@ -225,14 +226,18 @@ (string-match "^-(s|ss|sx|script)$" (cadr argv))) (caddr argv)) (else (car argv)))) (fullpath (realpath this-script))) fullpath)) + +;;====================================================================== + (define *common:this-exe-fullpath* (common:get-this-exe-fullpath)) (define *common:this-exe-dir* (pathname-directory *common:this-exe-fullpath*)) (define *common:this-exe-name* (pathname-strip-directory *common:this-exe-fullpath*)) +;;====================================================================== ;; when called from a wrapper I need sometimes to find the calling ;; wrapper, this is for dashboard to find the correct megatest. ;; (define (common:find-local-megatest #!optional (progname "megatest")) (let ((res (filter file-exists? @@ -247,11 +252,11 @@ ((dashboard) progname) (else exe))))) '("../../" "../"))))) (if (null? res) (begin - (debug:print 0 *current-log-port* "Failed to find this executable! Using what can be found on the path") + (debug:print 0 *default-log-port* "Failed to find this executable! Using what can be found on the path") progname) (car res)))) (define *common:logpro-exit-code->status-sym-alist* '( ( 0 . pass ) @@ -360,10 +365,11 @@ (conc megatest-version "-" megatest-fossil-hash)) (define (common:version-signature) (conc megatest-version "-" (substring megatest-fossil-hash 0 4))) +;;====================================================================== ;; from metadat lookup MEGATEST_VERSION ;; (define (common:get-last-run-version) ;; RADT => How does this work in send-receive function??; assume it is the value saved in some DB (rmt:get-var "MEGATEST_VERSION")) @@ -372,29 +378,25 @@ (substring (common:get-last-run-version) 0 6))) (define (common:set-last-run-version) (rmt:set-var "MEGATEST_VERSION" (common:version-signature))) +;;====================================================================== ;; postive number if megatest version > db version ;; negative number if megatest version < db version (define (common:version-db-delta) - (- megatest-version (common:get-last-run-version-number))) + (- megatest-version (common:get-last-run-version-number))) (define (common:version-changed?) (not (equal? (common:get-last-run-version) (common:version-signature)))) (define (common:api-changed?) (not (equal? (substring (->string megatest-version) 0 4) (substring (conc (common:get-last-run-version)) 0 4)))) - -(define (common:get-sync-lock-filepath) - (let* ((tmp-area (common:get-db-tmp-area)) - (lockfile (conc tmp-area "/megatest.db.sync-lock"))) - lockfile)) - +;;====================================================================== ;; Move me elsewhere ... ;; RADT => Why do we meed the version check here, this is called only if version misma ;; (define (common:cleanup-db dbstruct #!key (full #f)) (apply db:multi-db-sync @@ -489,13 +491,12 @@ (when (>= (age-wks daysfile) 1) (copy daysfile wksfile) (copy hrsfile daysfile)) #t) #f)) - - - + +;;====================================================================== ;; Rotate logs, logic: ;; if > 500k and older than 1 week: ;; remove previous compressed log and compress this log ;; WARNING: This proc operates assuming that it is in the directory above the ;; logs directory you wish to log-rotate. @@ -576,11 +577,12 @@ exn (debug:print-error 0 *default-log-port* "failed to remove " fullname ", exn=" exn) (delete-file* fullname))))) files) (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files.")))))) - + +;;====================================================================== ;; Force a megatest cleanup-db if version is changed and skip-version-check not specified ;; Do NOT check if not on homehost! ;; (define (common:exit-on-version-changed) (if (common:on-homehost?) @@ -619,10 +621,11 @@ (debug:print 0 *default-log-port* " You have read-only access to this area. Cannot proceed with megatest version migration.") (exit 1)) (else (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"") (exit 1))))))) +;;====================================================================== ;; (begin ;; (debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.") ;; (exit 1)))) ;;====================================================================== @@ -786,10 +789,11 @@ "REMOVING" "CLEANING" "ARCHIVE_REMOVING" )) +;;====================================================================== ;; BBnote: *common:std-statuses* dashboard filter control and test control status buttons defined here; used in set-fields-panel and dboard:make-controls ;; note these statuses are sorted from better to worse. ;; This sort order is important to dcommon:status-compare3 and db:set-state-status-and-roll-up-items (define *common:std-statuses* '(;; (0 "DELETED") @@ -823,10 +827,11 @@ '("COMPLETED" "KILLED" "UNKNOWN" "INCOMPLETE" "ARCHIVED")) (define *common:not-started-ok-statuses* ;; if not one of these statuses when in not_started state treat as dead '("n/a" "na" "PASS" "FAIL" "WARN" "CHECK" "WAIVED" "DEAD" "SKIP")) +;;====================================================================== ;; group tests into buckets corresponding to rollup ;;; Running, completed-pass, completed-non-pass + worst status, not started. ;; filter out ;(define (common:categorize-items-for-rollup in-tests) ; ( @@ -838,10 +843,11 @@ (lambda (a b) (let ((a-num (cadr (or (assoc a items-order) '(0 0)))) (b-num (cadr (or (assoc b items-order) '(0 0))))) (acomp a-num b-num)))))) +;;====================================================================== ;; ;; given a toplevel with currstate, currstatus apply state and status ;; ;; => (newstate . newstatus) ;; (define (common:apply-state-status currstate currstatus state status) ;; (let* ((cstate (string->symbol (string-downcase currstate))) ;; (cstatus (string->symbol (string-downcase currstatus))) @@ -913,13 +919,14 @@ (configf:lookup *configdat* "setup" "testsuite" ) (getenv "MT_TESTSUITE_NAME") (pathname-file (or (if (string? *toppath* ) (pathname-file *toppath*) #f) - (common:get-topath #f))) + (common:get-toppath #f))) "please-set-setup-area-name")) ;; (pathname-file (current-directory))))) +;;====================================================================== ;; safe getting of toppath (define (common:get-toppath areapath) (or *toppath* (if areapath (begin @@ -941,10 +948,13 @@ (debug:print-error 0 *default-log-port* "Unable to find megatest home directory.") #f) (loop (pathname-directory thepath))))) )) +;;====================================================================== +;; redefine for future cleanup (converge on area-name, the more generic +;; (define common:get-area-name common:get-testsuite-name) (define (common:get-db-tmp-area . junk) (if *db-cache-path* *db-cache-path* @@ -980,26 +990,15 @@ ;;====================================================================== (define (common:run-sync?) (and (common:on-homehost?) (args:get-arg "-server"))) - -;; (let ((ohh (common:on-homehost?)) -;; (srv (args:get-arg "-server"))) -;; (and ohh srv))) - ;; (debug:print-info 0 *default-log-port* "common:run-sync? ohh=" ohh ", srv=" srv) - - - -(define *wdnum* 0) -(define *wdnum*mutex (make-mutex)) - (define (common:human-time) (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S")) - +;;====================================================================== ;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp ;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server) ;; (define (common:readonly-watchdog dbstruct) (thread-sleep! 0.05) ;; delay for startup @@ -1027,10 +1026,11 @@ (debug:print-info 13 *default-log-port* "rosync called, " res " records transferred.")))) (loop (current-seconds))) #t))) (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath))) +;;====================================================================== ;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage (define (common:watchdog) (debug:print-info 13 *default-log-port* "common:watchdog entered.") (if (launch:setup) (if (common:on-homehost?) @@ -1130,10 +1130,11 @@ ;;====================================================================== ;; M I S C U T I L S ;;====================================================================== +;;====================================================================== ;; convert stuff to a number if possible (define (any->number val) (cond ((number? val) val) ((string? val) (string->number val)) @@ -1156,16 +1157,18 @@ (set! res #t)))) (string-split patts ",")) res) #t)) +;;====================================================================== ;; '(print (string-intersperse (map cadr (hash-table-ref/default (read-config "megatest.config" \#f \#t) "disks" '"'"'("none" ""))) "\n"))' (define (common:get-disks #!key (configf #f)) (hash-table-ref/default (or configf (read-config "megatest.config" #f #t)) "disks" '("none" ""))) +;;====================================================================== ;; return first command that exists, else #f ;; (define (common:which cmds) (if (null? cmds) #f @@ -1188,10 +1191,11 @@ (pathname-directory (pathname-directory (pathname-directory exe-path)))) #f))) +;;====================================================================== ;; return first path that can be created or already exists and is writable ;; (define (common:get-create-writeable-dir dirs) (if (null? dirs) #f @@ -1212,10 +1216,11 @@ res (if (null? tal) #f (loop (car tal)(cdr tal)))))))) +;;====================================================================== ;; return the youngest timestamp . filename ;; (define (common:get-youngest glob-list) (let ((all-files (apply append (map (lambda (patt) @@ -1234,10 +1239,11 @@ (list curmod fname) res))) '(0 "n/a") all-files))) +;;====================================================================== ;; use bash to expand a glob. Does NOT handle paths with spaces! ;; (define (common:bash-glob instr) (string-split (with-input-from-pipe @@ -1246,10 +1252,11 @@ ;;====================================================================== ;; Some safety net stuff ;;====================================================================== +;;====================================================================== ;; return input if it is a list or return null (define (common:list-or-null inlst #!key (ovrd #f)(message #f)) (if (list? inlst) inlst (begin @@ -1259,10 +1266,11 @@ ;;====================================================================== ;; T A R G E T S , S T A T E , S T A T U S , ;; R U N N A M E A N D T E S T P A T T ;;====================================================================== +;;====================================================================== ;; (map print (map car (hash-table->alist (read-config "runconfigs.config" #f #t)))) ;; (define (common:get-runconfig-targets #!key (configf #f)) (let ((targs (sort (map car (hash-table->alist (or configf ;; NOTE: There is no value in using runconfig:read here. @@ -1275,10 +1283,11 @@ (filter (lambda (x) (patt-list-match x target-patt)) targs) targs))) +;;====================================================================== ;; Lookup a value in runconfigs based on -reqtarg or -target ;; (define (runconfigs-get config var) (let ((targ (common:args-get-target))) ;; (or (args:get-arg "-reqtarg")(args:get-arg "-target")(getenv "MT_TARGET")))) (if targ @@ -1315,35 +1324,32 @@ rtestpatt) (else (debug:print-info 0 *default-log-port* "using testpatt " args-testpatt " rtestpatt:" rtestpatt) args-testpatt)))) - - (define (common:false-on-exception thunk #!key (message #f)) (handle-exceptions exn (begin (if message (debug:print-info 0 *default-log-port* message)) #f) (thunk) )) (define (common:file-exists? path-string #!key (silent #f)) - ;; this avoids stack dumps in the case where - - ;;;; TODO: catch permission denied exceptions and emit appropriate warnings, eg: system error while trying to access file: "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/randy-slow/reproduce/q... + ;;;; TODO: catch permission denied exceptions and emit appropriate warnings. Might be able to get rid of with chicken 5? (common:false-on-exception (lambda () (file-exists? path-string)) message: (if (not silent) (conc "Unable to access path: " path-string) #f) )) (define (common:directory-exists? path-string) - ;;;; TODO: catch permission denied exceptions and emit appropriate warnings, eg: system error while trying to access file: "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/randy-slow/reproduce/q... + ;;;; TODO: catch permission denied exceptions and emit appropriate warnings (common:false-on-exception (lambda () (directory-exists? path-string)) message: (conc "Unable to access path: " path-string) )) +;;====================================================================== ;; does the directory exist and do we have write access? ;; ;; returns the directory or #f ;; (define (common:directory-writable? path-string) @@ -1403,10 +1409,11 @@ (debug:print-error 0 *default-log-port* "Invalid target, spaces or blanks not allowed \"" target "\", target should be: " (string-intersperse keys "/") ", have " tlist " for elements") (if exit-if-bad (exit 1)) #f) #f)))) +;;====================================================================== ;; looking only (at least for now) at the MT_ variables craft the full testname ;; (define (common:get-full-test-name) (if (getenv "MT_TEST_NAME") (if (and (getenv "MT_ITEMPATH") @@ -1413,10 +1420,11 @@ (not (equal? (getenv "MT_ITEMPATH") ""))) (getenv "MT_TEST_NAME") (conc (getenv "MT_TEST_NAME") "/" (getenv "MT_ITEMPATH"))) #f)) +;;====================================================================== ;; logic for getting homehost. Returns (host . at-home) ;; IF *toppath* is not set, wait up to five seconds trying every two seconds ;; (this is to accomodate the watchdog) ;; (define (common:get-homehost #!key (trynum 5)) @@ -1471,18 +1479,20 @@ (equal? homehost bestadrs)))) (set! *home-host* (cons homehost at-home)) (mutex-unlock! *homehost-mutex*) *home-host*)))) +;;====================================================================== ;; am I on the homehost? ;; (define (common:on-homehost?) (let ((hh (common:get-homehost))) (if hh (cdr hh) #f))) +;;====================================================================== ;; do we honor the caches of the config files? ;; (define (common:use-cache?) (let ((res #t)) ;; priority by order of evaluation (if *configdat* ;; sillyness here. can't use setup/use-cache to know if we can use the cached files! @@ -1496,10 +1506,11 @@ (set! res #t) (if (equal? (getenv "MT_USE_CACHE") "no") (set! res #f)))) ;; overrides -no-cache switch res)) +;;====================================================================== ;; force use of server? ;; (define (common:force-server?) (let* ((force-setting (configf:lookup *configdat* "server" "force")) (force-type (if force-setting (string->symbol force-setting) #f)) @@ -1520,10 +1531,11 @@ ;;====================================================================== ;; M I S C L I S T S ;;====================================================================== +;;====================================================================== ;; items in lista are matched value and position in listb ;; return the remaining items in listb or #f ;; (define (common:list-is-sublist lista listb) (if (null? lista) @@ -1542,10 +1554,11 @@ (car talb) (cdr talb))) #f))))) +;;====================================================================== ;; Needed for long lists to be sorted where (apply max ... ) dies ;; (define (common:max inlst) (let loop ((max-val (car inlst)) (hed (car inlst)) @@ -1554,10 +1567,11 @@ (loop (max hed max-val) (car tal) (cdr tal)) (max hed max-val)))) +;;====================================================================== ;; get min or max, use > for max and < for min, this works around the limits on apply ;; (define (common:min-max comp lst) (if (null? lst) #f ;; better than an exception for my needs @@ -1564,10 +1578,11 @@ (fold (lambda (a b) (if (comp a b) a b)) (car lst) lst))) +;;====================================================================== ;; get min or max, use > for max and < for min, this works around the limits on apply ;; (define (common:sum lst) (if (null? lst) 0 @@ -1574,10 +1589,11 @@ (fold (lambda (a b) (+ a b)) (car lst) lst))) +;;====================================================================== ;; path list to hash-table tree ;; ((a b c)(a b d)(e b c)) => ((a (b (d) (c))) (e (b (c)))) ;; (define (common:list->htree lst) (let ((resh (make-hash-table))) @@ -1595,10 +1611,11 @@ (hash-table-set! ht hed (make-hash-table)) (loop ht hed tal))))) lst) resh)) +;;====================================================================== ;; hash-table tree to html list tree ;; ;; tipfunc takes two parameters: y the tip value and path the path to that point ;; (define (common:htree->html ht path tipfunc) @@ -1620,10 +1637,11 @@ (list levelname (common:htree->html y newpath tipfunc)))))) datlist))))) +;;====================================================================== ;; hash-table tree to alist tree ;; (define (common:htree->atree ht) (map (lambda (x) (cons (car x) @@ -1635,10 +1653,11 @@ ;;====================================================================== ;; M U N G E D A T A I N T O N I C E F O R M S ;;====================================================================== +;;====================================================================== ;; Generate an index for a sparse list of key values ;; ( (rowname1 colname1 val1)(rowname2 colname2 val2) ) ;; ;; => ;; @@ -1674,16 +1693,18 @@ new-colnames (if (> curr-rownum rownum) curr-rownum rownum) (if (> curr-colnum colnum) curr-colnum colnum) )))))) +;;====================================================================== ;; if it looks like a number -> convert it to a number, else return it ;; (define (common:lazy-convert inval) (let* ((as-num (if (string? inval)(string->number inval) #f))) (or as-num inval))) +;;====================================================================== ;; convert string a=1; b=2; c=a silly thing; d= ;; to '((a . 1)(b . 2)(c . "a silly thing")(d . "")) ;; (define (common:val->alist val #!key (convert #f)) (let ((val-list (string-split-fields ";\\s*" val #:infix))) @@ -1701,10 +1722,11 @@ ;;====================================================================== ;; S Y S T E M S T U F F ;;====================================================================== +;;====================================================================== ;; lazy-safe get file mod time. on any error (file not existing etc.) return 0 ;; (define (common:lazy-modification-time fpath) (handle-exceptions exn @@ -1713,10 +1735,11 @@ 0) (if (file-exists? fpath) (file-modification-time fpath) 0))) +;;====================================================================== ;; find timestamp of newest file associated with a sqlite db file (define (common:lazy-sqlite-db-modification-time fpath) (let* ((glob-list (handle-exceptions exn (begin @@ -1729,19 +1752,21 @@ (apply max (map common:lazy-modification-time file-list)))) +;;====================================================================== ;; return a nice clean pathname made absolute (define (common:nice-path dir) (let ((match (string-match "^(~[^\\/]*)(\\/.*|)$" dir))) (if match ;; using ~ for home? (common:nice-path (conc (common:read-link-f (cadr match)) "/" (caddr match))) (normalize-pathname (if (absolute-pathname? dir) dir (conc (current-directory) "/" dir)))))) +;;====================================================================== ;; make "nice-path" available in config files and the repl (define nice-path common:nice-path) (define (common:read-link-f path) (handle-exceptions @@ -1752,19 +1777,21 @@ (with-input-from-pipe (conc "/bin/readlink -f " path) (lambda () (read-line))))) +;;====================================================================== ;; returns *effective load* (not normalized) ;; (define (common:get-intercept onemin fivemin) (if (< onemin fivemin) ;; load is decreasing, just use the onemin load onemin (let* ((load-change (- onemin fivemin)) (tchange (- 300 60))) (max (+ onemin (* 60 (/ load-change tchange))) 0)))) +;;====================================================================== ;; calculate a delay number based on a droop curve ;; inputs are: ;; - load-in, load as from uptime, NOT normalized ;; - numcpus, number of cpus, ideally use the real cpus, not threads ;; @@ -1798,10 +1825,12 @@ (if (< x 2) (loop (+ x 0.1))))) (define (get-cpu-load #!key (remote-host #f)) (car (common:get-cpu-load remote-host))) + +;;====================================================================== ;; (let* ((load-res (process:cmd-run->list "uptime")) ;; (load-rx (regexp "load average:\\s+(\\d+)")) ;; (cpu-load #f)) ;; (for-each (lambda (l) ;; (let ((match (string-search load-rx l))) @@ -1810,17 +1839,18 @@ ;; (if (number? newval) ;; (set! cpu-load newval)))))) ;; (car load-res)) ;; cpu-load)) +;;====================================================================== ;; get values from cached info from dropping file in logs dir ;; e.g. key is host and dtype is normalized-load ;; (define (common:get-cached-info key dtype #!key (age 10)) (if *toppath* (let* ((fullpath (conc *toppath* "/.sysdata/" key "-" dtype ".log")) - (delfile (lambda () + (delfile (lambda (exn) (debug:print-info 1 *default-log-port* " removing bad file " fullpath ", exn=" exn) (delete-file* fullpath) #f))) (if (and (file-exists? fullpath) (file-read-access? fullpath)) @@ -1839,15 +1869,15 @@ 0) (file-change-time fullpath))))) (if (< real-age age) (handle-exceptions exn - (delfile) + (delfile exn) (let* ((res (with-input-from-file fullpath read))) (if (eof-object? res) (begin - (delfile) + (delfile "n/a") #f) res))) (begin (debug:print-info 2 *default-log-port* "file " fullpath " is too old (" real-age" seconds) to trust, skipping reading it") @@ -1863,11 +1893,11 @@ (fullpath (conc fulldir "/" key "-" dtype ".log"))) (if (not (file-exists? fulldir))(create-directory fulldir #t)) (handle-exceptions exn (begin - (debug:print 0 *default-log-path* "failed to write file " fullpath ", exn=" exn) + (debug:print 0 *default-log-port* "failed to write file " fullpath ", exn=" exn) #f) (with-output-to-file fullpath (lambda ()(pp dat))))) #f)) (define (common:raw-get-remote-host-load remote-host) @@ -1878,10 +1908,11 @@ #f) ;; more specific handling of errors needed (with-input-from-pipe (conc "ssh " remote-host " cat /proc/loadavg") (lambda ()(list (read)(read)(read)))))) +;;====================================================================== ;; get cpu load by reading from /proc/loadavg, return all three values ;; (define (common:get-cpu-load remote-host) (handle-exceptions exn @@ -1908,10 +1939,11 @@ (common:write-cached-info actual-hostname "cpu-load" result) result) '(-1 -1 -1))) ;; -1 is bad result (else '(-2 -2 -2)))))))) +;;====================================================================== ;; get normalized cpu load by reading from /proc/loadavg and /proc/cpuinfo return all three values and the number of real cpus and the number of threads ;; returns alist '((adj-cpu-load . normalized-proc-load) ... etc. ;; keys: adj-proc-load, adj-core-load, 1m-load, 5m-load, 15m-load ;; (define (common:get-normalized-cpu-load remote-host) @@ -1991,10 +2023,11 @@ (define (common:unix-ping hostname) (let ((res (system (conc "ping -c 1 " hostname " > /dev/null")))) (eq? res 0))) +;;====================================================================== ;; ideally put all this info into the db, no need to preserve it across moving homehost ;; ;; return list of ;; ( reachable? cpuload update-time ) (define (common:get-host-info hostname) @@ -2022,10 +2055,11 @@ (alist-ref 'adj-core-load (common:get-normalized-cpu-load hostname)))) ;; this is cheaper than you might think. get-normalized-cpu-load is cached for up to 5 seconds (else (list #f 0 -1) ;; bad host, don't use! )))) +;;====================================================================== ;; see defstruct host at top of file. ;; host: reachable last-update last-used last-cpuload ;; (define (common:update-host-loads-table hosts-raw) (let* ((hosts (filter (lambda (x) @@ -2046,10 +2080,11 @@ (host-reachable-set! rec is-reachable) (host-last-update-set! rec last-reached-time) (host-last-cpuload-set! rec load))) hosts))) +;;====================================================================== ;; go through the hosts from least recently used to most recently used, pick the first that meets the load criteral from the ;; [host-rules] section. ;; (define (common:get-least-loaded-host hosts-raw host-type configdat) (let* ((rdat (configf:lookup configdat "host-rules" host-type)) @@ -2118,11 +2153,10 @@ #f (common:get-homehost))) (hh (if hh-dat (car hh-dat) #f))) (common:wait-for-normalized-load maxnormload msg hh))) -(define *numcpus-cache* (make-hash-table)) (define (common:get-num-cpus remote-host) (let* ((actual-host (or remote-host (get-host-name)))) ;; hosts had better not be changing the number of cpus too often! (or (hash-table-ref/default *numcpus-cache* actual-host #f) (let* ((numcpus (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (random 3600))) @@ -2147,10 +2181,11 @@ (common:write-cached-info actual-host "num-cpus" result)) result)))) (hash-table-set! *numcpus-cache* actual-host numcpus) numcpus)))) +;;====================================================================== ;; wait for normalized cpu load to drop below maxload ;; (define (common:wait-for-normalized-load maxnormload msg remote-host #!optional (rem-tries 5)) (let ((num-cpus (common:get-num-cpus remote-host))) (if num-cpus @@ -2159,10 +2194,11 @@ (thread-sleep! (random 60)) ;; we failed to get num cpus. wait a bit and try again (if (> rem-tries 0) (common:wait-for-normalized-load maxnormload msg remote-host (- rem-tries 1)) #f))))) +;;====================================================================== ;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load ;; count - count down to zero, at some point we'd give up if the load never drops ;; num-tries - count down to zero number tries to get numcpus ;; (define (common:wait-for-cpuload maxnormload numcpus-in @@ -2187,30 +2223,32 @@ (effective-load (common:get-intercept first next)) (recommended-delay (common:get-delay effective-load numcpus)) (effective-host (or remote-host "localhost")) (normalized-effective-load (/ effective-load numcpus)) (will-wait (> normalized-effective-load maxnormload))) - (if (> recommended-delay 0) + (if (> recommended-delay 1) (let* ((actual-delay (min recommended-delay 30))) (if (common:low-noise-print 30 (conc (round actual-delay) "-safe-load")) (debug:print-info 0 *default-log-port* "Load control, delaying " actual-delay " seconds to maintain safe load. current normalized effective load is " - normalized-effective-load".")) + normalized-effective-load". maxnormload = " maxnormload " numcpus = " numcpus " loadavg = " loadavg " effective-load = " effective-load)) (thread-sleep! actual-delay))) (cond ;; bad data, try again to get the data ((not will-wait) - (if (common:low-noise-print 30 (conc (round normalized-effective-load) "-load-acceptable-" effective-host)) - (debug:print 0 *default-log-port* "Effective load on " effective-host " is acceptable at " effective-load " continuing."))) + (if (common:low-noise-print 3600 (conc (round normalized-effective-load) "-load-acceptable-" effective-host)) + (debug:print 0 *default-log-port* "Effective load on " effective-host " is acceptable at " effective-load " continuing."))) + ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable (> num-tries 0)) (debug:print 0 *default-log-port* "WARNING: received bad data from get-cpu-load " first ", we'll sleep 10s and try " num-tries " more times.") (thread-sleep! 10) (common:wait-for-cpuload maxnormload numcpus-in count: count remote-host: remote-host num-tries: (- num-tries 1))) + ;; need to wait for load to drop ((and will-wait ;; (> first adjmaxload) (> count 0)) (debug:print-info 0 *default-log-port* "Delaying 15" ;; adjwait @@ -2234,10 +2272,11 @@ (debug:print 0 *default-log-port* "Load on " effective-host " is acceptable at effective normalized load of " effective-normalized-load " continuing.")) (debug:print 0 *default-log-port* "Load on " effective-host ", " first" could not be retrieved. Giving up and continuing.")))))) +;;====================================================================== ;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load ;; ;; (define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)(force-maxload #f)(num-tries 5)) ;; (let* ((loadavg (common:get-cpu-load remote-host)) ;; (numcpus (if (<= 1 numcpus-in) ;; not possible to have zero. If we get 1, it's possible that we got the previous default, and we should check again @@ -2353,10 +2392,11 @@ ;;====================================================================== (define (common:get-disk-space-used fpath) (with-input-from-pipe (conc "/usr/bin/du -s " fpath) read)) +;;====================================================================== ;; given path get free space, allows override in [setup] ;; with free-space-script /path/to/some/script.sh ;; (define (get-df path) (if (configf:lookup *configdat* "setup" "free-space-script") @@ -2413,10 +2453,11 @@ (list (> dbspace required) dbspace required dirpath))) +;;====================================================================== ;; check space in dbdir and in megatest dir ;; returns: ok/not dbspace required-space ;; (define (common:check-db-dir-space) (let* ((required (string->number @@ -2426,11 +2467,12 @@ (dbdir (common:get-db-tmp-area)) ;; (db:get-dbdir)) (tdbspace (common:check-space-in-dir dbdir required)) (mdbspace (common:check-space-in-dir *toppath* required))) (sort (list tdbspace mdbspace) (lambda (a b) (< (cadr a)(cadr b)))))) - + +;;====================================================================== ;; check available space in dbdir, exit if insufficient ;; (define (common:check-db-dir-and-exit-if-insufficient) (let* ((spacedat (car (common:check-db-dir-space))) ;; look only at worst for now (is-ok (car spacedat)) @@ -2439,11 +2481,12 @@ (dbdir (cadddr spacedat))) (if (not is-ok) (begin (debug:print-error 0 *default-log-port* "Insufficient space in " dbdir ", require " required ", have " dbspace ", exiting now.") (exit 1))))) - + +;;====================================================================== ;; paths is list of lists ((name path) ... ) ;; (define (common:get-disk-with-most-free-space disks minsize) (let* ((best #f) (bestsize 0) @@ -2496,10 +2539,11 @@ (map car disks)) (if (and best (> bestsize minsize)) best #f))) ;; #f means no disk candidate found +;;====================================================================== ;; convert a spec string to a list of vectors #( rx action rx-string ) (define (common:spec-string->list-of-specs spec-string actions) (let ((spec-strings (string-split-fields "\\s*;\\s*" spec-string #:infix)) (actions-regex (regexp (conc "^(.*)\\s+(" (string-intersperse (map conc actions) "|") ")")))) (filter @@ -2511,10 +2555,11 @@ (begin (debug:print 0 *default-log-port* "WARNING: Unrecognised rule \"" s "\" in clean-up specification.") #f)))) spec-strings)))) +;;====================================================================== ;; given a list of specs rx . rule and a file return the first matching rule ;; (define (common:file-find-rule fname rules) ;; rule is vector #( rx action rx-string) (let loop ((rule (car rules)) (tail (cdr rules))) @@ -2524,10 +2569,11 @@ rule ;; return the whole rule so regex can be printed etc. (if (null? tail) #f (loop (car tail)(cdr tail))))))) +;;====================================================================== ;; given a spec apply some rules to a directory ;; ;; WARNING: This function will REMOVE files - be sure your spec and path is correct! ;; ;; spec format: @@ -2575,17 +2621,17 @@ )) ;;====================================================================== ;; E N V I R O N M E N T V A R S ;;====================================================================== + (define (bb-check-path #!key (msg "check-path: ")) (let ((path (or (get-environment-variable "PATH") "none"))) (debug:print-info 0 *default-log-port* (conc msg" : $PATH="path)) (if (string-match "^.*/isoenv-core/.*" path) (debug:print-error 0 *default-log-port* (conc msg" : !!ISOENV PRESENT!!")) ;; remove for prod (debug:print-info 1 *default-log-port* (conc msg" : **no isoenv present**"))))) - (define (save-environment-as-files fname #!key (ignorevars (list "USER" "HOME" "DISPLAY" "LS_COLORS" "XKEYSYMDB" "EDITOR" "MAKEFLAGS" "MAKEF" "MAKEOVERRIDES"))) ;;(bb-check-path msg: "save-environment-as-files entry") (let ((envvars (get-environment-variables)) (whitesp (regexp "[^a-zA-Z0-9_\\-:,\\.\\/%$]")) @@ -2622,11 +2668,10 @@ "# export " "export ") key "=" delim (mungeval val) delim))) envvars))))) - (define (common:get-param-mapping #!key (flavor #f)) "returns alist mapping string keys in testconfig/subrun to megatest command line switches; if flavor is switch-symbol, maps tcmt symbolic switches to megatest switches" (let ((default '(("tag-expr" . "-tagexpr") ("mode-patt" . "-modepatt") ("run-name" . "-runname") @@ -2641,10 +2686,11 @@ (map (lambda (x) (cons (string->symbol (conc "-" (car x))) (cdr x))) default) default))) +;;====================================================================== ;; set some env vars from an alist, return an alist with original values ;; (("VAR" "value") ...) ;; a value of #f means "unset this var" ;; (define (alist->env-vars lst) @@ -2660,11 +2706,11 @@ (unsetenv var)))) lst) res) '())) - +;;====================================================================== ;; clear vars matching pattern, run proc, set vars back ;; if proc is a string run that string as a command with ;; system. ;; (define *common:orig-env* @@ -2710,11 +2756,10 @@ vars (lambda (var val) (setenv var val))) vars)) - (define (common:run-a-command cmd #!key (with-vars #f) (with-orig-env #f)) (let* ((pre-cmd (dtests:get-pre-command)) (post-cmd (dtests:get-post-command)) (fullcmd (if (or pre-cmd post-cmd) (conc pre-cmd cmd post-cmd) @@ -2727,10 +2772,11 @@ ;;====================================================================== ;; T I M E A N D D A T E ;;====================================================================== +;;====================================================================== ;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5 (define (common:hms-string->seconds tstr) (let ((parts (string-split-fields "\\w+" tstr)) (time-secs 0) ;; s=seconds, m=minutes, h=hours, d=days, M=months, y=years, w=weeks @@ -2795,15 +2841,17 @@ ((4 5 6) 2) ((7 8 9) 3) ((10 11 12) 4) (else #f))) +;;====================================================================== ;; basic ISO8601 format (e.g. "2017-02-28 06:02:54") date time => Unix epoch ;; (define (common:date-time->seconds datetime) (local-time->seconds (string->time datetime "%Y-%m-%d %H:%M:%S"))) +;;====================================================================== ;; given span of seconds tstart to tend ;; find start time to mark and mark delta ;; (define (common:find-start-mark-and-mark-delta tstart tend) (let* ((deltat (- (max tend (+ tend 10)) tstart)) ;; can't handle runs of less than 4 seconds. Pad it to 10 seconds ... @@ -2834,19 +2882,21 @@ '(5 10 15 20 30 40 50 500)) (if values (apply values result) (values 0 day 1 0 'd)))) +;;====================================================================== ;; given x y lim return the cron expansion ;; (define (common:expand-cron-slash x y lim) (let loop ((curr x) (res `())) (if (< curr lim) (loop (+ curr y) (cons curr res)) (reverse res)))) +;;====================================================================== ;; expand a complex cron string to a list of cron strings ;; ;; x/y => x, x+y, x+2y, x+3y while x+Ny a, b ,c ;; @@ -2899,12 +2949,12 @@ (flatten (map common:cron-expand new-list-crons)))) ;; (map common:cron-expand (map common:cron-expand new-list-crons)))) (else (if (null? tal) cron-str (loop (car tal)(cdr tal)(car type-tal)(cdr type-tal)(append res (list hed))))))))))) - - + +;;====================================================================== ;; given a cron string and the last time event was processed return #t to run or #f to not run ;; ;; min hour dayofmonth month dayofweek ;; 0-59 0-23 1-31 1-12 0-6 ### NOTE: dayofweek does not include 7 ;; @@ -3010,10 +3060,11 @@ ((red) "223 33 49") ((grey) "192 192 192") ((orange) "255 172 13") ((purple) "This is unfinished ..."))) +;;====================================================================== ;; (define (common:get-color-for-state-status state status) ;; (case (string->symbol state) ;; ((COMPLETED) ;; (case (string->symbol status) ;; ((PASS) "70 249 73") @@ -3039,10 +3090,11 @@ ;;====================================================================== ;; L O C K I N G M E C H A N I S M S ;;====================================================================== +;;====================================================================== ;; faux-lock is deprecated. Please use simple-lock below ;; (define (common:faux-lock keyname #!key (wait-time 8)(allow-lock-steal #t)) (if (rmt:no-sync-get/default keyname #f) ;; do not be tempted to compare to pid. locking is a one-shot action, if already locked for this pid it doesn't actually count (if (> wait-time 0) @@ -3063,18 +3115,18 @@ (begin (if (rmt:no-sync-get/default keyname #f) (rmt:no-sync-del! keyname)) #t) #f)) +;;====================================================================== ;; simple lock. improve and converge on this one. ;; (define (common:simple-lock keyname) (rmt:no-sync-get-lock keyname)) (define (common:simple-unlock keyname #!key (force #f)) (rmt:no-sync-del! keyname)) - ;;====================================================================== ;; ;;====================================================================== @@ -3090,13 +3142,13 @@ ((equal? status "KILLREQ") "purple") ((equal? status "RUNNING") "blue") ((equal? status "ABORT") "brown") (else "black"))) -;; ;;====================================================================== -;; ;; N A N O M S G C L I E N T -;; ;;====================================================================== +;;====================================================================== +;; N A N O M S G C L I E N T +;;====================================================================== ;; ;; ;; ;; (define (common:send-dboard-main-changed) ;; (let* ((dashboard-ips (mddb:get-dashboards))) @@ -3239,49 +3291,51 @@ fallback-launcher))) ;;====================================================================== ;; NMSG AND NEW API ;;====================================================================== - -;; nm based server experiment, keep around for now. -;; -(define (nm:start-server dbconn #!key (given-host-name #f)) - (let* ((srvdat (start-raw-server given-host-name: given-host-name)) - (host-name (srvdat-host srvdat)) - (soc (srvdat-soc srvdat))) - - ;; start the queue processor (save for second round of development) - ;; - (thread-start! (make-thread! (lambda ()(queue-processor dbconn) "Queue processor"))) - ;; msg is an alist - ;; 'r host:port <== where to return the data - ;; 'p params <== data to apply the command to - ;; 'e j|s|l <== encoding of the params. default is s (sexp), if not specified is assumed to be default - ;; 'c command <== look up the function to call using this key - ;; - (let loop ((msg-in (nn-recv soc))) - (if (not (equal? msg-in "quit")) - (let* ((dat (decode msg-in)) - (host-port (alist-ref 'r dat)) ;; this is for the reverse req rep where the server is a client of the original client - (params (alist-ref 'p dat)) - (command (let ((c (alist-ref 'c dat)))(if c (string->symbol c) #f))) - (all-good (and host-port params command (hash-table-exists? *commands* command)))) - (if all-good - (let ((cmddat (make-qitem - command: command - host-port: host-port - params: params))) - (queue-push cmddat) ;; put request into the queue - (nn-send soc "queued")) ;; reply with "queued" - (print "ERROR: ["(common:human-time)"] BAD request " dat)) - (loop (nn-recv soc))))) - (nn-close soc))) +;; +;; ;;====================================================================== +;; ;; nm based server experiment, keep around for now. +;; ;; +;; (define (nm:start-server dbconn #!key (given-host-name #f)) +;; (let* ((srvdat (start-raw-server given-host-name: given-host-name)) +;; (host-name (srvdat-host srvdat)) +;; (soc (srvdat-soc srvdat))) +;; +;; ;; start the queue processor (save for second round of development) +;; ;; +;; (thread-start! (make-thread! (lambda ()(queue-processor dbconn) "Queue processor"))) +;; ;; msg is an alist +;; ;; 'r host:port <== where to return the data +;; ;; 'p params <== data to apply the command to +;; ;; 'e j|s|l <== encoding of the params. default is s (sexp), if not specified is assumed to be default +;; ;; 'c command <== look up the function to call using this key +;; ;; +;; (let loop ((msg-in (nn-recv soc))) +;; (if (not (equal? msg-in "quit")) +;; (let* ((dat (decode msg-in)) +;; (host-port (alist-ref 'r dat)) ;; this is for the reverse req rep where the server is a client of the original client +;; (params (alist-ref 'p dat)) +;; (command (let ((c (alist-ref 'c dat)))(if c (string->symbol c) #f))) +;; (all-good (and host-port params command (hash-table-exists? *commands* command)))) +;; (if all-good +;; (let ((cmddat (make-qitem +;; command: command +;; host-port: host-port +;; params: params))) +;; (queue-push cmddat) ;; put request into the queue +;; (nn-send soc "queued")) ;; reply with "queued" +;; (print "ERROR: ["(common:human-time)"] BAD request " dat)) +;; (loop (nn-recv soc))))) +;; (nn-close soc))) ;;====================================================================== ;; D A S H B O A R D U S E R V I E W S ;;====================================================================== +;;====================================================================== ;; first read ~/views.config if it exists, then read $MTRAH/views.config if it exists ;; (define (common:load-views-config) (let* ((view-cfgdat (make-hash-table)) (home-cfgfile (conc (get-environment-variable "HOME") "/.mtviews.config")) @@ -3294,23 +3348,25 @@ view-cfgdat)) ;;====================================================================== ;; H I E R A R C H I C A L H A S H T A B L E S ;;====================================================================== - +;; ;; Every element including top element is a vector: ;; (define (hh:make-hh #!key (ht #f)(value #f)) (vector (or ht (make-hash-table)) value)) +;;====================================================================== ;; used internally (define-inline (hh:set-ht! hh ht) (vector-set! hh 0 ht)) (define-inline (hh:get-ht hh) (vector-ref hh 0)) (define-inline (hh:set-value! hh value) (vector-set! hh 1 value)) (define-inline (hh:get-value hh value) (vector-ref hh 1)) +;;====================================================================== ;; given a hierarchial hash and some keys look up the value ... ;; (define (hh:get hh . keys) (if (null? keys) (vector-ref hh 1) ;; we have reached the end of the line, return the value sought @@ -3320,10 +3376,11 @@ (if sub-hh (apply hh:get sub-hh (cdr keys)) #f)) #f)))) +;;====================================================================== ;; given a hierarchial hash, a value and some keys, add needed hierarcy and insert the value ;; (define (hh:set! hh value . keys) (if (null? keys) (hh:set-value! hh value) ;; we have reached the end of the line, store the value @@ -3336,11 +3393,12 @@ (apply hh:set! new-sub-hh value (cdr keys))) (apply hh:set! sub-hh value (cdr keys)))) ;; call the sub-hierhash with remaining keys (begin (hh:set-ht! hh (make-hash-table)) (apply hh:set! hh value keys)))))) - + +;;====================================================================== ;; Manage pkts, used in servers, tests and likely other contexts so put ;; in common ;;====================================================================== (define common:pkts-spec @@ -3374,10 +3432,11 @@ (pktsdirs (if pktsdirs-str (string-split pktsdirs-str " ") #f))) pktsdirs)) +;;====================================================================== ;; use-lt is use linktree "lt" link to find pkts dir (define (common:save-pkt pktalist-in mtconf use-lt #!key (add-only #f)) ;; add-only saves the pkt only if there is a parent already (if (or add-only (hash-table-exists? *pkts-info* 'last-parent)) (let* ((parent (hash-table-ref/default *pkts-info* 'last-parent #f)) @@ -3460,10 +3519,11 @@ (define (common:get-pkt-alists pkts) (map (lambda (x) (alist-ref 'apkt x)) ;; 'pkta pulls out the alist from the read pkt pkts)) +;;====================================================================== ;; given list of pkts (alist mode) return list of D cards as Unix epoch, sorted descending ;; also delete duplicates by target i.e. (car pkt) ;; (define (common:get-pkt-times pkts) (delete-duplicates @@ -3472,12 +3532,11 @@ `(,(alist-ref 't x) . ,(string->number (alist-ref 'D x)))) pkts) (lambda (a b)(> (cdr a)(cdr b)))) ;; sort descending (lambda (a b)(equal? (car a)(car b))))) ;; remove duplicates by target - - +;;====================================================================== ;; accept an alist or hash table containing envvar/env value pairs (value of #f causes unset) ;; execute thunk in context of environment modified as per this list ;; restore env to prior state then return value of eval'd thunk. ;; ** this is not thread safe ** (define (common:with-env-vars delta-env-alist-or-hash-table thunk) @@ -3517,11 +3576,10 @@ rv))) (define *common:thread-punchlist* (make-hash-table)) (define (common:send-thunk-to-background-thread thunk #!key (name #f)) ;;(BB> "launched thread " name) - ;; we need a unique name for the thread. (let* ((realname (if name (if (not (hash-table-ref/default *common:thread-punchlist* name #f)) name (conc name"-" (symbol->string (gensym)))) @@ -3548,68 +3606,69 @@ #t) ;; just ignore it, it might have died in the meantime so joining it will throw an exception (thread-join! thread)) ))) (hash-table-keys *common:thread-punchlist*))) -(define *common:telemetry-log-state* 'startup) -(define *common:telemetry-log-socket* #f) - -(define (common:telemetry-log-open) - (if (eq? *common:telemetry-log-state* 'startup) - (let* ((serverhost (configf:lookup *configdat* "telemetry" "host")) - (serverport (configf:lookup-number *configdat* "telemetry" "port")) - (user (or (get-environment-variable "USER") "unknown")) - (host (or (get-environment-variable "HOST") "unknown"))) - (set! *common:telemetry-log-state* - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "common-telemetry-log open udp port failure") - 'broken) - (if (and serverhost serverport user host) - (let* ((s (udp-open-socket))) - ;;(udp-bind! s #f 0) - (udp-connect! s serverhost serverport) - (set! *common:telemetry-log-socket* s) - 'open) - 'not-needed)))))) - -(define (common:telemetry-log event #!key (payload '())) - (if (eq? *common:telemetry-log-state* 'startup) - (common:telemetry-log-open)) - - (if (eq? 'open *common:telemetry-log-state*) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "common-telemetry-log comms failure ; disabled (no server?)") - ;;(define *common:telemetry-log-state* 'broken-or-no-server-preclose) - ;;(common:telemetry-log-close) - (define *common:telemetry-log-state* 'broken-or-no-server) - (set! *common:telemetry-log-socket* #f) - ) - (if (and *common:telemetry-log-socket* event) ;; TODO - filter on event against telemetry.want-events - (let* ((user (or (get-environment-variable "USER") "unknown")) - (host (or (get-environment-variable "HOST") "unknown")) - (start (conc "[megatest "event"]")) - (toppath (or *toppath* "/dev/null")) - (payload-serialized - (base64:base64-encode - (z3:encode-buffer - (with-output-to-string (lambda () (pp payload)))))) - (msg (conc user":"host":"start":"(current-process-id)":"(car (argv))":" - toppath":"payload-serialized))) - (udp-send *common:telemetry-log-socket* msg)))))) - -(define (common:telemetry-log-close) - (when (or (member *common:telemetry-log-state* '(broken-or-no-server-preclose open)) *common:telemetry-log-socket*) - (handle-exceptions - exn - (begin - (define *common:telemetry-log-state* 'closed-fail) - (debug:print-info 0 *default-log-port* "common-telemetry-log closure failure") - ) - (begin - (define *common:telemetry-log-state* 'closed) - (udp-close-socket *common:telemetry-log-socket*) - (set! *common:telemetry-log-socket* #f))))) +;;====================================================================== +;; (define *common:telemetry-log-state* 'startup) +;; (define *common:telemetry-log-socket* #f) +;; +;; (define (common:telemetry-log-open) +;; (if (eq? *common:telemetry-log-state* 'startup) +;; (let* ((serverhost (configf:lookup *configdat* "telemetry" "host")) +;; (serverport (configf:lookup-number *configdat* "telemetry" "port")) +;; (user (or (get-environment-variable "USER") "unknown")) +;; (host (or (get-environment-variable "HOST") "unknown"))) +;; (set! *common:telemetry-log-state* +;; (handle-exceptions +;; exn +;; (begin +;; (debug:print-info 0 *default-log-port* "common-telemetry-log open udp port failure") +;; 'broken) +;; (if (and serverhost serverport user host) +;; (let* ((s (udp-open-socket))) +;; ;;(udp-bind! s #f 0) +;; (udp-connect! s serverhost serverport) +;; (set! *common:telemetry-log-socket* s) +;; 'open) +;; 'not-needed)))))) +;; +;; (define (common:telemetry-log event #!key (payload '())) +;; (if (eq? *common:telemetry-log-state* 'startup) +;; (common:telemetry-log-open)) +;; +;; (if (eq? 'open *common:telemetry-log-state*) +;; (handle-exceptions +;; exn +;; (begin +;; (debug:print-info 0 *default-log-port* "common-telemetry-log comms failure ; disabled (no server?)") +;; ;;(define *common:telemetry-log-state* 'broken-or-no-server-preclose) +;; ;;(common:telemetry-log-close) +;; (define *common:telemetry-log-state* 'broken-or-no-server) +;; (set! *common:telemetry-log-socket* #f) +;; ) +;; (if (and *common:telemetry-log-socket* event) ;; TODO - filter on event against telemetry.want-events +;; (let* ((user (or (get-environment-variable "USER") "unknown")) +;; (host (or (get-environment-variable "HOST") "unknown")) +;; (start (conc "[megatest "event"]")) +;; (toppath (or *toppath* "/dev/null")) +;; (payload-serialized +;; (base64:base64-encode +;; (z3:encode-buffer +;; (with-output-to-string (lambda () (pp payload)))))) +;; (msg (conc user":"host":"start":"(current-process-id)":"(car (argv))":" +;; toppath":"payload-serialized))) +;; (udp-send *common:telemetry-log-socket* msg)))))) +;; +;; (define (common:telemetry-log-close) +;; (when (or (member *common:telemetry-log-state* '(broken-or-no-server-preclose open)) *common:telemetry-log-socket*) +;; (handle-exceptions +;; exn +;; (begin +;; (define *common:telemetry-log-state* 'closed-fail) +;; (debug:print-info 0 *default-log-port* "common-telemetry-log closure failure") +;; ) +;; (begin +;; (define *common:telemetry-log-state* 'closed) +;; (udp-close-socket *common:telemetry-log-socket*) +;; (set! *common:telemetry-log-socket* #f))))) Index: configf.scm ================================================================== --- configf.scm +++ configf.scm @@ -46,23 +46,23 @@ (let ((remcwd (take dir (- (length dir) 1)))) (if (null? remcwd) (list #f #f #f) ;; #f #f) (loop remcwd))))))))) -(define (config:assoc-safe-add alist key val #!key (metadata #f)) +(define (configf:assoc-safe-add alist key val #!key (metadata #f)) (let ((newalist (filter (lambda (x)(not (equal? key (car x)))) alist))) (append newalist (list (if metadata (list key val metadata) (list key val)))))) (define (configf:section-var-set! cfgdat section-name var value #!key (metadata #f)) (hash-table-set! cfgdat section-name - (config:assoc-safe-add + (configf:assoc-safe-add (hash-table-ref/default cfgdat section-name '()) var value metadata: metadata))) -(define (config:eval-string-in-environment str) +(define (configf:eval-string-in-environment str) ;; (if (or (string-null? str) ;; (equal? "!" (substring str 0 1))) ;; null string or starts with ! are preserved but NOT set in the environment str (handle-exceptions exn @@ -241,11 +241,11 @@ (lambda (bundle) ;; (print "bundle: " bundle) (let ((key (car bundle)) (val (cadr bundle)) (meta (if (> (length bundle) 2)(caddr bundle) #f))) - (hash-table-set! ht section (config:assoc-safe-add (hash-table-ref ht section) key val metadata: meta)))) + (hash-table-set! ht section (configf:assoc-safe-add (hash-table-ref ht section) key val metadata: meta)))) vars))))) (hash-table-keys ht)))) ht) ;; read a config file, returns hash table of alists @@ -419,11 +419,11 @@ (debug:print-info 9 *default-log-port* "for line \"" inl "\"\n command: " cmd " took " delta " seconds to run with output:\n " res)) (if (null? res) "" (string-intersperse res " ")))))) (hash-table-set! res curr-section-name - (config:assoc-safe-add alist + (configf:assoc-safe-add alist key (case (calc-allow-system allow-system curr-section-name sections) ((return-procs) val-proc) ((return-string) cmd) (else (val-proc))) @@ -438,11 +438,11 @@ (let* ((alist (hash-table-ref/default res curr-section-name '())) (fval (or (if (string? val) val #f) ""))) ;; fval should be either "" or " " (one or more spaces) (debug:print 10 *default-log-port* " setting: [" curr-section-name "] " key " = #t") (safe-setenv key fval) (hash-table-set! res curr-section-name - (config:assoc-safe-add alist key fval metadata: metapath)) + (configf:assoc-safe-add alist key fval metadata: metapath)) (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name key #f))) @@ -453,17 +453,17 @@ (and (not (string-null? key)) (not (equal? "!" (substring key 0 1)))) ;; ! as leading character is a signature to NOT export to the environment ;; (string-match "^.*:.*:.*$" key) ;; ;; something:something:something reserved for triggers in runconfigs )) (realval (if envar - (config:eval-string-in-environment val) + (configf:eval-string-in-environment val) val))) (debug:print-info 6 *default-log-port* "read-config env setting, envar: " envar " realval: " realval " val: " val " key: " key " curr-section-name: " curr-section-name) (if envar (safe-setenv key realval)) (debug:print 10 *default-log-port* " setting: [" curr-section-name "] " key " = " val) (hash-table-set! res curr-section-name - (config:assoc-safe-add alist key realval metadata: metapath)) + (configf:assoc-safe-add alist key realval metadata: metapath)) (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name key #f))) ;; if a continued line (configf:cont-ln-rx ( x whsp val ) @@ -476,11 +476,11 @@ (string-substitute (regexp lead) "" whsp) "") val))) ;; (print "val: " val "\nnewval: \"" newval "\"\nvarflag: " var-flag) (hash-table-set! res curr-section-name - (config:assoc-safe-add alist var-flag newval metadata: metapath)) + (configf:assoc-safe-add alist var-flag newval metadata: metapath)) (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name var-flag (if lead lead whsp))) (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)))) (else (debug:print-error 0 *default-log-port* "problem parsing " path ",\n \"" inl "\"") (set! var-flag #f) (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)))) @@ -524,10 +524,11 @@ ;; (configf:var-is? cfgdat "foo" "var" "yes") => #t ;; (define (configf:var-is? cfgdat section var expected-val) (equal? (configf:lookup cfgdat section var) expected-val)) +;; redefines (define config-lookup configf:lookup) (define configf:read-file read-config) ;; safely look up a value that is expected to be a number, return ;; a default (#f unless provided) @@ -552,14 +553,15 @@ (hash-table-ref/default cfgdat section '())) (define (configf:set-section-var cfgdat section var val) (let ((sectdat (configf:get-section cfgdat section))) (hash-table-set! cfgdat section - (config:assoc-safe-add sectdat var val)))) + (configf:assoc-safe-add sectdat var val)))) - ;;(append (filter (lambda (x)(not (assoc var sectdat))) sectdat) - ;; (list var val)))) +;;====================================================================== +;;(append (filter (lambda (x)(not (assoc var sectdat))) sectdat) +;; (list var val)))) (define (setup) (let* ((configf (find-config "megatest.config")) (config (if configf (read-config configf #f #t) #f))) (if config Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -577,10 +577,12 @@ (cadr t-sort) 3))) (define (get-curr-sort) (vector-ref *tests-sort-options* *tests-sort-reverse*)) + +;;====================================================================== (debug:setup) ;; (define uidat #f) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -109,11 +109,11 @@ ;;====================================================================== (define (db:general-sqlite-error-dump exn stmt . params) (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) ;; RADT ... how does this work? ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn) - (print "err-status: " err-status) + ;; (print "err-status: " err-status) (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn)) (print-call-chain (current-error-port)))) ;; convert to -inline ;; @@ -457,11 +457,11 @@ exn (begin (print "Attempt to safely close sqlite3 db failed. Trying again. exn=" exn) (thread-sleep! 3) (sqlite3:interrupt! db) - (db:safely-close-sqlite3-db db stmtcache try-num: (- try-num 1))) + (db:safely-close-sqlite3-db db stmt-cache try-num: (- try-num 1))) (if (sqlite3:database? db) (let* ((stmts (and stmt-cache (hash-table-ref/default stmt-cache db #f)))) (if stmts (map sqlite3:finalize! (hash-table-values stmts))) (sqlite3:finalize! db) #t) @@ -1081,13 +1081,18 @@ (case option ;; kill servers ((killservers) (for-each (lambda (server) - (match-let (((mod-time host port start-time pid) server)) + (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " server ", exn=" exn) + #f) + (match-let (((mod-time host port start-time server-id pid) server)) (if (and host pid) - (tasks:kill-server host pid)))) + (tasks:kill-server host pid))))) servers) ;; /tmp/bjbarcla/megatest_localdb/fullrun/.nfs.pdx.disks.icf_env_disk001.bjbarcla.gwa.issues.mtdev.matt-bisect.megatest.ext-tests.runs.all.v1.65.1.6524.dev.bb-24-justrollup-f8.rollup.fullrun/megatest.db.lock (delete-file* (common:get-sync-lock-filepath)) ) @@ -1534,11 +1539,11 @@ state TEXT DEFAULT 'new', status TEXT DEFAULT 'n/a', archive_type TEXT DEFAULT 'bup', du INTEGER, archive_path TEXT);"))) - ;; (print "creating trigges from init") + (print "creating triggers from init") (db:create-triggers db) db)) ;; ) ;;====================================================================== ;; A R C H I V E S @@ -2911,11 +2916,11 @@ ;; Get run-ids for runs with same target but different runnames and NOT run-id ;; (define (db:get-prev-run-ids dbstruct run-id) (let* ((keyvals (db:get-key-val-pairs dbstruct run-id)) (kvalues (map cadr keyvals)) - (keys (rmt:get-keys)) + (keys (db:get-keys dbstruct)) (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND "))) (let ((prev-run-ids '())) (if (null? keyvals) '() (begin @@ -3047,33 +3052,33 @@ ;; ;; 1. cache tests-match-qry ;; 2. compile qry and store in hash ;; 3. convert for-each-row to fold ;; -(define (db:get-tests-for-run-state-status dbstruct run-id testpatt) - (db:with-db - dbstruct run-id #f - (lambda (db) - (let* ((res '()) - (stmt-cache (dbr:dbstruct-stmt-cache dbstruct)) - (stmth (let* ((sh (db:hoh-get stmt-cache db testpatt))) - (or sh - (let* ((tests-match-qry (tests:match->sqlqry testpatt)) - (qry (conc "SELECT id,testname,item_path,state,status FROM tests WHERE run_id=? " - (if tests-match-qry (conc " AND (" tests-match-qry ") ") ""))) - (newsh (sqlite3:prepare db qry))) - (debug:print-info 8 *default-log-port* "db:get-tests-for-run qry=" qry) - (db:hoh-set! stmt-cache db testpatt newsh) - newsh))))) - (reverse - (sqlite3:fold-row - (lambda (res id testname item-path state status) - ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment - (cons (vector id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-") res)) - '() - stmth - run-id)))))) +;; (define (db:get-tests-for-run-state-status dbstruct run-id testpatt) +;; (db:with-db +;; dbstruct run-id #f +;; (lambda (db) +;; (let* ((res '()) +;; (stmt-cache (dbr:dbstruct-stmt-cache dbstruct)) +;; (stmth (let* ((sh (db:hoh-get stmt-cache db testpatt))) +;; (or sh +;; (let* ((tests-match-qry (tests:match->sqlqry testpatt)) +;; (qry (conc "SELECT id,testname,item_path,state,status FROM tests WHERE run_id=? " +;; (if tests-match-qry (conc " AND (" tests-match-qry ") ") ""))) +;; (newsh (sqlite3:prepare db qry))) +;; (debug:print-info 8 *default-log-port* "db:get-tests-for-run qry=" qry) +;; (db:hoh-set! stmt-cache db testpatt newsh) +;; newsh))))) +;; (reverse +;; (sqlite3:fold-row +;; (lambda (res id testname item-path state status) +;; ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment +;; (cons (vector id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-") res)) +;; '() +;; stmth +;; run-id)))))) (define (db:get-tests-for-run-state-status dbstruct run-id testpatt #!optional (last-update 0)) (let* ((res '()) (tests-match-qry (tests:match->sqlqry testpatt)) (qry (conc "SELECT id,testname,item_path,state,status,event_time,run_duration FROM tests WHERE run_id=? " @@ -3533,29 +3538,13 @@ #f ;; default result test-id)))) (define (db:get-test-times dbstruct run-name target) (let ((res `()) - (qry (conc "select testname, item_path, run_duration, " (string-join (db:get-keys dbstruct) " || '/' || ") " as target from tests inner join runs on tests.run_id = runs.id where runs.runname = ? and target = ? ;"))) - - (db:with-db - dbstruct - #f ;; this is for the main runs db - #f ;; does not modify db - (lambda (db) - (sqlite3:for-each-row - (lambda (test-name item-path test-time target ) - (set! res (cons (vector test-name item-path test-time) res))) - db - qry - run-name target) - res)))) - -(define (db:get-test-times dbstruct run-name target) - (let ((res `()) - (qry (conc "select testname, item_path, run_duration, " (string-join (db:get-keys dbstruct) " || '/' || ") " as target from tests inner join runs on tests.run_id = runs.id where runs.runname = ? and target = ? ;"))) - + (qry (conc "select testname, item_path, run_duration, " + (string-join (db:get-keys dbstruct) " || '/' || ") + " as target from tests inner join runs on tests.run_id = runs.id where runs.runname = ? and target = ? ;"))) (db:with-db dbstruct #f ;; this is for the main runs db #f ;; does not modify db (lambda (db) @@ -3845,11 +3834,11 @@ "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt) (reverse res))))) ;; This routine moved from tdb.scm, :read-test-data ;; -(define (db:read-test-data* dbstruct run-id test-id categorypatt varpatt) +(define (db:read-test-data-varpatt dbstruct run-id test-id categorypatt varpatt) (let* ((res '())) (db:with-db dbstruct #f #f (lambda (db) (sqlite3:for-each-row Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -211,11 +211,11 @@ (let loop ((minutes (calc-minutes)) (cpu-load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) (disk-free (get-df (current-directory))) (last-sync (current-seconds))) - (common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync)) + ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync)) (let* ((over-time (> (current-seconds) (+ last-sync update-period))) (new-cpu-load (let* ((load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) (delta (abs (- load cpu-load)))) (if (> delta 0.1) ;; don't bother updating with small changes load @@ -233,11 +233,11 @@ (test-info (rmt:get-test-info-by-id run-id test-id)) (state (db:test-get-state test-info)) (status (db:test-get-status test-info)) (kill-reason "no kill reason specified") (kill-job? #f)) - (common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period)) + ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period)) (cond ((test-get-kill-request run-id test-id) (set! kill-reason "KILLING TEST since received kill request (KILLREQ)") (set! kill-job? #t)) ((and runtlim (> (- (current-seconds) start-seconds) runtlim)) @@ -252,13 +252,14 @@ (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync) (launch:handle-zombie-tests run-id) (when do-sync ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append) ;; (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes))))) - (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync started at "(current-seconds))) + ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync started at "(current-seconds))) (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) - (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds)))) + ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds))) + ) (if kill-job? (begin (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason) (mutex-lock! m) @@ -523,11 +524,11 @@ (lambda (varval) (let ((var (car varval)) (val (cadr varval))) (if (and (string? var)(string? val)) (begin - (safe-setenv var (config:eval-string-in-environment val))) ;; val) + (safe-setenv var (configf:eval-string-in-environment val))) ;; val) (debug:print-error 0 *default-log-port* "bad variable spec, " var "=" val)))) (configf:get-section rconfig section))) (list "default" target))) ;;(bb-check-path msg: "launch:execute post block 1") ADDED lib/libpangox-1.0.so Index: lib/libpangox-1.0.so ================================================================== --- /dev/null +++ lib/libpangox-1.0.so cannot compute difference between binary files ADDED lib/libpangox-1.0.so.0 Index: lib/libpangox-1.0.so.0 ================================================================== --- /dev/null +++ lib/libpangox-1.0.so.0 cannot compute difference between binary files ADDED lib/libxcb-xlib.so.0 Index: lib/libxcb-xlib.so.0 ================================================================== --- /dev/null +++ lib/libxcb-xlib.so.0 cannot compute difference between binary files Index: megatest-version.scm ================================================================== --- megatest-version.scm +++ megatest-version.scm @@ -18,6 +18,6 @@ ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00.. ;; (declare (unit megatest-version)) -(define megatest-version 1.6581) +(define megatest-version 1.6583) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -519,15 +519,25 @@ "-show-runconfig" ;;"-list-db-targets" "-show-runconfig" "-show-config" "-show-cmdinfo" - "-cleanup-db")) + "-cleanup-db" + )) + (no-watchdog-argvals (list '("-archive" . "replicate-db"))) + (start-watchdog-specail-arg-val (let loop ((hed (car no-watchdog-argvals)) + (tail (cdr no-watchdog-argvals))) + ;; (print "hed" hed " arg " (args:get-arg (car hed)) " val:" (cdr hed) " eql" (equal? (args:get-arg (car hed)) (cdr hed))) + (if (equal? (args:get-arg (car hed)) (cdr hed)) + #f + (if (null? tail) + #t + (loop (car tail) (cdr tail)))))) (no-watchdog-args-vals (filter (lambda (x) x) (map args:get-arg no-watchdog-args))) - (start-watchdog (null? no-watchdog-args-vals))) - ;;(BB> "no-watchdog-args="no-watchdog-args "no-watchdog-args-vals="no-watchdog-args-vals) + (start-watchdog (and (null? no-watchdog-args-vals) start-watchdog-specail-arg-val))) + ;(print "no-watchdog-args="no-watchdog-args "no-watchdog-args-vals="no-watchdog-args-vals " start-watchdog-specail-arg-val:" start-watchdog-specail-arg-val " start-watchdog:" start-watchdog) (if start-watchdog (thread-start! *watchdog*))) ;; bracket open-output-file with code to make leading directory if it does not exist and handle exceptions @@ -1318,11 +1328,11 @@ (itempath (if (member "item_path" tests-spec)(get-value-by-fieldname test test-field-index "item_path" ) #f)) ;; (db:test-get-item-path test)) (fullname (conc testname (if (equal? itempath "") "" (conc "/" itempath )))) - (testdat-raw (map vector->list (rmt:read-test-data* run-id test-id categorypatt setvarpatt))) + (testdat-raw (map vector->list (rmt:read-test-data-varpatt run-id test-id categorypatt setvarpatt))) (testdat (filter (lambda (x) (not (equal? "logpro" (list-ref x 10)))) testdat-raw))) @@ -1969,15 +1979,15 @@ ;;====================================================================== ;; Archive tests ;;====================================================================== ;; Archive tests matching target, runname, and testpatt -(if (equal? (args:get-arg "-archive") "replicacte-db") +(if (equal? (args:get-arg "-archive") "replicate-db") (begin ;; check if source - ;; check if megatest.db exist - (launch:setup) + ;; check if megatest.db exist + (launch:setup) (if (not (args:get-arg "-source")) (begin (debug:print-info 1 *default-log-port* "Missing required argument -source ") (exit 1))) (if (common:file-exists? (conc *toppath* "/megatest.db")) @@ -2000,11 +2010,11 @@ (set! *didsomething* #t)) (begin (debug:print-error 1 *default-log-port* "Path " source " not found") (exit 1)))))) ;; else do a general-run-call - (if (and (args:get-arg "-archive") (not (equal? (args:get-arg "-archive") "replicacte-db"))) + (if (and (args:get-arg "-archive") (not (equal? (args:get-arg "-archive") "replicate-db"))) (begin ;; for the archive get we need to preserve the starting dir as part of the target path (if (and (args:get-arg "-dest") (not (equal? (substring (args:get-arg "-dest") 0 1) "/"))) (let ((newpath (conc (current-directory) "/" (args:get-arg "-dest")))) Index: mtut.scm ================================================================== --- mtut.scm +++ mtut.scm @@ -1323,11 +1323,13 @@ (begin (print "ERROR: Unknown key in packet \"" key "\" with value \"" val "\"") res))))) (conc "megatest " (if (not (member action '("sync"))) (conc action " " action-param) - "")) + "") (if (member action '("-run" "-rerun-clean" "-rerun-all" "-kill-rerun")) + "-rerun DEAD,ABORT,KILLED" + "")) pkta))) ;; (use trace)(trace pkt->cmdline) (define (write-pkt pktsdir uuid pkt) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -51,10 +51,12 @@ (if cinfo cinfo (if (server:check-if-running areapath) (client:setup areapath) #f)))) + +;;====================================================================== (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; @@ -890,12 +892,13 @@ ;; T E S T D A T A ;;====================================================================== (define (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f)) (rmt:send-receive 'read-test-data run-id (list run-id test-id categorypatt))) -(define (rmt:read-test-data* run-id test-id categorypatt varpatt #!key (work-area #f)) - (rmt:send-receive 'read-test-data* run-id (list run-id test-id categorypatt varpatt))) + +(define (rmt:read-test-data-varpatt run-id test-id categorypatt varpatt #!key (work-area #f)) + (rmt:send-receive 'read-test-data-varpatt run-id (list run-id test-id categorypatt varpatt))) (define (rmt:get-data-info-by-id test-data-id) (rmt:send-receive 'get-data-info-by-id #f (list test-data-id))) (define (rmt:testmeta-add-record testname) Index: rmtmod.scm ================================================================== --- rmtmod.scm +++ rmtmod.scm @@ -38,10 +38,11 @@ (defstruct alldat (areapath #f) (ulexdat #f) ) +;;====================================================================== ;; return the handle struct for sending queries to a specific database ;; - initializes the connection object if this is the first access ;; - finds the "captain" and asks who to talk to for the given dbfname ;; - establishes the connection to the current dbowner ;; Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -392,11 +392,13 @@ (debug:print 0 *default-log-port* "ERROR: failed to run pre-hook " run-pre-hook ", check the log " log-file)) (debug:print-info 0 *default-log-port* "running run-pre-hook: \"" run-pre-hook "\", log is " actual-logf) (system (conc run-pre-hook " >> " actual-logf " 2>&1")) (debug:print-info 0 *default-log-port* "pre-hook \"" run-pre-hook "\" took " (- (current-seconds) start-time) " seconds to run."))) (debug:print 0 *default-log-port* "Skipping pre-hook call \"" run-pre-hook "\" as there are existing tests for this run."))))) - + + + (define (runs:run-post-hook run-id) (let* ((run-post-hook (configf:lookup *configdat* "runs" "post-hook")) (existing-tests (if run-post-hook (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses #f #f ;; offset limit @@ -431,10 +433,65 @@ (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) (debug:print 0 *default-log-port* "ERROR: failed to run post-hook " run-post-hook ", check the log " log-file)) (debug:print-info 0 *default-log-port* "running run-post-hook: \"" run-post-hook "\", log is " actual-logf) (system (conc run-post-hook " >> " actual-logf " 2>&1")) (debug:print-info 0 *default-log-port* "post-hook \"" run-post-hook "\" took " (- (current-seconds) start-time) " seconds to run.")))))) + + +(define (runs:rerun-hook test-id new-test-path testdat rerunlst) + (let* ((rerun-hook (configf:lookup *configdat* "runs" "rerun-hook")) + (log-dir (conc *toppath* "/reruns/logs")) + (target (getenv "MT_TARGET")) + (runname (common:args-get-runname)) + (rundir (db:test-get-rundir testdat)) + (tarfiledir (conc *toppath* "/reruns")) + (status (db:test-get-status testdat)) + (comment (conc "\"" (db:test-get-comment testdat) "\"" )) + (testname (db:test-get-testname testdat)) + (itempath (db:test-get-item-path testdat)) + (file-body (conc status "-" (string-translate target "/" "-") "-" runname "." testname (if (not (string=? itempath "")) (conc "." (string-translate itempath "/" "-")) ""))) + (log-file (conc file-body ".log")) + ;; (log-file (conc status "-" (string-translate target "/" "-") "-" runname "." testname (if (not (string=? itempath "")) (conc "." (string-translate itempath "/" "-")) "") ".log")) + (full-log-fname (conc log-dir "/" log-file)) + (tarfilename (conc file-body ".tar")) + ;; (tarfilename (conc status "." (string-translate target "/" "-") "." runname "." testname (if (not (string=? itempath "")) (conc "." (string-translate itempath "/" "-")) "") ".tar")) + ) + (if rerun-hook + (let* ((use-log-dir (if (not (directory-exists? log-dir)) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir ", exn=" exn) + #f) + (create-directory log-dir #t) + #t) + #t)) + (start-time (current-seconds)) + (actual-logf (if use-log-dir full-log-fname log-file)) + (sys-call-text (conc rerun-hook " " tarfilename " " rundir " " actual-logf " " runname " " tarfiledir " " status " " target " " comment " " testname " " itempath " >> " actual-logf " 2>&1")) + ) + (debug:print 2 *default-log-port* "Found rerun-hook in config:" rerun-hook) + (handle-exceptions + exn + (begin + (print-call-chain *default-log-port*) + (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) + (debug:print 0 *default-log-port* "ERROR: failed to run rerun-hook " rerun-hook ", check the log " log-file)) + (debug:print-info 0 *default-log-port* "running rerun-hook: \"" rerun-hook "\", log is " actual-logf) + ;; call the hook + (debug:print-info 0 *default-log-port* "Calling rerun-hook for " test-id new-test-path testdat rerunlst) + (debug:print-info 0 *default-log-port* "rerun hook: " rerun-hook) + (debug:print-info 0 *default-log-port* "tarfilename: " tarfilename) + (debug:print-info 0 *default-log-port* "rundir: " rundir) + (debug:print-info 0 *default-log-port* "actual-logf: " actual-logf) + (debug:print-info 0 *default-log-port* "runname: " runname) + (debug:print-info 0 *default-log-port* "sys-call-text: " sys-call-text) + (system sys-call-text) + (debug:print-info 0 *default-log-port* "rerun-hook \"" rerun-hook "\" took " (- (current-seconds) start-time) " seconds to run.")))))) + + + ;; return #t when all items in waitors-upon list are represented in test-patt, #f otherwise. (define (runs:testpatts-mention-waitors-upon? test-patt waitors-upon) (null? (tests:filter-test-names-not-matched waitors-upon test-patt))) @@ -602,11 +659,11 @@ (runs:update-all-test_meta #f) ;; run the run prehook if there are no tests yet run for this run: ;; (runs:run-pre-hook run-id) - ;; mark all test launced flag as false in the meta table + ;; mark all test launched flag as false in the meta table (rmt:set-var (conc "lunch-complete-" run-id) "no") (debug:print-info 1 *default-log-port* "Setting end-of-run to no") (let* ((config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns"))) (if x (string->number x) #f))) (config-rerun-cnt (if config-reruns @@ -764,11 +821,11 @@ (if (> run-count 0) ;; handle reruns (begin (if (not (hash-table-ref/default flags "-preclean" #f)) (hash-table-set! flags "-preclean" #t)) (if (not (hash-table-ref/default flags "-rerun" #f)) - (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS")) + (hash-table-set! flags "-rerun" "ABORT,STUCK/DEAD,n/a,ZERO_ITEMS")) ;; recursive call to self (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))) (launch:end-of-run-check run-id))) (debug:print-info 0 *default-log-port* "No tests to run"))) (debug:print-info 4 *default-log-port* "All done by here") @@ -2022,25 +2079,34 @@ (or (member (test:get-status testdat) '("PASS" "WARN" "CHECK" "SKIP" "WAIVED")) (member (test:get-state testdat) '("COMPLETED")))) (debug:print-info 2 *default-log-port* "running test " test-name "/" item-path " suppressed as it is " (test:get-state testdat) " and " (test:get-status testdat)) (hash-table-set! test-registry full-test-name 'DONOTRUN) ;; COMPLETED) (set! runflag #f)) + ;; -rerun and status is one of the specifed, run it ((and rerun (let* ((rerunlst (string-split rerun ",")) (must-rerun (member (test:get-status testdat) rerunlst))) (debug:print-info 3 *default-log-port* "-rerun list: " rerun ", test-status: " (test:get-status testdat)", must-rerun: " must-rerun) must-rerun)) (debug:print-info 2 *default-log-port* "Rerun forced for test " test-name "/" item-path) - (set! runflag #t)) - ;; -keepgoing, do not rerun FAIL + (set! runflag #t) + (debug:print-info 2 *default-log-port* "Calling rerun hook") + (runs:rerun-hook test-id new-test-path testdat rerun) + ) + + + + ;; -keepgoing, do not rerun FAIL ((and keepgoing (member (test:get-status testdat) '("FAIL"))) (set! runflag #f)) - ((and (not rerun) + + ((and (not rerun) (member (test:get-status testdat) '("FAIL" "n/a"))) (set! runflag #t)) + (else (set! runflag #f))) (debug:print 4 *default-log-port* "RUNNING => runflag: " runflag " STATE: " (test:get-state testdat) " STATUS: " (test:get-status testdat)) (if (not runflag) (if (not parent-test) (if (runs:lownoise (conc "not starting test" full-test-name) 60) @@ -2202,32 +2268,38 @@ ;; (define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep #!key (actions '(print))) (let* ((runs-ht (runs:get-hash-by-target target-patts runpatt)) (age (if (args:get-arg "-age")(common:hms-string->seconds (args:get-arg "-age")) #f)) (age-mark (if age (- (current-seconds) age) (+ (current-seconds) 86400))) - (precmd (or (args:get-arg "-precmd") ""))) - (print "Actions: " actions) - (for-each - (lambda (target) - (let* ((runs (hash-table-ref runs-ht target)) - (sorted (sort runs (lambda (a b)(< (simple-run-event_time a)(simple-run-event_time b))))) - (to-remove (let* ((len (length sorted)) - (trim-amt (- len num-to-keep))) - (if (> trim-amt 0) - (take sorted trim-amt) - '())))) - (hash-table-set! runs-ht target to-remove) - (print target ":") - (for-each - (lambda (run) - (let ((remove (member run to-remove (lambda (a b) - (eq? (simple-run-id a) - (simple-run-id b)))))) - (if (and age (> (simple-run-event_time run) age-mark)) - (print "Skipping handling of " target "/" (simple-run-runname run) " as it is younger than " (args:get-arg "-age")) - (for-each - (lambda (action) + (precmd (or (args:get-arg "-precmd") "")) + (action-chk (member (string->symbol "remove-runs") actions))) + ;; check the sequence of actions archive must comme before remove-runs + (if (and action-chk (member (string->symbol "archive") action-chk)) + (begin + (debug:print-error 0 *default-log-port* "action remove-runs must come after archive") + (exit 1))) + (print "Actions: " actions " age: " age) + (for-each + (lambda (action) + (for-each + (lambda (target) + (let* ((runs (hash-table-ref runs-ht target)) + (sorted (sort runs (lambda (a b)(< (simple-run-event_time a)(simple-run-event_time b))))) + (to-remove (let* ((len (length sorted)) + (trim-amt (- len num-to-keep))) + (if (> trim-amt 0) + (take sorted trim-amt) + '())))) + (hash-table-set! runs-ht target to-remove) + (print target ":") + (for-each + (lambda (run) + (let ((remove (member run to-remove (lambda (a b) + (eq? (simple-run-id a) + (simple-run-id b)))))) + (if (and age (> (simple-run-event_time run) age-mark)) + (print "Skipping handling of " target "/" (simple-run-runname run) " as it is younger than " (args:get-arg "-age")) (case action ((print) (print " " (simple-run-runname run) " " (time->string (seconds->local-time (simple-run-event_time run)) "WW%V.%u %H:%M:%S") " " (if remove "REMOVE" ""))) @@ -2237,17 +2309,14 @@ " -kill-wait 0" ""))))) ((archive) (if remove (system (conc precmd " megatest -archive save-remove -target " target " -runname " (simple-run-runname run) " -testpatt %")))) ((kill-runs) - (if remove (system (conc precmd " megatest -kill-runs -target " target " -runname " (simple-run-runname run) " -testpatt %")))) - )) - actions)))) - sorted))) - ;; (print "Sorted: " (map simple-run-event_time sorted)) - ;; (print "Remove: " (map simple-run-event_time to-remove)))) - (hash-table-keys runs-ht)) + (if remove (system (conc precmd " megatest -kill-runs -target " target " -runname " (simple-run-runname run) " -testpatt %")))))))) + sorted))) + (hash-table-keys runs-ht))) + actions) runs-ht)) (define (remove-last-path-directory path-in) (let* ((dparts (string-split path-in "/")) (path-out (conc "/" (string-intersperse (take dparts (- (length dparts) 1)) "/"))) @@ -2255,29 +2324,10 @@ path-out ) ) -;; (define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep) -;; (let ((data (runs:get-all-but-most-recent-n-per-target target-patts runpatt num-to-keep))) -;; (for-each -;; (lambda (target) -;; (let ((runs-to-remove (hash-table-ref data target ))) -;; (for-each -;; (lambda (run) -;; (print "megatest -remove-runs -target " target " -runname " (simple-run-runname run) " -testpatt %")) -;; runs-to-remove))) -;; (hash-table-keys data)))) - -;; Remove runs -;; fields are passing in through -;; action: -;; 'remove-runs -;; 'set-state-status -;; -;; NB// should pass in keys? -;; (define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(mode #f)(options '())) (common:clear-caches) ;; clear all caches (let* ((db #f) ;; (tdbdat (tasks:open-db)) (keys (rmt:get-keys)) @@ -2511,11 +2561,11 @@ (begin (let ((rundir (db:test-get-rundir new-test-dat))) (if (and (not (string= rundir "/tmp/badname")) (file-exists? rundir) (substring-index run-name rundir) - (substring-index target rundir) + (tests:glob-like-match (conc "%/" target "/%") rundir) ) (begin (set! lasttpath (db:test-get-rundir new-test-dat)) ;; remember this path for run removal (set! lastrealpath (remove-last-path-directory (resolve-pathname lasttpath))) (hash-table-set! run-paths-hash lastrealpath 1) @@ -2524,11 +2574,13 @@ (begin (debug:print 2 *default-log-port* "Not removing directory " rundir " because either it doesn't exist or has a bad name") (debug:print 2 *default-log-port* "Is /tmp/badname: " (string= rundir "/tmp/badname")) (debug:print 2 *default-log-port* "Exists: " (file-exists? rundir)) (debug:print 2 *default-log-port* "Has run-name: " (substring-index run-name rundir)) - (debug:print 2 *default-log-port* "Has target: " (substring-index target rundir)) + (debug:print 2 *default-log-port* "Has target: " (tests:glob-like-match (conc "%/" target "/%") rundir)) + (debug:print 2 *default-log-port* "Target: " target) + ;;PJH remove record from db no need to cleanup directory (case mode ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) (db:test-get-state test)(db:test-get-status test) #f)) ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f)) (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test)))) Index: sauth-common.scm ================================================================== --- sauth-common.scm +++ sauth-common.scm @@ -240,11 +240,20 @@ (set! obj data-row)))) ;(print obj) obj)) +(define (sauth-common:src-size path) + (let ((output (with-input-from-pipe (conc "/usr/bin/du -s " path " | awk '{print $1}'") + (lambda() + (read-line))))) + (string->number output))) +(define (sauth-common:space-left-at-dest path) + (let* ((output (run/string (pipe (df ,path ) (tail -1)))) + (size (caddr (cdr (string-split output " "))))) + (string->number size))) ;; function to validate the users input for target path and resolve the path ;; TODO: Check for restriction in subpath (define (sauth-common:resolve-path new current allowed-sheets) (let* ((target-path (append current (string-split new "/"))) @@ -279,11 +288,11 @@ (if (and (not (equal? restricted-areas "" )) (string-match (regexp restrictions) target-path)) (begin - (sauth:print-error "Access denied to " (string-join resolved-path "/")) + (sauth:print-error (conc "Access denied to " (string-join resolved-path "/"))) ;(exit 1) #f) target-path) )) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -167,11 +167,13 @@ ;; given a path to a server log return: host port startseconds ;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let (define (server:logf-get-start-info logf) - (let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+)"))) ;; SERVER STARTED: host:port AT timesecs server id + (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id + (dbprep-rx (regexp "^SERVER: dbprep")) + (dbprep-found 0)) (handle-exceptions exn (begin (debug:print-info 0 *default-log-port* "Unable to get server info from " logf ", exn=" exn) (list #f #f #f #f)) ;; no idea what went wrong, call it a bad server @@ -179,11 +181,16 @@ logf (lambda () (let loop ((inl (read-line)) (lnum 0)) (if (not (eof-object? inl)) - (let ((mlst (string-match rx inl))) + (let ((mlst (string-match server-rx inl)) + (dbprep (string-match dbprep-rx inl)) + ) + (if dbprep + (set! dbprep-found 1) + ) (if (not mlst) (if (< lnum 500) ;; give up if more than 500 lines of server log read (loop (read-line)(+ lnum 1)) (begin (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf ) @@ -192,11 +199,17 @@ (list (car dat) ;; host (string->number (cadr dat)) ;; port (string->number (caddr dat)) (cadr (cddr dat)))))) (begin - (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (current-seconds)) + (if dbprep-found + (begin + (debug:print-info 0 *default-log-port* "Server is in dbprep at " (current-seconds)) + (thread-sleep! 25) + ) + (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (current-seconds)) + ) (list #f #f #f #f))))))))) ;; get a list of servers with all relevant data ;; ( mod-time host port start-time pid ) ;; @@ -214,15 +227,21 @@ (create-directory (conc areapath "/logs") #t) (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs"))) (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list. exn=" exn))) (directory-exists? (conc areapath "/logs"))) '())) - (let* ((server-logs (glob (conc areapath "/logs/server-*.log"))) + + ;; Get the list of server logs that do not contain "exiting". Ignore logs for servers that have exited. + (let* ((server-logs-cmd (conc "grep -iL exiting " areapath "/logs/server-*-*.log")) + (server-logs (string-split (string-chomp (call-with-input-pipe server-logs-cmd read-all)))) (num-serv-logs (length server-logs))) - (if (null? server-logs) - '() - (let loop ((hed (car server-logs)) + (if (or (null? server-logs) (= num-serv-logs 0)) + (let () + (debug:print 1 *default-log-port* "There are no servers running") + '() + ) + (let loop ((hed (string-chomp (car server-logs))) (tal (cdr server-logs)) (res '())) (let* ((mod-time (handle-exceptions exn (begin @@ -243,11 +262,11 @@ (if (null? tal) (if (and limit (> (length new-res) limit)) new-res ;; (take new-res limit) <= need intelligent sorting before this will work new-res) - (loop (car tal)(cdr tal) new-res))))))))) + (loop (string-chomp (car tal)) (cdr tal) new-res))))))))) (define (server:get-num-alive srvlst) (let ((num-alive 0)) (for-each (lambda (server) @@ -359,10 +378,11 @@ (let* ((fmodtime (file-modification-time start-flag)) (delta (- (current-seconds) fmodtime)) (all-go (> delta reftime))) (if (and all-go (begin + (debug:print-info 0 *default-log-port* "Writing " start-flag) (with-output-to-file start-flag (lambda () (print server-key))) (thread-sleep! 0.25) (let ((res (with-input-from-file start-flag @@ -395,10 +415,11 @@ (random 5))) ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously (lock-file (conc areapath "/logs/server-start.lock"))) (if (> (- (current-seconds) when-run) run-delay) (let* ((start-flag (conc areapath "/logs/server-start-last"))) (common:simple-file-lock-and-wait lock-file expire-time: 15) + (debug:print-info 0 *default-log-port* "server:kind-run: touching " start-flag) (system (conc "touch " start-flag)) ;; lazy but safe (server:run areapath) (thread-sleep! 2) ;; don't release the lock for at least a few seconds (common:simple-file-release-lock lock-file))) (hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds)))))) @@ -464,11 +485,11 @@ (handle-exceptions exn (begin (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " servr ", exn=" exn) #f) - (match-let (((mod-time hostname port start-time pid) + (match-let (((mod-time hostname port start-time server-id pid) servr)) (tasks:kill-server hostname pid)))) ;; called in megatest.scm, host-port is string hostname:port ;; @@ -702,11 +723,10 @@ (debug-mode (debug:debug-mode 1)) (last-time (current-seconds)) (no-sync-db (db:open-no-sync-db)) (stmt-cache (dbr:dbstruct-stmt-cache dbstruct)) (sync-duration 0) ;; run time of the sync in milliseconds - ;;(this-wd-num (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x))) ) (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls (debug:print-info 2 *default-log-port* "Periodic sync thread started.") (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id) );; " this-wd-num="this-wd-num) (if (and legacy-sync (not *time-to-exit*)) Index: spublish.scm ================================================================== --- spublish.scm +++ spublish.scm @@ -391,10 +391,14 @@ ((not (file-exists? target-path)) (sauth:print-error (conc " target Directory " target-path " does not exist!!"))) ((not (file-exists? src-path)) (sauth:print-error (conc "Source path " src-path " does not exist!!" ))) (else + (if (< (sauth-common:space-left-at-dest target-path) (sauth-common:src-size src-path)) + (begin + (sauth:print-error "Destination does not have enough disk space.") + (exit 1))) (if (is_directory src-path) (begin (let* ((parent-dir src-path) (start-dir target-path)) (run (pipe Index: sretrieve.scm ================================================================== --- sretrieve.scm +++ sretrieve.scm @@ -697,11 +697,12 @@ ;;do nothing for dirs) ) (else (if (not (string-match (regexp exclude) p )) - (print (string-substitute (conc base_path "/") "" p "-")))))))) + (print (string-substitute (conc base_path "/") "" p "-")))))) + dotfiles: #t)) (define (sretrieve:shell-help) (conc "Usage: " *exe-name* " [action [params ...]] ls [target path] : list contents of target area. The output of the cmd can be piped into other system cmd. eg ls | grep txt @@ -1081,12 +1082,12 @@ (define (main) (let* ((args (argv)) (prog (car args)) (rema (cdr args)) (exe-name (pathname-file (car (argv)))) - (exe-dir (or (pathname-directory prog) - (sretrieve:find exe-name (string-split (get-environment-variable "PATH") ":")))) + ;(exe-dir (or (pathname-directory prog) + ; (sretrieve:find exe-name (string-split (get-environment-variable "PATH") ":")))) ;(configdat (sretrieve:load-config exe-dir exe-name)) ) ;; preserve the exe data in the config file ;(hash-table-set! configdat "exe-info" (list (list "exe-name" exe-name) ; (list "exe-dir" exe-dir))) Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -66,12 +66,12 @@ '())))) (filter (lambda (d) (if (directory-exists? d) d (begin - (if (common:low-noise-print 60 "tests:get-tests-search-path" d) - (debug:print 0 *default-log-port* "WARNING: problem with directory " d ", dropping it from tests path")) + ;; (if (common:low-noise-print 60 "tests:get-tests-search-path" d) + ;; (debug:print 0 *default-log-port* "WARNING: problem with directory " d ", dropping it from tests path")) #f))) (append paths (list (conc *toppath* "/tests")))))) (define (tests:get-valid-tests test-registry tests-paths) (if (null? tests-paths) Index: utils/mk_wrapper ================================================================== --- utils/mk_wrapper +++ utils/mk_wrapper @@ -19,10 +19,11 @@ prefix=$1 cmd=$2 target=$3 cfgfile="$prefix/bin/.$(lsb_release -sr)/cfg.sh" +libdir="$prefix/bin/.$(lsb_release -sr)/lib" # we wish to create a var in cfg.sh for finding sqlite3 executable chicken_bin_dir=$(dirname $(which csi)) if [[ -e $chicken_bin_dir/sqlite3 ]];then sqlite3_exe=$chicken_bin_dir/sqlite3 @@ -30,19 +31,20 @@ sqlite3_exe=$(which sqlite3) fi if [ "$LD_LIBRARY_PATH" != "" ];then echo "INFO: Using LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >&2 + echo "INFO: Writing $cfgfile" >&2 ( cat << __EOF if [ -z \$MT_ORIG_ENV ]; then export MT_ORIG_ENV=\$( $prefix/bin/serialize-env ) fi if [ "\$LD_LIBRARY_PATH" != "" ];then - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:\$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:\$LD_LIBRARY_PATH:$libdir else - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$libdir fi export MT_SQLITE3_EXE=$sqlite3_exe __EOF ) > $cfgfile