@@ -45,2938 +45,13 @@ rmtmod (prefix mtargs args:)) (include "common_records.scm") -(define (remove-files filespec) - (let ((files (glob filespec))) - (for-each delete-file files))) - -(define (stop-the-train) - (thread-start! (make-thread (lambda () - (let loop () - (if (and *toppath* - (file-exists? (conc *toppath*"/stop-the-train"))) - (let* ((msg (conc "ERROR: found file "*toppath*"/stop-the-train, exiting immediately"))) - ;; yes, print to current-output-port AND *default-log-port*, annoying but necessary I think - (print msg) - (debug:print 0 *default-log-port* msg) - (remove-files (conc *toppath* "/logs/server*")) - (remove-files (conc *toppath* "/.servinfo/*")) - (remove-files (conc *toppath* "/.mtdb/*lock")) - (exit 1))) - (thread-sleep! 5) - (loop)))))) - -;; execute thunk, return value. If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* . -;; arguments - thunk, message -(define (common:fail-safe thunk warning-message-on-exception) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "notable but nonfatal condition - "warning-message-on-exception", exn=" exn) - (debug:print-info 0 *default-log-port* - (string-substitute "\n?Error:" "nonfatal condition:" - (with-output-to-string - (lambda () - (print-error-message exn) )))) - (debug:print-info 0 *default-log-port* " -- continuing after nonfatal condition...") - #f) - (thunk))) - - -;; returns list of fd count, socket count -(define (get-file-descriptor-count #!key (pid (current-process-id ))) - (list - (length (glob (conc "/proc/" pid "/fd/*"))) - (length (filter identity (map socket? (glob (conc "/proc/" pid "/fd/*"))))) - ) -) - - - -;; GLOBALS - -;; CONTEXTS -(defstruct cxt - (taskdb #f) - (cmutex (make-mutex))) -;; (define *contexts* (make-hash-table)) -;; (define *context-mutex* (make-mutex)) - -;; ;; safe method for accessing a context given a toppath -;; ;; -;; (define (common:with-cxt toppath proc) -;; (mutex-lock! *context-mutex*) -;; (let ((cxt (hash-table-ref/default *contexts* toppath #f))) -;; (if (not cxt) -;; (set! cxt (let ((x (make-cxt)))(hash-table-set! *contexts* toppath x) x))) -;; (let ((cxt-mutex (cxt-mutex cxt))) -;; (mutex-unlock! *context-mutex*) -;; (mutex-lock! cxt-mutex) -;; (let ((res (proc cxt))) -;; (mutex-unlock! cxt-mutex) -;; res)))) - -;; A hash table that can be accessed by #{scheme ...} calls in -;; config files. Allows communicating between confgs -;; -(define *user-hash-data* (make-hash-table)) - -(define *db-keys* #f) - -(define *pkts-info* (make-hash-table)) ;; store stuff like the last parent here -(define *configinfo* #f) ;; raw results from setup, includes toppath and table from megatest.config -(define *runconfigdat* #f) ;; run configs data -(define *configdat* #f) ;; megatest.config data -(define *configstatus* #f) ;; status of data; 'fulldata : all processing done, #f : no data yet, 'partialdata : partial read done -;; (define *toppath* #f) ;; moved to commonmod -(define *already-seen-runconfig-info* #f) - -(define *test-meta-updated* (make-hash-table)) -(define *globalexitstatus* 0) ;; attempt to work around possible thread issues -(define *passnum* 0) ;; when running track calls to run-tests or similar -;; (define *alt-log-file* #f) ;; used by -log -;; (define *common:denoise* (make-hash-table)) ;; for low noise printing -(define *default-log-port* (current-error-port)) -(define *time-zero* (current-seconds)) ;; for the watchdog -(define *on-exit-procs* '()) ;; add procs to this list to be executed on exit -(define *default-area-tag* "local") - -;; DATABASE -;; db access -(define *db-last-access* (current-seconds)) ;; last db access, used in server -;; (define *db-write-access* #t) -;; db sync -;; (define *db-last-sync* 0) ;; last time the sync to megatest.db happened -(define *db-sync-in-progress* #f) ;; if there is a sync in progress do not try to start another -;; (define *db-multi-sync-mutex* (make-mutex)) ;; protect access to *db-sync-in-progress*, *db-last-sync* -;; task db -(define *task-db* #f) ;; (vector db path-to-db) -(define *db-access-allowed* #t) ;; flag to allow access -;; (define *db-access-mutex* (make-mutex)) ;; moved to dbfile -;; (define *db-transaction-mutex* (make-mutex)) -(define *db-cache-path* #f) -;; (define *db-with-db-mutex* (make-mutex)) -(define *db-api-call-time* (make-hash-table)) ;; hash of command => (list of times) - -;; SERVER -(define *transport-type* 'http) ;; override with [server] transport http|rpc|nmsg -(define *runremote* #f) ;; if set up for server communication this will hold -;; (define *max-cache-size* 0) -(define *logged-in-clients* (make-hash-table)) -(define *server-id* #f) -;; (define *server-info* #f) ;; good candidate for easily convert to non-global -(define *time-to-exit* #f) -(define *run-id* #f) -(define *server-kind-run* (make-hash-table)) -(define *home-host* #f) -;; (define *total-non-write-delay* 0) -(define *heartbeat-mutex* (make-mutex)) -;; (define *api-process-request-count* 0) -;; (define *max-api-process-requests* 0) -(define *server-overloaded* #f) - -;; client -(define *rmt-mutex* (make-mutex)) ;; remote access calls mutex - -;; RPC transport -(define *rpc:listener* #f) - -;; KEY info -(define *target* (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN -(define *keys* (make-hash-table)) ;; cache the keys here -(define *keyvals* (make-hash-table)) -(define *toptest-paths* (make-hash-table)) ;; cache toptest path settings here -(define *test-paths* (make-hash-table)) ;; cache test-id to test run paths here -(define *test-ids* (make-hash-table)) ;; cache run-id, testname, and item-path => test-id -(define *test-info* (make-hash-table)) ;; cache the test info records, update the state, status, run_duration etc. from testdat.db - -(define *run-info-cache* (make-hash-table)) ;; run info is stable, no need to reget -(define *launch-setup-mutex* (make-mutex)) ;; need to be able to call launch:setup often so mutex it and re-call the real deal only if *toppath* not set -(define *homehost-mutex* (make-mutex)) - -;; Miscellaneous -(define *triggers-mutex* (make-mutex)) ;; block overlapping processing of triggers -(define *numcpus-cache* (make-hash-table)) - -;; this plugs a hole in posix-extras in recent chicken versions > 4.9) -(let-values (( (chicken-release-number chicken-major-version) - (apply values - (map string->number - (take - (string-split (chicken-version) ".") - 2))))) - (let ((resolve-pathname-broken? - (or (> chicken-release-number 4) - (and (eq? 4 chicken-release-number) (> chicken-major-version 9))))) - (if resolve-pathname-broken? - (define ##sys#expand-home-path pathname-expand)))) - -(define (realpath x) (resolve-pathname (pathname-expand (or x "/dev/null")) )) - -(define (common:get-this-exe-fullpath #!key (argv (argv))) - (let* ((this-script - (cond - ((and (> (length argv) 2) - (string-match "^(.*/csi|csi)$" (car argv)) - (string-match "^-(s|ss|sx|script)$" (cadr argv))) - (caddr argv)) - (else (car argv)))) - (fullpath (realpath this-script))) - fullpath)) - -;;====================================================================== - -(define *common:this-exe-fullpath* (common:get-this-exe-fullpath)) -(define *common:this-exe-dir* (pathname-directory *common:this-exe-fullpath*)) -(define *common:this-exe-name* (pathname-strip-directory *common:this-exe-fullpath*)) - -(define (common:get-sync-lock-filepath) - (let* ((tmp-area (common:make-tmpdir-name *toppath* "")) - (lockfile (conc tmp-area "/megatest.db.lock"))) - lockfile)) - -(define *common:logpro-exit-code->status-sym-alist* - '( ( 0 . pass ) - ( 1 . fail ) - ( 2 . warn ) - ( 3 . check ) - ( 4 . waived ) - ( 5 . abort ) - ( 6 . skip ))) - -(define (common:logpro-exit-code->status-sym exit-code) - (or (alist-ref exit-code *common:logpro-exit-code->status-sym-alist*) 'fail)) - -(define (common:worse-status-sym ss1 ss2) - (let loop ((status-syms-remaining '(abort fail check skip warn waived pass))) - (cond - ((null? status-syms-remaining) - 'fail) - ((eq? (car status-syms-remaining) ss1) - ss1) - ((eq? (car status-syms-remaining) ss2) - ss2) - (else - (loop (cdr status-syms-remaining)))))) - -(define (common:steps-can-proceed-given-status-sym status-sym) - (if (member status-sym '(warn waived pass)) - #t - #f)) - -(define (status-sym->string status-sym) - (case status-sym - ((pass) "PASS") - ((fail) "FAIL") - ((warn) "WARN") - ((check) "CHECK") - ((waived) "WAIVED") - ((abort) "ABORT") - ((skip) "SKIP") - (else "FAIL"))) - -(define (common:logpro-exit-code->test-status exit-code) - (status-sym->string (common:logpro-exit-code->status-sym exit-code))) - -;; -(defstruct remote - - ;; transport to be used - ;; http - use http-transport - ;; http-read-cached - use http-transport for writes but in-mem cached for reads - (rmode 'http) - (hh-dat (let ((res (or (server:choose-server *toppath* 'homehost) - (cons #f #f)))) - (assert (pair? res)(conc "FATAL: hh-dat should be a pair, got "res)) - res)) - (server-url #f) ;; (server:check-if-running *toppath*) #f)) - (server-id #f) - (server-info #f) ;; (if *toppath* (server:check-if-running *toppath*) #f)) - (last-server-check 0) ;; last time we checked to see if the server was alive - (connect-time (current-seconds)) ;; when we first connected - (last-access (current-seconds)) ;; last time we talked to server - ;; (conndat #f) ;; iface port api-uri api-url api-req seconds server-id - (server-timeout (server:expiration-timeout)) - (force-server #f) - (ro-mode #f) - (ro-mode-checked #f) ;; flag that indicates we have checked for ro-mode - - ;; conndat stuff - (iface #f) ;; TODO: Consolidate this data with server-url and server-info above - (port #f) - (api-url #f) - (api-uri #f) - (api-req #f)) - -;; launching and hosts -(defstruct host - (reachable #f) - (last-update 0) - (last-used 0) - (last-cpuload 1)) - -(define *host-loads* (make-hash-table)) - -;; cache environment vars for each run here -(define *env-vars-by-run-id* (make-hash-table)) - -;; Testconfig and runconfig caches. -(define *testconfigs* (make-hash-table)) ;; test-name => testconfig -(define *runconfigs* (make-hash-table)) ;; target => runconfig - -;; This is a cache of pre-reqs met, don't re-calc in cases where called with same params less than -;; five seconds ago -(define *pre-reqs-met-cache* (make-hash-table)) - -;; cache of verbosity given string -;; -(define *verbosity-cache* (make-hash-table)) - -(define (common:clear-caches) - (set! *target* (make-hash-table)) - (set! *keys* (make-hash-table)) - (set! *keyvals* (make-hash-table)) - (set! *toptest-paths* (make-hash-table)) - (set! *test-paths* (make-hash-table)) - (set! *test-ids* (make-hash-table)) - (set! *test-info* (make-hash-table)) - (set! *run-info-cache* (make-hash-table)) - (set! *env-vars-by-run-id* (make-hash-table)) - (set! *test-id-cache* (make-hash-table))) - -;; Generic string database -(define sdb:qry #f) ;; (make-sdb:qry)) ;; 'init #f) -;; Generic path database -(define *fdb* #f) - -(define *last-launch* (current-seconds)) ;; use for throttling the launch rate. Would be better to use the db and last time of a test in LAUNCHED state. - -;;====================================================================== -;; V E R S I O N -;;====================================================================== - -(define (common:get-full-version) - (conc megatest-version "-" megatest-fossil-hash)) - -(define (common:version-signature) - (conc megatest-version "-" (substring megatest-fossil-hash 0 4))) - -;;====================================================================== -;; from metadat lookup MEGATEST_VERSION -;; -(define (common:get-last-run-version) ;; RADT => How does this work in send-receive function??; assume it is the value saved in some DB - (rmt:get-var "MEGATEST_VERSION")) - -(define (common:get-last-run-version-number) - (string->number - (substring (common:get-last-run-version) 0 6))) - -(define (common:set-last-run-version) - (rmt:set-var "MEGATEST_VERSION" (common:version-signature))) - -;;====================================================================== -;; postive number if megatest version > db version -;; negative number if megatest version < db version -(define (common:version-db-delta) - (- megatest-version (common:get-last-run-version-number))) - -(define (common:version-changed?) - (not (equal? (common:get-last-run-version) - (common:version-signature)))) - - -;; From 1.70 to 1.80, db's are compatible. - -(define (common:api-changed?) - (let* ( - (megatest-major-version (substring (->string megatest-version) 0 4)) - (run-major-version (substring (conc (common:get-last-run-version)) 0 4)) - ) - (and (not (equal? megatest-major-version "1.80")) - (not (equal? megatest-major-version megatest-run-version))) - ) -) - -;;====================================================================== -;; Move me elsewhere ... -;; RADT => Why do we meed the version check here, this is called only if version misma -;; -(define (common:cleanup-db dbstruct #!key (full #f)) - (case (rmt:transport-mode) - ((http) - (apply db:multi-db-sync - dbstruct - 'schema - 'killservers - 'adj-target - 'new2old - '(dejunk) - )) - ((tcp nfs) - (apply db:multi-db-sync - dbstruct - 'schema - 'killservers - 'adj-target - 'new2old - '(dejunk) - ))) - (if (common:api-changed?) - (common:set-last-run-version))) - -(define (common:snapshot-file filepath #!key (subdir ".") ) - (if (file-exists? filepath) - (let* ((age-sec (lambda (file) - (if (file-exists? file) - (- (current-seconds) (file-modification-time file)) - 1000000000))) ;; return really old value if file doesn't exist. we want to clobber it if old or not exist. - (ok-flag #t) - (age-mins (lambda (file) (/ (age-sec file) 60))) - (age-hrs (lambda (file) (/ (age-mins file) 60))) - (age-days (lambda (file) (/ (age-hrs file) 24))) - (age-wks (lambda (file) (/ (age-days file) 7))) - (docmd (lambda (cmd) - (cond - (ok-flag - (let ((res (system cmd))) - (cond - ((eq? 0 res) - #t) - (else - (set! ok-flag #f) - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Command failed with exit code " - (if (< res 0) - res - (/ res 8)) " ["cmd"]" ) - #f)))) - (else - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Not runnining command due to prior error. ["cmd"]") - #f)))) - (copy (lambda (src dest) (docmd (conc "/bin/cp '"src"' '"dest"'")))) - (copy+zip (lambda (src dest) (docmd (conc "gzip -c - < '"src"' > '"dest"'")))) - (fullpath (realpath filepath)) - (basedir (pathname-directory fullpath)) - (basefile (pathname-strip-directory fullpath)) - ;;(prevfile (conc filepath ".prev.gz")) - (minsfile (conc basedir "/" subdir "/" basefile ".mins.gz")) - (hrsfile (conc basedir "/" subdir "/" basefile ".hrs.gz")) - (daysfile (conc basedir "/" subdir "/" basefile ".days.gz")) - (wksfile (conc basedir "/" subdir "/" basefile ".weeks.gz"))) - - ;; create subdir it not exists - (if (not (directory-exists? (conc basedir "/" subdir))) - (docmd (conc "/bin/mkdir -p '"(conc basedir "/" subdir)"'"))) - - ;; copy&zip to .mins if not exists - (if (not (file-exists? minsfile)) - (copy+zip filepath minsfile)) - ;; copy .mins to .hrs if not exists - (if (not (file-exists? hrsfile)) - (copy minsfile hrsfile)) - ;; copy .hrs to .days if not exists - (if (not (file-exists? daysfile)) - (copy hrsfile daysfile)) - ;; copy .days to .weeks if not exists - (if (not (file-exists? wksfile)) - (copy daysfile wksfile)) - - - ;; if age(.mins.gz) >= 1h: - ;; copy .mins.gz .hrs.gz - ;; copy .mins.gz - (when (>= (age-mins minsfile) 1) - (copy minsfile hrsfile) - (copy+zip filepath minsfile)) - - ;; if age(.hrs.gz) >= 1d: - ;; copy .hrs.gz .days.gz - ;; copy .mins.gz .hrs.gz - (when (>= (age-days hrsfile) 1) - (copy hrsfile daysfile) - (copy minsfile hrsfile)) - - ;; if age(.days.gz) >= 1w: - ;; copy .days.gz .weeks.gz - ;; copy .hrs.gz .days.gz - (when (>= (age-wks daysfile) 1) - (copy daysfile wksfile) - (copy hrsfile daysfile)) - #t) - #f)) - -;;====================================================================== -;; Rotate logs, logic: -;; if > 500k and older than 1 week: -;; remove previous compressed log and compress this log -;; WARNING: This proc operates assuming that it is in the directory above the -;; logs directory you wish to log-rotate. -;; -(define (common:rotate-logs) - (let* ((all-files (make-hash-table)) - (stats (make-hash-table)) - (inc-stat (lambda (key) - (hash-table-set! stats key (+ (hash-table-ref/default stats key 0) 1)))) - (max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "600")))) ;; name -> age - (if (not (directory-exists? "logs"))(create-directory "logs")) - (directory-fold - (lambda (file rem) - (handle-exceptions - exn - (begin - (debug:print-info 2 *default-log-port* "unable to rotate log " file ", probably handled by another process, this is safe to ignore. exn=" exn) - (debug:print 2 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - ;; (print-call-chain (current-error-port)) ;; - ) - (let* ((fullname (conc "logs/" file)) - (mod-time (file-modification-time fullname)) - (file-age (- (current-seconds) mod-time)) - (file-old (> file-age (* 48 60 60))) - (file-big (> (file-size fullname) 200000))) - (hash-table-set! all-files file mod-time) - (if (or (and (string-match "^.*.log" file) - file-old - file-big) - (and (string-match "^server-.*.log" file) - file-old)) - (let ((gzfile (conc fullname ".gz"))) - (if (common:file-exists? gzfile) - (begin - (debug:print-info 0 *default-log-port* "removing " gzfile) - (delete-file* gzfile) - (hash-table-delete! all-files gzfile) ;; needed? - )) - (debug:print-info 0 *default-log-port* "compressing " file) - (system (conc "gzip " fullname)) - (inc-stat "gzipped") - (hash-table-set! all-files (conc file ".gz") file-age) ;; add the .gz file and remove the base file - (hash-table-delete! all-files file) - ) - (if (and (> file-age (* (string->number (or (configf:lookup *configdat* "setup" "log-expire-days") "30")) 24 3600)) - (file-exists? fullname)) ;; just in case it was gzipped - will get it next time - (handle-exceptions - exn - #f - (if (directory? fullname) - (begin - (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") - (inc-stat "directories")) - (begin - (delete-file* fullname) - (inc-stat "deleted"))) - (hash-table-delete! all-files file))))))) - '() - "logs") - (for-each - (lambda (category) - (let ((quant (hash-table-ref/default stats category 0))) - (if (> quant 0) - (debug:print-info 0 *default-log-port* category " log files: " quant)))) - `("deleted" "gzipped" "directories")) - (let ((num-logs (hash-table-size all-files))) - (if (> num-logs max-allowed) ;; because NFS => don't let number of logs exceed 300 - (let ((files (take (sort (hash-table-keys all-files) - (lambda (a b) - (< (hash-table-ref all-files a)(hash-table-ref all-files b)))) - (- num-logs max-allowed)))) - (for-each - (lambda (file) - (let* ((fullname (conc "logs/" file))) - (if (directory? fullname) - (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") - (handle-exceptions - exn - (debug:print-error 0 *default-log-port* "failed to remove " fullname ", exn=" exn) - (delete-file* fullname))))) - files) - (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files.")))))) - -;;====================================================================== -;; Force a megatest cleanup-db if version is changed and skip-version-check not specified -;; Do NOT check if not on homehost! -;; -(define (common:exit-on-version-changed) - (if (and *toppath* ;; do nothing if *toppath* not yet provided - (common:on-homehost?)) - (if (common:api-changed?) - (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config")) - (dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") ".mtdb/main.db")) - (read-only (not (file-write-access? dbfile))) - (dbstruct (db:setup))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;; #t))) - (debug:print 0 *default-log-port* - "WARNING: Version mismatch!\n" - " expected: " (common:version-signature) "\n" - " got: " (common:get-last-run-version)) - (cond - ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t) - ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only) - (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db - (debug:print 0 *default-log-port* " I see you are the owner of megatest.config, attempting to cleanup and reset to new version") - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "Failed to switch versions. exn=" exn) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (print-call-chain (current-error-port)) - (exit 1)) - (common:cleanup-db dbstruct))) - ((not (common:file-exists? mtconf)) - (debug:print 0 *default-log-port* " megatest.config does not exist in this area. Cannot proceed with megatest version migration.") - (exit 1)) - ((not (common:file-exists? dbfile)) - (debug:print 0 *default-log-port* " .mtdb/main.db does not exist in this area. Cannot proceed with megatest version migration.") - (exit 1)) - ((not (eq? (current-user-id)(file-owner mtconf))) - (debug:print 0 *default-log-port* " You do not own .mtdb/main.db in this area. Cannot proceed with megatest version migration.") - (exit 1)) - (read-only - (debug:print 0 *default-log-port* " You have read-only access to this area. Cannot proceed with megatest version migration.") - (exit 1)) - (else - (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"") - (exit 1))))))) -;;====================================================================== -;; (begin -;; (debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.") -;; (exit 1)))) - -;;====================================================================== -;; S P A R S E A R R A Y S -;;====================================================================== - -(define (make-sparse-array) - (let ((a (make-sparse-vector))) - (sparse-vector-set! a 0 (make-sparse-vector)) - a)) - -(define (sparse-array? a) - (and (sparse-vector? a) - (sparse-vector? (sparse-vector-ref a 0)))) - -(define (sparse-array-ref a x y) - (let ((row (sparse-vector-ref a x))) - (if row - (sparse-vector-ref row y) - #f))) - -(define (sparse-array-set! a x y val) - (let ((row (sparse-vector-ref a x))) - (if row - (sparse-vector-set! row y val) - (let ((new-row (make-sparse-vector))) - (sparse-vector-set! a x new-row) - (sparse-vector-set! new-row y val))))) - -;;====================================================================== -;; L O C K E R S A N D B L O C K E R S -;;====================================================================== - -;; block further accesses to databases. Call this before shutting db down -(define (common:db-block-further-queries) - (mutex-lock! *db-access-mutex*) - (set! *db-access-allowed* #f) - (mutex-unlock! *db-access-mutex*)) - -(define (common:db-access-allowed?) - (let ((val (begin - (mutex-lock! *db-access-mutex*) - *db-access-allowed* - (mutex-unlock! *db-access-mutex*)))) - val)) - -;;====================================================================== -;; U S E F U L S T U F F -;;====================================================================== - -;; convert things to an alist or assoc list, #f gets converted to "" -;; -(define (common:to-alist dat) - (cond - ((list? dat) (map common:to-alist dat)) - ((vector? dat) - (map common:to-alist (vector->list dat))) - ((pair? dat) - (cons (common:to-alist (car dat)) - (common:to-alist (cdr dat)))) - ((hash-table? dat) - (map common:to-alist (hash-table->alist dat))) - (else - (if dat - dat - "")))) - -(define (common:alist-ref/default key alist default) - (or (alist-ref key alist) default)) - -;; moved into commonmod -;; -;; (define (common:low-noise-print waitval . keys) -;; (let* ((key (string-intersperse (map conc keys) "-" )) -;; (lasttime (hash-table-ref/default *common:denoise* key 0)) -;; (currtime (current-seconds))) -;; (if (> (- currtime lasttime) waitval) -;; (begin -;; (hash-table-set! *common:denoise* key currtime) -;; #t) -;; #f))) - -(define (common:read-encoded-string instr) - (handle-exceptions - exn - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "received bad encoded string \"" instr "\", message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) - (print-call-chain (current-error-port)) - #f) - (read (open-input-string (base64:base64-decode instr)))) - (read (open-input-string (z3:decode-buffer (base64:base64-decode instr)))))) - - -;;====================================================================== -;; S T A T E S A N D S T A T U S E S -;;====================================================================== - -;; BBnote: *common:std-states* - dashboard filter control and test control state buttons defined here; used in set-fields-panel and dboard:make-controls -(define *common:std-states* ;; for toggle buttons in dashboard - '( - (0 "ARCHIVED") - (1 "STUCK") - (2 "KILLREQ") - (3 "KILLED") - (4 "NOT_STARTED") - (5 "COMPLETED") - (6 "LAUNCHED") - (7 "REMOTEHOSTSTART") - (8 "RUNNING") - )) - -(define *common:dont-roll-up-states* - '("DELETED" - "REMOVING" - "CLEANING" - "ARCHIVE_REMOVING" - )) - -;;====================================================================== -;; BBnote: *common:std-statuses* dashboard filter control and test control status buttons defined here; used in set-fields-panel and dboard:make-controls -;; note these statuses are sorted from better to worse. -;; This sort order is important to dcommon:status-compare3 and db:set-state-status-and-roll-up-items -(define *common:std-statuses* - '(;; (0 "DELETED") - (1 "n/a") - (2 "PASS") - (3 "SKIP") - (4 "WARN") - (5 "WAIVED") - (6 "CHECK") - (7 "STUCK/DEAD") - (8 "DEAD") - (9 "FAIL") - (10 "PREQ_FAIL") - (11 "PREQ_DISCARDED") - (12 "ABORT"))) - -(define *common:ended-states* ;; states which indicate the test is stopped and will not proceed - '("COMPLETED" "ARCHIVED" "KILLED" "KILLREQ" "STUCK" "INCOMPLETE" )) - -(define *common:badly-ended-states* ;; these roll up as CHECK, i.e. results need to be checked - '("KILLED" "KILLREQ" "STUCK" "INCOMPLETE" "DEAD" "CHECK")) - -(define *common:well-ended-states* ;; an item's prereq in this state allows item to proceed - '("PASS" "WARN" "WAIVED" "SKIP")) - -;; BBnote: *common:running-states* used from db:set-state-status-and-roll-up-items -(define *common:running-states* ;; test is either running or can be run - '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED" "STARTED")) - -(define *common:cant-run-states* ;; These are stopping conditions that prevent a test from being run - '("COMPLETED" "KILLED" "UNKNOWN" "INCOMPLETE" "ARCHIVED")) - -(define *common:not-started-ok-statuses* ;; if not one of these statuses when in not_started state treat as dead - '("n/a" "na" "PASS" "FAIL" "WARN" "CHECK" "WAIVED" "DEAD" "SKIP")) - -;;====================================================================== -;; group tests into buckets corresponding to rollup -;;; Running, completed-pass, completed-non-pass + worst status, not started. -;; filter out -;(define (common:categorize-items-for-rollup in-tests) -; ( - -(define (common:special-sort items order comp) - (let ((items-order (map reverse order)) - (acomp (or comp >))) - (sort items - (lambda (a b) - (let ((a-num (cadr (or (assoc a items-order) '(0 0)))) - (b-num (cadr (or (assoc b items-order) '(0 0))))) - (acomp a-num b-num)))))) - -;;====================================================================== -;; ;; given a toplevel with currstate, currstatus apply state and status -;; ;; => (newstate . newstatus) -;; (define (common:apply-state-status currstate currstatus state status) -;; (let* ((cstate (string->symbol (string-downcase currstate))) -;; (cstatus (string->symbol (string-downcase currstatus))) -;; (sstate (string->symbol (string-downcase state))) -;; (sstatus (string->symbol (string-downcase status))) -;; (nstate #f) -;; (nstatus #f)) -;; (set! nstate -;; (case cstate -;; ((completed not_started killed killreq stuck archived) -;; (case sstate ;; completed -> sstate -;; ((completed killed killreq stuck archived) completed) -;; ((running remotehoststart launched) running) -;; (else unknown-error-1))) -;; ((running remotehoststart launched) -;; (case sstate -;; ((completed killed killreq stuck archived) #f) ;; need to look at all items -;; ((running remotehoststart launched) running) -;; (else unknown-error-2))) -;; (else unknown-error-3))) -;; (set! nstatus -;; (case sstatus -;; ((pass) -;; (case nstate -;; ((pass n/a deleted) pass) -;; ((warn) warn) -;; ((fail) fail) -;; ((check) check) -;; ((waived) waived) -;; ((skip) skip) -;; ((stuck/dead) stuck) -;; ((abort) abort) -;; (else unknown-error-4))) -;; ((warn) -;; (case nstate -;; ((pass warn n/a skip deleted) warn) -;; ((fail) fail) -;; ((check) check) -;; ((waived) waived) -;; ((stuck/dead) stuck) -;; (else unknown-error-5))) -;; ((fail) -;; (case nstate -;; ((pass warn fail check n/a waived skip deleted stuck/dead stuck) fail) -;; ((abort) abort) -;; (else unknown-error-6))) -;; (else unknown-error-7))) -;; (cons -;; (if nstate (symbol->string nstate) nstate) -;; (if nstatus (symbol->string nstatus) nstatus)))) - -;;====================================================================== -;; D E B U G G I N G S T U F F -;;====================================================================== - -(define *verbosity* 1) -(define *logging* #f) - -(define (get-with-default val default) - (let ((val (args:get-arg val))) - (if val val default))) - -(define (assoc/default key lst . default) - (let ((res (assoc key lst))) - (if res (cadr res)(if (null? default) #f (car default))))) - -(define (common:get-testsuite-name) - (or (configf:lookup *configdat* "setup" "area-name") ;; megatest is a flexible tool, testsuite is too limiting a description. - (configf:lookup *configdat* "setup" "testsuite" ) - (getenv "MT_TESTSUITE_NAME") - (pathname-file (or (if (string? *toppath* ) - (pathname-file *toppath*) - #f) - (common:get-toppath #f))) - "please-set-setup-area-name")) ;; (pathname-file (current-directory))))) - -;;====================================================================== -;; safe getting of toppath -(define (common:get-toppath areapath) - (or *toppath* - (if areapath - (begin - (set! *toppath* areapath) - (setenv "MT_RUN_AREA_HOME" areapath) - areapath) - #f) - (if (getenv "MT_RUN_AREA_HOME") - (begin - (set! *toppath* (getenv "MT_RUN_AREA_HOME")) - *toppath*) - #f) - ;; last resort, look for megatest.config - (let loop ((thepath (realpath "."))) - (if (file-exists? (conc thepath "/megatest.config")) - thepath - (if (equal? thepath "/") - (begin - (debug:print-error 0 *default-log-port* "Unable to find megatest home directory.") - #f) - (loop (pathname-directory thepath))))) - )) - - -;;====================================================================== -;; redefine for future cleanup (converge on area-name, the more generic -;; -(define common:get-area-name common:get-testsuite-name) - -(define (common:get-db-tmp-area . junk) - (if *db-cache-path* - *db-cache-path* - (if *toppath* ;; common:get-create-writeable-dir - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "Couldn't create path to " *db-cache-path* ", exn=" exn) - (exit 1)) - (let* ((toppath (common:real-path *toppath*)) - (tsname (common:get-testsuite-name)) - (dbpath (common:get-create-writeable-dir - (list (conc "/tmp/" (current-user-name) - "/megatest_localdb/" - tsname "/" - (string-translate toppath "/" ".")) - (conc "/tmp/" (current-process-id) ;; just in case we have an issue with the dir by own user name - "/"(current-user-name) "/megatest_localdb/" - tsname - (string-translate toppath "/" ".")) - )))) - (set! *db-cache-path* dbpath) - ;; ensure megatest area has .mtdb - (let ((dbarea (conc *toppath* "/.mtdb"))) - (if (not (file-exists? dbarea)) - (create-directory dbarea))) - ;; ensure tmp area has .mtdb - (let ((dbarea (conc dbpath "/.mtdb"))) - (if (not (file-exists? dbarea)) - (create-directory dbarea))) - dbpath)) - #f))) - -(define (common:get-area-path-signature) - (message-digest-string (md5-primitive) *toppath*)) - -;;====================================================================== -;; E X I T H A N D L I N G -;;====================================================================== - -(define (common:run-sync?) - (and *toppath* ;; gate if called before *toppath* is set - (common:on-homehost?) - (args:get-arg "-server"))) - - -(define (std-signal-handler signum) - ;; (signal-mask! signum) - (set! *time-to-exit* #t) - ;;(debug:print-info 13 *default-log-port* "got signal "signum) - (debug:print-error 0 *default-log-port* "Received signal " signum " aaa exiting promptly") - ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway - (exit)) - -(define (special-signal-handler signum) - ;; (signal-mask! signum) - (set! *time-to-exit* #t) - ;;(debug:print-info 13 *default-log-port* "got signal "signum) - (debug:print-error 0 *default-log-port* "Received signal " signum " sending email befor exiting!!") - ;;TODO send email to notify admin contact listed in the config that the lisner got killed - ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway - (exit)) - - -(set-signal-handler! signal/int std-signal-handler) ;; ^C -(set-signal-handler! signal/term std-signal-handler) - -;; (set-signal-handler! signal/stop std-signal-handler) ;; ^Z NO, do NOT handle ^Z! - -;;====================================================================== -;; M I S C U T I L S -;;====================================================================== - -;;====================================================================== -;; convert stuff to a number if possible -(define (any->number val) - (cond - ((number? val) val) - ((string? val) (string->number val)) - ((symbol? val) (any->number (symbol->string val))) - (else #f))) - -(define (any->number-if-possible val) - (let ((num (any->number val))) - (if num num val))) - -(define (patt-list-match item patts) - (debug:print-info 8 *default-log-port* "patt-list-match item=" item " patts=" patts) - (if (and item patts) ;; here we are filtering for matches with item patterns - (let ((res #f)) ;; look through all the item-patts if defined, format is patt1,patt2,patt3 ... wildcard is % - (for-each - (lambda (patt) - (let ((modpatt (string-substitute "%" ".*" patt #t))) - (debug:print-info 10 *default-log-port* "patt " patt " modpatt " modpatt) - (if (string-match (regexp modpatt) item) - (set! res #t)))) - (string-split patts ",")) - res) - #t)) - -;;====================================================================== -;; '(print (string-intersperse (map cadr (hash-table-ref/default (read-config "megatest.config" \#f \#t) "disks" '"'"'("none" ""))) "\n"))' -(define (common:get-disks #!key (configf #f)) - (hash-table-ref/default - (or configf (read-config "megatest.config" #f #t)) - "disks" '("none" ""))) - -(define (common:get-install-area) - (let ((exe-path (car (argv)))) - (if (common:file-exists? exe-path) - (handle-exceptions - exn - #f - (pathname-directory - (pathname-directory - (pathname-directory exe-path)))) - #f))) - -;;====================================================================== -;; return first path that can be created or already exists and is writable -;; -(define (common:get-create-writeable-dir dirs) - (if (null? dirs) - #f - (let loop ((hed (car dirs)) - (tal (cdr dirs))) - (let ((res (or (and (directory? hed) - (file-write-access? hed) - hed) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "could not create " hed - ", this might cause problems down the road. exn=" exn) - #f) - (create-directory hed #t))))) - (if (and (string? res) - (directory? res)) - res - (if (null? tal) - #f - (loop (car tal)(cdr tal)))))))) - -;;====================================================================== -;; return the youngest timestamp . filename -;; -(define (common:get-youngest glob-list) - (let ((all-files (apply append - (map (lambda (patt) - (handle-exceptions - exn - '() - (glob patt))) - glob-list)))) - (fold (lambda (fname res) - (let ((last-mod (car res)) - (curmod (handle-exceptions - exn - 0 - (file-modification-time fname)))) - (if (> curmod last-mod) - (list curmod fname) - res))) - '(0 "n/a") - all-files))) - -;;====================================================================== -;; use bash to expand a glob. Does NOT handle paths with spaces! -;; -(define (common:bash-glob instr) - (string-split - (with-input-from-pipe - (conc "/bin/bash -c \"echo " instr "\"") - read-line))) - -;;====================================================================== -;; Some safety net stuff -;;====================================================================== - -;;====================================================================== -;; return input if it is a list or return null -(define (common:list-or-null inlst #!key (ovrd #f)(message #f)) - (if (list? inlst) - inlst - (begin - (if message (debug:print-error 0 *default-log-port* message)) - (or ovrd '())))) - -;;====================================================================== -;; T A R G E T S , S T A T E , S T A T U S , -;; R U N N A M E A N D T E S T P A T T -;;====================================================================== - -;;====================================================================== -;; (map print (map car (hash-table->alist (read-config "runconfigs.config" #f #t)))) -;; -(define (common:get-runconfig-targets #!key (configf #f)) - (let ((targs (sort (map car (hash-table->alist - (or configf ;; NOTE: There is no value in using runconfig:read here. - (read-config (conc *toppath* "/runconfigs.config") - #f #t) - (make-hash-table)))) - stringsymbol force-setting) #f)) - (force-result (case force-type - ((#f) #f) - ((always) #t) - ((test) (if (args:get-arg "-execute") ;; we are in a test - #t - #f)) - (else - (debug:print 0 *default-log-port* "ERROR: Bad server force setting " force-setting ", forcing server.") - #t)))) ;; default to requiring server - (if force-result - (begin - (debug:print-info 0 *default-log-port* "ATTENTION! Forcing use of server, force setting is \"" force-setting "\".") - #t) - #f))) - -;;====================================================================== -;; M I S C L I S T S -;;====================================================================== - -;;====================================================================== -;; items in lista are matched value and position in listb -;; return the remaining items in listb or #f -;; -(define (common:list-is-sublist lista listb) - (if (null? lista) - listb ;; all items in listb are "remaining" - (if (> (length lista)(length listb)) - #f - (let loop ((heda (car lista)) - (tala (cdr lista)) - (hedb (car listb)) - (talb (cdr listb))) - (if (equal? heda hedb) - (if (null? tala) ;; we are done - talb - (loop (car tala) - (cdr tala) - (car talb) - - (cdr talb))) - #f))))) - -;;====================================================================== -;; Needed for long lists to be sorted where (apply max ... ) dies -;; -(define (common:max inlst) - (let loop ((max-val (car inlst)) - (hed (car inlst)) - (tal (cdr inlst))) - (if (not (null? tal)) - (loop (max hed max-val) - (car tal) - (cdr tal)) - (max hed max-val)))) - -;;====================================================================== -;; get min or max, use > for max and < for min, this works around the limits on apply -;; -(define (common:min-max comp lst) - (if (null? lst) - #f ;; better than an exception for my needs - (fold (lambda (a b) - (if (comp a b) a b)) - (car lst) - lst))) - -;;====================================================================== -;; get min or max, use > for max and < for min, this works around the limits on apply -;; -(define (common:sum lst) - (if (null? lst) - 0 - (fold (lambda (a b) - (+ a b)) - (car lst) - lst))) - -;;====================================================================== -;; path list to hash-table tree -;; ((a b c)(a b d)(e b c)) => ((a (b (d) (c))) (e (b (c)))) -;; -(define (common:list->htree lst) - (let ((resh (make-hash-table))) - (for-each - (lambda (inlst) - (let loop ((ht resh) - (hed (car inlst)) - (tal (cdr inlst))) - (if (hash-table-ref/default ht hed #f) - (if (not (null? tal)) - (loop (hash-table-ref ht hed) - (car tal) - (cdr tal))) - (begin - (hash-table-set! ht hed (make-hash-table)) - (loop ht hed tal))))) - lst) - resh)) - -;;====================================================================== -;; hash-table tree to html list tree -;; -;; tipfunc takes two parameters: y the tip value and path the path to that point -;; -(define (common:htree->html ht path tipfunc) - (let ((datlist (sort (hash-table->alist ht) - (lambda (a b) - (string< (car a)(car b)))))) - (if (null? datlist) - (tipfunc #f path) ;; really shouldn't get here - (s:ul - (map (lambda (x) - (let* ((levelname (car x)) - (y (cdr x)) - (newpath (append path (list levelname))) - (leaf (or (not (hash-table? y)) - (null? (hash-table-keys y))))) - (if leaf - (s:li (tipfunc y newpath)) - (s:li - (list - levelname - (common:htree->html y newpath tipfunc)))))) - datlist))))) - -;;====================================================================== -;; hash-table tree to alist tree -;; -(define (common:htree->atree ht) - (map (lambda (x) - (cons (car x) - (let ((y (cdr x))) - (if (hash-table? y) - (common:htree->atree y) - y)))) - (hash-table->alist ht))) - -;;====================================================================== -;; M U N G E D A T A I N T O N I C E F O R M S -;;====================================================================== - -;;====================================================================== -;; Generate an index for a sparse list of key values -;; ( (rowname1 colname1 val1)(rowname2 colname2 val2) ) -;; -;; => -;; -;; ( (rowname1 0)(rowname2 1)) ;; rownames -> num -;; (colname1 0)(colname2 1)) ) ;; colnames -> num -;; -;; optional apply proc to rownum colnum value -(define (common:sparse-list-generate-index data #!key (proc #f)) - (if (null? data) - (list '() '()) - (let loop ((hed (car data)) - (tal (cdr data)) - (rownames '()) - (colnames '()) - (rownum 0) - (colnum 0)) - (let* ((rowkey (car hed)) - (colkey (cadr hed)) - (value (caddr hed)) - (existing-rowdat (assoc rowkey rownames)) - (existing-coldat (assoc colkey colnames)) - (curr-rownum (if existing-rowdat rownum (+ rownum 1))) - (curr-colnum (if existing-coldat colnum (+ colnum 1))) - (new-rownames (if existing-rowdat rownames (cons (list rowkey curr-rownum) rownames))) - (new-colnames (if existing-coldat colnames (cons (list colkey curr-colnum) colnames)))) - ;; (debug:print-info 0 *default-log-port* "Processing record: " hed ) - (if proc (proc curr-rownum curr-colnum rowkey colkey value)) - (if (null? tal) - (list new-rownames new-colnames) - (loop (car tal) - (cdr tal) - new-rownames - new-colnames - (if (> curr-rownum rownum) curr-rownum rownum) - (if (> curr-colnum colnum) curr-colnum colnum) - )))))) - -;;====================================================================== -;; if it looks like a number -> convert it to a number, else return it -;; -(define (common:lazy-convert inval) - (let* ((as-num (if (string? inval)(string->number inval) #f))) - (or as-num inval))) - -;;====================================================================== -;; convert string a=1; b=2; c=a silly thing; d= -;; to '((a . 1)(b . 2)(c . "a silly thing")(d . "")) -;; -(define (common:val->alist val #!key (convert #f)) - (let ((val-list (string-split-fields ";\\s*" val #:infix))) - (if val-list - (map (lambda (x) - (let ((f (string-split-fields "\\s*=\\s*" x #:infix))) - (case (length f) - ((0) `(,#f)) ;; null string case - ((1) `(,(string->symbol (car f)))) - ((2) `(,(string->symbol (car f)) . ,(let ((inval (cadr f))) - (if convert (common:lazy-convert inval) inval)))) - (else f)))) - val-list) - '()))) - -;;====================================================================== -;; S Y S T E M S T U F F -;;====================================================================== - -;;====================================================================== -;; lazy-safe get file mod time. on any error (file not existing etc.) return 0 -;; -(define (common:lazy-modification-time fpath) - (handle-exceptions - exn - (begin - (debug:print 2 *default-log-port* "Failed to get modification time for " fpath ", treating it as zero. exn=" exn) - 0) - (if (file-exists? fpath) - (file-modification-time fpath) - 0))) - -;;====================================================================== -;; find timestamp of newest file associated with a sqlite db file -(define (common:lazy-sqlite-db-modification-time fpath) - (let* ((glob-list (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "Failed to glob " fpath "*, exn=" exn) - `(,(conc "/no/such/file, message: " ((condition-property-accessor 'exn 'message) exn)))) - (glob (conc fpath "*")))) - (file-list (if (eq? 0 (length glob-list)) - '("/no/such/file") - glob-list))) - (apply max - (map - common:lazy-modification-time - file-list)))) - -;;====================================================================== -;; make "nice-path" available in config files and the repl -(define nice-path common:nice-path) - -;;====================================================================== -;; returns *effective load* (not normalized) -;; -(define (common:get-intercept onemin fivemin) - (if (< onemin fivemin) ;; load is decreasing, just use the onemin load - onemin - (let* ((load-change (- onemin fivemin)) - (tchange (- 300 60))) - (max (+ onemin (* 60 (/ load-change tchange))) 0)))) - -;;====================================================================== -;; calculate a delay number based on a droop curve -;; inputs are: -;; - load-in, load as from uptime, NOT normalized -;; - numcpus, number of cpus, ideally use the real cpus, not threads -;; -(define (common:get-delay load-in numcpus) - (let* ((ratio (/ load-in numcpus)) - (new-option (configf:lookup *configdat* "load" "new-load-method")) - (paramstr (or (configf:lookup *configdat* "load" "exp-params") - "15 12 1281453987.9543 0.75")) ;; 5 4 10 1")) - (paramlst (map string->number (string-split paramstr)))) - (if new-option - (begin - (cond ((and (>= ratio 0) (< ratio .5)) - 0) - ((and (>= ratio 0.5) (<= ratio .9)) - (* ratio (/ 5 .9))) - ((and (> ratio .9) (<= ratio 1.1)) - (+ 5 (* (- ratio .9) (/ 55 .2)))) - ((> ratio 1.1) - 60))) - (match paramlst - ((r1 r2 s1 s2) - (debug:print 3 *default-log-port* "Using params r1=" r1 " r2=" r2 " s1=" s1 " s2=" s2) - (min (max (/ (expt r1 (* r2 s2 ratio)) s1) 0) 30)) - (else - (debug:print 0 *default-log-port* "BAD exp-params, should be \"r1 r2 s1 s2\" but got " paramstr) - 30))))) - -;; -mrw- this appears to not be used -;; -;; (define (common:print-delay-table) -;; (let loop ((x 0)) -;; (print x "," (common:get-delay x 1)) -;; (if (< x 2) -;; (loop (+ x 0.1))))) - -;; (define (get-cpu-load #!key (remote-host #f)) -;; (car (common:get-cpu-load remote-host))) - -;;====================================================================== -;; (let* ((load-res (process:cmd-run->list "uptime")) -;; (load-rx (regexp "load average:\\s+(\\d+)")) -;; (cpu-load #f)) -;; (for-each (lambda (l) -;; (let ((match (string-search load-rx l))) -;; (if match -;; (let ((newval (string->number (cadr match)))) -;; (if (number? newval) -;; (set! cpu-load newval)))))) -;; (car load-res)) -;; cpu-load)) - -;;====================================================================== -;; get values from cached info from dropping file in .sysdata dir -;; e.g. key is host and dtype is normalized-load -;; -(define (common:get-cached-info key dtype #!key (age 10)) - (if *toppath* - (let* ((fullpath (conc *toppath* "/.sysdata/" key "-" dtype ".log")) - (delfile (lambda (exn) - (debug:print-info 2 *default-log-port* " removing bad file " fullpath ", exn=" exn) - (delete-file* fullpath) - #f))) - (if (and (file-exists? fullpath) - (file-read-access? fullpath)) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "failed to get cached info from " fullpath ", exn=" exn) - #f) - (debug:print 2 *default-log-port* "reading file " fullpath) - (let ((real-age (- (current-seconds) - (handle-exceptions - exn - (begin - (debug:print 1 *default-log-port* "Failed to read mod time on file " - fullpath ", using 0, exn=" exn) - 0) - (file-change-time fullpath))))) - (if (< real-age age) - (handle-exceptions - exn - (delfile exn) - (let* ((res (with-input-from-file fullpath read))) - (if (eof-object? res) - (begin - (delfile "n/a") - #f) - res))) - (begin - (debug:print-info 2 *default-log-port* "file " fullpath - " is too old (" real-age" seconds) to trust, skipping reading it") - #f)))) - (begin - (debug:print 2 *default-log-port* "not reading file " fullpath) - #f))) - #f)) - -(define (common:write-cached-info key dtype dat) - (if *toppath* - (let* ((fulldir (conc *toppath* "/.sysdata")) - (fullpath (conc fulldir "/" key "-" dtype ".log"))) - (if (not (file-exists? fulldir))(create-directory fulldir #t)) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "failed to write file " fullpath ", exn=" exn) - #f) - (with-output-to-file fullpath (lambda ()(pp dat))))) - #f)) - -(define (common:raw-get-remote-host-load remote-host) - (let* ((inp #f)) - (handle-exceptions - exn - (begin - (close-input-pipe inp) - (debug:print 0 *default-log-port* "failed to ssh to " remote-host " and get loadavg. exn=" exn) - #f) ;; more specific handling of errors needed - (set! inp (open-input-pipe (conc "ssh " remote-host " cat /proc/loadavg"))) - (let ((res (list (read inp)(read inp)(read inp)))) - (close-input-pipe inp) - res)))) - -;;====================================================================== -;; get cpu load by reading from /proc/loadavg, return all three values -;; -(define (common:get-cpu-load remote-host) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "failed to ssh or read loadavg from host " remote-host ", exn=" exn) - '(-99 -99 -99)) - (let* ((actual-hostname (or remote-host (get-host-name) "localhost"))) - (or (common:get-cached-info actual-hostname "cpu-load") - (let ((result (if (and remote-host - (not (equal? remote-host (get-host-name)))) - (map (lambda (res) - (if (eof-object? res) 9e99 res)) - (common:raw-get-remote-host-load remote-host)) - (with-input-from-file "/proc/loadavg" - (lambda ()(list (read)(read)(read))))))) - (match - result - ((l1 l2 l3) - (if (and (number? l1) - (number? l2) - (number? l3)) - (begin - (common:write-cached-info actual-hostname "cpu-load" result) - result) - '(-1 -1 -1))) ;; -1 is bad result - (else '(-2 -2 -2)))))))) - -;;====================================================================== -;; get normalized cpu load by reading from /proc/loadavg and /proc/cpuinfo return all three values and the number of real cpus and the number of threads -;; returns alist '((adj-cpu-load . normalized-proc-load) ... etc. -;; keys: adj-proc-load, adj-core-load, 1m-load, 5m-load, 15m-load -;; -(define (common:get-normalized-cpu-load remote-host) - (let ((res (common:get-normalized-cpu-load-raw remote-host)) - (default `((adj-proc-load . 2) ;; there is no right answer - (adj-core-load . 2) - (1m-load . 2) - (5m-load . 0) ;; causes a large delta - thus causing default of throttling if stuff goes wrong - (15m-load . 0) - (proc . 1) - (core . 1) - (phys . 1) - (error . #t)))) - (cond - ((and (list? res) - (> (length res) 2)) - res) - ((eq? res #f) default) ;; add messages? - ((eq? res #f) default) ;; this would be the #eof - (else default)))) - -(define (common:ssh-get-loadavg remote-host) - (let ((inp (open-input-pipe (conc "ssh " remote-host " \"cat /proc/loadavg;cat /proc/cpuinfo;echo end\"")))) - (let* ((res (read-lines inp))) - (close-input-pipe inp) - res))) - -(define (common:get-normalized-cpu-load-raw remote-host) - (let* ((actual-host (or remote-host (get-host-name)))) ;; #f is localhost - (or (common:get-cached-info actual-host "normalized-load") - (let ((data (if remote-host - (common:ssh-get-loadavg remote-host) - (append - (with-input-from-file "/proc/loadavg" - read-lines) - (with-input-from-file "/proc/cpuinfo" - read-lines) - (list "end")))) - (load-rx (regexp "^([\\d\\.]+)\\s+([\\d\\.]+)\\s+([\\d\\.]+)\\s+.*$")) - (proc-rx (regexp "^processor\\s+:\\s+(\\d+)\\s*$")) - (core-rx (regexp "^core id\\s+:\\s+(\\d+)\\s*$")) - (phys-rx (regexp "^physical id\\s+:\\s+(\\d+)\\s*$")) - (max-num (lambda (p n)(max (string->number p) n)))) - ;; (print "data=" data) - (if (null? data) ;; something went wrong - #f - (let loop ((hed (car data)) - (tal (cdr data)) - (loads #f) - (proc-num 0) ;; processor includes threads - (phys-num 0) ;; physical chip on motherboard - (core-num 0)) ;; core - ;;; (print hed ", " loads ", " proc-num ", " phys-num ", " core-num) - (if (null? tal) ;; have all our data, calculate normalized load and return result - (let* ((act-proc (+ proc-num 1)) - (act-phys (+ phys-num 1)) - (act-core (+ core-num 1)) - (adj-proc-load (/ (car loads) act-proc)) - (adj-core-load (/ (car loads) act-core)) - (result - (append (list (cons 'adj-proc-load adj-proc-load) - (cons 'adj-core-load adj-core-load)) - (list (cons '1m-load (car loads)) - (cons '5m-load (cadr loads)) - (cons '15m-load (caddr loads))) - (list (cons 'proc act-proc) - (cons 'core act-core) - (cons 'phys act-phys))))) - (common:write-cached-info actual-host "normalized-load" result) - result) - (regex-case - hed - (load-rx ( x l1 l5 l15 ) (loop (car tal)(cdr tal)(map string->number (list l1 l5 l15)) proc-num phys-num core-num)) - (proc-rx ( x p ) (loop (car tal)(cdr tal) loads (max-num p proc-num) phys-num core-num)) - (phys-rx ( x p ) (loop (car tal)(cdr tal) loads proc-num (max-num p phys-num) core-num)) - (core-rx ( x c ) (loop (car tal)(cdr tal) loads proc-num phys-num (max-num c core-num))) - (else - (begin - ;; (print "NO MATCH: " hed) - (loop (car tal)(cdr tal) loads proc-num phys-num core-num))))))))))) - -(define (common:unix-ping hostname) - (let ((res (system (conc "ping -c 1 " hostname " > /dev/null")))) - (eq? res 0))) - -;;====================================================================== -;; ideally put all this info into the db, no need to preserve it across moving homehost -;; -;; return list of -;; ( reachable? cpuload update-time ) -(define (common:get-host-info hostname) - (let* ((loadinfo (rmt:get-latest-host-load hostname)) ;; if this host happens to have been recently used by a test reuse the load data - (load (car loadinfo)) - (load-sample-time (cdr loadinfo)) - (load-sample-age (- (current-seconds) load-sample-time)) - (loadinfo-timeout-seconds 6) ;; this was 20 seconds, seems way too lax. Switch to 6 seconds - (host-last-update-timeout-seconds 4) - (host-rec (hash-table-ref/default *host-loads* hostname #f)) - ) - (cond - ((< load-sample-age loadinfo-timeout-seconds) - (list #t - load-sample-time - load)) - ((and host-rec - (< (current-seconds) (+ (host-last-update host-rec) host-last-update-timeout-seconds))) - (list #t - (host-last-update host-rec) - (host-last-cpuload host-rec ))) - ((common:unix-ping hostname) - (list #t - (current-seconds) - (alist-ref 'adj-core-load (common:get-normalized-cpu-load hostname)))) ;; this is cheaper than you might think. get-normalized-cpu-load is cached for up to 5 seconds - (else - (list #f 0 -1) ;; bad host, don't use! - )))) - -;;====================================================================== -;; see defstruct host at top of file. -;; host: reachable last-update last-used last-cpuload -;; -(define (common:update-host-loads-table hosts-raw) - (let* ((hosts (filter (lambda (x) - (string-match (regexp "^\\S+$") x)) - hosts-raw))) - (for-each - (lambda (hostname) - (let* ((rec (let ((h (hash-table-ref/default *host-loads* hostname #f))) - (if h - h - (let ((h (make-host))) - (hash-table-set! *host-loads* hostname h) - h)))) - (host-info (common:get-host-info hostname)) - (is-reachable (car host-info)) - (last-reached-time (cadr host-info)) - (load (caddr host-info))) - (host-reachable-set! rec is-reachable) - (host-last-update-set! rec last-reached-time) - (host-last-cpuload-set! rec load))) - hosts))) - -;;====================================================================== -;; go through the hosts from least recently used to most recently used, pick the first that meets the load criteral from the -;; [host-rules] section. -;; -(define (common:get-least-loaded-host hosts-raw host-type configdat) - (let* ((rdat (configf:lookup configdat "host-rules" host-type)) - (rules (common:val->alist (or rdat "") convert: #t)) ;; maxnload, maxnjobs, maxjobrate - (maxnload (common:alist-ref/default 'maxnload rules 1.5)) ;; max normalized load - (maxnjobs (common:alist-ref/default 'maxnjobs rules 1.5)) ;; max normalized number of jobs - (maxjobrate (common:alist-ref/default 'maxjobrate rules (/ 1 6))) ;; max rate of submitting jobs to a given host in jobs/second - (hosts (filter (lambda (x) - (string-match (regexp "^\\S+$") x)) - hosts-raw)) - ;; (best-host #f) - (get-rec (lambda (hostname) - ;; (print "get-rec hostname=" hostname) - (let ((h (hash-table-ref/default *host-loads* hostname #f))) - (if h - h - (let ((h (make-host))) - (hash-table-set! *host-loads* hostname h) - h))))) - (best-load 99999) - (curr-time (current-seconds)) - (get-hosts-sorted (lambda (hosts) - (sort hosts (lambda (a b) - (let ((a-rec (get-rec a)) - (b-rec (get-rec b))) - ;; (print "a=" a " a-rec=" a-rec " host-last-used=" (host-last-used a-rec)) - ;; (print "b=" b " b-rec=" b-rec " host-last-used=" (host-last-used b-rec)) - (< (host-last-used a-rec) - (host-last-used b-rec)))))))) - (debug:print 0 *default-log-port* "INFO: hosts-sorted=" (get-hosts-sorted hosts)) - (if (null? hosts) - #f ;; no hosts to select from. All done and giving up now. - (let ((hosts-sorted (get-hosts-sorted hosts))) - (common:update-host-loads-table hosts) - (let loop ((hostname (car hosts-sorted)) - (tal (cdr hosts-sorted)) - (best-host #f)) - (let* ((rec (get-rec hostname)) - (reachable (host-reachable rec)) - (load (host-last-cpuload rec)) - (last-used (host-last-used rec)) - (delta (- curr-time last-used)) - (job-rate (if (> delta 0) - (/ 1 delta) - 999)) ;; jobs per second - (new-best - (cond - ((not reachable) - (debug:print 0 *default-log-port* "Skipping host " hostname " as it cannot be reached.") - best-host) - ((and (< load maxnload) ;; load is acceptable - (< job-rate maxjobrate)) ;; job rate is acceptable - (set! best-load load) - hostname) - (else best-host)))) - (debug:print 0 *default-log-port* "INFO: Trying host " hostname " with load " load ", last used " delta " seconds ago, with job-rate " job-rate " for running a test." ) - (if new-best - (begin ;; found a host, return it - (debug:print 0 *default-log-port* "INFO: Found host: " new-best " load: " load " last-used: " delta " seconds ago, with job-rate: " job-rate) - (host-last-used-set! rec curr-time) - new-best) - (if (null? tal) #f (loop (car tal)(cdr tal) best-host))))))))) - -(define (common:wait-for-homehost-load maxnormload msg) - (let loop ((start-time (current-seconds))) ;; we saw some instances of this being called before *toppath* was set. This might be an early setup race. This delay should help but it is impossible to test... - (if (not *toppath*) - (begin - (debug:print 0 *default-log-port* "ERROR: common:wait-for-homehost-load called before *toppath* set.") - (thread-sleep! 30) - (if (< (- (current-seconds) start-time) 300) - (loop start-time))))) - (case (rmt:transport-mode) - ((http) - (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. - #f - (server:choose-server *toppath* 'homehost))) - (hh (if hh-dat (car hh-dat) #f))) - (common:wait-for-normalized-load maxnormload msg hh))) - (else - (common:wait-for-normalized-load maxnormload msg (get-host-name))))) - -(define (common:get-num-cpus remote-host) - (let* ((actual-host (or remote-host (get-host-name)))) - ;; hosts had better not be changing the number of cpus too often! - (or (hash-table-ref/default *numcpus-cache* actual-host #f) - (let* ((numcpus (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (random 3600))) - (let* ((proc (lambda () - (let loop ((numcpu 0) - (inl (read-line))) - (if (eof-object? inl) - (if (> numcpu 0) - numcpu - #f) ;; if zero return #f so caller knows that things are not working - (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl) - (+ numcpu 1) - numcpu) - (read-line)))))) - (result (if (and remote-host - (not (equal? remote-host (get-host-name)))) - (common:generic-ssh - (conc "ssh " remote-host " cat /proc/cpuinfo") - proc -1) - (with-input-from-file "/proc/cpuinfo" proc)))) - (if (and (number? result) - (> result 0)) - (common:write-cached-info actual-host "num-cpus" result)) - result)))) - (hash-table-set! *numcpus-cache* actual-host numcpus) - numcpus)))) - -;;====================================================================== -;; wait for normalized cpu load to drop below maxload -;; -(define (common:wait-for-normalized-load maxnormload msg remote-host #!optional (rem-tries 5)) - (let ((num-cpus (common:get-num-cpus remote-host))) - (if num-cpus - (common:wait-for-cpuload maxnormload num-cpus 15 msg: msg remote-host: remote-host) - (begin - (thread-sleep! (random 60)) ;; we failed to get num cpus. wait a bit and try again - (if (> rem-tries 0) - (common:wait-for-normalized-load maxnormload msg remote-host (- rem-tries 1)) - #f))))) - -;;====================================================================== -;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load -;; count - count down to zero, at some point we'd give up if the load never drops -;; num-tries - count down to zero number tries to get numcpus -;; -(define (common:wait-for-cpuload maxnormload numcpus-in - #!key (count 1000) - (msg #f)(remote-host #f)(num-tries 5)) - (let* ((loadavg (common:get-cpu-load remote-host)) - ;; not possible to have zero. If we get 1, it's possible that we got the previous default, and we should check again - (numcpus (if (<= 1 numcpus-in) - (common:get-num-cpus remote-host) numcpus-in)) - (first (car loadavg)) - (next (cadr loadavg)) - (adjmaxload (* maxnormload (max 1 numcpus))) ;; possible bug where numcpus (or could be maxload) is zero, crude - ;; fallback is to at least use 1 - ;; effective load accounts for load jumps, this should elminate all the first-next-avg, adjwait, load-jump-limit - ;; etc. - (effective-load (common:get-intercept first next)) - (recommended-delay (common:get-delay effective-load numcpus)) - (effective-host (or remote-host "localhost")) - (normalized-effective-load (/ effective-load numcpus)) - (will-wait (> normalized-effective-load maxnormload))) - (if (and will-wait (> recommended-delay 1)) - (let* ((actual-delay (min recommended-delay 30))) - (if (common:low-noise-print 30 (conc (round actual-delay) "-safe-load")) - (debug:print-info 0 *default-log-port* "Load control, delaying " - actual-delay " seconds to maintain safe load. current normalized effective load is " - normalized-effective-load". maxnormload = " maxnormload " numcpus = " numcpus " loadavg = " loadavg " effective-load = " effective-load)) - (thread-sleep! actual-delay))) - - (cond - ;; bad data, try again to get the data - ((not will-wait) - (if (common:low-noise-print 3600 (conc (round normalized-effective-load) "-load-acceptable-" effective-host)) - (debug:print 0 *default-log-port* "Effective load on " effective-host " is acceptable at " effective-load " continuing."))) - - ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable - (> num-tries 0)) - (debug:print 0 *default-log-port* "WARNING: received bad data from get-cpu-load " - first ", we'll sleep 10s and try " num-tries " more times.") - (thread-sleep! 10) - (common:wait-for-cpuload maxnormload numcpus-in - count: count remote-host: remote-host num-tries: (- num-tries 1))) - - ;; need to wait for load to drop - ((and will-wait ;; (> first adjmaxload) - (> count 0)) - (debug:print-info 0 *default-log-port* - "Delaying 15" ;; adjwait - " seconds due to normalized effective load " normalized-effective-load ;; first - " exceeding max of " adjmaxload - " on server " (or remote-host (get-host-name)) - " (normalized load-limit: " maxnormload ") " (if msg msg "")) - (thread-sleep! 15) ;; adjwait) - (common:wait-for-cpuload maxnormload numcpus count: (- count 1) msg: msg remote-host: remote-host) - ;; put the message here to indicate came out of waiting - (debug:print-info 1 *default-log-port* - "On host: " effective-host - ", effective load: " effective-load - ", numcpus: " numcpus - ", normalized effective load: " normalized-effective-load - )) - ;; overloaded and count expired (i.e. went to zero) - (else - (if (> num-tries 0) ;; should be "num-tries-left". - (if (common:low-noise-print 30 (conc (round effective-load) "-load-acceptable-" effective-host)) - (debug:print 0 *default-log-port* "Load on " effective-host " is acceptable at effective normalized load of " - effective-normalized-load " continuing.")) - (debug:print 0 *default-log-port* "Load on " effective-host ", " - first" could not be retrieved. Giving up and continuing.")))))) - -;;====================================================================== -;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load -;; -;; (define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)(force-maxload #f)(num-tries 5)) -;; (let* ((loadavg (common:get-cpu-load remote-host)) -;; (numcpus (if (<= 1 numcpus-in) ;; not possible to have zero. If we get 1, it's possible that we got the previous default, and we should check again -;; (common:get-num-cpus remote-host) -;; numcpus-in)) -;; (maxload (if force-maxload -;; maxload-in -;; (if (number? maxload-in) -;; (max maxload-in 0.5) -;; 0.5))) ;; so maxload must be greater than 0.5 for now BUG - FIXME? -;; (first (car loadavg)) -;; (next (cadr loadavg)) -;; (adjmaxload (* maxload (max 1 numcpus))) ;; possible bug where -;; ;; numcpus (or could be -;; ;; maxload) is zero, -;; ;; crude fallback is to -;; ;; at least use 1 -;; (loadjmp (- first (if (> next (* numcpus 0.7)) ;; could do something with average of first and next? -;; 0 -;; next))) ;; we will force a conservative calculation any time next is large. -;; (first-next-avg (/ (+ first next) 2)) -;; ;; add some randomness to the time to break any alignment -;; ;; where netbatch dumps many jobs to machines simultaneously -;; (adjwait (min (+ 300 (random 10)) (abs (* (+ (random 10) -;; (/ (- 1000 count) 10) -;; waitdelay) -;; (- first adjmaxload) )))) -;; (load-jump-limit (configf:lookup-number *configdat* "setup" "load-jump-limit")) -;; ;; effective load accounts for load jumps, this should elminate all the first-next-avg, adjwait, load-jump-limit -;; ;; etc. -;; (effective-load (common:get-intercept first next)) -;; (effective-host (or remote-host "localhost")) -;; (normalized-effective-load (/ effective-load numcpus)) -;; (will-wait (> normalized-effective-load maxload))) -;; -;; ;; let's let the user know once in a long while that load checking -;; ;; is happening but not constantly report it -;; #;(if (common:low-noise-print 30 (conc "cpuload" (or remote-host "localhost"))) ;; (> (random 100) 75) ;; about 25% of the time -;; (debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload -;; ", load: " first ", adjmaxload: " adjmaxload ", loadjmp: " loadjmp)) -;; -;; (debug:print-info 1 *default-log-port* -;; "On host: " effective-host -;; ", effective load: " effective-load -;; ", numcpus: " numcpus -;; ", normalized effective load: " normalized-effective-load -;; ) -;; -;; (cond -;; ;; bad data, try again to get the data -;; ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable -;; (> num-tries 0)) -;; (debug:print 0 *default-log-port* "WARNING: received bad data from get-cpu-load " first ", we'll sleep 10s and try " num-tries " more times.") -;; (thread-sleep! 10) -;; (common:wait-for-cpuload maxload-in numcpus-in waitdelay -;; count: count remote-host: remote-host force-maxload: force-maxload num-tries: (- num-tries 1))) -;; ;; need to wait for load to drop -;; ((and will-wait ;; (> first adjmaxload) -;; (> count 0)) -;; (debug:print-info 0 *default-log-port* -;; "Delaying " 15 ;; adjwait -;; " seconds due to normalized effective load " normalized-effective-load ;; first -;; " exceeding max of " adjmaxload -;; " on server " (or remote-host (get-host-name)) -;; " (normalized load-limit: " maxload ") " (if msg msg "")) -;; (thread-sleep! 15) ;; adjwait) -;; (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host)) -;; ((and (> loadjmp (cond -;; (load-jump-limit load-jump-limit) -;; ((> numcpus 8)(/ numcpus 2)) -;; ((> numcpus 4)(/ numcpus 1.2)) -;; (else 0.5))) -;; (> count 0)) -;; (debug:print-info 0 *default-log-port* "waiting " adjwait " seconds due to possible load jump " loadjmp ". " -;; (if msg msg "")) -;; (thread-sleep! adjwait) -;; (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host)) -;; (else -;; (if (> num-tries 0) -;; (if (common:low-noise-print 30 (conc (round first) "-load-acceptable-" (or remote-host "localhost"))) -;; (debug:print 0 *default-log-port* "Load on " (or remote-host "localhost") " is acceptable at " first " continuing.")) -;; (debug:print 0 *default-log-port* "Load on " (or remote-host "localhost") ", "first" could not be retrieved. Giving up and continuing.")))))) -;; -(define (get-uname . params) - (let* ((uname-res (process:cmd-run->list (conc "uname " (if (null? params) "-a" (car params))))) - (uname #f)) - (if (null? (car uname-res)) - "unknown" - (caar uname-res)))) - -;;====================================================================== -;; D I S K S P A C E -;;====================================================================== - -(define (common:get-disk-space-used fpath) - (with-input-from-pipe (conc "/usr/bin/du -s " fpath) read)) - -;;====================================================================== -;; given path get free space, allows override in [setup] -;; with free-space-script /path/to/some/script.sh -;; -(define (get-df path) - (if (configf:lookup *configdat* "setup" "free-space-script") - (with-input-from-pipe - (conc (configf:lookup *configdat* "setup" "free-space-script") " " path) - (lambda () - (let ((res (read-line))) - (if (string? res) - (string->number res))))) - (get-unix-df path))) - -(define (get-free-inodes path) - (if (configf:lookup *configdat* "setup" "free-inodes-script") - (with-input-from-pipe - (conc (configf:lookup *configdat* "setup" "free-inodes-script") " " path) - (lambda () - (let ((res (read-line))) - (if (string? res) - (string->number res))))) - (get-unix-inodes path))) - -(define (get-unix-df path) - (let* ((df-results (process:cmd-run->list (conc "df " path))) - (space-rx (regexp "([0-9]+)\\s+([0-9]+)%")) - (freespc #f)) - ;; (write df-results) - (for-each (lambda (l) - (let ((match (string-search space-rx l))) - (if match - (let ((newval (string->number (cadr match)))) - (if (number? newval) - (set! freespc newval)))))) - (car df-results)) - freespc)) - -(define (get-unix-inodes path) - (let* ((df-results (process:cmd-run->list (conc "df -i " path))) - (space-rx (regexp "([0-9]+)\\s+([0-9]+)%")) - (freenodes 0)) ;; 0 is a better failsafe than #f here. - ;; (write df-results) - (for-each (lambda (l) - (let ((match (string-search space-rx l))) - (if match - (let ((newval (string->number (cadr match)))) - (if (number? newval) - (set! freenodes newval)))))) - (car df-results)) - freenodes)) - -(define (common:check-space-in-dir dirpath required) - (let* ((dbspace (if (directory? dirpath) - (get-df dirpath) - 0))) - (list (> dbspace required) - dbspace - required - dirpath))) - -;;====================================================================== -;; check space in dbdir and in megatest dir -;; returns: ok/not dbspace required-space -;; -(define (common:check-db-dir-space) - (let* ((required (string->number - ;; default is 1GB (or actually a billion bytes) This is the number of 1 kB blocks. - (or (configf:lookup *configdat* "setup" "dbdir-space-required") - "1000000"))) - (dbdir (common:make-tmpdir-name *toppath* "")) ;; (db:get-dbdir)) - (tdbspace (common:check-space-in-dir dbdir required)) - (mdbspace (common:check-space-in-dir *toppath* required))) - (sort (list tdbspace mdbspace) (lambda (a b) - (< (cadr a)(cadr b)))))) - -;;====================================================================== -;; check available space in dbdir, exit if insufficient -;; -(define (common:check-db-dir-and-exit-if-insufficient) - (let* ((spacedat (car (common:check-db-dir-space))) ;; look only at worst for now - (is-ok (car spacedat)) - (dbspace (cadr spacedat)) - (required (caddr spacedat)) - (dbdir (cadddr spacedat))) - (if (not is-ok) - (begin - (debug:print-error 0 *default-log-port* "Insufficient space in " dbdir ", require " required ", have " dbspace ", exiting now.") - (exit 1))))) - -;;====================================================================== -;; paths is list of lists ((name path) ... ) -;; -(define (common:get-disk-with-most-free-space disks minsize) - (let* ((best #f) - (bestsize 0) - (default-min-inodes-string "1000000") - (default-min-inodes (string->number default-min-inodes-string)) - (min-inodes (or (string->number (if (configf:lookup *configdat* "setup" "min_inodes") (configf:lookup *configdat* "setup" "min_inodes") default-min-inodes-string)) default-min-inodes))) - - (for-each - (lambda (disk-num) - (let* ((dirpath (cadr (assoc disk-num disks))) - (freespc (cond - ((not (directory? dirpath)) - (if (common:low-noise-print 300 "disks not a dir " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it.")) - -1) - ((not (file-write-access? dirpath)) - (if (common:low-noise-print 300 "disks not writeable " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it.")) - -1) - ((not (eq? (string-ref dirpath 0) #\/)) - (if (common:low-noise-print 300 "disks not a proper path " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it.")) - -1) - (else - (get-df dirpath)))) - (free-inodes (cond - ((not (directory? dirpath)) - (if (common:low-noise-print 300 "disks not a dir " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it.")) - -1) - ((not (file-write-access? dirpath)) - (if (common:low-noise-print 300 "disks not writeable " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it.")) - -1) - ((not (eq? (string-ref dirpath 0) #\/)) - (if (common:low-noise-print 300 "disks not a proper path " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it.")) - -1) - (else - (get-free-inodes dirpath)))) - ;;(free-inodes (get-free-inodes dirpath)) - ) - (debug:print 2 *default-log-port* "INFO: disk " disk-num " path " dirpath " free space " freespc " free inodes " free-inodes) - (if (and (> freespc bestsize)(> free-inodes min-inodes )) - (begin - (set! best (cons disk-num dirpath)) - (set! bestsize freespc))) - ;;(print "Processing: " disk-num " bestsize: " bestsize " best: " best " freespc: " freespc " min-inodes: " min-inodes " free-inodes: " free-inodes) - )) - (map car disks)) - (if (and best (> bestsize minsize)) - best - #f))) ;; #f means no disk candidate found - -;;====================================================================== -;; convert a spec string to a list of vectors #( rx action rx-string ) -(define (common:spec-string->list-of-specs spec-string actions) - (let ((spec-strings (string-split-fields "\\s*;\\s*" spec-string #:infix)) - (actions-regex (regexp (conc "^(.*)\\s+(" (string-intersperse (map conc actions) "|") ")")))) - (filter - (lambda (x) x) - (map (lambda (s) - (let ((m (string-match actions-regex s))) - (if m - (vector (regexp (cadr m))(string->symbol (caddr m))(cadr m)) - (begin - (debug:print 0 *default-log-port* "WARNING: Unrecognised rule \"" s "\" in clean-up specification.") - #f)))) - spec-strings)))) - -;;====================================================================== -;; given a list of specs rx . rule and a file return the first matching rule -;; -(define (common:file-find-rule fname rules) ;; rule is vector #( rx action rx-string) - (let loop ((rule (car rules)) - (tail (cdr rules))) - (let ((rx (vector-ref rule 0)) - (rn (vector-ref rule 1))) ;; rule name - (if (string-match rx fname) - rule ;; return the whole rule so regex can be printed etc. - (if (null? tail) - #f - (loop (car tail)(cdr tail))))))) - -;;====================================================================== -;; given a spec apply some rules to a directory -;; -;; WARNING: This function will REMOVE files - be sure your spec and path is correct! -;; -;; spec format: -;; file-regex1 action; file-regex2 action; ... -;; e.g. -;; .*\.log$ keep; .* remove -;; --> keep all .log files, remove everything else -;; limitations: -;; cannot have a rule with ; as part of the spec -;; not very flexible, would be nice to return binned file names? -;; supported rules: -;; keep - keep this file -;; remove - remove this file -;; compress - compress this file -;; -(define (common:dir-clean-up path spec-string #!key (compress "gzip")(actions '(keep remove compress))(remove-empty #f)) - (let* ((specs (common:spec-string->list-of-specs spec-string actions)) - (keepers (make-hash-table)) - (directories (make-hash-table))) - (find-files - path - action: (lambda (p res) - (let ((rule (common:file-find-rule p specs))) - (cond - ((directory? p)(hash-table-set! directories p #t)) - (else - (case (vector-ref rule 1) - ((keep)(hash-table-set! keepers p rule)) - ((remove) - (debug:print 0 *default-log-port* "Removing file " p) - (delete-file p)) - ((compress) - (debug:print 0 *default-log-port* "Compressing file " p) - (system (conc compress " " p))) - (else - (debug:print 0 *default-log-port* "No match for file " p)))))))) - (if remove-empty - (for-each - (lambda (d) - (if (null? (glob (conc d "/.*")(conc d "/*"))) - (begin - (debug:print 0 *default-log-port* "Removing empty directory " d) - (delete-directory d)))) - (sort (hash-table-keys directories) (lambda (a b)(> (string-length a)(string-length b)))))) - )) - -;;====================================================================== -;; E N V I R O N M E N T V A R S -;;====================================================================== - -(define (save-environment-as-files fname #!key (ignorevars (list "USER" "HOME" "DISPLAY" "LS_COLORS" "XKEYSYMDB" "EDITOR" "MAKEFLAGS" "MAKEF" "MAKEOVERRIDES" "HOSTNAME"))) - ;;(bb-check-path msg: "save-environment-as-files entry") - (let ((envvars (get-environment-variables)) - (whitesp (regexp "[^a-zA-Z0-9_\\-:,\\.\\/%$]")) - (mungeval (lambda (val) - (cond - ((eq? val #t) "") ;; convert #t to empty string - ((eq? val #f) #f) ;; convert #f to itself (still thinking about this one - (else val))))) - (with-output-to-file (conc fname ".csh") - (lambda () - (for-each (lambda (keyval) - (let* ((key (car keyval)) - (val (cdr keyval)) - (delim (if (and (string-search whitesp val) - (not (string-search "^\".*\"$" val)) - (not (string-search "^'.*'$" val))) - "\"" - ""))) - - (print (if (or (member key ignorevars) - (string-search whitesp key)) - "# setenv " - "setenv ") - key " " delim (mungeval val) delim))) - envvars))) - (with-output-to-file (conc fname ".sh") - (lambda () - (for-each (lambda (keyval) - (let* ((key (car keyval)) - (val (cdr keyval)) - (delim (if (and (string-search whitesp val) - (not (string-search "^\".*\"$" val)) - (not (string-search "^'.*'$" val))) - "\"" - ""))) - (print (if (or (member key ignorevars) - (string-search whitesp key) - (string-search ":" key)) ;; internal only values to be skipped. - "# export " - "export ") - key "=" delim (mungeval val) delim))) - envvars))))) - -(define (common:get-param-mapping #!key (flavor #f)) - "returns alist mapping string keys in testconfig/subrun to megatest command line switches; if flavor is switch-symbol, maps tcmt symbolic switches to megatest switches" - (let ((default '(("tag-expr" . "-tagexpr") - ("mode-patt" . "-modepatt") - ("run-name" . "-runname") - ("contour" . "-contour") - ("target" . "-target") - ("test-patt" . "-testpatt") - ("msg" . "-m") - ("log" . "-log") - ("start-dir" . "-start-dir") - ("new" . "-set-state-status")))) - (if (eq? flavor 'switch-symbol) - (map (lambda (x) - (cons (string->symbol (conc "-" (car x))) (cdr x))) - default) - default))) - -;;====================================================================== -;; set some env vars from an alist, return an alist with original values -;; (("VAR" "value") ...) -;; a value of #f means "unset this var" -;; -(define (alist->env-vars lst) - (if (list? lst) - (let ((res '())) - (for-each (lambda (p) - (let* ((var (car p)) - (val (cadr p)) - (prv (get-environment-variable var))) - (set! res (cons (list var prv) res)) - (if val - (safe-setenv var (->string val)) - (unsetenv var)))) - lst) - res) - '())) - -;;====================================================================== -;; clear vars matching pattern, run proc, set vars back -;; if proc is a string run that string as a command with -;; system. -;; -(define *common:orig-env* - (let ((envvars (get-environment-variables))) - (if (get-environment-variable "MT_ORIG_ENV") - (with-input-from-string - (z3:decode-buffer (base64:base64-decode (get-environment-variable "MT_ORIG_ENV"))) - read) - (filter-map (lambda (x) - (if (string-match "^MT_.*" (car x)) - #f - x)) - envvars)))) - -(define (common:with-orig-env proc) - (let ((current-env (get-environment-variables))) - (for-each (lambda (x) (unsetenv (car x))) current-env) - (for-each (lambda (x) (setenv (car x) (cdr x))) *common:orig-env*) - (let ((rv (cond - ((string? proc)(system proc)) - (proc (proc))))) - (for-each (lambda (x) (unsetenv (car x))) *common:orig-env*) - (for-each (lambda (x) (setenv (car x) (cdr x))) current-env) - rv))) - -(define (common:without-vars proc . var-patts) - (let ((vars (make-hash-table))) - (for-each - (lambda (vardat) ;; each env var - (for-each - (lambda (var-patt) - (if (string-match var-patt (car vardat)) - (let ((var (car vardat)) - (val (cdr vardat))) - (hash-table-set! vars var val) - (unsetenv var)))) - var-patts)) - (get-environment-variables)) - (cond - ((string? proc)(system proc)) - (proc (proc))) - (hash-table-for-each - vars - (lambda (var val) - (setenv var val))) - vars)) - -(define (common:run-a-command cmd #!key (with-vars #f) (with-orig-env #f)) - (let* ((pre-cmd (dtests:get-pre-command)) - (post-cmd (dtests:get-post-command)) - (fullcmd (if (or pre-cmd post-cmd) - (conc pre-cmd cmd post-cmd) - (conc "viewscreen " cmd)))) - (debug:print-info 02 *default-log-port* "Running command: " fullcmd) - (cond - (with-vars (common:without-vars fullcmd)) - (with-orig-env (common:with-orig-env fullcmd)) - (else (common:without-vars fullcmd "MT_.*"))))) - -;;====================================================================== -;; C O L O R S -;;====================================================================== - -(define (common:name->iup-color name) - (case (string->symbol (string-downcase name)) - ((red) "223 33 49") - ((grey) "192 192 192") - ((orange) "255 172 13") - ((purple) "This is unfinished ..."))) - -;;====================================================================== -;; (define (common:get-color-for-state-status state status) -;; (case (string->symbol state) -;; ((COMPLETED) -;; (case (string->symbol status) -;; ((PASS) "70 249 73") -;; ((WARN WAIVED) "255 172 13") -;; ((SKIP) "230 230 0") -;; (else "223 33 49"))) -;; ((LAUNCHED) "101 123 142") -;; ((CHECK) "255 100 50") -;; ((REMOTEHOSTSTART) "50 130 195") -;; ((RUNNING) "9 131 232") -;; ((KILLREQ) "39 82 206") -;; ((KILLED) "234 101 17") -;; ((NOT_STARTED) "240 240 240") -;; (else "192 192 192"))) - -(define (common:iup-color->rgb-hex instr) - (string-intersperse - (map (lambda (x) - (number->string x 16)) - (map string->number - (string-split instr))) - "/")) - -;;====================================================================== -;; L O C K I N G M E C H A N I S M S -;;====================================================================== - -;;====================================================================== -;; faux-lock is deprecated. Please use simple-lock below -;; -(define (common:faux-lock keyname #!key (wait-time 8)(allow-lock-steal #t)) - (if (rmt:no-sync-get/default keyname #f) ;; do not be tempted to compare to pid. locking is a one-shot action, if already locked for this pid it doesn't actually count - (if (> wait-time 0) - (begin - (thread-sleep! 1) - (if (eq? wait-time 1) ;; only one second left, steal the lock - (begin - (debug:print-info 0 *default-log-port* "stealing lock for " keyname) - (common:faux-unlock keyname force: #t))) - (common:faux-lock keyname wait-time: (- wait-time 1))) - #f) - (begin - (rmt:no-sync-set keyname (conc (current-process-id))) - (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f)))))) - -(define (common:faux-unlock keyname #!key (force #f)) - (if (or force (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f)))) - (begin - (if (rmt:no-sync-get/default keyname #f) (rmt:no-sync-del! keyname)) - #t) - #f)) - -;;====================================================================== -;; simple lock. improve and converge on this one. -;; -(define (common:simple-lock keyname) - (rmt:no-sync-get-lock keyname)) - -(define (common:simple-unlock keyname #!key (force #f)) - (rmt:no-sync-del! keyname)) - -;;====================================================================== -;; -;;====================================================================== - -(define (common:in-running-test?) - (and (args:get-arg "-execute") (get-environment-variable "MT_CMDINFO"))) - -(define (common:get-color-from-status status) - (cond - ((equal? status "PASS") "green") - ((equal? status "FAIL") "red") - ((equal? status "WARN") "orange") - ((equal? status "KILLED") "orange") - ((equal? status "KILLREQ") "purple") - ((equal? status "RUNNING") "blue") - ((equal? status "ABORT") "brown") - (else "black"))) - -;;====================================================================== -;; N A N O M S G C L I E N T -;;====================================================================== -;; -;; -;; -;; (define (common:send-dboard-main-changed) -;; (let* ((dashboard-ips (mddb:get-dashboards))) -;; (for-each -;; (lambda (ipadr) -;; (let* ((soc (common:open-nm-req (conc "tcp://" ipadr))) -;; (msg (conc "main " *toppath*)) -;; (res (common:nm-send-receive-timeout soc msg))) -;; (if (not res) ;; couldn't reach that dashboard - remove it from db -;; (print "ERROR: couldn't reach dashboard " ipadr)) -;; res)) -;; dashboard-ips))) -;; -;; -;; ;;====================================================================== -;; ;; D A S H B O A R D D B -;; ;;====================================================================== -;; -;; (define (mddb:open-db) -;; (let* ((db (open-database (conc (get-environment-variable "HOME") "/.dashboard.db")))) -;; (set-busy-handler! db (busy-timeout 10000)) -;; (for-each -;; (lambda (qry) -;; (exec (sql db qry))) -;; (list -;; "CREATE TABLE IF NOT EXISTS vars (id INTEGER PRIMARY KEY,key TEXT, val TEXT, CONSTRAINT varsconstraint UNIQUE (key));" -;; "CREATE TABLE IF NOT EXISTS dashboards ( -;; id INTEGER PRIMARY KEY, -;; pid INTEGER, -;; username TEXT, -;; hostname TEXT, -;; ipaddr TEXT, -;; portnum INTEGER, -;; start_time TIMESTAMP DEFAULT (strftime('%s','now')), -;; CONSTRAINT hostport UNIQUE (hostname,portnum) -;; );" -;; )) -;; db)) -;; -;; ;; register a dashboard -;; ;; -;; (define (mddb:register-dashboard port) -;; (let* ((pid (current-process-id)) -;; (hostname (get-host-name)) -;; (ipaddr (server:get-best-guess-address hostname)) -;; (username (current-user-name)) ;; (car userinfo))) -;; (db (mddb:open-db))) -;; (print "Register monitor, pid: " pid ", hostname: " hostname ", port: " port ", username: " username) -;; (exec (sql db "INSERT OR REPLACE INTO dashboards (pid,username,hostname,ipaddr,portnum) VALUES (?,?,?,?,?);") -;; pid username hostname ipaddr port) -;; (close-database db))) -;; -;; ;; unregister a monitor -;; ;; -;; (define (mddb:unregister-dashboard host port) -;; (let* ((db (mddb:open-db))) -;; (print "Register unregister monitor, host:port=" host ":" port) -;; (exec (sql db "DELETE FROM dashboards WHERE hostname=? AND portnum=?;") host port) -;; (close-database db))) -;; -;; ;; get registered dashboards -;; ;; -;; (define (mddb:get-dashboards) -;; (let ((db (mddb:open-db))) -;; (query fetch-column -;; (sql db "SELECT ipaddr || ':' || portnum FROM dashboards;")))) - -;;====================================================================== -;; T E S T L A U N C H I N G P E R I T E M W I T H H O S T T Y P E S -;;====================================================================== -;; -;; [hosts] -;; arm cubie01 cubie02 -;; x86_64 zeus xena myth01 -;; allhosts #{g hosts arm} #{g hosts x86_64} -;; -;; [host-types] -;; general #MTLOWESTLOAD #{g hosts allhosts} -;; arm #MTLOWESTLOAD #{g hosts arm} -;; nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo -;; -;; [host-rules] -;; # maxnload => max normalized load -;; # maxnjobs => max jobs per cpu -;; # maxjobrate => max jobs per second -;; general maxnload=1.1; maxnjobs=1.2; maxjobrate=0.1 -;; -;; [launchers] -;; envsetup general -;; xor/%/n 4C16G -;; % nbgeneral -;; -;; [jobtools] -;; # if defined and not "no" flexi-launcher will bypass "launcher" unless no match. -;; flexi-launcher yes -;; launcher nbfake -;; -(define (common:get-launcher configdat testname itempath) - (let ((fallback-launcher (configf:lookup configdat "jobtools" "launcher"))) - (if (and (configf:lookup configdat "jobtools" "flexi-launcher") ;; overrides launcher - (not (equal? (configf:lookup configdat "jobtools" "flexi-launcher") "no"))) - (let* ((launchers (hash-table-ref/default configdat "launchers" '()))) - (if (null? launchers) - fallback-launcher - (let loop ((hed (car launchers)) - (tal (cdr launchers))) - (let ((patt (car hed)) - (host-type (cadr hed))) - (if (tests:match patt testname itempath) - (begin - (debug:print-info 2 *default-log-port* "Have flexi-launcher match for " testname "/" itempath " = " host-type) - (let ((launcher (configf:lookup configdat "host-types" host-type))) - (if launcher - (let* ((launcher-parts (string-split launcher)) - (launcher-exe (car launcher-parts))) - (if (equal? launcher-exe "#MTLOWESTLOAD") ;; this is our special case, we will find the lowest load and craft a nbfake commandline - (let host-loop ((targ-host (common:get-least-loaded-host (cdr launcher-parts) host-type configdat)) - (count 100)) - (if targ-host - (conc "remrun " targ-host) - (if (> count 0) - (begin - (debug:print 0 *default-log-port* "INFO: Waiting for a host for host-type " host-type) - (thread-sleep! (- 101 count)) - (host-loop (common:get-least-loaded-host (cdr launcher-parts) host-type configdat) - (- count 1))) - (begin - (debug:print 0 *default-log-port* "FATAL: Failed to find a host from #MTLOWESTLOAD for host-type " host-type) - (exit))))) - launcher)) - (begin - (debug:print-info 0 *default-log-port* "WARNING: no launcher found for host-type " host-type) - (if (null? tal) - fallback-launcher - (loop (car tal)(cdr tal))))))) - ;; no match, try again - (if (null? tal) - fallback-launcher - (loop (car tal)(cdr tal)))))))) - fallback-launcher))) - -;;====================================================================== -;; D A S H B O A R D U S E R V I E W S -;;====================================================================== - -;;====================================================================== -;; first read ~/views.config if it exists, then read $MTRAH/views.config if it exists -;; -(define (common:load-views-config) - (let* ((view-cfgdat (make-hash-table)) - (home-cfgfile (conc (get-environment-variable "HOME") "/.mtviews.config")) - (mthome-cfgfile (conc *toppath* "/.mtviews.config"))) - (if (common:file-exists? mthome-cfgfile) - (read-config mthome-cfgfile view-cfgdat #t)) - ;; we load the home dir file AFTER the MTRAH file so the user can clobber settings when running the dashboard in read-only areas - (if (common:file-exists? home-cfgfile) - (read-config home-cfgfile view-cfgdat #t)) - view-cfgdat)) - -;;====================================================================== -;; H I E R A R C H I C A L H A S H T A B L E S -;;====================================================================== -;; -;; Every element including top element is a vector: -;; - -(define (hh:make-hh #!key (ht #f)(value #f)) - (vector (or ht (make-hash-table)) value)) - -;;====================================================================== -;; used internally -(define-inline (hh:set-ht! hh ht) (vector-set! hh 0 ht)) -(define-inline (hh:get-ht hh) (vector-ref hh 0)) -(define-inline (hh:set-value! hh value) (vector-set! hh 1 value)) -(define-inline (hh:get-value hh value) (vector-ref hh 1)) - -;;====================================================================== -;; given a hierarchial hash and some keys look up the value ... -;; -(define (hh:get hh . keys) - (if (null? keys) - (vector-ref hh 1) ;; we have reached the end of the line, return the value sought - (let ((sub-ht (hh:get-ht hh))) - (if sub-ht ;; yes, there is more hierarchy - (let ((sub-hh (hash-table-ref/default sub-ht (car keys) #f))) - (if sub-hh - (apply hh:get sub-hh (cdr keys)) - #f)) - #f)))) - -;;====================================================================== -;; given a hierarchial hash, a value and some keys, add needed hierarcy and insert the value -;; -(define (hh:set! hh value . keys) - (if (null? keys) - (hh:set-value! hh value) ;; we have reached the end of the line, store the value - (let ((sub-ht (hh:get-ht hh))) - (if sub-ht ;; yes, there is more hierarchy - (let ((sub-hh (hash-table-ref/default sub-ht (car keys) #f))) - (if (not sub-hh) ;; we'll need to add the next level of hierarchy - (let ((new-sub-hh (hh:make-hh))) - (hash-table-set! sub-ht (car keys) new-sub-hh) - (apply hh:set! new-sub-hh value (cdr keys))) - (apply hh:set! sub-hh value (cdr keys)))) ;; call the sub-hierhash with remaining keys - (begin - (hh:set-ht! hh (make-hash-table)) - (apply hh:set! hh value keys)))))) - -;;====================================================================== -;; Manage pkts, used in servers, tests and likely other contexts so put -;; in common -;;====================================================================== - -(define common:pkts-spec - '((default . ((parent . P) - (action . a) - (filename . f))) - (configf . ((parent . P) - (action . a) - (filename . f))) - (server . ((action . a) - (pid . d) - (ipaddr . i) - (port . p) - (parent . P))) - - (test . ((cpuuse . c) - (diskuse . d) - (item-path . i) - (runname . r) - (state . s) - (target . t) - (status . u) - (parent . P))))) - -(define (common:get-pkts-dirs mtconf use-lt) - (let* ((pktsdirs-str (or (configf:lookup mtconf "setup" "pktsdirs") - (and use-lt - (conc (or *toppath* - (current-directory)) - "/lt/.pkts")))) - (pktsdirs (if pktsdirs-str - (string-split pktsdirs-str " ") - #f))) - pktsdirs)) - -;;====================================================================== -;; use-lt is use linktree "lt" link to find pkts dir -(define (common:save-pkt pktalist-in mtconf use-lt #!key (add-only #f)) ;; add-only saves the pkt only if there is a parent already - (if (or (not add-only) - (hash-table-exists? *pkts-info* 'last-parent)) - (let* ((parent (hash-table-ref/default *pkts-info* 'last-parent #f)) - (pktalist (if parent - (cons `(parent . ,parent) - pktalist-in) - pktalist-in))) - (let-values (((uuid pkt) - (alist->pkt pktalist common:pkts-spec))) - (hash-table-set! *pkts-info* 'last-parent uuid) - (let ((pktsdir (or (hash-table-ref/default *pkts-info* 'pkts-dir #f) - (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt)) - (pktsdir (car pktsdirs))) ;; assume it is there - (hash-table-set! *pkts-info* 'pkts-dir pktsdir) - pktsdir)))) - (debug:print 0 *default-log-port* "pktsdir: "pktsdir) - (handle-exceptions - exn - (debug:print-info 0 "failed to write out packet to " pktsdir ", exn=" exn) ;; don't care if this failed for now but MUST FIX - BUG!! - (if (not (file-exists? pktsdir)) - (create-directory pktsdir #t)) - (with-output-to-file - (conc pktsdir "/" uuid ".pkt") - (lambda () - (print pkt))))))))) - -(define (common:with-queue-db mtconf proc #!key (use-lt #f)(toppath-in #f)) - (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt)) - (pktsdir (if pktsdirs (car pktsdirs) #f)) - (toppath (or (configf:lookup mtconf "scratchdat" "toppath") - toppath-in)) - (pdbpath (or (configf:lookup mtconf "setup" "pdbpath") pktsdir))) - (cond - ((not (and pktsdir toppath pdbpath)) - (debug:print 0 *default-log-port* "ERROR: settings are missing in your megatest.config for area management.") - (debug:print 0 *default-log-port* " you need to have pktsdirs in the [setup] section.")) - ((not (common:file-exists? pktsdir)) - (debug:print 0 *default-log-port* "ERROR: pkts directory not found " pktsdir)) - ((not (equal? (file-owner pktsdir)(current-effective-user-id))) - (debug:print 0 *default-log-port* "ERROR: directory " pktsdir " is not owned by " (current-effective-user-name))) - (else - (let* ((pdb (open-queue-db pdbpath "pkts.db" - schema: '("CREATE TABLE groups (id INTEGER PRIMARY KEY,groupname TEXT, CONSTRAINT group_constraint UNIQUE (groupname));")))) - (proc pktsdirs pktsdir pdb) - (dbi:close pdb)))))) - -(define (common:load-pkts-to-db mtconf #!key (use-lt #f)) - (common:with-queue-db - mtconf - (lambda (pktsdirs pktsdir pdb) - (for-each - (lambda (pktsdir) ;; look at all - (cond - ((not (common:file-exists? pktsdir)) - (debug:print 0 *default-log-port* "ERROR: packets directory " pktsdir " does not exist.")) - ((not (directory? pktsdir)) - (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not a directory.")) - ((not (file-read-access? pktsdir)) - (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not readable.")) - (else - (debug:print-info 0 *default-log-port* "Loading packets found in " pktsdir) - (let ((pkts (glob (conc pktsdir "/*.pkt"))) - (sqdb (dbi:db-conn pdb)) - ) - ;; Put this in a transaction to avoid issues overloading the db - (sqlite3:with-transaction - sqdb - (lambda () - (for-each - (lambda (pkt) - (let* ((uuid (cadr (string-match ".*/([0-9a-f]+).pkt" pkt))) - (exists (lookup-by-uuid pdb uuid #f))) - (if (not exists) - (let* ((pktdat (string-intersperse - (with-input-from-file pkt read-lines) - "\n")) - (apkt (pkt->alist pktdat)) - (ptype (alist-ref 'T apkt))) - (add-to-queue pdb pktdat uuid (or ptype 'cmd) #f 0) - (debug:print 4 *default-log-port* "Added " uuid " of type " ptype " to queue")) - (debug:print 4 *default-log-port* "pkt: " uuid " exists, skipping...") - ))) - pkts))))))) - pktsdirs)) - use-lt: use-lt)) - -(define (common:get-pkt-alists pkts) - (map (lambda (x) - (alist-ref 'apkt x)) ;; 'pkta pulls out the alist from the read pkt - pkts)) - -;;====================================================================== -;; given list of pkts (alist mode) return list of D cards as Unix epoch, sorted descending -;; also delete duplicates by target i.e. (car pkt) -;; -(define (common:get-pkt-times pkts) - (delete-duplicates - (sort - (map (lambda (x) - `(,(alist-ref 't x) . ,(string->number (alist-ref 'D x)))) - pkts) - (lambda (a b)(> (cdr a)(cdr b)))) ;; sort descending - (lambda (a b)(equal? (car a)(car b))))) ;; remove duplicates by target - - -(define *common:thread-punchlist* (make-hash-table)) -(define (common:send-thunk-to-background-thread thunk #!key (name #f)) - ;;(BB> "launched thread " name) - ;; we need a unique name for the thread. - (let* ((realname (if name - (if (not (hash-table-ref/default *common:thread-punchlist* name #f)) - name - (conc name"-" (symbol->string (gensym)))) - (conc "anonymous-"(symbol->string (gensym))))) - (realthunk (lambda () - (let ((res (thunk))) - (hash-table-delete! *common:thread-punchlist* realname) - res))) - (thread (make-thread realthunk realname))) - (hash-table-set! *common:thread-punchlist* realname thread) - (thread-start! thread) - )) - -(define (common:join-backgrounded-threads) - ;; may need to trap and ignore exceptions -- dunno how atomic threads are... - (for-each - (lambda (thread-name) - (let* ((thread (hash-table-ref/default *common:thread-punchlist* thread-name #f))) - (if thread - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "joining threads failed. exn=" exn) - #t) ;; just ignore it, it might have died in the meantime so joining it will throw an exception - (thread-join! thread)) - ))) - (hash-table-keys *common:thread-punchlist*))) + + + ;;====================================================================== ;; (define *common:telemetry-log-state* 'startup) ;; (define *common:telemetry-log-socket* #f) ;;