@@ -896,8 +896,2938 @@ (debug:print 2 *default-log-port* "Running " cmd " received " output) (if (eq? (length output) 0) #f #t)) #t))) ;; assuming bad query is about a live test is likely not the right thing to do? +;; delete every file matching the glob pattern filespec +(define (remove-files filespec) + (let ((files (glob filespec))) + (for-each delete-file files))) +;; start a background thread that checks every 5 seconds for <toppath>/stop-the-train; when found it logs, removes server logs, .servinfo entries and .mtdb lock files, then exits with status 1 +(define (stop-the-train) + (thread-start! (make-thread (lambda () + (let loop () + (if (and *toppath* + (file-exists? (conc *toppath*"/stop-the-train"))) + (let* ((msg (conc "ERROR: found file "*toppath*"/stop-the-train, exiting immediately"))) + ;; yes, print to current-output-port AND *default-log-port*, annoying but necessary I think + (print msg) + (debug:print 0 *default-log-port* msg) + (remove-files (conc *toppath* "/logs/server*")) + (remove-files (conc *toppath* "/.servinfo/*")) + (remove-files (conc *toppath* "/.mtdb/*lock")) + (exit 1))) + (thread-sleep! 5) + (loop)))))) + +;; execute thunk, return value. If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* . +;; arguments - thunk, message +(define (common:fail-safe thunk warning-message-on-exception) + (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "notable but nonfatal condition - "warning-message-on-exception", exn=" exn) + (debug:print-info 0 *default-log-port* + (string-substitute "\n?Error:" "nonfatal condition:" + (with-output-to-string + (lambda () + (print-error-message exn) )))) + (debug:print-info 0 *default-log-port* " -- continuing after nonfatal condition...") + #f) + (thunk))) + + +;; returns list of fd count, socket count; the fd directory is globbed once so both counts describe the same snapshot +(define (get-file-descriptor-count #!key (pid (current-process-id ))) + (let ((fd-paths (glob (conc "/proc/" pid "/fd/*")))) + (list + (length fd-paths) + (length (filter socket? fd-paths)) + ))) + + +
+;; GLOBALS + +;; CONTEXTS +(defstruct cxt + (taskdb #f) + (cmutex (make-mutex))) +;; (define *contexts* (make-hash-table)) +;; (define *context-mutex* (make-mutex)) + +;; ;; safe method for accessing a context given a toppath +;; ;; +;; (define (common:with-cxt toppath proc) +;; (mutex-lock! *context-mutex*) +;; (let ((cxt (hash-table-ref/default *contexts* toppath #f))) +;; (if (not cxt) +;; (set! cxt (let ((x (make-cxt)))(hash-table-set! *contexts* toppath x) x))) +;; (let ((cxt-mutex (cxt-mutex cxt))) +;; (mutex-unlock! *context-mutex*) +;; (mutex-lock! cxt-mutex) +;; (let ((res (proc cxt))) +;; (mutex-unlock! cxt-mutex) +;; res)))) + +;; A hash table that can be accessed by #{scheme ...} calls in +;; config files. Allows communicating between configs +;; +(define *user-hash-data* (make-hash-table)) + +(define *db-keys* #f) + +(define *pkts-info* (make-hash-table)) ;; store stuff like the last parent here +(define *configinfo* #f) ;; raw results from setup, includes toppath and table from megatest.config +(define *runconfigdat* #f) ;; run configs data +(define *configdat* #f) ;; megatest.config data +(define *configstatus* #f) ;; status of data; 'fulldata : all processing done, #f : no data yet, 'partialdata : partial read done +;; (define *toppath* #f) ;; moved to commonmod +(define *already-seen-runconfig-info* #f) + +(define *test-meta-updated* (make-hash-table)) +(define *globalexitstatus* 0) ;; attempt to work around possible thread issues +(define *passnum* 0) ;; when running track calls to run-tests or similar +;; (define *alt-log-file* #f) ;; used by -log +;; (define *common:denoise* (make-hash-table)) ;; for low noise printing +(define *default-log-port* (current-error-port)) +(define *time-zero* (current-seconds)) ;; for the watchdog +(define *on-exit-procs* '()) ;; add procs to this list to be executed on exit +(define *default-area-tag* "local") + +;; DATABASE +;; db access +(define *db-last-access* 
(current-seconds)) ;; last db access, used in server +;; (define *db-write-access* #t) +;; db sync +;; (define *db-last-sync* 0) ;; last time the sync to megatest.db happened +(define *db-sync-in-progress* #f) ;; if there is a sync in progress do not try to start another +;; (define *db-multi-sync-mutex* (make-mutex)) ;; protect access to *db-sync-in-progress*, *db-last-sync* +;; task db +(define *task-db* #f) ;; (vector db path-to-db) +(define *db-access-allowed* #t) ;; flag to allow access; set to #f by common:db-block-further-queries +;; (define *db-access-mutex* (make-mutex)) ;; moved to dbfile +;; (define *db-transaction-mutex* (make-mutex)) +(define *db-cache-path* #f) ;; memoized result of common:get-db-tmp-area +;; (define *db-with-db-mutex* (make-mutex)) +(define *db-api-call-time* (make-hash-table)) ;; hash of command => (list of times) + +;; SERVER +(define *transport-type* 'http) ;; override with [server] transport http|rpc|nmsg +(define *runremote* #f) ;; if set up for server communication this will hold +;; (define *max-cache-size* 0) +(define *logged-in-clients* (make-hash-table)) +(define *server-id* #f) +;; (define *server-info* #f) ;; good candidate for easily convert to non-global +(define *time-to-exit* #f) ;; set to #t by the signal handlers right before they exit +(define *run-id* #f) +(define *server-kind-run* (make-hash-table)) +(define *home-host* #f) +;; (define *total-non-write-delay* 0) +(define *heartbeat-mutex* (make-mutex)) +;; (define *api-process-request-count* 0) +;; (define *max-api-process-requests* 0) +(define *server-overloaded* #f) + +;; client +(define *rmt-mutex* (make-mutex)) ;; remote access calls mutex + +;; RPC transport +(define *rpc:listener* #f) + +;; KEY info +(define *target* (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN +(define *keys* (make-hash-table)) ;; cache the keys here +(define *keyvals* (make-hash-table)) +(define *toptest-paths* (make-hash-table)) ;; cache toptest path settings here +(define *test-paths* (make-hash-table)) ;; cache test-id to test run paths here +(define *test-ids* (make-hash-table)) ;; cache 
run-id, testname, and item-path => test-id +(define *test-info* (make-hash-table)) ;; cache the test info records, update the state, status, run_duration etc. from testdat.db + +(define *run-info-cache* (make-hash-table)) ;; run info is stable, no need to reget +(define *launch-setup-mutex* (make-mutex)) ;; need to be able to call launch:setup often so mutex it and re-call the real deal only if *toppath* not set +(define *homehost-mutex* (make-mutex)) + +;; Miscellaneous +(define *triggers-mutex* (make-mutex)) ;; block overlapping processing of triggers +(define *numcpus-cache* (make-hash-table)) + +;; this plugs a hole in posix-extras in recent chicken versions > 4.9) +(let-values (( (chicken-release-number chicken-major-version) + (apply values + (map string->number + (take + (string-split (chicken-version) ".") + 2))))) + (let ((resolve-pathname-broken? + (or (> chicken-release-number 4) + (and (eq? 4 chicken-release-number) (> chicken-major-version 9))))) + (if resolve-pathname-broken? + (define ##sys#expand-home-path pathname-expand)))) +;; canonical path of x (pathname-expand then resolve-pathname); a #f argument is replaced by "/dev/null" +(define (realpath x) (resolve-pathname (pathname-expand (or x "/dev/null")) )) +;; full path of the running program: if argv has more than two elements and looks like "csi -s|-ss|-sx|-script <file> ...", <file> is used, otherwise the first argv element +(define (common:get-this-exe-fullpath #!key (argv (argv))) + (let* ((this-script + (cond + ((and (> (length argv) 2) + (string-match "^(.*/csi|csi)$" (car argv)) + (string-match "^-(s|ss|sx|script)$" (cadr argv))) + (caddr argv)) + (else (car argv)))) + (fullpath (realpath this-script))) + fullpath)) + +;;====================================================================== + +(define *common:this-exe-fullpath* (common:get-this-exe-fullpath)) +(define *common:this-exe-dir* (pathname-directory *common:this-exe-fullpath*)) +(define *common:this-exe-name* (pathname-strip-directory *common:this-exe-fullpath*)) +;; path of megatest.db.lock inside the directory returned by common:make-tmpdir-name for *toppath* +(define (common:get-sync-lock-filepath) + (let* ((tmp-area (common:make-tmpdir-name *toppath* "")) + (lockfile (conc tmp-area "/megatest.db.lock"))) + lockfile)) + +(define *common:logpro-exit-code->status-sym-alist* + '( ( 0 . 
pass ) + ( 1 . fail ) + ( 2 . warn ) + ( 3 . check ) + ( 4 . waived ) + ( 5 . abort ) + ( 6 . skip ))) + +(define (common:logpro-exit-code->status-sym exit-code) + (or (alist-ref exit-code *common:logpro-exit-code->status-sym-alist*) 'fail)) + +(define (common:worse-status-sym ss1 ss2) + (let loop ((status-syms-remaining '(abort fail check skip warn waived pass))) + (cond + ((null? status-syms-remaining) + 'fail) + ((eq? (car status-syms-remaining) ss1) + ss1) + ((eq? (car status-syms-remaining) ss2) + ss2) + (else + (loop (cdr status-syms-remaining)))))) + +(define (common:steps-can-proceed-given-status-sym status-sym) + (if (member status-sym '(warn waived pass)) + #t + #f)) + +(define (status-sym->string status-sym) + (case status-sym + ((pass) "PASS") + ((fail) "FAIL") + ((warn) "WARN") + ((check) "CHECK") + ((waived) "WAIVED") + ((abort) "ABORT") + ((skip) "SKIP") + (else "FAIL"))) + +(define (common:logpro-exit-code->test-status exit-code) + (status-sym->string (common:logpro-exit-code->status-sym exit-code))) + +;; remote: record describing a connection to a megatest server (transport mode, homehost data, server url/id/info, timestamps, conndat fields); defaults are given inline +(defstruct remote + + ;; transport to be used + ;; http - use http-transport + ;; http-read-cached - use http-transport for writes but in-mem cached for reads + (rmode 'http) + (hh-dat (let ((res (or (server:choose-server *toppath* 'homehost) ;; falls back to the pair (#f . #f) when this returns #f + (cons #f #f)))) + (assert (pair? 
res)(conc "FATAL: hh-dat should be a pair, got "res)) + res)) + (server-url #f) ;; (server:check-if-running *toppath*) #f)) + (server-id #f) + (server-info #f) ;; (if *toppath* (server:check-if-running *toppath*) #f)) + (last-server-check 0) ;; last time we checked to see if the server was alive + (connect-time (current-seconds)) ;; when we first connected + (last-access (current-seconds)) ;; last time we talked to server + ;; (conndat #f) ;; iface port api-uri api-url api-req seconds server-id + (server-timeout (server:expiration-timeout)) + (force-server #f) + (ro-mode #f) + (ro-mode-checked #f) ;; flag that indicates we have checked for ro-mode + + ;; conndat stuff + (iface #f) ;; TODO: Consolidate this data with server-url and server-info above + (port #f) + (api-url #f) + (api-uri #f) + (api-req #f)) + +;; launching and hosts +(defstruct host + (reachable #f) + (last-update 0) + (last-used 0) + (last-cpuload 1)) + +(define *host-loads* (make-hash-table)) + +;; cache environment vars for each run here +(define *env-vars-by-run-id* (make-hash-table)) + +;; Testconfig and runconfig caches. +(define *testconfigs* (make-hash-table)) ;; test-name => testconfig +(define *runconfigs* (make-hash-table)) ;; target => runconfig + +;; This is a cache of pre-reqs met, don't re-calc in cases where called with same params less than +;; five seconds ago +(define *pre-reqs-met-cache* (make-hash-table)) + +;; cache of verbosity given string +;; +(define *verbosity-cache* (make-hash-table)) +;; rebind each cache global to a fresh empty hash table. NOTE(review): the last one reset, *test-id-cache*, is not among the globals defined in this section (which defines *test-ids*) - confirm it is defined elsewhere +(define (common:clear-caches) + (set! *target* (make-hash-table)) + (set! *keys* (make-hash-table)) + (set! *keyvals* (make-hash-table)) + (set! *toptest-paths* (make-hash-table)) + (set! *test-paths* (make-hash-table)) + (set! *test-ids* (make-hash-table)) + (set! *test-info* (make-hash-table)) + (set! *run-info-cache* (make-hash-table)) + (set! *env-vars-by-run-id* (make-hash-table)) + (set! 
*test-id-cache* (make-hash-table))) + +;; Generic string database +(define sdb:qry #f) ;; (make-sdb:qry)) ;; 'init #f) +;; Generic path database +(define *fdb* #f) + +(define *last-launch* (current-seconds)) ;; use for throttling the launch rate. Would be better to use the db and last time of a test in LAUNCHED state. + +;;====================================================================== +;; V E R S I O N +;;====================================================================== + +(define (common:get-full-version) + (conc megatest-version "-" megatest-fossil-hash)) + +(define (common:version-signature) + (conc megatest-version "-" (substring megatest-fossil-hash 0 4))) + +;;====================================================================== +;; from metadat lookup MEGATEST_VERSION +;; +(define (common:get-last-run-version) ;; RADT => How does this work in send-receive function??; assume it is the value saved in some DB + (rmt:get-var "MEGATEST_VERSION")) + +(define (common:get-last-run-version-number) + (string->number + (substring (common:get-last-run-version) 0 6))) + +(define (common:set-last-run-version) + (rmt:set-var "MEGATEST_VERSION" (common:version-signature))) + +;;====================================================================== +;; positive number if megatest version > db version +;; negative number if megatest version < db version +(define (common:version-db-delta) + (- megatest-version (common:get-last-run-version-number))) + +(define (common:version-changed?) + (not (equal? (common:get-last-run-version) + (common:version-signature)))) + + +;; From 1.70 to 1.80, db's are compatible. +;; #t when the first four characters of this megatest's version differ from those recorded by the last run, unless this megatest is "1.80" +(define (common:api-changed?) + (let* ( + (megatest-major-version (substring (->string megatest-version) 0 4)) + (run-major-version (substring (conc (common:get-last-run-version)) 0 4)) + ) + (and (not (equal? megatest-major-version "1.80")) + (not (equal? 
megatest-major-version run-major-version))) + ) +) + +;;====================================================================== +;; Move me elsewhere ... +;; RADT => Why do we need the version check here, this is called only if version mismatch +;; runs db:multi-db-sync (schema killservers adj-target new2old dejunk) for the http, tcp and nfs transport modes, then records the current version if the api changed; the `full` keyword is currently unused +(define (common:cleanup-db dbstruct #!key (full #f)) + (case (rmt:transport-mode) + ((http) + (apply db:multi-db-sync + dbstruct + 'schema + 'killservers + 'adj-target + 'new2old + '(dejunk) + )) + ((tcp nfs) + (apply db:multi-db-sync + dbstruct + 'schema + 'killservers + 'adj-target + 'new2old + '(dejunk) + ))) + (if (common:api-changed?) + (common:set-last-run-version))) +;; keep gzip snapshots of filepath (.mins.gz .hrs.gz .days.gz .weeks.gz) in subdir next to it; returns #t when filepath exists, #f otherwise +(define (common:snapshot-file filepath #!key (subdir ".") ) + (if (file-exists? filepath) + (let* ((age-sec (lambda (file) + (if (file-exists? file) + (- (current-seconds) (file-modification-time file)) + 1000000000))) ;; return really old value if file doesn't exist. we want to clobber it if old or not exist. + (ok-flag #t) + (age-mins (lambda (file) (/ (age-sec file) 60))) + (age-hrs (lambda (file) (/ (age-mins file) 60))) + (age-days (lambda (file) (/ (age-hrs file) 24))) + (age-wks (lambda (file) (/ (age-days file) 7))) + (docmd (lambda (cmd) + (cond + (ok-flag + (let ((res (system cmd))) + (cond + ((eq? 0 res) + #t) + (else + (set! ok-flag #f) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Command failed with exit code " + (if (< res 0) + res + (/ res 8)) " ["cmd"]" ) + #f)))) + (else + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Not runnining command due to prior error. 
["cmd"]") + #f)))) + (copy (lambda (src dest) (docmd (conc "/bin/cp '"src"' '"dest"'")))) + (copy+zip (lambda (src dest) (docmd (conc "gzip -c - < '"src"' > '"dest"'")))) + (fullpath (realpath filepath)) + (basedir (pathname-directory fullpath)) + (basefile (pathname-strip-directory fullpath)) + ;;(prevfile (conc filepath ".prev.gz")) + (minsfile (conc basedir "/" subdir "/" basefile ".mins.gz")) ;; the four snapshot tiers all live in <basedir>/<subdir>/ + (hrsfile (conc basedir "/" subdir "/" basefile ".hrs.gz")) + (daysfile (conc basedir "/" subdir "/" basefile ".days.gz")) + (wksfile (conc basedir "/" subdir "/" basefile ".weeks.gz"))) + + ;; create subdir if it does not exist + (if (not (directory-exists? (conc basedir "/" subdir))) + (docmd (conc "/bin/mkdir -p '"(conc basedir "/" subdir)"'"))) + + ;; copy&zip to .mins if it does not exist + (if (not (file-exists? minsfile)) + (copy+zip filepath minsfile)) + ;; copy .mins to .hrs if it does not exist + (if (not (file-exists? hrsfile)) + (copy minsfile hrsfile)) + ;; copy .hrs to .days if it does not exist + (if (not (file-exists? daysfile)) + (copy hrsfile daysfile)) + ;; copy .days to .weeks if it does not exist + (if (not (file-exists? 
wksfile)) + (copy daysfile wksfile)) + + + ;; if age(.mins.gz) >= 1 minute (NOTE(review): this comment used to say ">= 1h" but the test below uses age-mins - confirm which is intended): + ;; copy .mins.gz .hrs.gz + ;; copy+zip the file to .mins.gz + (when (>= (age-mins minsfile) 1) + (copy minsfile hrsfile) + (copy+zip filepath minsfile)) + + ;; if age(.hrs.gz) >= 1d: + ;; copy .hrs.gz .days.gz + ;; copy .mins.gz .hrs.gz + (when (>= (age-days hrsfile) 1) + (copy hrsfile daysfile) + (copy minsfile hrsfile)) + + ;; if age(.days.gz) >= 1w: + ;; copy .days.gz .weeks.gz + ;; copy .hrs.gz .days.gz + (when (>= (age-wks daysfile) 1) + (copy daysfile wksfile) + (copy hrsfile daysfile)) + #t) + #f)) + +;;====================================================================== +;; Rotate logs, logic: +;; if a log is bigger than 200000 bytes and older than 48 hours (server logs only need to be that old): +;; remove previous compressed log and compress this log +;; WARNING: This proc operates assuming that it is in the directory above the +;; logs directory you wish to log-rotate. +;; other entries older than [setup] log-expire-days (default 30) are deleted; finally the oldest entries are deleted until at most [setup] max-logfiles (default 600) remain +(define (common:rotate-logs) + (let* ((all-files (make-hash-table)) + (stats (make-hash-table)) + (inc-stat (lambda (key) + (hash-table-set! stats key (+ (hash-table-ref/default stats key 0) 1)))) + (max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "600")))) ;; name -> age + (if (not (directory-exists? "logs"))(create-directory "logs")) + (directory-fold + (lambda (file rem) + (handle-exceptions + exn + (begin + (debug:print-info 2 *default-log-port* "unable to rotate log " file ", probably handled by another process, this is safe to ignore. exn=" exn) + (debug:print 2 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + ;; (print-call-chain (current-error-port)) ;; + ) + (let* ((fullname (conc "logs/" file)) + (mod-time (file-modification-time fullname)) + (file-age (- (current-seconds) mod-time)) + (file-old (> file-age (* 48 60 60))) + (file-big (> (file-size fullname) 200000))) + (hash-table-set! 
all-files file mod-time) + (if (or (and (string-match "^.*.log" file) + file-old + file-big) + (and (string-match "^server-.*.log" file) + file-old)) + (let ((gzfile (conc fullname ".gz"))) + (if (common:file-exists? gzfile) + (begin + (debug:print-info 0 *default-log-port* "removing " gzfile) + (delete-file* gzfile) + (hash-table-delete! all-files (conc file ".gz")) ;; all-files is keyed by bare file name, not by the logs/ path + )) + (debug:print-info 0 *default-log-port* "compressing " file) + (system (conc "gzip " fullname)) + (inc-stat "gzipped") + (hash-table-set! all-files (conc file ".gz") mod-time) ;; add the .gz file and remove the base file; store mod-time like every other entry so the oldest-first trim below compares like with like + (hash-table-delete! all-files file) + ) + (if (and (> file-age (* (string->number (or (configf:lookup *configdat* "setup" "log-expire-days") "30")) 24 3600)) + (file-exists? fullname)) ;; just in case it was gzipped - will get it next time + (handle-exceptions + exn + #f + (if (directory? fullname) + (begin + (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") + (inc-stat "directories")) + (begin + (delete-file* fullname) + (inc-stat "deleted"))) + (hash-table-delete! all-files file))))))) + '() + "logs") + (for-each + (lambda (category) + (let ((quant (hash-table-ref/default stats category 0))) + (if (> quant 0) + (debug:print-info 0 *default-log-port* category " log files: " quant)))) + `("deleted" "gzipped" "directories")) + (let ((num-logs (hash-table-size all-files))) + (if (> num-logs max-allowed) ;; because NFS => don't let number of logs exceed max-allowed ([setup] max-logfiles, default 600) + (let ((files (take (sort (hash-table-keys all-files) ;; smallest mod-time first, i.e. the oldest entries are the ones removed + (lambda (a b) + (< (hash-table-ref all-files a)(hash-table-ref all-files b)))) + (- num-logs max-allowed)))) + (for-each + (lambda (file) + (let* ((fullname (conc "logs/" file))) + (if (directory? fullname) + (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! 
Cannot rotate it, it is best to not put subdirectories in the logs dir.") + (handle-exceptions + exn + (debug:print-error 0 *default-log-port* "failed to remove " fullname ", exn=" exn) + (delete-file* fullname))))) + files) + (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files.")))))) + +;;====================================================================== +;; Force a megatest cleanup-db if version is changed and skip-version-check not specified +;; Do NOT check if not on homehost! +;; Migration runs only when megatest.config and .mtdb/main.db both exist, the db is writable and the user owns megatest.config; setting MT_SKIP_DB_MIGRATE skips it; every other case exits with status 1 +(define (common:exit-on-version-changed) + (if (and *toppath* ;; do nothing if *toppath* not yet provided + (common:on-homehost?)) + (if (common:api-changed?) + (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config")) + (dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") "/.mtdb/main.db")) + (read-only (not (file-write-access? dbfile))) + (dbstruct (db:setup))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;; #t))) + (debug:print 0 *default-log-port* + "WARNING: Version mismatch!\n" + " expected: " (common:version-signature) "\n" + " got: " (common:get-last-run-version)) + (cond + ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t) + ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only) + (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db + (debug:print 0 *default-log-port* " I see you are the owner of megatest.config, attempting to cleanup and reset to new version") + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "Failed to switch versions. exn=" exn) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (print-call-chain (current-error-port)) + (exit 1)) + (common:cleanup-db dbstruct))) + ((not (common:file-exists? mtconf)) + (debug:print 0 *default-log-port* " megatest.config does not exist in this area. 
Cannot proceed with megatest version migration.") + (exit 1)) + ((not (common:file-exists? dbfile)) + (debug:print 0 *default-log-port* " .mtdb/main.db does not exist in this area. Cannot proceed with megatest version migration.") + (exit 1)) + ((not (eq? (current-user-id)(file-owner mtconf))) ;; the ownership test is on megatest.config, so the message names that file + (debug:print 0 *default-log-port* " You do not own megatest.config in this area. Cannot proceed with megatest version migration.") + (exit 1)) + (read-only + (debug:print 0 *default-log-port* " You have read-only access to this area. Cannot proceed with megatest version migration.") + (exit 1)) + (else + (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"") + (exit 1))))))) +;;====================================================================== +;; (begin +;; (debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.") +;; (exit 1)))) + +;;====================================================================== +;; S P A R S E A R R A Y S +;;====================================================================== +;; a sparse array is a sparse vector of rows, each row itself a sparse vector; row 0 is created up front +(define (make-sparse-array) + (let ((a (make-sparse-vector))) + (sparse-vector-set! a 0 (make-sparse-vector)) + a)) + +(define (sparse-array? a) + (and (sparse-vector? a) + (sparse-vector? (sparse-vector-ref a 0)))) + +(define (sparse-array-ref a x y) + (let ((row (sparse-vector-ref a x))) + (if row + (sparse-vector-ref row y) + #f))) + +(define (sparse-array-set! a x y val) + (let ((row (sparse-vector-ref a x))) + (if row + (sparse-vector-set! row y val) + (let ((new-row (make-sparse-vector))) + (sparse-vector-set! a x new-row) + (sparse-vector-set! new-row y val))))) + +;;====================================================================== +;; L O C K E R S A N D B L O C K E R S +;;====================================================================== + +;; block further accesses to databases. Call this before shutting db down +(define (common:db-block-further-queries) + (mutex-lock! 
*db-access-mutex*) + (set! *db-access-allowed* #f) + (mutex-unlock! *db-access-mutex*)) + +;; return the current value of *db-access-allowed*, read while holding *db-access-mutex* +(define (common:db-access-allowed?) + (mutex-lock! *db-access-mutex*) + (let ((val *db-access-allowed*)) ;; capture the flag before unlocking so the flag, not the result of mutex-unlock!, is returned + (mutex-unlock! *db-access-mutex*) + val)) + +;;====================================================================== +;; U S E F U L S T U F F +;;====================================================================== + +;; convert things to an alist or assoc list, #f gets converted to "" +;; +(define (common:to-alist dat) + (cond + ((list? dat) (map common:to-alist dat)) + ((vector? dat) + (map common:to-alist (vector->list dat))) + ((pair? dat) + (cons (common:to-alist (car dat)) + (common:to-alist (cdr dat)))) + ((hash-table? dat) + (map common:to-alist (hash-table->alist dat))) + (else + (if dat + dat + "")))) +;; like alist-ref but returns default when the key is missing or its value is #f +(define (common:alist-ref/default key alist default) + (or (alist-ref key alist) default)) + +;; moved into commonmod +;; +;; (define (common:low-noise-print waitval . keys) +;; (let* ((key (string-intersperse (map conc keys) "-" )) +;; (lasttime (hash-table-ref/default *common:denoise* key 0)) +;; (currtime (current-seconds))) +;; (if (> (- currtime lasttime) waitval) +;; (begin +;; (hash-table-set! 
*common:denoise* key currtime) +;; #t) +;; #f))) +;; decode instr: first try base64 decode + z3:decode-buffer + read; if that raises, retry with base64 decode + read only; if that raises too, log the error and return #f +(define (common:read-encoded-string instr) + (handle-exceptions + exn + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* "received bad encoded string \"" instr "\", message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) + (print-call-chain (current-error-port)) + #f) + (read (open-input-string (base64:base64-decode instr)))) + (read (open-input-string (z3:decode-buffer (base64:base64-decode instr)))))) + + +;;====================================================================== +;; S T A T E S A N D S T A T U S E S +;;====================================================================== + +;; BBnote: *common:std-states* - dashboard filter control and test control state buttons defined here; used in set-fields-panel and dboard:make-controls +(define *common:std-states* ;; for toggle buttons in dashboard + '( + (0 "ARCHIVED") + (1 "STUCK") + (2 "KILLREQ") + (3 "KILLED") + (4 "NOT_STARTED") + (5 "COMPLETED") + (6 "LAUNCHED") + (7 "REMOTEHOSTSTART") + (8 "RUNNING") + )) + +(define *common:dont-roll-up-states* + '("DELETED" + "REMOVING" + "CLEANING" + "ARCHIVE_REMOVING" + )) + +;;====================================================================== +;; BBnote: *common:std-statuses* dashboard filter control and test control status buttons defined here; used in set-fields-panel and dboard:make-controls +;; note these statuses are sorted from better to worse. 
+;; This sort order is important to dcommon:status-compare3 and db:set-state-status-and-roll-up-items +(define *common:std-statuses* + '(;; (0 "DELETED") + (1 "n/a") + (2 "PASS") + (3 "SKIP") + (4 "WARN") + (5 "WAIVED") + (6 "CHECK") + (7 "STUCK/DEAD") + (8 "DEAD") + (9 "FAIL") + (10 "PREQ_FAIL") + (11 "PREQ_DISCARDED") + (12 "ABORT"))) + +(define *common:ended-states* ;; states which indicate the test is stopped and will not proceed + '("COMPLETED" "ARCHIVED" "KILLED" "KILLREQ" "STUCK" "INCOMPLETE" )) + +(define *common:badly-ended-states* ;; these roll up as CHECK, i.e. results need to be checked + '("KILLED" "KILLREQ" "STUCK" "INCOMPLETE" "DEAD" "CHECK")) +;; NOTE(review): despite the name, every entry of the next list is a status value listed in *common:std-statuses* +(define *common:well-ended-states* ;; an item's prereq in this state allows item to proceed + '("PASS" "WARN" "WAIVED" "SKIP")) + +;; BBnote: *common:running-states* used from db:set-state-status-and-roll-up-items +(define *common:running-states* ;; test is either running or can be run + '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED" "STARTED")) + +(define *common:cant-run-states* ;; These are stopping conditions that prevent a test from being run + '("COMPLETED" "KILLED" "UNKNOWN" "INCOMPLETE" "ARCHIVED")) + +(define *common:not-started-ok-statuses* ;; if not one of these statuses when in not_started state treat as dead + '("n/a" "na" "PASS" "FAIL" "WARN" "CHECK" "WAIVED" "DEAD" "SKIP")) + +;;====================================================================== +;; group tests into buckets corresponding to rollup +;;; Running, completed-pass, completed-non-pass + worst status, not started. 
+;; filter out +;(define (common:categorize-items-for-rollup in-tests) +; ( +;; sort items by the rank paired with each item in order, a list of (rank item) entries (the shape of *common:std-statuses*); items missing from order rank 0; comp compares two ranks and defaults to > when #f +(define (common:special-sort items order comp) + (let ((items-order (map reverse order)) + (acomp (or comp >))) + (sort items + (lambda (a b) + (let ((a-num (cadr (or (assoc a items-order) '(0 0)))) + (b-num (cadr (or (assoc b items-order) '(0 0))))) + (acomp a-num b-num)))))) + +;;====================================================================== +;; ;; given a toplevel with currstate, currstatus apply state and status +;; ;; => (newstate . newstatus) +;; (define (common:apply-state-status currstate currstatus state status) +;; (let* ((cstate (string->symbol (string-downcase currstate))) +;; (cstatus (string->symbol (string-downcase currstatus))) +;; (sstate (string->symbol (string-downcase state))) +;; (sstatus (string->symbol (string-downcase status))) +;; (nstate #f) +;; (nstatus #f)) +;; (set! nstate +;; (case cstate +;; ((completed not_started killed killreq stuck archived) +;; (case sstate ;; completed -> sstate +;; ((completed killed killreq stuck archived) completed) +;; ((running remotehoststart launched) running) +;; (else unknown-error-1))) +;; ((running remotehoststart launched) +;; (case sstate +;; ((completed killed killreq stuck archived) #f) ;; need to look at all items +;; ((running remotehoststart launched) running) +;; (else unknown-error-2))) +;; (else unknown-error-3))) +;; (set! 
nstatus +;; (case sstatus +;; ((pass) +;; (case nstate +;; ((pass n/a deleted) pass) +;; ((warn) warn) +;; ((fail) fail) +;; ((check) check) +;; ((waived) waived) +;; ((skip) skip) +;; ((stuck/dead) stuck) +;; ((abort) abort) +;; (else unknown-error-4))) +;; ((warn) +;; (case nstate +;; ((pass warn n/a skip deleted) warn) +;; ((fail) fail) +;; ((check) check) +;; ((waived) waived) +;; ((stuck/dead) stuck) +;; (else unknown-error-5))) +;; ((fail) +;; (case nstate +;; ((pass warn fail check n/a waived skip deleted stuck/dead stuck) fail) +;; ((abort) abort) +;; (else unknown-error-6))) +;; (else unknown-error-7))) +;; (cons +;; (if nstate (symbol->string nstate) nstate) +;; (if nstatus (symbol->string nstatus) nstatus)))) + +;;====================================================================== +;; D E B U G G I N G S T U F F +;;====================================================================== + +(define *verbosity* 1) +(define *logging* #f) +;; value of (args:get-arg val), or default when that is #f +(define (get-with-default val default) + (let ((val (args:get-arg val))) + (if val val default))) +;; second element of the entry for key in lst; when there is no entry, the optional default argument (or #f if none was given) +(define (assoc/default key lst . default) + (let ((res (assoc key lst))) + (if res (cadr res)(if (null? default) #f (car default))))) +;; area name: the first available of [setup] area-name, [setup] testsuite, $MT_TESTSUITE_NAME, pathname-file of the toppath, else a placeholder string +(define (common:get-testsuite-name) + (or (configf:lookup *configdat* "setup" "area-name") ;; megatest is a flexible tool, testsuite is too limiting a description. + (configf:lookup *configdat* "setup" "testsuite" ) + (getenv "MT_TESTSUITE_NAME") + (pathname-file (or (if (string? *toppath* ) + (pathname-file *toppath*) + #f) + (common:get-toppath #f))) + "please-set-setup-area-name")) ;; (pathname-file (current-directory))))) + +;;====================================================================== +;; safe getting of toppath +(define (common:get-toppath areapath) + (or *toppath* + (if areapath + (begin + (set! *toppath* areapath) + (setenv "MT_RUN_AREA_HOME" areapath) + areapath) + #f) + (if (getenv "MT_RUN_AREA_HOME") + (begin + (set! 
*toppath* (getenv "MT_RUN_AREA_HOME")) + *toppath*) + #f) + ;; last resort, look for megatest.config in the current directory and then in each parent up to "/" + (let loop ((thepath (realpath "."))) + (if (file-exists? (conc thepath "/megatest.config")) + thepath + (if (equal? thepath "/") + (begin + (debug:print-error 0 *default-log-port* "Unable to find megatest home directory.") + #f) + (loop (pathname-directory thepath))))) + )) + + +;;====================================================================== +;; redefine for future cleanup (converge on area-name, the more generic +;; +(define common:get-area-name common:get-testsuite-name) +;; return (and memoize in *db-cache-path*) a writable per-area directory under /tmp, making sure .mtdb exists in both *toppath* and that directory; returns #f when *toppath* is unset and exits 1 if an exception is raised. NOTE(review): unlike the first candidate path, the second has no "/" between tsname and the translated toppath - confirm intended +(define (common:get-db-tmp-area . junk) + (if *db-cache-path* + *db-cache-path* + (if *toppath* ;; common:get-create-writeable-dir + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* "Couldn't create path to " *db-cache-path* ", exn=" exn) + (exit 1)) + (let* ((toppath (common:real-path *toppath*)) + (tsname (common:get-testsuite-name)) + (dbpath (common:get-create-writeable-dir + (list (conc "/tmp/" (current-user-name) + "/megatest_localdb/" + tsname "/" + (string-translate toppath "/" ".")) + (conc "/tmp/" (current-process-id) ;; just in case we have an issue with the dir by own user name + "/"(current-user-name) "/megatest_localdb/" + tsname + (string-translate toppath "/" ".")) + )))) + (set! *db-cache-path* dbpath) + ;; ensure megatest area has .mtdb + (let ((dbarea (conc *toppath* "/.mtdb"))) + (if (not (file-exists? dbarea)) + (create-directory dbarea))) + ;; ensure tmp area has .mtdb + (let ((dbarea (conc dbpath "/.mtdb"))) + (if (not (file-exists? dbarea)) + (create-directory dbarea))) + dbpath)) + #f))) +;; md5 digest string of *toppath* +(define (common:get-area-path-signature) + (message-digest-string (md5-primitive) *toppath*)) + +;;====================================================================== +;; E X I T H A N D L I N G +;;====================================================================== + +(define (common:run-sync?) 
+ (and *toppath* ;; gate if called before *toppath* is set + (common:on-homehost?) + (args:get-arg "-server"))) + + +(define (std-signal-handler signum) + ;; (signal-mask! signum) + (set! *time-to-exit* #t) + ;;(debug:print-info 13 *default-log-port* "got signal "signum) + (debug:print-error 0 *default-log-port* "Received signal " signum " aaa exiting promptly") + ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway + (exit)) + +(define (special-signal-handler signum) + ;; (signal-mask! signum) + (set! *time-to-exit* #t) + ;;(debug:print-info 13 *default-log-port* "got signal "signum) + (debug:print-error 0 *default-log-port* "Received signal " signum " sending email befor exiting!!") + ;;TODO send email to notify admin contact listed in the config that the lisner got killed + ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway + (exit)) + + +(set-signal-handler! signal/int std-signal-handler) ;; ^C +(set-signal-handler! signal/term std-signal-handler) + +;; (set-signal-handler! signal/stop std-signal-handler) ;; ^Z NO, do NOT handle ^Z! + +;;====================================================================== +;; M I S C U T I L S +;;====================================================================== + +;;====================================================================== +;; convert stuff to a number if possible +(define (any->number val) + (cond + ((number? val) val) + ((string? val) (string->number val)) + ((symbol? val) (any->number (symbol->string val))) + (else #f))) + +(define (any->number-if-possible val) + (let ((num (any->number val))) + (if num num val))) + +(define (patt-list-match item patts) + (debug:print-info 8 *default-log-port* "patt-list-match item=" item " patts=" patts) + (if (and item patts) ;; here we are filtering for matches with item patterns + (let ((res #f)) ;; look through all the item-patts if defined, format is patt1,patt2,patt3 ... 
wildcard is % + (for-each + (lambda (patt) + (let ((modpatt (string-substitute "%" ".*" patt #t))) + (debug:print-info 10 *default-log-port* "patt " patt " modpatt " modpatt) + (if (string-match (regexp modpatt) item) + (set! res #t)))) + (string-split patts ",")) + res) + #t)) + +;;====================================================================== +;; '(print (string-intersperse (map cadr (hash-table-ref/default (read-config "megatest.config" \#f \#t) "disks" '"'"'("none" ""))) "\n"))' +(define (common:get-disks #!key (configf #f)) + (hash-table-ref/default + (or configf (read-config "megatest.config" #f #t)) + "disks" '("none" ""))) + +(define (common:get-install-area) + (let ((exe-path (car (argv)))) + (if (common:file-exists? exe-path) + (handle-exceptions + exn + #f + (pathname-directory + (pathname-directory + (pathname-directory exe-path)))) + #f))) + +;;====================================================================== +;; return first path that can be created or already exists and is writable +;; +(define (common:get-create-writeable-dir dirs) + (if (null? dirs) + #f + (let loop ((hed (car dirs)) + (tal (cdr dirs))) + (let ((res (or (and (directory? hed) + (file-write-access? hed) + hed) + (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "could not create " hed + ", this might cause problems down the road. exn=" exn) + #f) + (create-directory hed #t))))) + (if (and (string? res) + (directory? res)) + res + (if (null? tal) + #f + (loop (car tal)(cdr tal)))))))) + +;;====================================================================== +;; return the youngest timestamp . 
;; filename
;;
;; glob-list is a list of glob patterns. Files whose modification time
;; cannot be read count as time 0; the result is '(0 "n/a") when nothing
;; matches.
(define (common:get-youngest glob-list)
  (let ((all-files (apply append
                          (map (lambda (patt)
                                 (handle-exceptions
                                  exn
                                  '()
                                  (glob patt)))
                               glob-list))))
    (fold (lambda (fname res)
            (let ((last-mod (car res))
                  (curmod   (handle-exceptions
                             exn
                             0
                             (file-modification-time fname))))
              (if (> curmod last-mod)
                  (list curmod fname)
                  res)))
          '(0 "n/a")
          all-files)))

;;======================================================================
;; use bash to expand a glob. Does NOT handle paths with spaces!
;;
;; NOTE(review): instr is spliced into a shell command line unescaped; only
;; pass trusted patterns.
(define (common:bash-glob instr)
  (let ((line (with-input-from-pipe
               (conc "/bin/bash -c \"echo " instr "\"")
               read-line)))
    ;; read-line yields an eof object when bash printed nothing;
    ;; string-split would raise on it, so treat that as "no matches"
    (if (string? line)
        (string-split line)
        '())))

;;======================================================================
;; Some safety net stuff
;;======================================================================

;;======================================================================
;; return input if it is a list or return null
;; (logs `message` if given and returns `ovrd` instead of '() when supplied)
(define (common:list-or-null inlst #!key (ovrd #f)(message #f))
  (if (list? inlst)
      inlst
      (begin
        (if message (debug:print-error 0 *default-log-port* message))
        (or ovrd '()))))

;;======================================================================
;; T A R G E T S ,   S T A T E ,   S T A T U S ,
;;                    R U N N A M E    A N D   T E S T P A T T
;;======================================================================

;;======================================================================
;;  (map print (map car (hash-table->alist (read-config "runconfigs.config" #f #t))))
;;
;; NOTE(review): in the text under review everything between
;; "(make-hash-table))))" and "force-setting) #f))" is missing (it appears to
;; have been dropped during extraction). The end of
;; common:get-runconfig-targets and the head of the definition that follows
;; cannot be reconstructed from here, and the surviving fragments are not
;; balanced s-expressions. They are kept verbatim below, commented out, until
;; the missing text is restored from the original file.
;;
;; (define (common:get-runconfig-targets #!key (configf #f))
;;   (let ((targs       (sort (map car (hash-table->alist
;;                                      (or configf ;; NOTE: There is no value in using runconfig:read here.
;;                                          (read-config (conc *toppath* "/runconfigs.config")
;;                                                       #f #t)
;;                                          (make-hash-table))))
;;                            stringsymbol force-setting) #f))
;;         (force-result (case force-type
;;                         ((#f)     #f)
;;                         ((always) #t)
;;                         ((test)   (if (args:get-arg "-execute") ;; we are in a test
;;                                       #t
;;                                       #f))
;;                         (else
;;                          (debug:print 0 *default-log-port* "ERROR: Bad server force setting " force-setting ", forcing server.")
;;                          #t)))) ;; default to requiring server
;;     (if force-result
;;         (begin
;;           (debug:print-info 0 *default-log-port* "ATTENTION! Forcing use of server, force setting is \"" force-setting "\".")
;;           #t)
;;         #f)))

;;======================================================================
;; M I S C   L I S T S
;;======================================================================

;;======================================================================
;; items in lista are matched value and position in listb
;; return the remaining items in listb or #f
;;
(define (common:list-is-sublist lista listb)
  (if (null? lista)
      listb ;; all items in listb are "remaining"
      (if (> (length lista)(length listb))
          #f
          (let loop ((heda (car lista))
                     (tala (cdr lista))
                     (hedb (car listb))
                     (talb (cdr listb)))
            (if (equal? heda hedb)
                (if (null? tala) ;; we are done
                    talb
                    (loop (car tala)
                          (cdr tala)
                          (car talb)
                          (cdr talb)))
                #f)))))

;;======================================================================
;; Needed for long lists to be sorted where (apply max ... ) dies
;; Returns #f for an empty list instead of raising on (car '()).
;;
(define (common:max inlst)
  (if (null? inlst)
      #f
      (fold max (car inlst) (cdr inlst))))

;;======================================================================
;; get min or max, use > for max and < for min, this works around the limits on apply
;;
(define (common:min-max comp lst)
  (if (null? lst)
      #f ;; better than an exception for my needs
      (fold (lambda (a b)
              (if (comp a b) a b))
            (car lst)
            lst)))

;;======================================================================
;; sum of the numbers in lst (0 for an empty list); uses fold to work around
;; the limits on apply
;;
(define (common:sum lst)
  ;; seed with 0: seeding with (car lst) while folding over the whole list
  ;; would count the first element twice
  (fold + 0 lst))

;;======================================================================
;; path list to hash-table tree
;;  ((a b c)(a b d)(e b c)) => ((a (b (d) (c))) (e (b (c))))
;;
(define (common:list->htree lst)
  (let ((resh (make-hash-table)))
    (for-each
     (lambda (inlst)
       (let loop ((ht  resh)
                  (hed (car inlst))
                  (tal (cdr inlst)))
         (if (hash-table-ref/default ht hed #f)
             (if (not (null? tal))
                 (loop (hash-table-ref ht hed)
                       (car tal)
                       (cdr tal)))
             (begin
               (hash-table-set! ht hed (make-hash-table))
               (loop ht hed tal)))))
     lst)
    resh))

;;======================================================================
;; hash-table tree to html list tree
;;
;;   tipfunc takes two parameters: y the tip value and path the path to that point
;;
(define (common:htree->html ht path tipfunc)
  (let ((datlist (sort (hash-table->alist ht)
                       (lambda (a b)
                         (string< (car a)(car b))))))
    (if (null? datlist)
        (tipfunc #f path) ;; really shouldn't get here
        (s:ul
         (map (lambda (x)
                (let* ((levelname (car x))
                       (y         (cdr x))
                       (newpath   (append path (list levelname)))
                       (leaf      (or (not (hash-table? y))
                                      (null? (hash-table-keys y)))))
                  (if leaf
                      (s:li (tipfunc y newpath))
                      (s:li
                       (list
                        levelname
                        (common:htree->html y newpath tipfunc))))))
              datlist)))))

;;======================================================================
;; hash-table tree to alist tree
;;
(define (common:htree->atree ht)
  (map (lambda (x)
         (cons (car x)
               (let ((y (cdr x)))
                 (if (hash-table? y)
                     (common:htree->atree y)
                     y))))
       (hash-table->alist ht)))

;;======================================================================
;; M U N G E   D A T A   I N T O   N I C E   F O R M S
;;======================================================================

;;======================================================================
;; Generate an index for a sparse list of key values
;;   ( (rowname1 colname1 val1)(rowname2 colname2 val2) )
;;
;; =>
;;
;;   ( (rowname1 1)(rowname2 2))    ;; rownames -> num (numbering starts at 1,
;;     (colname1 1)(colname2 2)) )  ;; colnames -> num  newest entry first)
;;
;; optional apply proc to rownum colnum rowkey colkey value
(define (common:sparse-list-generate-index data #!key (proc #f))
  (if (null? data)
      (list '() '())
      (let loop ((hed (car data))
                 (tal (cdr data))
                 (rownames '())
                 (colnames '())
                 (rownum   0)
                 (colnum   0))
        (let* ((rowkey          (car   hed))
               (colkey          (cadr  hed))
               (value           (caddr hed))
               (existing-rowdat (assoc rowkey rownames))
               (existing-coldat (assoc colkey colnames))
               ;; an already-seen key keeps the index it was first given;
               ;; rownum/colnum are only the highest indexes handed out so far
               (curr-rownum     (if existing-rowdat (cadr existing-rowdat) (+ rownum 1)))
               (curr-colnum     (if existing-coldat (cadr existing-coldat) (+ colnum 1)))
               (new-rownames    (if existing-rowdat rownames (cons (list rowkey curr-rownum) rownames)))
               (new-colnames    (if existing-coldat colnames (cons (list colkey curr-colnum) colnames))))
          ;; (debug:print-info 0 *default-log-port* "Processing record: " hed )
          (if proc (proc curr-rownum curr-colnum rowkey colkey value))
          (if (null? tal)
              (list new-rownames new-colnames)
              (loop (car tal)
                    (cdr tal)
                    new-rownames
                    new-colnames
                    (if (> curr-rownum rownum) curr-rownum rownum)
                    (if (> curr-colnum colnum) curr-colnum colnum)
                    ))))))

;;======================================================================
;; if it looks like a number -> convert it to a number, else return it
;;
(define (common:lazy-convert inval)
  (let* ((as-num (if (string? inval)(string->number inval) #f)))
    (or as-num inval)))

;;======================================================================
;; convert string a=1; b=2; c=a silly thing; d=
;; to '((a . 1)(b . 2)(c . "a silly thing")(d . ""))
;;
(define (common:val->alist val #!key (convert #f))
  (let ((val-list (string-split-fields ";\\s*" val #:infix)))
    (if val-list
        (map (lambda (x)
               (let ((f (string-split-fields "\\s*=\\s*" x #:infix)))
                 (case (length f)
                   ((0) `(,#f))  ;; null string case
                   ((1) `(,(string->symbol (car f))))
                   ((2) `(,(string->symbol (car f)) . ,(let ((inval (cadr f)))
                                                         (if convert (common:lazy-convert inval) inval))))
                   (else f))))
             val-list)
        '())))

;;======================================================================
;; S Y S T E M   S T U F F
;;======================================================================

;;======================================================================
;; lazy-safe get file mod time. on any error (file not existing etc.) return 0
;;
(define (common:lazy-modification-time fpath)
  (handle-exceptions
   exn
   (begin
     (debug:print 2 *default-log-port* "Failed to get modification time for " fpath ", treating it as zero. exn=" exn)
     0)
   (if (file-exists? fpath)
       (file-modification-time fpath)
       0)))

;;======================================================================
;; find timestamp of newest file associated with a sqlite db file
;; (newest of everything matching fpath*; 0 when nothing matches)
(define (common:lazy-sqlite-db-modification-time fpath)
  (let* ((glob-list (handle-exceptions
                     exn
                     (begin
                       (debug:print 0 *default-log-port* "Failed to glob " fpath "*, exn=" exn)
                       `(,(conc "/no/such/file, message: " ((condition-property-accessor 'exn 'message) exn))))
                     (glob (conc fpath "*"))))
         (file-list (if (eq? 0 (length glob-list))
                        '("/no/such/file")
                        glob-list)))
    (apply max
           (map
            common:lazy-modification-time
            file-list))))

;;======================================================================
;; make "nice-path" available in config files and the repl
(define nice-path common:nice-path)

;;======================================================================
;; returns *effective load* (not normalized)
;;
;; When the one minute load is below the five minute load it is returned
;; as-is; otherwise the rise between the two samples is extrapolated 60
;; seconds ahead (never below 0).
(define (common:get-intercept onemin fivemin)
  (if (< onemin fivemin) ;; load is decreasing, just use the onemin load
      onemin
      (let* ((load-change (- onemin fivemin))
             (tchange     (- 300 60)))
        (max (+ onemin (* 60 (/ load-change tchange))) 0))))

;;======================================================================
;; calculate a delay number based on a droop curve
;; inputs are:
;;   - load-in, load as from uptime, NOT normalized
;;   - numcpus, number of cpus, ideally use the real cpus, not threads
;;
;; With [load] new-load-method set a piecewise curve topping out at 60 is
;; used; otherwise an exponential from [load] exp-params, clamped to 0..30.
(define (common:get-delay load-in numcpus)
  (let* ((ratio      (/ load-in numcpus))
         (new-option (configf:lookup *configdat* "load" "new-load-method"))
         (paramstr   (or (configf:lookup *configdat* "load" "exp-params")
                         "15 12 1281453987.9543 0.75")) ;; 5 4 10 1"))
         (paramlst   (map string->number (string-split paramstr))))
    (if new-option
        (begin
          (cond ((and (>= ratio 0) (< ratio .5))
                 0)
                ((and (>= ratio 0.5) (<= ratio .9))
                 (* ratio (/ 5 .9)))
                ((and (> ratio .9) (<= ratio 1.1))
                 (+ 5 (* (- ratio .9) (/ 55 .2))))
                ((> ratio 1.1)
                 60)))
        (match paramlst
          ((r1 r2 s1 s2)
           (debug:print 3 *default-log-port* "Using params r1=" r1 " r2=" r2 " s1=" s1 " s2=" s2)
           (min (max (/ (expt r1 (* r2 s2 ratio)) s1) 0) 30))
          (else
           (debug:print 0 *default-log-port* "BAD exp-params, should be \"r1 r2 s1 s2\" but got " paramstr)
           30)))))

;; -mrw- this appears to not be used
;;
;; (define (common:print-delay-table)
;;   (let loop ((x 0))
;;     (print x "," (common:get-delay x 1))
;;     (if (< x 2)
;;       (loop (+ x 0.1)))))

;; (define
;; (get-cpu-load #!key (remote-host #f))
;;   (car (common:get-cpu-load remote-host)))

;;======================================================================
;;   (let* ((load-res (process:cmd-run->list "uptime"))
;;          (load-rx  (regexp "load average:\\s+(\\d+)"))
;;          (cpu-load #f))
;;     (for-each (lambda (l)
;;                 (let ((match (string-search load-rx l)))
;;                   (if match
;;                       (let ((newval (string->number (cadr match))))
;;                         (if (number? newval)
;;                             (set! cpu-load newval))))))
;;               (car load-res))
;;     cpu-load))

;;======================================================================
;; get values from cached info from dropping file in .sysdata dir
;; e.g. key is host and dtype is normalized-load
;;
;; Returns the datum read from <toppath>/.sysdata/<key>-<dtype>.log, or #f
;; when *toppath* is unset, the file is missing or unreadable, or it is `age`
;; seconds old or older. A file that cannot be parsed is deleted.
(define (common:get-cached-info key dtype #!key (age 10))
  (if *toppath*
      (let* ((fullpath (conc *toppath* "/.sysdata/" key "-" dtype ".log"))
             (delfile  (lambda (exn)
                         (debug:print-info 2 *default-log-port* " removing bad file " fullpath ", exn=" exn)
                         (delete-file* fullpath)
                         #f)))
        (if (and (file-exists? fullpath)
                 (file-read-access? fullpath))
            (handle-exceptions
             exn
             (begin
               (debug:print 0 *default-log-port* "failed to get cached info from " fullpath ", exn=" exn)
               #f)
             (debug:print 2 *default-log-port* "reading file " fullpath)
             (let ((real-age (- (current-seconds)
                                (handle-exceptions
                                 exn
                                 (begin
                                   (debug:print 1 *default-log-port* "Failed to read mod time on file "
                                                fullpath ", using 0, exn=" exn)
                                   0)
                                 (file-change-time fullpath)))))
               (if (< real-age age)
                   (handle-exceptions
                    exn
                    (delfile exn)
                    (let* ((res (with-input-from-file fullpath read)))
                      (if (eof-object? res)
                          (begin
                            (delfile "n/a")
                            #f)
                          res)))
                   (begin
                     (debug:print-info 2 *default-log-port* "file " fullpath
                                       " is too old (" real-age" seconds) to trust, skipping reading it")
                     #f))))
            (begin
              (debug:print 2 *default-log-port* "not reading file " fullpath)
              #f)))
      #f))

;; Pretty-print dat to <toppath>/.sysdata/<key>-<dtype>.log, creating the
;; directory if needed. Returns #f when *toppath* is unset or the write fails.
(define (common:write-cached-info key dtype dat)
  (if *toppath*
      (let* ((fulldir  (conc *toppath* "/.sysdata"))
             (fullpath (conc fulldir "/" key "-" dtype ".log")))
        (if (not (file-exists? fulldir))(create-directory fulldir #t))
        (handle-exceptions
         exn
         (begin
           (debug:print 0 *default-log-port* "failed to write file " fullpath ", exn=" exn)
           #f)
         (with-output-to-file fullpath (lambda ()(pp dat)))))
      #f))

;; Read the first three data of /proc/loadavg on remote-host over ssh.
;; Returns a list of three data (eof objects when the command printed
;; nothing) or #f on failure.
;; NOTE(review): remote-host is spliced into a shell command line unescaped.
(define (common:raw-get-remote-host-load remote-host)
  (let* ((inp #f))
    (handle-exceptions
     exn
     (begin
       ;; inp is still #f when open-input-pipe itself failed; closing #f here
       ;; would raise inside the handler and lose the original error
       (if inp
           (handle-exceptions
            exn2
            #f
            (close-input-pipe inp)))
       (debug:print 0 *default-log-port* "failed to ssh to " remote-host " and get loadavg. exn=" exn)
       #f) ;; more specific handling of errors needed
     (set! inp (open-input-pipe (conc "ssh " remote-host " cat /proc/loadavg")))
     ;; let* fixes the read order; argument evaluation order is unspecified
     (let* ((l1  (read inp))
            (l5  (read inp))
            (l15 (read inp))
            (res (list l1 l5 l15)))
       (close-input-pipe inp)
       res))))

;;======================================================================
;; get cpu load by reading from /proc/loadavg, return all three values
;;
;; Results are cached through common:get-cached-info under "cpu-load".
;; Failure sentinels: '(-99 -99 -99) on an exception, '(-1 -1 -1) when a value
;; is not a number, '(-2 -2 -2) when the result is not a three element list.
(define (common:get-cpu-load remote-host)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 *default-log-port* "failed to ssh or read loadavg from host " remote-host ", exn=" exn)
     '(-99 -99 -99))
   (let* ((actual-hostname (or remote-host (get-host-name) "localhost")))
     (or (common:get-cached-info actual-hostname "cpu-load")
         (let ((result (if (and remote-host
                                (not (equal? remote-host (get-host-name))))
                           (map (lambda (res)
                                  (if (eof-object? res) 9e99 res))
                                (common:raw-get-remote-host-load remote-host))
                           (with-input-from-file "/proc/loadavg"
                             (lambda ()
                               ;; let* fixes the read order
                               (let* ((l1  (read))
                                      (l5  (read))
                                      (l15 (read)))
                                 (list l1 l5 l15)))))))
           (match
            result
            ((l1 l2 l3)
             (if (and (number? l1)
                      (number? l2)
                      (number? l3))
                 (begin
                   (common:write-cached-info actual-hostname "cpu-load" result)
                   result)
                 '(-1 -1 -1))) ;; -1 is bad result
            (else '(-2 -2 -2))))))))

;;======================================================================
;; get normalized cpu load by reading from /proc/loadavg and /proc/cpuinfo return all three values and the number of real cpus and the number of threads
;; returns alist '((adj-cpu-load . normalized-proc-load) ... etc.
;;  keys: adj-proc-load, adj-core-load, 1m-load, 5m-load, 15m-load
;;
;; Falls back to a pessimistic default alist (with an `error` entry) whenever
;; the raw reader does not return a list of more than two entries.
(define (common:get-normalized-cpu-load remote-host)
  (let ((res     (common:get-normalized-cpu-load-raw remote-host))
        (default `((adj-proc-load . 2) ;; there is no right answer
                   (adj-core-load . 2)
                   (1m-load       . 2)
                   (5m-load       . 0) ;; causes a large delta - thus causing default of throttling if stuff goes wrong
                   (15m-load      . 0)
                   (proc          . 1)
                   (core          . 1)
                   (phys          . 1)
                   (error         . #t))))
    ;; anything that is not a usable alist (#f, an eof object, ...) => default
    (if (and (list? res)
             (> (length res) 2))
        res
        default)))

;; Lines of /proc/loadavg followed by /proc/cpuinfo and "end" from
;; remote-host, fetched over ssh.
;; NOTE(review): remote-host is spliced into a shell command line unescaped.
(define (common:ssh-get-loadavg remote-host)
  (let ((inp (open-input-pipe (conc "ssh " remote-host " \"cat /proc/loadavg;cat /proc/cpuinfo;echo end\""))))
    (let* ((res (read-lines inp)))
      (close-input-pipe inp)
      res)))

;; Build the normalized-load alist for remote-host (#f means this host),
;; using the "normalized-load" cache when it is fresh. Returns #f when no
;; data, or no loadavg line, could be read.
(define (common:get-normalized-cpu-load-raw remote-host)
  (let* ((actual-host (or remote-host (get-host-name)))) ;; #f is localhost
    (or (common:get-cached-info actual-host "normalized-load")
        (let ((data (if remote-host
                        (common:ssh-get-loadavg remote-host)
                        (append
                         (with-input-from-file "/proc/loadavg"
                           read-lines)
                         (with-input-from-file "/proc/cpuinfo"
                           read-lines)
                         (list "end"))))
              (load-rx  (regexp "^([\\d\\.]+)\\s+([\\d\\.]+)\\s+([\\d\\.]+)\\s+.*$"))
              (proc-rx  (regexp "^processor\\s+:\\s+(\\d+)\\s*$"))
              (core-rx  (regexp "^core id\\s+:\\s+(\\d+)\\s*$"))
              (phys-rx  (regexp "^physical id\\s+:\\s+(\\d+)\\s*$"))
              (max-num  (lambda (p n)(max (string->number p) n))))
          ;; (print "data=" data)
          (if (null? data) ;; something went wrong
              #f
              (let loop ((hed      (car data))
                         (tal      (cdr data))
                         (loads    #f)
                         (proc-num 0)  ;; processor includes threads
                         (phys-num 0)  ;; physical chip on motherboard
                         (core-num 0)) ;; core
                ;;; (print hed ", " loads ", " proc-num ", " phys-num ", " core-num)
                (if (null? tal) ;; have all our data, calculate normalized load and return result
                    (if (not loads)
                        ;; no loadavg line was recognised (e.g. ssh printed only
                        ;; an error); (car loads) would raise, so report failure
                        #f
                        (let* ((act-proc (+ proc-num 1))
                               (act-phys (+ phys-num 1))
                               (act-core (+ core-num 1))
                               (adj-proc-load (/ (car loads) act-proc))
                               (adj-core-load (/ (car loads) act-core))
                               (result
                                (append (list (cons 'adj-proc-load adj-proc-load)
                                              (cons 'adj-core-load adj-core-load))
                                        (list (cons '1m-load (car loads))
                                              (cons '5m-load (cadr loads))
                                              (cons '15m-load (caddr loads)))
                                        (list (cons 'proc act-proc)
                                              (cons 'core act-core)
                                              (cons 'phys act-phys)))))
                          (common:write-cached-info actual-host "normalized-load" result)
                          result))
                    (regex-case
                     hed
                     (load-rx  ( x l1 l5 l15 ) (loop (car tal)(cdr tal)(map string->number (list l1 l5 l15)) proc-num phys-num core-num))
                     (proc-rx  ( x p         ) (loop (car tal)(cdr tal) loads (max-num p proc-num) phys-num core-num))
                     (phys-rx  ( x p         ) (loop (car tal)(cdr tal) loads proc-num (max-num p phys-num) core-num))
                     (core-rx  ( x c         ) (loop (car tal)(cdr tal) loads proc-num phys-num (max-num c core-num)))
                     (else
                      (begin
                        ;; (print "NO MATCH: " hed)
                        (loop (car tal)(cdr tal) loads proc-num phys-num core-num)))))))))))

;; #t when a single ping to hostname exits with status 0.
;; NOTE(review): hostname is spliced into a shell command line unescaped.
(define (common:unix-ping hostname)
  (let ((res (system (conc "ping -c 1 " hostname " > /dev/null"))))
    (eq? res 0)))

;;======================================================================
;; ideally put all this info into the db, no need to preserve it across moving homehost
;;
;; return list of
;;  ( reachable? update-time cpuload )
(define (common:get-host-info hostname)
  (let* ((loadinfo (rmt:get-latest-host-load hostname)) ;; if this host happens to have been recently used by a test reuse the load data
         (load (car loadinfo))
         (load-sample-time (cdr loadinfo))
         (load-sample-age (- (current-seconds) load-sample-time))
         (loadinfo-timeout-seconds 6) ;; this was 20 seconds, seems way too lax. Switch to 6 seconds
         (host-last-update-timeout-seconds 4)
         (host-rec (hash-table-ref/default *host-loads* hostname #f))
         )
    (cond
     ((< load-sample-age loadinfo-timeout-seconds)
      (list #t
            load-sample-time
            load))
     ((and host-rec
           (< (current-seconds) (+ (host-last-update host-rec) host-last-update-timeout-seconds)))
      (list #t
            (host-last-update host-rec)
            (host-last-cpuload host-rec )))
     ((common:unix-ping hostname)
      (list #t
            (current-seconds)
            (alist-ref 'adj-core-load (common:get-normalized-cpu-load hostname)))) ;; this is cheaper than you might think. get-normalized-cpu-load is cached for up to 5 seconds
     (else
      (list #f 0 -1) ;; bad host, don't use!
      ))))

;;======================================================================
;; see defstruct host at top of file.
;;  host: reachable last-update last-used last-cpuload
;;
;; Refresh (creating when missing) the *host-loads* record of every entry of
;; hosts-raw that is a single whitespace-free token.
(define (common:update-host-loads-table hosts-raw)
  (let* ((hosts (filter (lambda (x)
                          (string-match (regexp "^\\S+$") x))
                        hosts-raw)))
    (for-each
     (lambda (hostname)
       (let* ((rec       (let ((h (hash-table-ref/default *host-loads* hostname #f)))
                           (if h
                               h
                               (let ((h (make-host)))
                                 (hash-table-set! *host-loads* hostname h)
                                 h))))
              (host-info (common:get-host-info hostname))
              (is-reachable (car host-info))
              (last-reached-time (cadr host-info))
              (load (caddr host-info)))
         (host-reachable-set! rec is-reachable)
         (host-last-update-set! rec last-reached-time)
         (host-last-cpuload-set! rec load)))
     hosts)))

;;======================================================================
;; go through the hosts from least recently used to most recently used, pick the first that meets the load criteral from the
;; [host-rules] section.
;;
;; Returns the chosen host name, or #f when no host qualifies. Rules are read
;; from [host-rules] <host-type>; maxnload defaults to 1.5 and maxjobrate to
;; 1/6 jobs per second. The chosen host's last-used time is set to now.
(define (common:get-least-loaded-host hosts-raw host-type configdat)
  (let* ((rdat       (configf:lookup configdat "host-rules" host-type))
         (rules      (common:val->alist (or rdat "") convert: #t)) ;; maxnload, maxnjobs, maxjobrate
         (maxnload   (common:alist-ref/default 'maxnload   rules 1.5)) ;; max normalized load
         (maxnjobs   (common:alist-ref/default 'maxnjobs   rules 1.5)) ;; max normalized number of jobs
         (maxjobrate (common:alist-ref/default 'maxjobrate rules (/ 1 6))) ;; max rate of submitting jobs to a given host in jobs/second
         (hosts      (filter (lambda (x)
                               (string-match (regexp "^\\S+$") x))
                             hosts-raw))
         ;; (best-host  #f)
         (get-rec    (lambda (hostname)
                       ;; (print "get-rec hostname=" hostname)
                       (let ((h (hash-table-ref/default *host-loads* hostname #f)))
                         (if h
                             h
                             (let ((h (make-host)))
                               (hash-table-set! *host-loads* hostname h)
                               h)))))
         (curr-time  (current-seconds))
         (get-hosts-sorted (lambda (hosts)
                             (sort hosts (lambda (a b)
                                           (let ((a-rec (get-rec a))
                                                 (b-rec (get-rec b)))
                                             ;; (print "a=" a " a-rec=" a-rec " host-last-used=" (host-last-used a-rec))
                                             ;; (print "b=" b " b-rec=" b-rec " host-last-used=" (host-last-used b-rec))
                                             (< (host-last-used a-rec)
                                                (host-last-used b-rec))))))))
    (debug:print 0 *default-log-port* "INFO: hosts-sorted=" (get-hosts-sorted hosts))
    (if (null? hosts)
        #f ;; no hosts to select from. All done and giving up now.
        (let ((hosts-sorted (get-hosts-sorted hosts)))
          (common:update-host-loads-table hosts)
          (let loop ((hostname  (car hosts-sorted))
                     (tal       (cdr hosts-sorted))
                     (best-host #f))
            (let* ((rec       (get-rec hostname))
                   (reachable (host-reachable rec))
                   (load      (host-last-cpuload rec))
                   (last-used (host-last-used rec))
                   (delta     (- curr-time last-used))
                   (job-rate  (if (> delta 0)
                                  (/ 1 delta)
                                  999)) ;; jobs per second
                   (new-best
                    (cond
                     ((not reachable)
                      (debug:print 0 *default-log-port* "Skipping host " hostname " as it cannot be reached.")
                      best-host)
                     ((and (< load     maxnload)    ;; load is acceptable
                           (< job-rate maxjobrate)) ;; job rate is acceptable
                      hostname)
                     (else best-host))))
              (debug:print 0 *default-log-port* "INFO: Trying host " hostname " with load " load ", last used " delta " seconds ago, with job-rate " job-rate " for running a test." )
              (if new-best
                  (begin ;; found a host, return it
                    (debug:print 0 *default-log-port* "INFO: Found host: " new-best " load: " load " last-used: " delta " seconds ago, with job-rate: " job-rate)
                    (host-last-used-set! rec curr-time)
                    new-best)
                  (if (null? tal) #f (loop (car tal)(cdr tal) best-host)))))))))

;; Block until the normalized load on the homehost is acceptable. In http
;; transport mode the host comes from server:choose-server (or #f, meaning
;; local, when already on the homehost); otherwise this host is used.
(define (common:wait-for-homehost-load maxnormload msg)
  (let loop ((start-time (current-seconds))) ;; we saw some instances of this being called before *toppath* was set. This might be an early setup race. This delay should help but it is impossible to test...
    (if (not *toppath*)
        (begin
          (debug:print 0 *default-log-port* "ERROR: common:wait-for-homehost-load called before *toppath* set.")
          (thread-sleep! 30)
          (if (< (- (current-seconds) start-time) 300)
              (loop start-time)))))
  (case (rmt:transport-mode)
    ((http)
     (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
                        #f
                        (server:choose-server *toppath* 'homehost)))
            (hh     (if hh-dat (car hh-dat) #f)))
       (common:wait-for-normalized-load maxnormload msg hh)))
    (else
     (common:wait-for-normalized-load maxnormload msg (get-host-name)))))

;; Number of "processor" entries in /proc/cpuinfo on remote-host (#f means
;; this host), or #f when it cannot be determined. A positive count is
;; memoized in *numcpus-cache* and in the "num-cpus" cache file.
(define (common:get-num-cpus remote-host)
  (let* ((actual-host (or remote-host (get-host-name))))
    ;; hosts had better not be changing the number of cpus too often!
    (or (hash-table-ref/default *numcpus-cache* actual-host #f)
        (let* ((numcpus (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (random 3600)))
                            (let* ((proc (lambda ()
                                           (let loop ((numcpu 0)
                                                      (inl    (read-line)))
                                             (if (eof-object? inl)
                                                 (if (> numcpu 0)
                                                     numcpu
                                                     #f) ;; if zero return #f so caller knows that things are not working
                                                 (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl)
                                                           (+ numcpu 1)
                                                           numcpu)
                                                       (read-line))))))
                                   (result (if (and remote-host
                                                    (not (equal? remote-host (get-host-name))))
                                               (common:generic-ssh
                                                (conc "ssh " remote-host " cat /proc/cpuinfo")
                                                proc -1)
                                               (with-input-from-file "/proc/cpuinfo" proc))))
                              ;; -1 is the ssh failure sentinel; report every
                              ;; failure as #f so callers retry instead of
                              ;; computing with a negative cpu count
                              (if (and (number? result)
                                       (> result 0))
                                  (begin
                                    (common:write-cached-info actual-host "num-cpus" result)
                                    result)
                                  #f)))))
          ;; only memoize real answers; a cached failure would otherwise stick
          ;; for the life of the process
          (if numcpus
              (hash-table-set! *numcpus-cache* actual-host numcpus))
          numcpus))))

;;======================================================================
;; wait for normalized cpu load to drop below maxload
;;
;; When the cpu count cannot be obtained, sleeps a random time (up to a
;; minute) and retries up to rem-tries times, then returns #f.
(define (common:wait-for-normalized-load maxnormload msg remote-host #!optional (rem-tries 5))
  (let ((num-cpus (common:get-num-cpus remote-host)))
    (if num-cpus
        ;; common:wait-for-cpuload takes two positional arguments followed by
        ;; keywords; the former third positional (waitdelay) no longer exists
        ;; and an extra value there can misalign keyword parsing
        (common:wait-for-cpuload maxnormload num-cpus msg: msg remote-host: remote-host)
        (begin
          (thread-sleep! (random 60)) ;; we failed to get num cpus. wait a bit and try again
          (if (> rem-tries 0)
              (common:wait-for-normalized-load maxnormload msg remote-host (- rem-tries 1))
              #f)))))

;;======================================================================
;; DO NOT CALL THIS DIRECTLY.
;; It is called from common:wait-for-normalized-load
;;   count     - count down to zero, at some point we'd give up if the load never drops
;;   num-tries - count down to zero number tries to get numcpus
;;
;;   maxnormload - maximum acceptable load per cpu
;;   numcpus-in  - cpu count from the caller; re-queried when it is not a
;;                 number greater than 1
;;   msg         - optional text appended to the "Delaying" message
;;   remote-host - host to check, #f for this host
;;
(define (common:wait-for-cpuload maxnormload numcpus-in
                                 #!key (count 1000)
                                 (msg #f)(remote-host #f)(num-tries 5))
  (let* ((loadavg           (common:get-cpu-load remote-host))
         ;; not possible to have zero. If we get 1, it's possible that we got the previous default, and we should check again.
         ;; Whatever comes back is clamped to at least 1 so the divisions
         ;; below never see zero, a negative sentinel or #f.
         (numcpus           (let ((n (if (and (number? numcpus-in)
                                              (> numcpus-in 1))
                                         numcpus-in
                                         (common:get-num-cpus remote-host))))
                              (if (and (number? n) (> n 0)) n 1)))
         (first             (car loadavg))
         (next              (cadr loadavg))
         (adjmaxload        (* maxnormload numcpus))
         ;; effective load accounts for load jumps, this should elminate all the first-next-avg, adjwait, load-jump-limit
         ;; etc.
         (effective-load    (common:get-intercept first next))
         (recommended-delay (common:get-delay effective-load numcpus))
         (effective-host    (or remote-host "localhost"))
         (normalized-effective-load (/ effective-load numcpus))
         (will-wait         (> normalized-effective-load maxnormload)))
    (if (and will-wait (> recommended-delay 1))
        (let* ((actual-delay (min recommended-delay 30)))
          (if (common:low-noise-print 30 (conc (round actual-delay) "-safe-load"))
              (debug:print-info 0 *default-log-port* "Load control, delaying "
                                actual-delay " seconds to maintain safe load. current normalized effective load is "
                                normalized-effective-load". maxnormload = " maxnormload " numcpus = " numcpus " loadavg = " loadavg " effective-load = " effective-load))
          (thread-sleep! actual-delay)))

    (cond
     ;; bad data, try again to get the data. This has to be tested first: a
     ;; negative sample gives an effective load of 0, so will-wait is false
     ;; and a later clause would never be reached.
     ((< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable
      (if (> num-tries 0)
          (begin
            (debug:print 0 *default-log-port* "WARNING: received bad data from get-cpu-load "
                         first ", we'll sleep 10s and try " num-tries " more times.")
            (thread-sleep! 10)
            (common:wait-for-cpuload maxnormload numcpus-in
                                     count: count msg: msg remote-host: remote-host num-tries: (- num-tries 1)))
          (debug:print 0 *default-log-port* "Load on " effective-host ", "
                       first" could not be retrieved. Giving up and continuing.")))

     ;; load is fine, carry on
     ((not will-wait)
      (if (common:low-noise-print 3600 (conc (round normalized-effective-load) "-load-acceptable-" effective-host))
          (debug:print 0 *default-log-port* "Effective load on " effective-host " is acceptable at " effective-load " continuing.")))

     ;; need to wait for load to drop
     ((> count 0)
      (debug:print-info 0 *default-log-port*
                        "Delaying 15" ;; adjwait
                        " seconds due to normalized effective load " normalized-effective-load ;; first
                        " exceeding max of " adjmaxload
                        " on server " (or remote-host (get-host-name))
                        " (normalized load-limit: " maxnormload ") " (if msg msg ""))
      (thread-sleep! 15) ;; adjwait)
      (common:wait-for-cpuload maxnormload numcpus count: (- count 1) msg: msg remote-host: remote-host)
      ;; put the message here to indicate came out of waiting
      (debug:print-info 1 *default-log-port*
                        "On host: " effective-host
                        ", effective load: " effective-load
                        ", numcpus: " numcpus
                        ", normalized effective load: " normalized-effective-load
                        ))

     ;; overloaded and count expired (i.e. went to zero)
     (else
      (if (common:low-noise-print 30 (conc (round effective-load) "-load-acceptable-" effective-host))
          (debug:print 0 *default-log-port* "Load on " effective-host " is still above the limit at effective normalized load of "
                       normalized-effective-load " but the wait count is exhausted, continuing."))))))

;;======================================================================
;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load
;;
;; (define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)(force-maxload #f)(num-tries 5))
;;   (let* ((loadavg (common:get-cpu-load remote-host))
;;          (numcpus (if (<= 1 numcpus-in) ;; not possible to have zero. If we get 1, it's possible that we got the previous default, and we should check again
;;                       (common:get-num-cpus remote-host)
;;                       numcpus-in))
;;          (maxload (if force-maxload
;;                       maxload-in
;;                       (if (number? maxload-in)
;;                           (max maxload-in 0.5)
;;                           0.5))) ;; so maxload must be greater than 0.5 for now BUG - FIXME?
;;          (first   (car loadavg))
;;          (next    (cadr loadavg))
;;          (adjmaxload (* maxload (max 1 numcpus))) ;; possible bug where
;;                                                   ;; numcpus (or could be
;;                                                   ;; maxload) is zero,
;;                                                   ;; crude fallback is to
;;                                                   ;; at least use 1
;;          (loadjmp (- first (if (> next (* numcpus 0.7)) ;; could do something with average of first and next?
;;                                0
;;                                next))) ;; we will force a conservative calculation any time next is large.
;;          (first-next-avg (/ (+ first next) 2))
;;          ;; add some randomness to the time to break any alignment
;;          ;; where netbatch dumps many jobs to machines simultaneously
;;          (adjwait (min (+ 300 (random 10)) (abs (* (+ (random 10)
;;                                                       (/ (- 1000 count) 10)
;;                                                       waitdelay)
;;                                                    (- first adjmaxload) ))))
;;          (load-jump-limit (configf:lookup-number *configdat* "setup" "load-jump-limit"))
;;          ;; effective load accounts for load jumps, this should elminate all the first-next-avg, adjwait, load-jump-limit
;;          ;; etc.
;;          (effective-load (common:get-intercept first next))
;;          (effective-host (or remote-host "localhost"))
;;          (normalized-effective-load (/ effective-load numcpus))
;;          (will-wait (> normalized-effective-load maxload)))
;;
;;     ;; let's let the user know once in a long while that load checking
;;     ;; is happening but not constantly report it
;;     #;(if (common:low-noise-print 30 (conc "cpuload" (or remote-host "localhost"))) ;; (> (random 100) 75) ;; about 25% of the time
;;         (debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload
;;                           ", load: " first ", adjmaxload: " adjmaxload ", loadjmp: " loadjmp))
;;
;;     (debug:print-info 1 *default-log-port*
;;                       "On host: " effective-host
;;                       ", effective load: " effective-load
;;                       ", numcpus: " numcpus
;;                       ", normalized effective load: " normalized-effective-load
;;                       )
;;
;;     (cond
;;      ;; bad data, try again to get the data
;;      ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable
;;            (> num-tries 0))
;;       (debug:print 0 *default-log-port* "WARNING: received bad data from get-cpu-load " first ", we'll sleep 10s and try " num-tries " more times.")
;;       (thread-sleep! 10)
;;       (common:wait-for-cpuload maxload-in numcpus-in waitdelay
;;                                count: count remote-host: remote-host force-maxload: force-maxload num-tries: (- num-tries 1)))
;;      ;; need to wait for load to drop
;;      ((and will-wait ;; (> first adjmaxload)
;;            (> count 0))
;;       (debug:print-info 0 *default-log-port*
;;                         "Delaying " 15 ;; adjwait
;;                         " seconds due to normalized effective load " normalized-effective-load ;; first
;;                         " exceeding max of " adjmaxload
;;                         " on server " (or remote-host (get-host-name))
;;                         " (normalized load-limit: " maxload ") " (if msg msg ""))
;;       (thread-sleep! 15) ;; adjwait)
;;       (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))
;;      ((and (> loadjmp (cond
;;                        (load-jump-limit load-jump-limit)
;;                        ((> numcpus 8)(/ numcpus 2))
;;                        ((> numcpus 4)(/ numcpus 1.2))
;;                        (else 0.5)))
;;            (> count 0))
;;       (debug:print-info 0 *default-log-port* "waiting " adjwait " seconds due to possible load jump " loadjmp ". "
;;                         (if msg msg ""))
;;       (thread-sleep! adjwait)
;;       (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))
;;      (else
;;       (if (> num-tries 0)
;;           (if (common:low-noise-print 30 (conc (round first) "-load-acceptable-" (or remote-host "localhost")))
;;               (debug:print 0 *default-log-port* "Load on " (or remote-host "localhost") " is acceptable at " first " continuing."))
;;           (debug:print 0 *default-log-port* "Load on " (or remote-host "localhost") ", "first" could not be retrieved. Giving up and continuing."))))))
;;

;; Run "uname" with the given flag string (default "-a") and return the
;; first line of its output, or "unknown" when there was no output.
;;
(define (get-uname . params)
  (let* ((flags     (if (null? params) "-a" (car params)))
         (uname-res (process:cmd-run->list (conc "uname " flags))))
    (if (null? (car uname-res))
        "unknown"
        (caar uname-res))))

;;======================================================================
;; D I S K S P A C E
;;======================================================================

;; Return the first datum read from the output of "du -s fpath".
;;
(define (common:get-disk-space-used fpath)
  (with-input-from-pipe (conc "/usr/bin/du -s " fpath) read))

;; Helper for get-df and get-free-inodes: run "script path" and return the
;; first output line converted to a number, or #f when the script printed
;; nothing or something that is not a number.
;;
(define (common:query-space-script script path)
  (with-input-from-pipe
   (conc script " " path)
   (lambda ()
     (let ((res (read-line)))
       (and (string? res)
            (string->number res))))))

;; Helper for get-unix-df and get-unix-inodes: run df-cmd and return the
;; number found immediately before the "NN%" column of the last matching
;; output line, or default when no line matched.
;;
(define (common:scan-df-output df-cmd default)
  (let ((df-results (process:cmd-run->list df-cmd))
        (space-rx   (regexp "([0-9]+)\\s+([0-9]+)%"))
        (found      default))
    (for-each (lambda (l)
                (let* ((match  (string-search space-rx l))
                       (newval (and match (string->number (cadr match)))))
                  (if (number? newval)
                      (set! found newval))))
              (car df-results))
    found))

;;======================================================================
;; given path get free space, allows override in [setup]
;; with free-space-script /path/to/some/script.sh
;;
(define (get-df path)
  (let ((script (configf:lookup *configdat* "setup" "free-space-script")))
    (if script
        (common:query-space-script script path)
        (get-unix-df path))))

;; given path get free inodes, allows override in [setup]
;; with free-inodes-script /path/to/some/script.sh
;;
(define (get-free-inodes path)
  (let ((script (configf:lookup *configdat* "setup" "free-inodes-script")))
    (if script
        (common:query-space-script script path)
        (get-unix-inodes path))))

;; free space as reported by "df path", #f when the output could not be parsed
;;
(define (get-unix-df path)
  (common:scan-df-output (conc "df " path) #f))

;; free inodes as reported by "df -i path"
;;
(define (get-unix-inodes path)
  (common:scan-df-output (conc "df -i " path) 0)) ;; 0 is a better failsafe than #f here.

;; returns: (ok/not dbspace required dirpath)
;; A path that is not a directory, or whose free space could not be
;; determined, counts as having no space.
;;
(define (common:check-space-in-dir dirpath required)
  (let* ((measured (if (directory? dirpath)
                       (get-df dirpath)
                       0))
         (dbspace  (if (number? measured) measured 0)))
    (list (> dbspace required)
          dbspace
          required
          dirpath)))

;;======================================================================
;; check space in dbdir and in megatest dir
;; returns: list of (ok/not dbspace required-space dirpath), least free space first
;;
(define (common:check-db-dir-space)
  (let* ((required (string->number
                    ;; default is 1GB (or actually a billion bytes) This is the number of 1 kB blocks.
                    (or (configf:lookup *configdat* "setup" "dbdir-space-required")
                        "1000000")))
         (dbdir    (common:make-tmpdir-name *toppath* "")) ;; (db:get-dbdir))
         (tdbspace (common:check-space-in-dir dbdir required))
         (mdbspace (common:check-space-in-dir *toppath* required)))
    (sort (list tdbspace mdbspace)
          (lambda (a b)
            (< (cadr a) (cadr b))))))

;;======================================================================
;; check available space in dbdir, exit if insufficient
;;
(define (common:check-db-dir-and-exit-if-insufficient)
  (let* ((spacedat (car (common:check-db-dir-space))) ;; look only at worst for now
         (is-ok    (car spacedat))
         (dbspace  (cadr spacedat))
         (required (caddr spacedat))
         (dbdir    (cadddr spacedat)))
    (if (not is-ok)
        (begin
          (debug:print-error 0 *default-log-port* "Insufficient space in " dbdir ", require " required ", have " dbspace ", exiting now.")
          (exit 1)))))

;; Helper for common:get-disk-with-most-free-space: #t when dirpath is a
;; writeable directory given as an absolute path, otherwise emits a
;; (rate limited) warning and returns #f.
;;
(define (common:disk-usable? disk-num dirpath)
  (cond
   ((not (directory? dirpath))
    (if (common:low-noise-print 300 "disks not a dir " disk-num)
        (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it."))
    #f)
   ((not (file-write-access? dirpath))
    (if (common:low-noise-print 300 "disks not writeable " disk-num)
        (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it."))
    #f)
   ((not (eq? (string-ref dirpath 0) #\/))
    (if (common:low-noise-print 300 "disks not a proper path " disk-num)
        (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it."))
    #f)
   (else #t)))

;;======================================================================
;; disks is list of lists ((name path) ... )
;; returns (name . path) of the usable disk with the most free space, or
;; #f when no disk has more than minsize free and more than min_inodes
;; ([setup] min_inodes, default 1000000) free inodes.
;;
(define (common:get-disk-with-most-free-space disks minsize)
  (let* ((best     #f)
         (bestsize 0)
         (default-min-inodes-string "1000000")
         (default-min-inodes (string->number default-min-inodes-string))
         (min-inodes (or (string->number (or (configf:lookup *configdat* "setup" "min_inodes")
                                             default-min-inodes-string))
                         default-min-inodes)))
    (for-each
     (lambda (disk-num)
       (let* ((dirpath     (cadr (assoc disk-num disks)))
              (usable      (common:disk-usable? disk-num dirpath))
              ;; -1 marks a disk that must be ignored
              (freespc     (if usable (get-df dirpath) -1))
              (free-inodes (if usable (get-free-inodes dirpath) -1)))
         (debug:print 2 *default-log-port* "INFO: disk " disk-num " path " dirpath " free space " freespc " free inodes " free-inodes)
         ;; get-df / get-free-inodes may fail to produce a number; skip such disks
         (if (and (number? freespc)
                  (number? free-inodes)
                  (> freespc bestsize)
                  (> free-inodes min-inodes))
             (begin
               (set! best (cons disk-num dirpath))
               (set! bestsize freespc)))))
     (map car disks))
    (if (and best (> bestsize minsize))
        best
        #f))) ;; #f means no disk candidate found

;;======================================================================
;; convert a spec string to a list of vectors #( rx action rx-string )
;; entries that do not end in one of the given actions are reported and dropped
;;
(define (common:spec-string->list-of-specs spec-string actions)
  (let ((spec-strings  (string-split-fields "\\s*;\\s*" spec-string #:infix))
        (actions-regex (regexp (conc "^(.*)\\s+(" (string-intersperse (map conc actions) "|") ")"))))
    (filter-map
     (lambda (s)
       (let ((m (string-match actions-regex s)))
         (if m
             (vector (regexp (cadr m)) (string->symbol (caddr m)) (cadr m))
             (begin
               (debug:print 0 *default-log-port* "WARNING: Unrecognised rule \"" s "\" in clean-up specification.")
               #f))))
     spec-strings)))

;;======================================================================
;; given a list of specs rx . rule and a file return the first matching rule
;; returns #f when no rule matches, including when rules is empty
;;
(define (common:file-find-rule fname rules) ;; rule is vector #( rx action rx-string)
  (find (lambda (rule)
          (string-match (vector-ref rule 0) fname))
        rules)) ;; return the whole rule so regex can be printed etc.

;;======================================================================
;; given a spec apply some rules to a directory
;;
;; WARNING: This function will REMOVE files - be sure your spec and path is correct!
;;
;; spec format:
;;    file-regex1 action; file-regex2 action; ...
;; e.g.
;;    .*\.log$ keep; .* remove
;;       --> keep all .log files, remove everything else
;; limitations:
;;    cannot have a rule with ; as part of the spec
;;    not very flexible, would be nice to return binned file names?
;; supported rules:
;;    keep     - keep this file
;;    remove   - remove this file
;;    compress - compress this file
;;
;; Files that match no rule are reported and left alone. With
;; remove-empty: #t directories found empty afterwards are deleted,
;; longest path first.
;;
(define (common:dir-clean-up path spec-string #!key (compress "gzip")(actions '(keep remove compress))(remove-empty #f))
  (let* ((specs       (common:spec-string->list-of-specs spec-string actions))
         (keepers     (make-hash-table))
         (directories (make-hash-table)))
    (find-files
     path
     action: (lambda (p res)
               (if (directory? p)
                   (hash-table-set! directories p #t)
                   ;; a file may match no rule at all (or there may be no
                   ;; valid rules), in which case there is no action
                   (let ((rule (and (pair? specs)
                                    (common:file-find-rule p specs))))
                     (case (and rule (vector-ref rule 1))
                       ((keep) (hash-table-set! keepers p rule))
                       ((remove)
                        (debug:print 0 *default-log-port* "Removing file " p)
                        (delete-file p))
                       ((compress)
                        (debug:print 0 *default-log-port* "Compressing file " p)
                        ;; NOTE(review): the path is passed to the shell unquoted
                        (system (conc compress " " p)))
                       (else
                        (debug:print 0 *default-log-port* "No match for file " p)))))))
    (if remove-empty
        (for-each
         (lambda (d)
           (if (null? (glob (conc d "/.*") (conc d "/*")))
               (begin
                 (debug:print 0 *default-log-port* "Removing empty directory " d)
                 (delete-directory d))))
         ;; longest names first so nested directories go before their parents
         (sort (hash-table-keys directories)
               (lambda (a b) (> (string-length a) (string-length b))))))))

;;======================================================================
;; E N V I R O N M E N T V A R S
;;======================================================================

;; Write the current environment to fname.csh (setenv lines) and fname.sh
;; (export lines). Variables listed in ignorevars, or whose name contains
;; characters outside the safe set, are written commented out; the .sh
;; file additionally comments out names containing ":".
;;
(define (save-environment-as-files fname #!key (ignorevars (list "USER" "HOME" "DISPLAY" "LS_COLORS" "XKEYSYMDB" "EDITOR" "MAKEFLAGS" "MAKEF" "MAKEOVERRIDES" "HOSTNAME")))
  ;;(bb-check-path msg: "save-environment-as-files entry")
  (let* ((envvars  (get-environment-variables))
         (whitesp  (regexp "[^a-zA-Z0-9_\\-:,\\.\\/%$]"))
         (mungeval (lambda (val)
                     (cond
                      ((eq? val #t) "") ;; convert #t to empty string
                      ((eq? val #f) #f) ;; convert #f to itself (still thinking about this one
                      (else val))))
         ;; double quote values with unsafe characters unless already quoted
         (delim-for (lambda (val)
                      (if (and (string-search whitesp val)
                               (not (string-search "^\".*\"$" val))
                               (not (string-search "^'.*'$" val)))
                          "\""
                          "")))
         ;; write one file; also-skip? is an extra per-key reason to comment a line out
         (write-env-file
          (lambda (suffix live-prefix dead-prefix separator also-skip?)
            (with-output-to-file (conc fname suffix)
              (lambda ()
                (for-each
                 (lambda (keyval)
                   (let* ((key   (car keyval))
                          (val   (cdr keyval))
                          (delim (delim-for val)))
                     (print (if (or (member key ignorevars)
                                    (string-search whitesp key)
                                    (also-skip? key))
                                dead-prefix
                                live-prefix)
                            key separator delim (mungeval val) delim)))
                 envvars))))))
    (write-env-file ".csh" "setenv " "# setenv " " " (lambda (key) #f))
    (write-env-file ".sh" "export " "# export " "="
                    (lambda (key) (string-search ":" key))))) ;; internal only values to be skipped.

(define (common:get-param-mapping #!key (flavor #f))
  "returns alist mapping string keys in testconfig/subrun to megatest command line switches; if flavor is switch-symbol, maps tcmt symbolic switches to megatest switches"
  (let ((default '(("tag-expr"  . "-tagexpr")
                   ("mode-patt" . "-modepatt")
                   ("run-name"  . "-runname")
                   ("contour"   . "-contour")
                   ("target"    . "-target")
                   ("test-patt" . "-testpatt")
                   ("msg"       . "-m")
                   ("log"       . "-log")
                   ("start-dir" . "-start-dir")
                   ("new"       . "-set-state-status"))))
    (if (eq? flavor 'switch-symbol)
        ;; keys become symbols with a leading dash, e.g. "msg" => -msg
        (map (lambda (x)
               (cons (string->symbol (conc "-" (car x))) (cdr x)))
             default)
        default)))

;;======================================================================
;; set some env vars from an alist, return an alist with original values
;; (("VAR" "value") ...)
;; a value of #f means "unset this var"
;; the returned list is in reverse order of lst; anything that is not a
;; list yields '()
;;
(define (alist->env-vars lst)
  (if (list? lst)
      (fold (lambda (p res)
              (let* ((var (car p))
                     (val (cadr p))
                     (prv (get-environment-variable var)))
                (if val
                    (safe-setenv var (->string val))
                    (unsetenv var))
                (cons (list var prv) res)))
            '()
            lst)
      '()))

;;======================================================================
;; clear vars matching pattern, run proc, set vars back
;; if proc is a string run that string as a command with
;; system.
;;

;; The environment as it was before megatest added its MT_ variables: read
;; back from MT_ORIG_ENV when that is set, otherwise the current
;; environment minus everything whose name starts with MT_.
;;
(define *common:orig-env*
  (let ((encoded (get-environment-variable "MT_ORIG_ENV")))
    (if encoded
        (with-input-from-string
            (z3:decode-buffer (base64:base64-decode encoded))
          read)
        (remove (lambda (x)
                  (string-match "^MT_.*" (car x)))
                (get-environment-variables)))))

;; Run proc (a thunk, or a string handed to system) with the process
;; environment replaced by *common:orig-env* and return its result. The
;; current environment is put back afterwards, also when proc raises.
;;
(define (common:with-orig-env proc)
  (let ((current-env (get-environment-variables)))
    (dynamic-wind
      (lambda ()
        (for-each (lambda (x) (unsetenv (car x))) current-env)
        (for-each (lambda (x) (setenv (car x) (cdr x))) *common:orig-env*))
      (lambda ()
        (cond
         ((string? proc) (system proc))
         (proc (proc))))
      (lambda ()
        (for-each (lambda (x) (unsetenv (car x))) *common:orig-env*)
        (for-each (lambda (x) (setenv (car x) (cdr x))) current-env)))))

;; Run proc (a thunk, or a string handed to system) with every variable
;; whose name matches one of var-patts removed from the environment. The
;; variables are restored afterwards, also when proc raises.
;; returns: hash table var => value of the variables that were hidden
;;
(define (common:without-vars proc . var-patts)
  (let ((vars (make-hash-table)))
    (for-each
     (lambda (vardat) ;; each env var
       (if (any (lambda (var-patt) (string-match var-patt (car vardat)))
                var-patts)
           (hash-table-set! vars (car vardat) (cdr vardat))))
     (get-environment-variables))
    (dynamic-wind
      (lambda ()
        (for-each unsetenv (hash-table-keys vars)))
      (lambda ()
        (cond
         ((string? proc) (system proc))
         (proc (proc))))
      (lambda ()
        (hash-table-for-each
         vars
         (lambda (var val)
           (setenv var val)))))
    vars))

;; Run cmd wrapped in the dashboard pre/post commands, or in "viewscreen"
;; when neither is set.
;;   with-vars     - run with the environment untouched
;;   with-orig-env - run with the original (pre megatest) environment
;;   otherwise     - run with all MT_ variables hidden
;;
(define (common:run-a-command cmd #!key (with-vars #f) (with-orig-env #f))
  (let* ((pre-cmd  (dtests:get-pre-command))
         (post-cmd (dtests:get-post-command))
         ;; NOTE(review): if only one of pre-cmd/post-cmd is #f, conc would
         ;; render the other as "#f" - confirm both always return strings
         (fullcmd  (if (or pre-cmd post-cmd)
                       (conc pre-cmd cmd post-cmd)
                       (conc "viewscreen " cmd))))
    (debug:print-info 02 *default-log-port* "Running command: " fullcmd)
    (cond
     (with-vars     (common:without-vars fullcmd))
     (with-orig-env (common:with-orig-env fullcmd))
     (else          (common:without-vars fullcmd "MT_.*")))))

;;======================================================================
;; C O L O R S
;;======================================================================

;; Map a color name (any case) to an "R G B" string.
;; NOTE(review): purple yields a placeholder string and unknown names
;; yield an unspecified value.
;;
(define (common:name->iup-color name)
  (case (string->symbol (string-downcase name))
    ((red)    "223 33 49")
    ((grey)   "192 192 192")
    ((orange) "255 172 13")
    ((purple) "This is unfinished ...")))

;;======================================================================
;; (define (common:get-color-for-state-status state status)
;;   (case (string->symbol state)
;;     ((COMPLETED)
;;      (case (string->symbol status)
;;        ((PASS) "70 249 73")
;;        ((WARN WAIVED) "255 172 13")
;;        ((SKIP) "230 230 0")
;;        (else "223 33 49")))
;;     ((LAUNCHED) "101 123 142")
;;     ((CHECK) "255 100 50")
;;     ((REMOTEHOSTSTART) "50 130 195")
;;     ((RUNNING) "9 131 232")
;;     ((KILLREQ) "39 82 206")
;;     ((KILLED) "234 101 17")
;;     ((NOT_STARTED) "240 240 240")
;;     (else "192 192 192")))

;; "255 172 13" => "ff/ac/d" (hex components, no zero padding)
;;
(define (common:iup-color->rgb-hex instr)
  (string-intersperse
   (map (lambda (component)
          (number->string (string->number component) 16))
        (string-split instr))
   "/"))

;;======================================================================
;; L O C K I N G M E C H A N I S M S
;;======================================================================

;;======================================================================
;; faux-lock is deprecated.
;; Please use simple-lock below
;;
;; Try to take the lock named keyname, polling once a second for up to
;; wait-time seconds. Unless allow-lock-steal is #f the lock is forcibly
;; released when only one second is left.
;; returns: #t when this process holds the lock, #f otherwise
;;
(define (common:faux-lock keyname #!key (wait-time 8)(allow-lock-steal #t))
  (if (rmt:no-sync-get/default keyname #f) ;; do not be tempted to compare to pid. locking is a one-shot action, if already locked for this pid it doesn't actually count
      (if (> wait-time 0)
          (begin
            (thread-sleep! 1)
            (if (and allow-lock-steal
                     (= wait-time 1)) ;; only one second left, steal the lock
                (begin
                  (debug:print-info 0 *default-log-port* "stealing lock for " keyname)
                  (common:faux-unlock keyname force: #t)))
            (common:faux-lock keyname
                              wait-time: (- wait-time 1)
                              allow-lock-steal: allow-lock-steal))
          #f)
      (begin
        (rmt:no-sync-set keyname (conc (current-process-id)))
        ;; read it back to see whether we really got it
        (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f))))))

;; Release the lock keyname if it is held by this process, or
;; unconditionally with force: #t.
;; returns: #t when the lock was released (or forced), #f otherwise
;;
(define (common:faux-unlock keyname #!key (force #f))
  (if (or force (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f))))
      (begin
        (if (rmt:no-sync-get/default keyname #f) (rmt:no-sync-del! keyname))
        #t)
      #f))

;;======================================================================
;; simple lock. improve and converge on this one.
;;
(define (common:simple-lock keyname)
  (rmt:no-sync-get-lock keyname))

;; NOTE(review): force is accepted but not used
(define (common:simple-unlock keyname #!key (force #f))
  (rmt:no-sync-del! keyname))

;;======================================================================
;;
;;======================================================================

;; true when started with -execute and MT_CMDINFO is set in the environment
;;
(define (common:in-running-test?)
  (and (args:get-arg "-execute") (get-environment-variable "MT_CMDINFO")))

;; color name for a test status string, "black" for anything unknown
;;
(define (common:get-color-from-status status)
  (let ((entry (assoc status '(("PASS"    . "green")
                               ("FAIL"    . "red")
                               ("WARN"    . "orange")
                               ("KILLED"  . "orange")
                               ("KILLREQ" . "purple")
                               ("RUNNING" . "blue")
                               ("ABORT"   . "brown")))))
    (if entry
        (cdr entry)
        "black")))

;;======================================================================
;; N A N O M S G C L I E N T
;;======================================================================
;;
;;
;;
;; (define (common:send-dboard-main-changed)
;;   (let* ((dashboard-ips (mddb:get-dashboards)))
;;     (for-each
;;      (lambda (ipadr)
;;        (let* ((soc (common:open-nm-req (conc "tcp://" ipadr)))
;;               (msg (conc "main " *toppath*))
;;               (res (common:nm-send-receive-timeout soc msg)))
;;          (if (not res) ;; couldn't reach that dashboard - remove it from db
;;              (print "ERROR: couldn't reach dashboard " ipadr))
;;          res))
;;      dashboard-ips)))
;;
;;
;; ;;======================================================================
;; ;; D A S H B O A R D D B
;; ;;======================================================================
;;
;; (define (mddb:open-db)
;;   (let* ((db (open-database (conc (get-environment-variable "HOME") "/.dashboard.db"))))
;;     (set-busy-handler! db (busy-timeout 10000))
;;     (for-each
;;      (lambda (qry)
;;        (exec (sql db qry)))
;;      (list
;;       "CREATE TABLE IF NOT EXISTS vars (id INTEGER PRIMARY KEY,key TEXT, val TEXT, CONSTRAINT varsconstraint UNIQUE (key));"
;;       "CREATE TABLE IF NOT EXISTS dashboards (
;;           id INTEGER PRIMARY KEY,
;;           pid INTEGER,
;;           username TEXT,
;;           hostname TEXT,
;;           ipaddr TEXT,
;;           portnum INTEGER,
;;           start_time TIMESTAMP DEFAULT (strftime('%s','now')),
;;              CONSTRAINT hostport UNIQUE (hostname,portnum)
;;       );"
;;       ))
;;     db))
;;
;; ;; register a dashboard
;; ;;
;; (define (mddb:register-dashboard port)
;;   (let* ((pid      (current-process-id))
;;          (hostname (get-host-name))
;;          (ipaddr   (server:get-best-guess-address hostname))
;;          (username (current-user-name)) ;; (car userinfo)))
;;          (db       (mddb:open-db)))
;;     (print "Register monitor, pid: " pid ", hostname: " hostname ", port: " port ", username: " username)
;;     (exec (sql db "INSERT OR REPLACE INTO dashboards (pid,username,hostname,ipaddr,portnum) VALUES (?,?,?,?,?);")
;;           pid username hostname ipaddr port)
;;     (close-database db)))
;;
;; ;; unregister a monitor
;; ;;
;; (define (mddb:unregister-dashboard host port)
;;   (let* ((db (mddb:open-db)))
;;     (print "Register unregister monitor, host:port=" host ":" port)
;;     (exec (sql db "DELETE FROM dashboards WHERE hostname=? AND portnum=?;") host port)
;;     (close-database db)))
;;
;; ;; get registered dashboards
;; ;;
;; (define (mddb:get-dashboards)
;;   (let ((db (mddb:open-db)))
;;     (query fetch-column
;;            (sql db "SELECT ipaddr || ':' || portnum FROM dashboards;"))))

;;======================================================================
;; T E S T L A U N C H I N G P E R I T E M W I T H H O S T T Y P E S
;;======================================================================
;;
;; [hosts]
;; arm cubie01 cubie02
;; x86_64 zeus xena myth01
;; allhosts #{g hosts arm} #{g hosts x86_64}
;;
;; [host-types]
;; general #MTLOWESTLOAD #{g hosts allhosts}
;; arm #MTLOWESTLOAD #{g hosts arm}
;; nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo
;;
;; [host-rules]
;; # maxnload => max normalized load
;; # maxnjobs => max jobs per cpu
;; # maxjobrate => max jobs per second
;; general maxnload=1.1; maxnjobs=1.2; maxjobrate=0.1
;;
;; [launchers]
;; envsetup general
;; xor/%/n 4C16G
;; % nbgeneral
;;
;; [jobtools]
;; # if defined and not "no" flexi-launcher will bypass "launcher" unless no match.
;; flexi-launcher yes
;; launcher nbfake
;;
;; returns the launcher command string for testname/itempath: the first
;; [launchers] pattern that matches and names a defined host-type wins,
;; otherwise the [jobtools] launcher is used.
;;
(define (common:get-launcher configdat testname itempath)
  (let ((fallback-launcher (configf:lookup configdat "jobtools" "launcher"))
        (flexi-launcher    (configf:lookup configdat "jobtools" "flexi-launcher")))
    (if (and flexi-launcher ;; overrides launcher
             (not (equal? flexi-launcher "no")))
        (let loop ((remaining (hash-table-ref/default configdat "launchers" '())))
          (if (null? remaining)
              fallback-launcher
              (let* ((hed       (car remaining))
                     (tal       (cdr remaining))
                     (patt      (car hed))
                     (host-type (cadr hed)))
                (if (tests:match patt testname itempath)
                    (begin
                      (debug:print-info 2 *default-log-port* "Have flexi-launcher match for " testname "/" itempath " = " host-type)
                      (let ((launcher (configf:lookup configdat "host-types" host-type)))
                        (if launcher
                            (let* ((launcher-parts (string-split launcher))
                                   (launcher-exe   (car launcher-parts)))
                              (if (equal? launcher-exe "#MTLOWESTLOAD") ;; this is our special case, we will find the lowest load and craft a remrun commandline
                                  (let host-loop ((targ-host (common:get-least-loaded-host (cdr launcher-parts) host-type configdat))
                                                  (count     100))
                                    (cond
                                     (targ-host
                                      (conc "remrun " targ-host))
                                     ((> count 0)
                                      (debug:print 0 *default-log-port* "INFO: Waiting for a host for host-type " host-type)
                                      (thread-sleep! (- 101 count)) ;; wait a little longer each round
                                      (host-loop (common:get-least-loaded-host (cdr launcher-parts) host-type configdat)
                                                 (- count 1)))
                                     (else
                                      (debug:print 0 *default-log-port* "FATAL: Failed to find a host from #MTLOWESTLOAD for host-type " host-type)
                                      (exit 1)))) ;; fatal, so do not report success to the caller
                                  launcher))
                            (begin
                              (debug:print-info 0 *default-log-port* "WARNING: no launcher found for host-type " host-type)
                              (loop tal)))))
                    ;; no match, try again
                    (loop tal)))))
        fallback-launcher)))

;;======================================================================
;; D A S H B O A R D U S E R V I E W S
;;======================================================================

;;======================================================================
;; first read <toppath>/.mtviews.config if it exists, then read
;; $HOME/.mtviews.config if it exists
;;
(define (common:load-views-config)
  (let* ((view-cfgdat    (make-hash-table))
         (home-cfgfile   (conc (get-environment-variable "HOME") "/.mtviews.config"))
         (mthome-cfgfile (conc *toppath* "/.mtviews.config")))
    (if (common:file-exists? mthome-cfgfile)
        (read-config mthome-cfgfile view-cfgdat #t))
    ;; we load the home dir file AFTER the MTRAH file so the user can clobber settings when running the dashboard in read-only areas
    (if (common:file-exists? home-cfgfile)
        (read-config home-cfgfile view-cfgdat #t))
    view-cfgdat))

;;======================================================================
;; H I E R A R C H I C A L H A S H T A B L E S
;;======================================================================
;;
;; Every element including top element is a vector:
;;   #( hash-table-of-children value )
;;

(define (hh:make-hh #!key (ht #f)(value #f))
  (vector (or ht (make-hash-table)) value))

;;======================================================================
;; used internally
(define-inline (hh:set-ht! hh ht) (vector-set! hh 0 ht))
(define-inline (hh:get-ht hh) (vector-ref hh 0))
(define-inline (hh:set-value! hh value) (vector-set! hh 1 value))
;; NOTE(review): the value parameter is not used
(define-inline (hh:get-value hh value) (vector-ref hh 1))

;;======================================================================
;; given a hierarchial hash and some keys look up the value ...
;; returns #f when any key along the way is missing
;;
(define (hh:get hh . keys)
  (if (null? keys)
      (vector-ref hh 1) ;; we have reached the end of the line, return the value sought
      (let* ((sub-ht (hh:get-ht hh))
             (sub-hh (and sub-ht ;; yes, there is more hierarchy
                          (hash-table-ref/default sub-ht (car keys) #f))))
        (if sub-hh
            (apply hh:get sub-hh (cdr keys))
            #f))))

;;======================================================================
;; given a hierarchial hash, a value and some keys, add needed hierarcy and insert the value
;;
(define (hh:set! hh value . keys)
  (if (null? keys)
      (hh:set-value! hh value) ;; we have reached the end of the line, store the value
      (let ((sub-ht (hh:get-ht hh)))
        (if sub-ht ;; yes, there is more hierarchy
            (let ((sub-hh (hash-table-ref/default sub-ht (car keys) #f)))
              (if (not sub-hh) ;; we'll need to add the next level of hierarchy
                  (let ((new-sub-hh (hh:make-hh)))
                    (hash-table-set! sub-ht (car keys) new-sub-hh)
                    (apply hh:set! new-sub-hh value (cdr keys)))
                  (apply hh:set! sub-hh value (cdr keys)))) ;; call the sub-hierhash with remaining keys
            (begin
              (hh:set-ht! hh (make-hash-table))
              (apply hh:set! hh value keys))))))

;;======================================================================
;; Manage pkts, used in servers, tests and likely other contexts so put
;; in common
;;======================================================================

;; per pkt type: field name => single letter card used in the pkt
(define common:pkts-spec
  '((default . ((parent   . P)
                (action   . a)
                (filename . f)))
    (configf . ((parent   . P)
                (action   . a)
                (filename . f)))
    (server  . ((action . a)
                (pid    . d)
                (ipaddr . i)
                (port   . p)
                (parent . P)))

    (test    . ((cpuuse    . c)
                (diskuse   . d)
                (item-path . i)
                (runname   . r)
                (state     . s)
                (target    . t)
                (status    . u)
                (parent    . P)))))

;; returns the list of pkts directories from [setup] pktsdirs, or with
;; use-lt the single default <toppath or cwd>/lt/.pkts, or #f when neither applies
;;
(define (common:get-pkts-dirs mtconf use-lt)
  (let ((pktsdirs-str (or (configf:lookup mtconf "setup" "pktsdirs")
                          (and use-lt
                               (conc (or *toppath*
                                         (current-directory))
                                     "/lt/.pkts")))))
    (if pktsdirs-str
        (string-split pktsdirs-str " ")
        #f)))

;;======================================================================
;; use-lt is use linktree "lt" link to find pkts dir
;; Converts pktalist-in to a pkt chained to the previously saved pkt and
;; writes it to <pktsdir>/<uuid>.pkt. Write failures are logged, not raised.
;;
(define (common:save-pkt pktalist-in mtconf use-lt #!key (add-only #f)) ;; add-only saves the pkt only if there is a parent already
  (if (or (not add-only)
          (hash-table-exists? *pkts-info* 'last-parent))
      (let* ((parent   (hash-table-ref/default *pkts-info* 'last-parent #f))
             (pktalist (if parent
                           (cons `(parent . ,parent)
                                 pktalist-in)
                           pktalist-in)))
        (let-values (((uuid pkt)
                      (alist->pkt pktalist common:pkts-spec)))
          (hash-table-set! *pkts-info* 'last-parent uuid)
          (let ((pktsdir (or (hash-table-ref/default *pkts-info* 'pkts-dir #f)
                             ;; first use: take the first configured dir and remember it
                             (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt))
                                    (pktsdir  (and (pair? pktsdirs) (car pktsdirs))))
                               (if pktsdir
                                   (hash-table-set! *pkts-info* 'pkts-dir pktsdir))
                               pktsdir))))
            (debug:print 0 *default-log-port* "pktsdir: "pktsdir)
            (if (not pktsdir)
                (debug:print-error 0 *default-log-port* "no pkts directory is configured, not saving pkt " uuid)
                (handle-exceptions
                    exn
                  (debug:print-info 0 *default-log-port* "failed to write out packet to " pktsdir ", exn=" exn) ;; don't care if this failed for now but MUST FIX - BUG!!
                  (if (not (file-exists? pktsdir))
                      (create-directory pktsdir #t))
                  (with-output-to-file
                      (conc pktsdir "/" uuid ".pkt")
                    (lambda ()
                      (print pkt))))))))))

;; Open the pkts queue db and call (proc pktsdirs pktsdir pdb), then close
;; the db. Nothing is run, and an error is logged, when the settings are
;; incomplete or the first pkts directory is missing or not owned by the
;; current user.
;;
(define (common:with-queue-db mtconf proc #!key (use-lt #f)(toppath-in #f))
  (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt))
         (pktsdir  (if pktsdirs (car pktsdirs) #f))
         (toppath  (or (configf:lookup mtconf "scratchdat" "toppath")
                       toppath-in))
         (pdbpath  (or (configf:lookup mtconf "setup" "pdbpath") pktsdir)))
    (cond
     ((not (and pktsdir toppath pdbpath))
      (debug:print 0 *default-log-port* "ERROR: settings are missing in your megatest.config for area management.")
      (debug:print 0 *default-log-port* " you need to have pktsdirs in the [setup] section."))
     ((not (common:file-exists? pktsdir))
      (debug:print 0 *default-log-port* "ERROR: pkts directory not found " pktsdir))
     ((not (equal? (file-owner pktsdir) (current-effective-user-id)))
      (debug:print 0 *default-log-port* "ERROR: directory " pktsdir " is not owned by " (current-effective-user-name)))
     (else
      (let ((pdb (open-queue-db pdbpath "pkts.db"
                                schema: '("CREATE TABLE groups (id INTEGER PRIMARY KEY,groupname TEXT, CONSTRAINT group_constraint UNIQUE (groupname));"))))
        (proc pktsdirs pktsdir pdb)
        (dbi:close pdb))))))

;; Add every *.pkt file found in the configured pkts directories to the
;; queue db, skipping pkts whose uuid is already there.
;;
(define (common:load-pkts-to-db mtconf #!key (use-lt #f))
  (common:with-queue-db
   mtconf
   (lambda (pktsdirs pktsdir pdb)
     (for-each
      (lambda (pktsdir) ;; look at all
        (cond
         ((not (common:file-exists? pktsdir))
          (debug:print 0 *default-log-port* "ERROR: packets directory " pktsdir " does not exist."))
         ((not (directory? pktsdir))
          (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not a directory."))
         ((not (file-read-access? pktsdir))
          (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not readable."))
         (else
          (debug:print-info 0 *default-log-port* "Loading packets found in " pktsdir)
          (let ((pkts (glob (conc pktsdir "/*.pkt")))
                (sqdb (dbi:db-conn pdb)))
            ;; Put this in a transaction to avoid issues overloading the db
            (sqlite3:with-transaction
             sqdb
             (lambda ()
               (for-each
                (lambda (pkt)
                  (let* ((name-match (string-match ".*/([0-9a-f]+).pkt" pkt))
                         (uuid       (and name-match (cadr name-match))))
                    (cond
                     ;; a .pkt file whose name is not a hex uuid cannot be one of ours
                     ((not uuid)
                      (debug:print 0 *default-log-port* "WARNING: skipping pkt file with unexpected name " pkt))
                     ((lookup-by-uuid pdb uuid #f)
                      (debug:print 4 *default-log-port* "pkt: " uuid " exists, skipping..."))
                     (else
                      (let* ((pktdat (string-intersperse
                                      (with-input-from-file pkt read-lines)
                                      "\n"))
                             (apkt   (pkt->alist pktdat))
                             (ptype  (alist-ref 'T apkt)))
                        (add-to-queue pdb pktdat uuid (or ptype 'cmd) #f 0)
                        (debug:print 4 *default-log-port* "Added " uuid " of type " ptype " to queue"))))))
                pkts)))))))
      pktsdirs))
   use-lt: use-lt))

(define (common:get-pkt-alists pkts)
  (map (lambda (x)
         (alist-ref 'apkt x)) ;; 'apkt pulls out the alist from the read pkt
       pkts))

;;======================================================================
;; given list of pkts (alist mode) return list of D cards as Unix epoch, sorted descending
;; also delete duplicates by target i.e. (car pkt)
;;
(define (common:get-pkt-times pkts)
  (let ((target-times (map (lambda (x)
                             (cons (alist-ref 't x)
                                   (string->number (alist-ref 'D x))))
                           pkts)))
    (delete-duplicates
     (sort target-times
           (lambda (a b) (> (cdr a) (cdr b))))      ;; sort descending
     (lambda (a b) (equal? (car a) (car b))))))     ;; remove duplicates by target


;; threads started by common:send-thunk-to-background-thread that have not
;; finished yet, name => thread
(define *common:thread-punchlist* (make-hash-table))

;; Start thunk in a new thread registered in *common:thread-punchlist*;
;; the entry is removed again when thunk returns normally.
;;
(define (common:send-thunk-to-background-thread thunk #!key (name #f))
  ;;(BB> "launched thread " name)
  ;; we need a unique name for the thread.
  (let* ((realname  (cond
                     ((not name)
                      (conc "anonymous-"(symbol->string (gensym))))
                     ((hash-table-ref/default *common:thread-punchlist* name #f)
                      (conc name"-" (symbol->string (gensym))))
                     (else name)))
         (realthunk (lambda ()
                      (let ((res (thunk)))
                        (hash-table-delete! *common:thread-punchlist* realname)
                        res)))
         (thread    (make-thread realthunk realname)))
    (hash-table-set! *common:thread-punchlist* realname thread)
    (thread-start! thread)))

;; wait for every thread still listed in *common:thread-punchlist*
;;
(define (common:join-backgrounded-threads)
  ;; may need to trap and ignore exceptions -- dunno how atomic threads are...
  (for-each
   (lambda (thread-name)
     (let ((thread (hash-table-ref/default *common:thread-punchlist* thread-name #f)))
       (if thread
           (handle-exceptions
               exn
             (begin
               (debug:print 0 *default-log-port* "joining threads failed. exn=" exn)
               #t) ;; just ignore it, it might have died in the meantime so joining it will throw an exception
             (thread-join! thread)))))
   (hash-table-keys *common:thread-punchlist*)))

;; NOTE(review): the parenthesis below closes a form opened before this
;; section of the file - TODO confirm
)