Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -28,11 +28,14 @@ ezsteps.scm lock-queue.scm sdb.scm rmt.scm api.scm \ subrun.scm archive.scm env.scm \ diff-report.scm # module source files -MSRCFILES = dbmod.scm servermod.scm apimod.scm commonmod.scm rmtmod.scm ods.scm configfmod.scm transport.scm portlogger.scm tasks.scm pgdb.scm margsmod.scm +MSRCFILES = dbmod.scm servermod.scm apimod.scm commonmod.scm rmtmod.scm \ + ods.scm configfmod.scm transport.scm portlogger.scm tasks.scm \ + pgdb.scm margsmod.scm debugprint.scm + MOFILES = $(addprefix mofiles/,$(MSRCFILES:%.scm=%.o)) MOIMPFILES = $(MSRCFILES:%.scm=%.import.o) # ftail.scm rmtmod.scm commonmod.scm removed # MSRCFILES = ducttape-lib.scm pkts.scm stml2.scm cookie.scm mutils.scm \ Index: apimod.scm ================================================================== --- apimod.scm +++ apimod.scm @@ -20,19 +20,21 @@ (declare (unit apimod)) (declare (uses commonmod)) (declare (uses dbmod)) (declare (uses tasks)) +;; (declare (uses servermod)) (module apimod * (import scheme chicken data-structures extras) (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69) (import commonmod) (import dbmod) (import tasks) +;; (import servermod) ;; allow these queries through without starting a server ;; (define api:read-only-queries '(get-key-val-pairs @@ -185,11 +187,11 @@ ;;=============================================== ((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl ;; SERVERS - ((start-server) (apply server:kind-run params)) + ;; ((start-server) (apply server:kind-run params)) ((kill-server) (set! *server-run* #f)) ;; TESTS ;;((test-set-state-status-by-id) (apply mt:test-set-state-status-by-id dbstruct params)) Index: build.inc ================================================================== --- build.inc +++ build.inc @@ -1,139 +1,32 @@ # To regenerate this file do: # (cd utils/;ck52 csc gendeps.scm) && ./utils/gendeps allunits *scm # cp allunits.inc build.inc # -api.o : mofiles/apimod.o -api.o : mofiles/commonmod.o -api.o : mofiles/dbmod.o -api.o : mofiles/tasks.o -archive.o : mofiles/commonmod.o -archive.o : mofiles/configfmod.o -archive.o : mofiles/dbmod.o -client.o : mofiles/commonmod.o -client.o : mofiles/dbmod.o -client.o : mofiles/servermod.o -common.o : mofiles/commonmod.o -common.o : mofiles/configfmod.o -common.o : mofiles/dbmod.o -common.o : mofiles/servermod.o -configf.o : mofiles/commonmod.o -configf.o : mofiles/configfmod.o -dashboard-context-menu.o : mofiles/commonmod.o -dashboard-context-menu.o : mofiles/configfmod.o -dashboard-context-menu.o : mofiles/dbmod.o -dashboard-guimonitor.o : mofiles/commonmod.o -dashboard-guimonitor.o : mofiles/dbmod.o -dashboard-guimonitor.o : mofiles/tasks.o -dashboard-tests.o : mofiles/commonmod.o -dashboard-tests.o : mofiles/configfmod.o -dashboard-tests.o : mofiles/dbmod.o -dashboard.o : mofiles/apimod.o -dashboard.o : mofiles/commonmod.o -dashboard.o : mofiles/configfmod.o -dashboard.o : mofiles/dbmod.o -dashboard.o : mofiles/dcommonmod.o -dashboard.o : mofiles/servermod.o -db.o : mofiles/commonmod.o -db.o : mofiles/configfmod.o -db.o : mofiles/dbmod.o -db.o : mofiles/servermod.o -dcommon.o : mofiles/commonmod.o -dcommon.o : mofiles/configfmod.o -dcommon.o : mofiles/dbmod.o -dcommon.o : mofiles/dcommonmod.o -dcommon.o : mofiles/servermod.o -diff-report.o : mofiles/commonmod.o -env.o : mofiles/commonmod.o -ezsteps.o : mofiles/commonmod.o -ezsteps.o : mofiles/configfmod.o -ezsteps.o : mofiles/dbmod.o -genexample.o : mofiles/commonmod.o -http-transport.o : mofiles/commonmod.o -http-transport.o : mofiles/configfmod.o -http-transport.o : mofiles/dbmod.o -http-transport.o : mofiles/portlogger.o -http-transport.o : mofiles/servermod.o -http-transport.o : mofiles/transport.o -index-tree.o : mofiles/commonmod.o -items.o : mofiles/commonmod.o -items.o : mofiles/configfmod.o -launch.o : mofiles/commonmod.o -launch.o : mofiles/configfmod.o -launch.o : mofiles/dbmod.o -lock-queue.o : mofiles/commonmod.o -lock-queue.o : mofiles/tasks.o -megatest.o : mofiles/apimod.o -megatest.o : mofiles/commonmod.o -megatest.o : mofiles/configfmod.o -megatest.o : mofiles/dbmod.o -megatest.o : mofiles/ods.o -megatest.o : mofiles/rmtmod.o -megatest.o : mofiles/servermod.o -mofiles/apimod.o : mofiles/commonmod.o -mofiles/commonmod.o : mofiles/margsmod.o -mofiles/configfmod.o : mofiles/commonmod.o -mofiles/dbmod.o : mofiles/commonmod.o +mofiles/apimod.o : mofiles/tasks.o +mofiles/transport.o : mofiles/configfmod.o +mofiles/tasks.o : mofiles/configfmod.o +mofiles/servermod.o : mofiles/configfmod.o +mofiles/portlogger.o : mofiles/configfmod.o +mofiles/dcommonmod.o : mofiles/configfmod.o mofiles/dbmod.o : mofiles/configfmod.o mofiles/dbmod.o : mofiles/ods.o -mofiles/dcommonmod.o : mofiles/commonmod.o -mofiles/dcommonmod.o : mofiles/configfmod.o -mofiles/ods.o : mofiles/commonmod.o -mofiles/portlogger.o : mofiles/commonmod.o -mofiles/portlogger.o : mofiles/configfmod.o -mofiles/portlogger.o : mofiles/dbmod.o -mofiles/rmtmod.o : mofiles/apimod.o -mofiles/rmtmod.o : mofiles/apimod.o -mofiles/rmtmod.o : mofiles/commonmod.o -mofiles/rmtmod.o : mofiles/dbmod.o -mofiles/servermod.o : mofiles/commonmod.o -mofiles/servermod.o : mofiles/configfmod.o -mofiles/servermod.o : mofiles/dbmod.o -mofiles/tasks.o : mofiles/commonmod.o -mofiles/tasks.o : mofiles/configfmod.o +mofiles/transport.o : mofiles/commonmod.o +mofiles/tasks.o : mofiles/commonmod.o +mofiles/servermod.o : mofiles/commonmod.o +mofiles/rmtmod.o : mofiles/commonmod.o +mofiles/portlogger.o : mofiles/commonmod.o +mofiles/ods.o : mofiles/commonmod.o +mofiles/dcommonmod.o : mofiles/commonmod.o +mofiles/dbmod.o : mofiles/commonmod.o +mofiles/configfmod.o : mofiles/commonmod.o +mofiles/apimod.o : mofiles/commonmod.o +mofiles/rmtmod.o : mofiles/apimod.o +mofiles/tasks.o : mofiles/margsmod.o +mofiles/commonmod.o : mofiles/margsmod.o mofiles/tasks.o : mofiles/dbmod.o -mofiles/tasks.o : mofiles/pgdb.o -mofiles/tasks.o : mofiles/rmtmod.o -mofiles/transport.o : mofiles/commonmod.o -mofiles/transport.o : mofiles/configfmod.o +mofiles/servermod.o : mofiles/dbmod.o +mofiles/rmtmod.o : mofiles/dbmod.o +mofiles/portlogger.o : mofiles/dbmod.o +mofiles/apimod.o : mofiles/dbmod.o mofiles/transport.o : mofiles/portlogger.o -mt.o : mofiles/commonmod.o -mt.o : mofiles/configfmod.o -mt.o : mofiles/dbmod.o -mtexec.o : mofiles/configfmod.o -mtut.o : mofiles/commonmod.o -mtut.o : mofiles/configfmod.o -newdashboard.o : mofiles/commonmod.o -newdashboard.o : mofiles/configfmod.o -newdashboard.o : mofiles/dbmod.o -mofiles/pgdb.o : mofiles/margsmod.o -process.o : mofiles/commonmod.o -rmt.o : mofiles/apimod.o -rmt.o : mofiles/commonmod.o -rmt.o : mofiles/configfmod.o -rmt.o : mofiles/dbmod.o -rmt.o : mofiles/rmtmod.o -rmt.o : mofiles/servermod.o -runconfig.o : mofiles/commonmod.o -runs.o : mofiles/commonmod.o -runs.o : mofiles/configfmod.o -runs.o : mofiles/dbmod.o -runs.o : mofiles/servermod.o -server.o : mofiles/commonmod.o -server.o : mofiles/configfmod.o -server.o : mofiles/dbmod.o -server.o : mofiles/servermod.o -subrun.o : mofiles/commonmod.o -subrun.o : mofiles/configfmod.o -subrun.o : mofiles/dbmod.o -synchash.o : mofiles/dbmod.o -tcmt.o : mofiles/commonmod.o -tdb.o : mofiles/commonmod.o -tdb.o : mofiles/dbmod.o -tdb.o : mofiles/ods.o -tests.o : mofiles/commonmod.o -tests.o : mofiles/configfmod.o -tests.o : mofiles/dbmod.o -tests.o : mofiles/servermod.o -tree.o : mofiles/commonmod.o -tree.o : mofiles/dbmod.o Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -61,183 +61,10 @@ (import servermod) (declare (uses margsmod)) (import margsmod) -(include "common_records.scm") - -;;====================================================================== -;; (require-library margs) -;; (include "margs.scm") - -;; (define old-exit exit) -;; -;; (define (exit . code) -;; (if (null? code) -;; (old-exit) -;; (old-exit code))) - -(define (common:debug-setup) - (debug:setup (cond ;; debug arg - ((args:get-arg "-debug-noprop") 'noprop) - ((args:get-arg "-debug") #t) - (else #f)) - (cond ;; verbosity arg - ((args:get-arg "-q") 'v) - ((args:get-arg "-q") 'q) - (else #f)))) -(define *numcpus-cache* (make-hash-table)) - -;;====================================================================== - -(define (common:get-sync-lock-filepath) - (let* ((tmp-area (common:get-db-tmp-area)) - (lockfile (conc tmp-area "/megatest.db.sync-lock"))) - lockfile)) - -;;====================================================================== - (debug:print 0 *default-log-port* "Failed to find this executable! Using what can be found on the path") - -;;====================================================================== -;; L O C K I N G M E C H A N I S M S -;;====================================================================== - -;;====================================================================== -;; faux-lock is deprecated. Please use simple-lock below -;;====================================================================== -;;====================================================================== -;; -(define (common:faux-lock keyname #!key (wait-time 8)(allow-lock-steal #t)) - (if (rmt:no-sync-get/default keyname #f) ;; do not be tempted to compare to pid. locking is a one-shot action, if already locked for this pid it doesn't actually count - (if (> wait-time 0) - (begin - (thread-sleep! 1) - (if (eq? wait-time 1) ;; only one second left, steal the lock - (begin - (debug:print-info 0 *default-log-port* "stealing lock for " keyname) - (common:faux-unlock keyname force: #t))) - (common:faux-lock keyname wait-time: (- wait-time 1))) - #f) - (begin - (rmt:no-sync-set keyname (conc (current-process-id))) - (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f)))))) - -(define (common:faux-unlock keyname #!key (force #f)) - (if (or force (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f)))) - (begin - (if (rmt:no-sync-get/default keyname #f) (rmt:no-sync-del! keyname)) - #t) - #f)) -;;====================================================================== - -;;====================================================================== -;; simple lock. improve and converge on this one. -;; -(define (common:simple-lock keyname) - (rmt:no-sync-get-lock keyname)) - -(define (common:simple-unlock keyname #!key (force #f)) - (rmt:no-sync-del! keyname)) - - -(define (common:run-a-command cmd #!key (with-vars #f) (with-orig-env #f)) - (let* ((pre-cmd (dtests:get-pre-command)) - (post-cmd (dtests:get-post-command)) - (fullcmd (if (or pre-cmd post-cmd) - (conc pre-cmd cmd post-cmd) - (conc "viewscreen " cmd)))) - (debug:print-info 02 *default-log-port* "Running command: " fullcmd) - (cond - (with-vars (common:without-vars fullcmd)) - (with-orig-env (common:with-orig-env fullcmd)) - (else (common:without-vars fullcmd "MT_.*"))))) - -;;====================================================================== -;; ideally put all this info into the db, no need to preserve it across moving homehost -;; -;; return list of -;; ( reachable? cpuload update-time ) -(define (common:get-host-info hostname) - (let* ((loadinfo (rmt:get-latest-host-load hostname)) ;; if this host happens to have been recently used by a test reuse the load data - (load (car loadinfo)) - (load-sample-time (cdr loadinfo)) - (load-sample-age (- (current-seconds) load-sample-time)) - (loadinfo-timeout-seconds 6) ;; this was 20 seconds, seems way too lax. Switch to 6 seconds - (host-last-update-timeout-seconds 4) - (host-rec (hash-table-ref/default *host-loads* hostname #f)) - ) - (cond - ((< load-sample-age loadinfo-timeout-seconds) - (list #t - load-sample-time - load)) - ((and host-rec - (< (current-seconds) (+ (host-last-update host-rec) host-last-update-timeout-seconds))) - (list #t - (host-last-update host-rec) - (host-last-cpuload host-rec ))) - ((common:unix-ping hostname) - (list #t - (current-seconds) - (alist-ref 'adj-core-load (common:get-normalized-cpu-load hostname)))) ;; this is cheaper than you might think. get-normalized-cpu-load is cached for up to 5 seconds - (else - (list #f 0 -1) ;; bad host, don't use! - )))) - -;;====================================================================== -;; see defstruct host at top of file. -;; host: reachable last-update last-used last-cpuload -;; -(define (common:update-host-loads-table hosts-raw) - (let* ((hosts (filter (lambda (x) - (string-match (regexp "^\\S+$") x)) - hosts-raw))) - (for-each - (lambda (hostname) - (let* ((rec (let ((h (hash-table-ref/default *host-loads* hostname #f))) - (if h - h - (let ((h (make-host))) - (hash-table-set! *host-loads* hostname h) - h)))) - (host-info (common:get-host-info hostname)) - (is-reachable (car host-info)) - (last-reached-time (cadr host-info)) - (load (caddr host-info))) - (host-reachable-set! rec is-reachable) - (host-last-update-set! rec last-reached-time) - (host-last-cpuload-set! rec load))) - hosts))) - -;;====================================================================== -;; hash-table tree to html list tree -;; -;; tipfunc takes two parameters: y the tip value and path the path to that point -;; -(define (common:htree->html ht path tipfunc) - (let ((datlist (sort (hash-table->alist ht) -;;====================================================================== -;;====================================================================== - (lambda (a b) - (string< (car a)(car b)))))) - (if (null? datlist) - (tipfunc #f path) ;; really shouldn't get here - (s:ul - (map (lambda (x) - (let* ((levelname (car x)) - (y (cdr x)) - (newpath (append path (list levelname))) - (leaf (or (not (hash-table? y)) - (null? (hash-table-keys y))))) - (if leaf - (s:li (tipfunc y newpath)) - (s:li - (list - levelname - (common:htree->html y newpath tipfunc)))))) - datlist))))) - ;;====================================================================== ;; logic for getting homehost. Returns (host . at-home) ;; IF *toppath* is not set, wait up to five seconds trying every two seconds ;; (this is to accomodate the watchdog) ;; @@ -250,11 +77,10 @@ *home-host*) ((not *toppath*) (mutex-unlock! *homehost-mutex*) (launch:setup) ;; safely mutexed now (if (> trynum 0) -;;====================================================================== (begin (thread-sleep! 2) (common:get-homehost trynum: (- trynum 1))) #f)) (else @@ -294,10 +120,16 @@ (at-home (or (equal? homehost currhost) (equal? homehost bestadrs)))) (set! *home-host* (cons homehost at-home)) (mutex-unlock! *homehost-mutex*) *home-host*)))) +(define (common:wait-for-homehost-load maxnormload msg) + (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. + #f + (common:get-homehost))) + (hh (if hh-dat (car hh-dat) #f))) + (common:wait-for-normalized-load maxnormload msg hh))) ;;====================================================================== ;; am I on the homehost? ;;====================================================================== ;;====================================================================== @@ -307,1031 +139,9 @@ (let ((hh (common:get-homehost))) (if hh (cdr hh) #f))) -;;====================================================================== -;;====================================================================== -;;====================================================================== -;;====================================================================== -;; D A S H B O A R D U S E R V I E W S -;;====================================================================== - -;;====================================================================== -;; first read ~/views.config if it exists, then read $MTRAH/views.config if it exists -;; -(define (common:load-views-config) - (let* ((view-cfgdat (make-hash-table)) - (home-cfgfile (conc (get-environment-variable "HOME") "/.mtviews.config")) - (mthome-cfgfile (conc *toppath* "/.mtviews.config"))) - (if (common:file-exists? mthome-cfgfile) - (read-config mthome-cfgfile view-cfgdat #t)) - ;; we load the home dir file AFTER the MTRAH file so the user can clobber settings when running the dashboard in read-only areas - (if (common:file-exists? home-cfgfile) - (read-config home-cfgfile view-cfgdat #t)) - view-cfgdat)) - -;;====================================================================== -;; T A R G E T S , S T A T E , S T A T U S , -;; R U N N A M E A N D T E S T P A T T -;;====================================================================== - -;;====================================================================== -;; (map print (map car (hash-table->alist (read-config "runconfigs.config" #f #t)))) -;; -(define (common:get-runconfig-targets #!key (configf #f)) - (let ((targs (sort (map car (hash-table->alist - (or configf ;; NOTE: There is no value in using runconfig:read here. - (read-config (conc *toppath* "/runconfigs.config") - #f #t) - (make-hash-table)))) - stringsymbol force-setting) #f)) - (force-result (case force-type - ((#f) #f) - ((always) #t) - ((test) (if (args:get-arg "-execute") ;; we are in a test - #t - #f)) - (else - (debug:print 0 *default-log-port* "ERROR: Bad server force setting " force-setting ", forcing server.") - #t)))) ;; default to requiring server - (if force-result - (begin - (debug:print-info 0 *default-log-port* "forcing use of server, force setting is \"" force-setting "\".") - #t) - #f))) - -(define (common:in-running-test?) - (and (args:get-arg "-execute") (get-environment-variable "MT_CMDINFO"))) -;;====================================================================== -;;====================================================================== -;;====================================================================== -;;====================================================================== -;;====================================================================== -;;====================================================================== -;;====================================================================== -;;====================================================================== -;;====================================================================== - -(define (debug:setup debug-arg verbose-arg) ;; debug-arg= #f, #t or 'noprop - (let ((debugstr (or debug-arg ;; (args:get-arg "-debug") - ;; (args:get-arg "-debug-noprop") - (get-environment-variable "MT_DEBUG_MODE")))) - (debug:calc-verbosity debugstr verbose-arg) - ;; (debug:check-verbosity *verbosity* debugstr) - ;; if we were handed a bad verbosity rule then we will override it with 1 and continue - (if (not (verbosity))(set! (verbosity) 1)) - (if (and (not (eq? debug-arg 'noprop)) - (or debug-arg - (not (get-environment-variable "MT_DEBUG_MODE")))) - (setenv "MT_DEBUG_MODE" (if (list? (verbosity)) - (string-intersperse (map conc (verbosity)) ",") - (conc (verbosity))))))) - -;;====================================================================== -;;====================================================================== -;;====================================================================== -;;====================================================================== -;;====================================================================== -;;====================================================================== -;;====================================================================== - -;;====================================================================== -;;====================================================================== -;;====================================================================== -;;====================================================================== -;;====================================================================== -;;====================================================================== -;;====================================================================== -;; go through the hosts from least recently used to most recently used, pick the first that meets the load criteral from the -;; [host-rules] section. -;; -(define (common:get-least-loaded-host hosts-raw host-type configdat) - (let* ((rdat (configf:lookup configdat "host-rules" host-type)) - (rules (common:val->alist (or rdat "") convert: #t)) ;; maxnload, maxnjobs, maxjobrate - (maxnload (common:alist-ref/default 'maxnload rules 1.5)) ;; max normalized load - (maxnjobs (common:alist-ref/default 'maxnjobs rules 1.5)) ;; max normalized number of jobs - (maxjobrate (common:alist-ref/default 'maxjobrate rules (/ 1 6))) ;; max rate of submitting jobs to a given host in jobs/second - (hosts (filter (lambda (x) - (string-match (regexp "^\\S+$") x)) - hosts-raw)) - ;; (best-host #f) - (get-rec (lambda (hostname) - ;; (print "get-rec hostname=" hostname) - (let ((h (hash-table-ref/default *host-loads* hostname #f))) - (if h - h - (let ((h (make-host))) - (hash-table-set! *host-loads* hostname h) - h))))) - (best-load 99999) - (curr-time (current-seconds)) - (get-hosts-sorted (lambda (hosts) - (sort hosts (lambda (a b) - (let ((a-rec (get-rec a)) - (b-rec (get-rec b))) - ;; (print "a=" a " a-rec=" a-rec " host-last-used=" (host-last-used a-rec)) - ;; (print "b=" b " b-rec=" b-rec " host-last-used=" (host-last-used b-rec)) - (< (host-last-used a-rec) - (host-last-used b-rec)))))))) - (debug:print 0 *default-log-port* "INFO: hosts-sorted=" (get-hosts-sorted hosts)) - (if (null? hosts) - #f ;; no hosts to select from. All done and giving up now. - (let ((hosts-sorted (get-hosts-sorted hosts))) - (common:update-host-loads-table hosts) - (let loop ((hostname (car hosts-sorted)) - (tal (cdr hosts-sorted)) - (best-host #f)) - (let* ((rec (get-rec hostname)) - (reachable (host-reachable rec)) - (load (host-last-cpuload rec)) - (last-used (host-last-used rec)) - (delta (- curr-time last-used)) - (job-rate (if (> delta 0) - (/ 1 delta) - 999)) ;; jobs per second - (new-best - (cond - ((not reachable) - (debug:print 0 *default-log-port* "Skipping host " hostname " as it cannot be reached.") - best-host) - ((and (< load maxnload) ;; load is acceptable - (< job-rate maxjobrate)) ;; job rate is acceptable - (set! best-load load) - hostname) - (else best-host)))) - (debug:print 0 *default-log-port* "INFO: Trying host " hostname " with load " load ", last used " delta " seconds ago, with job-rate " job-rate " for running a test." ) - (if new-best - (begin ;; found a host, return it - (debug:print 0 *default-log-port* "INFO: Found host: " new-best " load: " load " last-used: " delta " seconds ago, with job-rate: " job-rate) - (host-last-used-set! rec curr-time) - new-best) - (if (null? tal) #f (loop (car tal)(cdr tal) best-host))))))))) - -;;====================================================================== -;; '(print (string-intersperse (map cadr (hash-table-ref/default (read-config "megatest.config" \#f \#t) "disks" '"'"'("none" ""))) "\n"))' -(define (common:get-disks #!key (configf #f)) - (hash-table-ref/default - (or configf (read-config "megatest.config" #f #t)) - "disks" '("none" ""))) - -;;====================================================================== -;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp -;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server) -;; -(define (common:readonly-watchdog dbstruct) - (thread-sleep! 0.05) ;; delay for startup - (debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.") - ;; sync megatest.db to /tmp/.../megatst.db - (let* ((sync-cool-off-duration 3) - (golden-mtdb (dbr:dbstruct-mtdb dbstruct)) - (golden-mtpath (db:dbdat-get-path golden-mtdb)) - (tmp-mtdb (dbr:dbstruct-tmpdb dbstruct)) - (tmp-mtpath (db:dbdat-get-path tmp-mtdb))) - (debug:print-info 0 *default-log-port* "Read-only periodic sync thread started.") - (let loop ((last-sync-time 0)) - (debug:print-info 13 *default-log-port* "loop top tmp-mtpath="tmp-mtpath" golden-mtpath="golden-mtpath) - (let* ((duration-since-last-sync (- (current-seconds) last-sync-time))) - (debug:print-info 13 *default-log-port* "duration-since-last-sync="duration-since-last-sync) - (if (and (not *time-to-exit*) - (< duration-since-last-sync sync-cool-off-duration)) - (thread-sleep! (- sync-cool-off-duration duration-since-last-sync))) - (if (not *time-to-exit*) - (let ((golden-mtdb-mtime (file-modification-time golden-mtpath)) - (tmp-mtdb-mtime (file-modification-time tmp-mtpath))) - (if (> golden-mtdb-mtime tmp-mtdb-mtime) - (if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back - (let ((res (db:multi-db-sync dbstruct 'old2new))) - (debug:print-info 13 *default-log-port* "rosync called, " res " records transferred.")))) - (loop (current-seconds))) - #t))) - (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath))) - -;;====================================================================== -;; Force a megatest cleanup-db if version is changed and skip-version-check not specified -;; Do NOT check if not on homehost! -;; -(define (common:exit-on-version-changed) - (if (common:on-homehost?) - (if (common:api-changed?) - (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config")) - (dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db")) - (read-only (not (file-write-access? dbfile))) - (dbstruct (db:setup #t))) - (debug:print 0 *default-log-port* - "WARNING: Version mismatch!\n" - " expected: " (common:version-signature) "\n" - " got: " (common:get-last-run-version)) - (cond - ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t) - ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only) - (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db - (debug:print 0 *default-log-port* " I see you are the owner of megatest.config, attempting to cleanup and reset to new version") - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "Failed to switch versions. exn=" exn) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (print-call-chain (current-error-port)) - (exit 1)) - (common:cleanup-db dbstruct))) - ((not (common:file-exists? mtconf)) - (debug:print 0 *default-log-port* " megatest.config does not exist in this area. Cannot proceed with megatest version migration.") - (exit 1)) - ((not (common:file-exists? dbfile)) - (debug:print 0 *default-log-port* " megatest.db does not exist in this area. Cannot proceed with megatest version migration.") - (exit 1)) - ((not (eq? (current-user-id)(file-owner mtconf))) - (debug:print 0 *default-log-port* " You do not own megatest.db in this area. Cannot proceed with megatest version migration.") - (exit 1)) - (read-only - (debug:print 0 *default-log-port* " You have read-only access to this area. Cannot proceed with megatest version migration.") - (exit 1)) - (else - (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"") - (exit 1))))))) -;;====================================================================== -;; (begin -;; (debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.") -;; (exit 1)))) - -;;====================================================================== -;; Rotate logs, logic: -;; if > 500k and older than 1 week: -;; remove previous compressed log and compress this log -;; WARNING: This proc operates assuming that it is in the directory above the -;; logs directory you wish to log-rotate. -;; -(define (common:rotate-logs) - (let* ((all-files (make-hash-table)) - (stats (make-hash-table)) - (inc-stat (lambda (key) - (hash-table-set! stats key (+ (hash-table-ref/default stats key 0) 1)))) - (max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "300")))) ;; name -> age - (if (not (directory-exists? "logs"))(create-directory "logs")) - (directory-fold - (lambda (file rem) - (handle-exceptions - exn - (begin - (debug:print-info 2 *default-log-port* "unable to rotate log " file ", probably handled by another process, this is safe to ignore. exn=" exn) - (debug:print 2 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - ;; (print-call-chain (current-error-port)) ;; - ) - (let* ((fullname (conc "logs/" file)) - (mod-time (file-modification-time fullname)) - (file-age (- (current-seconds) mod-time)) - (file-old (> file-age (* 48 60 60))) - (file-big (> (file-size fullname) 200000))) - (hash-table-set! all-files file mod-time) - (if (or (and (string-match "^.*.log" file) - file-old - file-big) - (and (string-match "^server-.*.log" file) - file-old)) - (let ((gzfile (conc fullname ".gz"))) - (if (common:file-exists? gzfile) - (begin - (debug:print-info 0 *default-log-port* "removing " gzfile) - (delete-file* gzfile) - (hash-table-delete! all-files gzfile) ;; needed? - )) - (debug:print-info 0 *default-log-port* "compressing " file) - (system (conc "gzip " fullname)) - (inc-stat "gzipped") - (hash-table-set! all-files (conc file ".gz") file-age) ;; add the .gz file and remove the base file - (hash-table-delete! all-files file) - ) - (if (and (> file-age (* (string->number (or (configf:lookup *configdat* "setup" "log-expire-days") "30")) 24 3600)) - (file-exists? fullname)) ;; just in case it was gzipped - will get it next time - (handle-exceptions - exn - #f - (if (directory? fullname) - (begin - (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") - (inc-stat "directories")) - (begin - (delete-file* fullname) - (inc-stat "deleted"))) - (hash-table-delete! all-files file))))))) - '() - "logs") - (for-each - (lambda (category) - (let ((quant (hash-table-ref/default stats category 0))) - (if (> quant 0) - (debug:print-info 0 *default-log-port* category " log files: " quant)))) - `("deleted" "gzipped" "directories")) - (let ((num-logs (hash-table-size all-files))) - (if (> num-logs max-allowed) ;; because NFS => don't let number of logs exceed 300 - (let ((files (take (sort (hash-table-keys all-files) - (lambda (a b) - (< (hash-table-ref all-files a)(hash-table-ref all-files b)))) - (- num-logs max-allowed)))) - (for-each - (lambda (file) - (let* ((fullname (conc "logs/" file))) - (if (directory? fullname) - (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") - (handle-exceptions - exn - (debug:print-error 0 *default-log-port* "failed to remove " fullname ", exn=" exn) - (delete-file* fullname))))) - files) - (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files.")))))) - -;;====================================================================== -;; calculate a delay number based on a droop curve -;; inputs are: -;; - load-in, load as from uptime, NOT normalized -;; - numcpus, number of cpus, ideally use the real cpus, not threads -;; -(define (common:get-delay load-in numcpus) - (let* ((ratio (/ load-in numcpus)) - (new-option (configf:lookup *configdat* "load" "new-load-method")) - (paramstr (or (configf:lookup *configdat* "load" "exp-params") - "15 12 1281453987.9543 0.75")) ;; 5 4 10 1")) - (paramlst (map string->number (string-split paramstr)))) - (if new-option - (begin - (cond ((and (>= ratio 0) (< ratio .5)) - 0) - ((and (>= ratio 0.5) (<= ratio .9)) - (* ratio (/ 5 .9))) - ((and (> ratio .9) (<= ratio 1.1)) - (+ 5 (* (- ratio .9) (/ 55 .2)))) - ((> ratio 1.1) - 60))) - (match paramlst - ((r1 r2 s1 s2) - (debug:print 3 *default-log-port* "Using params r1=" r1 " r2=" r2 " s1=" s1 " s2=" s2) - (min (max (/ (expt r1 (* r2 s2 ratio)) s1) 0) 30)) - (else - (debug:print 0 *default-log-port* "BAD exp-params, should be \"r1 r2 s1 s2\" but got " paramstr) - 30))))) - -(define (common:print-delay-table) - (let loop ((x 0)) - (print x "," (common:get-delay x 1)) - (if (< x 2) - (loop (+ x 0.1))))) - -;;====================================================================== -;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load -;; count - count down to zero, at some point we'd give up if the load never drops -;; num-tries - count down to zero number tries to get numcpus -;; -(define (common:wait-for-cpuload maxnormload numcpus-in - #!key (count 1000) - (msg #f)(remote-host #f)(num-tries 5)) - (let* ((loadavg (common:get-cpu-load remote-host)) - ;; not possible to have zero. If we get 1, it's possible that we got the previous default, and we should check again - (numcpus (if (<= 1 numcpus-in) - (common:get-num-cpus remote-host) - numcpus-in)) - (first (car loadavg)) - (next (cadr loadavg)) - (adjmaxload (* maxnormload (max 1 numcpus))) ;; possible bug - ;; where numcpus - ;; (or could be - ;; maxload) is - ;; zero, crude - ;; fallback is to - ;; at least use 1 - ;; effective load accounts for load jumps, this should elminate all the first-next-avg, adjwait, load-jump-limit - ;; etc. - (effective-load (common:get-intercept first next)) - (recommended-delay (common:get-delay effective-load numcpus)) - (effective-host (or remote-host "localhost")) - (normalized-effective-load (/ effective-load numcpus)) - (will-wait (> normalized-effective-load maxnormload))) - (if (> recommended-delay 1) - (let* ((actual-delay (min recommended-delay 30))) - (if (common:low-noise-print 30 (conc (round actual-delay) "-safe-load")) - (debug:print-info 0 *default-log-port* "Load control, delaying " - actual-delay " seconds to maintain safe load. current normalized effective load is " - normalized-effective-load". maxnormload = " maxnormload " numcpus = " numcpus " loadavg = " loadavg " effective-load = " effective-load)) - (thread-sleep! actual-delay))) - - (cond - ;; bad data, try again to get the data - ((not will-wait) - (if (common:low-noise-print 3600 (conc (round normalized-effective-load) "-load-acceptable-" effective-host)) - (debug:print 0 *default-log-port* "Effective load on " effective-host " is acceptable at " effective-load " continuing."))) - - ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable - (> num-tries 0)) - (debug:print 0 *default-log-port* "WARNING: received bad data from get-cpu-load " - first ", we'll sleep 10s and try " num-tries " more times.") - (thread-sleep! 10) - (common:wait-for-cpuload maxnormload numcpus-in - count: count remote-host: remote-host num-tries: (- num-tries 1))) - - ;; need to wait for load to drop - ((and will-wait ;; (> first adjmaxload) - (> count 0)) - (debug:print-info 0 *default-log-port* - "Delaying 15" ;; adjwait - " seconds due to normalized effective load " normalized-effective-load ;; first - " exceeding max of " adjmaxload - " on server " (or remote-host (get-host-name)) - " (normalized load-limit: " maxnormload ") " (if msg msg "")) - (thread-sleep! 15) ;; adjwait) - (common:wait-for-cpuload maxnormload numcpus count: (- count 1) msg: msg remote-host: remote-host) - ;; put the message here to indicate came out of waiting - (debug:print-info 1 *default-log-port* - "On host: " effective-host - ", effective load: " effective-load - ", numcpus: " numcpus - ", normalized effective load: " normalized-effective-load - )) - ;; overloaded and count expired (i.e. went to zero) - (else - (if (> num-tries 0) ;; should be "num-tries-left". - (if (common:low-noise-print 30 (conc (round effective-load) "-load-acceptable-" effective-host)) - (debug:print 0 *default-log-port* "Load on " effective-host " is acceptable at effective normalized load of " - normalized-effective-load " continuing.")) - (debug:print 0 *default-log-port* "Load on " effective-host ", " - first" could not be retrieved. Giving up and continuing.")))))) - -;;====================================================================== -;; wait for normalized cpu load to drop below maxload -;; -(define (common:wait-for-normalized-load maxnormload msg remote-host #!optional (rem-tries 5)) - (let ((num-cpus (common:get-num-cpus remote-host))) - (if num-cpus - (common:wait-for-cpuload maxnormload num-cpus 15 msg: msg remote-host: remote-host) - (begin - (thread-sleep! (random 60)) ;; we failed to get num cpus. wait a bit and try again - (if (> rem-tries 0) - (common:wait-for-normalized-load maxnormload msg remote-host (- rem-tries 1)) - #f))))) - -;;====================================================================== -;;====================================================================== -;; given path get free space, allows override in [setup] -;; with free-space-script /path/to/some/script.sh -;; -(define (get-df path) - (if (configf:lookup *configdat* "setup" "free-space-script") - (with-input-from-pipe - (conc (configf:lookup *configdat* "setup" "free-space-script") " " path) - (lambda () - (let ((res (read-line))) - (if (string? res) - (string->number res))))) - (get-unix-df path))) - -(define (common:check-space-in-dir dirpath required) - (let* ((dbspace (if (directory? dirpath) - (get-df dirpath) - 0))) - (list (> dbspace required) - dbspace - required - dirpath))) - -(define (get-free-inodes path) - (if (configf:lookup *configdat* "setup" "free-inodes-script") - (with-input-from-pipe - (conc (configf:lookup *configdat* "setup" "free-inodes-script") " " path) -;;====================================================================== -;;====================================================================== -;;====================================================================== -;;====================================================================== -;;====================================================================== - - (lambda () - (let ((res (read-line))) - (if (string? res) - (string->number res))))) - (get-unix-inodes path))) -;;====================================================================== -;;====================================================================== -;;====================================================================== -;;====================================================================== -;;====================================================================== -;;====================================================================== -;;====================================================================== -;;====================================================================== -;;====================================================================== - -;;====================================================================== -;; T E S T L A U N C H I N G P E R I T E M W I T H H O S T T Y P E S -;;====================================================================== -;; -;; [hosts] -;; arm cubie01 cubie02 -;; x86_64 zeus xena myth01 -;; allhosts #{g hosts arm} #{g hosts x86_64} -;; -;; [host-types] -;; general #MTLOWESTLOAD #{g hosts allhosts} -;; arm #MTLOWESTLOAD #{g hosts arm} -;; nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo -;; -;; [host-rules] -;; # maxnload => max normalized load -;; # maxnjobs => max jobs per cpu -;; # maxjobrate => max jobs per second -;; general maxnload=1.1; maxnjobs=1.2; maxjobrate=0.1 -;; -;; [launchers] -;; envsetup general -;; xor/%/n 4C16G -;; % nbgeneral -;; -;; [jobtools] -;; # if defined and not "no" flexi-launcher will bypass "launcher" unless no match. -;; flexi-launcher yes -;; launcher nbfake -;; -(define (common:get-launcher configdat testname itempath) - (let ((fallback-launcher (configf:lookup configdat "jobtools" "launcher"))) - (if (and (configf:lookup configdat "jobtools" "flexi-launcher") ;; overrides launcher - (not (equal? (configf:lookup configdat "jobtools" "flexi-launcher") "no"))) - (let* ((launchers (hash-table-ref/default configdat "launchers" '()))) - (if (null? launchers) - fallback-launcher - (let loop ((hed (car launchers)) - (tal (cdr launchers))) - (let ((patt (car hed)) - (host-type (cadr hed))) - (if (tests:match patt testname itempath) - (begin - (debug:print-info 2 *default-log-port* "Have flexi-launcher match for " testname "/" itempath " = " host-type) - (let ((launcher (configf:lookup configdat "host-types" host-type))) - (if launcher - (let* ((launcher-parts (string-split launcher)) - (launcher-exe (car launcher-parts))) - (if (equal? launcher-exe "#MTLOWESTLOAD") ;; this is our special case, we will find the lowest load and craft a nbfake commandline - (let host-loop ((targ-host (common:get-least-loaded-host (cdr launcher-parts) host-type configdat)) - (count 100)) - (if targ-host - (conc "remrun " targ-host) - (if (> count 0) - (begin - (debug:print 0 *default-log-port* "INFO: Waiting for a host for host-type " host-type) - (thread-sleep! (- 101 count)) - (host-loop (common:get-least-loaded-host (cdr launcher-parts) host-type configdat) - (- count 1))) - (begin - (debug:print 0 *default-log-port* "FATAL: Failed to find a host from #MTLOWESTLOAD for host-type " host-type) - (exit))))) - launcher)) - (begin - (debug:print-info 0 *default-log-port* "WARNING: no launcher found for host-type " host-type) - (if (null? tal) - fallback-launcher - (loop (car tal)(cdr tal))))))) - ;; no match, try again - (if (null? tal) - fallback-launcher - (loop (car tal)(cdr tal)))))))) - fallback-launcher))) - -;; (let ((cmddat (make-qitem -;; command: command -;; host-port: host-port -;; params: params))) -;; (queue-push cmddat) ;; put request into the queue -;; (nn-send soc "queued")) ;; reply with "queued" -;; (print "ERROR: ["(common:human-time)"] BAD request " dat)) -;; (loop (nn-recv soc))))) -;; (nn-close soc))) -;;====================================================================== -;;====================================================================== -;;====================================================================== -;;====================================================================== -;;====================================================================== -(define (common:get-pkts-dirs mtconf use-lt) - (let* ((pktsdirs-str (or (configf:lookup mtconf "setup" "pktsdirs") - (and use-lt - (conc (or *toppath* - (current-directory)) - "/lt/.pkts")))) - (pktsdirs (if pktsdirs-str - (string-split pktsdirs-str " ") - #f))) - pktsdirs)) - -;;====================================================================== -(define (common:with-queue-db mtconf proc #!key (use-lt #f)(toppath-in #f)) - (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt)) - (pktsdir (if pktsdirs (car pktsdirs) #f)) - (toppath (or (configf:lookup mtconf "scratchdat" "toppath") - toppath-in)) - (pdbpath (or (configf:lookup mtconf "setup" "pdbpath") pktsdir))) - (cond - ((not (and pktsdir toppath pdbpath)) - (debug:print 0 *default-log-port* "ERROR: settings are missing in your megatest.config for area management.") - (debug:print 0 *default-log-port* " you need to have pktsdirs in the [setup] section.")) - ((not (common:file-exists? pktsdir)) - (debug:print 0 *default-log-port* "ERROR: pkts directory not found " pktsdir)) - ((not (equal? (file-owner pktsdir)(current-effective-user-id))) - (debug:print 0 *default-log-port* "ERROR: directory " pktsdir " is not owned by " (current-effective-user-name))) - (else - (let* ((pdb (open-queue-db pdbpath "pkts.db" - schema: '("CREATE TABLE groups (id INTEGER PRIMARY KEY,groupname TEXT, CONSTRAINT group_constraint UNIQUE (groupname));")))) - (proc pktsdirs pktsdir pdb) - (dbi:close pdb)))))) - -;;====================================================================== -;; check space in dbdir and in megatest dir -;; returns: ok/not dbspace required-space -;; -(define (common:check-db-dir-space) - (let* ((required (string->number - ;; default is 1GB (or actually a billion bytes) This is the number of 1 kB blocks. - (or (configf:lookup *configdat* "setup" "dbdir-space-required") - "1000000"))) - (dbdir (common:get-db-tmp-area)) ;; (db:get-dbdir)) - (tdbspace (common:check-space-in-dir dbdir required)) - (mdbspace (common:check-space-in-dir *toppath* required))) - (sort (list tdbspace mdbspace) (lambda (a b) - (< (cadr a)(cadr b)))))) - -;;====================================================================== -;; check available space in dbdir, exit if insufficient -;; -(define (common:check-db-dir-and-exit-if-insufficient) - (let* ((spacedat (car (common:check-db-dir-space))) ;; look only at worst for now - (is-ok (car spacedat)) - (dbspace (cadr spacedat)) - (required (caddr spacedat)) - (dbdir (cadddr spacedat))) - (if (not is-ok) - (begin - (debug:print-error 0 *default-log-port* "Insufficient space in " dbdir ", require " required ", have " dbspace ", exiting now.") - (exit 1))))) - -;;====================================================================== -;; paths is list of lists ((name path) ... ) -;; -(define (common:get-disk-with-most-free-space disks minsize) - (let* ((best #f) - (bestsize 0) - (default-min-inodes-string "1000000") - (default-min-inodes (string->number default-min-inodes-string)) - (min-inodes (or (string->number (if (configf:lookup *configdat* "setup" "min_inodes") (configf:lookup *configdat* "setup" "min_inodes") default-min-inodes-string)) default-min-inodes))) - - (for-each - (lambda (disk-num) - (let* ((dirpath (cadr (assoc disk-num disks))) - (freespc (cond - ((not (directory? dirpath)) - (if (common:low-noise-print 300 "disks not a dir " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it.")) - -1) - ((not (file-write-access? dirpath)) - (if (common:low-noise-print 300 "disks not writeable " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it.")) - -1) - ((not (eq? (string-ref dirpath 0) #\/)) - (if (common:low-noise-print 300 "disks not a proper path " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it.")) - -1) - (else - (get-df dirpath)))) - (free-inodes (cond - ((not (directory? dirpath)) - (if (common:low-noise-print 300 "disks not a dir " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it.")) - -1) - ((not (file-write-access? dirpath)) - (if (common:low-noise-print 300 "disks not writeable " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it.")) - -1) - ((not (eq? (string-ref dirpath 0) #\/)) - (if (common:low-noise-print 300 "disks not a proper path " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it.")) - -1) - (else - (get-free-inodes dirpath)))) - ;;(free-inodes (get-free-inodes dirpath)) - ) - (debug:print 2 *default-log-port* "INFO: disk " disk-num " path " dirpath " free space " freespc " free inodes " free-inodes) - (if (and (> freespc bestsize)(> free-inodes min-inodes )) - (begin - (set! best (cons disk-num dirpath)) - (set! bestsize freespc))) - ;;(print "Processing: " disk-num " bestsize: " bestsize " best: " best " freespc: " freespc " min-inodes: " min-inodes " free-inodes: " free-inodes) - )) - (map car disks)) - (if (and best (> bestsize minsize)) - best - #f))) ;; #f means no disk candidate found - -;;====================================================================== -;; from metadat lookup MEGATEST_VERSION -;; -(define (common:get-last-run-version) ;; RADT => How does this work in send-receive function??; assume it is the value saved in some DB - (rmt:get-var "MEGATEST_VERSION")) - -(define (common:set-last-run-version) - (rmt:set-var "MEGATEST_VERSION" (common:version-signature))) - -(define (common:get-last-run-version-number) - (string->number - (substring (common:get-last-run-version) 0 6))) - -;;====================================================================== -;; postive number if megatest version > db version -;; negative number if megatest version < db version -(define (common:version-db-delta) - (- megatest-version (common:get-last-run-version-number))) - -(define (common:version-changed?) - (not (equal? (common:get-last-run-version) - (common:version-signature)))) - -(define (common:api-changed?) - (not (equal? (substring (->string megatest-version) 0 4) - (substring (conc (common:get-last-run-version)) 0 4)))) - -;;====================================================================== -;; Move me elsewhere ... -;; RADT => Why do we meed the version check here, this is called only if version misma -;; -(define (common:cleanup-db dbstruct #!key (full #f)) - (apply db:multi-db-sync - dbstruct - 'schema - ;; 'new2old - 'killservers - 'adj-target - ;; 'old2new - 'new2old - ;; (if full - '(dejunk) - ;; '()) - ) - (if (common:api-changed?) - (common:set-last-run-version))) - -;;====================================================================== -;; use to transition to area-name -(define common:get-area-name common:get-testsuite-name) - -(define (common:load-pkts-to-db mtconf #!key (use-lt #f)) - (common:with-queue-db - mtconf - (lambda (pktsdirs pktsdir pdb) - (for-each - (lambda (pktsdir) ;; look at all - (cond - ((not (common:file-exists? pktsdir)) - (debug:print 0 *default-log-port* "ERROR: packets directory " pktsdir " does not exist.")) - ((not (directory? pktsdir)) - (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not a directory.")) - ((not (file-read-access? pktsdir)) - (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not readable.")) - (else - (debug:print-info 0 *default-log-port* "Loading packets found in " pktsdir) - (let ((pkts (glob (conc pktsdir "/*.pkt")))) - (for-each - (lambda (pkt) - (let* ((uuid (cadr (string-match ".*/([0-9a-f]+).pkt" pkt))) - (exists (lookup-by-uuid pdb uuid #f))) - (if (not exists) - (let* ((pktdat (string-intersperse - (with-input-from-file pkt read-lines) - "\n")) - (apkt (pkt->alist pktdat)) - (ptype (alist-ref 'T apkt))) - (add-to-queue pdb pktdat uuid (or ptype 'cmd) #f 0) - (debug:print 4 *default-log-port* "Added " uuid " of type " ptype " to queue")) - (debug:print 4 *default-log-port* "pkt: " uuid " exists, skipping...") - ))) - pkts))))) - pktsdirs)) - use-lt: use-lt)) - -;;====================================================================== -;; use-lt is use linktree "lt" link to find pkts dir -(define (common:save-pkt pktalist-in mtconf use-lt #!key (add-only #f)) ;; add-only saves the pkt only if there is a parent already - (if (or add-only - (hash-table-exists? *pkts-info* 'last-parent)) - (let* ((parent (hash-table-ref/default *pkts-info* 'last-parent #f)) - (pktalist (if parent - (cons `(parent . ,parent) - pktalist-in) - pktalist-in))) - (let-values (((uuid pkt) - (alist->pkt pktalist common:pkts-spec))) - (hash-table-set! *pkts-info* 'last-parent uuid) - (let ((pktsdir (or (hash-table-ref/default *pkts-info* 'pkts-dir #f) - (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt)) - (pktsdir (car pktsdirs))) ;; assume it is there - (hash-table-set! *pkts-info* 'pkts-dir pktsdir) - pktsdir)))) - (handle-exceptions - exn - (debug:print-info 0 "failed to write out packet to " pktsdir ", exn=" exn) ;; don't care if this failed for now but MUST FIX - BUG!! - (if (not (file-exists? pktsdir)) - (create-directory pktsdir #t)) - (with-output-to-file - (conc pktsdir "/" uuid ".pkt") - (lambda () - (print pkt))))))))) -;; (set! *common:telemetry-log-socket* #f))))) - Index: commonmod.scm ================================================================== --- commonmod.scm +++ commonmod.scm @@ -18,23 +18,26 @@ ;;====================================================================== (declare (unit commonmod)) (declare (uses margsmod)) +(declare (uses debugprint)) (module commonmod * ;;(import scheme chicken data-structures extras files ports) (import scheme chicken) (import margsmod) +(import debugprint) (use data-structures extras files ports) (use (prefix base64 base64:) (prefix sqlite3 sqlite3:) (srfi 18) + (prefix dbi dbi:) directory-utils format matchable md5 message-digest @@ -45,14 +48,109 @@ sparse-vectors srfi-1 srfi-13 srfi-69 stack + stml2 typed-records z3 ) +(include "common_records.scm") + +;;====================================================================== +;; (require-library margs) +;; (include "margs.scm") + +;; (define old-exit exit) +;; +;; (define (exit . code) +;; (if (null? code) +;; (old-exit) +;; (old-exit code))) + +(define *numcpus-cache* (make-hash-table)) + +;;====================================================================== +;; use to transition to area-name + +(define (common:get-sync-lock-filepath) + (let* ((tmp-area (common:get-db-tmp-area)) + (lockfile (conc tmp-area "/megatest.db.sync-lock"))) + lockfile)) + + +(define (common:get-sync-lock-filepath) + (let* ((tmp-area (common:get-db-tmp-area)) + (lockfile (conc tmp-area "/megatest.db.sync-lock"))) + lockfile)) + +;;====================================================================== +;; (debug:print 0 *default-log-port* "Failed to find this executable! Using what can be found on the path") + + +(define (configf:lookup cfgdat section var) + (if (hash-table? cfgdat) + (let ((sectdat (hash-table-ref/default cfgdat section '()))) + (if (null? sectdat) + #f + (let ((match (assoc var sectdat))) + (if match ;; (and match (list? match)(> (length match) 1)) + (cadr match) + #f)) + )) + #f)) + +(define (common:get-testsuite-name) + (or (configf:lookup *configdat* "setup" "area-name") ;; megatest is a flexible tool, testsuite is too limiting a description. + (configf:lookup *configdat* "setup" "testsuite" ) + (getenv "MT_TESTSUITE_NAME") + (pathname-file (or (if (string? *toppath* ) + (pathname-file *toppath*) + #f) + (common:get-toppath #f))) + "please-set-setup-area-name")) ;; (pathname-file (current-directory))))) + +(define common:get-area-name common:get-testsuite-name) + + +(define (get-area-name configdat toppath #!optional (short #f)) + ;; look up my area name in areas table (future) + ;; generate auto name + (conc (get-area-path-signature toppath short) + "-" + (common:get-testsuite-name toppath configdat))) + + +;;====================================================================== +;; L O C K I N G M E C H A N I S M S +;;====================================================================== +(define (common:get-db-tmp-area . junk) + (if *db-cache-path* + *db-cache-path* + (if *toppath* ;; common:get-create-writeable-dir + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* "Couldn't create path to " *db-cache-path* ", exn=" exn) + (exit 1)) + (let* ((tsname (common:get-testsuite-name)) + (dbpath (common:get-create-writeable-dir + (list (conc "/tmp/" (current-user-name) + "/megatest_localdb/" + tsname "/" + (string-translate *toppath* "/" ".")) + (conc "/tmp/" (current-process-id) ;; just in case we have an issue with the dir by own user name + "/megatest_localdb/" + tsname + (string-translate *toppath* "/" ".")) + )))) + (set! *db-cache-path* dbpath) + dbpath)) + #f))) + + ;;====================================================================== ;; CONTENTS ;; ;; config file utils @@ -210,99 +308,28 @@ `(,(car entry) . ,(val->alist (cadr entry)))) adat))) ;;====================================================================== -;; debug stuff -;;====================================================================== - -(define verbosity (make-parameter '())) - -;;====================================================================== -;; (define (debug:print . params) #f) -;; (define (debug:print-info . params) #f) -;; -;; (define (set-functions dbgp dbgpinfo) -;; (set! debug:print dbgp) -;; (set! debug:print-info dbgpinfo)) - -;;====================================================================== -;; this was cached based on results from profiling but it turned out the profiling -;; somehow went wrong - perhaps too many processes writing to it. Leaving the caching -;; in for now but can probably take it out later. -;; -(define (debug:calc-verbosity vstr arg) ;; arg is 'v (verbose) or 'q (quiet) - (let* ((res (cond - ((number? vstr) vstr) - ((not (string? vstr)) 1) - ;; ((string-match "^\\s*$" vstr) 1) - (vstr (let ((debugvals (filter number? (map string->number (string-split vstr ","))))) - (cond - ((> (length debugvals) 1) debugvals) - ((> (length debugvals) 0)(car debugvals)) - (else 1)))) - ((eq? arg 'v) 2) ;; verbose - ((eq? arg 'q) 0) ;; quiet - (else 1)))) - (verbosity res) - res)) - -;;====================================================================== -;; check verbosity, #t is ok -#;(define (debug-check-verbosity verbosity vstr) - (if (not (or (number? verbosity) - (list? verbosity))) - (begin - (print "ERROR: Invalid debug value \"" vstr "\"") - #f) - #t)) - -(define (debug:debug-mode n) - (let* ((vb (verbosity))) - (cond - ((and (number? vb) ;; number number - (number? n)) - (<= n vb)) - ((and (list? vb) ;; list number - (number? n)) - (member n vb)) - ((and (list? vb) ;; list list - (list? n)) - (not (null? (lset-intersection! eq? vb n)))) - ((and (number? vb) - (list? n)) - (member vb n))))) - -(define (debug:print n e . params) - (if (debug:debug-mode n) - (with-output-to-port (or e (current-error-port)) - (lambda () - ;; (if *logging* - ;; (db:log-event (apply conc params)) - (apply print params) - )))) ;; ) - -(define (debug:print-error n e . params) - ;; normal print - (if (debug:debug-mode n) - (with-output-to-port (if (port? e) e (current-error-port)) - (lambda () - (apply print "ERROR: " params) - ))) - ;; pass important messages to stderr - (if (and (eq? n 0)(not (eq? e (current-error-port)))) - (with-output-to-port (current-error-port) - (lambda () - (apply print "ERROR: " params) - )))) - -(define (debug:print-info n e . params) - (if (debug:debug-mode n) - (with-output-to-port (if (port? e) e (current-error-port)) - (lambda () - (apply print "INFO: (" n ") " params) ;; res) - )))) +;; debug +;;====================================================================== + +(define (debug:setup debug-arg verbose-arg) ;; debug-arg= #f, #t or 'noprop + (let ((debugstr (or debug-arg ;; (args:get-arg "-debug") + ;; (args:get-arg "-debug-noprop") + (get-environment-variable "MT_DEBUG_MODE")))) + (debug:calc-verbosity debugstr verbose-arg) + ;; (debug:check-verbosity *verbosity* debugstr) + ;; if we were handed a bad verbosity rule then we will override it with 1 and continue + (if (not (verbosity))(set! (verbosity) 1)) + (if (and (not (eq? debug-arg 'noprop)) + (or debug-arg + (not (get-environment-variable "MT_DEBUG_MODE")))) + (setenv "MT_DEBUG_MODE" (if (list? (verbosity)) + (string-intersperse (map conc (verbosity)) ",") + (conc (verbosity))))))) + ;;====================================================================== ;; Safe utilities ;;====================================================================== @@ -888,14 +915,10 @@ ;; This is a cache of pre-reqs met, don't re-calc in cases where called with same params less than ;; five seconds ago (define *pre-reqs-met-cache* (make-hash-table)) -;; cache of verbosity given string -;; -(define *verbosity-cache* (make-hash-table)) - (use posix-extras pathname-expand files) ;; this plugs a hole in posix-extras in recent chicken versions > 4.9) (let-values (( (chicken-release-number chicken-major-version) (apply values @@ -1377,10 +1400,20 @@ ;; (if nstatus (symbol->string nstatus) nstatus)))) ;;====================================================================== ;; D E B U G G I N G S T U F F ;;====================================================================== + +(define (common:debug-setup) + (debug:setup (cond ;; debug arg + ((args:get-arg "-debug-noprop") 'noprop) + ((args:get-arg "-debug") #t) + (else #f)) + (cond ;; verbosity arg + ((args:get-arg "-q") 'v) + ((args:get-arg "-q") 'q) + (else #f)))) (define *verbosity* 1) (define *logging* #f) (define (assoc/default key lst . default) @@ -3255,8 +3288,626 @@ (system (conc "gzip " logfile)) (unsetenv "TARGETHOST_LOGF") (unsetenv "TARGETHOST")))) + +;;====================================================================== +;; hash-table tree to html list tree +;; +;; tipfunc takes two parameters: y the tip value and path the path to that point +;; +(define (common:htree->html ht path tipfunc) + (let ((datlist (sort (hash-table->alist ht) +;;====================================================================== +;;====================================================================== + (lambda (a b) + (string< (car a)(car b)))))) + (if (null? datlist) + (tipfunc #f path) ;; really shouldn't get here + (s:ul + (map (lambda (x) + (let* ((levelname (car x)) + (y (cdr x)) + (newpath (append path (list levelname))) + (leaf (or (not (hash-table? y)) + (null? (hash-table-keys y))))) + (if leaf + (s:li (tipfunc y newpath)) + (s:li + (list + levelname + (common:htree->html y newpath tipfunc)))))) + datlist))))) + +;;====================================================================== + + +;;====================================================================== +;;====================================================================== +;;====================================================================== +;;====================================================================== +;; D A S H B O A R D U S E R V I E W S +;;====================================================================== + +;;====================================================================== + +(define (common:args-get-runname) + (let ((res (or (args:get-arg "-runname") + (args:get-arg ":runname") + (getenv "MT_RUNNAME")))) + ;; (if res (set-environment-variable "MT_RUNNAME" res)) ;; not sure if this is a good idea. side effect and all ... + res)) + +(define (get-with-default val default) + (let ((val (args:get-arg val))) + (if val val default))) + +(define (common:args-get-target #!key (split #f)(exit-if-bad #f)) + (let* ((keys (if (hash-table? *configdat*) (common:get-fields *configdat*) '())) + (numkeys (length keys)) + (target (or (args:get-arg "-reqtarg") + (args:get-arg "-target") + (getenv "MT_TARGET"))) + (tlist (if target (string-split target "/" #t) '())) + (valid (if target + (or (null? keys) ;; probably don't know our keys yet + (and (not (null? tlist)) + (eq? numkeys (length tlist)) + (null? (filter string-null? tlist)))) + #f))) + (if valid + (if split + tlist + target) + (if target + (begin + (debug:print-error 0 *default-log-port* "Invalid target, spaces or blanks not allowed \"" target "\", target should be: " (string-intersperse keys "/") ", have " tlist " for elements") + (if exit-if-bad (exit 1)) + #f) + #f)))) + +;;====================================================================== +;;====================================================================== +;;====================================================================== +;;====================================================================== +;; do we honor the caches of the config files? +;; +(define (common:use-cache?) + (let ((res #t)) ;; priority by order of evaluation + (if *configdat* ;; sillyness here. can't use setup/use-cache to know if we can use the cached files! + (if (equal? (configf:lookup *configdat* "setup" "use-cache") "no") + (set! res #f) + (if (equal? (configf:lookup *configdat* "setup" "use-cache") "yes") + (set! res #t)))) + (if (args:get-arg "-no-cache")(set! res #f)) ;; overrides setting in "setup" + (if (getenv "MT_USE_CACHE") + (if (equal? (getenv "MT_USE_CACHE") "yes") + (set! res #t) + (if (equal? (getenv "MT_USE_CACHE") "no") + (set! res #f)))) ;; overrides -no-cache switch + res)) + +;;====================================================================== +;; force use of server? +;; +(define (common:force-server?) + (let* ((force-setting (configf:lookup *configdat* "server" "force")) + (force-type (if force-setting (string->symbol force-setting) #f)) + (force-result (case force-type + ((#f) #f) + ((always) #t) + ((test) (if (args:get-arg "-execute") ;; we are in a test + #t + #f)) + (else + (debug:print 0 *default-log-port* "ERROR: Bad server force setting " force-setting ", forcing server.") + #t)))) ;; default to requiring server + (if force-result + (begin + (debug:print-info 0 *default-log-port* "forcing use of server, force setting is \"" force-setting "\".") + #t) + #f))) + +(define (common:in-running-test?) + (and (args:get-arg "-execute") (get-environment-variable "MT_CMDINFO"))) +;;====================================================================== +;;====================================================================== +;;====================================================================== +;;====================================================================== +;;====================================================================== +;;====================================================================== +;;====================================================================== +;;====================================================================== +;;====================================================================== + + +;;====================================================================== +;;====================================================================== +;;====================================================================== +;;====================================================================== +;;====================================================================== +;;====================================================================== +;;====================================================================== + +;;====================================================================== +;;====================================================================== +;;====================================================================== +;;====================================================================== +;;====================================================================== +;;====================================================================== + +;;====================================================================== +;; Rotate logs, logic: +;; if > 500k and older than 1 week: +;; remove previous compressed log and compress this log +;; WARNING: This proc operates assuming that it is in the directory above the +;; logs directory you wish to log-rotate. +;; +(define (common:rotate-logs) + (let* ((all-files (make-hash-table)) + (stats (make-hash-table)) + (inc-stat (lambda (key) + (hash-table-set! stats key (+ (hash-table-ref/default stats key 0) 1)))) + (max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "300")))) ;; name -> age + (if (not (directory-exists? "logs"))(create-directory "logs")) + (directory-fold + (lambda (file rem) + (handle-exceptions + exn + (begin + (debug:print-info 2 *default-log-port* "unable to rotate log " file ", probably handled by another process, this is safe to ignore. exn=" exn) + (debug:print 2 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + ;; (print-call-chain (current-error-port)) ;; + ) + (let* ((fullname (conc "logs/" file)) + (mod-time (file-modification-time fullname)) + (file-age (- (current-seconds) mod-time)) + (file-old (> file-age (* 48 60 60))) + (file-big (> (file-size fullname) 200000))) + (hash-table-set! all-files file mod-time) + (if (or (and (string-match "^.*.log" file) + file-old + file-big) + (and (string-match "^server-.*.log" file) + file-old)) + (let ((gzfile (conc fullname ".gz"))) + (if (common:file-exists? gzfile) + (begin + (debug:print-info 0 *default-log-port* "removing " gzfile) + (delete-file* gzfile) + (hash-table-delete! all-files gzfile) ;; needed? + )) + (debug:print-info 0 *default-log-port* "compressing " file) + (system (conc "gzip " fullname)) + (inc-stat "gzipped") + (hash-table-set! all-files (conc file ".gz") file-age) ;; add the .gz file and remove the base file + (hash-table-delete! all-files file) + ) + (if (and (> file-age (* (string->number (or (configf:lookup *configdat* "setup" "log-expire-days") "30")) 24 3600)) + (file-exists? fullname)) ;; just in case it was gzipped - will get it next time + (handle-exceptions + exn + #f + (if (directory? fullname) + (begin + (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") + (inc-stat "directories")) + (begin + (delete-file* fullname) + (inc-stat "deleted"))) + (hash-table-delete! all-files file))))))) + '() + "logs") + (for-each + (lambda (category) + (let ((quant (hash-table-ref/default stats category 0))) + (if (> quant 0) + (debug:print-info 0 *default-log-port* category " log files: " quant)))) + `("deleted" "gzipped" "directories")) + (let ((num-logs (hash-table-size all-files))) + (if (> num-logs max-allowed) ;; because NFS => don't let number of logs exceed 300 + (let ((files (take (sort (hash-table-keys all-files) + (lambda (a b) + (< (hash-table-ref all-files a)(hash-table-ref all-files b)))) + (- num-logs max-allowed)))) + (for-each + (lambda (file) + (let* ((fullname (conc "logs/" file))) + (if (directory? fullname) + (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") + (handle-exceptions + exn + (debug:print-error 0 *default-log-port* "failed to remove " fullname ", exn=" exn) + (delete-file* fullname))))) + files) + (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files.")))))) + +;;====================================================================== +;; calculate a delay number based on a droop curve +;; inputs are: +;; - load-in, load as from uptime, NOT normalized +;; - numcpus, number of cpus, ideally use the real cpus, not threads +;; +(define (common:get-delay load-in numcpus) + (let* ((ratio (/ load-in numcpus)) + (new-option (configf:lookup *configdat* "load" "new-load-method")) + (paramstr (or (configf:lookup *configdat* "load" "exp-params") + "15 12 1281453987.9543 0.75")) ;; 5 4 10 1")) + (paramlst (map string->number (string-split paramstr)))) + (if new-option + (begin + (cond ((and (>= ratio 0) (< ratio .5)) + 0) + ((and (>= ratio 0.5) (<= ratio .9)) + (* ratio (/ 5 .9))) + ((and (> ratio .9) (<= ratio 1.1)) + (+ 5 (* (- ratio .9) (/ 55 .2)))) + ((> ratio 1.1) + 60))) + (match paramlst + ((r1 r2 s1 s2) + (debug:print 3 *default-log-port* "Using params r1=" r1 " r2=" r2 " s1=" s1 " s2=" s2) + (min (max (/ (expt r1 (* r2 s2 ratio)) s1) 0) 30)) + (else + (debug:print 0 *default-log-port* "BAD exp-params, should be \"r1 r2 s1 s2\" but got " paramstr) + 30))))) + +(define (common:print-delay-table) + (let loop ((x 0)) + (print x "," (common:get-delay x 1)) + (if (< x 2) + (loop (+ x 0.1))))) + +;;====================================================================== +;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load +;; count - count down to zero, at some point we'd give up if the load never drops +;; num-tries - count down to zero number tries to get numcpus +;; +(define (common:wait-for-cpuload maxnormload numcpus-in + #!key (count 1000) + (msg #f)(remote-host #f)(num-tries 5)) + (let* ((loadavg (common:get-cpu-load remote-host)) + ;; not possible to have zero. If we get 1, it's possible that we got the previous default, and we should check again + (numcpus (if (<= 1 numcpus-in) + (common:get-num-cpus remote-host) + numcpus-in)) + (first (car loadavg)) + (next (cadr loadavg)) + (adjmaxload (* maxnormload (max 1 numcpus))) ;; possible bug + ;; where numcpus + ;; (or could be + ;; maxload) is + ;; zero, crude + ;; fallback is to + ;; at least use 1 + ;; effective load accounts for load jumps, this should elminate all the first-next-avg, adjwait, load-jump-limit + ;; etc. + (effective-load (common:get-intercept first next)) + (recommended-delay (common:get-delay effective-load numcpus)) + (effective-host (or remote-host "localhost")) + (normalized-effective-load (/ effective-load numcpus)) + (will-wait (> normalized-effective-load maxnormload))) + (if (> recommended-delay 1) + (let* ((actual-delay (min recommended-delay 30))) + (if (common:low-noise-print 30 (conc (round actual-delay) "-safe-load")) + (debug:print-info 0 *default-log-port* "Load control, delaying " + actual-delay " seconds to maintain safe load. current normalized effective load is " + normalized-effective-load". maxnormload = " maxnormload " numcpus = " numcpus " loadavg = " loadavg " effective-load = " effective-load)) + (thread-sleep! actual-delay))) + + (cond + ;; bad data, try again to get the data + ((not will-wait) + (if (common:low-noise-print 3600 (conc (round normalized-effective-load) "-load-acceptable-" effective-host)) + (debug:print 0 *default-log-port* "Effective load on " effective-host " is acceptable at " effective-load " continuing."))) + + ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable + (> num-tries 0)) + (debug:print 0 *default-log-port* "WARNING: received bad data from get-cpu-load " + first ", we'll sleep 10s and try " num-tries " more times.") + (thread-sleep! 10) + (common:wait-for-cpuload maxnormload numcpus-in + count: count remote-host: remote-host num-tries: (- num-tries 1))) + + ;; need to wait for load to drop + ((and will-wait ;; (> first adjmaxload) + (> count 0)) + (debug:print-info 0 *default-log-port* + "Delaying 15" ;; adjwait + " seconds due to normalized effective load " normalized-effective-load ;; first + " exceeding max of " adjmaxload + " on server " (or remote-host (get-host-name)) + " (normalized load-limit: " maxnormload ") " (if msg msg "")) + (thread-sleep! 15) ;; adjwait) + (common:wait-for-cpuload maxnormload numcpus count: (- count 1) msg: msg remote-host: remote-host) + ;; put the message here to indicate came out of waiting + (debug:print-info 1 *default-log-port* + "On host: " effective-host + ", effective load: " effective-load + ", numcpus: " numcpus + ", normalized effective load: " normalized-effective-load + )) + ;; overloaded and count expired (i.e. went to zero) + (else + (if (> num-tries 0) ;; should be "num-tries-left". + (if (common:low-noise-print 30 (conc (round effective-load) "-load-acceptable-" effective-host)) + (debug:print 0 *default-log-port* "Load on " effective-host " is acceptable at effective normalized load of " + normalized-effective-load " continuing.")) + (debug:print 0 *default-log-port* "Load on " effective-host ", " + first" could not be retrieved. Giving up and continuing.")))))) + +;;====================================================================== +;; wait for normalized cpu load to drop below maxload +;; +(define (common:wait-for-normalized-load maxnormload msg remote-host #!optional (rem-tries 5)) + (let ((num-cpus (common:get-num-cpus remote-host))) + (if num-cpus + (common:wait-for-cpuload maxnormload num-cpus 15 msg: msg remote-host: remote-host) + (begin + (thread-sleep! (random 60)) ;; we failed to get num cpus. wait a bit and try again + (if (> rem-tries 0) + (common:wait-for-normalized-load maxnormload msg remote-host (- rem-tries 1)) + #f))))) + +;;====================================================================== +;;====================================================================== +;; given path get free space, allows override in [setup] +;; with free-space-script /path/to/some/script.sh +;; +(define (get-df path) + (if (configf:lookup *configdat* "setup" "free-space-script") + (with-input-from-pipe + (conc (configf:lookup *configdat* "setup" "free-space-script") " " path) + (lambda () + (let ((res (read-line))) + (if (string? res) + (string->number res))))) + (get-unix-df path))) + +(define (common:check-space-in-dir dirpath required) + (let* ((dbspace (if (directory? dirpath) + (get-df dirpath) + 0))) + (list (> dbspace required) + dbspace + required + dirpath))) + +(define (get-free-inodes path) + (if (configf:lookup *configdat* "setup" "free-inodes-script") + (with-input-from-pipe + (conc (configf:lookup *configdat* "setup" "free-inodes-script") " " path) +;;====================================================================== +;;====================================================================== +;;====================================================================== +;;====================================================================== +;;====================================================================== + + (lambda () + (let ((res (read-line))) + (if (string? res) + (string->number res))))) + (get-unix-inodes path))) +;;====================================================================== +;;====================================================================== +;;====================================================================== +;;====================================================================== +;;====================================================================== +;;====================================================================== +;;====================================================================== +;;====================================================================== +;;====================================================================== + +;; (let ((cmddat (make-qitem +;; command: command +;; host-port: host-port +;; params: params))) +;; (queue-push cmddat) ;; put request into the queue +;; (nn-send soc "queued")) ;; reply with "queued" +;; (print "ERROR: ["(common:human-time)"] BAD request " dat)) +;; (loop (nn-recv soc))))) +;; (nn-close soc))) +;;====================================================================== +;;====================================================================== +;;====================================================================== +;;====================================================================== +;;====================================================================== +(define (common:get-pkts-dirs mtconf use-lt) + (let* ((pktsdirs-str (or (configf:lookup mtconf "setup" "pktsdirs") + (and use-lt + (conc (or *toppath* + (current-directory)) + "/lt/.pkts")))) + (pktsdirs (if pktsdirs-str + (string-split pktsdirs-str " ") + #f))) + pktsdirs)) + +;;====================================================================== +(define (common:with-queue-db mtconf proc #!key (use-lt #f)(toppath-in #f)) + (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt)) + (pktsdir (if pktsdirs (car pktsdirs) #f)) + (toppath (or (configf:lookup mtconf "scratchdat" "toppath") + toppath-in)) + (pdbpath (or (configf:lookup mtconf "setup" "pdbpath") pktsdir))) + (cond + ((not (and pktsdir toppath pdbpath)) + (debug:print 0 *default-log-port* "ERROR: settings are missing in your megatest.config for area management.") + (debug:print 0 *default-log-port* " you need to have pktsdirs in the [setup] section.")) + ((not (common:file-exists? pktsdir)) + (debug:print 0 *default-log-port* "ERROR: pkts directory not found " pktsdir)) + ((not (equal? (file-owner pktsdir)(current-effective-user-id))) + (debug:print 0 *default-log-port* "ERROR: directory " pktsdir " is not owned by " (current-effective-user-name))) + (else + (let* ((pdb (open-queue-db pdbpath "pkts.db" + schema: '("CREATE TABLE groups (id INTEGER PRIMARY KEY,groupname TEXT, CONSTRAINT group_constraint UNIQUE (groupname));")))) + (proc pktsdirs pktsdir pdb) + (dbi:close pdb)))))) + +;;====================================================================== +;; check space in dbdir and in megatest dir +;; returns: ok/not dbspace required-space +;; +(define (common:check-db-dir-space) + (let* ((required (string->number + ;; default is 1GB (or actually a billion bytes) This is the number of 1 kB blocks. + (or (configf:lookup *configdat* "setup" "dbdir-space-required") + "1000000"))) + (dbdir (common:get-db-tmp-area)) ;; (db:get-dbdir)) + (tdbspace (common:check-space-in-dir dbdir required)) + (mdbspace (common:check-space-in-dir *toppath* required))) + (sort (list tdbspace mdbspace) (lambda (a b) + (< (cadr a)(cadr b)))))) + +;;====================================================================== +;; check available space in dbdir, exit if insufficient +;; +(define (common:check-db-dir-and-exit-if-insufficient) + (let* ((spacedat (car (common:check-db-dir-space))) ;; look only at worst for now + (is-ok (car spacedat)) + (dbspace (cadr spacedat)) + (required (caddr spacedat)) + (dbdir (cadddr spacedat))) + (if (not is-ok) + (begin + (debug:print-error 0 *default-log-port* "Insufficient space in " dbdir ", require " required ", have " dbspace ", exiting now.") + (exit 1))))) + +;;====================================================================== +;; paths is list of lists ((name path) ... ) +;; +(define (common:get-disk-with-most-free-space disks minsize) + (let* ((best #f) + (bestsize 0) + (default-min-inodes-string "1000000") + (default-min-inodes (string->number default-min-inodes-string)) + (min-inodes (or (string->number (if (configf:lookup *configdat* "setup" "min_inodes") (configf:lookup *configdat* "setup" "min_inodes") default-min-inodes-string)) default-min-inodes))) + + (for-each + (lambda (disk-num) + (let* ((dirpath (cadr (assoc disk-num disks))) + (freespc (cond + ((not (directory? dirpath)) + (if (common:low-noise-print 300 "disks not a dir " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it.")) + -1) + ((not (file-write-access? dirpath)) + (if (common:low-noise-print 300 "disks not writeable " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it.")) + -1) + ((not (eq? (string-ref dirpath 0) #\/)) + (if (common:low-noise-print 300 "disks not a proper path " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it.")) + -1) + (else + (get-df dirpath)))) + (free-inodes (cond + ((not (directory? dirpath)) + (if (common:low-noise-print 300 "disks not a dir " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it.")) + -1) + ((not (file-write-access? dirpath)) + (if (common:low-noise-print 300 "disks not writeable " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it.")) + -1) + ((not (eq? (string-ref dirpath 0) #\/)) + (if (common:low-noise-print 300 "disks not a proper path " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it.")) + -1) + (else + (get-free-inodes dirpath)))) + ;;(free-inodes (get-free-inodes dirpath)) + ) + (debug:print 2 *default-log-port* "INFO: disk " disk-num " path " dirpath " free space " freespc " free inodes " free-inodes) + (if (and (> freespc bestsize)(> free-inodes min-inodes )) + (begin + (set! best (cons disk-num dirpath)) + (set! bestsize freespc))) + ;;(print "Processing: " disk-num " bestsize: " bestsize " best: " best " freespc: " freespc " min-inodes: " min-inodes " free-inodes: " free-inodes) + )) + (map car disks)) + (if (and best (> bestsize minsize)) + best + #f))) ;; #f means no disk candidate found + +(define (common:load-pkts-to-db mtconf #!key (use-lt #f)) + (common:with-queue-db + mtconf + (lambda (pktsdirs pktsdir pdb) + (for-each + (lambda (pktsdir) ;; look at all + (cond + ((not (common:file-exists? pktsdir)) + (debug:print 0 *default-log-port* "ERROR: packets directory " pktsdir " does not exist.")) + ((not (directory? pktsdir)) + (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not a directory.")) + ((not (file-read-access? pktsdir)) + (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not readable.")) + (else + (debug:print-info 0 *default-log-port* "Loading packets found in " pktsdir) + (let ((pkts (glob (conc pktsdir "/*.pkt")))) + (for-each + (lambda (pkt) + (let* ((uuid (cadr (string-match ".*/([0-9a-f]+).pkt" pkt))) + (exists (lookup-by-uuid pdb uuid #f))) + (if (not exists) + (let* ((pktdat (string-intersperse + (with-input-from-file pkt read-lines) + "\n")) + (apkt (pkt->alist pktdat)) + (ptype (alist-ref 'T apkt))) + (add-to-queue pdb pktdat uuid (or ptype 'cmd) #f 0) + (debug:print 4 *default-log-port* "Added " uuid " of type " ptype " to queue")) + (debug:print 4 *default-log-port* "pkt: " uuid " exists, skipping...") + ))) + pkts))))) + pktsdirs)) + use-lt: use-lt)) + +;;====================================================================== +;; use-lt is use linktree "lt" link to find pkts dir +(define (common:save-pkt pktalist-in mtconf use-lt #!key (add-only #f)) ;; add-only saves the pkt only if there is a parent already + (if (or add-only + (hash-table-exists? *pkts-info* 'last-parent)) + (let* ((parent (hash-table-ref/default *pkts-info* 'last-parent #f)) + (pktalist (if parent + (cons `(parent . ,parent) + pktalist-in) + pktalist-in))) + (let-values (((uuid pkt) + (alist->pkt pktalist common:pkts-spec))) + (hash-table-set! *pkts-info* 'last-parent uuid) + (let ((pktsdir (or (hash-table-ref/default *pkts-info* 'pkts-dir #f) + (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt)) + (pktsdir (car pktsdirs))) ;; assume it is there + (hash-table-set! *pkts-info* 'pkts-dir pktsdir) + pktsdir)))) + (handle-exceptions + exn + (debug:print-info 0 "failed to write out packet to " pktsdir ", exn=" exn) ;; don't care if this failed for now but MUST FIX - BUG!! + (if (not (file-exists? pktsdir)) + (create-directory pktsdir #t)) + (with-output-to-file + (conc pktsdir "/" uuid ".pkt") + (lambda () + (print pkt))))))))) +;; (set! *common:telemetry-log-socket* #f))))) + + +(define (common:get-linktree) + (or (getenv "MT_LINKTREE") + (if *configdat* + (configf:lookup *configdat* "setup" "linktree") + #f) + (if (or *toppath* (getenv "MT_RUN_AREA_HOME")) + (conc (or *toppath* (getenv "MT_RUN_AREA_HOME")) "/lt") + #f) + (let* ((tp (common:get-toppath #f)) + (lt (conc tp "/lt"))) + (if (not tp)(debug:print 0 *default-log-port* "WARNING: cannot calculate best path for linktree, using " lt)) + lt))) + + ;;======================================================================the end ) Index: configf.scm ================================================================== --- configf.scm +++ configf.scm @@ -36,767 +36,10 @@ (declare (uses margsmod)) (import margsmod) (include "common_records.scm") -;; return list (path fullpath configname) -(define (find-config configname #!key (toppath #f)) - (if toppath - (let ((cfname (conc toppath "/" configname))) - (if (common:file-exists? cfname) - (list toppath cfname configname) - (list #f #f #f))) - (let* ((cwd (string-split (current-directory) "/"))) - (let loop ((dir cwd)) - (let* ((path (conc "/" (string-intersperse dir "/"))) - (fullpath (conc path "/" configname))) - (if (common:file-exists? fullpath) - (list path fullpath configname) - (let ((remcwd (take dir (- (length dir) 1)))) - (if (null? remcwd) - (list #f #f #f) ;; #f #f) - (loop remcwd))))))))) - -(define (configf:eval-string-in-environment str) - ;; (if (or (string-null? str) - ;; (equal? "!" (substring str 0 1))) ;; null string or starts with ! are preserved but NOT set in the environment - str - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "problem evaluating \"" str "\" in the shell environment, exn=" exn) - #f) - (let ((cmdres (process:cmd-run->list (conc "echo " str)))) - (if (null? cmdres) "" - (caar cmdres))))) ;; ) - -;;====================================================================== -;; Make the regexp's needed globally available -;;====================================================================== - -(define configf:include-rx (regexp "^\\[include\\s+(.*)\\]\\s*$")) -(define configf:script-rx (regexp "^\\[scriptinc\\s+(\\S+)([^\\]]*)\\]\\s*$")) ;; include output from a script -(define configf:section-rx (regexp "^\\[(.*)\\]\\s*$")) -(define configf:blank-l-rx (regexp "^\\s*$")) -(define configf:key-sys-pr (regexp "^(\\S+)\\s+\\[system\\s+(\\S+.*)\\]\\s*$")) -(define configf:key-val-pr (regexp "^(\\S+)(\\s+(.*)|())$")) -(define configf:key-no-val (regexp "^(\\S+)(\\s*)$")) -(define configf:comment-rx (regexp "^\\s*#.*")) -(define configf:cont-ln-rx (regexp "^(\\s+)(\\S+.*)$")) -(define configf:settings (regexp "^\\[configf:settings\\s+(\\S+)\\s+(\\S+)]\\s*$")) - -;; read a line and process any #{ ... } constructs - -(define configf:var-expand-regex (regexp "^(.*)#\\{(scheme|system|shell|getenv|get|runconfigs-get|rget|scm|sh|rp|gv|g|mtrah)\\s+([^\\}\\{]*)\\}(.*)")) - -(define (configf:system ht cmd) - (system cmd) - ) - -(define (configf:process-line l ht allow-system #!key (linenum #f)) - (let loop ((res l)) - (if (string? res) - (let ((matchdat (string-search configf:var-expand-regex res))) - (if matchdat - (let* ((prestr (list-ref matchdat 1)) - (cmdtype (list-ref matchdat 2)) ;; eval, system, shell, getenv - (cmd (list-ref matchdat 3)) - (poststr (list-ref matchdat 4)) - (result #f) - (start-time (current-seconds)) - (cmdsym (string->symbol cmdtype)) - (fullcmd (case cmdsym - ((scheme scm) (conc "(lambda (ht)" cmd ")")) - ((system) (conc "(lambda (ht)(configf:system ht \"" cmd "\"))")) - ((shell sh) (conc "(lambda (ht)(string-translate (shell \"" cmd "\") \"\n\" \" \"))")) - ((realpath rp)(conc "(lambda (ht)(common:nice-path \"" cmd "\"))")) - ((getenv gv) (conc "(lambda (ht)(get-environment-variable \"" cmd "\"))")) - ((mtrah) (conc "(lambda (ht)" - " (let ((extra \"" cmd "\"))" - " (conc (or *toppath* (get-environment-variable \"MT_RUN_AREA_HOME\"))" - " (if (string-null? extra) \"\" \"/\")" - " extra)))")) - ((get g) - (match (string-split cmd) - ((sect var)(conc "(lambda (ht)(configfmod#configf:lookup ht \"" sect "\" \"" var "\"))")) - (else - (debug:print-error 0 *default-log-port* "#{get ...} used with only one parameter, \"" cmd "\", two needed.") - "(lambda (ht) #f)"))) - ((runconfigs-get rget) (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))")) - ;; ((rget) (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))")) - (else "(lambda (ht)(print \"ERROR\") \"ERROR\")")))) - ;; (print "fullcmd=" fullcmd) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: failed to process config input \"" l "\", exn=" exn) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - ;; (print "exn=" (condition->list exn)) - (set! result (conc "#{( " cmdtype ") " cmd "}, full expansion: " fullcmd))) - (if (or allow-system - (not (member cmdtype '("system" "shell" "sh")))) - (with-input-from-string fullcmd - (lambda () - (set! result ((eval (read)) ht)))) - (set! result (conc "#{(" cmdtype ") " cmd "}")))) - (case cmdsym - ((system shell scheme) - (let ((delta (- (current-seconds) start-time))) - (if (> delta 2) - (debug:print-info 0 *default-log-port* "for line \"" l "\"\n command: " cmd " took " delta " seconds to run with output:\n " result) - (debug:print-info 9 *default-log-port* "for line \"" l "\"\n command: " cmd " took " delta " seconds to run with output:\n " result))))) - (loop (conc prestr result poststr))) - res)) - res))) - -;; Run a shell command and return the output as a string -(define (shell cmd) - (let* ((output (process:cmd-run->list cmd)) - (res (car output)) - (status (cadr output))) - (if (equal? status 0) - (let ((outres (string-intersperse - res - "\n"))) - (debug:print-info 4 *default-log-port* "shell result:\n" outres) - outres) - (begin - (with-output-to-port (current-error-port) - (lambda () - (print "ERROR: " cmd " returned bad exit code " status))) - "")))) - -;; this was inline but I'm pretty sure that is a hold over from when it was *very* simple ... -;; -(define (configf:read-line p ht allow-processing settings) - (let loop ((inl (read-line p))) - (let ((cont-line (and (string? inl) - (not (string-null? inl)) - (equal? "\\" (string-take-right inl 1))))) - (if cont-line ;; last character is \ - (let ((nextl (read-line p))) - (if (not (eof-object? nextl)) - (loop (string-append (if cont-line - (string-take inl (- (string-length inl) 1)) - inl) - nextl)))) - (let ((res (case allow-processing ;; if (and allow-processing - ;; (not (eq? allow-processing 'return-string))) - ((#t #f) - (configf:process-line inl ht allow-processing)) - ((return-string) - inl) - (else - (configf:process-line inl ht allow-processing))))) - (if (and (string? res) ;; must set to "no" to force NOT trimming trailing spaces - (not (equal? (hash-table-ref/default settings "trim-trailing-spaces" "yes") "no"))) - (string-substitute "\\s+$" "" res) - res)))))) - -(define (configf:cfgdat->env-alist section cfgdat-ht allow-system) - (filter - (lambda (pair) - (let* ((var (car pair)) - (val (cdr pair))) - (cons var - (cond - ((and allow-system (procedure? val)) ;; if we decided to use something other than #t or #f for allow-system ('return-procs or 'return-string) , this may become problematic - (val)) - ((procedure? val) #f) - ((string? val) val) - (else "#f"))))) - (append - (hash-table-ref/default cfgdat-ht "default" '()) - (if (equal? section "default") '() (hash-table-ref/default cfgdat-ht section '()))))) - -(define (calc-allow-system allow-system section sections) - (if sections - (and (or (equal? "default" section) - (member section sections)) - allow-system) ;; account for sections and return allow-system as it might be a symbol such as return-strings - allow-system)) - -;; given a config hash and a section name, apply that section to all matching sections (using wildcard % or regex if /..../) -;; remove the section when done so that there is no downstream clobbering -;; -(define (configf:apply-wildcards ht section-name) - (if (hash-table-exists? ht section-name) - (let* ((vars (hash-table-ref ht section-name)) - (rxstr (if (string-contains section-name "%") - (string-substitute (regexp "%") ".*" section-name) - (string-substitute (regexp "^/(.*)/$") "\\1" section-name))) - (rx (regexp rxstr))) - ;; (print "\nsection-name: " section-name " rxstr: " rxstr) - (for-each - (lambda (section) - (if section - (let ((same-section (string=? section-name section)) - (rx-match (string-match rx section))) - ;; (print "section: " section " vars: " vars " same-section: " same-section " rx-match: " rx-match) - (if (and (not same-section) rx-match) - (for-each - (lambda (bundle) - ;; (print "bundle: " bundle) - (let ((key (car bundle)) - (val (cadr bundle)) - (meta (if (> (length bundle) 2)(caddr bundle) #f))) - (hash-table-set! ht section (configf:assoc-safe-add (hash-table-ref ht section) key val metadata: meta)))) - vars))))) - (hash-table-keys ht)))) - ht) - -;; read a config file, returns hash table of alists - -;; read a config file, returns hash table of alists -;; adds to ht if given (must be #f otherwise) -;; allow-system: -;; #f - do not evaluate [system -;; #t - immediately evaluate [system and store result as string -;; 'return-procs -- return a proc taking ht as an argument that may be evaulated at some future time -;; 'return-string -- return a string representing a proc taking ht as an argument that may be evaulated at some future time -;; envion-patt is a regex spec that identifies sections that will be eval'd -;; in the environment on the fly -;; sections: #f => get all, else list of sections to gather -;; post-section-procs alist of section-pattern => proc, where: (proc section-name next-section-name ht curr-path) -;; apply-wildcards: #t/#f - apply vars from targets with % wildcards to all matching sections -;; -(define (read-config path ht allow-system #!key (environ-patt #f) (curr-section #f) - (sections #f) (settings (make-hash-table)) (keep-filenames #f) - (post-section-procs '()) (apply-wildcards #t) ) - (debug:print 9 *default-log-port* "START: " path) -;; (if *configdat* -;; (common:save-pkt `((action . read-config) -;; (f . ,(cond ((string? path) path) -;; ((port? path) "port") -;; (else (conc path)))) -;; (T . configf)) -;; *configdat* #t add-only: #t)) - (if (and (not (port? path)) - (not (common:file-exists? path))) ;; for case where we are handed a port - (begin - (debug:print-info 1 *default-log-port* "read-config - file not found " path " current path: " (current-directory)) - ;; WARNING: This is a risky change but really, we should not return an empty hash table if no file read? - #f) ;; (if (not ht)(make-hash-table) ht)) - (let ((inp (if (string? path) - (open-input-file path) - path)) ;; we can be handed a port - (res (if (not ht)(make-hash-table) ht)) - (metapath (if (or (debug:debug-mode 9) - keep-filenames) - path #f)) - (process-wildcards (lambda (res curr-section-name) - (if (and apply-wildcards - (or (string-contains curr-section-name "%") ;; wildcard - (string-match "/.*/" curr-section-name))) ;; regex - (begin - (configf:apply-wildcards res curr-section-name) - (hash-table-delete! res curr-section-name)))))) ;; NOTE: if the section is a wild card it will be REMOVED from res - (let loop ((inl (configf:read-line inp res (calc-allow-system allow-system curr-section sections) settings)) ;; (read-line inp)) - (curr-section-name (if curr-section curr-section "default")) - (var-flag #f);; turn on for key-var-pr and cont-ln-rx, turn off elsewhere - (lead #f)) - (debug:print-info 8 *default-log-port* "curr-section-name: " curr-section-name " var-flag: " var-flag "\n inl: \"" inl "\"") - (if (eof-object? inl) - (begin - ;; process last section for wildcards - (process-wildcards res curr-section-name) - (if (string? path) ;; we received a path, not a port, thus we are responsible for closing it. - (close-input-port inp)) - (if (list? sections) ;; delete all sections except given when sections is provided - (for-each - (lambda (section) - (if (not (member section sections)) - (hash-table-delete! res section))) ;; we are using "" as a dumping ground and must remove it before returning the ht - (hash-table-keys res))) - (debug:print 9 *default-log-port* "END: " path) - res - ) ;; retval - (regex-case - inl - (configf:comment-rx _ (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) - curr-section-name #f #f)) - - (configf:blank-l-rx _ (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) - curr-section-name #f #f)) - (configf:settings ( x setting val ) - (begin - (hash-table-set! settings setting val) - (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) - curr-section-name #f #f))) - - (configf:include-rx ( x include-file ) - (let* ((curr-conf-dir (pathname-directory path)) - (full-conf (if (and (absolute-pathname? include-file) (file-exists? include-file)) - include-file - (common:nice-path - (conc (if curr-conf-dir - curr-conf-dir - ".") - "/" include-file))))) - (let ((all-matches (sort (handle-exceptions exn - (begin - (debug:print '(2 9) *default-log-port* "glob of " full-conf " gave no match. , exn=" exn) - (list)) - (glob full-conf)) string<=?))) - (if (null? all-matches) - (begin - (debug:print '(2 9) #f "INFO: include file(s) matching " include-file " not found (called from " path ")") - (debug:print 2 *default-log-port* " " full-conf)) - (for-each - (lambda (fpath) - ;; (push-directory conf-dir) - (debug:print 9 *default-log-port* "Including: " full-conf) - (read-config fpath res allow-system environ-patt: environ-patt - curr-section: curr-section-name sections: sections settings: settings - keep-filenames: keep-filenames)) - all-matches)) - (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) - curr-section-name #f #f)))) - (configf:script-rx ( x include-script params);; handle-exceptions - ;; exn - ;; (begin - ;; (debug:print '(0 2 9) #f "INFO: include from script " include-script " failed.") - ;; (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)) - (if (and (common:file-exists? include-script)(file-execute-access? include-script)) - (let* ((local-allow-system (calc-allow-system allow-system curr-section-name sections)) - (env-delta (configf:cfgdat->env-alist curr-section-name res local-allow-system)) - (new-inp-port - (common:with-env-vars - env-delta - (lambda () - (open-input-pipe (conc include-script " " params)))))) - (debug:print '(2 9) *default-log-port* "Including from script output: " include-script) - ;; (print "We got here, calling read-config next. Port is: " new-inp-port) - (read-config new-inp-port res allow-system environ-patt: environ-patt curr-section: curr-section-name sections: sections settings: settings keep-filenames: keep-filenames) - (close-input-port new-inp-port) - (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)) - (begin - (debug:print 0 *default-log-port* "Script not found or not exectutable: " include-script) - (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f))) - ) ;; ) - (configf:section-rx ( x section-name ) - (begin - ;; call post-section-procs - (for-each - (lambda (dat) - (let ((patt (car dat)) - (proc (cdr dat))) - (if (string-match patt curr-section-name) - (proc curr-section-name section-name res path)))) - post-section-procs) - ;; after gathering the vars for a section and if apply-wildcards is true and if there is a wildcard in the section name process wildcards - ;; NOTE: we are processing the curr-section-name, NOT section-name. - (process-wildcards res curr-section-name) - (if (not (hash-table-ref/default res section-name #f))(hash-table-set! res section-name '())) ;; ensure that mere mention of a section is not lost - (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) - ;; if we have the sections list then force all settings into "" and delete it later? - ;; (if (or (not sections) - ;; (member section-name sections)) - ;; section-name "") ;; stick everything into "". NOPE: We need new strategy. Put stuff in correct sections and then delete all sections later. - section-name - #f #f))) - (configf:key-sys-pr ( x key cmd ) - (if (calc-allow-system allow-system curr-section-name sections) - (let ((alist (hash-table-ref/default res curr-section-name '())) - (val-proc (lambda () - (let* ((start-time (current-seconds)) - (local-allow-system (calc-allow-system allow-system curr-section-name sections)) - (env-delta (configf:cfgdat->env-alist curr-section-name res local-allow-system)) - (cmdres (process:cmd-run->list cmd delta-env-alist-or-hash-table: env-delta)) ;; BB: here is where [system is exec'd. needs to have env from other vars! - (delta (- (current-seconds) start-time)) - (status (cadr cmdres)) - (res (car cmdres))) - (debug:print-info 4 *default-log-port* "" inl "\n => " (string-intersperse res "\n")) - (if (not (eq? status 0)) - (begin - (debug:print-error 0 *default-log-port* "problem with " inl ", return code " status - " output: " cmdres))) - (if (> delta 2) - (debug:print-info 0 *default-log-port* "for line \"" inl "\"\n command: " cmd " took " delta " seconds to run with output:\n " res) - (debug:print-info 9 *default-log-port* "for line \"" inl "\"\n command: " cmd " took " delta " seconds to run with output:\n " res)) - (if (null? res) - "" - (string-intersperse res " ")))))) - (hash-table-set! res curr-section-name - (configf:assoc-safe-add alist - key - (case (calc-allow-system allow-system curr-section-name sections) - ((return-procs) val-proc) - ((return-string) cmd) - (else (val-proc))) - metadata: metapath)) - (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)) - (loop (configf:read-line inp res - (calc-allow-system allow-system curr-section-name sections) - settings) - curr-section-name #f #f))) - - (configf:key-no-val ( x key val) - (let* ((alist (hash-table-ref/default res curr-section-name '())) - (fval (or (if (string? val) val #f) ""))) ;; fval should be either "" or " " (one or more spaces) - (debug:print 10 *default-log-port* " setting: [" curr-section-name "] " key " = #t") - (safe-setenv key fval) - (hash-table-set! res curr-section-name - (configf:assoc-safe-add alist key fval metadata: metapath)) - (loop (configf:read-line inp res - (calc-allow-system allow-system curr-section-name sections) - settings) - curr-section-name key #f))) - - (configf:key-val-pr ( x key unk1 val unk2 ) - (let* ((alist (hash-table-ref/default res curr-section-name '())) - (envar (and environ-patt - (string-search (regexp environ-patt) curr-section-name) ;; does the section match the envionpatt? - (and (not (string-null? key)) - (not (equal? "!" (substring key 0 1)))) ;; ! as leading character is a signature to NOT export to the environment - ;; (string-match "^.*:.*:.*$" key) ;; ;; something:something:something reserved for triggers in runconfigs - )) - (realval (if envar - (configf:eval-string-in-environment val) - val))) - (debug:print-info 6 *default-log-port* "read-config env setting, envar: " envar " realval: " realval " val: " val " key: " key " curr-section-name: " curr-section-name) - (if envar (safe-setenv key realval)) - (debug:print 10 *default-log-port* " setting: [" curr-section-name "] " key " = " val) - (hash-table-set! res curr-section-name - (configf:assoc-safe-add alist key realval metadata: metapath)) - (loop (configf:read-line inp res - (calc-allow-system allow-system curr-section-name sections) settings) - curr-section-name key #f))) - ;; if a continued line - (configf:cont-ln-rx ( x whsp val ) - (let ((alist (hash-table-ref/default res curr-section-name '()))) - (if var-flag ;; if set to a string then we have a continued var - (let ((newval (conc - (configf:lookup res curr-section-name var-flag) "\n" - ;; trim lead from the incoming whsp to support some indenting. - (if lead - (string-substitute (regexp lead) "" whsp) - "") - val))) - ;; (print "val: " val "\nnewval: \"" newval "\"\nvarflag: " var-flag) - (hash-table-set! res curr-section-name - (configf:assoc-safe-add alist var-flag newval metadata: metapath)) - (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name var-flag (if lead lead whsp))) - (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)))) - (else (debug:print-error 0 *default-log-port* "problem parsing " path ",\n \"" inl "\"") - (set! var-flag #f) - (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)))) - ) ;; end loop - ))) - -;; pathenvvar will set the named var to the path of the config -(define (find-and-read-config fname #!key (environ-patt #f)(given-toppath #f)(pathenvvar #f)) - (let* ((curr-dir (current-directory)) - (configinfo (find-config fname toppath: given-toppath)) - (toppath (car configinfo)) - (configfile (cadr configinfo)) - (set-fields (lambda (curr-section next-section ht path) - (let ((field-names (if ht (common:get-fields ht) '())) - (target (or (getenv "MT_TARGET")(args:get-arg "-reqtarg")(args:get-arg "-target")))) - (debug:print-info 9 *default-log-port* "set-fields with field-names=" field-names " target=" target " curr-section=" curr-section " next-section=" next-section " path=" path " ht=" ht) - (if (not (null? field-names))(keys:target-set-args field-names target #f)))))) - (if toppath (change-directory toppath)) - (if (and toppath pathenvvar)(setenv pathenvvar toppath)) - (let ((configdat (if configfile - (read-config configfile #f #t environ-patt: environ-patt post-section-procs: (list (cons "^fields$" set-fields)) #f)))) - (if toppath (change-directory curr-dir)) - (list configdat toppath configfile fname)))) - -;;====================================================================== -;; lookup and manipulation routines -;;====================================================================== - -;; (define (configf:assoc-safe-add alist key val #!key (metadata #f)) -;; (let ((newalist (filter (lambda (x)(not (equal? key (car x)))) alist))) -;; (append newalist (list (if metadata -;; (list key val metadata) -;; (list key val)))))) -;; -;; (define (configf:section-var-set! cfgdat section-name var value #!key (metadata #f)) -;; (hash-table-set! cfgdat section-name -;; (configf:assoc-safe-add -;; (hash-table-ref/default cfgdat section-name '()) -;; var value metadata: metadata))) -;; -;; (define (configf:lookup cfgdat section var) -;; (if (hash-table? cfgdat) -;; (let ((sectdat (hash-table-ref/default cfgdat section '()))) -;; (if (null? sectdat) -;; #f -;; (let ((match (assoc var sectdat))) -;; (if match ;; (and match (list? match)(> (length match) 1)) -;; (cadr match) -;; #f)) -;; )) -;; #f)) -;; -;; ;; use to have definitive setting: -;; ;; [foo] -;; ;; var yes -;; ;; -;; ;; (configf:var-is? cfgdat "foo" "var" "yes") => #t -;; ;; -;; (define (configf:var-is? cfgdat section var expected-val) -;; (equal? (configf:lookup cfgdat section var) expected-val)) -;; -;; (define config-lookup configf:lookup) -(define configf:read-file read-config) - -;; ;; safely look up a value that is expected to be a number, return -;; ;; a default (#f unless provided) -;; ;; -;; (define (configf:lookup-number cfdat section varname #!key (default #f)) -;; (let* ((val (configf:lookup *configdat* section varname)) -;; (res (if val -;; (string->number (string-substitute "\\s+" "" val #t)) -;; #f))) -;; (cond -;; (res res) -;; (val (debug:print 0 *default-log-port* "ERROR: no number found for [" section "], " varname ", got: " val)) -;; (else default)))) -;; -;; (define (configf:section-vars cfgdat section) -;; (let ((sectdat (hash-table-ref/default cfgdat section '()))) -;; (if (null? sectdat) -;; '() -;; (map car sectdat)))) -;; -;; (define (configf:get-section cfgdat section) -;; (hash-table-ref/default cfgdat section '())) -;; -;; (define (configf:set-section-var cfgdat section var val) -;; (let ((sectdat (configf:get-section cfgdat section))) -;; (hash-table-set! cfgdat section -;; (configf:assoc-safe-add sectdat var val)))) -;; -;; ;;(append (filter (lambda (x)(not (assoc var sectdat))) sectdat) -;; ;; (list var val)))) -;; -;;====================================================================== -;; setup -;;====================================================================== -;;====================================================================== - -(define (setup) - (let* ((configf (find-config "megatest.config")) - (config (if configf (read-config configf #f #t) #f))) - (if config - (setenv "RUN_AREA_HOME" (pathname-directory configf))) - config)) - -;;====================================================================== -;; Non destructive writing of config file -;;====================================================================== - -(define (configf:compress-multi-lines fdat) - ;; step 1.5 - compress any continued lines - (if (null? fdat) fdat - (let loop ((hed (car fdat)) - (tal (cdr fdat)) - (cur "") - (led #f) - (res '())) - ;; ALL WHITESPACE LEADING LINES ARE TACKED ON!! - ;; 1. remove led whitespace - ;; 2. tack on to hed with "\n" - (let ((match (string-match configf:cont-ln-rx hed))) - (if match ;; blast! have to deal with a multiline - (let* ((lead (cadr match)) - (lval (caddr match)) - (newl (conc cur "\n" lval))) - (if (not led)(set! led lead)) - (if (null? tal) - (set! fdat (append fdat (list newl))) - (loop (car tal)(cdr tal) newl led res))) ;; NB// not tacking newl onto res - (let ((newres (if led - (append res (list cur hed)) - (append res (list hed))))) - ;; prev was a multiline - (if (null? tal) - newres - (loop (car tal)(cdr tal) "" #f newres)))))))) - -;; note: I'm cheating a little here. I merely replace "\n" with "\n " -(define (configf:expand-multi-lines fdat) - ;; step 1.5 - compress any continued lines - (if (null? fdat) fdat - (let loop ((hed (car fdat)) - (tal (cdr fdat)) - (res '())) - (let ((newres (append res (list (string-substitute (regexp "\n") "\n " hed #t))))) - (if (null? tal) - newres - (loop (car tal)(cdr tal) newres)))))) - -(define (configf:file->list fname) - (if (common:file-exists? fname) - (let ((inp (open-input-file fname))) - (let loop ((inl (read-line inp)) - (res '())) - (if (eof-object? inl) - (begin - (close-input-port inp) - (reverse res)) - (loop (read-line inp)(cons inl res))))) - '())) - -;;====================================================================== -;; Write a config -;; 0. Given a refererence data structure "indat" -;; 1. Open the output file and read it into a list -;; 2. Flatten any multiline entries -;; 3. Modify values per contents of "indat" and remove absent values -;; 4. Append new values to the section (immediately after last legit entry) -;; 5. Write out the new list -;;====================================================================== - -(define (configf:write-config indat fname #!key (required-sections '())) - (let* (;; step 1: Open the output file and read it into a list - (fdat (configf:file->list fname)) - (refdat (make-hash-table)) - (sechash (make-hash-table)) ;; current section hash, init with hash for "default" section - (new #f) ;; put the line to be used in new, if it is to be deleted the set new to #f - (secname #f)) - - ;; step 2: Flatten multiline entries - (if (not (null? fdat))(set! fdat (configf:compress-multi-lines fdat))) - - ;; step 3: Modify values per contents of "indat" and remove absent values - (if (not (null? fdat)) - (let loop ((hed (car fdat)) - (tal (cadr fdat)) - (res '()) - (lnum 0)) - (regex-case - hed - (configf:comment-rx _ (set! res (append res (list hed)))) ;; (loop (read-line inp) curr-section-name #f #f)) - (configf:blank-l-rx _ (set! res (append res (list hed)))) ;; (loop (read-line inp) curr-section-name #f #f)) - (configf:section-rx ( x section-name ) (let ((section-hash (hash-table-ref/default refdat section-name #f))) - (if (not section-hash) - (let ((newhash (make-hash-table))) - (hash-table-set! refdat section-name newhash) ;; was refhash - not sure that refdat is correct here - (set! sechash newhash)) - (set! sechash section-hash)) - (set! new hed) ;; will append this at the bottom of the loop - (set! secname section-name) - )) - ;; No need to process key cmd, let it fall though to key val - (configf:key-val-pr ( x key val ) - (let ((newval (configf:lookup indat secname key))) ;; was sec, bug or correct? - ;; can handle newval == #f here => that means key is removed - (cond - ((equal? newval val) - (set! res (append res (list hed)))) - ((not newval) ;; key has been removed - (set! new #f)) - ((not (equal? newval val)) - (hash-table-set! sechash key newval) - (set! new (conc key " " newval))) - (else - (debug:print-error 0 *default-log-port* "problem parsing line number " lnum "\"" hed "\""))))) - (else - (debug:print-error 0 *default-log-port* "Problem parsing line num " lnum " :\n " hed ))) - (if (not (null? tal)) - (loop (car tal)(cdr tal)(if new (append res (list new)) res)(+ lnum 1))) - ;; drop to here when done processing, res contains modified list of lines - (set! fdat res))) - - ;; step 4: Append new values to the section - (for-each - (lambda (section) - (let ((sdat '()) ;; append needed bits here - (svars (configf:section-vars indat section))) - (for-each - (lambda (var) - (let ((val (configf:lookup refdat section var))) - (if (not val) ;; this one is new - (begin - (if (null? sdat)(set! sdat (list (conc "[" section "]")))) - (set! sdat (append sdat (list (conc var " " val)))))))) - svars) - (set! fdat (append fdat sdat)))) - (delete-duplicates (append required-sections (hash-table-keys indat)))) - - ;; step 5: Write out new file - (with-output-to-file fname - (lambda () - (for-each - (lambda (line) - (print line)) - (configf:expand-multi-lines fdat)))))) - -;;====================================================================== -;; refdb -;;====================================================================== - -;; reads a refdb into an assoc array of assoc arrays -;; returns (list dat msg) -(define (configf:read-refdb refdb-path) - (let ((sheets-file (conc refdb-path "/sheet-names.cfg"))) - (if (not (common:file-exists? sheets-file)) - (list #f (conc "ERROR: no refdb found at " refdb-path)) - (if (not (file-read-access? sheets-file)) - (list #f (conc "ERROR: refdb file not readable at " refdb-path)) - (let* ((sheets (with-input-from-file sheets-file - (lambda () - (let loop ((inl (read-line)) - (res '())) - (if (eof-object? inl) - (reverse res) - (loop (read-line)(cons inl res))))))) - (data '())) - (for-each - (lambda (sheet-name) - (let* ((dat-path (conc refdb-path "/" sheet-name ".dat")) - (ref-dat (configf:read-file dat-path #f #t)) - (ref-assoc (map (lambda (key) - (list key (hash-table-ref ref-dat key))) - (hash-table-keys ref-dat)))) - ;; (hash-table->alist ref-dat))) - ;; (set! data (append data (list (list sheet-name ref-assoc)))))) - (set! data (cons (list sheet-name ref-assoc) data)))) - sheets) - (list data "NO ERRORS")))))) - -;; map over all pairs in a three level hierarchial alist and apply a function to the keys/val -;; -(define (configf:map-all-hier-alist data proc #!key (initproc1 #f)(initproc2 #f)(initproc3 #f)) - (for-each - (lambda (sheetname) - (let* ((sheettmp (assoc sheetname data)) - (sheetdat (if sheettmp (cadr sheettmp) '()))) - (if initproc1 (initproc1 sheetname)) - (for-each - (lambda (sectionname) - (let* ((sectiontmp (assoc sectionname sheetdat)) - (sectiondat (if sectiontmp (cadr sectiontmp) '()))) - (if initproc2 (initproc2 sheetname sectionname)) - (for-each - (lambda (varname) - (let* ((valtmp (assoc varname sectiondat)) - (val (if valtmp (cadr valtmp) ""))) - (proc sheetname sectionname varname val))) - (map car sectiondat)))) - (map car sheetdat)))) - (map car data)) - data) - -;;====================================================================== -;; C O N F I G T O / F R O M A L I S T -;;====================================================================== - -(define (configf:config->alist cfgdat) - (hash-table->alist cfgdat)) - -(define (configf:alist->config adat) - (let ((ht (make-hash-table))) - (for-each - (lambda (section) - (hash-table-set! ht (car section)(cdr section))) - adat) - ht)) - ;; if (define (configf:read-alist fname) (handle-exceptions exn (begin @@ -829,21 +72,5 @@ #f)) #f)))) (common:faux-unlock fname) res)) -;; convert hierarchial list to ini format -;; -(define (configf:config->ini data) - (map - (lambda (section) - (let ((section-name (car section)) - (section-dat (cdr section))) - (print "\n[" section-name "]") - (map (lambda (dat-pair) - (let* ((var (car dat-pair)) - (val (cadr dat-pair)) - (fname (if (> (length dat-pair) 2)(caddr dat-pair) #f))) - (if fname (print "# " var "=>" fname)) - (print var " " val))) - section-dat))) ;; (print "section-dat: " section-dat)) - (hash-table->alist data))) Index: configfmod.scm ================================================================== --- configfmod.scm +++ configfmod.scm @@ -17,16 +17,18 @@ ;; along with Megatest. If not, see . ;;====================================================================== (declare (unit configfmod)) -(declare (uses commonmod)) +(declare (uses margsmod)) (module configfmod * (import scheme chicken data-structures extras files ports) +(import margsmod) + (use (prefix base64 base64:) (prefix dbi dbi:) (prefix sqlite3 sqlite3:) (srfi 18) @@ -46,76 +48,13 @@ typed-records directory-utils z3 ) -(import commonmod) - ;;====================================================================== ;; move debug stuff to separate module then put these back where they belong ;;====================================================================== - -(define (common:get-linktree) - (or (getenv "MT_LINKTREE") - (if *configdat* - (configf:lookup *configdat* "setup" "linktree") - #f) - (if (or *toppath* (getenv "MT_RUN_AREA_HOME")) - (conc (or *toppath* (getenv "MT_RUN_AREA_HOME")) "/lt") - #f) - (let* ((tp (common:get-toppath #f)) - (lt (conc tp "/lt"))) - (if (not tp)(debug:print 0 *default-log-port* "WARNING: cannot calculate best path for linktree, using " lt)) - lt))) - -(define (common:get-testsuite-name) - (or (configf:lookup *configdat* "setup" "area-name") ;; megatest is a flexible tool, testsuite is too limiting a description. - (configf:lookup *configdat* "setup" "testsuite" ) - (getenv "MT_TESTSUITE_NAME") - (pathname-file (or (if (string? *toppath* ) - (pathname-file *toppath*) - #f) - (common:get-toppath #f))) - "please-set-setup-area-name")) ;; (pathname-file (current-directory))))) - -(define (get-area-name configdat toppath #!optional (short #f)) - ;; look up my area name in areas table (future) - ;; generate auto name - (conc (get-area-path-signature toppath short) - "-" - (common:get-testsuite-name toppath configdat))) - -(define (common:get-db-tmp-area . junk) - (if *db-cache-path* - *db-cache-path* - (if *toppath* ;; common:get-create-writeable-dir - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "Couldn't create path to " *db-cache-path* ", exn=" exn) - (exit 1)) - (let* ((tsname (common:get-testsuite-name)) - (dbpath (common:get-create-writeable-dir - (list (conc "/tmp/" (current-user-name) - "/megatest_localdb/" - tsname "/" - (string-translate *toppath* "/" ".")) - (conc "/tmp/" (current-process-id) ;; just in case we have an issue with the dir by own user name - "/megatest_localdb/" - tsname - (string-translate *toppath* "/" ".")) - )))) - (set! *db-cache-path* dbpath) - dbpath)) - #f))) - -(define (common:get-sync-lock-filepath) - (let* ((tmp-area (common:get-db-tmp-area)) - (lockfile (conc tmp-area "/megatest.db.sync-lock"))) - lockfile)) - - ;;====================================================================== ;; lookup routines - replicated from configf ;;====================================================================== (define (configf:assoc-safe-add alist key val #!key (metadata #f)) @@ -128,22 +67,10 @@ (hash-table-set! cfgdat section-name (configf:assoc-safe-add (hash-table-ref/default cfgdat section-name '()) var value metadata: metadata))) -(define (configf:lookup cfgdat section var) - (if (hash-table? cfgdat) - (let ((sectdat (hash-table-ref/default cfgdat section '()))) - (if (null? sectdat) - #f - (let ((match (assoc var sectdat))) - (if match ;; (and match (list? match)(> (length match) 1)) - (cadr match) - #f)) - )) - #f)) - ;; use to have definitive setting: ;; [foo] ;; var yes ;; ;; (configf:var-is? cfgdat "foo" "var" "yes") => #t @@ -180,7 +107,798 @@ (let ((sectdat (configf:get-section cfgdat section))) (hash-table-set! cfgdat section (configf:assoc-safe-add sectdat var val)))) ;;======================================================================the end + +;; return list (path fullpath configname) +(define (find-config configname #!key (toppath #f)) + (if toppath + (let ((cfname (conc toppath "/" configname))) + (if (common:file-exists? cfname) + (list toppath cfname configname) + (list #f #f #f))) + (let* ((cwd (string-split (current-directory) "/"))) + (let loop ((dir cwd)) + (let* ((path (conc "/" (string-intersperse dir "/"))) + (fullpath (conc path "/" configname))) + (if (common:file-exists? fullpath) + (list path fullpath configname) + (let ((remcwd (take dir (- (length dir) 1)))) + (if (null? remcwd) + (list #f #f #f) ;; #f #f) + (loop remcwd))))))))) + +(define (configf:eval-string-in-environment str) + ;; (if (or (string-null? str) + ;; (equal? "!" (substring str 0 1))) ;; null string or starts with ! are preserved but NOT set in the environment + str + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* "problem evaluating \"" str "\" in the shell environment, exn=" exn) + #f) + (let ((cmdres (process:cmd-run->list (conc "echo " str)))) + (if (null? cmdres) "" + (caar cmdres))))) ;; ) + +;;====================================================================== +;; Make the regexp's needed globally available +;;====================================================================== + +(define configf:include-rx (regexp "^\\[include\\s+(.*)\\]\\s*$")) +(define configf:script-rx (regexp "^\\[scriptinc\\s+(\\S+)([^\\]]*)\\]\\s*$")) ;; include output from a script +(define configf:section-rx (regexp "^\\[(.*)\\]\\s*$")) +(define configf:blank-l-rx (regexp "^\\s*$")) +(define configf:key-sys-pr (regexp "^(\\S+)\\s+\\[system\\s+(\\S+.*)\\]\\s*$")) +(define configf:key-val-pr (regexp "^(\\S+)(\\s+(.*)|())$")) +(define configf:key-no-val (regexp "^(\\S+)(\\s*)$")) +(define configf:comment-rx (regexp "^\\s*#.*")) +(define configf:cont-ln-rx (regexp "^(\\s+)(\\S+.*)$")) +(define configf:settings (regexp "^\\[configf:settings\\s+(\\S+)\\s+(\\S+)]\\s*$")) + +;; read a line and process any #{ ... } constructs + +(define configf:var-expand-regex (regexp "^(.*)#\\{(scheme|system|shell|getenv|get|runconfigs-get|rget|scm|sh|rp|gv|g|mtrah)\\s+([^\\}\\{]*)\\}(.*)")) + +(define (configf:system ht cmd) + (system cmd) + ) + +(define (configf:process-line l ht allow-system #!key (linenum #f)) + (let loop ((res l)) + (if (string? res) + (let ((matchdat (string-search configf:var-expand-regex res))) + (if matchdat + (let* ((prestr (list-ref matchdat 1)) + (cmdtype (list-ref matchdat 2)) ;; eval, system, shell, getenv + (cmd (list-ref matchdat 3)) + (poststr (list-ref matchdat 4)) + (result #f) + (start-time (current-seconds)) + (cmdsym (string->symbol cmdtype)) + (fullcmd (case cmdsym + ((scheme scm) (conc "(lambda (ht)" cmd ")")) + ((system) (conc "(lambda (ht)(configf:system ht \"" cmd "\"))")) + ((shell sh) (conc "(lambda (ht)(string-translate (shell \"" cmd "\") \"\n\" \" \"))")) + ((realpath rp)(conc "(lambda (ht)(common:nice-path \"" cmd "\"))")) + ((getenv gv) (conc "(lambda (ht)(get-environment-variable \"" cmd "\"))")) + ((mtrah) (conc "(lambda (ht)" + " (let ((extra \"" cmd "\"))" + " (conc (or *toppath* (get-environment-variable \"MT_RUN_AREA_HOME\"))" + " (if (string-null? extra) \"\" \"/\")" + " extra)))")) + ((get g) + (match (string-split cmd) + ((sect var)(conc "(lambda (ht)(configfmod#configf:lookup ht \"" sect "\" \"" var "\"))")) + (else + (debug:print-error 0 *default-log-port* "#{get ...} used with only one parameter, \"" cmd "\", two needed.") + "(lambda (ht) #f)"))) + ((runconfigs-get rget) (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))")) + ;; ((rget) (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))")) + (else "(lambda (ht)(print \"ERROR\") \"ERROR\")")))) + ;; (print "fullcmd=" fullcmd) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "WARNING: failed to process config input \"" l "\", exn=" exn) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + ;; (print "exn=" (condition->list exn)) + (set! result (conc "#{( " cmdtype ") " cmd "}, full expansion: " fullcmd))) + (if (or allow-system + (not (member cmdtype '("system" "shell" "sh")))) + (with-input-from-string fullcmd + (lambda () + (set! result ((eval (read)) ht)))) + (set! result (conc "#{(" cmdtype ") " cmd "}")))) + (case cmdsym + ((system shell scheme) + (let ((delta (- (current-seconds) start-time))) + (if (> delta 2) + (debug:print-info 0 *default-log-port* "for line \"" l "\"\n command: " cmd " took " delta " seconds to run with output:\n " result) + (debug:print-info 9 *default-log-port* "for line \"" l "\"\n command: " cmd " took " delta " seconds to run with output:\n " result))))) + (loop (conc prestr result poststr))) + res)) + res))) + +;; Run a shell command and return the output as a string +(define (shell cmd) + (let* ((output (process:cmd-run->list cmd)) + (res (car output)) + (status (cadr output))) + (if (equal? status 0) + (let ((outres (string-intersperse + res + "\n"))) + (debug:print-info 4 *default-log-port* "shell result:\n" outres) + outres) + (begin + (with-output-to-port (current-error-port) + (lambda () + (print "ERROR: " cmd " returned bad exit code " status))) + "")))) + +;; this was inline but I'm pretty sure that is a hold over from when it was *very* simple ... +;; +(define (configf:read-line p ht allow-processing settings) + (let loop ((inl (read-line p))) + (let ((cont-line (and (string? inl) + (not (string-null? inl)) + (equal? "\\" (string-take-right inl 1))))) + (if cont-line ;; last character is \ + (let ((nextl (read-line p))) + (if (not (eof-object? nextl)) + (loop (string-append (if cont-line + (string-take inl (- (string-length inl) 1)) + inl) + nextl)))) + (let ((res (case allow-processing ;; if (and allow-processing + ;; (not (eq? allow-processing 'return-string))) + ((#t #f) + (configf:process-line inl ht allow-processing)) + ((return-string) + inl) + (else + (configf:process-line inl ht allow-processing))))) + (if (and (string? res) ;; must set to "no" to force NOT trimming trailing spaces + (not (equal? (hash-table-ref/default settings "trim-trailing-spaces" "yes") "no"))) + (string-substitute "\\s+$" "" res) + res)))))) + +(define (configf:cfgdat->env-alist section cfgdat-ht allow-system) + (filter + (lambda (pair) + (let* ((var (car pair)) + (val (cdr pair))) + (cons var + (cond + ((and allow-system (procedure? val)) ;; if we decided to use something other than #t or #f for allow-system ('return-procs or 'return-string) , this may become problematic + (val)) + ((procedure? val) #f) + ((string? val) val) + (else "#f"))))) + (append + (hash-table-ref/default cfgdat-ht "default" '()) + (if (equal? section "default") '() (hash-table-ref/default cfgdat-ht section '()))))) + +(define (calc-allow-system allow-system section sections) + (if sections + (and (or (equal? "default" section) + (member section sections)) + allow-system) ;; account for sections and return allow-system as it might be a symbol such as return-strings + allow-system)) + +;; given a config hash and a section name, apply that section to all matching sections (using wildcard % or regex if /..../) +;; remove the section when done so that there is no downstream clobbering +;; +(define (configf:apply-wildcards ht section-name) + (if (hash-table-exists? ht section-name) + (let* ((vars (hash-table-ref ht section-name)) + (rxstr (if (string-contains section-name "%") + (string-substitute (regexp "%") ".*" section-name) + (string-substitute (regexp "^/(.*)/$") "\\1" section-name))) + (rx (regexp rxstr))) + ;; (print "\nsection-name: " section-name " rxstr: " rxstr) + (for-each + (lambda (section) + (if section + (let ((same-section (string=? section-name section)) + (rx-match (string-match rx section))) + ;; (print "section: " section " vars: " vars " same-section: " same-section " rx-match: " rx-match) + (if (and (not same-section) rx-match) + (for-each + (lambda (bundle) + ;; (print "bundle: " bundle) + (let ((key (car bundle)) + (val (cadr bundle)) + (meta (if (> (length bundle) 2)(caddr bundle) #f))) + (hash-table-set! ht section (configf:assoc-safe-add (hash-table-ref ht section) key val metadata: meta)))) + vars))))) + (hash-table-keys ht)))) + ht) + +;; read a config file, returns hash table of alists + +;; read a config file, returns hash table of alists +;; adds to ht if given (must be #f otherwise) +;; allow-system: +;; #f - do not evaluate [system +;; #t - immediately evaluate [system and store result as string +;; 'return-procs -- return a proc taking ht as an argument that may be evaulated at some future time +;; 'return-string -- return a string representing a proc taking ht as an argument that may be evaulated at some future time +;; envion-patt is a regex spec that identifies sections that will be eval'd +;; in the environment on the fly +;; sections: #f => get all, else list of sections to gather +;; post-section-procs alist of section-pattern => proc, where: (proc section-name next-section-name ht curr-path) +;; apply-wildcards: #t/#f - apply vars from targets with % wildcards to all matching sections +;; +(define (read-config path ht allow-system #!key (environ-patt #f) (curr-section #f) + (sections #f) (settings (make-hash-table)) (keep-filenames #f) + (post-section-procs '()) (apply-wildcards #t) ) + (debug:print 9 *default-log-port* "START: " path) +;; (if *configdat* +;; (common:save-pkt `((action . read-config) +;; (f . ,(cond ((string? path) path) +;; ((port? path) "port") +;; (else (conc path)))) +;; (T . configf)) +;; *configdat* #t add-only: #t)) + (if (and (not (port? path)) + (not (common:file-exists? path))) ;; for case where we are handed a port + (begin + (debug:print-info 1 *default-log-port* "read-config - file not found " path " current path: " (current-directory)) + ;; WARNING: This is a risky change but really, we should not return an empty hash table if no file read? + #f) ;; (if (not ht)(make-hash-table) ht)) + (let ((inp (if (string? path) + (open-input-file path) + path)) ;; we can be handed a port + (res (if (not ht)(make-hash-table) ht)) + (metapath (if (or (debug:debug-mode 9) + keep-filenames) + path #f)) + (process-wildcards (lambda (res curr-section-name) + (if (and apply-wildcards + (or (string-contains curr-section-name "%") ;; wildcard + (string-match "/.*/" curr-section-name))) ;; regex + (begin + (configf:apply-wildcards res curr-section-name) + (hash-table-delete! res curr-section-name)))))) ;; NOTE: if the section is a wild card it will be REMOVED from res + (let loop ((inl (configf:read-line inp res (calc-allow-system allow-system curr-section sections) settings)) ;; (read-line inp)) + (curr-section-name (if curr-section curr-section "default")) + (var-flag #f);; turn on for key-var-pr and cont-ln-rx, turn off elsewhere + (lead #f)) + (debug:print-info 8 *default-log-port* "curr-section-name: " curr-section-name " var-flag: " var-flag "\n inl: \"" inl "\"") + (if (eof-object? inl) + (begin + ;; process last section for wildcards + (process-wildcards res curr-section-name) + (if (string? path) ;; we received a path, not a port, thus we are responsible for closing it. + (close-input-port inp)) + (if (list? sections) ;; delete all sections except given when sections is provided + (for-each + (lambda (section) + (if (not (member section sections)) + (hash-table-delete! res section))) ;; we are using "" as a dumping ground and must remove it before returning the ht + (hash-table-keys res))) + (debug:print 9 *default-log-port* "END: " path) + res + ) ;; retval + (regex-case + inl + (configf:comment-rx _ (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) + curr-section-name #f #f)) + + (configf:blank-l-rx _ (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) + curr-section-name #f #f)) + (configf:settings ( x setting val ) + (begin + (hash-table-set! settings setting val) + (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) + curr-section-name #f #f))) + + (configf:include-rx ( x include-file ) + (let* ((curr-conf-dir (pathname-directory path)) + (full-conf (if (and (absolute-pathname? include-file) (file-exists? include-file)) + include-file + (common:nice-path + (conc (if curr-conf-dir + curr-conf-dir + ".") + "/" include-file))))) + (let ((all-matches (sort (handle-exceptions exn + (begin + (debug:print '(2 9) *default-log-port* "glob of " full-conf " gave no match. , exn=" exn) + (list)) + (glob full-conf)) string<=?))) + (if (null? all-matches) + (begin + (debug:print '(2 9) #f "INFO: include file(s) matching " include-file " not found (called from " path ")") + (debug:print 2 *default-log-port* " " full-conf)) + (for-each + (lambda (fpath) + ;; (push-directory conf-dir) + (debug:print 9 *default-log-port* "Including: " full-conf) + (read-config fpath res allow-system environ-patt: environ-patt + curr-section: curr-section-name sections: sections settings: settings + keep-filenames: keep-filenames)) + all-matches)) + (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) + curr-section-name #f #f)))) + (configf:script-rx ( x include-script params);; handle-exceptions + ;; exn + ;; (begin + ;; (debug:print '(0 2 9) #f "INFO: include from script " include-script " failed.") + ;; (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)) + (if (and (common:file-exists? include-script)(file-execute-access? include-script)) + (let* ((local-allow-system (calc-allow-system allow-system curr-section-name sections)) + (env-delta (configf:cfgdat->env-alist curr-section-name res local-allow-system)) + (new-inp-port + (common:with-env-vars + env-delta + (lambda () + (open-input-pipe (conc include-script " " params)))))) + (debug:print '(2 9) *default-log-port* "Including from script output: " include-script) + ;; (print "We got here, calling read-config next. Port is: " new-inp-port) + (read-config new-inp-port res allow-system environ-patt: environ-patt curr-section: curr-section-name sections: sections settings: settings keep-filenames: keep-filenames) + (close-input-port new-inp-port) + (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)) + (begin + (debug:print 0 *default-log-port* "Script not found or not exectutable: " include-script) + (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f))) + ) ;; ) + (configf:section-rx ( x section-name ) + (begin + ;; call post-section-procs + (for-each + (lambda (dat) + (let ((patt (car dat)) + (proc (cdr dat))) + (if (string-match patt curr-section-name) + (proc curr-section-name section-name res path)))) + post-section-procs) + ;; after gathering the vars for a section and if apply-wildcards is true and if there is a wildcard in the section name process wildcards + ;; NOTE: we are processing the curr-section-name, NOT section-name. + (process-wildcards res curr-section-name) + (if (not (hash-table-ref/default res section-name #f))(hash-table-set! res section-name '())) ;; ensure that mere mention of a section is not lost + (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) + ;; if we have the sections list then force all settings into "" and delete it later? + ;; (if (or (not sections) + ;; (member section-name sections)) + ;; section-name "") ;; stick everything into "". NOPE: We need new strategy. Put stuff in correct sections and then delete all sections later. + section-name + #f #f))) + (configf:key-sys-pr ( x key cmd ) + (if (calc-allow-system allow-system curr-section-name sections) + (let ((alist (hash-table-ref/default res curr-section-name '())) + (val-proc (lambda () + (let* ((start-time (current-seconds)) + (local-allow-system (calc-allow-system allow-system curr-section-name sections)) + (env-delta (configf:cfgdat->env-alist curr-section-name res local-allow-system)) + (cmdres (process:cmd-run->list cmd delta-env-alist-or-hash-table: env-delta)) ;; BB: here is where [system is exec'd. needs to have env from other vars! + (delta (- (current-seconds) start-time)) + (status (cadr cmdres)) + (res (car cmdres))) + (debug:print-info 4 *default-log-port* "" inl "\n => " (string-intersperse res "\n")) + (if (not (eq? status 0)) + (begin + (debug:print-error 0 *default-log-port* "problem with " inl ", return code " status + " output: " cmdres))) + (if (> delta 2) + (debug:print-info 0 *default-log-port* "for line \"" inl "\"\n command: " cmd " took " delta " seconds to run with output:\n " res) + (debug:print-info 9 *default-log-port* "for line \"" inl "\"\n command: " cmd " took " delta " seconds to run with output:\n " res)) + (if (null? res) + "" + (string-intersperse res " ")))))) + (hash-table-set! res curr-section-name + (configf:assoc-safe-add alist + key + (case (calc-allow-system allow-system curr-section-name sections) + ((return-procs) val-proc) + ((return-string) cmd) + (else (val-proc))) + metadata: metapath)) + (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)) + (loop (configf:read-line inp res + (calc-allow-system allow-system curr-section-name sections) + settings) + curr-section-name #f #f))) + + (configf:key-no-val ( x key val) + (let* ((alist (hash-table-ref/default res curr-section-name '())) + (fval (or (if (string? val) val #f) ""))) ;; fval should be either "" or " " (one or more spaces) + (debug:print 10 *default-log-port* " setting: [" curr-section-name "] " key " = #t") + (safe-setenv key fval) + (hash-table-set! res curr-section-name + (configf:assoc-safe-add alist key fval metadata: metapath)) + (loop (configf:read-line inp res + (calc-allow-system allow-system curr-section-name sections) + settings) + curr-section-name key #f))) + + (configf:key-val-pr ( x key unk1 val unk2 ) + (let* ((alist (hash-table-ref/default res curr-section-name '())) + (envar (and environ-patt + (string-search (regexp environ-patt) curr-section-name) ;; does the section match the envionpatt? + (and (not (string-null? key)) + (not (equal? "!" (substring key 0 1)))) ;; ! as leading character is a signature to NOT export to the environment + ;; (string-match "^.*:.*:.*$" key) ;; ;; something:something:something reserved for triggers in runconfigs + )) + (realval (if envar + (configf:eval-string-in-environment val) + val))) + (debug:print-info 6 *default-log-port* "read-config env setting, envar: " envar " realval: " realval " val: " val " key: " key " curr-section-name: " curr-section-name) + (if envar (safe-setenv key realval)) + (debug:print 10 *default-log-port* " setting: [" curr-section-name "] " key " = " val) + (hash-table-set! res curr-section-name + (configf:assoc-safe-add alist key realval metadata: metapath)) + (loop (configf:read-line inp res + (calc-allow-system allow-system curr-section-name sections) settings) + curr-section-name key #f))) + ;; if a continued line + (configf:cont-ln-rx ( x whsp val ) + (let ((alist (hash-table-ref/default res curr-section-name '()))) + (if var-flag ;; if set to a string then we have a continued var + (let ((newval (conc + (configf:lookup res curr-section-name var-flag) "\n" + ;; trim lead from the incoming whsp to support some indenting. + (if lead + (string-substitute (regexp lead) "" whsp) + "") + val))) + ;; (print "val: " val "\nnewval: \"" newval "\"\nvarflag: " var-flag) + (hash-table-set! res curr-section-name + (configf:assoc-safe-add alist var-flag newval metadata: metapath)) + (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name var-flag (if lead lead whsp))) + (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)))) + (else (debug:print-error 0 *default-log-port* "problem parsing " path ",\n \"" inl "\"") + (set! var-flag #f) + (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)))) + ) ;; end loop + ))) + +;; pathenvvar will set the named var to the path of the config +(define (find-and-read-config fname #!key (environ-patt #f)(given-toppath #f)(pathenvvar #f)) + (let* ((curr-dir (current-directory)) + (configinfo (find-config fname toppath: given-toppath)) + (toppath (car configinfo)) + (configfile (cadr configinfo)) + (set-fields (lambda (curr-section next-section ht path) + (let ((field-names (if ht (common:get-fields ht) '())) + (target (or (getenv "MT_TARGET")(args:get-arg "-reqtarg")(args:get-arg "-target")))) + (debug:print-info 9 *default-log-port* "set-fields with field-names=" field-names " target=" target " curr-section=" curr-section " next-section=" next-section " path=" path " ht=" ht) + (if (not (null? field-names))(keys:target-set-args field-names target #f)))))) + (if toppath (change-directory toppath)) + (if (and toppath pathenvvar)(setenv pathenvvar toppath)) + (let ((configdat (if configfile + (read-config configfile #f #t environ-patt: environ-patt post-section-procs: (list (cons "^fields$" set-fields)) #f)))) + (if toppath (change-directory curr-dir)) + (list configdat toppath configfile fname)))) + +;;====================================================================== +;; lookup and manipulation routines +;;====================================================================== + +;; (define (configf:assoc-safe-add alist key val #!key (metadata #f)) +;; (let ((newalist (filter (lambda (x)(not (equal? key (car x)))) alist))) +;; (append newalist (list (if metadata +;; (list key val metadata) +;; (list key val)))))) +;; +;; (define (configf:section-var-set! cfgdat section-name var value #!key (metadata #f)) +;; (hash-table-set! cfgdat section-name +;; (configf:assoc-safe-add +;; (hash-table-ref/default cfgdat section-name '()) +;; var value metadata: metadata))) +;; +;; (define (configf:lookup cfgdat section var) +;; (if (hash-table? cfgdat) +;; (let ((sectdat (hash-table-ref/default cfgdat section '()))) +;; (if (null? sectdat) +;; #f +;; (let ((match (assoc var sectdat))) +;; (if match ;; (and match (list? match)(> (length match) 1)) +;; (cadr match) +;; #f)) +;; )) +;; #f)) +;; +;; ;; use to have definitive setting: +;; ;; [foo] +;; ;; var yes +;; ;; +;; ;; (configf:var-is? cfgdat "foo" "var" "yes") => #t +;; ;; +;; (define (configf:var-is? cfgdat section var expected-val) +;; (equal? (configf:lookup cfgdat section var) expected-val)) +;; +;; (define config-lookup configf:lookup) +(define configf:read-file read-config) + +;; ;; safely look up a value that is expected to be a number, return +;; ;; a default (#f unless provided) +;; ;; +;; (define (configf:lookup-number cfdat section varname #!key (default #f)) +;; (let* ((val (configf:lookup *configdat* section varname)) +;; (res (if val +;; (string->number (string-substitute "\\s+" "" val #t)) +;; #f))) +;; (cond +;; (res res) +;; (val (debug:print 0 *default-log-port* "ERROR: no number found for [" section "], " varname ", got: " val)) +;; (else default)))) +;; +;; (define (configf:section-vars cfgdat section) +;; (let ((sectdat (hash-table-ref/default cfgdat section '()))) +;; (if (null? sectdat) +;; '() +;; (map car sectdat)))) +;; +;; (define (configf:get-section cfgdat section) +;; (hash-table-ref/default cfgdat section '())) +;; +;; (define (configf:set-section-var cfgdat section var val) +;; (let ((sectdat (configf:get-section cfgdat section))) +;; (hash-table-set! cfgdat section +;; (configf:assoc-safe-add sectdat var val)))) +;; +;; ;;(append (filter (lambda (x)(not (assoc var sectdat))) sectdat) +;; ;; (list var val)))) +;; +;;====================================================================== +;; setup +;;====================================================================== +;;====================================================================== + +(define (setup) + (let* ((configf (find-config "megatest.config")) + (config (if configf (read-config configf #f #t) #f))) + (if config + (setenv "RUN_AREA_HOME" (pathname-directory configf))) + config)) + +;;====================================================================== +;; Non destructive writing of config file +;;====================================================================== + +(define (configf:compress-multi-lines fdat) + ;; step 1.5 - compress any continued lines + (if (null? fdat) fdat + (let loop ((hed (car fdat)) + (tal (cdr fdat)) + (cur "") + (led #f) + (res '())) + ;; ALL WHITESPACE LEADING LINES ARE TACKED ON!! + ;; 1. remove led whitespace + ;; 2. tack on to hed with "\n" + (let ((match (string-match configf:cont-ln-rx hed))) + (if match ;; blast! have to deal with a multiline + (let* ((lead (cadr match)) + (lval (caddr match)) + (newl (conc cur "\n" lval))) + (if (not led)(set! led lead)) + (if (null? tal) + (set! fdat (append fdat (list newl))) + (loop (car tal)(cdr tal) newl led res))) ;; NB// not tacking newl onto res + (let ((newres (if led + (append res (list cur hed)) + (append res (list hed))))) + ;; prev was a multiline + (if (null? tal) + newres + (loop (car tal)(cdr tal) "" #f newres)))))))) + +;; note: I'm cheating a little here. I merely replace "\n" with "\n " +(define (configf:expand-multi-lines fdat) + ;; step 1.5 - compress any continued lines + (if (null? fdat) fdat + (let loop ((hed (car fdat)) + (tal (cdr fdat)) + (res '())) + (let ((newres (append res (list (string-substitute (regexp "\n") "\n " hed #t))))) + (if (null? tal) + newres + (loop (car tal)(cdr tal) newres)))))) + +(define (configf:file->list fname) + (if (common:file-exists? fname) + (let ((inp (open-input-file fname))) + (let loop ((inl (read-line inp)) + (res '())) + (if (eof-object? inl) + (begin + (close-input-port inp) + (reverse res)) + (loop (read-line inp)(cons inl res))))) + '())) + +;;====================================================================== +;; Write a config +;; 0. Given a refererence data structure "indat" +;; 1. Open the output file and read it into a list +;; 2. Flatten any multiline entries +;; 3. Modify values per contents of "indat" and remove absent values +;; 4. Append new values to the section (immediately after last legit entry) +;; 5. Write out the new list +;;====================================================================== + +(define (configf:write-config indat fname #!key (required-sections '())) + (let* (;; step 1: Open the output file and read it into a list + (fdat (configf:file->list fname)) + (refdat (make-hash-table)) + (sechash (make-hash-table)) ;; current section hash, init with hash for "default" section + (new #f) ;; put the line to be used in new, if it is to be deleted the set new to #f + (secname #f)) + + ;; step 2: Flatten multiline entries + (if (not (null? fdat))(set! fdat (configf:compress-multi-lines fdat))) + + ;; step 3: Modify values per contents of "indat" and remove absent values + (if (not (null? fdat)) + (let loop ((hed (car fdat)) + (tal (cadr fdat)) + (res '()) + (lnum 0)) + (regex-case + hed + (configf:comment-rx _ (set! res (append res (list hed)))) ;; (loop (read-line inp) curr-section-name #f #f)) + (configf:blank-l-rx _ (set! res (append res (list hed)))) ;; (loop (read-line inp) curr-section-name #f #f)) + (configf:section-rx ( x section-name ) (let ((section-hash (hash-table-ref/default refdat section-name #f))) + (if (not section-hash) + (let ((newhash (make-hash-table))) + (hash-table-set! refdat section-name newhash) ;; was refhash - not sure that refdat is correct here + (set! sechash newhash)) + (set! sechash section-hash)) + (set! new hed) ;; will append this at the bottom of the loop + (set! secname section-name) + )) + ;; No need to process key cmd, let it fall though to key val + (configf:key-val-pr ( x key val ) + (let ((newval (configf:lookup indat secname key))) ;; was sec, bug or correct? + ;; can handle newval == #f here => that means key is removed + (cond + ((equal? newval val) + (set! res (append res (list hed)))) + ((not newval) ;; key has been removed + (set! new #f)) + ((not (equal? newval val)) + (hash-table-set! sechash key newval) + (set! new (conc key " " newval))) + (else + (debug:print-error 0 *default-log-port* "problem parsing line number " lnum "\"" hed "\""))))) + (else + (debug:print-error 0 *default-log-port* "Problem parsing line num " lnum " :\n " hed ))) + (if (not (null? tal)) + (loop (car tal)(cdr tal)(if new (append res (list new)) res)(+ lnum 1))) + ;; drop to here when done processing, res contains modified list of lines + (set! fdat res))) + + ;; step 4: Append new values to the section + (for-each + (lambda (section) + (let ((sdat '()) ;; append needed bits here + (svars (configf:section-vars indat section))) + (for-each + (lambda (var) + (let ((val (configf:lookup refdat section var))) + (if (not val) ;; this one is new + (begin + (if (null? sdat)(set! sdat (list (conc "[" section "]")))) + (set! sdat (append sdat (list (conc var " " val)))))))) + svars) + (set! fdat (append fdat sdat)))) + (delete-duplicates (append required-sections (hash-table-keys indat)))) + + ;; step 5: Write out new file + (with-output-to-file fname + (lambda () + (for-each + (lambda (line) + (print line)) + (configf:expand-multi-lines fdat)))))) + +;;====================================================================== +;; refdb +;;====================================================================== + +;; reads a refdb into an assoc array of assoc arrays +;; returns (list dat msg) +(define (configf:read-refdb refdb-path) + (let ((sheets-file (conc refdb-path "/sheet-names.cfg"))) + (if (not (common:file-exists? sheets-file)) + (list #f (conc "ERROR: no refdb found at " refdb-path)) + (if (not (file-read-access? sheets-file)) + (list #f (conc "ERROR: refdb file not readable at " refdb-path)) + (let* ((sheets (with-input-from-file sheets-file + (lambda () + (let loop ((inl (read-line)) + (res '())) + (if (eof-object? inl) + (reverse res) + (loop (read-line)(cons inl res))))))) + (data '())) + (for-each + (lambda (sheet-name) + (let* ((dat-path (conc refdb-path "/" sheet-name ".dat")) + (ref-dat (configf:read-file dat-path #f #t)) + (ref-assoc (map (lambda (key) + (list key (hash-table-ref ref-dat key))) + (hash-table-keys ref-dat)))) + ;; (hash-table->alist ref-dat))) + ;; (set! data (append data (list (list sheet-name ref-assoc)))))) + (set! data (cons (list sheet-name ref-assoc) data)))) + sheets) + (list data "NO ERRORS")))))) + +;; map over all pairs in a three level hierarchial alist and apply a function to the keys/val +;; +(define (configf:map-all-hier-alist data proc #!key (initproc1 #f)(initproc2 #f)(initproc3 #f)) + (for-each + (lambda (sheetname) + (let* ((sheettmp (assoc sheetname data)) + (sheetdat (if sheettmp (cadr sheettmp) '()))) + (if initproc1 (initproc1 sheetname)) + (for-each + (lambda (sectionname) + (let* ((sectiontmp (assoc sectionname sheetdat)) + (sectiondat (if sectiontmp (cadr sectiontmp) '()))) + (if initproc2 (initproc2 sheetname sectionname)) + (for-each + (lambda (varname) + (let* ((valtmp (assoc varname sectiondat)) + (val (if valtmp (cadr valtmp) ""))) + (proc sheetname sectionname varname val))) + (map car sectiondat)))) + (map car sheetdat)))) + (map car data)) + data) + +;;====================================================================== +;; C O N F I G T O / F R O M A L I S T +;;====================================================================== + +(define (configf:config->alist cfgdat) + (hash-table->alist cfgdat)) + +(define (configf:alist->config adat) + (let ((ht (make-hash-table))) + (for-each + (lambda (section) + (hash-table-set! ht (car section)(cdr section))) + adat) + ht)) + +;; convert hierarchial list to ini format +;; +(define (configf:config->ini data) + (map + (lambda (section) + (let ((section-name (car section)) + (section-dat (cdr section))) + (print "\n[" section-name "]") + (map (lambda (dat-pair) + (let* ((var (car dat-pair)) + (val (cadr dat-pair)) + (fname (if (> (length dat-pair) 2)(caddr dat-pair) #f))) + (if fname (print "# " var "=>" fname)) + (print var " " val))) + section-dat))) ;; (print "section-dat: " section-dat)) + (hash-table->alist data))) + +(define (tests:get-tests-search-path cfgdat) + (let ((paths (let ((section (if cfgdat + (configf:get-section cfgdat "tests-paths") + #f))) + (if section + (map cadr section) + '())))) + (filter (lambda (d) + (if (directory-exists? d) + d + (begin + ;; (if (common:low-noise-print 60 "tests:get-tests-search-path" d) + ;; (debug:print 0 *default-log-port* "WARNING: problem with directory " d ", dropping it from tests path")) + #f))) + (append paths (list (conc *toppath* "/tests")))))) + ) Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -966,6 +966,21 @@ (dboard:commondat-curr-tab-num commondat))) (curr-updaters (hash-table-ref/default (dboard:commondat-updaters commondat) tnum '()))) (hash-table-set! (dboard:commondat-updaters commondat) tnum (cons updater curr-updaters)))) + + + +(define (common:run-a-command cmd #!key (with-vars #f) (with-orig-env #f)) + (let* ((pre-cmd (dtests:get-pre-command)) + (post-cmd (dtests:get-post-command)) + (fullcmd (if (or pre-cmd post-cmd) + (conc pre-cmd cmd post-cmd) + (conc "viewscreen " cmd)))) + (debug:print-info 02 *default-log-port* "Running command: " fullcmd) + (cond + (with-vars (common:without-vars fullcmd)) + (with-orig-env (common:with-orig-env fullcmd)) + (else (common:without-vars fullcmd "MT_.*"))))) + Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -29,377 +29,29 @@ base64 format dot-locking z3 typed-records matchable) (import (prefix sqlite3 sqlite3:)) (import (prefix base64 base64:)) (declare (unit db)) -(declare (uses common)) -(declare (uses client)) -(declare (uses mt)) -(declare (uses margsmod)) -(import margsmod) - -(declare (uses commonmod)) -(import commonmod) - -(declare (uses configfmod)) -(import configfmod) - -(declare (uses dbmod)) -(import dbmod) - -(declare (uses servermod)) -(import servermod) - -(include "common_records.scm") -(include "db_records.scm") -(include "key_records.scm") -(include "run_records.scm") - -;; MUST RESOLVE mt:process-triggers before these can move to dbmod. - -;; set tests with state currstate and status currstatus to newstate and newstatus -;; use currstate = #f and or currstatus = #f to apply to any state or status respectively -;; WARNING: SQL injection risk. NB// See new but not yet used "faster" version below -;; -;; AND NOT (item_path='' AND testname in (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));"))) -;; (debug:print 0 *default-log-port* "QRY: " qry) -;; (db:delay-if-busy) -;; -;; NB// This call only operates on toplevel tests. Consider replacing it with more general call -;; -(define (db:set-tests-state-status dbstruct run-id testnames currstate currstatus newstate newstatus) - (let ((test-ids '())) - (for-each - (lambda (testname) - (let ((qry (conc "UPDATE tests SET state=?,status=? WHERE " - (if currstate (conc "state='" currstate "' AND ") "") - (if currstatus (conc "status='" currstatus "' AND ") "") - " run_id=? AND testname LIKE ?;")) - (test-id (db:get-test-id dbstruct run-id testname ""))) - (db:with-db - dbstruct - run-id - #t - (lambda (db) - (sqlite3:execute db qry - (or newstate currstate "NOT_STARTED") - (or newstatus currstate "UNKNOWN") - run-id testname))) - (if test-id - (begin - (set! test-ids (cons test-id test-ids)) - (mt:process-triggers dbstruct run-id test-id newstate newstatus))))) - testnames) - test-ids)) - -;; state is the priority rollup of all states -;; status is the priority rollup of all completed statesfu -;; -;; if test-name is an integer work off that instead of test-name test-path -;; -(define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) - ;; establish info on incoming test followed by info on top level test - ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met - (let* ((testdat (if (number? test-name) - (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id - (db:get-test-info dbstruct run-id test-name item-path))) - (test-id (db:test-get-id testdat)) - (test-name (if (number? test-name) - (db:test-get-testname testdat) - test-name)) - (item-path (db:test-get-item-path testdat)) - (tl-testdat (db:get-test-info dbstruct run-id test-name "")) - (tl-test-id (if tl-testdat - (db:test-get-id tl-testdat) - #f))) - (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) - (db:general-call dbstruct 'set-test-start-time (list test-id))) - (mutex-lock! *db-transaction-mutex*) - (db:with-db - dbstruct #f #f - (lambda (db) - (let ((tr-res - (sqlite3:with-transaction - db - (lambda () - ;; NB// Pass the db so it is part fo the transaction - (db:test-set-state-status db run-id test-id state status comment) ;; this call sets the item state/status - (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item - (let* ((state-status-counts (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test - (state-stauses (db:roll-up-rules state-status-counts state status)) - (newstate (car state-stauses)) - (newstatus (cadr state-stauses))) - (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path" newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts) " state-status-counts: " - (apply conc - (map (lambda (x) - (conc - (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | ")) - state-status-counts))); end debug:print - - (if tl-test-id - (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct - )))))) - (mutex-unlock! *db-transaction-mutex*) - (if (and test-id state status (equal? status "AUTO")) - (db:test-data-rollup dbstruct run-id test-id status)) - tr-res))))) - -;; ;; speed up for common cases with a little logic -;; ;; NB// Ultimately this will be deprecated in deference to mt:test-set-state-status-by-id -;; -;; NOTE: run-id is not used -;; ;; -(define (db:test-set-state-status dbstruct run-id test-id newstate newstatus newcomment) - (db:with-db - dbstruct - ;; run-id - #f - #t - (lambda (db) - (cond - ((and newstate newstatus newcomment) - (sqlite3:execute db "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;" newstate newstatus newcomment ;; (sdb:qry 'getid newcomment) - test-id)) - ((and newstate newstatus) - (sqlite3:execute db "UPDATE tests SET state=?,status=? WHERE id=?;" newstate newstatus test-id)) - (else - (if newstate (sqlite3:execute db "UPDATE tests SET state=? WHERE id=?;" newstate test-id)) - (if newstatus (sqlite3:execute db "UPDATE tests SET status=? WHERE id=?;" newstatus test-id)) - (if newcomment (sqlite3:execute db "UPDATE tests SET comment=? WHERE id=?;" newcomment ;; (sdb:qry 'getid newcomment) - test-id)))))) - (mt:process-triggers dbstruct run-id test-id newstate newstatus)) - -(define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime) - (let* ((incompleted '()) - (oldlaunched '()) - (toplevels '()) - ;; The default running-deadtime is 720 seconds = 12 minutes. - ;; "(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))" = 200 + (2 * (200 + 30 + 30)) - (deadtime-trim (or ovr-deadtime (configf:lookup-number *configdat* "setup" "deadtime"))) - (server-start-allowance 200) - (server-overloaded-budget 200) - (launch-monitor-off-time (or (configf:lookup-number *configdat* "setup" "test-stats-update-period") 30)) - (launch-monitor-on-time-budget 30) - (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget)) - (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30)) - (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default)) - (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period))) - (running-deadtime (or deadtime-trim running-deadtime-default)) ;; two minutes (30 seconds between updates, this leaves 3x grace period) - ) - (debug:print-info 4 *default-log-port* "running-deadtime = " running-deadtime) - (debug:print-info 4 *default-log-port* "deadtime-trim = " deadtime-trim) - - (db:with-db - dbstruct #f #f - (lambda (db) - (let* ((stmth1 (db:get-cache-stmth - dbstruct db - "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests - WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) - AND state IN ('RUNNING');")) - (stmth2 (db:get-cache-stmth - dbstruct db - "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests - WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) - AND state IN ('REMOTEHOSTSTART');")) - (stmth3 (db:get-cache-stmth - dbstruct db - "SELECT id,rundir,uname,testname,item_path FROM tests - WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 - AND state IN ('LAUNCHED');"))) - ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes - ;; - ;; HOWEVER: this code in run:test seems to work fine - ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) - ;; (db:test-get-run_duration testdat))) - ;; 600) - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path event-time run-duration) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (begin - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) - (begin - (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)) - (debug:print-info 0 *default-log-port* "Found old test in RUNNING state, test-id=" - test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds) - " event-time="event-time" run-duration="run-duration)))) - stmth1 - run-id running-deadtime) ;; default time 720 seconds - - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path event-time run-duration) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (begin - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) - (begin - (debug:print-info 0 *default-log-port* "Found old test in REMOTEHOSTSTART state, test-id=" test-id - " exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time - " run-duration="run-duration) - (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))) - stmth2 - run-id remotehoststart-deadtime) ;; default time 230 seconds - - ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config - ;; - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (begin - (debug:print-info 0 *default-log-port* "Found old test in LAUNCHED state, test-id=" test-id - " 1 day since event_time marked") - (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))) - stmth3 - run-id) - - (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " - (length toplevels) " old LAUNCHED toplevel tests and " - (length incompleted) " tests marked RUNNING but apparently dead.")) - - ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE. - ;; - ;; (db:delay-if-busy dbdat) - (let* ((min-incompleted-ids (map car incompleted)) ;; do 'em all - (all-ids (append min-incompleted-ids (map car oldlaunched)))) - (if (> (length all-ids) 0) - (begin - ;; (launch:is-test-alive "localhost" 435) - (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") - " as DEAD") - (for-each - (lambda (test-id) - (let* (;; (run-dir (db:test-get-rundir-from-test-id dbstruct run-id test-id)) - (tinfo (db:get-test-info-by-id dbstruct run-id test-id)) - (run-dir (db:test-get-rundir tinfo)) - (host (db:test-get-host tinfo)) - (pid (db:test-get-process_id tinfo)) - (result (db:get-status-from-final-status-file run-dir))) - (if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result))) - (begin - (debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD") - (db:set-state-status-and-roll-up-items - dbstruct run-id test-id 'foo "COMPLETED" "PASS" - "Test stopped responding but it has PASSED; marking it PASS in the DB.")) - (let ((is-alive (and (not (eq? pid 0)) ;; 0 is default in re-used field "attemptnum" where pid stored. - (launch:is-test-alive host pid)))) - (if is-alive - (debug:print 0 *default-log-port* "INFO: test " test-id " on host " host - " has a process on pid " pid ", NOT setting to DEAD.") - (begin - (debug:print 0 *default-log-port* "INFO: test " test-id - " final state/status is not COMPLETED/PASS. It is " result) - (db:set-state-status-and-roll-up-items - dbstruct run-id test-id 'foo "COMPLETED" "DEAD" - "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead."))))))) - ;; call end of eud of run detection for posthook - from merge, is it needed? - ;; (launch:end-of-run-check run-id) - all-ids) - - ;; MOVE TO rmt:find-and-mark-incomplete - for now always call launch:end-of-run-check after - ;; calling rmt:find-and-mark-incompletes - - ;;ALWAYS CALL after rmt:find-and-mark-incompletes - ;; call end of eud of run detection for posthook - ;; (launch:end-of-run-check run-id) - - ))))))) - -(define (db:test-set-state-status-process-triggers dbstruct run-id test-id newstate newstatus newcomment) - (db:test-set-state-status dbstruct run-id test-id newstate newstatus newcomment) - (mt:process-triggers dbstruct run-id test-id newstate newstatus)) - -;; options: -;; -;; 'killservers - kills all servers -;; 'dejunk - removes junk records -;; 'adj-testids - move test-ids into correct ranges -;; 'old2new - sync megatest.db to /tmp/.../megatest.db and /tmp/.../megatest_ref.db -;; 'new2old - sync /tmp/.../megatest.db to megatest.db and /tmp/.../megatest_ref.db (and update data_synced) -;; 'closeall - close all opened dbs -;; 'schema - attempt to apply schema changes -;; run-ids: '(1 2 3 ...) or #f (for all) -;; -(define (db:multi-db-sync dbstruct . options) - ;; (if (not (launch:setup)) - ;; (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.") - (let* ((mtdb (dbr:dbstruct-mtdb dbstruct)) - (tmpdb (db:get-db dbstruct)) - (refndb (dbr:dbstruct-refndb dbstruct)) - (allow-cleanup #t) ;; (if run-ids #f #t)) - (servers (server:get-list *toppath*)) ;; (tasks:get-all-servers (db:delay-if-busy tdbdat))) - (data-synced 0)) ;; count of changed records (I hope) - - (for-each - (lambda (option) - - (case option - ;; kill servers - ((killservers) - (for-each - (lambda (server) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " server ", exn=" exn) - #f) - (match-let (((mod-time host port start-time server-id pid) server)) - (if (and host pid) - (tasks:kill-server host pid))))) - servers) - - ;; /tmp/bjbarcla/megatest_localdb/fullrun/.nfs.pdx.disks.icf_env_disk001.bjbarcla.gwa.issues.mtdev.matt-bisect.megatest.ext-tests.runs.all.v1.65.1.6524.dev.bb-24-justrollup-f8.rollup.fullrun/megatest.db.lock - (delete-file* (common:get-sync-lock-filepath)) - ) - - ;; clear out junk records - ;; - ((dejunk) - ;; (db:delay-if-busy mtdb) ;; ok to delay on mtdb - (when (file-write-access? (db:dbdat-get-path mtdb)) (db:clean-up mtdb)) - (db:clean-up tmpdb) - (db:clean-up refndb)) - - ;; sync runs, test_meta etc. - ;; - ((old2new) - (set! data-synced - (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb tmpdb refndb) - data-synced))) - - ;; now ensure all newdb data are synced to megatest.db - ;; do not use the run-ids list passed in to the function - ;; - ((new2old) - (set! data-synced - (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f tmpdb refndb mtdb) - data-synced))) - - ((adj-target) - (db:adj-target (db:dbdat-get-db mtdb)) - (db:adj-target (db:dbdat-get-db tmpdb)) - (db:adj-target (db:dbdat-get-db refndb))) - - ((schema) - (db:patch-schema-maindb (db:dbdat-get-db mtdb)) - (db:patch-schema-maindb (db:dbdat-get-db tmpdb)) - (db:patch-schema-maindb (db:dbdat-get-db refndb)) - (db:patch-schema-rundb (db:dbdat-get-db mtdb)) - (db:patch-schema-rundb (db:dbdat-get-db tmpdb)) - (db:patch-schema-rundb (db:dbdat-get-db refndb)))) - - (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb)) - options) - data-synced)) - -(define (db:get-access-mode) - (if (args:get-arg "-use-db-cache") 'cached 'rmt)) - +;; (declare (uses common)) +;; (declare (uses client)) +;; (declare (uses mt)) +;; (declare (uses margsmod)) +;; (import margsmod) +;; +;; (declare (uses commonmod)) +;; (import commonmod) +;; +;; (declare (uses configfmod)) +;; (import configfmod) +;; +;; (declare (uses dbmod)) +;; (import dbmod) +;; +;; (declare (uses servermod)) +;; (import servermod) +;; +;; (include "common_records.scm") +;; (include "db_records.scm") +;; (include "key_records.scm") +;; (include "run_records.scm") +;; +;; Index: dbmod.scm ================================================================== --- dbmod.scm +++ dbmod.scm @@ -20,17 +20,19 @@ (declare (unit dbmod)) (declare (uses commonmod)) (declare (uses ods)) (declare (uses configfmod)) +(declare (uses margsmod)) (module dbmod * (import commonmod) (import ods) (import configfmod) +(import margsmod) (import scheme chicken data-structures extras ports) (import (prefix base64 base64:) (prefix sqlite3 sqlite3:) @@ -50,10 +52,13 @@ srfi-1 srfi-13 stack z3 ) + +(import (prefix dbi dbi:) + pkts) ;;====================================================================== ;; R E C O R D S ;;====================================================================== @@ -4675,7 +4680,908 @@ (if (file-exists? dbfile) (file-write-access? dbfile) (file-write-access? *toppath*)))) ;;======================================================================the end + +;; state is the priority rollup of all states +;; status is the priority rollup of all completed statesfu +;; +;; if test-name is an integer work off that instead of test-name test-path +;; +(define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) + ;; establish info on incoming test followed by info on top level test + ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met + (let* ((testdat (if (number? test-name) + (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id + (db:get-test-info dbstruct run-id test-name item-path))) + (test-id (db:test-get-id testdat)) + (test-name (if (number? test-name) + (db:test-get-testname testdat) + test-name)) + (item-path (db:test-get-item-path testdat)) + (tl-testdat (db:get-test-info dbstruct run-id test-name "")) + (tl-test-id (if tl-testdat + (db:test-get-id tl-testdat) + #f))) + (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) + (db:general-call dbstruct 'set-test-start-time (list test-id))) + (mutex-lock! *db-transaction-mutex*) + (db:with-db + dbstruct #f #f + (lambda (db) + (let ((tr-res + (sqlite3:with-transaction + db + (lambda () + ;; NB// Pass the db so it is part fo the transaction + (db:test-set-state-status db run-id test-id state status comment) ;; this call sets the item state/status + (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item + (let* ((state-status-counts (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test + (state-stauses (db:roll-up-rules state-status-counts state status)) + (newstate (car state-stauses)) + (newstatus (cadr state-stauses))) + (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path" newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts) " state-status-counts: " + (apply conc + (map (lambda (x) + (conc + (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | ")) + state-status-counts))); end debug:print + + (if tl-test-id + (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct + )))))) + (mutex-unlock! *db-transaction-mutex*) + (if (and test-id state status (equal? status "AUTO")) + (db:test-data-rollup dbstruct run-id test-id status)) + tr-res))))) + +;; ;; speed up for common cases with a little logic +;; ;; NB// Ultimately this will be deprecated in deference to mt:test-set-state-status-by-id +;; +;; NOTE: run-id is not used +;; ;; +(define (db:test-set-state-status dbstruct run-id test-id newstate newstatus newcomment) + (db:with-db + dbstruct + ;; run-id + #f + #t + (lambda (db) + (cond + ((and newstate newstatus newcomment) + (sqlite3:execute db "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;" newstate newstatus newcomment ;; (sdb:qry 'getid newcomment) + test-id)) + ((and newstate newstatus) + (sqlite3:execute db "UPDATE tests SET state=?,status=? WHERE id=?;" newstate newstatus test-id)) + (else + (if newstate (sqlite3:execute db "UPDATE tests SET state=? WHERE id=?;" newstate test-id)) + (if newstatus (sqlite3:execute db "UPDATE tests SET status=? WHERE id=?;" newstatus test-id)) + (if newcomment (sqlite3:execute db "UPDATE tests SET comment=? WHERE id=?;" newcomment ;; (sdb:qry 'getid newcomment) + test-id)))))) + (mt:process-triggers dbstruct run-id test-id newstate newstatus)) + +(define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime) + (let* ((incompleted '()) + (oldlaunched '()) + (toplevels '()) + ;; The default running-deadtime is 720 seconds = 12 minutes. + ;; "(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))" = 200 + (2 * (200 + 30 + 30)) + (deadtime-trim (or ovr-deadtime (configf:lookup-number *configdat* "setup" "deadtime"))) + (server-start-allowance 200) + (server-overloaded-budget 200) + (launch-monitor-off-time (or (configf:lookup-number *configdat* "setup" "test-stats-update-period") 30)) + (launch-monitor-on-time-budget 30) + (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget)) + (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30)) + (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default)) + (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period))) + (running-deadtime (or deadtime-trim running-deadtime-default)) ;; two minutes (30 seconds between updates, this leaves 3x grace period) + ) + (debug:print-info 4 *default-log-port* "running-deadtime = " running-deadtime) + (debug:print-info 4 *default-log-port* "deadtime-trim = " deadtime-trim) + + (db:with-db + dbstruct #f #f + (lambda (db) + (let* ((stmth1 (db:get-cache-stmth + dbstruct db + "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests + WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) + AND state IN ('RUNNING');")) + (stmth2 (db:get-cache-stmth + dbstruct db + "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests + WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) + AND state IN ('REMOTEHOSTSTART');")) + (stmth3 (db:get-cache-stmth + dbstruct db + "SELECT id,rundir,uname,testname,item_path FROM tests + WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 + AND state IN ('LAUNCHED');"))) + ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes + ;; + ;; HOWEVER: this code in run:test seems to work fine + ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) + ;; (db:test-get-run_duration testdat))) + ;; 600) + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path event-time run-duration) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (begin + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) + (begin + (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)) + (debug:print-info 0 *default-log-port* "Found old test in RUNNING state, test-id=" + test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds) + " event-time="event-time" run-duration="run-duration)))) + stmth1 + run-id running-deadtime) ;; default time 720 seconds + + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path event-time run-duration) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (begin + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) + (begin + (debug:print-info 0 *default-log-port* "Found old test in REMOTEHOSTSTART state, test-id=" test-id + " exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time + " run-duration="run-duration) + (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))) + stmth2 + run-id remotehoststart-deadtime) ;; default time 230 seconds + + ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config + ;; + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (begin + (debug:print-info 0 *default-log-port* "Found old test in LAUNCHED state, test-id=" test-id + " 1 day since event_time marked") + (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))) + stmth3 + run-id) + + (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " + (length toplevels) " old LAUNCHED toplevel tests and " + (length incompleted) " tests marked RUNNING but apparently dead.")) + + ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE. + ;; + ;; (db:delay-if-busy dbdat) + (let* ((min-incompleted-ids (map car incompleted)) ;; do 'em all + (all-ids (append min-incompleted-ids (map car oldlaunched)))) + (if (> (length all-ids) 0) + (begin + ;; (launch:is-test-alive "localhost" 435) + (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") + " as DEAD") + (for-each + (lambda (test-id) + (let* (;; (run-dir (db:test-get-rundir-from-test-id dbstruct run-id test-id)) + (tinfo (db:get-test-info-by-id dbstruct run-id test-id)) + (run-dir (db:test-get-rundir tinfo)) + (host (db:test-get-host tinfo)) + (pid (db:test-get-process_id tinfo)) + (result (db:get-status-from-final-status-file run-dir))) + (if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result))) + (begin + (debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD") + (db:set-state-status-and-roll-up-items + dbstruct run-id test-id 'foo "COMPLETED" "PASS" + "Test stopped responding but it has PASSED; marking it PASS in the DB.")) + (let ((is-alive (and (not (eq? pid 0)) ;; 0 is default in re-used field "attemptnum" where pid stored. + (launch:is-test-alive host pid)))) + (if is-alive + (debug:print 0 *default-log-port* "INFO: test " test-id " on host " host + " has a process on pid " pid ", NOT setting to DEAD.") + (begin + (debug:print 0 *default-log-port* "INFO: test " test-id + " final state/status is not COMPLETED/PASS. It is " result) + (db:set-state-status-and-roll-up-items + dbstruct run-id test-id 'foo "COMPLETED" "DEAD" + "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead."))))))) + ;; call end of eud of run detection for posthook - from merge, is it needed? + ;; (launch:end-of-run-check run-id) + all-ids) + + ;; MOVE TO rmt:find-and-mark-incomplete - for now always call launch:end-of-run-check after + ;; calling rmt:find-and-mark-incompletes + + ;;ALWAYS CALL after rmt:find-and-mark-incompletes + ;; call end of eud of run detection for posthook + ;; (launch:end-of-run-check run-id) + + ))))))) + +(define (db:test-set-state-status-process-triggers dbstruct run-id test-id newstate newstatus newcomment) + (db:test-set-state-status dbstruct run-id test-id newstate newstatus newcomment) + (mt:process-triggers dbstruct run-id test-id newstate newstatus)) + +;; get a list of servers with all relevant data +;; ( mod-time host port start-time pid ) +;; +(define (server:get-list areapath #!key (limit #f)) + (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$")) + (day-seconds (* 24 60 60))) + ;; if the directory exists continue to get the list + ;; otherwise attempt to create the logs dir and then + ;; continue + (if (if (directory-exists? (conc areapath "/logs")) + '() + (if (file-write-access? areapath) + (begin + (condition-case + (create-directory (conc areapath "/logs") #t) + (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs"))) + (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list. exn=" exn))) + (directory-exists? (conc areapath "/logs"))) + '())) + (let* ((server-logs (glob (conc areapath "/logs/server-[0-9]*.log"))) + (num-serv-logs (length server-logs))) + (if (null? server-logs) + '() + (let loop ((hed (car server-logs)) + (tal (cdr server-logs)) + (res '())) + (let* ((mod-time (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "failed to get modification time on " hed ", exn=" exn) + (current-seconds)) ;; 0 + (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted + (down-time (- (current-seconds) mod-time)) + (serv-dat (if (or (< num-serv-logs 10) + (< down-time 900)) ;; day-seconds)) + (server:logf-get-start-info hed) + '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at + (serv-rec (cons mod-time serv-dat)) + (fmatch (string-match fname-rx hed)) + (pid (if fmatch (string->number (list-ref fmatch 2)) #f)) + (new-res (if (null? serv-dat) + res + (cons (append serv-rec (list pid)) res)))) ;; any changes to number of elements in new-res will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let + (if (null? tal) + (if (and limit + (> (length new-res) limit)) + new-res ;; (take new-res limit) <= need intelligent sorting before this will work + new-res) + (loop (car tal)(cdr tal) new-res))))))))) + +;; options: +;; +;; 'killservers - kills all servers +;; 'dejunk - removes junk records +;; 'adj-testids - move test-ids into correct ranges +;; 'old2new - sync megatest.db to /tmp/.../megatest.db and /tmp/.../megatest_ref.db +;; 'new2old - sync /tmp/.../megatest.db to megatest.db and /tmp/.../megatest_ref.db (and update data_synced) +;; 'closeall - close all opened dbs +;; 'schema - attempt to apply schema changes +;; run-ids: '(1 2 3 ...) or #f (for all) +;; +(define (db:multi-db-sync dbstruct . options) + ;; (if (not (launch:setup)) + ;; (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.") + (let* ((mtdb (dbr:dbstruct-mtdb dbstruct)) + (tmpdb (db:get-db dbstruct)) + (refndb (dbr:dbstruct-refndb dbstruct)) + (allow-cleanup #t) ;; (if run-ids #f #t)) + (servers (server:get-list *toppath*)) ;; (tasks:get-all-servers (db:delay-if-busy tdbdat))) + (data-synced 0)) ;; count of changed records (I hope) + + (for-each + (lambda (option) + + (case option + ;; kill servers + ((killservers) + (for-each + (lambda (server) + (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " server ", exn=" exn) + #f) + (match-let (((mod-time host port start-time server-id pid) server)) + (if (and host pid) + (tasks:kill-server host pid))))) + servers) + + ;; /tmp/bjbarcla/megatest_localdb/fullrun/.nfs.pdx.disks.icf_env_disk001.bjbarcla.gwa.issues.mtdev.matt-bisect.megatest.ext-tests.runs.all.v1.65.1.6524.dev.bb-24-justrollup-f8.rollup.fullrun/megatest.db.lock + (delete-file* (common:get-sync-lock-filepath)) + ) + + ;; clear out junk records + ;; + ((dejunk) + ;; (db:delay-if-busy mtdb) ;; ok to delay on mtdb + (when (file-write-access? (db:dbdat-get-path mtdb)) (db:clean-up mtdb)) + (db:clean-up tmpdb) + (db:clean-up refndb)) + + ;; sync runs, test_meta etc. + ;; + ((old2new) + (set! data-synced + (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb tmpdb refndb) + data-synced))) + + ;; now ensure all newdb data are synced to megatest.db + ;; do not use the run-ids list passed in to the function + ;; + ((new2old) + (set! data-synced + (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f tmpdb refndb mtdb) + data-synced))) + + ((adj-target) + (db:adj-target (db:dbdat-get-db mtdb)) + (db:adj-target (db:dbdat-get-db tmpdb)) + (db:adj-target (db:dbdat-get-db refndb))) + + ((schema) + (db:patch-schema-maindb (db:dbdat-get-db mtdb)) + (db:patch-schema-maindb (db:dbdat-get-db tmpdb)) + (db:patch-schema-maindb (db:dbdat-get-db refndb)) + (db:patch-schema-rundb (db:dbdat-get-db mtdb)) + (db:patch-schema-rundb (db:dbdat-get-db tmpdb)) + (db:patch-schema-rundb (db:dbdat-get-db refndb)))) + + (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb)) + options) + data-synced)) + +(define (db:get-access-mode) + (if (args:get-arg "-use-db-cache") 'cached 'rmt)) + +;; given a path to a server log return: host port startseconds +;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let +;; +(define (server:logf-get-start-info logf) + (let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+)"))) ;; SERVER STARTED: host:port AT timesecs server id + ;;(handle-exceptions + ;; exn + ;; (begin + ;; (debug:print-info 0 *default-log-port* "Unable to get server info from " logf ", exn=" exn) + ;; (list #f #f #f #f)) ;; no idea what went wrong, call it a bad server + (if (and (file-exists? logf) + (file-read-access? logf)) + (with-input-from-file + logf + (lambda () + (let loop ((inl (read-line)) + (lnum 0)) + (if (not (eof-object? inl)) + (let ((mlst (string-match rx inl))) + (if (not mlst) + (if (< lnum 500) ;; give up if more than 500 lines of server log read + (loop (read-line)(+ lnum 1)) + (begin + (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf ) + (list #f #f #f #f))) + (let ((dat (cdr mlst))) + (list (car dat) ;; host + (string->number (cadr dat)) ;; port + (string->number (caddr dat)) + (cadr (cddr dat)))))) + (begin + (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (current-seconds)) + (list #f #f #f #f)))))) + (begin + (debug:print-info 0 *default-log-port* "Unable to get server info from " logf ", file not found or not readable.") + (list #f #f #f #f))))) + +;; cannot move to dbmod until lazy-read-testconfig is unravelled. +;; +(define (mt:process-triggers dbstruct run-id test-id newstate newstatus) + (if test-id + (let* ((test-dat (db:get-test-info-by-id dbstruct run-id test-id))) + (if test-dat + (let* ((test-rundir (db:test-get-rundir test-dat)) ;; ) ;; ) + (test-name (db:test-get-testname test-dat)) + (item-path (db:test-get-item-path test-dat)) + (duration (db:test-get-run_duration test-dat)) + (comment (db:test-get-comment test-dat)) + (event-time (db:test-get-event_time test-dat)) + (tconfig #f) + (state (if newstate newstate (db:test-get-state test-dat))) + (status (if newstatus newstatus (db:test-get-status test-dat)))) + ;; (mutex-lock! *triggers-mutex*) + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* " Exception in mt:process-triggers for run-id="run-id" test-id="test-id" newstate="newstate" newstatus="newstatus + "\n error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn + "\n test-rundir="test-rundir + "\n test-name="test-name + "\n item-path="item-path + "\n state="state + "\n status="status + "\n") + (print-call-chain (current-error-port)) + #f) + (if (and test-name + test-rundir) ;; #f means no dir set yet + ;; (common:file-exists? test-rundir) + ;; (directory? test-rundir)) + (call-with-environment-variables + (list (cons "MT_TEST_NAME" (or test-name "no such test")) + (cons "MT_TEST_RUN_DIR" (or test-rundir "no test directory yet")) + (cons "MT_ITEMPATH" (or item-path ""))) + (lambda () + (if (directory-exists? test-rundir) + (push-directory test-rundir) + (push-directory *toppath*)) + (set! tconfig (mt:lazy-read-test-config test-name)) + (for-each (lambda (trigger) + (let* ((munged-trigger (string-translate trigger "/ " "--")) + (logname (conc "last-trigger-" munged-trigger ".log"))) + ;; first any triggers from the testconfig + (let ((cmd (configf:lookup tconfig "triggers" trigger))) + (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "tconfig-" logname) test-name item-path event-time state status))) + ;; next any triggers from megatest.config + (let ((cmd (configf:lookup *configdat* "triggers" trigger))) + (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "mtconfig-" logname) test-name item-path event-time state status))))) + (list + (conc state "/" status) + (conc state "/") + (conc "/" status))) + (pop-directory)) + ))) + ;; (mutex-unlock! *triggers-mutex*) + ))))) + +(define (mt:lazy-read-test-config test-name) + (let ((tconf (hash-table-ref/default *testconfigs* test-name #f))) + (if tconf + tconf + (let ((test-dirs (tests:get-tests-search-path *configdat*))) + (let loop ((hed (car test-dirs)) + (tal (cdr test-dirs))) + ;; Setting MT_LINKTREE here is almost certainly unnecessary. + (let ((tconfig-file (conc hed "/" test-name "/testconfig"))) + (if (and (common:file-exists? tconfig-file) + (file-read-access? tconfig-file)) + (let ((link-tree-path (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree")) + (old-link-tree (get-environment-variable "MT_LINKTREE"))) + (if link-tree-path (setenv "MT_LINKTREE" link-tree-path)) + (let ((newtcfg (read-config tconfig-file #f #f))) ;; NOTE: Does NOT run [system ...] + (hash-table-set! *testconfigs* test-name newtcfg) + (if old-link-tree + (setenv "MT_LINKTREE" old-link-tree) + (unsetenv "MT_LINKTREE")) + newtcfg)) + (if (null? tal) + (begin + (debug:print-error 0 *default-log-port* "No readable testconfig found for " test-name) + #f) + (loop (car tal)(cdr tal)))))))))) + + +;; MUST RESOLVE mt:process-triggers before these can move to dbmod. + +;; set tests with state currstate and status currstatus to newstate and newstatus +;; use currstate = #f and or currstatus = #f to apply to any state or status respectively +;; WARNING: SQL injection risk. NB// See new but not yet used "faster" version below +;; +;; AND NOT (item_path='' AND testname in (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));"))) +;; (debug:print 0 *default-log-port* "QRY: " qry) +;; (db:delay-if-busy) +;; +;; NB// This call only operates on toplevel tests. Consider replacing it with more general call +;; +(define (db:set-tests-state-status dbstruct run-id testnames currstate currstatus newstate newstatus) + (let ((test-ids '())) + (for-each + (lambda (testname) + (let ((qry (conc "UPDATE tests SET state=?,status=? WHERE " + (if currstate (conc "state='" currstate "' AND ") "") + (if currstatus (conc "status='" currstatus "' AND ") "") + " run_id=? AND testname LIKE ?;")) + (test-id (db:get-test-id dbstruct run-id testname ""))) + (db:with-db + dbstruct + run-id + #t + (lambda (db) + (sqlite3:execute db qry + (or newstate currstate "NOT_STARTED") + (or newstatus currstate "UNKNOWN") + run-id testname))) + (if test-id + (begin + (set! test-ids (cons test-id test-ids)) + (mt:process-triggers dbstruct run-id test-id newstate newstatus))))) + testnames) + test-ids)) + + +;;====================================================================== +;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage +(define (common:watchdog) + (debug:print-info 13 *default-log-port* "common:watchdog entered.") + (if (launch:setup) + (if (common:on-homehost?) + (let ((dbstruct (db:setup #t))) + (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct) + (cond + ((dbr:dbstruct-read-only dbstruct) + (debug:print-info 13 *default-log-port* "loading read-only watchdog") + (common:readonly-watchdog dbstruct)) + (else + (debug:print-info 13 *default-log-port* "loading writable-watchdog.") + (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "brute-force-sync"))) + (cond + ((equal? syncer "brute-force-sync") + (server:writable-watchdog-bruteforce dbstruct)) + ((equal? syncer "delta-sync") + (server:writable-watchdog-deltasync dbstruct)) + (else + (debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are brute-force-sync and delta-sync.") + (exit 1))) + ;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")") + ))) + (debug:print-info 13 *default-log-port* "watchdog done.")) + (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost")))) + +;;====================================================================== +;; Move me elsewhere ... +;; RADT => Why do we meed the version check here, this is called only if version misma +;; +(define (common:cleanup-db dbstruct #!key (full #f)) + (apply db:multi-db-sync + dbstruct + 'schema + ;; 'new2old + 'killservers + 'adj-target + ;; 'old2new + 'new2old + ;; (if full + '(dejunk) + ;; '()) + ) + (if (common:api-changed?) + (common:set-last-run-version))) + +;;====================================================================== +;; Force a megatest cleanup-db if version is changed and skip-version-check not specified +;; Do NOT check if not on homehost! +;; +(define (common:exit-on-version-changed) + (if (common:on-homehost?) + (if (common:api-changed?) + (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config")) + (dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db")) + (read-only (not (file-write-access? dbfile))) + (dbstruct (db:setup #t))) + (debug:print 0 *default-log-port* + "WARNING: Version mismatch!\n" + " expected: " (common:version-signature) "\n" + " got: " (common:get-last-run-version)) + (cond + ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t) + ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only) + (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db + (debug:print 0 *default-log-port* " I see you are the owner of megatest.config, attempting to cleanup and reset to new version") + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "Failed to switch versions. exn=" exn) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (print-call-chain (current-error-port)) + (exit 1)) + (common:cleanup-db dbstruct))) + ((not (common:file-exists? mtconf)) + (debug:print 0 *default-log-port* " megatest.config does not exist in this area. Cannot proceed with megatest version migration.") + (exit 1)) + ((not (common:file-exists? dbfile)) + (debug:print 0 *default-log-port* " megatest.db does not exist in this area. Cannot proceed with megatest version migration.") + (exit 1)) + ((not (eq? (current-user-id)(file-owner mtconf))) + (debug:print 0 *default-log-port* " You do not own megatest.db in this area. Cannot proceed with megatest version migration.") + (exit 1)) + (read-only + (debug:print 0 *default-log-port* " You have read-only access to this area. Cannot proceed with megatest version migration.") + (exit 1)) + (else + (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"") + (exit 1))))))) +;;====================================================================== +;; (begin +;; (debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.") +;; (exit 1)))) + +;;====================================================================== +;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp +;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server) +;; +(define (common:readonly-watchdog dbstruct) + (thread-sleep! 0.05) ;; delay for startup + (debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.") + ;; sync megatest.db to /tmp/.../megatst.db + (let* ((sync-cool-off-duration 3) + (golden-mtdb (dbr:dbstruct-mtdb dbstruct)) + (golden-mtpath (db:dbdat-get-path golden-mtdb)) + (tmp-mtdb (dbr:dbstruct-tmpdb dbstruct)) + (tmp-mtpath (db:dbdat-get-path tmp-mtdb))) + (debug:print-info 0 *default-log-port* "Read-only periodic sync thread started.") + (let loop ((last-sync-time 0)) + (debug:print-info 13 *default-log-port* "loop top tmp-mtpath="tmp-mtpath" golden-mtpath="golden-mtpath) + (let* ((duration-since-last-sync (- (current-seconds) last-sync-time))) + (debug:print-info 13 *default-log-port* "duration-since-last-sync="duration-since-last-sync) + (if (and (not *time-to-exit*) + (< duration-since-last-sync sync-cool-off-duration)) + (thread-sleep! (- sync-cool-off-duration duration-since-last-sync))) + (if (not *time-to-exit*) + (let ((golden-mtdb-mtime (file-modification-time golden-mtpath)) + (tmp-mtdb-mtime (file-modification-time tmp-mtpath))) + (if (> golden-mtdb-mtime tmp-mtdb-mtime) + (if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back + (let ((res (db:multi-db-sync dbstruct 'old2new))) + (debug:print-info 13 *default-log-port* "rosync called, " res " records transferred.")))) + (loop (current-seconds))) + #t))) + (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath))) + +;;====================================================================== +;; first read ~/views.config if it exists, then read $MTRAH/views.config if it exists +;; +(define (common:load-views-config) + (let* ((view-cfgdat (make-hash-table)) + (home-cfgfile (conc (get-environment-variable "HOME") "/.mtviews.config")) + (mthome-cfgfile (conc *toppath* "/.mtviews.config"))) + (if (common:file-exists? mthome-cfgfile) + (read-config mthome-cfgfile view-cfgdat #t)) + ;; we load the home dir file AFTER the MTRAH file so the user can clobber settings when running the dashboard in read-only areas + (if (common:file-exists? home-cfgfile) + (read-config home-cfgfile view-cfgdat #t)) + view-cfgdat)) + +;;====================================================================== +;; T A R G E T S , S T A T E , S T A T U S , +;; R U N N A M E A N D T E S T P A T T +;;====================================================================== + +;;====================================================================== +;; (map print (map car (hash-table->alist (read-config "runconfigs.config" #f #t)))) +;; +(define (common:get-runconfig-targets #!key (configf #f)) + (let ((targs (sort (map car (hash-table->alist + (or configf ;; NOTE: There is no value in using runconfig:read here. + (read-config (conc *toppath* "/runconfigs.config") + #f #t) + (make-hash-table)))) + string db version +;; negative number if megatest version < db version +(define (common:version-db-delta) + (- megatest-version (common:get-last-run-version-number))) + +(define (common:version-changed?) + (not (equal? (common:get-last-run-version) + (common:version-signature)))) + +(define (common:api-changed?) + (not (equal? (substring (->string megatest-version) 0 4) + (substring (conc (common:get-last-run-version)) 0 4)))) + +(define (server:writable-watchdog-bruteforce dbstruct) + (thread-sleep! 1) ;; delay for startup + (let* ((do-a-sync (server:get-bruteforce-syncer dbstruct)) + (final-sync (server:get-bruteforce-syncer dbstruct fork-to-background: #t persist-until-sync: #t))) + (when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync + (args:get-arg "-server")) + + (let loop () + (do-a-sync) + (if (not *time-to-exit*) (loop))) ;; keep going unless time to exit + + ;; time to exit, close the no-sync db here + (final-sync) + + (if (common:low-noise-print 30) + (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) + ))))) + +(define (server:writable-watchdog-deltasync dbstruct) + (thread-sleep! 0.05) ;; delay for startup + (let ((legacy-sync (common:run-sync?)) + (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300)) + (debug-mode (debug:debug-mode 1)) + (last-time (current-seconds)) + (no-sync-db (db:open-no-sync-db)) + (stmt-cache (dbr:dbstruct-stmt-cache dbstruct)) + (sync-duration 0) ;; run time of the sync in milliseconds + ) + (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls + (debug:print-info 2 *default-log-port* "Periodic sync thread started.") + (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id) );; " this-wd-num="this-wd-num) + (if (and legacy-sync (not *time-to-exit*)) + (let* (;;(dbstruct (db:setup)) + (mtdb (dbr:dbstruct-mtdb dbstruct)) + (mtpath (db:dbdat-get-path mtdb)) + (tmp-area (common:get-db-tmp-area)) + (start-file (conc tmp-area "/.start-sync")) + (end-file (conc tmp-area "/.end-sync"))) + (debug:print-info 0 *default-log-port* "Server running, periodic sync started.") + (let loop () + ;; sync for filesystem local db writes + ;; + (mutex-lock! *db-multi-sync-mutex*) + (let* ((need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write + (sync-in-progress *db-sync-in-progress*) + (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5)) + (should-sync (and (not *time-to-exit*) + (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed + (start-time (current-seconds)) + (cpu-load-adj (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f))) + (mt-mod-time (file-modification-time mtpath)) + (last-sync-start (if (common:file-exists? start-file) + (file-modification-time start-file) + 0)) + (last-sync-end (if (common:file-exists? end-file) + (file-modification-time end-file) + 10)) + (sync-period (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period + (recently-synced (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db! + (< mt-mod-time last-sync-start))) + (sync-done (<= last-sync-start last-sync-end)) + (sync-stale (> start-time (+ last-sync-start sync-stale-seconds))) + (will-sync (and (not *time-to-exit*) ;; do not start a sync if we are in the process of exiting + (or need-sync should-sync) + (or sync-done sync-stale) + (not sync-in-progress) + (not recently-synced)))) + (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. need-sync="need-sync" sync-in-progress=" sync-in-progress + " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync + " sync-done=" sync-done " sync-period=" sync-period) + (if (and (> sync-period 5) + (common:low-noise-print 30 "sync-period")) + (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds.")) + ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced)) + ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) + (if will-sync (set! *db-sync-in-progress* #t)) + (mutex-unlock! *db-multi-sync-mutex*) + (if will-sync + (let (;; (max-sync-duration (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK! + (sync-start (current-milliseconds))) + (with-output-to-file start-file (lambda ()(print (current-process-id)))) + + ;; put lock here + + ;; (if (or (not max-sync-duration) + ;; (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally + (let ((res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive + (set! sync-duration (- (current-milliseconds) sync-start)) + (if (> res 0) ;; some records were transferred, keep the db alive + (begin + (mutex-lock! *heartbeat-mutex*) + (set! *db-last-access* (current-seconds)) + (mutex-unlock! *heartbeat-mutex*) + (debug:print-info 0 *default-log-port* "sync called, " res " records transferred.")) + (debug:print-info 2 *default-log-port* "sync called but zero records transferred"))))) +;; ;; TODO: factor this next routine out into a function +;; (with-input-from-pipe ;; this should not block other threads but need to verify this +;; (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*) +;; (lambda () +;; (let loop ((inl (read-line)) +;; (res #f)) +;; (if (eof-object? inl) +;; (begin +;; (set! sync-duration (- (current-milliseconds) sync-start)) +;; (cond +;; ((not res) +;; (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\"")) +;; ((> res 0) +;; (mutex-lock! *heartbeat-mutex*) +;; (set! *db-last-access* (current-seconds)) +;; (mutex-unlock! *heartbeat-mutex*)))) +;; (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl))) +;; (if matches +;; (string->number (cadr matches)) +;; #f)))) +;; (loop (read-line) +;; (or num-synced res)))))))))) + (if will-sync + (begin + (mutex-lock! *db-multi-sync-mutex*) + (set! *db-sync-in-progress* #f) + (set! *db-last-sync* start-time) + (with-output-to-file end-file (lambda ()(print (current-process-id)))) + + ;; release lock here + + (mutex-unlock! *db-multi-sync-mutex*))) + (if (and debug-mode + (> (- start-time last-time) 60)) + (begin + (set! last-time start-time) + (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) + + ;; keep going unless time to exit + ;; + (if (not *time-to-exit*) + (let delay-loop ((count 0)) + ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) + + (if (and (not *time-to-exit*) + (< count 6)) ;; was 11, changing to 4. + (begin + (thread-sleep! 1) + (delay-loop (+ count 1)))) + (if (not *time-to-exit*) (loop)))) + ;; time to exit, close the no-sync db here + (db:no-sync-close-db no-sync-db stmt-cache) + (if (common:low-noise-print 30) + (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num))))))) + ) Index: dcommonmod.scm ================================================================== --- dcommonmod.scm +++ dcommonmod.scm @@ -445,6 +445,7 @@ ((data-changed #f) : boolean) ((run-data-offset 0) : number) ;; get only 100 items per call, set back to zero when received less than 100 items (db-path #f)) ;;======================================================================the end + ) ADDED debugprint.scm Index: debugprint.scm ================================================================== --- /dev/null +++ debugprint.scm @@ -0,0 +1,127 @@ +(declare (unit debugprint)) +(declare (uses margsmod)) + +(module debugprint + * + +;;(import scheme chicken data-structures extras files ports) +(import scheme chicken) +(import margsmod) + +(use data-structures extras files ports) +;; (use +;; (prefix base64 base64:) +;; (prefix sqlite3 sqlite3:) +;; (srfi 18) +;; (prefix dbi dbi:) +;; directory-utils +;; format +;; matchable +;; md5 +;; message-digest +;; pkts +;; posix +;; regex +;; regex-case +;; sparse-vectors +;; srfi-1 +;; srfi-13 +;; srfi-69 +;; stack +;; stml2 +;; typed-records +;; z3 +;; ) + +;;====================================================================== +;; debug stuff +;;====================================================================== + +(define verbosity (make-parameter '())) + +;;====================================================================== +;; (define (debug:print . params) #f) +;; (define (debug:print-info . params) #f) +;; +;; (define (set-functions dbgp dbgpinfo) +;; (set! debug:print dbgp) +;; (set! debug:print-info dbgpinfo)) + +;;====================================================================== +;; this was cached based on results from profiling but it turned out the profiling +;; somehow went wrong - perhaps too many processes writing to it. Leaving the caching +;; in for now but can probably take it out later. +;; +(define (debug:calc-verbosity vstr arg) ;; arg is 'v (verbose) or 'q (quiet) + (let* ((res (cond + ((number? vstr) vstr) + ((not (string? vstr)) 1) + ;; ((string-match "^\\s*$" vstr) 1) + (vstr (let ((debugvals (filter number? (map string->number (string-split vstr ","))))) + (cond + ((> (length debugvals) 1) debugvals) + ((> (length debugvals) 0)(car debugvals)) + (else 1)))) + ((eq? arg 'v) 2) ;; verbose + ((eq? arg 'q) 0) ;; quiet + (else 1)))) + (verbosity res) + res)) + +;;====================================================================== +;; check verbosity, #t is ok +#;(define (debug-check-verbosity verbosity vstr) + (if (not (or (number? verbosity) + (list? verbosity))) + (begin + (print "ERROR: Invalid debug value \"" vstr "\"") + #f) + #t)) + +(define (debug:debug-mode n) + (let* ((vb (verbosity))) + (cond + ((and (number? vb) ;; number number + (number? n)) + (<= n vb)) + ((and (list? vb) ;; list number + (number? n)) + (member n vb)) + ((and (list? vb) ;; list list + (list? n)) + (not (null? (lset-intersection! eq? vb n)))) + ((and (number? vb) + (list? n)) + (member vb n))))) + +(define (debug:print n e . params) + (if (debug:debug-mode n) + (with-output-to-port (or e (current-error-port)) + (lambda () + ;; (if *logging* + ;; (db:log-event (apply conc params)) + (apply print params) + )))) ;; ) + +(define (debug:print-error n e . params) + ;; normal print + (if (debug:debug-mode n) + (with-output-to-port (if (port? e) e (current-error-port)) + (lambda () + (apply print "ERROR: " params) + ))) + ;; pass important messages to stderr + (if (and (eq? n 0)(not (eq? e (current-error-port)))) + (with-output-to-port (current-error-port) + (lambda () + (apply print "ERROR: " params) + )))) + +(define (debug:print-info n e . params) + (if (debug:debug-mode n) + (with-output-to-port (if (port? e) e (current-error-port)) + (lambda () + (apply print "INFO: (" n ") " params) ;; res) + )))) + +) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -86,458 +86,6 @@ ;;====================================================================== ;;====================================================================== ;; C L I E N T S ;;====================================================================== - -(define *http-mutex* (make-mutex)) - -;; NOTE: Large block of code from 32436b426188080f72fceb6894af541fbad9921e removed here -;; I'm pretty sure it is defunct. - -;; This next block all imported en-mass from the api branch -(define *http-requests-in-progress* 0) -(define *http-connections-next-cleanup* (current-seconds)) - -(define (http-transport:get-time-to-cleanup) - (let ((res #f)) - (mutex-lock! *http-mutex*) - (set! res (> (current-seconds) *http-connections-next-cleanup*)) - (mutex-unlock! *http-mutex*) - res)) - -(define (http-transport:inc-requests-count) - (mutex-lock! *http-mutex*) - (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)) - ;; Use this opportunity to slow things down iff there are too many requests in flight - (if (> *http-requests-in-progress* 5) - (begin - (debug:print-info 0 *default-log-port* "Whoa there buddy, ease up...") - (thread-sleep! 1))) - (mutex-unlock! *http-mutex*)) - -(define (http-transport:dec-requests-count proc) - (mutex-lock! *http-mutex*) - (proc) - (set! *http-requests-in-progress* (- *http-requests-in-progress* 1)) - (mutex-unlock! *http-mutex*)) - -(define (http-transport:dec-requests-count-and-close-all-connections) - (set! *http-requests-in-progress* (- *http-requests-in-progress* 1)) - (let loop ((etime (+ (current-seconds) 5))) ;; give up in five seconds - (if (> *http-requests-in-progress* 0) - (if (> etime (current-seconds)) - (begin - (thread-sleep! 0.05) - (loop etime)) - (debug:print-error 0 *default-log-port* "requests still in progress after 5 seconds of waiting. I'm going to pass on cleaning up http connections")) - (close-all-connections!))) - (set! *http-connections-next-cleanup* (+ (current-seconds) 10)) - (mutex-unlock! *http-mutex*)) - -(define (http-transport:inc-requests-and-prep-to-close-all-connections) - (mutex-lock! *http-mutex*) - (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*))) - -;; Send "cmd" with json payload "params" to serverdat and receive result -;; -(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3)(area-dat #f)) - (let* ((fullurl (if (vector? serverdat) - (http-transport:server-dat-get-api-req serverdat) - (begin - (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info") - (exit 1)))) - (res (vector #f "uninitialized")) - (success #t) - (sparams (db:obj->string params transport: 'http)) - (runremote (or area-dat *runremote*)) - (server-id (if (vector? serverdat) - (http-transport:server-dat-get-server-id serverdat) - (begin - (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info") - (exit 1))))) - (debug:print-info 11 *default-log-port* "cmd=" cmd " fullurl=" fullurl " server-id=" server-id " current time:" (current-seconds)) - - ;; set up the http-client here - (max-retry-attempts 1) - ;; consider all requests indempotent - (retry-request? (lambda (request) - #f)) - ;; send the data and get the response - ;; extract the needed info from the http data and - ;; process and return it. - (let* ((send-recieve (lambda () - (mutex-lock! *http-mutex*) - ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines) - ;; ((exn http client-error) e (print e))) - (set! res (vector ;;; DON'T FORGET - THIS IS THE CLIENT SIDE! NOTE: consider moving this to client.scm since we are only supporting http transport at this time. - success - (db:string->obj - (handle-exceptions - exn - (let ((call-chain (get-call-chain)) - (msg ((condition-property-accessor 'exn 'message) exn))) - (set! success #f) - (if (debug:debug-mode 1) - (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...") - (begin - (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".") - (debug:print 0 *default-log-port* " message: " msg ", exn=" exn) - (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey")) - (debug:print 0 *default-log-port* " call-chain: " call-chain))) - (if runremote - (remote-conndat-set! runremote #f)) - ;; Killing associated server to allow clean retry.") - ;; (tasks:kill-server-run-id run-id) ;; better to kill the server in the logic that called this routine? - (mutex-unlock! *http-mutex*) - ;;; (signal (make-composite-condition - ;;; (make-property-condition 'commfail 'message "failed to connect to server"))) - ;;; "communications failed" - (db:obj->string #f)) - (with-input-from-request ;; was dat - fullurl - (list (cons 'key (or server-id "thekey")) - (cons 'cmd cmd) - (cons 'params sparams)) - read-string)) - transport: 'http) - 0)) ;; added this speculatively - ;; Shouldn't this be a call to the managed call-all-connections stuff above? - (close-all-connections!) - (mutex-unlock! *http-mutex*) - )) - (time-out (lambda () - (thread-sleep! 45) - (debug:print 0 *default-log-port* "WARNING: send-receive took more than 45 seconds!!") - #f)) - (th1 (make-thread send-recieve "with-input-from-request")) - (th2 (make-thread time-out "time out"))) - (thread-start! th1) - (thread-start! th2) - (thread-join! th1) - (vector-set! res 0 success) - (thread-terminate! th2) - (if (vector? res) - (if (vector-ref res 0) ;; this is the first flag or the second flag? - (let* ((res-dat (vector-ref res 1))) - (if (and (string? res-dat) (string-contains res-dat "server-id mismatch")) - (signal (make-composite-condition - (make-property-condition - 'servermismatch - 'message (vector-ref res 1)))) - res)) ;; this is the *inner* vector? seriously? why? - (if (debug:debug-mode 11) - (let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it - (print-call-chain (current-error-port)) - (debug:print-error 11 *default-log-port* "error above occured at server, res=" res) ;; " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 11 *default-log-port* " server call chain:") - (pp (vector-ref res 1) (current-error-port)) - (signal (vector-ref res 0))) - res)) - (signal (make-composite-condition - (make-property-condition - 'timeout - 'message "nmsg-transport:client-api-send-receive-raw timed out talking to server"))))))) - -;; careful closing of connections stored in *runremote* -;; -(define (http-transport:close-connections #!key (area-dat #f)) - (let* ((runremote (or area-dat *runremote*)) - (server-dat (if runremote - (remote-conndat runremote) - #f))) ;; (hash-table-ref/default *runremote* run-id #f))) - (if (vector? server-dat) - (let ((api-dat (http-transport:server-dat-get-api-uri server-dat))) - (handle-exceptions - exn - (begin - (print-call-chain *default-log-port*) - (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) - (close-connection! api-dat) - ;;(close-idle-connections!) - #t)) - #f))) - - -(define (make-http-transport:server-dat)(make-vector 6)) -(define (http-transport:server-dat-get-iface vec) (vector-ref vec 0)) -(define (http-transport:server-dat-get-port vec) (vector-ref vec 1)) -(define (http-transport:server-dat-get-api-uri vec) (vector-ref vec 2)) -(define (http-transport:server-dat-get-api-url vec) (vector-ref vec 3)) -(define (http-transport:server-dat-get-api-req vec) (vector-ref vec 4)) -(define (http-transport:server-dat-get-last-access vec) (vector-ref vec 5)) -;(define (http-transport:server-dat-get-socket vec) (vector-ref vec 6)) -(define (http-transport:server-dat-get-server-id vec) (vector-ref vec 6)) - -(define (http-transport:server-dat-make-url vec) - (if (and (http-transport:server-dat-get-iface vec) - (http-transport:server-dat-get-port vec)) - (conc "http://" - (http-transport:server-dat-get-iface vec) - ":" - (http-transport:server-dat-get-port vec)) - #f)) - -(define (http-transport:server-dat-update-last-access vec) - (if (vector? vec) - (vector-set! vec 5 (current-seconds)) - (begin - (print-call-chain (current-error-port)) - (debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!")))) - -;; -;; connect -;; -(define (http-transport:client-connect iface port server-id) - (let* ((api-url (conc "http://" iface ":" port "/api")) - (api-uri (uri-reference (conc "http://" iface ":" port "/api"))) - (api-req (make-request method: 'POST uri: api-uri)) - (server-dat (vector iface port api-uri api-url api-req (current-seconds) server-id))) - server-dat)) - -;; run http-transport:keep-running in a parallel thread to monitor that the db is being -;; used and to shutdown after sometime if it is not. -;; -(define (http-transport:keep-running) - ;; if none running or if > 20 seconds since - ;; server last used then start shutdown - ;; This thread waits for the server to come alive - (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") - (let* ((sdat #f) - (tmp-area (common:get-db-tmp-area)) - (started-file (conc tmp-area "/.server-started")) - (server-start-time (current-seconds)) - (server-info (let loop ((start-time (current-seconds)) - (changed #t) - (last-sdat "not this")) - (begin ;; let ((sdat #f)) - (thread-sleep! 0.01) - (debug:print-info 0 *default-log-port* "Waiting for server alive signature") - (mutex-lock! *heartbeat-mutex*) - (set! sdat *server-info*) - (mutex-unlock! *heartbeat-mutex*) - (if (and sdat - (not changed) - (> (- (current-seconds) start-time) 2)) - (begin - (debug:print-info 0 *default-log-port* "Received server alive signature") - (common:save-pkt `((action . alive) - (T . server) - (pid . ,(current-process-id)) - (ipaddr . ,(car sdat)) - (port . ,(cadr sdat))) - *configdat* #t) - sdat) - (begin - (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) - (sleep 4) - (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes - (begin - (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server") - (common:save-pkt `((action . died) - (T . server) - (pid . ,(current-process-id)) - (ipaddr . ,(car sdat)) - (port . ,(cadr sdat)) - (msg . "Transport died?")) - *configdat* #t) - (exit)) - (loop start-time - (equal? sdat last-sdat) - sdat))))))) - (iface (car server-info)) - (port (cadr server-info)) - (last-access 0) - (server-timeout (server:expiration-timeout)) - (server-going #f) - (server-log-file (args:get-arg "-log"))) ;; always set when we are a server - - (handle-exceptions - exn - (debug:print 0 *default-log-port* "Failed to create " started-file ", exn=" exn) - (with-output-to-file started-file (lambda ()(print (current-process-id))))) - - (let loop ((count 0) - (server-state 'available) - (bad-sync-count 0) - (start-time (current-milliseconds))) - ;; Use this opportunity to sync the tmp db to megatest.db - (if (not server-going) ;; *dbstruct-db* - (begin - (debug:print 0 *default-log-port* "SERVER: dbprep") - (set! *dbstruct-db* (db:setup #t)) ;; run-id)) - (set! server-going #t) - (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine. - (thread-start! *watchdog*))) - - ;; when things go wrong we don't want to be doing the various queries too often - ;; so we strive to run this stuff only every four seconds or so. - (let* ((sync-time (- (current-milliseconds) start-time)) - (rem-time (quotient (- 4000 sync-time) 1000))) - (if (and (<= rem-time 4) - (> rem-time 0)) - (thread-sleep! rem-time))) - - (if (< count 1) ;; 3x3 = 9 secs aprox - (loop (+ count 1) 'running bad-sync-count (current-milliseconds))) - - ;; Check that iface and port have not changed (can happen if server port collides) - (mutex-lock! *heartbeat-mutex*) - (set! sdat *server-info*) - (mutex-unlock! *heartbeat-mutex*) - - (if (not (equal? sdat (list iface port))) - (let ((new-iface (car sdat)) - (new-port (cadr sdat))) - (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info") - (set! iface new-iface) - (set! port new-port) - (if (not *server-id*) - (set! *server-id* (server:mk-signature))) - (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv)) - (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*) - (flush-output *default-log-port*))) - - ;; Transfer *db-last-access* to last-access to use in checking that we are still alive - (mutex-lock! *heartbeat-mutex*) - (set! last-access *db-last-access*) - (mutex-unlock! *heartbeat-mutex*) - - (if (common:low-noise-print 120 (conc "server running on " iface ":" port)) - (begin - (if (not *server-id*) - (set! *server-id* (server:mk-signature))) - (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv)) - (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*) - (flush-output *default-log-port*))) - (if (common:low-noise-print 60 "dbstats") - (begin - (debug:print 0 *default-log-port* "Server stats:") - (db:print-current-query-stats))) - (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) - (cond - ((and *server-run* - (> (+ last-access server-timeout) - (current-seconds))) - (if (common:low-noise-print 120 "server continuing") - (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) - (let ((curr-time (current-seconds))) - (handle-exceptions - exn - (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk? exn=" exn) - (if (not *server-overloaded*) - (change-file-times server-log-file curr-time curr-time))))) - (loop 0 server-state bad-sync-count (current-milliseconds))) - (else - (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) - (http-transport:server-shutdown port))))))) - -(define (http-transport:server-shutdown port) - (begin - ;;(BB> "http-transport:server-shutdown called") - (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) - ;; - ;; start_shutdown - ;; - (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up - (portlogger:open-run-close portlogger:set-port port "released") - (thread-sleep! 1) - - ;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*) - ;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*) - ;; (debug:print-info 0 *default-log-port* "Average cached write time " - ;; (if (eq? *number-of-writes* 0) - ;; "n/a (no writes)" - ;; (/ *writes-total-delay* - ;; *number-of-writes*)) - ;; " ms") - ;; (debug:print-info 0 *default-log-port* "Number non-cached queries " *number-non-write-queries*) - ;; (debug:print-info 0 *default-log-port* "Average non-cached time " - ;; (if (eq? *number-non-write-queries* 0) - ;; "n/a (no queries)" - ;; (/ *total-non-write-delay* - ;; *number-non-write-queries*)) - ;; " ms") - - (db:print-current-query-stats) - (common:save-pkt `((action . exit) - (T . server) - (pid . ,(current-process-id))) - *configdat* #t) - (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting") - (exit))) - -;; all routes though here end in exit ... -;; -;; start_server? -;; -(define (http-transport:launch) - ;; check that a server start is in progress, pause or exit if so - (let* ((tmp-area (common:get-db-tmp-area)) - (server-start (conc tmp-area "/.server-start")) - (server-started (conc tmp-area "/.server-started")) - (start-time (common:lazy-modification-time server-start)) - (started-time (common:lazy-modification-time server-started)) - (server-starting (< start-time started-time)) ;; if start-time is less than started-time then a server is still starting - (start-time-old (> (- (current-seconds) start-time) 5)) - (cleanup-proc (lambda (msg) - (let* ((serv-fname (conc "server-" (current-process-id) "-" (get-host-name) ".log")) - (full-serv-fname (conc *toppath* "/logs/" serv-fname)) - (new-serv-fname (conc *toppath* "/logs/" "defunct-" serv-fname))) - (debug:print 0 *default-log-port* msg) - (if (common:file-exists? full-serv-fname) - (system (conc "sleep 1;mv -f " full-serv-fname " " new-serv-fname)) - (debug:print 0 *default-log-port* "INFO: cannot move " full-serv-fname " to " new-serv-fname)) - (exit))))) - #;(if (and (not start-time-old) ;; last server start try was less than five seconds ago - (not server-starting)) - (begin - (cleanup-proc "NOT starting server, there is either a recently started server or a server in process of starting") - (exit))) - ;; lets not even bother to start if there are already three or more server files ready to go - #;(let* ((num-alive (server:get-num-alive (server:get-list *toppath*)))) - (if (> num-alive 3) - (begin - (cleanup-proc (conc "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up")) - (exit)))) - (common:save-pkt `((action . start) - (T . server) - (pid . ,(current-process-id))) - *configdat* #t) - (let* ((th2 (make-thread (lambda () - (debug:print-info 0 *default-log-port* "Server run thread started") - (http-transport:run - (if (args:get-arg "-server") - (args:get-arg "-server") - "-") - )) "Server run")) - (th3 (make-thread (lambda () - (debug:print-info 0 *default-log-port* "Server monitor thread started") - (http-transport:keep-running) - "Keep running")))) - (thread-start! th2) - (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. - (thread-start! th3) - (set! *didsomething* #t) - (thread-join! th2) - (exit)))) - -;; (define (http-transport:server-signal-handler signum) -;; (signal-mask! signum) -;; (handle-exceptions -;; exn -;; (debug:print 0 *default-log-port* " ... exiting ...") -;; (let ((th1 (make-thread (lambda () -;; (thread-sleep! 1)) -;; "eat response")) -;; (th2 (make-thread (lambda () -;; (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.") -;; (thread-sleep! 3) ;; give the flush three seconds to do it's stuff -;; (debug:print 0 *default-log-port* " Done.") -;; (exit 4)) -;; "exit on ^C timer"))) -;; (thread-start! th2) -;; (thread-start! th1) -;; (thread-join! th2)))) - - Index: mt.scm ================================================================== --- mt.scm +++ mt.scm @@ -187,91 +187,5 @@ (define (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path new-state new-status new-comment) (let ((test-id (rmt:get-test-id run-id test-name item-path))) (mt:test-set-state-status-by-id-unless-completed run-id test-id new-state new-status new-comment))) -(define (mt:lazy-read-test-config test-name) - (let ((tconf (hash-table-ref/default *testconfigs* test-name #f))) - (if tconf - tconf - (let ((test-dirs (tests:get-tests-search-path *configdat*))) - (let loop ((hed (car test-dirs)) - (tal (cdr test-dirs))) - ;; Setting MT_LINKTREE here is almost certainly unnecessary. - (let ((tconfig-file (conc hed "/" test-name "/testconfig"))) - (if (and (common:file-exists? tconfig-file) - (file-read-access? tconfig-file)) - (let ((link-tree-path (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree")) - (old-link-tree (get-environment-variable "MT_LINKTREE"))) - (if link-tree-path (setenv "MT_LINKTREE" link-tree-path)) - (let ((newtcfg (read-config tconfig-file #f #f))) ;; NOTE: Does NOT run [system ...] - (hash-table-set! *testconfigs* test-name newtcfg) - (if old-link-tree - (setenv "MT_LINKTREE" old-link-tree) - (unsetenv "MT_LINKTREE")) - newtcfg)) - (if (null? tal) - (begin - (debug:print-error 0 *default-log-port* "No readable testconfig found for " test-name) - #f) - (loop (car tal)(cdr tal)))))))))) - -;; cannot move to dbmod until lazy-read-testconfig is unravelled. -;; -(define (mt:process-triggers dbstruct run-id test-id newstate newstatus) - (if test-id - (let* ((test-dat (db:get-test-info-by-id dbstruct run-id test-id))) - (if test-dat - (let* ((test-rundir (db:test-get-rundir test-dat)) ;; ) ;; ) - (test-name (db:test-get-testname test-dat)) - (item-path (db:test-get-item-path test-dat)) - (duration (db:test-get-run_duration test-dat)) - (comment (db:test-get-comment test-dat)) - (event-time (db:test-get-event_time test-dat)) - (tconfig #f) - (state (if newstate newstate (db:test-get-state test-dat))) - (status (if newstatus newstatus (db:test-get-status test-dat)))) - ;; (mutex-lock! *triggers-mutex*) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* " Exception in mt:process-triggers for run-id="run-id" test-id="test-id" newstate="newstate" newstatus="newstatus - "\n error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn - "\n test-rundir="test-rundir - "\n test-name="test-name - "\n item-path="item-path - "\n state="state - "\n status="status - "\n") - (print-call-chain (current-error-port)) - #f) - (if (and test-name - test-rundir) ;; #f means no dir set yet - ;; (common:file-exists? test-rundir) - ;; (directory? test-rundir)) - (call-with-environment-variables - (list (cons "MT_TEST_NAME" (or test-name "no such test")) - (cons "MT_TEST_RUN_DIR" (or test-rundir "no test directory yet")) - (cons "MT_ITEMPATH" (or item-path ""))) - (lambda () - (if (directory-exists? test-rundir) - (push-directory test-rundir) - (push-directory *toppath*)) - (set! tconfig (mt:lazy-read-test-config test-name)) - (for-each (lambda (trigger) - (let* ((munged-trigger (string-translate trigger "/ " "--")) - (logname (conc "last-trigger-" munged-trigger ".log"))) - ;; first any triggers from the testconfig - (let ((cmd (configf:lookup tconfig "triggers" trigger))) - (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "tconfig-" logname) test-name item-path event-time state status))) - ;; next any triggers from megatest.config - (let ((cmd (configf:lookup *configdat* "triggers" trigger))) - (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "mtconfig-" logname) test-name item-path event-time state status))))) - (list - (conc state "/" status) - (conc state "/") - (conc "/" status))) - (pop-directory)) - ))) - ;; (mutex-unlock! *triggers-mutex*) - ))))) - Index: rmtmod.scm ================================================================== --- rmtmod.scm +++ rmtmod.scm @@ -20,19 +20,23 @@ (declare (unit rmtmod)) (declare (uses commonmod)) (declare (uses apimod)) (declare (uses dbmod)) +;; (declare (uses transport)) +;; (declare (uses servermod)) (module rmtmod * (import scheme chicken data-structures extras) (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18) (import commonmod) (import dbmod) (import apimod) +;; (import transport) +;; (import servermod) (defstruct alldat (areapath #f) (ulexdat #f) ) @@ -1053,8 +1057,1503 @@ #;(set-functions rmt:send-receive remote-server-url-set! http-transport:close-connections remote-conndat-set! debug:print debug:print-info remote-ro-mode remote-ro-mode-set! remote-ro-mode-checked-set! remote-ro-mode-checked) + +;;====================================================================== +;; EVERYTHING FROM TRANSPORT +;;====================================================================== + +(define (http-transport:make-server-url hostport) + (if (not hostport) + #f + (conc "http://" (car hostport) ":" (cadr hostport)))) + +;;====================================================================== +;; S E R V E R +;; ====================================================================== + +;; Call this to start the actual server +;; + +;; (define *db:process-queue-mutex* (make-mutex)) + +(define (http-transport:run hostn) + ;; Configurations for server + (tcp-buffer-size 2048) + (max-connections 2048) + (debug:print 2 *default-log-port* "Attempting to start the server ...") + (let* ((db #f) ;; (open-db)) ;; we don't want the server to be opening and closing the db unnecesarily + (hostname (get-host-name)) + (ipaddrstr (let ((ipstr (if (string=? "-" hostn) + ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") + (server:get-best-guess-address hostname) + #f))) + (if ipstr ipstr hostn))) ;; hostname))) + (start-port (portlogger:open-run-close + (lambda (db) + (portlogger:find-port db)))) + (link-tree-path (common:get-linktree)) + (tmp-area (common:get-db-tmp-area)) + (start-file (conc tmp-area "/.server-start"))) + (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port) + ;; set some parameters for the server + (root-path (if link-tree-path + link-tree-path + (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP! + (handle-directory spiffy-directory-listing) + (handle-exception (lambda (exn chain) + (signal (make-composite-condition + (make-property-condition + 'server + 'message "server error"))))) + + ;; http-transport:handle-directory) ;; simple-directory-handler) + ;; Setup the web server and a /ctrl interface + ;; + (vhost-map `(((* any) . ,(lambda (continue) + ;; open the db on the first call + ;; This is were we set up the database connections + (let* (($ (request-vars source: 'both)) + (dat ($ 'dat)) + (res #f)) + (cond + ((equal? (uri-path (request-uri (current-request))) + '(/ "api")) + (send-response body: (api:process-request *dbstruct-db* $) ;; the $ is the request vars proc + headers: '((content-type text/plain))) + (mutex-lock! *heartbeat-mutex*) + (set! *db-last-access* (current-seconds)) + (mutex-unlock! *heartbeat-mutex*)) + ;; ((equal? (uri-path (request-uri (current-request))) + ;; '(/ "")) + ;; (send-response body: (http-transport:main-page))) + ;;((equal? (uri-path (request-uri (current-request))) + ;; '(/ "json_api")) + ;; (send-response body: (http-transport:main-page))) + ;;((equal? (uri-path (request-uri (current-request))) + ;; '(/ "runs")) + ;; (send-response body: (http-transport:main-page))) + ;;((equal? (uri-path (request-uri (current-request))) + ;; '(/ any)) + ;; (send-response body: "hey there!\n" + ;; headers: '((content-type text/plain)))) + ;;((equal? (uri-path (request-uri (current-request))) + ;; '(/ "hey")) + ;; (send-response body: "hey there!\n" + ;; headers: '((content-type text/plain)))) + ;;((equal? (uri-path (request-uri (current-request))) + ;; '(/ "jquery3.1.0.js")) + ;; (send-response body: (http-transport:show-jquery) + ;; headers: '((content-type application/javascript)))) + ;;((equal? (uri-path (request-uri (current-request))) + ;; '(/ "test_log")) + ;; (send-response body: (http-transport:html-test-log $) + ;; headers: '((content-type text/HTML)))) + ;;((equal? (uri-path (request-uri (current-request))) + ;; '(/ "dashboard")) + ;; (send-response body: (http-transport:html-dboard $) + ;; headers: '((content-type text/HTML)))) + (else (continue)))))))) + (handle-exceptions + exn + (debug:print 0 *default-log-port* "Failed to create file " start-file ", exn=" exn) + (with-output-to-file start-file (lambda ()(print (current-process-id))))) + (http-transport:try-start-server ipaddrstr start-port))) + + +;; This is recursively run by http-transport:run until sucessful +;; +(define (http-transport:try-start-server ipaddrstr portnum) + (let ((config-hostname (configf:lookup *configdat* "server" "hostname")) + (config-use-proxy (equal? (configf:lookup *configdat* "client" "use-http_proxy") "yes"))) + (if (not config-use-proxy) + (determine-proxy (constantly #f))) + (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname) + (handle-exceptions + exn + (begin + (print-error-message exn) + (if (< portnum 64000) + (begin + (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) + (portlogger:open-run-close portlogger:set-failed portnum) + (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") + (thread-sleep! 0.1) + + ;; get_next_port goes here + (http-transport:try-start-server ipaddrstr + (portlogger:open-run-close portlogger:find-port))) + (begin + (print "ERROR: Tried and tried but could not start the server")))) + ;; any error in following steps will result in a retry + (set! *server-info* (list ipaddrstr portnum)) + (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum) + ;; This starts the spiffy server + ;; NEED WAY TO SET IP TO #f TO BIND ALL + ;; (start-server bind-address: ipaddrstr port: portnum) + (if config-hostname ;; this is a hint to bind directly + (start-server port: portnum bind-address: (if (equal? config-hostname "-") + ipaddrstr + config-hostname)) + (start-server port: portnum)) + (portlogger:open-run-close + (lambda (db) + (portlogger:set-port db portnum "released"))) + (debug:print 1 *default-log-port* "INFO: server has been stopped")))) + + +;;====================================================================== +;; EVERYTHING FROM SERVERMOD +;;====================================================================== + +(define (server:make-server-url hostport) + (if (not hostport) + #f + (conc "http://" (car hostport) ":" (cadr hostport)))) + +;;====================================================================== +;; S E R V E R U T I L I T I E S +;;====================================================================== + +;; Get the transport +#;(define (server:get-transport) + (if *transport-type* + *transport-type* + (let ((ttype (string->symbol + (or (args:get-arg "-transport") + (configf:lookup *configdat* "server" "transport") + "rpc")))) + (set! *transport-type* ttype) + ttype))) + +;; Generate a unique signature for this server +(define (server:mk-signature) + (message-digest-string (md5-primitive) + (with-output-to-string + (lambda () + (write (list (current-directory) + (current-process-id) + (argv))))))) + +;; When using zmq this would send the message back (two step process) +;; with spiffy or rpc this simply returns the return data to be returned +;; +(define (server:reply return-addr query-sig success/fail result) + (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result) + ;; (send-message pubsock target send-more: #t) + ;; (send-message pubsock + (db:obj->string (vector success/fail query-sig result))) +;; (case (server:get-transport) +;; ((rpc) (db:obj->string (vector success/fail query-sig result))) +;; ((http) (db:obj->string (vector success/fail query-sig result))) +;; ((fs) result) +;; (else +;; (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*) +;; result))) + +(define (server:kind-run areapath) + ;; look for $MT_RUN_AREA_HOME/logs/server-start-last + ;; and wait for it to be at least 3 seconds old + (server:wait-for-server-start-last-flag areapath) + (if (not (server:check-if-running areapath)) ;; why try if there is already a server running? + (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun + (call-num (car last-run-dat)) + (when-run (cadr last-run-dat)) + (run-delay (+ (case call-num + ((0) 0) + ((1) 20) + ((2) 300) + (else 600)) + (random 5))) ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously + (lock-file (conc areapath "/logs/server-start.lock"))) + (if (> (- (current-seconds) when-run) run-delay) + (let* ((start-flag (conc areapath "/logs/server-start-last"))) + (common:simple-file-lock-and-wait lock-file expire-time: 15) + (debug:print-info 0 *default-log-port* "server:kind-run: touching " start-flag) + (system (conc "touch " start-flag)) ;; lazy but safe + (server:run areapath) + (thread-sleep! 2) ;; don't release the lock for at least a few seconds + (common:simple-file-release-lock lock-file))) + (hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds)))))) + + +(define (server:get-num-alive srvlst) + (let ((num-alive 0)) + (for-each + (lambda (server) + (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "Unable to get server start-time and/or mod-time from " server ", exn=" exn)) + (match-let (((mod-time host port start-time server-id pid) + server)) + (let* ((uptime (- (current-seconds) mod-time)) + (runtime (if start-time + (- mod-time start-time) + 0))) + (if (< uptime 5)(set! num-alive (+ num-alive 1))))))) + srvlst) + num-alive)) + +;; given a list of servers get a list of valid servers, i.e. at least +;; 10 seconds old, has started and is less than 1 hour old and is +;; active (i.e. mod-time < 10 seconds +;; +;; mod-time host port start-time pid +;; +;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off +;; and servers should stick around for about two hours or so. +;; +(define (server:get-best srvlst) + (let* ((nums (server:get-num-servers)) + (now (current-seconds)) + (slst (sort + (filter (lambda (rec) + (if (and (list? rec) + (> (length rec) 2)) + (let ((start-time (list-ref rec 3)) + (mod-time (list-ref rec 0))) + ;; (print "start-time: " start-time " mod-time: " mod-time) + (and start-time mod-time + (> (- now start-time) 0) ;; been running at least 0 seconds + (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds + (or (not (configf:lookup *configdat* "server" "runtime")) ;; skip if not set + (< (- now start-time) + (+ (- (string->number (configf:lookup *configdat* "server" "runtime")) + 180) + (random 360)))) ;; under one hour running time +/- 180 + )) + #f)) + srvlst) + (lambda (a b) + (< (list-ref a 3) + (list-ref b 3)))))) + (if (> (length slst) nums) + (take slst nums) + slst))) + +(define (server:get-first-best areapath) + (let ((srvrs (server:get-best (server:get-list areapath)))) + (if (and srvrs + (not (null? srvrs))) + (car srvrs) + #f))) + +(define (server:get-rand-best areapath) + (let ((srvrs (server:get-best (server:get-list areapath)))) + (if (and (list? srvrs) + (not (null? srvrs))) + (let* ((len (length srvrs)) + (idx (random len))) + (list-ref srvrs idx)) + #f))) + +(define (server:record->id servr) + (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "Unable to get server id from " servr ", exn=" exn) + #f) + (match-let (((mod-time host port start-time server-id pid) + servr)) + (if server-id + server-id + #f)))) + +(define (server:record->url servr) + (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "Unable to get server url from " servr ", exn=" exn) + #f) + (match-let (((mod-time host port start-time server-id pid) + servr)) + (if (and host port) + (conc host ":" port) + #f)))) + +(define (server:get-client-signature) ;; BB> why is this proc named "get-"? it returns nothing -- set! has not return value. + (if *my-client-signature* *my-client-signature* + (let ((sig (server:mk-signature))) + (set! *my-client-signature* sig) + *my-client-signature*))) + +;; wait for server=start-last to be three seconds old +;; +(define (server:wait-for-server-start-last-flag areapath) + (let* ((start-flag (conc areapath "/logs/server-start-last")) + ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds) + (reftime (configf:lookup-number *configdat* "server" "idletime" default: 4)) + (server-key (conc (get-host-name) "-" (current-process-id)))) + (if (file-exists? start-flag) + (let* ((fmodtime (file-modification-time start-flag)) + (delta (- (current-seconds) fmodtime)) + (all-go (> delta reftime))) + (if (and all-go + (begin + (with-output-to-file start-flag + (lambda () + (print server-key))) + (thread-sleep! 0.25) + (let ((res (with-input-from-file start-flag + (lambda () + (read-line))))) + (equal? server-key res)))) + #t ;; (system (conc "touch " start-flag)) ;; lazy but safe + (begin + (debug:print-info 0 *default-log-port* "Gating server start, last start: " + fmodtime ", delta: " delta ", reftime: " reftime ", all-go=" all-go) + (thread-sleep! reftime) + (server:wait-for-server-start-last-flag areapath))))))) + +(define (server:get-num-servers #!key (numservers 2)) + (let ((ns (string->number + (or (configf:lookup *configdat* "server" "numservers") "notanumber")))) + (or ns numservers))) + +(define (server:kill servr) + (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " servr ", exn=" exn) + #f) + (match-let (((mod-time hostname port start-time server-id pid) + servr)) + (tasks:kill-server hostname pid)))) + +;; called in megatest.scm, host-port is string hostname:port +;; +;; NOTE: This is NOT called directly from clients as not all transports support a client running +;; in the same process as the server. +;; +;; run ping in separate process, safest way in some cases +;; +(define (server:ping-server ifaceport) + (with-input-from-pipe + (conc (common:get-megatest-exe) " -ping " ifaceport) + (lambda () + (let loop ((inl (read-line)) + (res "NOREPLY")) + (if (eof-object? inl) + (case (string->symbol res) + ((NOREPLY) #f) + ((LOGIN_OK) #t) + (else #f)) + (loop (read-line) inl)))))) + +;; NOT USED (well, ok, reference in rpc-transport but otherwise not used). +;; +(define (server:login toppath) + (lambda (toppath) + (set! *db-last-access* (current-seconds)) ;; might not be needed. + (if (equal? *toppath* toppath) + #t + #f))) + +;; timeout is hms string: 1h 5m 3s, default is 1 minute +;; +(define (server:expiration-timeout) + (let ((tmo (configf:lookup *configdat* "server" "timeout"))) + (if (and (string? tmo) + (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below + (* 3600 (string->number tmo)) + 60))) + +(define (server:get-best-guess-address hostname) + (let ((res #f)) + (for-each + (lambda (adr) + (if (not (eq? (u8vector-ref adr 0) 127)) + (set! res adr))) + ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME + (vector->list (hostinfo-addresses (hostname->hostinfo hostname)))) + (string-intersperse + (map number->string + (u8vector->list + (if res res (hostname->ip hostname)))) "."))) + +;; (define server:sync-lock-token "SERVER_SYNC_LOCK") +;; (define (server:release-sync-lock) +;; (db:no-sync-del! *no-sync-db* server:sync-lock-token)) +;; (define (server:have-sync-lock?) +;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token)) +;; (have-lock? (car have-lock-pair)) +;; (lock-time (cdr have-lock-pair)) +;; (lock-age (- (current-seconds) lock-time))) +;; (cond +;; (have-lock? #t) +;; ((>lock-age +;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180))) +;; (server:release-sync-lock) +;; (server:have-sync-lock?)) +;; (else #f)))) + +;; moving this here as it needs access to db and cannot be in common. +;; + +(define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f)) + (let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh + (sync-log (or ;; (args:get-arg "-sync-log") + (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log"))) + (tmp-area (common:get-db-tmp-area)) + (tmp-db (conc tmp-area "/megatest.db")) + (staging-file (conc *toppath* "/.megatest.db")) + (mtdbfile (conc *toppath* "/megatest.db")) + (lockfile (common:get-sync-lock-filepath)) + (sync-cmd-core (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log)) + (sync-cmd (if fork-to-background + (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake \""sync-cmd-core" && /bin/mv -f " staging-file " " mtdbfile" \"") + sync-cmd-core)) + (default-min-intersync-delay 2) + (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: default-min-intersync-delay)) + (default-duty-cycle 0.1) + (duty-cycle (configf:lookup-number *configdat* "server" "sync-duty-cycle" default: default-duty-cycle)) + (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle) + (calculate-off-time (lambda (work-duration duty-cycle) + (* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds))) + (off-time min-intersync-delay) ;; adjusted in closure below. + (do-a-sync + (lambda () + ;; (BB> "Start do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync) + (let* ((finalres + (let retry-loop ((num-tries 0)) + (if (common:simple-file-lock lockfile) + (begin + (cond + ((not (or fork-to-background persist-until-sync)) + (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for max of (server.minimum-intersync-delay="min-intersync-delay + " , off-time="off-time" seconds ]") + (thread-sleep! (max off-time min-intersync-delay))) + (else + (debug:print 0 *default-log-port* "INFO: syncer thread NOT sleeping ; maybe time-to-exit..."))) + + (if (not (configf:lookup *configdat* "server" "disable-db-snapshot")) + (common:snapshot-file mtdbfile subdir: ".db-snapshot")) + (delete-file* staging-file) + (let* ((start-time (current-milliseconds)) + (res (system sync-cmd)) + (dbbackupfile (conc mtdbfile ".backup")) + (res2 + (cond + ((eq? 0 res ) + (handle-exceptions + exn + #f + (if (file-exists? dbbackupfile) + (delete-file* dbbackupfile) + ) + (if (eq? 0 (file-size sync-log)) + (delete-file* sync-log)) + (system (conc "/bin/mv " staging-file " " mtdbfile)) + + (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000)) + (set! off-time (calculate-off-time + last-sync-seconds + (cond + ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1)) + duty-cycle) + (else + (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. Using default value: "default-duty-cycle) + default-duty-cycle)))) + + (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec") + (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time) + 'sync-completed)) + (else + (system (conc "/bin/cp "sync-log" "sync-log".fail")) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail") + (if (file-exists? (conc mtdbfile ".backup")) + (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile))) + #f)))) + (common:simple-file-release-lock lockfile) + ;; (BB> "released lockfile: " lockfile) + #;(when (common:file-exists? lockfile) + (BB> "DID NOT ACTUALLY RELEASE LOCKFILE")) + res2) ;; end let + );; end begin + ;; else + (cond + (persist-until-sync + (thread-sleep! 1) + (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; we're in a fork-to-background so we need to succeed. Let's wait a jiffy and and try again. num-tries="num-tries" (waiting for lockfile="lockfile" to disappear)") + (retry-loop (add1 num-tries))) + (else + (thread-sleep! (max off-time (+ last-sync-seconds min-intersync-delay))) + (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.") + 'parallel-sync-in-progress)) + ) ;; end if got lockfile + ) + )) + ;; (BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres) + finalres) + ) ;; end lambda + )) + do-a-sync)) + +;; (define (debug:print . params) #f) +;; (define (debug:print-info . params) #f) +;; +;; (define (set-functions dbgp dbgpinfo) +;; (set! debug:print dbgp) +;; (set! debug:print-info dbgpinfo)) + +;; (define *server-loop-heart-beat* (current-seconds)) + +;;====================================================================== +;; P K T S S T U F F +;;====================================================================== + +;; ??? + +;;====================================================================== +;; P K T S S T U F F +;;====================================================================== + +;; ??? + +;;====================================================================== +;; S E R V E R +;;====================================================================== + +;; Call this to start the actual server +;; + +;; all routes though here end in exit ... +;; +;; start_server +;; +(define (server:launch run-id transport-type) + (case transport-type + ((http)(http-transport:launch)) + ;;((nmsg)(nmsg-transport:launch run-id)) + ;;((rpc) (rpc-transport:launch run-id)) + (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type)))) + +;; Given a run id start a server process ### NOTE ### > file 2>&1 +;; if the run-id is zero and the target-host is set +;; try running on that host +;; incidental: rotate logs in logs/ dir. +;; +(define (server:run areapath) ;; areapath is *toppath* for a given testsuite area + (let* ((curr-host (get-host-name)) + ;; (attempt-in-progress (server:start-attempted? areapath)) + ;; (dot-server-url (server:check-if-running areapath)) + (curr-ip (server:get-best-guess-address curr-host)) + (curr-pid (current-process-id)) + (homehost (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" )) + (target-host (car homehost)) + (testsuite (common:get-testsuite-name)) + (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log")) + (profile-mode (or (configf:lookup *configdat* "misc" "profilesw") + "")) + (cmdln (conc (common:get-megatest-exe) + " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") + " -daemonize " + "") + ;; " -log " logfile + " -m testsuite:" testsuite + " " profile-mode + )) ;; (conc " >> " logfile " 2>&1 &"))))) + (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) ;; why are we rotating logs here? This is a sensitive location with a lot going on!? + (load-limit (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0))) + ;; we want the remote server to start in *toppath* so push there + (push-directory areapath) + (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...") + (thread-start! log-rotate) + + ;; host.domain.tld match host? + (if (and target-host + ;; look at target host, is it host.domain.tld or ip address and does it + ;; match current ip or hostname + (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) + (not (equal? curr-ip target-host))) + (begin + (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) + (setenv "TARGETHOST" target-host))) + + (setenv "TARGETHOST_LOGF" logfile) + (thread-sleep! (/ (random 5000) 1000)) ;; add about a random (up to 5 seconds) initial delay. It seems pretty common that many running tests request a server at the same time + ;; (common:wait-for-normalized-load load-limit " delaying server start due to load" target-host) ;; do not try starting servers on an already overloaded machine, just wait forever + #;(common:wait-for-homehost-load load-limit (conc " delaying server start due to load on homehost. limit is " load-limit)) + (system (conc "nbfake " cmdln)) + (unsetenv "TARGETHOST_LOGF") + (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) + (thread-join! log-rotate) + (pop-directory))) + + + + + + + + + + + +(define (server:ping host-port-in server-id #!key (do-exit #f)) + (let ((host:port (if (not host-port-in) ;; use read-dotserver to find + #f ;; (server:check-if-running *toppath*) + ;; (if (number? host-port-in) ;; we were handed a server-id + ;; (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in))) + ;; ;; (print "srec: " srec " host-port-in: " host-port-in) + ;; (if srec + ;; (conc (vector-ref srec 3) ":" (vector-ref srec 4)) + ;; (conc "no such server-id " host-port-in))) + host-port-in))) ;; ) + (let* ((host-port (if host:port + (let ((slst (string-split host:port ":"))) + (if (eq? (length slst) 2) + (list (car slst)(string->number (cadr slst))) + #f)) + #f))) +;; (toppath (launch:setup))) + ;; (print "host-port=" host-port) + (if (not host-port) + (begin + (if host-port-in + (debug:print 0 *default-log-port* "ERROR: bad host:port")) + (if do-exit (exit 1)) + #f) + (let* ((iface (car host-port)) + (port (cadr host-port)) + (server-dat (http-transport:client-connect iface port server-id)) + (login-res (rmt:login-no-auto-client-setup server-dat))) + (if (and (list? login-res) + (car login-res)) + (begin + ;; (print "LOGIN_OK") + (if do-exit (exit 0)) + #t) + (begin + ;; (print "LOGIN_FAILED") + (if do-exit (exit 1)) + #f))))))) + + +;; kind start up of servers, wait 40 seconds before allowing another server for a given +;; run-id to be launched +;; +;; this one seems to be the general entry point +;; +(define (server:start-and-wait areapath #!key (timeout 60)) + (let ((give-up-time (+ (current-seconds) timeout))) + (let loop ((server-info (server:check-if-running areapath)) + (try-num 0)) + (if (or server-info + (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available. + (server:record->url server-info) + (let ((num-ok (length (server:get-best (server:get-list areapath))))) + (if (and (> try-num 0) ;; first time through simply wait a little while then try again + (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one + (server:kind-run areapath)) + (thread-sleep! 5) + (loop (server:check-if-running areapath) + (+ try-num 1))))))) + + +;; no longer care if multiple servers are started by accident. older servers will drop off in time. +;; +(define (server:check-if-running areapath) ;; #!key (numservers "2")) + (let* ((ns (server:get-num-servers)) + (servers (server:get-best (server:get-list areapath)))) + (if (or (and servers + (null? servers)) + (not servers) + (and (list? servers) + (< (length servers) (random ns)))) ;; somewhere between 0 and numservers + #f + (let loop ((hed (car servers)) + (tal (cdr servers))) + (let ((res (server:check-server hed))) + (if res + hed + (if (null? tal) + #f + (loop (car tal)(cdr tal))))))))) + +;; ping the given server +;; +(define (server:check-server server-record) + (let* ((server-url (server:record->url server-record)) + (server-id (server:record->id server-record)) + (res (case *transport-type* + ((http)(server:ping server-url server-id)) + ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server) + ))) + (if res + server-url + #f))) + + +(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG. + + + + + + + + + + + + + + + +;;====================================================================== +;; REMANANTS OF HTTP_TRANSPORT +;;====================================================================== + +(define *http-mutex* (make-mutex)) + +;; NOTE: Large block of code from 32436b426188080f72fceb6894af541fbad9921e removed here +;; I'm pretty sure it is defunct. + +;; This next block all imported en-mass from the api branch +(define *http-requests-in-progress* 0) +(define *http-connections-next-cleanup* (current-seconds)) + +(define (http-transport:get-time-to-cleanup) + (let ((res #f)) + (mutex-lock! *http-mutex*) + (set! res (> (current-seconds) *http-connections-next-cleanup*)) + (mutex-unlock! *http-mutex*) + res)) + +(define (http-transport:inc-requests-count) + (mutex-lock! *http-mutex*) + (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)) + ;; Use this opportunity to slow things down iff there are too many requests in flight + (if (> *http-requests-in-progress* 5) + (begin + (debug:print-info 0 *default-log-port* "Whoa there buddy, ease up...") + (thread-sleep! 1))) + (mutex-unlock! *http-mutex*)) + +(define (http-transport:dec-requests-count proc) + (mutex-lock! *http-mutex*) + (proc) + (set! *http-requests-in-progress* (- *http-requests-in-progress* 1)) + (mutex-unlock! *http-mutex*)) + +(define (http-transport:dec-requests-count-and-close-all-connections) + (set! *http-requests-in-progress* (- *http-requests-in-progress* 1)) + (let loop ((etime (+ (current-seconds) 5))) ;; give up in five seconds + (if (> *http-requests-in-progress* 0) + (if (> etime (current-seconds)) + (begin + (thread-sleep! 0.05) + (loop etime)) + (debug:print-error 0 *default-log-port* "requests still in progress after 5 seconds of waiting. I'm going to pass on cleaning up http connections")) + (close-all-connections!))) + (set! *http-connections-next-cleanup* (+ (current-seconds) 10)) + (mutex-unlock! *http-mutex*)) + +(define (http-transport:inc-requests-and-prep-to-close-all-connections) + (mutex-lock! *http-mutex*) + (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*))) + +;; Send "cmd" with json payload "params" to serverdat and receive result +;; +(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3)(area-dat #f)) + (let* ((fullurl (if (vector? serverdat) + (http-transport:server-dat-get-api-req serverdat) + (begin + (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info") + (exit 1)))) + (res (vector #f "uninitialized")) + (success #t) + (sparams (db:obj->string params transport: 'http)) + (runremote (or area-dat *runremote*)) + (server-id (if (vector? serverdat) + (http-transport:server-dat-get-server-id serverdat) + (begin + (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info") + (exit 1))))) + (debug:print-info 11 *default-log-port* "cmd=" cmd " fullurl=" fullurl " server-id=" server-id " current time:" (current-seconds)) + + ;; set up the http-client here + (max-retry-attempts 1) + ;; consider all requests indempotent + (retry-request? (lambda (request) + #f)) + ;; send the data and get the response + ;; extract the needed info from the http data and + ;; process and return it. + (let* ((send-recieve (lambda () + (mutex-lock! *http-mutex*) + ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines) + ;; ((exn http client-error) e (print e))) + (set! res (vector ;;; DON'T FORGET - THIS IS THE CLIENT SIDE! NOTE: consider moving this to client.scm since we are only supporting http transport at this time. + success + (db:string->obj + (handle-exceptions + exn + (let ((call-chain (get-call-chain)) + (msg ((condition-property-accessor 'exn 'message) exn))) + (set! success #f) + (if (debug:debug-mode 1) + (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...") + (begin + (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".") + (debug:print 0 *default-log-port* " message: " msg ", exn=" exn) + (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey")) + (debug:print 0 *default-log-port* " call-chain: " call-chain))) + (if runremote + (remote-conndat-set! runremote #f)) + ;; Killing associated server to allow clean retry.") + ;; (tasks:kill-server-run-id run-id) ;; better to kill the server in the logic that called this routine? + (mutex-unlock! *http-mutex*) + ;;; (signal (make-composite-condition + ;;; (make-property-condition 'commfail 'message "failed to connect to server"))) + ;;; "communications failed" + (db:obj->string #f)) + (with-input-from-request ;; was dat + fullurl + (list (cons 'key (or server-id "thekey")) + (cons 'cmd cmd) + (cons 'params sparams)) + read-string)) + transport: 'http) + 0)) ;; added this speculatively + ;; Shouldn't this be a call to the managed call-all-connections stuff above? + (close-all-connections!) + (mutex-unlock! *http-mutex*) + )) + (time-out (lambda () + (thread-sleep! 45) + (debug:print 0 *default-log-port* "WARNING: send-receive took more than 45 seconds!!") + #f)) + (th1 (make-thread send-recieve "with-input-from-request")) + (th2 (make-thread time-out "time out"))) + (thread-start! th1) + (thread-start! th2) + (thread-join! th1) + (vector-set! res 0 success) + (thread-terminate! th2) + (if (vector? res) + (if (vector-ref res 0) ;; this is the first flag or the second flag? + (let* ((res-dat (vector-ref res 1))) + (if (and (string? res-dat) (string-contains res-dat "server-id mismatch")) + (signal (make-composite-condition + (make-property-condition + 'servermismatch + 'message (vector-ref res 1)))) + res)) ;; this is the *inner* vector? seriously? why? + (if (debug:debug-mode 11) + (let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it + (print-call-chain (current-error-port)) + (debug:print-error 11 *default-log-port* "error above occured at server, res=" res) ;; " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 11 *default-log-port* " server call chain:") + (pp (vector-ref res 1) (current-error-port)) + (signal (vector-ref res 0))) + res)) + (signal (make-composite-condition + (make-property-condition + 'timeout + 'message "nmsg-transport:client-api-send-receive-raw timed out talking to server"))))))) + +;; careful closing of connections stored in *runremote* +;; +(define (http-transport:close-connections #!key (area-dat #f)) + (let* ((runremote (or area-dat *runremote*)) + (server-dat (if runremote + (remote-conndat runremote) + #f))) ;; (hash-table-ref/default *runremote* run-id #f))) + (if (vector? server-dat) + (let ((api-dat (http-transport:server-dat-get-api-uri server-dat))) + (handle-exceptions + exn + (begin + (print-call-chain *default-log-port*) + (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) + (close-connection! api-dat) + ;;(close-idle-connections!) + #t)) + #f))) + + +(define (make-http-transport:server-dat)(make-vector 6)) +(define (http-transport:server-dat-get-iface vec) (vector-ref vec 0)) +(define (http-transport:server-dat-get-port vec) (vector-ref vec 1)) +(define (http-transport:server-dat-get-api-uri vec) (vector-ref vec 2)) +(define (http-transport:server-dat-get-api-url vec) (vector-ref vec 3)) +(define (http-transport:server-dat-get-api-req vec) (vector-ref vec 4)) +(define (http-transport:server-dat-get-last-access vec) (vector-ref vec 5)) +;(define (http-transport:server-dat-get-socket vec) (vector-ref vec 6)) +(define (http-transport:server-dat-get-server-id vec) (vector-ref vec 6)) + +(define (http-transport:server-dat-make-url vec) + (if (and (http-transport:server-dat-get-iface vec) + (http-transport:server-dat-get-port vec)) + (conc "http://" + (http-transport:server-dat-get-iface vec) + ":" + (http-transport:server-dat-get-port vec)) + #f)) + +(define (http-transport:server-dat-update-last-access vec) + (if (vector? vec) + (vector-set! vec 5 (current-seconds)) + (begin + (print-call-chain (current-error-port)) + (debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!")))) + +;; +;; connect +;; +(define (http-transport:client-connect iface port server-id) + (let* ((api-url (conc "http://" iface ":" port "/api")) + (api-uri (uri-reference (conc "http://" iface ":" port "/api"))) + (api-req (make-request method: 'POST uri: api-uri)) + (server-dat (vector iface port api-uri api-url api-req (current-seconds) server-id))) + server-dat)) + +;; run http-transport:keep-running in a parallel thread to monitor that the db is being +;; used and to shutdown after sometime if it is not. +;; +(define (http-transport:keep-running) + ;; if none running or if > 20 seconds since + ;; server last used then start shutdown + ;; This thread waits for the server to come alive + (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") + (let* ((sdat #f) + (tmp-area (common:get-db-tmp-area)) + (started-file (conc tmp-area "/.server-started")) + (server-start-time (current-seconds)) + (server-info (let loop ((start-time (current-seconds)) + (changed #t) + (last-sdat "not this")) + (begin ;; let ((sdat #f)) + (thread-sleep! 0.01) + (debug:print-info 0 *default-log-port* "Waiting for server alive signature") + (mutex-lock! *heartbeat-mutex*) + (set! sdat *server-info*) + (mutex-unlock! *heartbeat-mutex*) + (if (and sdat + (not changed) + (> (- (current-seconds) start-time) 2)) + (begin + (debug:print-info 0 *default-log-port* "Received server alive signature") + (common:save-pkt `((action . alive) + (T . server) + (pid . ,(current-process-id)) + (ipaddr . ,(car sdat)) + (port . ,(cadr sdat))) + *configdat* #t) + sdat) + (begin + (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) + (sleep 4) + (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes + (begin + (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server") + (common:save-pkt `((action . died) + (T . server) + (pid . ,(current-process-id)) + (ipaddr . ,(car sdat)) + (port . ,(cadr sdat)) + (msg . "Transport died?")) + *configdat* #t) + (exit)) + (loop start-time + (equal? sdat last-sdat) + sdat))))))) + (iface (car server-info)) + (port (cadr server-info)) + (last-access 0) + (server-timeout (server:expiration-timeout)) + (server-going #f) + (server-log-file (args:get-arg "-log"))) ;; always set when we are a server + + (handle-exceptions + exn + (debug:print 0 *default-log-port* "Failed to create " started-file ", exn=" exn) + (with-output-to-file started-file (lambda ()(print (current-process-id))))) + + (let loop ((count 0) + (server-state 'available) + (bad-sync-count 0) + (start-time (current-milliseconds))) + ;; Use this opportunity to sync the tmp db to megatest.db + (if (not server-going) ;; *dbstruct-db* + (begin + (debug:print 0 *default-log-port* "SERVER: dbprep") + (set! *dbstruct-db* (db:setup #t)) ;; run-id)) + (set! server-going #t) + (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine. + (thread-start! *watchdog*))) + + ;; when things go wrong we don't want to be doing the various queries too often + ;; so we strive to run this stuff only every four seconds or so. + (let* ((sync-time (- (current-milliseconds) start-time)) + (rem-time (quotient (- 4000 sync-time) 1000))) + (if (and (<= rem-time 4) + (> rem-time 0)) + (thread-sleep! rem-time))) + + (if (< count 1) ;; 3x3 = 9 secs aprox + (loop (+ count 1) 'running bad-sync-count (current-milliseconds))) + + ;; Check that iface and port have not changed (can happen if server port collides) + (mutex-lock! *heartbeat-mutex*) + (set! sdat *server-info*) + (mutex-unlock! *heartbeat-mutex*) + + (if (not (equal? sdat (list iface port))) + (let ((new-iface (car sdat)) + (new-port (cadr sdat))) + (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info") + (set! iface new-iface) + (set! port new-port) + (if (not *server-id*) + (set! *server-id* (server:mk-signature))) + (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv)) + (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*) + (flush-output *default-log-port*))) + + ;; Transfer *db-last-access* to last-access to use in checking that we are still alive + (mutex-lock! *heartbeat-mutex*) + (set! last-access *db-last-access*) + (mutex-unlock! *heartbeat-mutex*) + + (if (common:low-noise-print 120 (conc "server running on " iface ":" port)) + (begin + (if (not *server-id*) + (set! *server-id* (server:mk-signature))) + (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv)) + (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*) + (flush-output *default-log-port*))) + (if (common:low-noise-print 60 "dbstats") + (begin + (debug:print 0 *default-log-port* "Server stats:") + (db:print-current-query-stats))) + (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) + (cond + ((and *server-run* + (> (+ last-access server-timeout) + (current-seconds))) + (if (common:low-noise-print 120 "server continuing") + (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) + (let ((curr-time (current-seconds))) + (handle-exceptions + exn + (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk? exn=" exn) + (if (not *server-overloaded*) + (change-file-times server-log-file curr-time curr-time))))) + (loop 0 server-state bad-sync-count (current-milliseconds))) + (else + (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) + (http-transport:server-shutdown port))))))) + +(define (http-transport:server-shutdown port) + (begin + ;;(BB> "http-transport:server-shutdown called") + (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) + ;; + ;; start_shutdown + ;; + (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up + (portlogger:open-run-close portlogger:set-port port "released") + (thread-sleep! 1) + + ;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*) + ;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*) + ;; (debug:print-info 0 *default-log-port* "Average cached write time " + ;; (if (eq? *number-of-writes* 0) + ;; "n/a (no writes)" + ;; (/ *writes-total-delay* + ;; *number-of-writes*)) + ;; " ms") + ;; (debug:print-info 0 *default-log-port* "Number non-cached queries " *number-non-write-queries*) + ;; (debug:print-info 0 *default-log-port* "Average non-cached time " + ;; (if (eq? *number-non-write-queries* 0) + ;; "n/a (no queries)" + ;; (/ *total-non-write-delay* + ;; *number-non-write-queries*)) + ;; " ms") + + (db:print-current-query-stats) + (common:save-pkt `((action . exit) + (T . server) + (pid . ,(current-process-id))) + *configdat* #t) + (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting") + (exit))) + +;; all routes though here end in exit ... +;; +;; start_server? +;; +(define (http-transport:launch) + ;; check that a server start is in progress, pause or exit if so + (let* ((tmp-area (common:get-db-tmp-area)) + (server-start (conc tmp-area "/.server-start")) + (server-started (conc tmp-area "/.server-started")) + (start-time (common:lazy-modification-time server-start)) + (started-time (common:lazy-modification-time server-started)) + (server-starting (< start-time started-time)) ;; if start-time is less than started-time then a server is still starting + (start-time-old (> (- (current-seconds) start-time) 5)) + (cleanup-proc (lambda (msg) + (let* ((serv-fname (conc "server-" (current-process-id) "-" (get-host-name) ".log")) + (full-serv-fname (conc *toppath* "/logs/" serv-fname)) + (new-serv-fname (conc *toppath* "/logs/" "defunct-" serv-fname))) + (debug:print 0 *default-log-port* msg) + (if (common:file-exists? full-serv-fname) + (system (conc "sleep 1;mv -f " full-serv-fname " " new-serv-fname)) + (debug:print 0 *default-log-port* "INFO: cannot move " full-serv-fname " to " new-serv-fname)) + (exit))))) + #;(if (and (not start-time-old) ;; last server start try was less than five seconds ago + (not server-starting)) + (begin + (cleanup-proc "NOT starting server, there is either a recently started server or a server in process of starting") + (exit))) + ;; lets not even bother to start if there are already three or more server files ready to go + #;(let* ((num-alive (server:get-num-alive (server:get-list *toppath*)))) + (if (> num-alive 3) + (begin + (cleanup-proc (conc "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up")) + (exit)))) + (common:save-pkt `((action . start) + (T . server) + (pid . ,(current-process-id))) + *configdat* #t) + (let* ((th2 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server run thread started") + (http-transport:run + (if (args:get-arg "-server") + (args:get-arg "-server") + "-") + )) "Server run")) + (th3 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server monitor thread started") + (http-transport:keep-running) + "Keep running")))) + (thread-start! th2) + (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. + (thread-start! th3) + (set! *didsomething* #t) + (thread-join! th2) + (exit)))) + +;; (define (http-transport:server-signal-handler signum) +;; (signal-mask! signum) +;; (handle-exceptions +;; exn +;; (debug:print 0 *default-log-port* " ... exiting ...") +;; (let ((th1 (make-thread (lambda () +;; (thread-sleep! 1)) +;; "eat response")) +;; (th2 (make-thread (lambda () +;; (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.") +;; (thread-sleep! 3) ;; give the flush three seconds to do it's stuff +;; (debug:print 0 *default-log-port* " Done.") +;; (exit 4)) +;; "exit on ^C timer"))) +;; (thread-start! th2) +;; (thread-start! th1) +;; (thread-join! th2)))) + +;;====================================================================== +;; faux-lock is deprecated. Please use simple-lock below +;;====================================================================== +;;====================================================================== +;; +(define (common:faux-lock keyname #!key (wait-time 8)(allow-lock-steal #t)) + (if (rmt:no-sync-get/default keyname #f) ;; do not be tempted to compare to pid. locking is a one-shot action, if already locked for this pid it doesn't actually count + (if (> wait-time 0) + (begin + (thread-sleep! 1) + (if (eq? wait-time 1) ;; only one second left, steal the lock + (begin + (debug:print-info 0 *default-log-port* "stealing lock for " keyname) + (common:faux-unlock keyname force: #t))) + (common:faux-lock keyname wait-time: (- wait-time 1))) + #f) + (begin + (rmt:no-sync-set keyname (conc (current-process-id))) + (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f)))))) + +(define (common:faux-unlock keyname #!key (force #f)) + (if (or force (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f)))) + (begin + (if (rmt:no-sync-get/default keyname #f) (rmt:no-sync-del! keyname)) + #t) + #f)) +;;====================================================================== + +;;====================================================================== +;; simple lock. improve and converge on this one. +;; +(define (common:simple-lock keyname) + (rmt:no-sync-get-lock keyname)) + +(define (common:simple-unlock keyname #!key (force #f)) + (rmt:no-sync-del! keyname)) + +;;====================================================================== +;; ideally put all this info into the db, no need to preserve it across moving homehost +;; +;; return list of +;; ( reachable? cpuload update-time ) +(define (common:get-host-info hostname) + (let* ((loadinfo (rmt:get-latest-host-load hostname)) ;; if this host happens to have been recently used by a test reuse the load data + (load (car loadinfo)) + (load-sample-time (cdr loadinfo)) + (load-sample-age (- (current-seconds) load-sample-time)) + (loadinfo-timeout-seconds 6) ;; this was 20 seconds, seems way too lax. Switch to 6 seconds + (host-last-update-timeout-seconds 4) + (host-rec (hash-table-ref/default *host-loads* hostname #f)) + ) + (cond + ((< load-sample-age loadinfo-timeout-seconds) + (list #t + load-sample-time + load)) + ((and host-rec + (< (current-seconds) (+ (host-last-update host-rec) host-last-update-timeout-seconds))) + (list #t + (host-last-update host-rec) + (host-last-cpuload host-rec ))) + ((common:unix-ping hostname) + (list #t + (current-seconds) + (alist-ref 'adj-core-load (common:get-normalized-cpu-load hostname)))) ;; this is cheaper than you might think. get-normalized-cpu-load is cached for up to 5 seconds + (else + (list #f 0 -1) ;; bad host, don't use! + )))) + +(define (std-exit-procedure) + ;;(common:telemetry-log-close) + (on-exit (lambda () 0)) + ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*) + (let ((no-hurry (if *time-to-exit* ;; hurry up + #f + (begin + (set! *time-to-exit* #t) + #t)))) + (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.") + (if (and no-hurry (debug:debug-mode 18)) + (rmt:print-db-stats)) + (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds + (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated + (if *task-db* + (let ((db (cdr *task-db*))) + (if (sqlite3:database? db) + (begin + (sqlite3:interrupt! db) + (sqlite3:finalize! db #t) + ;; (vector-set! *task-db* 0 #f) + (set! *task-db* #f))))) + (http-client#close-all-connections!) + ;; (if (and *runremote* + ;; (remote-conndat *runremote*)) + ;; (begin + ;; (http-client#close-all-connections!))) ;; for http-client + (if (not (eq? *default-log-port* (current-error-port))) + (close-output-port *default-log-port*)) + (set! *default-log-port* (current-error-port))) "Cleanup db exit thread")) + (th2 (make-thread (lambda () + (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...") + (if no-hurry + (begin + (thread-sleep! 5)) ;; give the clean up few seconds to do it's stuff + (begin + (thread-sleep! 2))) + (debug:print 4 *default-log-port* " ... done") + ) + "clean exit"))) + (thread-start! th1) + (thread-start! th2) + (thread-join! th1) + ) + ) + + 0) + +;;====================================================================== +;; from metadat lookup MEGATEST_VERSION +;; +(define (common:get-last-run-version) ;; RADT => How does this work in send-receive function??; assume it is the value saved in some DB + (rmt:get-var "MEGATEST_VERSION")) + +(define (common:set-last-run-version) + (rmt:set-var "MEGATEST_VERSION" (common:version-signature))) + +(define (common:get-last-run-version-number) + (string->number + (substring (common:get-last-run-version) 0 6))) + +;;====================================================================== +;; see defstruct host at top of file. +;; host: reachable last-update last-used last-cpuload +;; +(define (common:update-host-loads-table hosts-raw) + (let* ((hosts (filter (lambda (x) + (string-match (regexp "^\\S+$") x)) + hosts-raw))) + (for-each + (lambda (hostname) + (let* ((rec (let ((h (hash-table-ref/default *host-loads* hostname #f))) + (if h + h + (let ((h (make-host))) + (hash-table-set! *host-loads* hostname h) + h)))) + (host-info (common:get-host-info hostname)) + (is-reachable (car host-info)) + (last-reached-time (cadr host-info)) + (load (caddr host-info))) + (host-reachable-set! rec is-reachable) + (host-last-update-set! rec last-reached-time) + (host-last-cpuload-set! rec load))) + hosts))) + +;;====================================================================== +;; go through the hosts from least recently used to most recently used, pick the first that meets the load criteral from the +;; [host-rules] section. +;; +(define (common:get-least-loaded-host hosts-raw host-type configdat) + (let* ((rdat (configf:lookup configdat "host-rules" host-type)) + (rules (common:val->alist (or rdat "") convert: #t)) ;; maxnload, maxnjobs, maxjobrate + (maxnload (common:alist-ref/default 'maxnload rules 1.5)) ;; max normalized load + (maxnjobs (common:alist-ref/default 'maxnjobs rules 1.5)) ;; max normalized number of jobs + (maxjobrate (common:alist-ref/default 'maxjobrate rules (/ 1 6))) ;; max rate of submitting jobs to a given host in jobs/second + (hosts (filter (lambda (x) + (string-match (regexp "^\\S+$") x)) + hosts-raw)) + ;; (best-host #f) + (get-rec (lambda (hostname) + ;; (print "get-rec hostname=" hostname) + (let ((h (hash-table-ref/default *host-loads* hostname #f))) + (if h + h + (let ((h (make-host))) + (hash-table-set! *host-loads* hostname h) + h))))) + (best-load 99999) + (curr-time (current-seconds)) + (get-hosts-sorted (lambda (hosts) + (sort hosts (lambda (a b) + (let ((a-rec (get-rec a)) + (b-rec (get-rec b))) + ;; (print "a=" a " a-rec=" a-rec " host-last-used=" (host-last-used a-rec)) + ;; (print "b=" b " b-rec=" b-rec " host-last-used=" (host-last-used b-rec)) + (< (host-last-used a-rec) + (host-last-used b-rec)))))))) + (debug:print 0 *default-log-port* "INFO: hosts-sorted=" (get-hosts-sorted hosts)) + (if (null? hosts) + #f ;; no hosts to select from. All done and giving up now. + (let ((hosts-sorted (get-hosts-sorted hosts))) + (common:update-host-loads-table hosts) + (let loop ((hostname (car hosts-sorted)) + (tal (cdr hosts-sorted)) + (best-host #f)) + (let* ((rec (get-rec hostname)) + (reachable (host-reachable rec)) + (load (host-last-cpuload rec)) + (last-used (host-last-used rec)) + (delta (- curr-time last-used)) + (job-rate (if (> delta 0) + (/ 1 delta) + 999)) ;; jobs per second + (new-best + (cond + ((not reachable) + (debug:print 0 *default-log-port* "Skipping host " hostname " as it cannot be reached.") + best-host) + ((and (< load maxnload) ;; load is acceptable + (< job-rate maxjobrate)) ;; job rate is acceptable + (set! best-load load) + hostname) + (else best-host)))) + (debug:print 0 *default-log-port* "INFO: Trying host " hostname " with load " load ", last used " delta " seconds ago, with job-rate " job-rate " for running a test." ) + (if new-best + (begin ;; found a host, return it + (debug:print 0 *default-log-port* "INFO: Found host: " new-best " load: " load " last-used: " delta " seconds ago, with job-rate: " job-rate) + (host-last-used-set! rec curr-time) + new-best) + (if (null? tal) #f (loop (car tal)(cdr tal) best-host))))))))) + +;;====================================================================== +;; T E S T L A U N C H I N G P E R I T E M W I T H H O S T T Y P E S +;;====================================================================== +;; +;; [hosts] +;; arm cubie01 cubie02 +;; x86_64 zeus xena myth01 +;; allhosts #{g hosts arm} #{g hosts x86_64} +;; +;; [host-types] +;; general #MTLOWESTLOAD #{g hosts allhosts} +;; arm #MTLOWESTLOAD #{g hosts arm} +;; nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo +;; +;; [host-rules] +;; # maxnload => max normalized load +;; # maxnjobs => max jobs per cpu +;; # maxjobrate => max jobs per second +;; general maxnload=1.1; maxnjobs=1.2; maxjobrate=0.1 +;; +;; [launchers] +;; envsetup general +;; xor/%/n 4C16G +;; % nbgeneral +;; +;; [jobtools] +;; # if defined and not "no" flexi-launcher will bypass "launcher" unless no match. +;; flexi-launcher yes +;; launcher nbfake +;; +(define (common:get-launcher configdat testname itempath) + (let ((fallback-launcher (configf:lookup configdat "jobtools" "launcher"))) + (if (and (configf:lookup configdat "jobtools" "flexi-launcher") ;; overrides launcher + (not (equal? (configf:lookup configdat "jobtools" "flexi-launcher") "no"))) + (let* ((launchers (hash-table-ref/default configdat "launchers" '()))) + (if (null? launchers) + fallback-launcher + (let loop ((hed (car launchers)) + (tal (cdr launchers))) + (let ((patt (car hed)) + (host-type (cadr hed))) + (if (tests:match patt testname itempath) + (begin + (debug:print-info 2 *default-log-port* "Have flexi-launcher match for " testname "/" itempath " = " host-type) + (let ((launcher (configf:lookup configdat "host-types" host-type))) + (if launcher + (let* ((launcher-parts (string-split launcher)) + (launcher-exe (car launcher-parts))) + (if (equal? launcher-exe "#MTLOWESTLOAD") ;; this is our special case, we will find the lowest load and craft a nbfake commandline + (let host-loop ((targ-host (common:get-least-loaded-host (cdr launcher-parts) host-type configdat)) + (count 100)) + (if targ-host + (conc "remrun " targ-host) + (if (> count 0) + (begin + (debug:print 0 *default-log-port* "INFO: Waiting for a host for host-type " host-type) + (thread-sleep! (- 101 count)) + (host-loop (common:get-least-loaded-host (cdr launcher-parts) host-type configdat) + (- count 1))) + (begin + (debug:print 0 *default-log-port* "FATAL: Failed to find a host from #MTLOWESTLOAD for host-type " host-type) + (exit))))) + launcher)) + (begin + (debug:print-info 0 *default-log-port* "WARNING: no launcher found for host-type " host-type) + (if (null? tal) + fallback-launcher + (loop (car tal)(cdr tal))))))) + ;; no match, try again + (if (null? tal) + fallback-launcher + (loop (car tal)(cdr tal)))))))) + fallback-launcher))) + ) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -3502,5 +3502,13 @@ (if (tasks:set-area dbh configdat) (tasks:sync-to-postgres configdat dest) (begin (debug:print 0 *default-log-port* "ERROR: unable to create an area record") #f))))) + +(define (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time) + (for-each + (lambda (run-id) + (debug:print-info 4 *default-log-port* "Check if run with " run-id " needs to be synced" ) + (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)) +run-ids)) + Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -23,408 +23,29 @@ (use spiffy uri-common intarweb http-client spiffy-request-vars) (declare (unit server)) -(declare (uses common)) -(declare (uses db)) -(declare (uses margsmod)) -(import margsmod) - -(declare (uses http-transport)) -(declare (uses launch)) - -(declare (uses commonmod)) -(import commonmod) - -(declare (uses dbmod)) -(import dbmod) - -(declare (uses configfmod)) -(import configfmod) - -(declare (uses servermod)) -(import servermod) - -(include "common_records.scm") -(include "db_records.scm") - -;; (define *server-loop-heart-beat* (current-seconds)) - -;;====================================================================== -;; P K T S S T U F F -;;====================================================================== - -;; ??? - -;;====================================================================== -;; P K T S S T U F F -;;====================================================================== - -;; ??? - -;;====================================================================== -;; S E R V E R -;;====================================================================== - -;; Call this to start the actual server -;; - -;; all routes though here end in exit ... -;; -;; start_server -;; -(define (server:launch run-id transport-type) - (case transport-type - ((http)(http-transport:launch)) - ;;((nmsg)(nmsg-transport:launch run-id)) - ;;((rpc) (rpc-transport:launch run-id)) - (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type)))) - -;; Given a run id start a server process ### NOTE ### > file 2>&1 -;; if the run-id is zero and the target-host is set -;; try running on that host -;; incidental: rotate logs in logs/ dir. -;; -(define (server:run areapath) ;; areapath is *toppath* for a given testsuite area - (let* ((curr-host (get-host-name)) - ;; (attempt-in-progress (server:start-attempted? areapath)) - ;; (dot-server-url (server:check-if-running areapath)) - (curr-ip (server:get-best-guess-address curr-host)) - (curr-pid (current-process-id)) - (homehost (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" )) - (target-host (car homehost)) - (testsuite (common:get-testsuite-name)) - (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log")) - (profile-mode (or (configf:lookup *configdat* "misc" "profilesw") - "")) - (cmdln (conc (common:get-megatest-exe) - " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") - " -daemonize " - "") - ;; " -log " logfile - " -m testsuite:" testsuite - " " profile-mode - )) ;; (conc " >> " logfile " 2>&1 &"))))) - (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) ;; why are we rotating logs here? This is a sensitive location with a lot going on!? - (load-limit (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0))) - ;; we want the remote server to start in *toppath* so push there - (push-directory areapath) - (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...") - (thread-start! log-rotate) - - ;; host.domain.tld match host? - (if (and target-host - ;; look at target host, is it host.domain.tld or ip address and does it - ;; match current ip or hostname - (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) - (not (equal? curr-ip target-host))) - (begin - (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) - (setenv "TARGETHOST" target-host))) - - (setenv "TARGETHOST_LOGF" logfile) - (thread-sleep! (/ (random 5000) 1000)) ;; add about a random (up to 5 seconds) initial delay. It seems pretty common that many running tests request a server at the same time - ;; (common:wait-for-normalized-load load-limit " delaying server start due to load" target-host) ;; do not try starting servers on an already overloaded machine, just wait forever - #;(common:wait-for-homehost-load load-limit (conc " delaying server start due to load on homehost. limit is " load-limit)) - (system (conc "nbfake " cmdln)) - (unsetenv "TARGETHOST_LOGF") - (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) - (thread-join! log-rotate) - (pop-directory))) - - - - - - - - - - - -(define (server:ping host-port-in server-id #!key (do-exit #f)) - (let ((host:port (if (not host-port-in) ;; use read-dotserver to find - #f ;; (server:check-if-running *toppath*) - ;; (if (number? host-port-in) ;; we were handed a server-id - ;; (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in))) - ;; ;; (print "srec: " srec " host-port-in: " host-port-in) - ;; (if srec - ;; (conc (vector-ref srec 3) ":" (vector-ref srec 4)) - ;; (conc "no such server-id " host-port-in))) - host-port-in))) ;; ) - (let* ((host-port (if host:port - (let ((slst (string-split host:port ":"))) - (if (eq? (length slst) 2) - (list (car slst)(string->number (cadr slst))) - #f)) - #f))) -;; (toppath (launch:setup))) - ;; (print "host-port=" host-port) - (if (not host-port) - (begin - (if host-port-in - (debug:print 0 *default-log-port* "ERROR: bad host:port")) - (if do-exit (exit 1)) - #f) - (let* ((iface (car host-port)) - (port (cadr host-port)) - (server-dat (http-transport:client-connect iface port server-id)) - (login-res (rmt:login-no-auto-client-setup server-dat))) - (if (and (list? login-res) - (car login-res)) - (begin - ;; (print "LOGIN_OK") - (if do-exit (exit 0)) - #t) - (begin - ;; (print "LOGIN_FAILED") - (if do-exit (exit 1)) - #f))))))) - - -;; kind start up of servers, wait 40 seconds before allowing another server for a given -;; run-id to be launched -;; -(define (server:kind-run areapath) - ;; look for $MT_RUN_AREA_HOME/logs/server-start-last - ;; and wait for it to be at least 3 seconds old - (server:wait-for-server-start-last-flag areapath) - (if (not (server:check-if-running areapath)) ;; why try if there is already a server running? - (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun - (call-num (car last-run-dat)) - (when-run (cadr last-run-dat)) - (run-delay (+ (case call-num - ((0) 0) - ((1) 20) - ((2) 300) - (else 600)) - (random 5))) ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously - (lock-file (conc areapath "/logs/server-start.lock"))) - (if (> (- (current-seconds) when-run) run-delay) - (let* ((start-flag (conc areapath "/logs/server-start-last"))) - (common:simple-file-lock-and-wait lock-file expire-time: 15) - (debug:print-info 0 *default-log-port* "server:kind-run: touching " start-flag) - (system (conc "touch " start-flag)) ;; lazy but safe - (server:run areapath) - (thread-sleep! 2) ;; don't release the lock for at least a few seconds - (common:simple-file-release-lock lock-file))) - (hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds)))))) - -;; this one seems to be the general entry point -;; -(define (server:start-and-wait areapath #!key (timeout 60)) - (let ((give-up-time (+ (current-seconds) timeout))) - (let loop ((server-info (server:check-if-running areapath)) - (try-num 0)) - (if (or server-info - (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available. - (server:record->url server-info) - (let ((num-ok (length (server:get-best (server:get-list areapath))))) - (if (and (> try-num 0) ;; first time through simply wait a little while then try again - (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one - (server:kind-run areapath)) - (thread-sleep! 5) - (loop (server:check-if-running areapath) - (+ try-num 1))))))) - - -;; no longer care if multiple servers are started by accident. older servers will drop off in time. -;; -(define (server:check-if-running areapath) ;; #!key (numservers "2")) - (let* ((ns (server:get-num-servers)) - (servers (server:get-best (server:get-list areapath)))) - (if (or (and servers - (null? servers)) - (not servers) - (and (list? servers) - (< (length servers) (random ns)))) ;; somewhere between 0 and numservers - #f - (let loop ((hed (car servers)) - (tal (cdr servers))) - (let ((res (server:check-server hed))) - (if res - hed - (if (null? tal) - #f - (loop (car tal)(cdr tal))))))))) - -;; ping the given server -;; -(define (server:check-server server-record) - (let* ((server-url (server:record->url server-record)) - (server-id (server:record->id server-record)) - (res (case *transport-type* - ((http)(server:ping server-url server-id)) - ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server) - ))) - (if res - server-url - #f))) - - -(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG. - - - - - - - - - - - - - - - -(define (server:writable-watchdog-bruteforce dbstruct) - (thread-sleep! 1) ;; delay for startup - (let* ((do-a-sync (server:get-bruteforce-syncer dbstruct)) - (final-sync (server:get-bruteforce-syncer dbstruct fork-to-background: #t persist-until-sync: #t))) - (when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync - (args:get-arg "-server")) - - (let loop () - (do-a-sync) - (if (not *time-to-exit*) (loop))) ;; keep going unless time to exit - - ;; time to exit, close the no-sync db here - (final-sync) - - (if (common:low-noise-print 30) - (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) - ))))) - -(define (server:writable-watchdog-deltasync dbstruct) - (thread-sleep! 0.05) ;; delay for startup - (let ((legacy-sync (common:run-sync?)) - (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300)) - (debug-mode (debug:debug-mode 1)) - (last-time (current-seconds)) - (no-sync-db (db:open-no-sync-db)) - (stmt-cache (dbr:dbstruct-stmt-cache dbstruct)) - (sync-duration 0) ;; run time of the sync in milliseconds - ) - (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls - (debug:print-info 2 *default-log-port* "Periodic sync thread started.") - (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id) );; " this-wd-num="this-wd-num) - (if (and legacy-sync (not *time-to-exit*)) - (let* (;;(dbstruct (db:setup)) - (mtdb (dbr:dbstruct-mtdb dbstruct)) - (mtpath (db:dbdat-get-path mtdb)) - (tmp-area (common:get-db-tmp-area)) - (start-file (conc tmp-area "/.start-sync")) - (end-file (conc tmp-area "/.end-sync"))) - (debug:print-info 0 *default-log-port* "Server running, periodic sync started.") - (let loop () - ;; sync for filesystem local db writes - ;; - (mutex-lock! *db-multi-sync-mutex*) - (let* ((need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write - (sync-in-progress *db-sync-in-progress*) - (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5)) - (should-sync (and (not *time-to-exit*) - (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed - (start-time (current-seconds)) - (cpu-load-adj (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f))) - (mt-mod-time (file-modification-time mtpath)) - (last-sync-start (if (common:file-exists? start-file) - (file-modification-time start-file) - 0)) - (last-sync-end (if (common:file-exists? end-file) - (file-modification-time end-file) - 10)) - (sync-period (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period - (recently-synced (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db! - (< mt-mod-time last-sync-start))) - (sync-done (<= last-sync-start last-sync-end)) - (sync-stale (> start-time (+ last-sync-start sync-stale-seconds))) - (will-sync (and (not *time-to-exit*) ;; do not start a sync if we are in the process of exiting - (or need-sync should-sync) - (or sync-done sync-stale) - (not sync-in-progress) - (not recently-synced)))) - (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. need-sync="need-sync" sync-in-progress=" sync-in-progress - " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync - " sync-done=" sync-done " sync-period=" sync-period) - (if (and (> sync-period 5) - (common:low-noise-print 30 "sync-period")) - (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds.")) - ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced)) - ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) - (if will-sync (set! *db-sync-in-progress* #t)) - (mutex-unlock! *db-multi-sync-mutex*) - (if will-sync - (let (;; (max-sync-duration (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK! - (sync-start (current-milliseconds))) - (with-output-to-file start-file (lambda ()(print (current-process-id)))) - - ;; put lock here - - ;; (if (or (not max-sync-duration) - ;; (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally - (let ((res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive - (set! sync-duration (- (current-milliseconds) sync-start)) - (if (> res 0) ;; some records were transferred, keep the db alive - (begin - (mutex-lock! *heartbeat-mutex*) - (set! *db-last-access* (current-seconds)) - (mutex-unlock! *heartbeat-mutex*) - (debug:print-info 0 *default-log-port* "sync called, " res " records transferred.")) - (debug:print-info 2 *default-log-port* "sync called but zero records transferred"))))) -;; ;; TODO: factor this next routine out into a function -;; (with-input-from-pipe ;; this should not block other threads but need to verify this -;; (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*) -;; (lambda () -;; (let loop ((inl (read-line)) -;; (res #f)) -;; (if (eof-object? inl) -;; (begin -;; (set! sync-duration (- (current-milliseconds) sync-start)) -;; (cond -;; ((not res) -;; (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\"")) -;; ((> res 0) -;; (mutex-lock! *heartbeat-mutex*) -;; (set! *db-last-access* (current-seconds)) -;; (mutex-unlock! *heartbeat-mutex*)))) -;; (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl))) -;; (if matches -;; (string->number (cadr matches)) -;; #f)))) -;; (loop (read-line) -;; (or num-synced res)))))))))) - (if will-sync - (begin - (mutex-lock! *db-multi-sync-mutex*) - (set! *db-sync-in-progress* #f) - (set! *db-last-sync* start-time) - (with-output-to-file end-file (lambda ()(print (current-process-id)))) - - ;; release lock here - - (mutex-unlock! *db-multi-sync-mutex*))) - (if (and debug-mode - (> (- start-time last-time) 60)) - (begin - (set! last-time start-time) - (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) - - ;; keep going unless time to exit - ;; - (if (not *time-to-exit*) - (let delay-loop ((count 0)) - ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) - - (if (and (not *time-to-exit*) - (< count 6)) ;; was 11, changing to 4. - (begin - (thread-sleep! 1) - (delay-loop (+ count 1)))) - (if (not *time-to-exit*) (loop)))) - ;; time to exit, close the no-sync db here - (db:no-sync-close-db no-sync-db stmt-cache) - (if (common:low-noise-print 30) - (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num))))))) - +;; (declare (uses common)) +;; (declare (uses db)) +;; (declare (uses margsmod)) +;; (import margsmod) +;; +;; (declare (uses http-transport)) +;; (declare (uses launch)) +;; +;; (declare (uses commonmod)) +;; (import commonmod) +;; +;; (declare (uses dbmod)) +;; (import dbmod) +;; +;; (declare (uses configfmod)) +;; (import configfmod) +;; +;; (declare (uses servermod)) +;; (import servermod) +;; +;; (include "common_records.scm") +;; (include "db_records.scm") +;; +;; Index: servermod.scm ================================================================== --- servermod.scm +++ servermod.scm @@ -20,10 +20,11 @@ (declare (unit servermod)) (declare (uses commonmod)) (declare (uses configfmod)) (declare (uses dbmod)) +(declare (uses rmtmod)) (module servermod * (import scheme chicken data-structures extras ports files) @@ -34,459 +35,9 @@ md5) (import commonmod) (import configfmod) (import dbmod) - -(define (server:make-server-url hostport) - (if (not hostport) - #f - (conc "http://" (car hostport) ":" (cadr hostport)))) - -;;====================================================================== -;; S E R V E R U T I L I T I E S -;;====================================================================== - -;; Get the transport -#;(define (server:get-transport) - (if *transport-type* - *transport-type* - (let ((ttype (string->symbol - (or (args:get-arg "-transport") - (configf:lookup *configdat* "server" "transport") - "rpc")))) - (set! *transport-type* ttype) - ttype))) - -;; Generate a unique signature for this server -(define (server:mk-signature) - (message-digest-string (md5-primitive) - (with-output-to-string - (lambda () - (write (list (current-directory) - (current-process-id) - (argv))))))) - -;; When using zmq this would send the message back (two step process) -;; with spiffy or rpc this simply returns the return data to be returned -;; -(define (server:reply return-addr query-sig success/fail result) - (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result) - ;; (send-message pubsock target send-more: #t) - ;; (send-message pubsock - (db:obj->string (vector success/fail query-sig result))) -;; (case (server:get-transport) -;; ((rpc) (db:obj->string (vector success/fail query-sig result))) -;; ((http) (db:obj->string (vector success/fail query-sig result))) -;; ((fs) result) -;; (else -;; (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*) -;; result))) - -;; given a path to a server log return: host port startseconds -;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let -;; -(define (server:logf-get-start-info logf) - (let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+)"))) ;; SERVER STARTED: host:port AT timesecs server id - ;;(handle-exceptions - ;; exn - ;; (begin - ;; (debug:print-info 0 *default-log-port* "Unable to get server info from " logf ", exn=" exn) - ;; (list #f #f #f #f)) ;; no idea what went wrong, call it a bad server - (if (and (file-exists? logf) - (file-read-access? logf)) - (with-input-from-file - logf - (lambda () - (let loop ((inl (read-line)) - (lnum 0)) - (if (not (eof-object? inl)) - (let ((mlst (string-match rx inl))) - (if (not mlst) - (if (< lnum 500) ;; give up if more than 500 lines of server log read - (loop (read-line)(+ lnum 1)) - (begin - (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf ) - (list #f #f #f #f))) - (let ((dat (cdr mlst))) - (list (car dat) ;; host - (string->number (cadr dat)) ;; port - (string->number (caddr dat)) - (cadr (cddr dat)))))) - (begin - (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (current-seconds)) - (list #f #f #f #f)))))) - (begin - (debug:print-info 0 *default-log-port* "Unable to get server info from " logf ", file not found or not readable.") - (list #f #f #f #f))))) - -;; get a list of servers with all relevant data -;; ( mod-time host port start-time pid ) -;; -(define (server:get-list areapath #!key (limit #f)) - (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$")) - (day-seconds (* 24 60 60))) - ;; if the directory exists continue to get the list - ;; otherwise attempt to create the logs dir and then - ;; continue - (if (if (directory-exists? (conc areapath "/logs")) - '() - (if (file-write-access? areapath) - (begin - (condition-case - (create-directory (conc areapath "/logs") #t) - (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs"))) - (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list. exn=" exn))) - (directory-exists? (conc areapath "/logs"))) - '())) - (let* ((server-logs (glob (conc areapath "/logs/server-[0-9]*.log"))) - (num-serv-logs (length server-logs))) - (if (null? server-logs) - '() - (let loop ((hed (car server-logs)) - (tal (cdr server-logs)) - (res '())) - (let* ((mod-time (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "failed to get modification time on " hed ", exn=" exn) - (current-seconds)) ;; 0 - (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted - (down-time (- (current-seconds) mod-time)) - (serv-dat (if (or (< num-serv-logs 10) - (< down-time 900)) ;; day-seconds)) - (server:logf-get-start-info hed) - '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at - (serv-rec (cons mod-time serv-dat)) - (fmatch (string-match fname-rx hed)) - (pid (if fmatch (string->number (list-ref fmatch 2)) #f)) - (new-res (if (null? serv-dat) - res - (cons (append serv-rec (list pid)) res)))) ;; any changes to number of elements in new-res will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let - (if (null? tal) - (if (and limit - (> (length new-res) limit)) - new-res ;; (take new-res limit) <= need intelligent sorting before this will work - new-res) - (loop (car tal)(cdr tal) new-res))))))))) - -(define (server:get-num-alive srvlst) - (let ((num-alive 0)) - (for-each - (lambda (server) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "Unable to get server start-time and/or mod-time from " server ", exn=" exn)) - (match-let (((mod-time host port start-time server-id pid) - server)) - (let* ((uptime (- (current-seconds) mod-time)) - (runtime (if start-time - (- mod-time start-time) - 0))) - (if (< uptime 5)(set! num-alive (+ num-alive 1))))))) - srvlst) - num-alive)) - -;; given a list of servers get a list of valid servers, i.e. at least -;; 10 seconds old, has started and is less than 1 hour old and is -;; active (i.e. mod-time < 10 seconds -;; -;; mod-time host port start-time pid -;; -;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off -;; and servers should stick around for about two hours or so. -;; -(define (server:get-best srvlst) - (let* ((nums (server:get-num-servers)) - (now (current-seconds)) - (slst (sort - (filter (lambda (rec) - (if (and (list? rec) - (> (length rec) 2)) - (let ((start-time (list-ref rec 3)) - (mod-time (list-ref rec 0))) - ;; (print "start-time: " start-time " mod-time: " mod-time) - (and start-time mod-time - (> (- now start-time) 0) ;; been running at least 0 seconds - (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds - (or (not (configf:lookup *configdat* "server" "runtime")) ;; skip if not set - (< (- now start-time) - (+ (- (string->number (configf:lookup *configdat* "server" "runtime")) - 180) - (random 360)))) ;; under one hour running time +/- 180 - )) - #f)) - srvlst) - (lambda (a b) - (< (list-ref a 3) - (list-ref b 3)))))) - (if (> (length slst) nums) - (take slst nums) - slst))) - -(define (server:get-first-best areapath) - (let ((srvrs (server:get-best (server:get-list areapath)))) - (if (and srvrs - (not (null? srvrs))) - (car srvrs) - #f))) - -(define (server:get-rand-best areapath) - (let ((srvrs (server:get-best (server:get-list areapath)))) - (if (and (list? srvrs) - (not (null? srvrs))) - (let* ((len (length srvrs)) - (idx (random len))) - (list-ref srvrs idx)) - #f))) - -(define (server:record->id servr) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "Unable to get server id from " servr ", exn=" exn) - #f) - (match-let (((mod-time host port start-time server-id pid) - servr)) - (if server-id - server-id - #f)))) - -(define (server:record->url servr) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "Unable to get server url from " servr ", exn=" exn) - #f) - (match-let (((mod-time host port start-time server-id pid) - servr)) - (if (and host port) - (conc host ":" port) - #f)))) - -(define (server:get-client-signature) ;; BB> why is this proc named "get-"? it returns nothing -- set! has not return value. - (if *my-client-signature* *my-client-signature* - (let ((sig (server:mk-signature))) - (set! *my-client-signature* sig) - *my-client-signature*))) - -;; wait for server=start-last to be three seconds old -;; -(define (server:wait-for-server-start-last-flag areapath) - (let* ((start-flag (conc areapath "/logs/server-start-last")) - ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds) - (reftime (configf:lookup-number *configdat* "server" "idletime" default: 4)) - (server-key (conc (get-host-name) "-" (current-process-id)))) - (if (file-exists? start-flag) - (let* ((fmodtime (file-modification-time start-flag)) - (delta (- (current-seconds) fmodtime)) - (all-go (> delta reftime))) - (if (and all-go - (begin - (with-output-to-file start-flag - (lambda () - (print server-key))) - (thread-sleep! 0.25) - (let ((res (with-input-from-file start-flag - (lambda () - (read-line))))) - (equal? server-key res)))) - #t ;; (system (conc "touch " start-flag)) ;; lazy but safe - (begin - (debug:print-info 0 *default-log-port* "Gating server start, last start: " - fmodtime ", delta: " delta ", reftime: " reftime ", all-go=" all-go) - (thread-sleep! reftime) - (server:wait-for-server-start-last-flag areapath))))))) - -(define (server:get-num-servers #!key (numservers 2)) - (let ((ns (string->number - (or (configf:lookup *configdat* "server" "numservers") "notanumber")))) - (or ns numservers))) - -(define (server:kill servr) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " servr ", exn=" exn) - #f) - (match-let (((mod-time hostname port start-time server-id pid) - servr)) - (tasks:kill-server hostname pid)))) - -;; called in megatest.scm, host-port is string hostname:port -;; -;; NOTE: This is NOT called directly from clients as not all transports support a client running -;; in the same process as the server. -;; -;; run ping in separate process, safest way in some cases -;; -(define (server:ping-server ifaceport) - (with-input-from-pipe - (conc (common:get-megatest-exe) " -ping " ifaceport) - (lambda () - (let loop ((inl (read-line)) - (res "NOREPLY")) - (if (eof-object? inl) - (case (string->symbol res) - ((NOREPLY) #f) - ((LOGIN_OK) #t) - (else #f)) - (loop (read-line) inl)))))) - -;; NOT USED (well, ok, reference in rpc-transport but otherwise not used). -;; -(define (server:login toppath) - (lambda (toppath) - (set! *db-last-access* (current-seconds)) ;; might not be needed. - (if (equal? *toppath* toppath) - #t - #f))) - -;; timeout is hms string: 1h 5m 3s, default is 1 minute -;; -(define (server:expiration-timeout) - (let ((tmo (configf:lookup *configdat* "server" "timeout"))) - (if (and (string? tmo) - (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below - (* 3600 (string->number tmo)) - 60))) - -(define (server:get-best-guess-address hostname) - (let ((res #f)) - (for-each - (lambda (adr) - (if (not (eq? (u8vector-ref adr 0) 127)) - (set! res adr))) - ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME - (vector->list (hostinfo-addresses (hostname->hostinfo hostname)))) - (string-intersperse - (map number->string - (u8vector->list - (if res res (hostname->ip hostname)))) "."))) - -;; (define server:sync-lock-token "SERVER_SYNC_LOCK") -;; (define (server:release-sync-lock) -;; (db:no-sync-del! *no-sync-db* server:sync-lock-token)) -;; (define (server:have-sync-lock?) -;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token)) -;; (have-lock? (car have-lock-pair)) -;; (lock-time (cdr have-lock-pair)) -;; (lock-age (- (current-seconds) lock-time))) -;; (cond -;; (have-lock? #t) -;; ((>lock-age -;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180))) -;; (server:release-sync-lock) -;; (server:have-sync-lock?)) -;; (else #f)))) - -;; moving this here as it needs access to db and cannot be in common. -;; - -(define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f)) - (let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh - (sync-log (or ;; (args:get-arg "-sync-log") - (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log"))) - (tmp-area (common:get-db-tmp-area)) - (tmp-db (conc tmp-area "/megatest.db")) - (staging-file (conc *toppath* "/.megatest.db")) - (mtdbfile (conc *toppath* "/megatest.db")) - (lockfile (common:get-sync-lock-filepath)) - (sync-cmd-core (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log)) - (sync-cmd (if fork-to-background - (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake \""sync-cmd-core" && /bin/mv -f " staging-file " " mtdbfile" \"") - sync-cmd-core)) - (default-min-intersync-delay 2) - (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: default-min-intersync-delay)) - (default-duty-cycle 0.1) - (duty-cycle (configf:lookup-number *configdat* "server" "sync-duty-cycle" default: default-duty-cycle)) - (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle) - (calculate-off-time (lambda (work-duration duty-cycle) - (* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds))) - (off-time min-intersync-delay) ;; adjusted in closure below. - (do-a-sync - (lambda () - ;; (BB> "Start do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync) - (let* ((finalres - (let retry-loop ((num-tries 0)) - (if (common:simple-file-lock lockfile) - (begin - (cond - ((not (or fork-to-background persist-until-sync)) - (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for max of (server.minimum-intersync-delay="min-intersync-delay - " , off-time="off-time" seconds ]") - (thread-sleep! (max off-time min-intersync-delay))) - (else - (debug:print 0 *default-log-port* "INFO: syncer thread NOT sleeping ; maybe time-to-exit..."))) - - (if (not (configf:lookup *configdat* "server" "disable-db-snapshot")) - (common:snapshot-file mtdbfile subdir: ".db-snapshot")) - (delete-file* staging-file) - (let* ((start-time (current-milliseconds)) - (res (system sync-cmd)) - (dbbackupfile (conc mtdbfile ".backup")) - (res2 - (cond - ((eq? 0 res ) - (handle-exceptions - exn - #f - (if (file-exists? dbbackupfile) - (delete-file* dbbackupfile) - ) - (if (eq? 0 (file-size sync-log)) - (delete-file* sync-log)) - (system (conc "/bin/mv " staging-file " " mtdbfile)) - - (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000)) - (set! off-time (calculate-off-time - last-sync-seconds - (cond - ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1)) - duty-cycle) - (else - (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. Using default value: "default-duty-cycle) - default-duty-cycle)))) - - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec") - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time) - 'sync-completed)) - (else - (system (conc "/bin/cp "sync-log" "sync-log".fail")) - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail") - (if (file-exists? (conc mtdbfile ".backup")) - (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile))) - #f)))) - (common:simple-file-release-lock lockfile) - ;; (BB> "released lockfile: " lockfile) - #;(when (common:file-exists? lockfile) - (BB> "DID NOT ACTUALLY RELEASE LOCKFILE")) - res2) ;; end let - );; end begin - ;; else - (cond - (persist-until-sync - (thread-sleep! 1) - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; we're in a fork-to-background so we need to succeed. Let's wait a jiffy and and try again. num-tries="num-tries" (waiting for lockfile="lockfile" to disappear)") - (retry-loop (add1 num-tries))) - (else - (thread-sleep! (max off-time (+ last-sync-seconds min-intersync-delay))) - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.") - 'parallel-sync-in-progress)) - ) ;; end if got lockfile - ) - )) - ;; (BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres) - finalres) - ) ;; end lambda - )) - do-a-sync)) - -;; (define (debug:print . params) #f) -;; (define (debug:print-info . params) #f) -;; -;; (define (set-functions dbgp dbgpinfo) -;; (set! debug:print dbgp) -;; (set! debug:print-info dbgpinfo)) +(import rmtmod) + ) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -640,14 +640,7 @@ (debug:print-info 1 *default-log-port* ((condition-property-accessor 'exn 'message) exn)) #f) (if (not (pgdb:is-area-taged-with-a-tag dbh (vector-ref tag-info 0) (vector-ref area-info 0))) (pgdb:insert-area-tag dbh (vector-ref tag-info 0) (vector-ref area-info 0)))))) -(define (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time) - (for-each - (lambda (run-id) - (debug:print-info 4 *default-log-port* "Check if run with " run-id " needs to be synced" ) - (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)) -run-ids)) - ) Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -164,26 +164,10 @@ ;; (define (tests:get-all) (let* ((test-search-path (tests:get-tests-search-path *configdat*))) (tests:get-valid-tests (make-hash-table) test-search-path))) -(define (tests:get-tests-search-path cfgdat) - (let ((paths (let ((section (if cfgdat - (configf:get-section cfgdat "tests-paths") - #f))) - (if section - (map cadr section) - '())))) - (filter (lambda (d) - (if (directory-exists? d) - d - (begin - ;; (if (common:low-noise-print 60 "tests:get-tests-search-path" d) - ;; (debug:print 0 *default-log-port* "WARNING: problem with directory " d ", dropping it from tests path")) - #f))) - (append paths (list (conc *toppath* "/tests")))))) - (define (tests:get-valid-tests test-registry tests-paths) (if (null? tests-paths) test-registry (let loop ((hed (car tests-paths)) (tal (cdr tests-paths))) Index: transport.scm ================================================================== --- transport.scm +++ transport.scm @@ -20,20 +20,23 @@ (declare (unit transport)) (declare (uses commonmod)) (declare (uses configfmod)) (declare (uses portlogger)) +(declare (uses apimod)) +(declare (uses servermod)) (module transport * (import scheme chicken data-structures extras ports) (import commonmod) (import configfmod) - +(import apimod) (import portlogger) +(import servermod) (import (prefix base64 base64:) (prefix sqlite3 sqlite3:) call-with-environment-variables @@ -64,147 +67,7 @@ typed-records uri-common z3 ) -(define (http-transport:make-server-url hostport) - (if (not hostport) - #f - (conc "http://" (car hostport) ":" (cadr hostport)))) - -;;====================================================================== -;; S E R V E R -;; ====================================================================== - -;; Call this to start the actual server -;; - -;; (define *db:process-queue-mutex* (make-mutex)) - -(define (http-transport:run hostn) - ;; Configurations for server - (tcp-buffer-size 2048) - (max-connections 2048) - (debug:print 2 *default-log-port* "Attempting to start the server ...") - (let* ((db #f) ;; (open-db)) ;; we don't want the server to be opening and closing the db unnecesarily - (hostname (get-host-name)) - (ipaddrstr (let ((ipstr (if (string=? "-" hostn) - ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") - (server:get-best-guess-address hostname) - #f))) - (if ipstr ipstr hostn))) ;; hostname))) - (start-port (portlogger:open-run-close - (lambda (db) - (portlogger:find-port db)))) - (link-tree-path (common:get-linktree)) - (tmp-area (common:get-db-tmp-area)) - (start-file (conc tmp-area "/.server-start"))) - (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port) - ;; set some parameters for the server - (root-path (if link-tree-path - link-tree-path - (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP! - (handle-directory spiffy-directory-listing) - (handle-exception (lambda (exn chain) - (signal (make-composite-condition - (make-property-condition - 'server - 'message "server error"))))) - - ;; http-transport:handle-directory) ;; simple-directory-handler) - ;; Setup the web server and a /ctrl interface - ;; - (vhost-map `(((* any) . ,(lambda (continue) - ;; open the db on the first call - ;; This is were we set up the database connections - (let* (($ (request-vars source: 'both)) - (dat ($ 'dat)) - (res #f)) - (cond - ((equal? (uri-path (request-uri (current-request))) - '(/ "api")) - (send-response body: (api:process-request *dbstruct-db* $) ;; the $ is the request vars proc - headers: '((content-type text/plain))) - (mutex-lock! *heartbeat-mutex*) - (set! *db-last-access* (current-seconds)) - (mutex-unlock! *heartbeat-mutex*)) - ;; ((equal? (uri-path (request-uri (current-request))) - ;; '(/ "")) - ;; (send-response body: (http-transport:main-page))) - ;;((equal? (uri-path (request-uri (current-request))) - ;; '(/ "json_api")) - ;; (send-response body: (http-transport:main-page))) - ;;((equal? (uri-path (request-uri (current-request))) - ;; '(/ "runs")) - ;; (send-response body: (http-transport:main-page))) - ;;((equal? (uri-path (request-uri (current-request))) - ;; '(/ any)) - ;; (send-response body: "hey there!\n" - ;; headers: '((content-type text/plain)))) - ;;((equal? (uri-path (request-uri (current-request))) - ;; '(/ "hey")) - ;; (send-response body: "hey there!\n" - ;; headers: '((content-type text/plain)))) - ;;((equal? (uri-path (request-uri (current-request))) - ;; '(/ "jquery3.1.0.js")) - ;; (send-response body: (http-transport:show-jquery) - ;; headers: '((content-type application/javascript)))) - ;;((equal? (uri-path (request-uri (current-request))) - ;; '(/ "test_log")) - ;; (send-response body: (http-transport:html-test-log $) - ;; headers: '((content-type text/HTML)))) - ;;((equal? (uri-path (request-uri (current-request))) - ;; '(/ "dashboard")) - ;; (send-response body: (http-transport:html-dboard $) - ;; headers: '((content-type text/HTML)))) - (else (continue)))))))) - (handle-exceptions - exn - (debug:print 0 *default-log-port* "Failed to create file " start-file ", exn=" exn) - (with-output-to-file start-file (lambda ()(print (current-process-id))))) - (http-transport:try-start-server ipaddrstr start-port))) - - -;; This is recursively run by http-transport:run until sucessful -;; -(define (http-transport:try-start-server ipaddrstr portnum) - (let ((config-hostname (configf:lookup *configdat* "server" "hostname")) - (config-use-proxy (equal? (configf:lookup *configdat* "client" "use-http_proxy") "yes"))) - (if (not config-use-proxy) - (determine-proxy (constantly #f))) - (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname) - (handle-exceptions - exn - (begin - (print-error-message exn) - (if (< portnum 64000) - (begin - (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) - (portlogger:open-run-close portlogger:set-failed portnum) - (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") - (thread-sleep! 0.1) - - ;; get_next_port goes here - (http-transport:try-start-server ipaddrstr - (portlogger:open-run-close portlogger:find-port))) - (begin - (print "ERROR: Tried and tried but could not start the server")))) - ;; any error in following steps will result in a retry - (set! *server-info* (list ipaddrstr portnum)) - (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum) - ;; This starts the spiffy server - ;; NEED WAY TO SET IP TO #f TO BIND ALL - ;; (start-server bind-address: ipaddrstr port: portnum) - (if config-hostname ;; this is a hint to bind directly - (start-server port: portnum bind-address: (if (equal? config-hostname "-") - ipaddrstr - config-hostname)) - (start-server port: portnum)) - (portlogger:open-run-close - (lambda (db) - (portlogger:set-port db portnum "released"))) - (debug:print 1 *default-log-port* "INFO: server has been stopped")))) - )