Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -28,13 +28,23 @@ ezsteps.scm lock-queue.scm rmt.scm api.scm \ subrun.scm portlogger.scm archive.scm env.scm \ diff-report.scm cgisetup/models/pgdb.scm # module source files -MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm +MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm \ + tcp-transportmod.scm all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut tcmt + +transport-mode.scm : transport-mode.scm.template + cp transport-mode.scm.template transport-mode.scm + +dashboard-transport-mode.scm : transport-mode.scm.template + cp transport-mode.scm.template dashboard-transport-mode.scm + +megatest.scm : transport-mode.scm +dashboard.scm : dashboard-transport-mode.scm # dbmod.import.o is just a hack here mofiles/dbfile.o : mofiles/debugprint.o mofiles/commonmod.o dbmod.import.o db.o : dbmod.import.o mofiles/debugprint.o : mofiles/mtargs.o Index: TODO ================================================================== --- TODO +++ TODO @@ -16,10 +16,13 @@ # along with Megatest. If not, see . TODO ==== +23WW07 +. Remove use of *dbstruct-dbs* + WW15 . fill newview matrix with data, filter pipeline gui elements . improve [script], especially indent handling WW16 Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -18,21 +18,26 @@ ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;; ;;====================================================================== -(use srfi-69 posix) - (declare (unit api)) (declare (uses rmt)) (declare (uses db)) (declare (uses dbmod)) (declare (uses dbfile)) (declare (uses tasks)) +(declare (uses tcp-transportmod)) (import dbmod) (import dbfile) +(import tcp-transportmod) + +(use srfi-69 + posix + matchable + s11n) ;; allow these queries through without starting a server ;; (define api:read-only-queries '(get-key-val-pairs @@ -142,93 +147,137 @@ tasks-add tasks-set-state-given-param-key )) (define *db-write-mutexes* (make-hash-table)) - +(define *server-signature* #f) ;; These are called by the server on recipt of /api calls ;; - keep it simple, only return the actual result of the call, i.e. no meta info here ;; ;; - returns #( flag result ) ;; (define (api:execute-requests dbstruct dat) (db:open-no-sync-db) ;; sets *no-sync-db* -;; (handle-exceptions -;; exn -;; (let ((call-chain (get-call-chain))) -;; (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat ", exn=" exn) -;; (print-call-chain (current-error-port)) -;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + ;; (handle-exceptions + ;; exn + ;; (let ((call-chain (get-call-chain))) + ;; (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat ", exn=" exn) + ;; (print-call-chain (current-error-port)) + ;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) ;; (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens (if (> *api-process-request-count* 200) (begin (if (common:low-noise-print 30 "too many threads") (debug:print 0 *default-log-port* "WARNING: "*api-process-request-count*" threads, potential overload, adding 0.5 sec delay.")) (thread-sleep! 0.5) ;; take a nap )) - (cond - ((not (vector? dat)) ;; it is an error to not receive a vector - (vector #f (vector #f "remote must be called with a vector"))) - #;((> *api-process-request-count* 200) ;; 20) - (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.") - (set! *server-overloaded* #t) - (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor! - (else - (let* ((cmd-in (vector-ref dat 0)) - (cmd (if (symbol? cmd-in) - cmd-in - (string->symbol cmd-in))) - (params (vector-ref dat 1)) - (run-id (if (null? params) - 0 - (car params))) - (write-mutex (if (hash-table-exists? *db-write-mutexes* run-id) - (hash-table-ref *db-write-mutexes* run-id) - (let* ((newmutex (make-mutex))) - (hash-table-set! *db-write-mutexes* run-id newmutex) - newmutex))) - (start-t (current-milliseconds)) - (readonly-mode (dbr:dbstruct-read-only dbstruct)) - (readonly-command (member cmd api:read-only-queries)) - (writecmd-in-readonly-mode (and readonly-mode (not readonly-command)))) - (if (not readonly-command) - (mutex-lock! write-mutex)) - (let* ((tmppath (dbr:dbstruct-tmppath dbstruct)) - (clean-run-id (cond - ((number? run-id) run-id) - ((equal? run-id #f) "main") - (else "other"))) - (crumbfile (dbfile:wait-for-qif tmppath clean-run-id (cons cmd params))) - (res - (if writecmd-in-readonly-mode - (conc "attempt to run write command "cmd" on a read-only database") - (api:dispatch-request dbstruct cmd run-id params)))) - (delete-file* crumbfile) - (if (not readonly-command) - (mutex-unlock! write-mutex)) - - ;; save all stats - (let ((delta-t (- (current-milliseconds) - start-t)) - (modified-cmd (if (eq? cmd 'general-call) - (string->symbol (conc "general-call-" (car params))) - cmd))) - (hash-table-set! *db-api-call-time* modified-cmd - (cons delta-t (hash-table-ref/default *db-api-call-time* modified-cmd '())))) - (if writecmd-in-readonly-mode - (begin - #;(common:telemetry-log (conc "api-out:"(->string cmd)) - payload: `((params . ,params) - (ok-res . #t))) - (vector #f res)) - (begin - #;(common:telemetry-log (conc "api-out:"(->string cmd)) - payload: `((params . ,params) - (ok-res . #f))) - (vector #t res)))))))) + (cond + ((not (vector? dat)) ;; it is an error to not receive a vector + (vector #f (vector #f "remote must be called with a vector"))) + #;((> *api-process-request-count* 200) ;; 20) + (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.") + (set! *server-overloaded* #t) + (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor! + (else + (let* ((cmd-in (vector-ref dat 0)) + (cmd (if (symbol? cmd-in) + cmd-in + (string->symbol cmd-in))) + (params (vector-ref dat 1)) + (run-id (if (null? params) + 0 + (car params))) + (write-mutex (if (hash-table-exists? *db-write-mutexes* run-id) + (hash-table-ref *db-write-mutexes* run-id) + (let* ((newmutex (make-mutex))) + (hash-table-set! *db-write-mutexes* run-id newmutex) + newmutex))) + (start-t (current-milliseconds)) + (readonly-mode (dbr:dbstruct-read-only dbstruct)) + (readonly-command (member cmd api:read-only-queries)) + (writecmd-in-readonly-mode (and readonly-mode (not readonly-command)))) + (if (not readonly-command) + (mutex-lock! write-mutex)) + (let* ((tmppath (dbr:dbstruct-tmppath dbstruct)) + (clean-run-id (cond + ((number? run-id) run-id) + ((equal? run-id #f) "main") + (else "other"))) + (crumbfile (dbfile:wait-for-qif tmppath clean-run-id (cons cmd params))) + (res + (if writecmd-in-readonly-mode + (conc "attempt to run write command "cmd" on a read-only database") + (api:dispatch-request dbstruct cmd run-id params)))) + (delete-file* crumbfile) + (if (not readonly-command) + (mutex-unlock! write-mutex)) + + ;; save all stats + (let ((delta-t (- (current-milliseconds) + start-t)) + (modified-cmd (if (eq? cmd 'general-call) + (string->symbol (conc "general-call-" (car params))) + cmd))) + (hash-table-set! *db-api-call-time* modified-cmd + (cons delta-t (hash-table-ref/default *db-api-call-time* modified-cmd '())))) + (if writecmd-in-readonly-mode + (begin + #;(common:telemetry-log (conc "api-out:"(->string cmd)) + payload: `((params . ,params) + (ok-res . #t))) + (vector #f res)) + (begin + #;(common:telemetry-log (conc "api-out:"(->string cmd)) + payload: `((params . ,params) + (ok-res . #f))) + (vector #t res)))))))) + +;; indat is (cmd run-id params meta) +;; +;; WARNING: Do not print anything in the lambda of this function as it +;; reads/writes to current in/out port +;; +(define (api:tcp-dispatch-request-make-handler dbstruct) ;; cmd run-id params) + (assert *toppath* "FATAL: api:tcp-dispatch-request-make-handler called but *toppath* not set.") + (if (not *server-signature*) + (set! *server-signature* (tt:mk-signature *toppath*))) + (lambda () + (let* ((indat (deserialize)) + (newcount (+ *api-process-request-count* 1)) + (delay-wait (if (> newcount 10) + (- newcount 10) + 0))) + (set! *api-process-request-count* newcount) + (set! *db-last-access* (current-seconds)) + (match indat + ((cmd run-id params meta) + (let* ((status (cond + ((> newcount 30) 'busy) + ((> newcount 15) 'loaded) + (else 'ok))) + (errmsg (case status + ((busy) (conc "Server overloaded, "newcount" threads in flight")) + ((loaded) (conc "Server loaded, "newcount" threads in flight")) + (else #f))) + (result (case status + ((busy) (- newcount 29)) + ((loaded) #f) + (else + (case cmd + ((ping) *server-signature*) + (else + (api:dispatch-request dbstruct cmd run-id params)))))) + (meta `((wait . ,delay-wait))) + (payload (list status errmsg result meta))) + (set! *api-process-request-count* (- *api-process-request-count* 1)) + (serialize payload))) + (else + (assert #f "FATAL: failed to deserialize indat "indat)))))) + (define (api:dispatch-request dbstruct cmd run-id params) + (db:open-no-sync-db) (case cmd ;;=============================================== ;; READ/WRITE QUERIES ;;=============================================== Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -104,11 +104,11 @@ (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) (if (not runremote) (begin ;; Here we are creating a runremote where there was none or it was clobbered with #f ;; - (set! runremote (make-remote)) + (set! runremote (make-and-init-remote)) (let* ((server-info (server:check-if-running areapath))) (remote-server-info-set! runremote server-info) (if server-info (begin (remote-server-url-set! runremote (server:record->url server-info)) Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -249,33 +249,10 @@ (define (common:get-sync-lock-filepath) (let* ((tmp-area (common:get-db-tmp-area)) (lockfile (conc tmp-area "/megatest.db.sync-lock"))) lockfile)) -;;====================================================================== -;; when called from a wrapper I need sometimes to find the calling -;; wrapper, this is for dashboard to find the correct megatest. -;; -(define (common:find-local-megatest #!optional (progname "megatest")) - (let ((res (filter file-exists? - (map (lambda (updir) - (let* ((lm (car (argv))) - (dir (pathname-directory lm)) - (exe (pathname-strip-directory lm))) - (conc (if dir (conc dir "/") "") - (case (string->symbol exe) - ((dboard) (conc updir progname)) - ((mtest) (conc updir progname)) - ((dashboard) progname) - (else exe))))) - '("../../" "../"))))) - (if (null? res) - (begin - (debug:print 0 *default-log-port* "Failed to find this executable! Using what can be found on the path") - progname) - (car res)))) - (define *common:logpro-exit-code->status-sym-alist* '( ( 0 . pass ) ( 1 . fail ) ( 2 . warn ) ( 3 . check ) @@ -998,25 +975,19 @@ #f))) (define (common:get-area-path-signature) (message-digest-string (md5-primitive) *toppath*)) -(define (common:get-signature str) - (message-digest-string (md5-primitive) str)) - ;;====================================================================== ;; E X I T H A N D L I N G ;;====================================================================== (define (common:run-sync?) (and *toppath* ;; gate if called before *toppath* is set (common:on-homehost?) (args:get-arg "-server"))) -(define (common:human-time) - (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S")) - (define (std-signal-handler signum) ;; (signal-mask! signum) (set! *time-to-exit* #t) ;;(debug:print-info 13 *default-log-port* "got signal "signum) @@ -2043,16 +2014,20 @@ (begin (debug:print 0 *default-log-port* "ERROR: common:wait-for-homehost-load called before *toppath* set.") (thread-sleep! 30) (if (< (- (current-seconds) start-time) 300) (loop start-time))))) - (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. - #f - (server:choose-server *toppath* 'homehost))) - (hh (if hh-dat (car hh-dat) #f))) - (common:wait-for-normalized-load maxnormload msg hh))) - + (case (rmt:transport-mode) + ((http) + (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. + #f + (server:choose-server *toppath* 'homehost))) + (hh (if hh-dat (car hh-dat) #f))) + (common:wait-for-normalized-load maxnormload msg hh))) + (else + (common:wait-for-normalized-load maxnormload msg (get-host-name))))) + (define (common:get-num-cpus remote-host) (let* ((actual-host (or remote-host (get-host-name)))) ;; hosts had better not be changing the number of cpus too often! (or (hash-table-ref/default *numcpus-cache* actual-host #f) (let* ((numcpus (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (random 3600))) @@ -2653,291 +2628,10 @@ (cond (with-vars (common:without-vars fullcmd)) (with-orig-env (common:with-orig-env fullcmd)) (else (common:without-vars fullcmd "MT_.*"))))) -;;====================================================================== -;; T I M E A N D D A T E -;;====================================================================== - -;;====================================================================== -;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5 -(define (common:hms-string->seconds tstr) - (let ((parts (string-split-fields "\\w+" tstr)) - (time-secs 0) - ;; s=seconds, m=minutes, h=hours, d=days, M=months, y=years, w=weeks - (trx (regexp "(\\d+)([smhdMyw])"))) - (for-each (lambda (part) - (let ((match (string-match trx part))) - (if match - (let ((val (string->number (cadr match))) - (unt (caddr match))) - (if val - (set! time-secs (+ time-secs (* val - (case (string->symbol unt) - ((s) 1) - ((m) 60) ;; minutes - ((h) 3600) - ((d) 86400) - ((w) 604800) - ((M) 2628000) ;; aproximately one month - ((y) 31536000) - (else #f)))))))))) - parts) - time-secs)) - -(define (seconds->hr-min-sec secs) - (let* ((hrs (quotient secs 3600)) - (min (quotient (- secs (* hrs 3600)) 60)) - (sec (- secs (* hrs 3600)(* min 60)))) - (conc (if (> hrs 0)(conc hrs "hr ") "") - (if (> min 0)(conc min "m ") "") - sec "s"))) - -(define (seconds->time-string sec) - (time->string - (seconds->local-time sec) "%H:%M:%S")) - -(define (seconds->work-week/day-time sec) - (time->string - (seconds->local-time sec) "ww%V.%u %H:%M")) - -(define (seconds->work-week/day sec) - (time->string - (seconds->local-time sec) "ww%V.%u")) - -(define (seconds->year-work-week/day sec) - (time->string - (seconds->local-time sec) "%yww%V.%w")) - -(define (seconds->year-work-week/day-time sec) - (time->string - (seconds->local-time sec) "%Yww%V.%w %H:%M")) - -(define (seconds->year-week/day-time sec) - (time->string - (seconds->local-time sec) "%Yw%V.%w %H:%M")) - -(define (seconds->quarter sec) - (case (string->number - (time->string - (seconds->local-time sec) - "%m")) - ((1 2 3) 1) - ((4 5 6) 2) - ((7 8 9) 3) - ((10 11 12) 4) - (else #f))) - -;;====================================================================== -;; basic ISO8601 format (e.g. "2017-02-28 06:02:54") date time => Unix epoch -;; -(define (common:date-time->seconds datetime) - (local-time->seconds (string->time datetime "%Y-%m-%d %H:%M:%S"))) - -;;====================================================================== -;; given span of seconds tstart to tend -;; find start time to mark and mark delta -;; -(define (common:find-start-mark-and-mark-delta tstart tend) - (let* ((deltat (- (max tend (+ tend 10)) tstart)) ;; can't handle runs of less than 4 seconds. Pad it to 10 seconds ... - (result #f) - (min 60) - (hr (* 60 60)) - (day (* 24 hr)) - (yr (* 365 day)) ;; year - (mo (/ yr 12)) - (wk (* day 7))) - (for-each - (lambda (max-blks) - (for-each - (lambda (span) ;; 5 2 1 - (if (not result) - (for-each - (lambda (timeunit timesym) ;; year month day hr min sec - (if (not result) - (let* ((time-blk (* span timeunit)) - (num-blks (quotient deltat time-blk))) - (if (and (> num-blks 4)(< num-blks max-blks)) - (let ((first (* (quotient tstart time-blk) time-blk))) - (set! result (list span timeunit time-blk first timesym)) - ))))) - (list yr mo wk day hr min 1) - '( y mo w d h m s)))) - (list 8 6 5 2 1))) - '(5 10 15 20 30 40 50 500)) - (if values - (apply values result) - (values 0 day 1 0 'd)))) - -;;====================================================================== -;; given x y lim return the cron expansion -;; -(define (common:expand-cron-slash x y lim) - (let loop ((curr x) - (res `())) - (if (< curr lim) - (loop (+ curr y) (cons curr res)) - (reverse res)))) - -;;====================================================================== -;; expand a complex cron string to a list of cron strings -;; -;; x/y => x, x+y, x+2y, x+3y while x+Ny a, b ,c -;; -;; NOTE: with flatten a lot of the crud below can be factored down. -;; -(define (common:cron-expand cron-str) - (if (list? cron-str) - (flatten - (fold (lambda (x res) - (if (list? x) - (let ((newres (map common:cron-expand x))) - (append x newres)) - (cons x res))) - '() - cron-str)) ;; (map common:cron-expand cron-str)) - (let ((cron-items (string-split cron-str)) - (slash-rx (regexp "(\\d+)/(\\d+)")) - (comma-rx (regexp ".*,.*")) - (max-vals '((min . 60) - (hour . 24) - (dayofmonth . 28) ;;; BUG!!!! This will be a bug for some combinations - (month . 12) - (dayofweek . 7)))) - (if (< (length cron-items) 5) ;; bad spec - cron-str ;; `(,cron-str) ;; just return the string, something downstream will fix it - (let loop ((hed (car cron-items)) - (tal (cdr cron-items)) - (type 'min) - (type-tal '(hour dayofmonth month dayofweek)) - (res '())) - (regex-case - hed - (slash-rx ( _ base incr ) (let* ((basen (string->number base)) - (incrn (string->number incr)) - (expanded-vals (common:expand-cron-slash basen incrn (alist-ref type max-vals))) - (new-list-crons (fold (lambda (x myres) - (cons (conc (if (null? res) - "" - (conc (string-intersperse res " ") " ")) - x " " (string-intersperse tal " ")) - myres)) - '() expanded-vals))) - ;; (print "new-list-crons: " new-list-crons) - ;; (fold (lambda (x res) - ;; (if (list? x) - ;; (let ((newres (map common:cron-expand x))) - ;; (append x newres)) - ;; (cons x res))) - ;; '() - (flatten (map common:cron-expand new-list-crons)))) - ;; (map common:cron-expand (map common:cron-expand new-list-crons)))) - (else (if (null? tal) - cron-str - (loop (car tal)(cdr tal)(car type-tal)(cdr type-tal)(append res (list hed))))))))))) - -;;====================================================================== -;; given a cron string and the last time event was processed return #t to run or #f to not run -;; -;; min hour dayofmonth month dayofweek -;; 0-59 0-23 1-31 1-12 0-6 ### NOTE: dayofweek does not include 7 -;; -;; #t => yes, run the job -;; #f => no, do not run the job -;; -(define (common:cron-event cron-str now-seconds-in last-done) ;; ref-seconds = #f is NOW. - (let* ((cron-items (map string->number (string-split cron-str))) - (now-seconds (or now-seconds-in (current-seconds))) - (now-time (seconds->local-time now-seconds)) - (last-done-time (seconds->local-time last-done)) - (all-times (make-hash-table))) - ;; (print "cron-items: " cron-items "(length cron-items): " (length cron-items)) - (if (not (eq? (length cron-items) 5)) ;; don't even try to figure out junk strings - #f - (match-let ((( cmin chour cdayofmonth cmonth cdayofweek) - cron-items) - ;; 0 1 2 3 4 5 6 - ((nsec nmin nhour ndayofmonth nmonth nyr ndayofweek n7 n8 n9) - (vector->list now-time)) - ((lsec lmin lhour ldayofmonth lmonth lyr ldayofweek l7 l8 l9) - (vector->list last-done-time))) - ;; create all possible time slots - ;; remove invalid slots due to (for example) day of week - ;; get the start and end entries for the ref-seconds (current) time - ;; if last-done > ref-seconds => this is an ERROR! - ;; does the last-done time fall in the legit region? - ;; yes => #f do not run again this command - ;; no => #t ok to run the command - (for-each ;; month - (lambda (month) - (for-each ;; dayofmonth - (lambda (dom) - (for-each - (lambda (hr) ;; hour - (for-each - (lambda (minute) ;; minute - (let ((copy-now (apply vector (vector->list now-time)))) - (vector-set! copy-now 0 0) ;; force seconds to zero - (vector-set! copy-now 1 minute) - (vector-set! copy-now 2 hr) - (vector-set! copy-now 3 dom) ;; dom is already corrected for zero referenced - (vector-set! copy-now 4 month) - (let* ((copy-now-secs (local-time->seconds copy-now)) - (new-copy (seconds->local-time copy-now-secs))) ;; remake the time vector - (if (or (not cdayofweek) - (equal? (vector-ref new-copy 6) - cdayofweek)) ;; if the day is specified and a match OR if the day is NOT specified - (if (or (not cdayofmonth) - (equal? (vector-ref new-copy 3) - (+ 1 cdayofmonth))) ;; if the month is specified and a match OR if the month is NOT specified - (hash-table-set! all-times copy-now-secs new-copy)))))) - (if cmin - `(,cmin) ;; if given cmin, have to use it - (list (- nmin 1) nmin (+ nmin 1))))) ;; minute - (if chour - `(,chour) - (list (- nhour 1) nhour (+ nhour 1))))) ;; hour - (if cdayofmonth - `(,cdayofmonth) - (list (- ndayofmonth 1) ndayofmonth (+ ndayofmonth 1))))) - (if cmonth - `(,cmonth) - (list (- nmonth 1) nmonth (+ nmonth 1)))) - (let ((before #f) - (is-in #f)) - (for-each - (lambda (moment) - (if (and before - (<= before now-seconds) - (>= moment now-seconds)) - (begin - ;; (print) - ;; (print "Before: " (time->string (seconds->local-time before))) - ;; (print "Now: " (time->string (seconds->local-time now-seconds))) - ;; (print "After: " (time->string (seconds->local-time moment))) - ;; (print "Last: " (time->string (seconds->local-time last-done))) - (if (< last-done before) - (set! is-in before)) - )) - (set! before moment)) - (sort (hash-table-keys all-times) <)) - is-in))))) - -(define (common:extended-cron cron-str now-seconds-in last-done) - (let ((expanded-cron (common:cron-expand cron-str))) - (if (string? expanded-cron) - (common:cron-event expanded-cron now-seconds-in last-done) - (let loop ((hed (car expanded-cron)) - (tal (cdr expanded-cron))) - (if (common:cron-event hed now-seconds-in last-done) - #t - (if (null? tal) - #f - (loop (car tal)(cdr tal)))))))) - ;;====================================================================== ;; C O L O R S ;;====================================================================== (define (common:name->iup-color name) Index: commonmod.scm ================================================================== --- commonmod.scm +++ commonmod.scm @@ -17,21 +17,37 @@ ;; along with Megatest. If not, see . ;;====================================================================== (declare (unit commonmod)) +;; (declare (uses debugprint)) (use srfi-69) (module commonmod * -(import scheme chicken data-structures extras files) -(import (prefix sqlite3 sqlite3:) - posix typed-records srfi-18 srfi-69 - md5 message-digest - regex srfi-1) +(import scheme + chicken + + (prefix sqlite3 sqlite3:) + data-structures + extras + files + matchable + md5 + message-digest + posix + regex + regex-case + srfi-1 + srfi-18 + srfi-69 + typed-records + + ;; debugprint + ) ;;====================================================================== ;; CONTENTS ;; ;; config file utils @@ -161,10 +177,40 @@ (filter (lambda (x) (not (string-match "^\\s*" x))) val-list)) '()))) +(define (get-cpu-load) + (let* ((load-info (with-input-from-file "/proc/loadavg" read-lines))) + (map string->number (string-split load-info)))) + +(define *current-host-cores* #f) + +(define (get-current-host-cores) + (or *current-host-cores* + (let ((cpu-info (with-input-from-file "/proc/cpuinfo" read-lines))) + (let loop ((lines cpu-info)) + (if (null? lines) + 1 ;; gotta be at least one! + (let* ((inl (car lines)) + (tail (cdr lines)) + (parts (string-split inl))) + (match parts + (("cpu" "cores" ":" num) (string->number num)) + (else (loop tail))))))))) + +(define (number-of-processes-running processname) + (with-input-from-pipe + (conc "ps -def | egrep \""processname"\" |wc -l") + (lambda () + (string->number (read-line))))) + +;; get the normalized (i.e. load / numcpus) for *this* host +;; +(define (get-normalized-cpu-load) + (/ (get-cpu-load)(get-current-host-cores))) + ;;====================================================================== ;; testsuite and area utilites ;;====================================================================== (define (get-testsuite-name toppath configdat) @@ -208,18 +254,344 @@ (let ((adat (get-section cfgdat "areas"))) (map (lambda (entry) `(,(car entry) . ,(val->alist (cadr entry)))) adat))) + +;;====================================================================== +;; time utils +;;====================================================================== + +(define (common:human-time) + (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S")) + +;;====================================================================== +;; T I M E A N D D A T E +;;====================================================================== + +;;====================================================================== +;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5 +(define (common:hms-string->seconds tstr) + (let ((parts (string-split-fields "\\w+" tstr)) + (time-secs 0) + ;; s=seconds, m=minutes, h=hours, d=days, M=months, y=years, w=weeks + (trx (regexp "(\\d+)([smhdMyw])"))) + (for-each (lambda (part) + (let ((match (string-match trx part))) + (if match + (let ((val (string->number (cadr match))) + (unt (caddr match))) + (if val + (set! time-secs (+ time-secs (* val + (case (string->symbol unt) + ((s) 1) + ((m) 60) ;; minutes + ((h) 3600) + ((d) 86400) + ((w) 604800) + ((M) 2628000) ;; aproximately one month + ((y) 31536000) + (else #f)))))))))) + parts) + time-secs)) + +(define (seconds->hr-min-sec secs) + (let* ((hrs (quotient secs 3600)) + (min (quotient (- secs (* hrs 3600)) 60)) + (sec (- secs (* hrs 3600)(* min 60)))) + (conc (if (> hrs 0)(conc hrs "hr ") "") + (if (> min 0)(conc min "m ") "") + sec "s"))) + +(define (seconds->time-string sec) + (time->string + (seconds->local-time sec) "%H:%M:%S")) + +(define (seconds->work-week/day-time sec) + (time->string + (seconds->local-time sec) "ww%V.%u %H:%M")) + +(define (seconds->work-week/day sec) + (time->string + (seconds->local-time sec) "ww%V.%u")) + +(define (seconds->year-work-week/day sec) + (time->string + (seconds->local-time sec) "%yww%V.%w")) + +(define (seconds->year-work-week/day-time sec) + (time->string + (seconds->local-time sec) "%Yww%V.%w %H:%M")) + +(define (seconds->year-week/day-time sec) + (time->string + (seconds->local-time sec) "%Yw%V.%w %H:%M")) + +(define (seconds->quarter sec) + (case (string->number + (time->string + (seconds->local-time sec) + "%m")) + ((1 2 3) 1) + ((4 5 6) 2) + ((7 8 9) 3) + ((10 11 12) 4) + (else #f))) + +;;====================================================================== +;; basic ISO8601 format (e.g. "2017-02-28 06:02:54") date time => Unix epoch +;; +(define (common:date-time->seconds datetime) + (local-time->seconds (string->time datetime "%Y-%m-%d %H:%M:%S"))) + +;;====================================================================== +;; given span of seconds tstart to tend +;; find start time to mark and mark delta +;; +(define (common:find-start-mark-and-mark-delta tstart tend) + (let* ((deltat (- (max tend (+ tend 10)) tstart)) ;; can't handle runs of less than 4 seconds. Pad it to 10 seconds ... + (result #f) + (min 60) + (hr (* 60 60)) + (day (* 24 hr)) + (yr (* 365 day)) ;; year + (mo (/ yr 12)) + (wk (* day 7))) + (for-each + (lambda (max-blks) + (for-each + (lambda (span) ;; 5 2 1 + (if (not result) + (for-each + (lambda (timeunit timesym) ;; year month day hr min sec + (if (not result) + (let* ((time-blk (* span timeunit)) + (num-blks (quotient deltat time-blk))) + (if (and (> num-blks 4)(< num-blks max-blks)) + (let ((first (* (quotient tstart time-blk) time-blk))) + (set! result (list span timeunit time-blk first timesym)) + ))))) + (list yr mo wk day hr min 1) + '( y mo w d h m s)))) + (list 8 6 5 2 1))) + '(5 10 15 20 30 40 50 500)) + (if values + (apply values result) + (values 0 day 1 0 'd)))) + +;;====================================================================== +;; given x y lim return the cron expansion +;; +(define (common:expand-cron-slash x y lim) + (let loop ((curr x) + (res `())) + (if (< curr lim) + (loop (+ curr y) (cons curr res)) + (reverse res)))) + +;;====================================================================== +;; expand a complex cron string to a list of cron strings +;; +;; x/y => x, x+y, x+2y, x+3y while x+Ny a, b ,c +;; +;; NOTE: with flatten a lot of the crud below can be factored down. +;; +(define (common:cron-expand cron-str) + (if (list? cron-str) + (flatten + (fold (lambda (x res) + (if (list? x) + (let ((newres (map common:cron-expand x))) + (append x newres)) + (cons x res))) + '() + cron-str)) ;; (map common:cron-expand cron-str)) + (let ((cron-items (string-split cron-str)) + (slash-rx (regexp "(\\d+)/(\\d+)")) + (comma-rx (regexp ".*,.*")) + (max-vals '((min . 60) + (hour . 24) + (dayofmonth . 28) ;;; BUG!!!! This will be a bug for some combinations + (month . 12) + (dayofweek . 7)))) + (if (< (length cron-items) 5) ;; bad spec + cron-str ;; `(,cron-str) ;; just return the string, something downstream will fix it + (let loop ((hed (car cron-items)) + (tal (cdr cron-items)) + (type 'min) + (type-tal '(hour dayofmonth month dayofweek)) + (res '())) + (regex-case + hed + (slash-rx ( _ base incr ) (let* ((basen (string->number base)) + (incrn (string->number incr)) + (expanded-vals (common:expand-cron-slash basen incrn (alist-ref type max-vals))) + (new-list-crons (fold (lambda (x myres) + (cons (conc (if (null? res) + "" + (conc (string-intersperse res " ") " ")) + x " " (string-intersperse tal " ")) + myres)) + '() expanded-vals))) + ;; (print "new-list-crons: " new-list-crons) + ;; (fold (lambda (x res) + ;; (if (list? x) + ;; (let ((newres (map common:cron-expand x))) + ;; (append x newres)) + ;; (cons x res))) + ;; '() + (flatten (map common:cron-expand new-list-crons)))) + ;; (map common:cron-expand (map common:cron-expand new-list-crons)))) + (else (if (null? tal) + cron-str + (loop (car tal)(cdr tal)(car type-tal)(cdr type-tal)(append res (list hed))))))))))) + +;;====================================================================== +;; given a cron string and the last time event was processed return #t to run or #f to not run +;; +;; min hour dayofmonth month dayofweek +;; 0-59 0-23 1-31 1-12 0-6 ### NOTE: dayofweek does not include 7 +;; +;; #t => yes, run the job +;; #f => no, do not run the job +;; +(define (common:cron-event cron-str now-seconds-in last-done) ;; ref-seconds = #f is NOW. + (let* ((cron-items (map string->number (string-split cron-str))) + (now-seconds (or now-seconds-in (current-seconds))) + (now-time (seconds->local-time now-seconds)) + (last-done-time (seconds->local-time last-done)) + (all-times (make-hash-table))) + ;; (print "cron-items: " cron-items "(length cron-items): " (length cron-items)) + (if (not (eq? (length cron-items) 5)) ;; don't even try to figure out junk strings + #f + (match-let ((( cmin chour cdayofmonth cmonth cdayofweek) + cron-items) + ;; 0 1 2 3 4 5 6 + ((nsec nmin nhour ndayofmonth nmonth nyr ndayofweek n7 n8 n9) + (vector->list now-time)) + ((lsec lmin lhour ldayofmonth lmonth lyr ldayofweek l7 l8 l9) + (vector->list last-done-time))) + ;; create all possible time slots + ;; remove invalid slots due to (for example) day of week + ;; get the start and end entries for the ref-seconds (current) time + ;; if last-done > ref-seconds => this is an ERROR! + ;; does the last-done time fall in the legit region? + ;; yes => #f do not run again this command + ;; no => #t ok to run the command + (for-each ;; month + (lambda (month) + (for-each ;; dayofmonth + (lambda (dom) + (for-each + (lambda (hr) ;; hour + (for-each + (lambda (minute) ;; minute + (let ((copy-now (apply vector (vector->list now-time)))) + (vector-set! copy-now 0 0) ;; force seconds to zero + (vector-set! copy-now 1 minute) + (vector-set! copy-now 2 hr) + (vector-set! copy-now 3 dom) ;; dom is already corrected for zero referenced + (vector-set! copy-now 4 month) + (let* ((copy-now-secs (local-time->seconds copy-now)) + (new-copy (seconds->local-time copy-now-secs))) ;; remake the time vector + (if (or (not cdayofweek) + (equal? (vector-ref new-copy 6) + cdayofweek)) ;; if the day is specified and a match OR if the day is NOT specified + (if (or (not cdayofmonth) + (equal? (vector-ref new-copy 3) + (+ 1 cdayofmonth))) ;; if the month is specified and a match OR if the month is NOT specified + (hash-table-set! all-times copy-now-secs new-copy)))))) + (if cmin + `(,cmin) ;; if given cmin, have to use it + (list (- nmin 1) nmin (+ nmin 1))))) ;; minute + (if chour + `(,chour) + (list (- nhour 1) nhour (+ nhour 1))))) ;; hour + (if cdayofmonth + `(,cdayofmonth) + (list (- ndayofmonth 1) ndayofmonth (+ ndayofmonth 1))))) + (if cmonth + `(,cmonth) + (list (- nmonth 1) nmonth (+ nmonth 1)))) + (let ((before #f) + (is-in #f)) + (for-each + (lambda (moment) + (if (and before + (<= before now-seconds) + (>= moment now-seconds)) + (begin + ;; (print) + ;; (print "Before: " (time->string (seconds->local-time before))) + ;; (print "Now: " (time->string (seconds->local-time now-seconds))) + ;; (print "After: " (time->string (seconds->local-time moment))) + ;; (print "Last: " (time->string (seconds->local-time last-done))) + (if (< last-done before) + (set! is-in before)) + )) + (set! before moment)) + (sort (hash-table-keys all-times) <)) + is-in))))) + +(define (common:extended-cron cron-str now-seconds-in last-done) + (let ((expanded-cron (common:cron-expand cron-str))) + (if (string? expanded-cron) + (common:cron-event expanded-cron now-seconds-in last-done) + (let loop ((hed (car expanded-cron)) + (tal (cdr expanded-cron))) + (if (common:cron-event hed now-seconds-in last-done) + #t + (if (null? tal) + #f + (loop (car tal)(cdr tal)))))))) + + ;;====================================================================== ;; misc stuff ;;====================================================================== -;; (define (debug:print . params) #f) -;; (define (debug:print-info . params) #f) -;; -;; (define (set-functions dbgp dbgpinfo) -;; (set! debug:print dbgp) -;; (set! debug:print-info dbgpinfo)) +(define (common:get-signature str) + (message-digest-string (md5-primitive) str)) + +;;====================================================================== +;; hash of hashs +;;====================================================================== + +(define (db:hoh-set! dat key1 key2 val) + (let* ((subhash (hash-table-ref/default dat key1 #f))) + (if subhash + (hash-table-set! subhash key2 val) + (begin + (hash-table-set! dat key1 (make-hash-table)) + (db:hoh-set! dat key1 key2 val))))) + +(define (db:hoh-get dat key1 key2) + (let* ((subhash (hash-table-ref/default dat key1 #f))) + (and subhash + (hash-table-ref/default subhash key2 #f)))) + +;;====================================================================== +;; when called from a wrapper I need sometimes to find the calling +;; wrapper, this is for dashboard to find the correct megatest. +;; +(define (common:find-local-megatest #!optional (progname "megatest")) + (let ((res (filter file-exists? + (map (lambda (updir) + (let* ((lm (car (argv))) + (dir (pathname-directory lm)) + (exe (pathname-strip-directory lm))) + (conc (if dir (conc dir "/") "") + (case (string->symbol exe) + ((dboard) (conc updir progname)) + ((mtest) (conc updir progname)) + ((dashboard) progname) + (else exe))))) + '("../../" "../"))))) + (if (null? res) + (begin + ;; (debug:print 0 *default-log-port* "Failed to find this executable! Using what can be found on the path") + progname) + (car res)))) ) Index: dashboard-context-menu.scm ================================================================== --- dashboard-context-menu.scm +++ dashboard-context-menu.scm @@ -32,10 +32,11 @@ (use srfi-1 posix regex regex-case srfi-69) (use (prefix sqlite3 sqlite3:)) (declare (unit dashboard-context-menu)) (declare (uses common)) +(declare (uses commonmod)) (declare (uses db)) (declare (uses gutils)) (declare (uses rmt)) (declare (uses ezsteps)) ;; (declare (uses sdb)) @@ -43,10 +44,12 @@ (declare (uses subrun)) (include "common_records.scm") (include "db_records.scm") (include "run_records.scm") + +(import commonmod) (define (dboard:launch-testpanel run-id test-id) (let* ((dboardexe (common:find-local-megatest "dashboard")) (cmd (conc dboardexe " -test " run-id "," test-id Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -31,17 +31,20 @@ (use srfi-1 posix regex regex-case srfi-69) (use (prefix sqlite3 sqlite3:)) (declare (unit dashboard-tests)) (declare (uses common)) +(declare (uses commonmod)) (declare (uses db)) (declare (uses gutils)) (declare (uses rmt)) (declare (uses ezsteps)) ;; (declare (uses sdb)) ;; (declare (uses filedb)) (declare (uses subrun)) + +(import commonmod) (include "common_records.scm") (include "db_records.scm") (include "run_records.scm") @@ -459,12 +462,11 @@ ;;====================================================================== ;; ;;====================================================================== (define (dashboard-tests:examine-test run-id test-id) ;; run-id run-key origtest) (let* ((db-path (db:dbfile-path)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/db/" run-id ".db")) - (dbstruct #f) ;; NOT ACTUALLY USED (db:setup)) ;; (make-dbr:dbstruct path: (db:dbfile-path #f) ;; (configf:lookup *configdat* "setup" "linktree") - ;; local: #t)) + (dbstruct #f) ;; NOT USED (testdat (rmt:get-test-info-by-id run-id test-id)) ;; (db:get-test-info-by-id dbstruct run-id test-id)) (db-mod-time 0) ;; (file-modification-time db-path)) (last-update 0) ;; (current-seconds)) (request-update #t)) (if (not testdat) Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -44,20 +44,28 @@ (declare (uses dcommon)) (declare (uses dashboard-context-menu)) (declare (uses vg)) (declare (uses subrun)) (declare (uses mt)) -(declare (uses dbfile)) +(declare (uses dbmod)) +;; (declare (uses dbmemmod)) +(declare (uses dbfile)) + +(import dbmod dbfile) (include "common_records.scm") (include "db_records.scm") (include "run_records.scm") (include "task_records.scm") (include "megatest-version.scm") (include "megatest-fossil-hash.scm") (include "vg_records.scm") +;; set some parameters here - these need to be put in something that can be loaded from other +;; executables such as dashboard and mtutil +;; +(include "dashboard-transport-mode.scm") (dbfile:db-init-proc db:initialize-main-db) (define help (conc "Megatest Dashboard, documentation at http://www.kiatoa.com/fossils/megatest version " megatest-version " license GPL, Copyright (C) Matt Welland 2012-2017 Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -22,10 +22,20 @@ ;; Database access ;;====================================================================== ;; dbstruct vector containing all the relevant dbs like main.db, megatest.db, run.db etc +(declare (unit db)) +(declare (uses common)) +(declare (uses debugprint)) +(declare (uses dbmod)) +(declare (uses dbfile)) +(declare (uses keys)) +(declare (uses ods)) +(declare (uses client)) +(declare (uses mt)) + (use (srfi 18) extras tcp stack (prefix sqlite3 sqlite3:) @@ -44,28 +54,19 @@ z3 typed-records matchable files) -(declare (unit db)) -(declare (uses common)) -(declare (uses dbmod)) -;; (declare (uses debugprint)) -(declare (uses dbfile)) -(declare (uses keys)) -(declare (uses ods)) -(declare (uses client)) -(declare (uses mt)) - (include "common_records.scm") (include "db_records.scm") (include "key_records.scm") (include "run_records.scm") (define *number-of-writes* 0) (define *number-non-write-queries* 0) +(import debugprint) (import dbmod) (import dbfile) ;; record for keeping state,status and count for doing roll-ups in ;; iterated tests @@ -74,10 +75,17 @@ (state #f) (status #f) (count 0)) +(define (db:with-db dbstruct run-id r/w proc . params) + (case (rmt:transport-mode) + ((http)(dbfile:with-db dbstruct run-id r/w proc params)) + ((tcp) (dbmod:with-db dbstruct run-id r/w proc params)) + ((nfs) (dbmod:with-db dbstruct run-id r/w proc params)) + (else (assert #f "FATAL: db:with-db called with non-existant transport mode")))) + ;;====================================================================== ;; hash of hashs ;;====================================================================== @@ -124,10 +132,83 @@ (assert *toppath* "FATAL: db:setup called before launch:setup has been run.") (let* ((tmpdir (common:get-db-tmp-area))) (if (not *dbstruct-dbs*) (dbfile:setup do-sync *toppath* tmpdir) *dbstruct-dbs*))) + +;; moved from dbfile +;; +;; ADD run-id SUPPORT +;; +(define (db:create-all-triggers dbstruct) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (db:create-triggers db)))) + +(define (db:create-triggers db) + (for-each (lambda (key) + (sqlite3:execute db (cadr key))) + db:trigger-list)) + +(define (db:drop-all-triggers dbstruct) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (db:drop-triggers db)))) + +(define (db:have-incompletes? dbstruct run-id ovr-deadtime) + (let* ((incompleted '()) + (oldlaunched '()) + (toplevels '()) + ;; (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) + (deadtime (or ovr-deadtime 72000))) ;; twenty hours + (db:with-db + dbstruct run-id #f + (lambda (dbdat db) + + ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes + ;; + ;; HOWEVER: this code in run:test seems to work fine + ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) + ;; (db:test-get-run_duration testdat))) + ;; 600) + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (begin + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels))) + ;; (print-info "Found old toplevel test in RUNNING state, test-id=" test-id)) + (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) + (db:get-cache-stmth dbdat db + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');") + run-id deadtime) + + ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config + ;; + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) + (db:get-cache-stmth dbdat db + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');") + run-id) + + ;; (print-info "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") + (if (and (null? incompleted) + (null? oldlaunched) + (null? toplevels)) + #f + #t))))) + ;; looks up subdb and returns it, if not found then set up ;; and then return it. ;; #;(define (db:get-db dbstruct run-id) @@ -513,11 +594,11 @@ (dbfiles (if old2new (glob (conc *toppath* "/.megatest/*.db")) (glob (conc tmp-area "/.megatest/*.db")))) (keys (db:get-keys dbstruct)) (sync-durations (make-hash-table))) ;; kill servers - (if (and killservers servers)(db:kill-servers)) + (if killservers (db:kill-servers)) (if (not dbfiles) (debug:print-error 0 *default-log-port* "no dbfiles found in " (conc *toppath* "/.megatest")) (for-each (lambda (srcfile) @@ -583,12 +664,11 @@ (define (db:tmp->megatest.db-sync dbstruct run-id last-update) (let* ((subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct))) (res '())) (for-each (lambda (subdb) - (let* ((dbname (db:run-id->dbname run-id)) - (mtdb (dbr:subdb-mtdb subdb)) + (let* ((mtdb (dbr:subdb-mtdb subdb)) (tmpdb (db:get-subdb dbstruct run-id)) (refndb (dbr:subdb-refndb subdb)) (newres (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) last-update tmpdb refndb mtdb))) ;; (stack-push! (dbr:subdb-dbstack subdb) tmpdb) ;; BUG: verify this is really needed @@ -1392,29 +1472,37 @@ ;;====================================================================== ;; no-sync.db - small bits of data to be shared between servers ;;====================================================================== -(define (db:no-sync-db db-in) - (if db-in - db-in - (if *no-sync-db* - *no-sync-db* - (begin - (mutex-lock! *db-access-mutex*) - (let ((dbpath (common:get-db-tmp-area)) - (db (dbfile:open-no-sync-db dbpath))) - (set! *no-sync-db* db) - (mutex-unlock! *db-access-mutex*) - db))))) +(define (db:get-dbsync-path) + (case (rmt:transport-mode) + ((http)(common:get-db-tmp-area)) + ((tcp) (conc *toppath*"/.megatest")) + ((nfs) (conc *toppath*"/.megatest")) + (else "/tmp/dunno-this-gonna-exist"))) + + ;; (define (db:no-sync-db db-in) + ;; (if db-in + ;; db-in + ;; (if *no-sync-db* + ;; *no-sync-db* + ;; (begin + ;; (mutex-lock! *db-access-mutex*) + ;; (let ((dbpath (db:get-dbsync-path)) + ;; (db (dbfile:open-no-sync-db dbpath))) + ;; (assert (sqlite3:database? db) "FATAL: db:no-sync-db failed to open a database") + ;; (set! *no-sync-db* db) + ;; (mutex-unlock! *db-access-mutex*) + ;; db))))) (define (with-no-sync-db proc) - (let* ((db (db:no-sync-db *no-sync-db*))) + (let* ((db (db:open-no-sync-db))) (proc db))) (define (db:open-no-sync-db) - (dbfile:open-no-sync-db (db:dbfile-path))) + (dbfile:open-no-sync-db (db:get-dbsync-path))) (define (db:no-sync-close-db db stmt-cache) (db:safely-close-sqlite3-db db stmt-cache)) @@ -1953,19 +2041,20 @@ (debug:print-info 4 *default-log-port* "runs:get-runs-by-patt qry=" qry-str " " runnamepatt) ;(print "runs:get-runs-by-patt qry=" qry-str " " runnamepatt) (vector header (reverse - (db:with-db dbstruct #f #f ;; reads db, does not write to it. - (lambda (dbdat db) - (sqlite3:fold-row - (lambda (res . r) - (cons (list->vector r) res)) - '() - db - qry-str - runnamepatt))))))) + (db:with-db + dbstruct #f #f ;; reads db, does not write to it. + (lambda (dbdat db) + (sqlite3:fold-row + (lambda (res . r) + (cons (list->vector r) res)) + '() + db + qry-str + runnamepatt))))))) ;; use (get-value-by-header (db:get-header runinfo)(db:get-rows runinfo)) ;; NOTE: Does NOT return a list of rows (or one row) for the first slot of the vector ;; this is inconsistent with get-runs but it makes some sense. ;; @@ -4666,10 +4755,13 @@ (begin (sqlite3:interrupt! db) (sqlite3:finalize! db #t) ;; (vector-set! *task-db* 0 #f) (set! *task-db* #f))))) + (if (and *no-sync-db* + (sqlite3:database? *no-sync-db*)) + (sqlite3:finalize! *no-sync-db* #t)) (if (and (not (args:get-arg "-server")) *runremote*) (begin (debug:print-info 0 *default-log-port* "Closing all client connections...") (http-transport:close-connections *runremote*) Index: dbfile.scm ================================================================== --- dbfile.scm +++ dbfile.scm @@ -41,11 +41,12 @@ commonmod ;; debugprint ) (define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic -(define num-run-dbs (make-parameter 10)) ;; number of db's in .megatest +(define num-run-dbs (make-parameter 10)) ;; number of db's in .megatest +(define dbfile:testsuite-name (make-parameter #f)) ;;====================================================================== ;; R E C O R D S ;;====================================================================== @@ -56,10 +57,19 @@ (areapath #f) (homehost #f) (tmppath #f) (read-only #f) (subdbs (make-hash-table)) + ;; + ;; for the inmem approach (see dbmod.scm) + ;; this is one db per server + (inmem #f) ;; handle for the in memory copy + (dbfile #f) ;; path to the db file on disk + (ondiskdb #f) ;; handle for the on-disk file + (dbdat #f) ;; create a dbdat for the downstream calls such as db:with-db + (last-update 0) + (sync-proc #f) ) ;; NOTE: Need one dbr:subdb per main.db, 1.db ... ;; (defstruct dbr:subdb @@ -94,10 +104,11 @@ (define *max-api-process-requests* 0) (define *api-process-request-count* 0) (define *db-write-access* #t) (define *db-last-sync* 0) ;; last time the sync to megatest.db happened (define *db-multi-sync-mutex* (make-mutex)) ;; protect access to *db-sync-in-progress*, *db-last-sync* +(define *db-last-access* (current-seconds)) (define (db:generic-error-printout exn . message) (print-call-chain (current-error-port)) (apply dbfile:print-err message) (dbfile:print-err @@ -159,39 +170,10 @@ ) #f ) ) -;; ;; set up a single db (e.g. main.db, 1.db ... etc.) -;; ;; -;; (define (db:setup-db dbstruct areapath run-id) -;; (let* ((dbname (db:run-id->dbname run-id)) -;; (dbstruct (hash-table-ref/default dbstructs dbname #f))) -;; (if dbstruct -;; dbstruct -;; (let* ((dbstruct-new (make-dbr:dbstruct))) -;; (db:open-db dbstruct-new run-id areapath: areapath do-sync: #t) -;; (hash-table-set! dbstructs dbname dbstruct-new) -;; dbstruct-new)))) - -;; ; Returns the dbdat for a particular dbfile inside the area -;; ;; -;; (define (dbr:dbstruct-get-dbdat dbstruct dbfile) -;; (hash-table-ref/default (dbr:dbstruct-dbdats dbstruct) dbfile #f)) -;; -;; (define (dbr:dbstruct-dbdat-put! dbstruct dbfile db) -;; (hash-table-set! (dbr:dbstruct-dbdats dbstruct) dbfile db)) -;; -;; (define (db:run-id->first-num run-id) -;; (let* ((s (number->string run-id)) -;; (l (string-length s))) -;; (substring s (- l 1) l))) - -;; 1234 => 4/1234.db -;; #f => 0/main.db -;; (abandoned the idea of num/db) -;; (define (dbfile:run-id->path apath run-id) (conc apath"/"(dbfile:run-id->dbname run-id))) (define (db:dbname->path apath dbname) (conc apath"/"dbname)) @@ -215,14 +197,12 @@ (cond (*dbstruct-dbs* (dbfile:print-err "WARNING: dbfile:setup called when *dbstruct-dbs* is already initialized") *dbstruct-dbs*) ;; TODO: when multiple areas are supported, this optimization will be a hazard (else - (let* ((dbstruct (make-dbr:dbstruct))) + (let* ((dbstruct (make-dbr:dbstruct areapath: areapath tmppath: tmppath))) (set! *dbstruct-dbs* dbstruct) - (dbr:dbstruct-areapath-set! dbstruct areapath) - (dbr:dbstruct-tmppath-set! dbstruct tmppath) dbstruct)))) (define (dbfile:get-subdb dbstruct run-id) (let* ((dbfname (dbfile:run-id->dbname run-id))) (hash-table-ref/default (dbr:dbstruct-subdbs dbstruct) dbfname #f))) @@ -415,11 +395,10 @@ "cp "backupfname" "fname))) (dbfile:print-err "WARNING: attempting recovery of file "fname" by running commands:\n" " "cmd) (system cmd))) - (define (dbfile:open-no-sync-db dbpath) (if *no-sync-db* *no-sync-db* (begin (if (not (file-exists? dbpath)) @@ -429,13 +408,20 @@ (init-proc (lambda (db) (if (not db-exists) (begin (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));")) ))) - (db (dbfile:cautious-open-database dbname init-proc 0 "WAL"))) ;; (sqlite3:open-database dbname))) - ;; (sqlite3:execute db "PRAGMA synchronous = 0;") - ;; (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) ;; done in cautious-open-database + (on-tmp (equal? (car (string-split dbpath "/")) "tmp")) + (db (if on-tmp + (dbfile:cautious-open-database dbname init-proc 0 "WAL") + (dbfile:cautious-open-database dbname init-proc 0 #f) + ;; (sqlite3:open-database dbname) + ))) + (if on-tmp ;; done in cautious-open-database + (begin + (sqlite3:execute db "PRAGMA synchronous = 0;") + (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)))) (set! *no-sync-db* db) db)))) (define (db:no-sync-set db var val) (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val)) @@ -570,11 +556,11 @@ (tmpdb (db:open-db dbstruct run-id dbinit)) ;; sqlite3-db tmpdbfile #f)) (start-t (current-seconds))) (mutex-lock! *db-multi-sync-mutex*) (let ((update_info (cons "last_update" (if force-sync 0 *db-last-sync*) ))) (mutex-unlock! *db-multi-sync-mutex*) - (db:sync-tables (db:sync-all-tables-list dbstruct keys) update_info tmpdb mtdb)) + (db:sync-tables (db:sync-all-tables-list keys) update_info tmpdb mtdb)) (mutex-lock! *db-multi-sync-mutex*) (set! *db-last-sync* start-t) (set! *db-last-access* start-t) (mutex-unlock! *db-multi-sync-mutex*) (dbfile:add-dbdat dbstruct run-id tmpdb) @@ -633,12 +619,12 @@ '("type" #f) '("last_update" #f)))) ;; needs db to get keys, this is for syncing all tables ;; -(define (db:sync-main-list dbstruct keys) - (let ((keys keys)) ;; (db:get-keys dbstruct))) +(define (db:sync-main-list keys) + (let ((keys keys)) (list (list "keys" '("id" #f) '("fieldname" #f) '("fieldtype" #f)) @@ -689,12 +675,12 @@ '("params" #f) '("creation_time" #f) '("execution_time" #f)) ))) -(define (db:sync-all-tables-list dbstruct keys) - (append (db:sync-main-list dbstruct keys) +(define (db:sync-all-tables-list keys) + (append (db:sync-main-list keys) db:sync-tests-only)) ;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) ) ;; db's are dbdat's ;; @@ -945,30 +931,10 @@ FOR EACH ROW BEGIN UPDATE test_data SET last_update=(strftime('%s','now')) WHERE id=old.id; END;" ))) -;; -;; ADD run-id SUPPORT -;; -(define (db:create-all-triggers dbstruct) - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (db:create-triggers db)))) - -(define (db:create-triggers db) - (for-each (lambda (key) - (sqlite3:execute db (cadr key))) - db:trigger-list)) - -(define (db:drop-all-triggers dbstruct) - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (db:drop-triggers db)))) - (define (db:is-trigger-dropped db tbl-name) (let* ((trigger-name (if (equal? tbl-name "test_steps") "update_teststeps_trigger" (conc "update_" tbl-name "_trigger"))) (res #f)) @@ -1012,10 +978,14 @@ ;; call with dbinit=db:initialize-main-db ;; (define (db:open-db dbstruct run-id dbinit) ;; (mutex-lock! *db-open-mutex*) (let* ((dbdat (dbfile:open-db dbstruct run-id dbinit))) + #;(case (rmt:transport-mode) + ((http) (dbfile:open-db dbstruct run-id dbinit)) + ((tcp) (dbmod:open-db dbstruct run-id dbinit)) + (else (assert #f "FATAL: rmt:transport-node not correct value"(rmt:transport-mode)))) (set! *db-write-access* (not (dbr:dbdat-read-only dbdat))) ;; (mutex-unlock! *db-open-mutex*) dbdat)) (define dbfile:db-init-proc (make-parameter #f)) @@ -1078,11 +1048,11 @@ (define no-condition-db-with-db (make-parameter #t)) ;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;") ;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no ;; -(define (db:with-db dbstruct run-id r/w proc . params) +(define (dbfile:with-db dbstruct run-id r/w proc params) (assert dbstruct "FATAL: db:with-db called with dbstruct "#f) (assert (dbr:dbstruct? dbstruct) "FATAL: dbstruct is "dbstruct) (let* ((use-mutex (> *api-process-request-count* 25)) ;; risk of db corruption (have-struct (dbr:dbstruct? dbstruct)) (dbdat (if have-struct ;; this stuff just allows us to call with a db handle directly @@ -1268,58 +1238,8 @@ (let* ((newstmth (sqlite3:prepare db stmt))) ;; (db:hoh-set! stmt-cache db stmt newstmth) (hash-table-set! stmt-cache stmt newstmth) newstmth)))) -(define (db:have-incompletes? dbstruct run-id ovr-deadtime) - (let* ((incompleted '()) - (oldlaunched '()) - (toplevels '()) - ;; (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) - (deadtime (or ovr-deadtime 72000))) ;; twenty hours - (db:with-db - dbstruct run-id #f - (lambda (dbdat db) - - ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes - ;; - ;; HOWEVER: this code in run:test seems to work fine - ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) - ;; (db:test-get-run_duration testdat))) - ;; 600) - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (begin - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels))) - ;; (print-info "Found old toplevel test in RUNNING state, test-id=" test-id)) - (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) - (db:get-cache-stmth dbdat db - "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');") - run-id deadtime) - - ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config - ;; - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) - (db:get-cache-stmth dbdat db - "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');") - run-id) - - ;; (print-info "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") - (if (and (null? incompleted) - (null? oldlaunched) - (null? toplevels)) - #f - #t))))) ) Index: dbmod.scm ================================================================== --- dbmod.scm +++ dbmod.scm @@ -17,40 +17,301 @@ ;; along with Megatest. If not, see . ;;====================================================================== (declare (unit dbmod)) +(declare (uses dbfile)) +(declare (uses commonmod)) +(declare (uses debugprint)) (module dbmod * -(import scheme chicken data-structures extras) -(import (prefix sqlite3 sqlite3:) - posix typed-records srfi-18 - srfi-69) - -(define (db:run-id->dbname run-id) - (cond - ((number? run-id)(conc run-id ".db")) - ((not run-id) "main.db") - (else run-id))) - - -;;====================================================================== -;; hash of hashs -;;====================================================================== - - -(define (db:hoh-set! dat key1 key2 val) - (let* ((subhash (hash-table-ref/default dat key1 #f))) - (if subhash - (hash-table-set! subhash key2 val) - (begin - (hash-table-set! dat key1 (make-hash-table)) - (db:hoh-set! dat key1 key2 val))))) - -(define (db:hoh-get dat key1 key2) - (let* ((subhash (hash-table-ref/default dat key1 #f))) - (and subhash - (hash-table-ref/default subhash key2 #f)))) +(import scheme + chicken + data-structures + extras + + (prefix sqlite3 sqlite3:) + posix + typed-records + srfi-1 + srfi-18 + srfi-69 + + commonmod + dbfile + debugprint + ) + +;; NOTE: This returns only the name "1.db", "main.db", not the path +;; +(define (dbmod:run-id->dbfname run-id) + (conc (dbfile:run-id->dbnum run-id)".db")) + +(define (dbmod:get-dbdir dbstruct) + (let* ((areapath (dbr:dbstruct-areapath dbstruct)) + (dbdir (conc areapath"/.megatest"))) + (if (and (file-write-access? areapath) + (not (file-exists? dbdir))) + (create-directory dbdir)) + dbdir)) + +(define (dbmod:run-id->full-dbfname dbstruct run-id) + (conc (dbmod:get-dbdir dbstruct run-id)"/"(dbmod:run-id->dbfname run-id))) + +;;====================================================================== +;; Read-only inmem cached direct from disk method +;;====================================================================== + +(define *dbmod:nfs-db-handles* (make-hash-table)) ;; dbfname -> dbstruct + +(define (dbmod:nfs-get-dbstruct run-id keys init-proc areapath) + (assert areapath "FATAL: dbmod:nfs-get-dbstruct called without areapath set.") + (let* ((dbfname (dbmod:run-id->dbfname run-id)) + (dbstruct (hash-table-ref/default *dbmod:nfs-db-handles* dbfname #f))) + (if dbstruct + (let* ((last-update (dbr:dbstruct-last-update dbstruct)) + (curr-secs (current-seconds))) + (if (> (- curr-secs last-update) 2) + (begin + ((dbr:dbstruct-sync-proc dbstruct) last-update) + (dbr:dbstruct-last-update-set! dbstruct curr-secs))) + dbstruct) + (let* ((newdbstruct (dbmod:open-dbmoddb areapath run-id dbfname init-proc keys syncdir: 'fromdisk))) + (hash-table-set! *dbmod:nfs-db-handles* dbfname newdbstruct) + newdbstruct)))) + +;;====================================================================== +;; The inmem one-db file per server method goes in here +;;====================================================================== + +(define (dbmod:with-db dbstruct run-id r/w proc params) + (let* ((dbdat (dbmod:open-db dbstruct run-id (dbfile:db-init-proc))) + (dbh (dbr:dbdat-dbh dbdat)) + (dbfile (dbr:dbdat-dbfile dbdat))) + (apply proc dbdat dbh params))) + +(define (dbmod:open-inmem-db initproc) + (let* ((db (sqlite3:open-database ":memory:")) + (handler (sqlite3:make-busy-timeout 3600))) + (sqlite3:set-busy-handler! db handler) + (initproc db) + db)) + +(define (dbmod:open-db dbstruct run-id dbinit) + (or (dbr:dbstruct-dbdat dbstruct) + (let* ((dbdat (make-dbr:dbdat + dbfile: (dbr:dbstruct-dbfile dbstruct) + dbh: (dbr:dbstruct-inmem dbstruct) + ))) + (dbr:dbstruct-dbdat-set! dbstruct dbdat) + dbdat))) + +;; Open the inmem db and the on-disk db +;; populate the inmem db with data +;; +;; Updates fields in dbstruct +;; Returns dbstruct +;; +;; * This routine creates the db if not found +;; * Probably can get rid of the dbstruct-in +;; +(define (dbmod:open-dbmoddb areapath run-id dbfname-in init-proc keys + #!key (dbstruct-in #f) + (syncdir 'todisk)) + (let* ((dbstruct (or dbstruct-in (make-dbr:dbstruct areapath: areapath))) + (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id))) + (dbpath (dbmod:get-dbdir dbstruct)) ;; directory where all the .db files are kept + (dbfullname (conc dbpath"/"dbfname)) ;; (dbmod:run-id->full-dbfname dbstruct run-id)) + (dbexists (file-exists? dbfullname)) + (inmem (dbmod:open-inmem-db init-proc)) + (write-access (file-write-access? dbpath)) + (db (dbfile:with-simple-file-lock + (conc dbfullname".lock") + (lambda () + (let* ((db (sqlite3:open-database dbfullname)) + (handler (sqlite3:make-busy-timeout 136000))) + (sqlite3:set-busy-handler! db handler) + (if write-access + (init-proc db)) + db)))) + (tables (db:sync-all-tables-list keys))) + (dbr:dbstruct-inmem-set! dbstruct inmem) + (dbr:dbstruct-ondiskdb-set! dbstruct db) + (dbr:dbstruct-dbfile-set! dbstruct dbfullname) + (dbr:dbstruct-sync-proc-set! dbstruct + (lambda (last-update) + (if (eq? syncdir 'todisk) ;; sync to disk normally, sync from in dashboard + (dbmod:sync-tables tables last-update inmem db) + (dbmod:sync-tables tables last-update db inmem)))) + (dbmod:sync-tables tables #f db inmem) + (dbr:dbstruct-last-update-set! dbstruct (current-seconds)) ;; should this be offset back in time by one second? + dbstruct)) + +(define (dbmod:close-db dbstruct) + ;; do final sync to disk file + ;; (do-sync ...) + (sqlite3:finalize! (dbr:dbstruct-ondiskdb dbstruct))) + +;;====================================================================== +;; Sync db +;;====================================================================== + +(define (dbmod:calc-use-last-update has-last-update fields last-update) + (cond + ((and has-last-update + (member "last_update" fields)) + #t) ;; if given a number, just use it for all fields + ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table + ((and (pair? last-update) + (member (car last-update) ;; last-update field name + (map car fields))) + #t) + ((and last-update (not (pair? last-update)) (not (number? last-update))) + (debug:print 0 *default-log-port* "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update);; found in fields + #f) + (else + #f))) + +;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) ) +;; dbs are sqlite3 db handles +;; +;; if last-update specified ("field-name" . time-in-seconds) +;; then sync only records where field-name >= time-in-seconds +;; IFF field-name exists +;; +;; Use (db:sync-all-tables-list keys) to get the tbls input +;; +(define (dbmod:sync-tables tbls last-update fromdb todb) + (let ((stmts (make-hash-table)) ;; table-field => stmt + (all-stmts '()) ;; ( ( stmt1 value1 ) ( stml2 value2 )) + (numrecs (make-hash-table)) + (start-time (current-milliseconds)) + (tot-count 0)) + (for-each ;; table + (lambda (tabledat) + (let* ((tablename (car tabledat)) + (fields (cdr tabledat)) + (has-last-update (member "last_update" fields)) + (use-last-update (dbmod:calc-use-last-update has-last-update fields last-update)) + (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for + (if (number? last-update) + last-update + (cdr last-update)) + #f)) + (last-update-field (if use-last-update + (if (number? last-update) + "last_update" + (car last-update)) + #f)) + (num-fields (length fields)) + (field->num (make-hash-table)) + (num->field (apply vector (map car fields))) ;; BBHERE + (full-sel (conc "SELECT " (string-intersperse (map car fields) ",") + " FROM " tablename (if use-last-update ;; apply last-update criteria + (conc " WHERE " last-update-field " >= " last-update-value) + "") + ";")) + (full-ins (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) " + " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );")) + (fromdat '()) + (fromdats '()) + (totrecords 0) + (batch-len 100) ;; (string->number (or (configf:lookup *configdat* "sync" "batchsize") "100"))) + (todat (make-hash-table)) + (count 0) + (field-names (map car fields))) + + ;; set up the field->num table + (for-each + (lambda (field) + (hash-table-set! field->num field count) + (set! count (+ count 1))) + fields) + + ;; read the source table + ;; store a list of all rows in the table in fromdat, up to batch-len. + ;; Then add fromdat to the fromdats list, clear fromdat and repeat. + (sqlite3:for-each-row + (lambda (a . b) + (set! fromdat (cons (apply vector a b) fromdat)) + (if (> (length fromdat) batch-len) + (begin + (set! fromdats (cons fromdat fromdats)) + (set! fromdat '()) + (set! totrecords (+ totrecords 1))))) + fromdb + full-sel) + + ;; Count less than batch-len as a record + (if (> (length fromdat) 0) + (set! totrecords (+ totrecords 1))) + + ;; tack on remaining records in fromdat + (if (not (null? fromdat)) + (set! fromdats (cons fromdat fromdats))) + + (sqlite3:for-each-row + (lambda (a . b) + (hash-table-set! todat a (apply vector a b))) + todb + full-sel) + + ;; first pass implementation, just insert all changed rows + (let* ((db todb) + (drp-trigger (if (member "last_update" field-names) + (db:drop-trigger db tablename) + #f)) + (has-last-update (member "last_update" field-names)) + (is-trigger-dropped (if has-last-update + (db:is-trigger-dropped db tablename) + #f)) + (stmth (sqlite3:prepare db full-ins)) + (changed-rows 0)) + (for-each + (lambda (fromdat-lst) + (sqlite3:with-transaction + db + (lambda () + (for-each ;; + (lambda (fromrow) + (let* ((a (vector-ref fromrow 0)) + (curr (hash-table-ref/default todat a #f)) + (same #t)) + (let loop ((i 0)) + (if (or (not curr) + (not (equal? (vector-ref fromrow i)(vector-ref curr i)))) + (set! same #f)) + (if (and same + (< i (- num-fields 1))) + (loop (+ i 1)))) + (if (not same) + (begin + (apply sqlite3:execute stmth (vector->list fromrow)) + (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0))) + (set! changed-rows (+ changed-rows 1)))))) + fromdat-lst)))) + fromdats) + + (sqlite3:finalize! stmth) + (if (member "last_update" field-names) + (db:create-trigger db tablename))))) + tbls) + (let* ((runtime (- (current-milliseconds) start-time)) + (should-print (or ;; (debug:debug-mode 12) + (common:low-noise-print 120 "db sync") + (> runtime 500)))) ;; low and high sync times treated as separate. + (for-each + (lambda (dat) + (let ((tblname (car dat)) + (count (cdr dat))) + (set! tot-count (+ tot-count count)))) + (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b)))))) + tot-count)) + +;;====================================================================== +;; Moved from dbfile +;;====================================================================== + ) Index: dcommon.scm ================================================================== --- dcommon.scm +++ dcommon.scm @@ -708,11 +708,13 @@ #:numlin-visible 5 )) (colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId")) (updater (lambda () (if (dashboard:monitor-changed? commondat tabdat) - (let ((servers (server:choose-server *toppath* 'all-valid))) ;; (server:get-list *toppath* limit: 10))) + (let ((servers (case (rmt:transport-mode) + ((http)(server:choose-server *toppath* 'all-valid)) ;; (server:get-list *toppath* limit: 10))) + (else '())))) (iup:attribute-set! servers-matrix "NUMLIN" (length servers)) ;; (set! colnum 0) ;; (for-each (lambda (colname) ;; ;; (print "colnum: " colnum " colname: " colname) ;; (iup:attribute-set! servers-matrix (conc "0:" colnum) colname) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -33,17 +33,19 @@ (declare (uses common)) (declare (uses commonmod)) (declare (uses configf)) (declare (uses db)) (declare (uses ezsteps)) +(declare (uses dbfile)) (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") (include "megatest-fossil-hash.scm") -(import commonmod) +(import commonmod + dbfile) ;;====================================================================== ;; ezsteps ;;====================================================================== @@ -1143,11 +1145,14 @@ (setenv "MT_TESTSUITENAME" (common:get-testsuite-name))) (begin (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.") (set! *toppath* #f) ;; force it to be false so we return #f #f)) - + + ;; needed by various transport and db modules + (dbfile:testsuite-name (common:get-testsuite-name)) ;; (get-testsuite-name *toppath* *configdat*)) + ;; one more attempt to cache the configs for future reading (let* ((cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) (mtcachef (car cachefiles)) (rccachef (cdr cachefiles))) @@ -1579,11 +1584,11 @@ ;; (list 'serverinf *server-info*) #;(list 'homehost (let* ((hhdat (server:get-homehost))) (if hhdat (car hhdat) #f))) - (list 'serverurl (if *runremote* + #;(list 'serverurl (if *runremote* ;; would like to add this back ... WORK NEEDED (remote-server-url *runremote*) #f)) ;; (list 'areaname (common:get-testsuite-name)) (list 'toppath *toppath*) (list 'work-area work-area) Index: margs.scm ================================================================== --- margs.scm +++ margs.scm @@ -23,10 +23,20 @@ (define (args:get-arg arg . default) (if (null? default) (hash-table-ref/default args:arg-hash arg #f) (hash-table-ref/default args:arg-hash arg (car default)))) + +;; get an arg as a number +(define (args:get-arg-number arg . default) + (let* ((val-str (args:get-arg arg)) + (val (if val-str (string->number val-str) #f))) + (if val + val + (if (null? default) + #f + default)))) (define (args:any? . args) (not (null? (filter (lambda (x) x) (map args:get-arg args))))) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -23,10 +23,18 @@ (define (toplevel-command . a) #f) (declare (uses common)) ;; (declare (uses megatest-version)) (declare (uses margs)) +(declare (uses mtargs)) +(declare (uses mtargs.import)) +(declare (uses commonmod)) +(declare (uses commonmod.import)) +(declare (uses mtargs)) +(declare (uses mtargs.import)) +(declare (uses debugprint)) +(declare (uses debugprint.import)) (declare (uses runs)) (declare (uses launch)) (declare (uses server)) (declare (uses client)) (declare (uses tests)) @@ -41,27 +49,29 @@ (declare (uses api)) (declare (uses tasks)) ;; only used for debugging. (declare (uses env)) (declare (uses diff-report)) (declare (uses db)) +(declare (uses dbfile)) +(declare (uses dbfile.import)) (declare (uses dbmod)) (declare (uses dbmod.import)) -(declare (uses commonmod)) -(declare (uses commonmod.import)) -(declare (uses dbfile)) -(declare (uses dbfile.import)) +(declare (uses tcp-transportmod)) +(declare (uses tcp-transportmod.import)) ;; (declare (uses debugprint)) ;; (declare (uses debugprint.import)) -;; (declare (uses mtargs)) -;; (declare (uses mtargs.import)) ;; (declare (uses ftail)) ;; (import ftail) -(import dbmod +(import mtargs + debugprint + dbmod commonmod - dbfile) + dbfile + tcp-transportmod + ) (define *db* #f) ;; this is only for the repl, do not use in general!!!! (include "common_records.scm") (include "key_records.scm") @@ -69,11 +79,11 @@ (include "run_records.scm") (include "megatest-fossil-hash.scm") (use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (prefix base64 base64:) readline apropos json http-client directory-utils typed-records - http-client srfi-18 extras format) + http-client srfi-18 extras format tcp-server tcp) ;; Added for csv stuff - will be removed ;; (use sparse-vectors) @@ -80,10 +90,14 @@ (require-library mutils) (define *usage-log-file* #f) ;; put path to file for logging usage in this var in the ~/.megatestrc file (define *usage-use-seconds* #t) ;; for Epoc seconds in usage logging change this to #t in ~/.megatestrc file +;; set some parameters here - these need to be put in something that can be loaded from other +;; executables such as dashboard and mtutil +;; +(include "transport-mode.scm") (dbfile:db-init-proc db:initialize-main-db) ;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file ;; (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc"))) @@ -373,10 +387,11 @@ "-load" ;; load and exectute a scheme file "-section" "-var" "-dumpmode" "-run-id" + "-db" "-ping" "-refdb2dat" "-o" "-log" "-sync-log" @@ -585,16 +600,16 @@ ;; where (launch:setup) returns #f? ;; (if (or (args:get-arg "-log")(args:get-arg "-server")) ;; redirect the log always when a server (handle-exceptions exn - (begin - (print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) - ) + (begin + (print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) (let* ((tl (or (args:get-arg "-log")(launch:setup))) ;; run launch:setup if -server, ensure we do NOT run launch:setup if -log specified + (dbname (args:get-arg "-db")) ;; for the server logfile name (logf (or (args:get-arg "-log") ;; use -log unless we are a server, then craft a logfile name - (conc tl "/logs/server-" (current-process-id) "-" (get-host-name) ".log"))) + (conc tl "/logs/server-"(or dbname "unk")"-"(current-process-id) "-" (get-host-name) ".log"))) (oup (open-logfile logf))) (if (not (args:get-arg "-log")) (hash-table-set! args:arg-hash "-log" logf)) ;; fake out future queries of -log (debug:print-info 0 *default-log-port* "Sending log output to " logf) (set! *default-log-port* oup)))) @@ -921,13 +936,24 @@ ;;====================================================================== ;; Server? Start up here. ;; (if (args:get-arg "-server") - (let ((tl (launch:setup))) - ;; (server:launch 0 'http) - (http-transport:launch) + (let* (;; (run-id (args:get-arg "-run-id")) + (dbfname (args:get-arg "-db")) + (tl (launch:setup)) + (keys (keys:config-get-fields *configdat*))) + (case (rmt:transport-mode) + ((http)(http-transport:launch)) + ((tcp) + (debug:print 0 *default-log-port* "INFO: Running using tcp method.") + (if dbfname + (tt:start-server tl #f dbfname api:tcp-dispatch-request-make-handler keys) + (begin + (debug:print 0 *default-log-port* "ERROR: transport mode is tcp - -db is required.") + (exit 1)))) + (else (debug:print 0 *default-log-port* "ERROR: rmt:transport-mode value not recognised "(rmt:transport-mode)))) (set! *didsomething* #t))) ;; The adjutant is a bit different, it does NOT run (launch:setup) as it is not necessarily tied to ;; a specific Megatest area. Detail are being hashed out and this may change. ;; @@ -940,20 +966,26 @@ (args:get-arg "-kill-servers")) (let ((tl (launch:setup))) (if tl ;; all roads from here exit (let* ((servers (server:choose-server *toppath* 'all-valid)) ;; (server:get-list *toppath*)) (fmtstr "~33a~22a~20a~20a~8a\n")) - (format #t fmtstr "ID" "host:port" "age (hms)" "Last mod" "State") - (format #t fmtstr "==" "=========" "=========" "========" "=====") - (for-each ;; ( mod-time host port start-time pid ) + (if (not servers) + (begin + (debug:print-info 1 *default-log-port* "No servers found") + (exit) + ) + ) + (format #t fmtstr "PID" "host:port" "age (hms)" "Last mod" "State") + (format #t fmtstr "===" "=========" "=========" "========" "=====") + (for-each ;; (ip-addr port? mod-time host port start-time pid ) (lambda (server) - (let* ((mtm (any->number (car server))) + (let* ((mtm (any->number (caddr server))) (mod (if mtm (- (current-seconds) mtm) "unk")) - (age (- (current-seconds)(or (any->number (list-ref server 3)) (current-seconds)))) - (url (conc (cadr server) ":" (caddr server))) + (age (- (current-seconds)(or (any->number mtm) (current-seconds)))) (pid (list-ref server 4)) - (alv (if (number? mod)(< mod 10) #f))) + (url (conc (car server) ":" (cadr server))) + (alv (if (number? mod)(< mod 360) #f))) (format #t fmtstr pid url (seconds->hr-min-sec age) @@ -966,11 +998,10 @@ (server:kill server))))) (sort servers (lambda (a b) (let ((ma (or (any->number (car a)) 9e9)) (mb (or (any->number (car b)) 9e9))) (> ma mb))))) - ;; (debug:print-info 1 *default-log-port* "Done with listservers") (set! *didsomething* #t) (exit)) (exit)))) ;; must do, would have to add checks to many/all calls below @@ -1370,12 +1401,11 @@ ;; IDEA: megatest list -runname blah% ... ;; (if (or (args:get-arg "-list-runs") (args:get-arg "-list-db-targets")) (if (launch:setup) - (let* (;; (dbstruct (make-dbr:dbstruct path: *toppath* local: (args:get-arg "-local"))) - (runpatt (args:get-arg "-list-runs")) + (let* ((runpatt (args:get-arg "-list-runs")) (access-mode (db:get-access-mode)) (testpatt (common:args-get-testpatt #f)) ;; (if (args:get-arg "-testpatt") ;; (args:get-arg "-testpatt") ;; "%")) @@ -2041,11 +2071,11 @@ (if (args:get-arg "-extract-ods") (general-run-call "-extract-ods" "Make ods spreadsheet" (lambda (target runname keys keyvals) - (let ((dbstruct (make-dbr:dbstruct path: *toppath* local: #t)) + (let ((dbstruct (make-dbr:dbstruct areapath: *toppath* local: #t)) (outputfile (args:get-arg "-extract-ods")) (runspatt (or (args:get-arg "-runname")(args:get-arg ":runname"))) (pathmod (args:get-arg "-pathmod"))) ;; (keyvalalist (keys->alist keys "%"))) (debug:print 2 *default-log-port* "Extract ods, outputfile: " outputfile " runspatt: " runspatt " keyvals: " keyvals) @@ -2308,14 +2338,16 @@ (begin (if (not (launch:setup)) (begin (debug:print 0 *default-log-port* "Failed to setup, exiting") (exit 1))) - (if (not (server:choose-server *toppath* 'home?)) - (begin - (debug:print 0 *default-log-port* "Servers are not running on this host or no servers alive. Cannot run cleanup-db") - (exit 1))) + +;; (if (not (server:choose-server *toppath* 'home?)) +;; (begin +;; (debug:print 0 *default-log-port* "Servers are not running on this host or no servers alive. Cannot run cleanup-db") +;; (exit 1))) + (let ((dbstructs (db:setup #f))) (common:cleanup-db dbstructs)) (set! *didsomething* #t))) (if (args:get-arg "-mark-incompletes") @@ -2370,11 +2402,13 @@ (if (or (getenv "MT_RUNSCRIPT") (args:get-arg "-repl") (args:get-arg "-load")) (let* ((toppath (launch:setup)) (dbstructs (if (and toppath - (server:choose-server toppath 'home?)) + ;; NOTE: server:choose-server is starting a server + ;; either add equivalent for tcp mode or ???? + #;(server:choose-server toppath 'home?)) (db:setup #t) #f))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f))) (if *toppath* (cond ((getenv "MT_RUNSCRIPT") Index: mtargs/mtargs.scm ================================================================== --- mtargs/mtargs.scm +++ mtargs/mtargs.scm @@ -18,16 +18,17 @@ (module mtargs ( arg-hash get-arg + get-arg-number get-arg-from get-args usage print-args any-defined? - ) + ) (import scheme) ;; gives us cond-expand in chicken-4 (cond-expand (chicken-5 @@ -42,10 +43,20 @@ (define (get-arg arg . default) (if (null? default) (hash-table-ref/default arg-hash arg #f) (hash-table-ref/default arg-hash arg (car default)))) + +;; get an arg as a number +(define (get-arg-number arg . default) + (let* ((val-str (get-arg arg)) + (val (if val-str (string->number val-str) #f))) + (if val + val + (if (null? default) + #f + default)))) (define (any-defined? . args) (not (null? (filter (lambda (x) x) (map get-arg args))))) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -21,16 +21,32 @@ (use format typed-records) ;; RADT => purpose of json format?? (declare (unit rmt)) (declare (uses api)) (declare (uses http-transport)) +(declare (uses commonmod)) (declare (uses dbfile)) +;; (declare (uses dbmemmod)) +(declare (uses dbmod)) +(declare (uses tcp-transportmod)) (include "common_records.scm") ;; (declare (uses rmtmod)) +;; used by http-transport (import dbfile) ;; rmtmod) +(import commonmod +;; dbmemmod + dbfile + dbmod + tcp-transportmod) + +;; http - use the old http + in /tmp db +;; tcp - use tcp transport with inmem db +;; nfs - use direct to disk access (read-only) +;; +(define rmt:transport-mode (make-parameter 'http)) ;; ;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!! ;; ;; generate entries for ~/.megatestrc with the following @@ -61,42 +77,44 @@ (cdr hh-dat) (begin (debug:print-info 0 *default-log-port* "hh-dat="hh-dat) #f)))) +(define (make-and-init-remote areapath) + (case (rmt:transport-mode) + ((http)(make-remote)) + ((tcp) (tt:make-remote areapath)) + (else #f))) ;;====================================================================== (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected - - #;(common:telemetry-log (conc "rmt:"(->string cmd)) - payload: `((rid . ,rid) - (params . ,params))) - + (assert *toppath* "FATAL: rmt:send-receive called with *toppath* not set.") (if (> attemptnum 2) (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum)) - + (cond ((> attemptnum 2) (thread-sleep! 0.05)) ((> attemptnum 10) (thread-sleep! 0.5)) ((> attemptnum 20) (thread-sleep! 1))) + + ;; I'm turning this off, it may make sense to move it + ;; into http-transport-handler (if (and (> attemptnum 5) (= 0 (modulo attemptnum 15))) - (begin (server:run *toppath*) (thread-sleep! 3))) - - - ;;DOT digraph megatest_state_status { - ;;DOT ranksep=0; - ;;DOT // rankdir=LR; - ;;DOT node [shape="box"]; - ;;DOT "rmt:send-receive" -> MUTEXLOCK; - ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; } - ;; do all the prep locked under the rmt-mutex - (mutex-lock! *rmt-mutex*) + (begin + (debug:print 0 *default-log-port* "ERROR: can't connect to server, trying to start a server.") + (case (rmt:transport-mode) + ((http) + (server:run *toppath*) + (thread-sleep! 3)) + (else + (thread-sleep! 1) ;; for tcp the server is started by routines in tcp-transportmod. For nfs there is no server + )))) ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. ;; 3. do the query, if on homehost use local access ;; @@ -103,79 +121,76 @@ (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas (runremote (or area-dat *runremote*)) (attemptnum (+ 1 attemptnum)) - (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*))) - - ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity - ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"]; - ;; DOT INIT_RUNREMOTE -> MUTEXLOCK; - ;; ensure we have a record for our connection for given area - (if (not runremote) ;; can remove this one. should never get here. - (begin - (set! *runremote* (make-remote)) - (let* ((server-info (remote-server-info *runremote*))) - (if server-info - (begin - (remote-server-url-set! *runremote* (server:record->url server-info)) - (remote-server-id-set! *runremote* (server:record->id server-info))))) - (set! runremote *runremote*))) ;; new runremote will come from this on next iteration - - (http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode))) - + (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*)) + (testsuite (common:get-testsuite-name)) + (mtexe (common:find-local-megatest))) + + (case (rmt:transport-mode) + ((http)(http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode)) + ((tcp) (tcp-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode testsuite mtexe)) + ((nfs) (nfs:transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode testsuite mtexe)) + ))) + +(define (nfs:transport-handler runremote cmd run-id params attemptnum area-dat areapath readonly-mode testsuite mtexe) + (let* ((keys (common:get-fields *configdat*)) + (dbstruct (dbmod:nfs-get-dbstruct run-id keys (dbfile:db-init-proc) areapath))) + (api:dispatch-request dbstruct cmd run-id params))) + +(define (tcp-transport-handler runremote cmd run-id params attemptnum area-dat areapath readonly-mode testsuite mtexe) + (if (not runremote) + (let* ((newremote (make-and-init-remote areapath))) + (set! *runremote* newremote) + (set! runremote newremote))) + (let* ((dbfname (conc (dbfile:run-id->dbnum run-id)".db"))) ;;(dbfile:run-id->path areapath run-id))) + (tt:handler runremote cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe))) + (define (http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode) - ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity - ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"]; - ;; DOT SET_HOMEHOST -> MUTEXLOCK; + ;; do all the prep locked under the rmt-mutex + (mutex-lock! *rmt-mutex*) + + ;; ensure we have a record for our connection for given area + (if (not runremote) ;; can remove this one. should never get here. + (begin + (set! *runremote* (make-and-init-remote areapath)) + (let* ((server-info (remote-server-info *runremote*))) + (if server-info + (begin + (remote-server-url-set! *runremote* (server:record->url server-info)) + (remote-server-id-set! *runremote* (server:record->id server-info))))) + (set! runremote *runremote*))) ;; new runremote will come from this on next iteration + ;; ensure we have a homehost record (if (or (not (pair? (remote-hh-dat runremote))) ;; not on homehost (not (cdr (remote-hh-dat runremote)))) ;; not on homehost (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little (let ((hh-data (server:choose-server areapath 'homehost))) (remote-hh-dat-set! runremote (or hh-data (cons #f #f))))) - ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) (cond - #;((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds - (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.") - (set! *runremote* #f) - ;; BUG: close-connections should go here? - (mutex-unlock! *rmt-mutex*) - (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat)) - - ;;DOT EXIT; - ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" } ;; give up if more than 150 attempts ((> attemptnum 150) (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.") (exit 1)) - ;;DOT CASE2 [label="local\nreadonly\nquery"]; - ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2} - ;;DOT CASE2 -> "rmt:open-qry-close-locally"; ;; readonly mode, read request- handle it - case 2 ((and readonly-mode (member cmd api:read-only-queries)) (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2") (rmt:open-qry-close-locally cmd 0 params) ) - ;;DOT CASE3 [label="write in\nread-only mode"]; - ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3} - ;;DOT CASE3 -> "#f"; ;; readonly mode, write request. Do nothing, return #f (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params)) ;; This block was for pre-emptively resetting the connection if there had been no communication for some time. ;; I don't think it adds any value. If the server is not there, just fail and start a new connection. ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout) ;; - ;;DOT CASE4 [label="reset\nconnection"]; - ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4} - ;;DOT CASE4 -> "rmt:send-receive"; ;; reset the connection if it has been unused too long ((and runremote (remote-api-url runremote) (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on (+ (remote-last-access runremote) @@ -185,61 +200,27 @@ ;; moving this setting of runremote conndat to #f to inside the http-transport:close-connections ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection. (mutex-unlock! *rmt-mutex*) (rmt:send-receive cmd rid params attemptnum: attemptnum)) - ;;DOT CASE5 [label="local\nread"]; - ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5}; - ;;DOT CASE5 -> "rmt:open-qry-close-locally"; - ;; on homehost and this is a read ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required (rmt:on-homehost? runremote) (member cmd api:read-only-queries)) ;; this is a read (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") (rmt:open-qry-close-locally cmd 0 params)) - ;;DOT CASE6 [label="init\nremote"]; - ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6}; - ;;DOT CASE6 -> "rmt:send-receive"; - ;; on homehost and this is a write, we already have a server, but server has died - - ;; reinstate this keep-alive section but inject a time condition into the (add ... - ;; - ;; ((and (cdr (remote-hh-dat runremote)) ;; on homehost - ;; (not (member cmd api:read-only-queries)) ;; this is a write - ;; (remote-server-url runremote) ;; have a server - ;; (not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. - ;; (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6") - ;; (http-transport:close-connections area-dat: runremote) ;; make sure to clean up - ;; (set! *runremote* (make-remote)) - ;; (let* ((server-info (remote-server-info *runremote*))) - ;; (if server-info - ;; (begin - ;; (remote-server-url-set! *runremote* (server:record->url server-info)) - ;; (remote-server-id-set! *runremote* (server:record->id server-info))))) - ;; (remote-force-server-set! runremote (common:force-server?)) - ;; (mutex-unlock! *rmt-mutex*) - ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6") - ;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) - - ;;DOT CASE7 [label="homehost\nwrite"]; - ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7}; - ;;DOT CASE7 -> "rmt:open-qry-close-locally"; ;; on homehost and this is a write, we already have a server ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required (cdr (remote-hh-dat runremote)) ;; on homehost (not (member cmd api:read-only-queries)) ;; this is a write (remote-server-url runremote)) ;; have a server (needed to sync written data back) (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") (rmt:open-qry-close-locally cmd 0 params)) - ;;DOT CASE8 [label="force\nserver"]; - ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8}; - ;;DOT CASE8 -> "rmt:open-qry-close-locally"; ;; on homehost, no server contact made and this is a write, passively start a server ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required (cdr (remote-hh-dat runremote)) ;; have homehost (not (remote-server-url runremote)) ;; no connection yet (not (member cmd api:read-only-queries))) ;; not a read-only query @@ -286,11 +267,10 @@ ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11}; ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"]; ;;DOT CASE11 -> "RESULT" [label="call succeeded"]; ;; not on homehost, do server query (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)))) -;;DOT } ;; bunch of small functions factored out of send-receive to make debug easier ;; (define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid) @@ -373,11 +353,11 @@ res)) (define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5)) (let* ((qry-is-write (not (member cmd api:read-only-queries))) (db-file-path (db:dbfile-path)) ;; 0)) - (dbstructs-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t))) + (dbstructs-local (db:setup #t)) (read-only (not (file-write-access? db-file-path))) (start (current-milliseconds)) (resdat (if (not (and read-only qry-is-write)) (let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params)))) ;; (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong.. @@ -1021,23 +1001,36 @@ (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id))) (define (rmt:test-get-archive-block-info archive-block-id) (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id))) - (define (rmtmod:calc-ro-mode runremote *toppath*) - (if (and runremote - (remote-ro-mode-checked runremote)) - (remote-ro-mode runremote) - (let* ((mtcfgfile (conc *toppath* "/megatest.config")) - (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future - (if runremote - (begin - (remote-ro-mode-set! runremote ro-mode) - (remote-ro-mode-checked-set! runremote #t) - ro-mode) - ro-mode)))) + (case (rmt:transport-mode) + ((http) + (if (and runremote + (remote-ro-mode-checked runremote)) + (remote-ro-mode runremote) + (let* ((mtcfgfile (conc *toppath* "/megatest.config")) + (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future + (if runremote + (begin + (remote-ro-mode-set! runremote ro-mode) + (remote-ro-mode-checked-set! runremote #t) + ro-mode) + ro-mode)))) + ((tcp) + (if (and runremote + (tt-ro-mode-checked runremote)) + (tt-ro-mode runremote) + (let* ((mtcfgfile (conc *toppath* "/megatest.config")) + (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future + (if runremote + (begin + (tt-ro-mode-set! runremote ro-mode) + (tt-ro-mode-checked-set! runremote #t) + ro-mode) + ro-mode)))))) (define (extras-readonly-mode rmt-mutex log-port cmd params) (mutex-unlock! rmt-mutex) (debug:print-info 12 log-port "rmt:send-receive, case 3") (debug:print 0 log-port "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -22,10 +22,11 @@ sxml-modifications matchable) (declare (unit runs)) (declare (uses db)) (declare (uses common)) +(declare (uses commonmod)) (declare (uses items)) (declare (uses runconfig)) (declare (uses tests)) (declare (uses server)) (declare (uses mt)) @@ -37,10 +38,12 @@ (include "db_records.scm") (include "run_records.scm") (include "test_records.scm") ;; (include "debugger.scm") + +(import commonmod) ;; use this struct to facilitate refactoring ;; (defstruct runs:dat Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -101,24 +101,24 @@ (if *server-id* *server-id* (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic (set! *server-id* sig) *server-id*))) -;; When using zmq this would send the message back (two step process) -;; with spiffy or rpc this simply returns the return data to be returned -;; -(define (server:reply return-addr query-sig success/fail result) - (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result) - ;; (send-message pubsock target send-more: #t) - ;; (send-message pubsock - (case (server:get-transport) - ((rpc) (db:obj->string (vector success/fail query-sig result))) - ((http) (db:obj->string (vector success/fail query-sig result))) - ((fs) result) - (else - (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*) - result))) +;; ;; When using zmq this would send the message back (two step process) +;; ;; with spiffy or rpc this simply returns the return data to be returned +;; ;; +;; (define (server:reply return-addr query-sig success/fail result) +;; (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result) +;; ;; (send-message pubsock target send-more: #t) +;; ;; (send-message pubsock +;; (case (server:get-transport) +;; ((rpc) (db:obj->string (vector success/fail query-sig result))) +;; ((http) (db:obj->string (vector success/fail query-sig result))) +;; ((fs) result) +;; (else +;; (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*) +;; result))) ;; Given an area path, start a server process ### NOTE ### > file 2>&1 ;; if the target-host is set ;; try running on that host ;; incidental: rotate logs in logs/ dir. @@ -469,10 +469,11 @@ ;; find alive rand from youngest ;; 1. sort by age descending ;; 2. take five ;; 3. check alive, discard if not and repeat ;; first we clean up old server files + (assert (eq? (rmt:transport-mode) 'http) "FATAL: server:run called with rmt:transport-mode="(rmt:transport-mode)) (server:clean-up-old areapath) (let* ((since-last (- (current-seconds) server-last-start)) (server-start-delay 10)) (if ( < (- (current-seconds) server-last-start) 10 ) (begin @@ -568,11 +569,13 @@ sfiles))) ;; would like to eventually get rid of this ;; (define (common:on-homehost?) - (server:choose-server *toppath* 'home?)) + (if (eq? (rmt:transport-mode) 'http) + (server:choose-server *toppath* 'home?) + #t)) ;; there is no homehost for tcp and nfs is always on home so #t should work ;; kind start up of server, wait before allowing another server for a given ;; area to be launched ;; (define (server:kind-run areapath) @@ -655,11 +658,11 @@ (handle-exceptions exn (begin (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " servr ", exn=" exn) #f) - (match-let (((mod-time hostname port start-time server-id pid) + (match-let (((hostname port start-time server-id pid) servr)) (tasks:kill-server hostname pid)))) ;; called in megatest.scm, host-port is string hostname:port ;; @@ -676,11 +679,11 @@ (else #f)))) (cond ((and (list? host-port) (eq? (length host-port) 2)) - (let* ((myrunremote (make-remote)) + (let* ((myrunremote (make-and-init-remote *toppath*)) (iface (car host-port)) (port (cadr host-port)) (server-dat (client:connect iface port server-id myrunremote)) (login-res (rmt:login-no-auto-client-setup myrunremote))) (http-transport:close-connections myrunremote) ADDED tcp-transportmod.scm Index: tcp-transportmod.scm ================================================================== --- /dev/null +++ tcp-transportmod.scm @@ -0,0 +1,685 @@ +;;====================================================================== +;; Copyright 2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +(declare (unit tcp-transportmod)) +(declare (uses debugprint)) +(declare (uses commonmod)) +(declare (uses dbfile)) +(declare (uses dbmod)) + +(use address-info) + +(module tcp-transportmod + * + + (import scheme + (prefix sqlite3 sqlite3:) + chicken + data-structures + + address-info + directory-utils + extras + files + hostinfo + matchable + md5 + message-digest + ports + posix + regex + regex-case + s11n + srfi-1 + srfi-18 + srfi-4 + srfi-69 + stack + typed-records + tcp-server + tcp + + debugprint + commonmod + dbfile + dbmod + ) + +;;====================================================================== +;; client +;;====================================================================== + +;; (define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic + +;; Used ONLY for client +;; +(defstruct tt-conn + host + port + host-port + dbfname + server-id + server-start + pid +) + +;; Used for BOTH clients and servers +(defstruct tt + ;; client related + (conns (make-hash-table)) ;; dbfname -> conn + + ;; server related + (areapath #f) + (host #f) + (port #f) + (conn #f) + (cleanup-proc #f) + (handler #f) ;; receives data and responds + (socket #f) + (thread #f) + (host-port #f) + (cmd-thread #f) + (ro-mode #f) + (ro-mode-checked #f) + (last-access (current-seconds)) + (servinf-file #f) + (last-serv-start 0) + ) + +(define (tt:make-remote areapath) + (make-tt areapath: areapath)) + +;; 1 ... or #f +(define (tt:valid-run-id run-id) + (or (number? run-id) + (not run-id))) + +;; do all the busy work of finding and setting up conn for +;; connecting to a server +;; +(define (tt:client-connect-to-server ttdat dbfname run-id testsuite) + (assert (tt:valid-run-id run-id) "FATAL: invalid run-id "run-id) + (let* ((conn (hash-table-ref/default (tt-conns ttdat) dbfname #f)) + (server-start-proc (lambda () + (tt:server-process-run + (tt-areapath ttdat) + testsuite ;; (dbfile:testsuite-name) + (common:find-local-megatest) + run-id)))) + (if conn + conn ;; we are already connected to the server + (let* ((sdat (tt:get-current-server-info ttdat dbfname))) + (match sdat + ((host port start-time server-id pid dbfname2 servinffile) + (assert (equal? dbfname dbfname2) "FATAL: read server info from wrong file.") + (let* ((host-port (conc host":"port)) + (conn (make-tt-conn + host: host + port: port + host-port: host-port + dbfname: dbfname + servinf-file: servinffile + server-id: server-id + server-start: start-time + pid: pid))) + (hash-table-set! (tt-conns ttdat) dbfname conn) + ;; verify we can talk to this server + (if (tt:ping host port server-id) + conn + (let* ((curr-secs (current-seconds))) + ;; rm the (last server) would go here + (if (> (- curr-secs (tt-last-serv-start ttdat)) 10) + (begin + (tt-last-serv-start-set! ttdat curr-secs) + (server-start-proc))) ;; don't try and start server unless 30 sec has gone by since last attempt + (thread-sleep! 1) + (tt:client-connect-to-server ttdat dbfname run-id testsuite))))) + (else + (if (> (- (current-seconds) (tt-last-serv-start ttdat)) 5) ;; really do not want to swamp the machine with servers + (begin + (debug:print-info 0 *default-log-port* "No server found. Starting one for run-id "run-id" in dbfile "dbfname) + (server-start-proc) + (tt-last-serv-start-set! ttdat (current-seconds)))) + (thread-sleep! 1) + (tt:client-connect-to-server ttdat dbfname run-id testsuite))))))) + +(define (tt:ping host port server-id) + (let* ((res (tt:send-receive-direct host port `(ping #f #f #f)))) ;; please send me your server-id + ;; + ;; need two threads, one a 5 second timer + ;; + (match res + ((status errmsg result meta) + (if (equal? result server-id) + (begin + (debug:print 0 *default-log-port* "Ping to "host":"port" successful.") + #t) ;; then we are good + (begin + (debug:print 0 *default-log-port* "WARNING: server-id does not match, expected: "server-id", got: "result) + #f))) + (else + ;; (debug:print 0 *default-log-port* "res not in form (status errmsg result meta), got: "res) + #f)))) + +;; client side handler +;; +;;(tt:handler # get-keys #f () 2 #f "/home/matt/data/megatest/ext-tests" #f "main.db" "ext-tests" "/home/matt/data/megatest/bin/.22.04/../megatest") +;; +(define (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe) + ;; NOTE: areapath is passed in and in tt struct. We'll use passed in value for now. + (let* ((conn (tt:client-connect-to-server ttdat dbfname run-id testsuite))) ;; (hash-table-ref/default (tt-conns ttdat) dbfname #f))) + (if conn + ;; have connection, call the server + (let* ((res (tt:send-receive ttdat conn cmd run-id params))) + ;; res is (status errmsg result meta) + (match res + ((status errmsg result meta) + (if (list? meta) + (let* ((delay-wait (alist-ref 'delay-wait meta))) + (if (and (number? delay-wait) + (> delay-wait 0)) + (begin + (debug:print 0 *default-log-port* "Server is loaded, delaying "delay-wait" seconds") + (thread-sleep! delay-wait))))) + (case status + ((busy) ;; result will be how long the server wants you to delay + (debug:print 0 *default-log-port* "WARNING: server is overloaded, will try again in "result" seconds.") + (thread-sleep! (if (number? result) result 2)) + (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) + ((loaded) + (debug:print 0 *default-log-port* "WARNING: server is loaded, will try again in a 1/4 second.") + (thread-sleep! 0.25) + (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) + (else + result))) + (else + (if (not res) + (let* ((host (tt-conn-host conn)) + (port (tt-conn-port conn)) + ;; (dbfname (tt-conn-port conn)) ;; 192.168.0.127:4242-726924:4.db + (pid (tt-conn-pid conn)) + (servinf (conc areapath"/.servinfo/"host":"port"-"pid":"dbfname))) + (hash-table-set! (tt-conns ttdat) dbfname #f) + (if (file-exists? servinf) + (begin + (debug:print 0 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", attempting to remove servinfo file.") + (delete-file* servinf)) + (debug:print 0 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", but do not see servinf file "servinf)) + (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) + (assert #f "FATAL: tt:handler received bad data "res))))) + (begin + (thread-sleep! 1) ;; give it a rest and try again + (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe))))) + + ;; no conn yet, find and or start and find a server +;; (let* ((server (tt:find-server ttdat dbfname))) +;; (if server +;; (let* ((conn (tt:client-connect-to-server server))) +;; (hash-table-set! (tt-conns ttdat) dbfname conn) +;; (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode +;; dbfname testsuite mtexe)) +;; ;; no server, try to start a server process +;; (begin +;; (tt:server-process-run areapath testsuite mtexe run-id) ;; #!key (profile-mode "")) +;; (thread-sleep! 1) +;; (tt:handler ttdat cmd run-id params attemptnum area-dat areapath +;; readonly-mode dbfname testsuite mtexe))))))) + +(define (tt:bid-for-servership run-id) + #f) + +;; gets server info and appends path to server file +;; sorts by age, oldest first +;; +;; returns list of (host port startseconds server-id servinfofile) +;; +(define (tt:get-server-info-sorted ttdat dbfname) + (let* ((areapath (tt-areapath ttdat)) + (sfiles (tt:find-server areapath dbfname)) + (sdats (filter car (map tt:server-get-info sfiles))) ;; first element is #f if the file disappeared while being read + (sorted (sort sdats (lambda (a b) + (< (list-ref a 2)(list-ref b 2)))))) + sorted)) + +(define (tt:get-current-server-info ttdat dbfname) + (assert (tt-areapath ttdat) "FATAL: areapath not set in ttdat.") + ;; + ;; TODO - replace most of below with tt;get-server-info-sorted + ;; + (let* ((areapath (tt-areapath ttdat)) + (sfiles (tt:find-server areapath dbfname)) + (sdats (filter car (map tt:server-get-info sfiles))) ;; first element is #f if the file disappeared while being read + (sorted (sort sdats (lambda (a b) + (< (list-ref a 2)(list-ref b 2)))))) + (if (null? sorted) + #f ;; we'll want to wait until extra servers have exited + (car sorted)))) + +(define (tt:send-receive ttdat conn cmd run-id params) + (let* ((host-port (tt-conn-host-port conn)) ;; (conc (tt-conn-host conn)":"(tt-conn-port conn))) + (host (tt-conn-host conn)) + (port (tt-conn-port conn)) + (dat (list cmd run-id params #f))) ;; no meta data yet + (tt:send-receive-direct host port dat))) + +(define (tt:send-receive-direct host port dat) + (assert (number? port) "FATAL: tt:send-receive-direct called with port not a number "port) + (handle-exceptions + exn + #f ;; Add condition-case or better handling here + (let-values (((inp oup)(tcp-connect host port))) + (let ((res (if (and inp oup) + (begin + (serialize dat oup) + (close-output-port oup) + (deserialize inp)) + ))) + (close-input-port inp) + res)))) + + + +;;====================================================================== +;; server +;;====================================================================== + +(define (tt:sync-dbs ttdat) + #f) + +;; start the listener and start responding to requests +;; +;; NOTE: organise by dbfname, not run-id so we don't need +;; to pull in more modules +;; +;; This is the routine called in megatest.scm to start a server +;; +;; Server viability is checked in keep-running. Blindly start and run here. +;; +(define (tt:start-server areapath run-id dbfname-in handler keys) + (assert areapath "FATAL: areapath not provided for tt:start-server") + ;; is there already a server for this dbfile? Then exit. + (let* ((ttdat (make-tt areapath: areapath)) + (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id)))) + ;; (servers (tt:find-server areapath dbfname))) ;; should use tt:get-current-server-info instead + ;; (if (null? servers) + (let* ((dbstruct (dbmod:open-dbmoddb areapath run-id dbfname (dbfile:db-init-proc) keys))) + (tt-handler-set! ttdat (handler dbstruct)) + (let* ((tcp-thread (make-thread + (lambda () + (tt:start-tcp-server ttdat)) ;; start the tcp-server which applies handler to incoming data + "tcp-server-thread")) + (run-thread (make-thread + (lambda () + (tt:keep-running ttdat dbfname dbstruct))))) + (thread-start! tcp-thread) + (thread-start! run-thread) + (thread-join! run-thread) ;; run thread will exit on timeout or other conditions + (exit))) + ;;(begin + ;; (debug:print 0 *default-log-port* "INFO: found server(s) already running for db "dbfname", "(string-intersperse servers ",")" Exiting.") + ;; (exit))))) + )) + +(define (tt:keep-running ttdat dbfname dbstruct) + ;; verfiy conn for ready + ;; listener socket has been started by this stage + ;; wait for a port before creating the registration file + ;; + (let* ((cleanup (lambda () + (if (tt-cleanup-proc ttdat) + ((tt-cleanup-proc ttdat)))))) + (let loop ((count 0)) + (if (> count 240) + (begin + (debug:print 0 *default-log-port* "FATAL: Could not start a tcp server, giving up.") + (exit 1)) + (if (not (tt-port ttdat)) ;; no connection yet + (begin + (thread-sleep! 0.25) + (loop (+ count 1)))))) + + ;; load or reload the data into inmem db before + ;; ((dbr:dbstruct-sync-proc dbstruct) (dbr:dbstruct-last-update dbstruct)) + ;; (dbr:dbstruct-last-update-set! dbstruct (- (current-seconds) 1)) + (tt:create-server-registration-file ttdat dbfname) + ;; now start watching the last-access, if it hasn't been touched + ;; in over ten seconds we exit + (thread-sleep! 0.05) ;; any real need for delay here? + (let loop () + (let* ((servers (tt:get-server-info-sorted ttdat dbfname)) + (ok (cond + ((null? servers) #f) ;; not ok + ((equal? (list-ref (car servers) 6) ;; compare the servinfofile + (tt-servinf-file ttdat)) + (debug:print-info 0 *default-log-port* "Keep running, I'm the top server.") + #t) + (else + (debug:print-info 0 *default-log-port* "I'm not the lead server: "servers) + (let* ((leadsrv (car servers))) + (match leadsrv + ((host port startseconds server-id pid dbfname servinfofile) + (if (tt:ping host port server-id) + #f ;; not the server, but all good, want to exit + (if (and (file-exists? servinfofile) + (> (- (current-seconds)(file-modification-time servinfofile)) 5)) + (begin + ;; can't ping and file has been on disk 5 seconds, go ahead and try to remove it + (debug:print-info 0 *default-log-port* "Removing apparently dead server info file: "servinfofile) + (delete-file* servinfofile) + #t) ;; not the server but the server is not reachable + #t))) + (else ;; should never get here + (debug:print 0 *default-log-port* "BAD SERVER RECORD: "leadsrv) + (assert #f "Bad server record "leadsrv)))))))) + (if ok + ;; (if (> *api-process-request-count* 0) ;; have requests in flight + ;; (tt-last-access-set! ttdat (current-seconds))) + (tt-last-access-set! ttdat *db-last-access*) ;; bit silly, just use db-last-access + (begin + (cleanup) + (exit))) + + (let* ((last-update (dbr:dbstruct-last-update dbstruct)) + (curr-secs (current-seconds))) + (if (> (- curr-secs last-update) 3) ;; every 3-4 seconds update the db? maybe this should be refresh the inmem? + (begin + ((dbr:dbstruct-sync-proc dbstruct) last-update) + (dbr:dbstruct-last-update-set! dbstruct curr-secs)))) + + (if (< (- (current-seconds) (tt-last-access ttdat)) 60) + (begin + (thread-sleep! 5) + (loop))))) + (cleanup) + (debug:print 0 *default-log-port* "INFO: Server timed out, exiting."))) + + +;; ;; given an already set up uconn start the cmd-loop +;; ;; +;; (define (tt:cmd-loop ttdat) +;; (let* ((serv-listener (-socket uconn)) +;; (listener (lambda () +;; (let loop ((state 'start)) +;; (let-values (((inp oup)(tcp-accept serv-listener))) +;; ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP +;; (let* ((rdat (deserialize inp)) ;; '(my-host-port qrykey cmd params) +;; (resp (ulex-handler uconn rdat))) +;; (serialize resp oup) +;; (close-input-port inp) +;; (close-output-port oup) +;; ;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP +;; ) +;; (loop state)))))) +;; ;; start N of them +;; (let loop ((thnum 0) +;; (threads '())) +;; (if (< thnum 100) +;; (let* ((th (make-thread listener (conc "listener" thnum)))) +;; (thread-start! th) +;; (loop (+ thnum 1) +;; (cons th threads))) +;; (map thread-join! threads))))) +;; +;; +;; +;; (define (wait-and-close uconn) +;; (thread-join! (udat-cmd-thread uconn)) +;; (tcp-close (udat-socket uconn))) +;; +;; + +(define (tt:shutdown-server ttdat) + (let* ((cleanproc (tt-cleanup-proc ttdat))) + (if cleanproc (cleanproc)) + (tcp-close (tt-socket ttdat)) ;; close up ports here + )) + +;; (define (wait-and-close uconn) +;; (thread-join! (tt-cmd-thread uconn)) +;; (tcp-close (tt-socket uconn))) + +;; return servid +;; side-effects: +;; ttdat-cleanup-proc is populated with function to remove the serverinfo file +(define (tt:create-server-registration-file ttdat dbfname) + (let* ((areapath (tt-areapath ttdat)) + (servdir (tt:get-servinfo-dir areapath)) + (host (tt-host ttdat)) + (port (tt-port ttdat)) + (servinf (conc servdir"/"host":"port"-"(current-process-id)":"dbfname)) + (serv-id (tt:mk-signature areapath)) + (clean-proc (lambda () + (delete-file* servinf)))) + (assert (and host port) "FATAL: tt:create-server-registration-file called with no conn, dbfname="dbfname) + (tt-cleanup-proc-set! ttdat clean-proc) + (tt-servinf-file-set! ttdat servinf) + (with-output-to-file servinf + (lambda () + (print "SERVER STARTED: "host":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)" dbfname: "dbfname))) + serv-id)) + +;; find valid server +;; get servers listed, last part of name must match : +;; if more than one, wait one second and look again +;; future: ping oldest, if alive remove other : files +;; +(define (tt:find-server areapath dbfname) + (let* ((servdir (tt:get-servinfo-dir areapath)) + (sfiles (glob (conc servdir"/*:"dbfname)))) + sfiles)) + +;; given a path to a server info file return: host port startseconds server-id pid dbfname logf +;; example of what it's looking for in the log file: +;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 +;; +(define (tt:server-get-info logf) + (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+) dbfname: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id + (dbprep-rx (regexp "^SERVER: dbprep")) + (dbprep-found 0) + (bad-dat (list #f #f #f #f #f #f logf))) + (let ((fdat (handle-exceptions + exn + (begin + ;; WARNING: this is potentially dangerous to blanket ignore the errors + (debug:print-info 0 *default-log-port* "Unable to get server info from "logf", exn=" exn) + '()) ;; no idea what went wrong, call it a bad server, return empty list + (with-input-from-file logf read-lines)))) + (if (null? fdat) ;; bad data, return bad-dat + bad-dat + (let loop ((inl (car fdat)) + (tail (cdr fdat)) + (lnum 0)) + (let ((mlst (string-match server-rx inl)) + (dbprep (string-match dbprep-rx inl))) + (if dbprep (set! dbprep-found 1)) + (if (not mlst) + (if (> lnum 500) ;; give up if more than 500 lines of server log read + bad-dat + (if (null? tail) + bad-dat + (loop (car tail)(cdr tail)(+ lnum 1)))) + (match mlst ;; have a not null list + ((_ host port start server-id pid dbfname) + (list host + (string->number port) + (string->number start) + server-id + (string->number pid) + dbfname + logf)) + (else + (debug:print 0 *default-log-port* "ERROR: did not recognise SERVER line info "mlst) + bad-dat))))))))) + +;; Given an area path, start a server process ### NOTE ### > file 2>&1 +;; if the target-host is set +;; try running on that host +;; incidental: rotate logs in logs/ dir. +;; +(define (tt:server-process-run areapath testsuite mtexe run-id #!key (profile-mode "")) ;; areapath is *toppath* for a given testsuite area + (assert areapath "FATAL: tt:server-process-run called without areapath defined.") + (assert testsuite "FATAL: tt:server-process-run called without testsuite defined.") + (assert mtexe "FATAL: tt:server-process-run called without mtexe defined.") + (let* ((load (get-normalized-cpu-load)) + (nrun (number-of-processes-running "mtest.*server"))) + (cond + ((> load 2.0) + (debug:print 0 *default-log-port* "Normalized load "load" is over the limit of 2.0. Not starting a server.") + (thread-sleep! 1)) + ((> nrun 40) + (debug:print 0 *default-log-port* nrun" servers running on this host, not starting another.") + (thread-sleep! 1)) + (else + (if (not (file-exists? (conc areapath"/logs"))) + (create-directory (conc areapath"/logs") #t)) + (let* ((logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log")) + (cmdln (conc + mtexe + " -server - ";; (or target-host "-") + " -m testsuite:" testsuite + ;; " -run-id " (or run-id "main") ;; NO, we do NOT want to have run id as part of this + " -db " (dbmod:run-id->dbfname run-id) + " " profile-mode + ))) ;; (conc " >> " logfile " 2>&1 &"))))) + ;; we want the remote server to start in *toppath* so push there + ;; (push-directory areapath) ;; use cd in the command line instead + (debug:print 0 *default-log-port* "INFO: Trying to start server in tcp mode (" cmdln ") at "(common:human-time)"...") + ;; (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time)) + (setenv "NBFAKE_QUIET" "yes") ;; BUG: change to with-environment-variable ... + (setenv "NBFAKE_LOG" logfile) + (system (conc "cd "areapath" ; nbfake " cmdln)) + (unsetenv "NBFAKE_QUIET") + (unsetenv "NBFAKE_LOG") + ;;(pop-directory) + ))))) + +;;====================================================================== +;; tcp connection stuff +;;====================================================================== + +;; find a port and start tcp-server. This only starts the tcp portion of +;; the server, look at (tt:start-server ...) above for the entry point +;; for the entire server system +;; +(define (tt:start-tcp-server ttdat) + (setup-listener ttdat) + (let* ((socket (tt-socket ttdat)) + (handler (tt-handler ttdat))) + ((make-tcp-server socket handler) + #f ;; yes, send error messages to std-err + ))) + +;; create a tcp listener and return a populated udat struct with +;; my port, address, hostname, pid etc. +;; return #f if fail to find a port to allocate. +;; +;; if udata-in is #f create the record +;; if there is already a serv-listener return the udata +;; +(define (setup-listener uconn #!optional (port 4242)) + (assert (tt? uconn) "FATAL: setup-listener called with wrong struct "uconn) + (handle-exceptions + exn + (if (< port 65535) + (setup-listener uconn (+ port 1)) + #f) + (connect-listener uconn port))) + +(define (connect-listener uconn port) + ;; (tcp-listener-socket LISTENER)(socket-name so) + ;; sockaddr-address, sockaddr-port, sockaddr->string + (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]]) + (addr (tt:get-best-guess-address (get-host-name)))) ;; (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname))) + (tt-port-set! uconn port) + (tt-host-set! uconn addr) + (tt-host-port-set! uconn (conc addr":"port)) + (tt-socket-set! uconn tlsn) + uconn)) + +;;====================================================================== +;; utils +;;====================================================================== + +;; Generate a unique signature for this server +(define (tt:mk-signature areapath) + (message-digest-string (md5-primitive) + (with-output-to-string + (lambda () + (write (list areapath + (current-process-id) + (argv))))))) + + +(define (tt:get-best-guess-address hostname) + (let ((res #f)) + (for-each + (lambda (adr) + (if (not (eq? (u8vector-ref adr 0) 127)) + (set! res adr))) + ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME + (vector->list (hostinfo-addresses (hostname->hostinfo hostname)))) + (string-intersperse + (map number->string + (u8vector->list + (if res res (hostname->ip hostname)))) "."))) + +(define (tt:get-servinfo-dir areapath) + (let* ((spath (conc areapath"/.servinfo"))) + (if (not (file-exists? spath)) + (create-directory spath #t)) + spath)) + +;;====================================================================== +;; network utilities +;;====================================================================== + +;; NOTE: Look at address-info egg as alternative to some of this + +(define (rate-ip ipaddr) + (regex-case ipaddr + ( "^127\\..*" _ 0 ) + ( "^(10\\.0|192\\.168)\\..*" _ 1 ) + ( else 2 ) )) + +;; Change this to bias for addresses with a reasonable broadcast value? +;; +(define (ip-pref-less? a b) + (> (rate-ip a) (rate-ip b))) + +(define (get-my-best-address) + (let ((all-my-addresses (get-all-ips))) + (cond + ((null? all-my-addresses) + (get-host-name)) ;; no interfaces? + ((eq? (length all-my-addresses) 1) + (car all-my-addresses)) ;; only one to choose from, just go with it + (else + (car (sort all-my-addresses ip-pref-less?)))))) + +(define (get-all-ips-sorted) + (sort (get-all-ips) ip-pref-less?)) + +(define (get-all-ips) + (map address-info-host + (filter (lambda (x) + (equal? (address-info-type x) "tcp")) + (address-infos (get-host-name))))) + +) ADDED transport-mode.scm.template Index: transport-mode.scm.template ================================================================== --- /dev/null +++ transport-mode.scm.template @@ -0,0 +1,3 @@ +;; 'http or 'tcp +(rmt:transport-mode 'tcp) +;; (rmt:transport-mode 'http) Index: utils/nbfake ================================================================== --- utils/nbfake +++ utils/nbfake @@ -39,10 +39,11 @@ nbfake behavior can be changed by setting the following env vars: NBFAKE_HOST SSH to \$NBFAKE_HOST and run command NBFAKE_LOG Logfile for nbfake output NB_WASH_GROUPS comma-separated list of groups to wash into NB_WASH_ENABLED must be set in order to enable wash groups + NBFAKE_QUIET set to suppress informational output __EOF exit fi @@ -87,19 +88,21 @@ #============================================================================== # Run and log #============================================================================== +if [[ -z "$NBFAKE_QUIET" ]];then cat <<__EOF >&2 #====================================================================== # NBFAKE logging command to: $MY_NBFAKE_LOG # $WASHCMD $* #====================================================================== __EOF +fi if [[ -z "$MY_NBFAKE_HOST" ]]; then # Run locally sh -c "cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=\"$PATH\"; nohup $WASHCMD $* >> $MY_NBFAKE_LOG 2>&1 &" else # run remotely ssh -X -n -f $MY_NBFAKE_HOST "sh -c \"cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=\"$PATH\"; nohup $WASHCMD $* >> $MY_NBFAKE_LOG 2>&1 &\"" fi