Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -28,13 +28,19 @@ ezsteps.scm lock-queue.scm rmt.scm api.scm \ subrun.scm portlogger.scm archive.scm env.scm \ diff-report.scm cgisetup/models/pgdb.scm # module source files -MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm +MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm \ + dbmemmod.scm tcp-transportmod.scm all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut tcmt + +transport-mode.scm : transport-mode.scm.template + cp transport-mode.scm.template transport-mode.scm + +megatest.scm : transport-mode.scm # dbmod.import.o is just a hack here mofiles/dbfile.o : mofiles/debugprint.o mofiles/commonmod.o dbmod.import.o db.o : dbmod.import.o mofiles/debugprint.o : mofiles/mtargs.o Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -18,21 +18,26 @@ ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;; ;;====================================================================== -(use srfi-69 posix) - (declare (unit api)) (declare (uses rmt)) (declare (uses db)) (declare (uses dbmod)) (declare (uses dbfile)) (declare (uses tasks)) +(declare (uses tcp-transportmod)) (import dbmod) (import dbfile) +(import tcp-transportmod) + +(use srfi-69 + posix + matchable + s11n) ;; allow these queries through without starting a server ;; (define api:read-only-queries '(get-key-val-pairs @@ -223,10 +228,39 @@ (begin #;(common:telemetry-log (conc "api-out:"(->string cmd)) payload: `((params . ,params) (ok-res . #f))) (vector #t res)))))))) + +;; indat is (cmd run-id params meta) +(define (api:tcp-dispatch-request-make-handler dbstruct) ;; cmd run-id params) + (lambda () + (let* ((indat (deserialize))) + (set! *api-process-request-count* (+ *api-process-request-count* 1)) + (match indat + ((cmd run-id params meta) + (let* ((status (cond + ((> *api-process-request-count* 50) 'busy) + ((> *api-process-request-count* 25) 'loaded) + (else 'ok))) + (errmsg (case status + ((busy) (conc "Server overloaded, "*api-process-request-count*" threads in flight")) + ((loaded) (conc "Server loaded, "*api-process-request-count*" threads in flight")) + (else #f))) + (result (case status + ((busy) #f) + (else + (case cmd + ((ping) (tt:mk-signature *toppath*)) + (else + (api:dispatch-request dbstruct cmd run-id params)))))) + (payload (list status errmsg result '()))) + (set! *api-process-request-count* (- *api-process-request-count* 1)) + (serialize payload))) + (else + (assert #f "FATAL: failed to deserialize indat "indat)))))) + (define (api:dispatch-request dbstruct cmd run-id params) (case cmd ;;=============================================== ;; READ/WRITE QUERIES Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -104,11 +104,11 @@ (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) (if (not runremote) (begin ;; Here we are creating a runremote where there was none or it was clobbered with #f ;; - (set! runremote (make-remote)) + (set! runremote (make-and-init-remote)) (let* ((server-info (server:check-if-running areapath))) (remote-server-info-set! runremote server-info) (if server-info (begin (remote-server-url-set! runremote (server:record->url server-info)) Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -249,33 +249,10 @@ (define (common:get-sync-lock-filepath) (let* ((tmp-area (common:get-db-tmp-area)) (lockfile (conc tmp-area "/megatest.db.sync-lock"))) lockfile)) -;;====================================================================== -;; when called from a wrapper I need sometimes to find the calling -;; wrapper, this is for dashboard to find the correct megatest. -;; -(define (common:find-local-megatest #!optional (progname "megatest")) - (let ((res (filter file-exists? - (map (lambda (updir) - (let* ((lm (car (argv))) - (dir (pathname-directory lm)) - (exe (pathname-strip-directory lm))) - (conc (if dir (conc dir "/") "") - (case (string->symbol exe) - ((dboard) (conc updir progname)) - ((mtest) (conc updir progname)) - ((dashboard) progname) - (else exe))))) - '("../../" "../"))))) - (if (null? res) - (begin - (debug:print 0 *default-log-port* "Failed to find this executable! Using what can be found on the path") - progname) - (car res)))) - (define *common:logpro-exit-code->status-sym-alist* '( ( 0 . pass ) ( 1 . fail ) ( 2 . warn ) ( 3 . check ) @@ -998,25 +975,19 @@ #f))) (define (common:get-area-path-signature) (message-digest-string (md5-primitive) *toppath*)) -(define (common:get-signature str) - (message-digest-string (md5-primitive) str)) - ;;====================================================================== ;; E X I T H A N D L I N G ;;====================================================================== (define (common:run-sync?) (and *toppath* ;; gate if called before *toppath* is set (common:on-homehost?) (args:get-arg "-server"))) -(define (common:human-time) - (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S")) - (define (std-signal-handler signum) ;; (signal-mask! signum) (set! *time-to-exit* #t) ;;(debug:print-info 13 *default-log-port* "got signal "signum) @@ -2043,16 +2014,20 @@ (begin (debug:print 0 *default-log-port* "ERROR: common:wait-for-homehost-load called before *toppath* set.") (thread-sleep! 30) (if (< (- (current-seconds) start-time) 300) (loop start-time))))) - (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. - #f - (server:choose-server *toppath* 'homehost))) - (hh (if hh-dat (car hh-dat) #f))) - (common:wait-for-normalized-load maxnormload msg hh))) - + (case (rmt:transport-mode) + ((http) + (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. + #f + (server:choose-server *toppath* 'homehost))) + (hh (if hh-dat (car hh-dat) #f))) + (common:wait-for-normalized-load maxnormload msg hh))) + (else + (common:wait-for-normalized-load maxnormload msg (get-host-name))))) + (define (common:get-num-cpus remote-host) (let* ((actual-host (or remote-host (get-host-name)))) ;; hosts had better not be changing the number of cpus too often! (or (hash-table-ref/default *numcpus-cache* actual-host #f) (let* ((numcpus (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (random 3600))) @@ -2653,291 +2628,10 @@ (cond (with-vars (common:without-vars fullcmd)) (with-orig-env (common:with-orig-env fullcmd)) (else (common:without-vars fullcmd "MT_.*"))))) -;;====================================================================== -;; T I M E A N D D A T E -;;====================================================================== - -;;====================================================================== -;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5 -(define (common:hms-string->seconds tstr) - (let ((parts (string-split-fields "\\w+" tstr)) - (time-secs 0) - ;; s=seconds, m=minutes, h=hours, d=days, M=months, y=years, w=weeks - (trx (regexp "(\\d+)([smhdMyw])"))) - (for-each (lambda (part) - (let ((match (string-match trx part))) - (if match - (let ((val (string->number (cadr match))) - (unt (caddr match))) - (if val - (set! time-secs (+ time-secs (* val - (case (string->symbol unt) - ((s) 1) - ((m) 60) ;; minutes - ((h) 3600) - ((d) 86400) - ((w) 604800) - ((M) 2628000) ;; aproximately one month - ((y) 31536000) - (else #f)))))))))) - parts) - time-secs)) - -(define (seconds->hr-min-sec secs) - (let* ((hrs (quotient secs 3600)) - (min (quotient (- secs (* hrs 3600)) 60)) - (sec (- secs (* hrs 3600)(* min 60)))) - (conc (if (> hrs 0)(conc hrs "hr ") "") - (if (> min 0)(conc min "m ") "") - sec "s"))) - -(define (seconds->time-string sec) - (time->string - (seconds->local-time sec) "%H:%M:%S")) - -(define (seconds->work-week/day-time sec) - (time->string - (seconds->local-time sec) "ww%V.%u %H:%M")) - -(define (seconds->work-week/day sec) - (time->string - (seconds->local-time sec) "ww%V.%u")) - -(define (seconds->year-work-week/day sec) - (time->string - (seconds->local-time sec) "%yww%V.%w")) - -(define (seconds->year-work-week/day-time sec) - (time->string - (seconds->local-time sec) "%Yww%V.%w %H:%M")) - -(define (seconds->year-week/day-time sec) - (time->string - (seconds->local-time sec) "%Yw%V.%w %H:%M")) - -(define (seconds->quarter sec) - (case (string->number - (time->string - (seconds->local-time sec) - "%m")) - ((1 2 3) 1) - ((4 5 6) 2) - ((7 8 9) 3) - ((10 11 12) 4) - (else #f))) - -;;====================================================================== -;; basic ISO8601 format (e.g. "2017-02-28 06:02:54") date time => Unix epoch -;; -(define (common:date-time->seconds datetime) - (local-time->seconds (string->time datetime "%Y-%m-%d %H:%M:%S"))) - -;;====================================================================== -;; given span of seconds tstart to tend -;; find start time to mark and mark delta -;; -(define (common:find-start-mark-and-mark-delta tstart tend) - (let* ((deltat (- (max tend (+ tend 10)) tstart)) ;; can't handle runs of less than 4 seconds. Pad it to 10 seconds ... - (result #f) - (min 60) - (hr (* 60 60)) - (day (* 24 hr)) - (yr (* 365 day)) ;; year - (mo (/ yr 12)) - (wk (* day 7))) - (for-each - (lambda (max-blks) - (for-each - (lambda (span) ;; 5 2 1 - (if (not result) - (for-each - (lambda (timeunit timesym) ;; year month day hr min sec - (if (not result) - (let* ((time-blk (* span timeunit)) - (num-blks (quotient deltat time-blk))) - (if (and (> num-blks 4)(< num-blks max-blks)) - (let ((first (* (quotient tstart time-blk) time-blk))) - (set! result (list span timeunit time-blk first timesym)) - ))))) - (list yr mo wk day hr min 1) - '( y mo w d h m s)))) - (list 8 6 5 2 1))) - '(5 10 15 20 30 40 50 500)) - (if values - (apply values result) - (values 0 day 1 0 'd)))) - -;;====================================================================== -;; given x y lim return the cron expansion -;; -(define (common:expand-cron-slash x y lim) - (let loop ((curr x) - (res `())) - (if (< curr lim) - (loop (+ curr y) (cons curr res)) - (reverse res)))) - -;;====================================================================== -;; expand a complex cron string to a list of cron strings -;; -;; x/y => x, x+y, x+2y, x+3y while x+Ny a, b ,c -;; -;; NOTE: with flatten a lot of the crud below can be factored down. -;; -(define (common:cron-expand cron-str) - (if (list? cron-str) - (flatten - (fold (lambda (x res) - (if (list? x) - (let ((newres (map common:cron-expand x))) - (append x newres)) - (cons x res))) - '() - cron-str)) ;; (map common:cron-expand cron-str)) - (let ((cron-items (string-split cron-str)) - (slash-rx (regexp "(\\d+)/(\\d+)")) - (comma-rx (regexp ".*,.*")) - (max-vals '((min . 60) - (hour . 24) - (dayofmonth . 28) ;;; BUG!!!! This will be a bug for some combinations - (month . 12) - (dayofweek . 7)))) - (if (< (length cron-items) 5) ;; bad spec - cron-str ;; `(,cron-str) ;; just return the string, something downstream will fix it - (let loop ((hed (car cron-items)) - (tal (cdr cron-items)) - (type 'min) - (type-tal '(hour dayofmonth month dayofweek)) - (res '())) - (regex-case - hed - (slash-rx ( _ base incr ) (let* ((basen (string->number base)) - (incrn (string->number incr)) - (expanded-vals (common:expand-cron-slash basen incrn (alist-ref type max-vals))) - (new-list-crons (fold (lambda (x myres) - (cons (conc (if (null? res) - "" - (conc (string-intersperse res " ") " ")) - x " " (string-intersperse tal " ")) - myres)) - '() expanded-vals))) - ;; (print "new-list-crons: " new-list-crons) - ;; (fold (lambda (x res) - ;; (if (list? x) - ;; (let ((newres (map common:cron-expand x))) - ;; (append x newres)) - ;; (cons x res))) - ;; '() - (flatten (map common:cron-expand new-list-crons)))) - ;; (map common:cron-expand (map common:cron-expand new-list-crons)))) - (else (if (null? tal) - cron-str - (loop (car tal)(cdr tal)(car type-tal)(cdr type-tal)(append res (list hed))))))))))) - -;;====================================================================== -;; given a cron string and the last time event was processed return #t to run or #f to not run -;; -;; min hour dayofmonth month dayofweek -;; 0-59 0-23 1-31 1-12 0-6 ### NOTE: dayofweek does not include 7 -;; -;; #t => yes, run the job -;; #f => no, do not run the job -;; -(define (common:cron-event cron-str now-seconds-in last-done) ;; ref-seconds = #f is NOW. - (let* ((cron-items (map string->number (string-split cron-str))) - (now-seconds (or now-seconds-in (current-seconds))) - (now-time (seconds->local-time now-seconds)) - (last-done-time (seconds->local-time last-done)) - (all-times (make-hash-table))) - ;; (print "cron-items: " cron-items "(length cron-items): " (length cron-items)) - (if (not (eq? (length cron-items) 5)) ;; don't even try to figure out junk strings - #f - (match-let ((( cmin chour cdayofmonth cmonth cdayofweek) - cron-items) - ;; 0 1 2 3 4 5 6 - ((nsec nmin nhour ndayofmonth nmonth nyr ndayofweek n7 n8 n9) - (vector->list now-time)) - ((lsec lmin lhour ldayofmonth lmonth lyr ldayofweek l7 l8 l9) - (vector->list last-done-time))) - ;; create all possible time slots - ;; remove invalid slots due to (for example) day of week - ;; get the start and end entries for the ref-seconds (current) time - ;; if last-done > ref-seconds => this is an ERROR! - ;; does the last-done time fall in the legit region? - ;; yes => #f do not run again this command - ;; no => #t ok to run the command - (for-each ;; month - (lambda (month) - (for-each ;; dayofmonth - (lambda (dom) - (for-each - (lambda (hr) ;; hour - (for-each - (lambda (minute) ;; minute - (let ((copy-now (apply vector (vector->list now-time)))) - (vector-set! copy-now 0 0) ;; force seconds to zero - (vector-set! copy-now 1 minute) - (vector-set! copy-now 2 hr) - (vector-set! copy-now 3 dom) ;; dom is already corrected for zero referenced - (vector-set! copy-now 4 month) - (let* ((copy-now-secs (local-time->seconds copy-now)) - (new-copy (seconds->local-time copy-now-secs))) ;; remake the time vector - (if (or (not cdayofweek) - (equal? (vector-ref new-copy 6) - cdayofweek)) ;; if the day is specified and a match OR if the day is NOT specified - (if (or (not cdayofmonth) - (equal? (vector-ref new-copy 3) - (+ 1 cdayofmonth))) ;; if the month is specified and a match OR if the month is NOT specified - (hash-table-set! all-times copy-now-secs new-copy)))))) - (if cmin - `(,cmin) ;; if given cmin, have to use it - (list (- nmin 1) nmin (+ nmin 1))))) ;; minute - (if chour - `(,chour) - (list (- nhour 1) nhour (+ nhour 1))))) ;; hour - (if cdayofmonth - `(,cdayofmonth) - (list (- ndayofmonth 1) ndayofmonth (+ ndayofmonth 1))))) - (if cmonth - `(,cmonth) - (list (- nmonth 1) nmonth (+ nmonth 1)))) - (let ((before #f) - (is-in #f)) - (for-each - (lambda (moment) - (if (and before - (<= before now-seconds) - (>= moment now-seconds)) - (begin - ;; (print) - ;; (print "Before: " (time->string (seconds->local-time before))) - ;; (print "Now: " (time->string (seconds->local-time now-seconds))) - ;; (print "After: " (time->string (seconds->local-time moment))) - ;; (print "Last: " (time->string (seconds->local-time last-done))) - (if (< last-done before) - (set! is-in before)) - )) - (set! before moment)) - (sort (hash-table-keys all-times) <)) - is-in))))) - -(define (common:extended-cron cron-str now-seconds-in last-done) - (let ((expanded-cron (common:cron-expand cron-str))) - (if (string? expanded-cron) - (common:cron-event expanded-cron now-seconds-in last-done) - (let loop ((hed (car expanded-cron)) - (tal (cdr expanded-cron))) - (if (common:cron-event hed now-seconds-in last-done) - #t - (if (null? tal) - #f - (loop (car tal)(cdr tal)))))))) - ;;====================================================================== ;; C O L O R S ;;====================================================================== (define (common:name->iup-color name) Index: commonmod.scm ================================================================== --- commonmod.scm +++ commonmod.scm @@ -17,21 +17,37 @@ ;; along with Megatest. If not, see . ;;====================================================================== (declare (unit commonmod)) +;; (declare (uses debugprint)) (use srfi-69) (module commonmod * -(import scheme chicken data-structures extras files) -(import (prefix sqlite3 sqlite3:) - posix typed-records srfi-18 srfi-69 - md5 message-digest - regex srfi-1) +(import scheme + chicken + + (prefix sqlite3 sqlite3:) + data-structures + extras + files + matchable + md5 + message-digest + posix + regex + regex-case + srfi-1 + srfi-18 + srfi-69 + typed-records + + ;; debugprint + ) ;;====================================================================== ;; CONTENTS ;; ;; config file utils @@ -208,18 +224,344 @@ (let ((adat (get-section cfgdat "areas"))) (map (lambda (entry) `(,(car entry) . ,(val->alist (cadr entry)))) adat))) + +;;====================================================================== +;; time utils +;;====================================================================== + +(define (common:human-time) + (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S")) + +;;====================================================================== +;; T I M E A N D D A T E +;;====================================================================== + +;;====================================================================== +;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5 +(define (common:hms-string->seconds tstr) + (let ((parts (string-split-fields "\\w+" tstr)) + (time-secs 0) + ;; s=seconds, m=minutes, h=hours, d=days, M=months, y=years, w=weeks + (trx (regexp "(\\d+)([smhdMyw])"))) + (for-each (lambda (part) + (let ((match (string-match trx part))) + (if match + (let ((val (string->number (cadr match))) + (unt (caddr match))) + (if val + (set! time-secs (+ time-secs (* val + (case (string->symbol unt) + ((s) 1) + ((m) 60) ;; minutes + ((h) 3600) + ((d) 86400) + ((w) 604800) + ((M) 2628000) ;; aproximately one month + ((y) 31536000) + (else #f)))))))))) + parts) + time-secs)) + +(define (seconds->hr-min-sec secs) + (let* ((hrs (quotient secs 3600)) + (min (quotient (- secs (* hrs 3600)) 60)) + (sec (- secs (* hrs 3600)(* min 60)))) + (conc (if (> hrs 0)(conc hrs "hr ") "") + (if (> min 0)(conc min "m ") "") + sec "s"))) + +(define (seconds->time-string sec) + (time->string + (seconds->local-time sec) "%H:%M:%S")) + +(define (seconds->work-week/day-time sec) + (time->string + (seconds->local-time sec) "ww%V.%u %H:%M")) + +(define (seconds->work-week/day sec) + (time->string + (seconds->local-time sec) "ww%V.%u")) + +(define (seconds->year-work-week/day sec) + (time->string + (seconds->local-time sec) "%yww%V.%w")) + +(define (seconds->year-work-week/day-time sec) + (time->string + (seconds->local-time sec) "%Yww%V.%w %H:%M")) + +(define (seconds->year-week/day-time sec) + (time->string + (seconds->local-time sec) "%Yw%V.%w %H:%M")) + +(define (seconds->quarter sec) + (case (string->number + (time->string + (seconds->local-time sec) + "%m")) + ((1 2 3) 1) + ((4 5 6) 2) + ((7 8 9) 3) + ((10 11 12) 4) + (else #f))) + +;;====================================================================== +;; basic ISO8601 format (e.g. "2017-02-28 06:02:54") date time => Unix epoch +;; +(define (common:date-time->seconds datetime) + (local-time->seconds (string->time datetime "%Y-%m-%d %H:%M:%S"))) + +;;====================================================================== +;; given span of seconds tstart to tend +;; find start time to mark and mark delta +;; +(define (common:find-start-mark-and-mark-delta tstart tend) + (let* ((deltat (- (max tend (+ tend 10)) tstart)) ;; can't handle runs of less than 4 seconds. Pad it to 10 seconds ... + (result #f) + (min 60) + (hr (* 60 60)) + (day (* 24 hr)) + (yr (* 365 day)) ;; year + (mo (/ yr 12)) + (wk (* day 7))) + (for-each + (lambda (max-blks) + (for-each + (lambda (span) ;; 5 2 1 + (if (not result) + (for-each + (lambda (timeunit timesym) ;; year month day hr min sec + (if (not result) + (let* ((time-blk (* span timeunit)) + (num-blks (quotient deltat time-blk))) + (if (and (> num-blks 4)(< num-blks max-blks)) + (let ((first (* (quotient tstart time-blk) time-blk))) + (set! result (list span timeunit time-blk first timesym)) + ))))) + (list yr mo wk day hr min 1) + '( y mo w d h m s)))) + (list 8 6 5 2 1))) + '(5 10 15 20 30 40 50 500)) + (if values + (apply values result) + (values 0 day 1 0 'd)))) + +;;====================================================================== +;; given x y lim return the cron expansion +;; +(define (common:expand-cron-slash x y lim) + (let loop ((curr x) + (res `())) + (if (< curr lim) + (loop (+ curr y) (cons curr res)) + (reverse res)))) + +;;====================================================================== +;; expand a complex cron string to a list of cron strings +;; +;; x/y => x, x+y, x+2y, x+3y while x+Ny a, b ,c +;; +;; NOTE: with flatten a lot of the crud below can be factored down. +;; +(define (common:cron-expand cron-str) + (if (list? cron-str) + (flatten + (fold (lambda (x res) + (if (list? x) + (let ((newres (map common:cron-expand x))) + (append x newres)) + (cons x res))) + '() + cron-str)) ;; (map common:cron-expand cron-str)) + (let ((cron-items (string-split cron-str)) + (slash-rx (regexp "(\\d+)/(\\d+)")) + (comma-rx (regexp ".*,.*")) + (max-vals '((min . 60) + (hour . 24) + (dayofmonth . 28) ;;; BUG!!!! This will be a bug for some combinations + (month . 12) + (dayofweek . 7)))) + (if (< (length cron-items) 5) ;; bad spec + cron-str ;; `(,cron-str) ;; just return the string, something downstream will fix it + (let loop ((hed (car cron-items)) + (tal (cdr cron-items)) + (type 'min) + (type-tal '(hour dayofmonth month dayofweek)) + (res '())) + (regex-case + hed + (slash-rx ( _ base incr ) (let* ((basen (string->number base)) + (incrn (string->number incr)) + (expanded-vals (common:expand-cron-slash basen incrn (alist-ref type max-vals))) + (new-list-crons (fold (lambda (x myres) + (cons (conc (if (null? res) + "" + (conc (string-intersperse res " ") " ")) + x " " (string-intersperse tal " ")) + myres)) + '() expanded-vals))) + ;; (print "new-list-crons: " new-list-crons) + ;; (fold (lambda (x res) + ;; (if (list? x) + ;; (let ((newres (map common:cron-expand x))) + ;; (append x newres)) + ;; (cons x res))) + ;; '() + (flatten (map common:cron-expand new-list-crons)))) + ;; (map common:cron-expand (map common:cron-expand new-list-crons)))) + (else (if (null? tal) + cron-str + (loop (car tal)(cdr tal)(car type-tal)(cdr type-tal)(append res (list hed))))))))))) + +;;====================================================================== +;; given a cron string and the last time event was processed return #t to run or #f to not run +;; +;; min hour dayofmonth month dayofweek +;; 0-59 0-23 1-31 1-12 0-6 ### NOTE: dayofweek does not include 7 +;; +;; #t => yes, run the job +;; #f => no, do not run the job +;; +(define (common:cron-event cron-str now-seconds-in last-done) ;; ref-seconds = #f is NOW. + (let* ((cron-items (map string->number (string-split cron-str))) + (now-seconds (or now-seconds-in (current-seconds))) + (now-time (seconds->local-time now-seconds)) + (last-done-time (seconds->local-time last-done)) + (all-times (make-hash-table))) + ;; (print "cron-items: " cron-items "(length cron-items): " (length cron-items)) + (if (not (eq? (length cron-items) 5)) ;; don't even try to figure out junk strings + #f + (match-let ((( cmin chour cdayofmonth cmonth cdayofweek) + cron-items) + ;; 0 1 2 3 4 5 6 + ((nsec nmin nhour ndayofmonth nmonth nyr ndayofweek n7 n8 n9) + (vector->list now-time)) + ((lsec lmin lhour ldayofmonth lmonth lyr ldayofweek l7 l8 l9) + (vector->list last-done-time))) + ;; create all possible time slots + ;; remove invalid slots due to (for example) day of week + ;; get the start and end entries for the ref-seconds (current) time + ;; if last-done > ref-seconds => this is an ERROR! + ;; does the last-done time fall in the legit region? + ;; yes => #f do not run again this command + ;; no => #t ok to run the command + (for-each ;; month + (lambda (month) + (for-each ;; dayofmonth + (lambda (dom) + (for-each + (lambda (hr) ;; hour + (for-each + (lambda (minute) ;; minute + (let ((copy-now (apply vector (vector->list now-time)))) + (vector-set! copy-now 0 0) ;; force seconds to zero + (vector-set! copy-now 1 minute) + (vector-set! copy-now 2 hr) + (vector-set! copy-now 3 dom) ;; dom is already corrected for zero referenced + (vector-set! copy-now 4 month) + (let* ((copy-now-secs (local-time->seconds copy-now)) + (new-copy (seconds->local-time copy-now-secs))) ;; remake the time vector + (if (or (not cdayofweek) + (equal? (vector-ref new-copy 6) + cdayofweek)) ;; if the day is specified and a match OR if the day is NOT specified + (if (or (not cdayofmonth) + (equal? (vector-ref new-copy 3) + (+ 1 cdayofmonth))) ;; if the month is specified and a match OR if the month is NOT specified + (hash-table-set! all-times copy-now-secs new-copy)))))) + (if cmin + `(,cmin) ;; if given cmin, have to use it + (list (- nmin 1) nmin (+ nmin 1))))) ;; minute + (if chour + `(,chour) + (list (- nhour 1) nhour (+ nhour 1))))) ;; hour + (if cdayofmonth + `(,cdayofmonth) + (list (- ndayofmonth 1) ndayofmonth (+ ndayofmonth 1))))) + (if cmonth + `(,cmonth) + (list (- nmonth 1) nmonth (+ nmonth 1)))) + (let ((before #f) + (is-in #f)) + (for-each + (lambda (moment) + (if (and before + (<= before now-seconds) + (>= moment now-seconds)) + (begin + ;; (print) + ;; (print "Before: " (time->string (seconds->local-time before))) + ;; (print "Now: " (time->string (seconds->local-time now-seconds))) + ;; (print "After: " (time->string (seconds->local-time moment))) + ;; (print "Last: " (time->string (seconds->local-time last-done))) + (if (< last-done before) + (set! is-in before)) + )) + (set! before moment)) + (sort (hash-table-keys all-times) <)) + is-in))))) + +(define (common:extended-cron cron-str now-seconds-in last-done) + (let ((expanded-cron (common:cron-expand cron-str))) + (if (string? expanded-cron) + (common:cron-event expanded-cron now-seconds-in last-done) + (let loop ((hed (car expanded-cron)) + (tal (cdr expanded-cron))) + (if (common:cron-event hed now-seconds-in last-done) + #t + (if (null? tal) + #f + (loop (car tal)(cdr tal)))))))) + + ;;====================================================================== ;; misc stuff ;;====================================================================== -;; (define (debug:print . params) #f) -;; (define (debug:print-info . params) #f) -;; -;; (define (set-functions dbgp dbgpinfo) -;; (set! debug:print dbgp) -;; (set! debug:print-info dbgpinfo)) +(define (common:get-signature str) + (message-digest-string (md5-primitive) str)) + +;;====================================================================== +;; hash of hashs +;;====================================================================== + +(define (db:hoh-set! dat key1 key2 val) + (let* ((subhash (hash-table-ref/default dat key1 #f))) + (if subhash + (hash-table-set! subhash key2 val) + (begin + (hash-table-set! dat key1 (make-hash-table)) + (db:hoh-set! dat key1 key2 val))))) + +(define (db:hoh-get dat key1 key2) + (let* ((subhash (hash-table-ref/default dat key1 #f))) + (and subhash + (hash-table-ref/default subhash key2 #f)))) + +;;====================================================================== +;; when called from a wrapper I need sometimes to find the calling +;; wrapper, this is for dashboard to find the correct megatest. +;; +(define (common:find-local-megatest #!optional (progname "megatest")) + (let ((res (filter file-exists? + (map (lambda (updir) + (let* ((lm (car (argv))) + (dir (pathname-directory lm)) + (exe (pathname-strip-directory lm))) + (conc (if dir (conc dir "/") "") + (case (string->symbol exe) + ((dboard) (conc updir progname)) + ((mtest) (conc updir progname)) + ((dashboard) progname) + (else exe))))) + '("../../" "../"))))) + (if (null? res) + (begin + ;; (debug:print 0 *default-log-port* "Failed to find this executable! Using what can be found on the path") + progname) + (car res)))) ) Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -54,10 +54,14 @@ (include "task_records.scm") (include "megatest-version.scm") (include "megatest-fossil-hash.scm") (include "vg_records.scm") +;; set some parameters here - these need to be put in something that can be loaded from other +;; executables such as dashboard and mtutil +;; +(include "transport-mode.scm") (dbfile:db-init-proc db:initialize-main-db) (define help (conc "Megatest Dashboard, documentation at http://www.kiatoa.com/fossils/megatest version " megatest-version " license GPL, Copyright (C) Matt Welland 2012-2017 Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -22,10 +22,20 @@ ;; Database access ;;====================================================================== ;; dbstruct vector containing all the relevant dbs like main.db, megatest.db, run.db etc +(declare (unit db)) +(declare (uses common)) +(declare (uses debugprint)) +(declare (uses dbmod)) +(declare (uses dbfile)) +(declare (uses keys)) +(declare (uses ods)) +(declare (uses client)) +(declare (uses mt)) + (use (srfi 18) extras tcp stack (prefix sqlite3 sqlite3:) @@ -44,28 +54,19 @@ z3 typed-records matchable files) -(declare (unit db)) -(declare (uses common)) -(declare (uses dbmod)) -;; (declare (uses debugprint)) -(declare (uses dbfile)) -(declare (uses keys)) -(declare (uses ods)) -(declare (uses client)) -(declare (uses mt)) - (include "common_records.scm") (include "db_records.scm") (include "key_records.scm") (include "run_records.scm") (define *number-of-writes* 0) (define *number-non-write-queries* 0) +(import debugprint) (import dbmod) (import dbfile) ;; record for keeping state,status and count for doing roll-ups in ;; iterated tests @@ -124,10 +125,88 @@ (assert *toppath* "FATAL: db:setup called before launch:setup has been run.") (let* ((tmpdir (common:get-db-tmp-area))) (if (not *dbstruct-dbs*) (dbfile:setup do-sync *toppath* tmpdir) *dbstruct-dbs*))) + +(define (db:with-db dbstruct run-id r/w proc . params) + (case (rmt:transport-mode) + ((http)(dbfile:with-db dbstruct run-id r/w proc params)) + ((tcp) (dbmod:with-db dbstruct run-id r/w proc params)))) + +;; moved from dbfile +;; +;; ADD run-id SUPPORT +;; +(define (db:create-all-triggers dbstruct) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (db:create-triggers db)))) + +(define (db:create-triggers db) + (for-each (lambda (key) + (sqlite3:execute db (cadr key))) + db:trigger-list)) + +(define (db:drop-all-triggers dbstruct) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (db:drop-triggers db)))) + +(define (db:have-incompletes? dbstruct run-id ovr-deadtime) + (let* ((incompleted '()) + (oldlaunched '()) + (toplevels '()) + ;; (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) + (deadtime (or ovr-deadtime 72000))) ;; twenty hours + (db:with-db + dbstruct run-id #f + (lambda (dbdat db) + + ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes + ;; + ;; HOWEVER: this code in run:test seems to work fine + ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) + ;; (db:test-get-run_duration testdat))) + ;; 600) + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (begin + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels))) + ;; (print-info "Found old toplevel test in RUNNING state, test-id=" test-id)) + (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) + (db:get-cache-stmth dbdat db + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');") + run-id deadtime) + + ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config + ;; + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) + (db:get-cache-stmth dbdat db + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');") + run-id) + + ;; (print-info "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") + (if (and (null? incompleted) + (null? oldlaunched) + (null? toplevels)) + #f + #t))))) + ;; looks up subdb and returns it, if not found then set up ;; and then return it. ;; #;(define (db:get-db dbstruct run-id) @@ -583,12 +662,11 @@ (define (db:tmp->megatest.db-sync dbstruct run-id last-update) (let* ((subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct))) (res '())) (for-each (lambda (subdb) - (let* ((dbname (db:run-id->dbname run-id)) - (mtdb (dbr:subdb-mtdb subdb)) + (let* ((mtdb (dbr:subdb-mtdb subdb)) (tmpdb (db:get-subdb dbstruct run-id)) (refndb (dbr:subdb-refndb subdb)) (newres (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) last-update tmpdb refndb mtdb))) ;; (stack-push! (dbr:subdb-dbstack subdb) tmpdb) ;; BUG: verify this is really needed Index: dbfile.scm ================================================================== --- dbfile.scm +++ dbfile.scm @@ -41,11 +41,12 @@ commonmod ;; debugprint ) (define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic -(define num-run-dbs (make-parameter 10)) ;; number of db's in .megatest +(define num-run-dbs (make-parameter 10)) ;; number of db's in .megatest +(define dbfile:testsuite-name (make-parameter #f)) ;;====================================================================== ;; R E C O R D S ;;====================================================================== @@ -56,10 +57,19 @@ (areapath #f) (homehost #f) (tmppath #f) (read-only #f) (subdbs (make-hash-table)) + ;; + ;; for the inmem approach (see dbmod.scm) + ;; this is one db per server + (inmem #f) ;; handle for the in memory copy + (dbfile #f) ;; path to the db file on disk + (ondiskdb #f) ;; handle for the on-disk file + (dbdat #f) ;; create a dbdat for the downstream calls such as db:with-db + (last-update 0) + (syncback-proc #f) ) ;; NOTE: Need one dbr:subdb per main.db, 1.db ... ;; (defstruct dbr:subdb @@ -570,11 +580,11 @@ (tmpdb (db:open-db dbstruct run-id dbinit)) ;; sqlite3-db tmpdbfile #f)) (start-t (current-seconds))) (mutex-lock! *db-multi-sync-mutex*) (let ((update_info (cons "last_update" (if force-sync 0 *db-last-sync*) ))) (mutex-unlock! *db-multi-sync-mutex*) - (db:sync-tables (db:sync-all-tables-list dbstruct keys) update_info tmpdb mtdb)) + (db:sync-tables (db:sync-all-tables-list keys) update_info tmpdb mtdb)) (mutex-lock! *db-multi-sync-mutex*) (set! *db-last-sync* start-t) (set! *db-last-access* start-t) (mutex-unlock! *db-multi-sync-mutex*) (dbfile:add-dbdat dbstruct run-id tmpdb) @@ -633,12 +643,12 @@ '("type" #f) '("last_update" #f)))) ;; needs db to get keys, this is for syncing all tables ;; -(define (db:sync-main-list dbstruct keys) - (let ((keys keys)) ;; (db:get-keys dbstruct))) +(define (db:sync-main-list keys) + (let ((keys keys)) (list (list "keys" '("id" #f) '("fieldname" #f) '("fieldtype" #f)) @@ -689,12 +699,12 @@ '("params" #f) '("creation_time" #f) '("execution_time" #f)) ))) -(define (db:sync-all-tables-list dbstruct keys) - (append (db:sync-main-list dbstruct keys) +(define (db:sync-all-tables-list keys) + (append (db:sync-main-list keys) db:sync-tests-only)) ;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) ) ;; db's are dbdat's ;; @@ -945,30 +955,10 @@ FOR EACH ROW BEGIN UPDATE test_data SET last_update=(strftime('%s','now')) WHERE id=old.id; END;" ))) -;; -;; ADD run-id SUPPORT -;; -(define (db:create-all-triggers dbstruct) - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (db:create-triggers db)))) - -(define (db:create-triggers db) - (for-each (lambda (key) - (sqlite3:execute db (cadr key))) - db:trigger-list)) - -(define (db:drop-all-triggers dbstruct) - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (db:drop-triggers db)))) - (define (db:is-trigger-dropped db tbl-name) (let* ((trigger-name (if (equal? tbl-name "test_steps") "update_teststeps_trigger" (conc "update_" tbl-name "_trigger"))) (res #f)) @@ -1012,10 +1002,14 @@ ;; call with dbinit=db:initialize-main-db ;; (define (db:open-db dbstruct run-id dbinit) ;; (mutex-lock! *db-open-mutex*) (let* ((dbdat (dbfile:open-db dbstruct run-id dbinit))) + #;(case (rmt:transport-mode) + ((http) (dbfile:open-db dbstruct run-id dbinit)) + ((tcp) (dbmod:open-db dbstruct run-id dbinit)) + (else (assert #f "FATAL: rmt:transport-node not correct value"(rmt:transport-mode)))) (set! *db-write-access* (not (dbr:dbdat-read-only dbdat))) ;; (mutex-unlock! *db-open-mutex*) dbdat)) (define dbfile:db-init-proc (make-parameter #f)) @@ -1078,11 +1072,11 @@ (define no-condition-db-with-db (make-parameter #t)) ;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;") ;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no ;; -(define (db:with-db dbstruct run-id r/w proc . params) +(define (dbfile:with-db dbstruct run-id r/w proc params) (assert dbstruct "FATAL: db:with-db called with dbstruct "#f) (assert (dbr:dbstruct? dbstruct) "FATAL: dbstruct is "dbstruct) (let* ((use-mutex (> *api-process-request-count* 25)) ;; risk of db corruption (have-struct (dbr:dbstruct? dbstruct)) (dbdat (if have-struct ;; this stuff just allows us to call with a db handle directly @@ -1268,58 +1262,8 @@ (let* ((newstmth (sqlite3:prepare db stmt))) ;; (db:hoh-set! stmt-cache db stmt newstmth) (hash-table-set! stmt-cache stmt newstmth) newstmth)))) -(define (db:have-incompletes? dbstruct run-id ovr-deadtime) - (let* ((incompleted '()) - (oldlaunched '()) - (toplevels '()) - ;; (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) - (deadtime (or ovr-deadtime 72000))) ;; twenty hours - (db:with-db - dbstruct run-id #f - (lambda (dbdat db) - - ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes - ;; - ;; HOWEVER: this code in run:test seems to work fine - ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) - ;; (db:test-get-run_duration testdat))) - ;; 600) - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (begin - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels))) - ;; (print-info "Found old toplevel test in RUNNING state, test-id=" test-id)) - (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) - (db:get-cache-stmth dbdat db - "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');") - run-id deadtime) - - ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config - ;; - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) - (db:get-cache-stmth dbdat db - "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');") - run-id) - - ;; (print-info "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") - (if (and (null? incompleted) - (null? oldlaunched) - (null? toplevels)) - #f - #t))))) ) ADDED dbmemmod.scm Index: dbmemmod.scm ================================================================== --- /dev/null +++ dbmemmod.scm @@ -0,0 +1,1322 @@ +;;====================================================================== +;; Copyright 2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +(declare (unit dbmemmod)) +(declare (uses debugprint)) +(declare (uses commonmod)) + +(module dbmemmod + * + + (import scheme + chicken + data-structures + extras + matchable) + +(import (prefix sqlite3 sqlite3:) + posix typed-records srfi-18 srfi-1 + srfi-69 + stack + files + ports + + debugprint + commonmod + ) + +(define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic +(define num-run-dbs (make-parameter 10)) ;; number of db's in .megatest + +;;====================================================================== +;; R E C O R D S +;;====================================================================== + +;; a single Megatest area with it's multiple dbs is +;; managed in a dbstruct +;; +(defstruct dbr:dbstruct + (areapath #f) + (homehost #f) + (tmppath #f) + (read-only #f) + (subdbs (make-hash-table)) + ) + +;; NOTE: Need one dbr:subdb per main.db, 1.db ... +;; +(defstruct dbr:subdb + (dbname #f) ;; .megatest/1.db + (mtdbfile #f) ;; mtrah/.megatest/1.db + (mtdbdat #f) ;; only need one of these for syncing + ;; (dbdats (make-hash-table)) ;; id => dbdat + (tmpdbfile #f) ;; /tmp/.../.megatest/1.db + ;; (refndbfile #f) ;; /tmp/.../.megatest/1.db_ref + (dbstack (make-stack)) ;; stack for tmp dbr:dbdat, + (homehost #f) ;; not used yet + (on-homehost #f) ;; not used yet + (read-only #f) + (last-sync 0) + (last-write (current-seconds)) + ) ;; goal is to converge on one struct for an area but for now it is too confusing + +;; need to keep dbhandles and cached statements together +(defstruct dbr:dbdat + (dbfile #f) + (dbh #f) + (stmt-cache (make-hash-table)) + (read-only #f) + (birth-sec (current-seconds))) + +(define *dbstruct-dbs* #f) +(define *db-open-mutex* (make-mutex)) +(define *db-access-mutex* (make-mutex)) ;; used in common.scm +(define *no-sync-db* #f) +(define *db-sync-in-progress* #f) +(define *db-with-db-mutex* (make-mutex)) +(define *max-api-process-requests* 0) +(define *api-process-request-count* 0) +(define *db-write-access* #t) +(define *db-last-sync* 0) ;; last time the sync to megatest.db happened +(define *db-multi-sync-mutex* (make-mutex)) ;; protect access to *db-sync-in-progress*, *db-last-sync* + +(define (db:generic-error-printout exn . message) + (print-call-chain (current-error-port)) + (apply dbfile:print-err message) + (dbfile:print-err + ", error: " ((condition-property-accessor 'exn 'message) exn) + ", arguments: " ((condition-property-accessor 'exn 'arguments) exn) + ", location: " ((condition-property-accessor 'exn 'location) exn) + )) + +(define (dbfile:run-id->key run-id) + (or run-id 'main)) + +(define (db:safely-close-sqlite3-db db stmt-cache #!key (try-num 3)) + (if (<= try-num 0) + #f + (handle-exceptions + exn + (begin + (print "Attempt to safely close sqlite3 db failed. Trying again. exn=" exn) + (thread-sleep! 3) + (sqlite3:interrupt! db) + (db:safely-close-sqlite3-db db stmt-cache try-num: (- try-num 1))) + (if (sqlite3:database? db) + (let* ((stmts (and stmt-cache (hash-table-ref/default stmt-cache db #f)))) + (if stmts (map sqlite3:finalize! (hash-table-values stmts))) + (sqlite3:finalize! db) + #t) + (begin + (dbfile:print-err "db:safely-close-sqlite3-db: " db " is not an sqlite3 db") + #f + ) + )))) + +;; close all opened run-id dbs +(define (db:close-all dbstruct) + (if (dbr:dbstruct? dbstruct) +;; (handle-exceptions +;; exn +;; (begin +;; (debug:print 0 *default-log-port* "WARNING: Finalizing failed, " ((condition-property-accessor 'exn 'message) exn) ", note - exn=" exn) +;; (print-call-chain *default-log-port*)) + ;; (db:sync-touched dbstruct 0 force-sync: #t) ;; NO. Do not do this here. Instead we rely on a server to be started when there are writes, even if the server itself is not going to be used as a server. + (let* ((subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct)))) + (for-each + (lambda (subdb) + (let* ((tdbs (stack->list (dbr:subdb-dbstack subdb))) + (mtdbdat (dbr:dbdat-dbh (dbr:subdb-mtdbdat subdb))) + #;(rdb (dbr:dbdat-dbh (dbr:subdb-refndb subdb)))) + + (map (lambda (dbdat) + (let* ((stmt-cache (dbr:dbdat-stmt-cache dbdat)) + (dbh (dbr:dbdat-dbh dbdat))) + (db:safely-close-sqlite3-db dbh stmt-cache))) + tdbs) + (db:safely-close-sqlite3-db mtdbdat (dbr:dbdat-stmt-cache (dbr:subdb-mtdbdat subdb))) + ;; (if (sqlite3:database? mdb) (sqlite3:finalize! mdb)) + #;(db:safely-close-sqlite3-db rdb #f))) ;; stmt-cache))))) ;; (if (sqlite3:database? rdb) (sqlite3:finalize! rdb)))))) + subdbs) + #t + ) + #f + ) +) + +;; ;; set up a single db (e.g. main.db, 1.db ... etc.) +;; ;; +;; (define (db:setup-db dbstruct areapath run-id) +;; (let* ((dbname (db:run-id->dbname run-id)) +;; (dbstruct (hash-table-ref/default dbstructs dbname #f))) +;; (if dbstruct +;; dbstruct +;; (let* ((dbstruct-new (make-dbr:dbstruct))) +;; (db:open-db dbstruct-new run-id areapath: areapath do-sync: #t) +;; (hash-table-set! dbstructs dbname dbstruct-new) +;; dbstruct-new)))) + +;; ; Returns the dbdat for a particular dbfile inside the area +;; ;; +;; (define (dbr:dbstruct-get-dbdat dbstruct dbfile) +;; (hash-table-ref/default (dbr:dbstruct-dbdats dbstruct) dbfile #f)) +;; +;; (define (dbr:dbstruct-dbdat-put! dbstruct dbfile db) +;; (hash-table-set! (dbr:dbstruct-dbdats dbstruct) dbfile db)) +;; +;; (define (db:run-id->first-num run-id) +;; (let* ((s (number->string run-id)) +;; (l (string-length s))) +;; (substring s (- l 1) l))) + +;; 1234 => 4/1234.db +;; #f => 0/main.db +;; (abandoned the idea of num/db) +;; +(define (dbfile:run-id->path apath run-id) + (conc apath"/"(dbfile:run-id->dbname run-id))) + +(define (db:dbname->path apath dbname) + (conc apath"/"dbname)) + +(define (dbfile:run-id->dbnum run-id) + (cond + ((number? run-id) + (modulo run-id (num-run-dbs))) + ((not run-id) "main") ;; 0 or main? + (else run-id))) + +;; POTENTIAL BUG: this implementation could produce a db file if run-id is neither #f or a number +(define (dbfile:run-id->dbname run-id) + (conc ".megatest/"(dbfile:run-id->dbnum run-id)".db")) + +;; Make the dbstruct, setup up auxillary db's and call for main db at least once +;; +;; called in http-transport and replicated in rmt.scm for *local* access. +;; +(define (dbfile:setup do-sync areapath tmppath) + (cond + (*dbstruct-dbs* + (dbfile:print-err "WARNING: dbfile:setup called when *dbstruct-dbs* is already initialized") + *dbstruct-dbs*) ;; TODO: when multiple areas are supported, this optimization will be a hazard + (else + (let* ((dbstruct (make-dbr:dbstruct))) + (set! *dbstruct-dbs* dbstruct) + (dbr:dbstruct-areapath-set! dbstruct areapath) + (dbr:dbstruct-tmppath-set! dbstruct tmppath) + dbstruct)))) + +(define (dbfile:get-subdb dbstruct run-id) + (let* ((dbfname (dbfile:run-id->dbname run-id))) + (hash-table-ref/default (dbr:dbstruct-subdbs dbstruct) dbfname #f))) + +(define (dbfile:set-subdb dbstruct run-id subdb) + (hash-table-set! (dbr:dbstruct-subdbs dbstruct) (dbfile:run-id->dbname run-id) subdb)) + +;; (define *dbfile:num-handles-in-use* 0) + +;; Get/open a database +;; if run-id => get run specific db +;; if #f => get main db +;; if run-id is a string treat it as a filename +;; if db already open - return inmem +;; if db not open, open inmem, rundb and sync then return inmem +;; inuse gets set automatically for rundb's +;; +(define (dbfile:get-dbdat dbstruct run-id) + (let* ((subdb (dbfile:get-subdb dbstruct run-id))) + (if (stack-empty? (dbr:subdb-dbstack subdb)) + #f + (begin + (stack-pop! (dbr:subdb-dbstack subdb)))))) + +;; return a previously opened db handle to the stack of available handles +(define (dbfile:add-dbdat dbstruct run-id dbdat) + (let* ((subdb (dbfile:get-subdb dbstruct run-id)) + (dbstk (dbr:subdb-dbstack subdb)) + (count (stack-count dbstk))) + (if (> count 15) + (dbfile:print-err "WARNING: stack for "run-id".db is "count".")) + (stack-push! dbstk dbdat) + dbdat)) + +;; set up a subdb +;; +(define (dbfile:init-subdb dbstruct run-id init-proc) + (let* ((dbname (dbfile:run-id->dbname run-id)) + (areapath (dbr:dbstruct-areapath dbstruct)) + (tmppath (dbr:dbstruct-tmppath dbstruct)) + (mtdbpath (dbfile:run-id->path areapath run-id)) + (tmpdbpath (dbfile:run-id->path tmppath run-id)) + (mtdbdat (dbfile:open-sqlite3-db mtdbpath init-proc sync-mode: 0 journal-mode: #f)) ;; "WAL")) + (newsubdb (make-dbr:subdb dbname: dbname + mtdbfile: mtdbpath + tmpdbfile: tmpdbpath + mtdbdat: mtdbdat))) + (dbfile:set-subdb dbstruct run-id newsubdb) + newsubdb)) ;; return the new subdb - but shouldn't really use it + +;; returns dbdat with dbh and dbfilepath +;; +;; NOTE: the handle is on /tmp db file! +;; +;; 1. if needed setup the subdb for the given run-id +;; 2. if there is no existing db handle in the stack +;; create a new handle and return it (do NOT add +;; it to the stack). +;; +(define (dbfile:open-db dbstruct run-id init-proc) + (let* ((subdb (dbfile:get-subdb dbstruct run-id))) + (if (not subdb) ;; not yet defined + (begin + (dbfile:init-subdb dbstruct run-id init-proc) + (dbfile:open-db dbstruct run-id init-proc)) + (let* ((dbdat (dbfile:get-dbdat dbstruct run-id))) + (if dbdat + dbdat + (let* ((tmppath (dbr:dbstruct-tmppath dbstruct)) + (tmpdbpath (dbfile:run-id->path tmppath run-id)) + (dbdat (dbfile:open-sqlite3-db tmpdbpath init-proc sync-mode: 0 journal-mode: "WAL"))) + ;; the following line short-circuits the "one db handle per thread" model + ;; + ;; (dbfile:add-dbdat dbstruct run-id dbdat) + ;; + dbdat)))))) + +;; COMBINE dbfile:open-sqlite-db and dbfile:lock-create-open +;; + +;; this stuff is for initial debugging, please remove it when +;; this code stabilizes +(define *dbopens* (make-hash-table)) +(define (dbfile:inc-db-open dbfile) + (let* ((curr-opens-count (+ (hash-table-ref/default *dbopens* dbfile 0) 1))) + (if (and (> curr-opens-count 1) ;; this should NOT be happening + (common:low-noise-print 15 "db-opens")) + (dbfile:print-err "INFO: db "dbfile" has been opened "curr-opens-count" times!")) + (hash-table-set! *dbopens* dbfile curr-opens-count) + curr-opens-count)) + +;; Open the classic megatest.db file (defaults to open in toppath) +;; +;; NOTE: returns a dbdat not a dbstruct! +;; +(define (dbfile:open-sqlite3-db dbpath init-proc #!key (sync-mode 0)(journal-mode #f)) + (let* ((dbexists (file-exists? dbpath)) + (write-access (file-write-access? dbpath)) + (db (dbfile:cautious-open-database dbpath init-proc sync-mode journal-mode))) + (dbfile:inc-db-open dbpath) + ;; (init-proc db) + (make-dbr:dbdat dbfile: dbpath dbh: db read-only: (not write-access)))) + +(define (dbfile:print-and-exit . params) + (with-output-to-port + (current-error-port) + (lambda () + (apply print params))) + (exit 1)) + +(define (dbfile:print-err . params) + (with-output-to-port + (current-error-port) + (lambda () + (apply print params)))) + +(define (dbfile:cautious-open-database fname init-proc sync-mode journal-mode #!optional (tries-left 500)) + (let* ((busy-file (conc fname "-journal")) + (delay-time (* (- 51 tries-left) 1.1)) + (write-access (file-write-access? fname)) + (dir-access (file-write-access? (pathname-directory fname))) + (retry (lambda () + (thread-sleep! delay-time) + (if (> tries-left 0) + (dbfile:cautious-open-database fname init-proc + sync-mode journal-mode + (- tries-left 1)))))) + (assert (>= tries-left 0) (conc "FATAL: too many attempts in dbfile:cautious-open-database of "fname", giving up.")) + + (if (and (file-write-access? fname) + (file-exists? busy-file)) + (begin + (if (common:low-noise-print 120 busy-file) + (dbfile:print-err "INFO: dbfile:cautious-open-database: journal file " + busy-file" exists, trying again in few seconds.")) + (thread-sleep! 1) + (if (eq? tries-left 2) + (begin + (dbfile:print-err "INFO: forcing journal rollup "busy-file) + (dbfile:brute-force-salvage-db fname))) + (dbfile:cautious-open-database fname init-proc sync-mode journal-mode (- tries-left 1))) + + (let* ((result (condition-case + (if dir-access + (dbfile:with-simple-file-lock + (conc fname ".lock") + (lambda () + (let* ((db-exists (file-exists? fname)) + (db (sqlite3:open-database fname))) ;; creates an empty db if it did not already exist. + (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 30000)) + (if sync-mode + (sqlite3:execute db (conc "PRAGMA synchronous = "sync-mode";"))) + (if journal-mode + (sqlite3:execute db (conc "PRAGMA journal_mode = "journal-mode";"))) + (if (and init-proc (not db-exists)) + (init-proc db)) + db))) + (begin + (if (file-exists? fname ) + (let ((db (sqlite3:open-database fname))) + ;; pragmas synchronous not needed because this db is used read-only + ;; (sqlite3:execute db (conc "PRAGMA synchronous = "mode";") + (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 30000)) ;; read-only but still need timeout + db ) + (print "file doesn't exist: " fname)))) + (exn (io-error) + (dbfile:print-err exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.") + (retry)) + (exn (corrupt) + (dbfile:print-err exn "ERROR: database " fname " is corrupt. Repair it to proceed.") + (retry)) + (exn (busy) + (dbfile:print-err exn "ERROR: database " fname + " is locked. Try copying to another location, remove original and copy back.") + (retry)) + (exn (permission)(dbfile:print-err exn "ERROR: database " fname " has some permissions problem.") + (retry)) + (exn () + (dbfile:print-err exn "ERROR: Unknown error with database " fname " message: " + ((condition-property-accessor 'exn 'message) exn)) + (retry))))) + result)))) + +(define (dbfile:brute-force-salvage-db fname) + (let* ((backupfname (conc fname"-"(current-process-id)".bak")) + (cmd (conc "cp "fname" "backupfname";mv "fname" "(conc fname ".delme;") + "cp "backupfname" "fname))) + (dbfile:print-err "WARNING: attempting recovery of file "fname" by running commands:\n" + " "cmd) + (system cmd))) + + +(define (dbfile:open-no-sync-db dbpath) + (if *no-sync-db* + *no-sync-db* + (begin + (if (not (file-exists? dbpath)) + (create-directory dbpath #t)) + (let* ((dbname (conc dbpath "/no-sync.db")) + (db-exists (file-exists? dbname)) + (init-proc (lambda (db) + (if (not db-exists) + (begin + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));")) + ))) + (db (dbfile:cautious-open-database dbname init-proc 0 "WAL"))) ;; (sqlite3:open-database dbname))) + ;; (sqlite3:execute db "PRAGMA synchronous = 0;") + ;; (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) ;; done in cautious-open-database + (set! *no-sync-db* db) + db)))) + +(define (db:no-sync-set db var val) + (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val)) + +(define (db:no-sync-del! db var) + (sqlite3:execute db "DELETE FROM no_sync_metadat WHERE var=?;" var)) + +(define (db:no-sync-get/default db var default) + (let ((res default)) + (sqlite3:for-each-row + (lambda (val) + (set! res val)) + db + "SELECT val FROM no_sync_metadat WHERE var=?;" + var) + (if res + (let ((newres (if (string? res) + (string->number res) + #f))) + (if newres + newres + res)) + res))) + +;; transaction protected lock aquisition +;; either: +;; fails returns (#f . lock-creation-time) +;; succeeds (returns (#t . lock-creation-time) +;; use (db:no-sync-del! db keyname) to release the lock +;; +(define (db:no-sync-get-lock db keyname) + (sqlite3:with-transaction + db + (lambda () + (condition-case + (let* ((curr-val (db:no-sync-get/default db keyname #f))) + (if curr-val + `(#f . ,curr-val) ;; (sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname)) + (let ((lock-time (current-seconds))) + (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time) + `(#t . ,lock-time)))) + (exn (io-error) (dbfile:print-err "ERROR: i/o error with no-sync db. Check permissions, disk space etc. and try again.")) + (exn (corrupt) (dbfile:print-err "ERROR: database no-sync db is corrupt. Repair it to proceed.")) + (exn (busy) (dbfile:print-err "ERROR: database no-sync db is locked. Try copying to another location, remove original and copy back.")) + (exn (permission)(dbfile:print-err "ERROR: database no-sync db has some permissions problem.")) + (exn () ;; (status done) ;; I don't know how to detect status done but no data! + (dbfile:print-err "ERROR: Unknown error with database no-sync db message: exn="(condition->list exn)", \n" + ((condition-property-accessor 'exn 'message) exn)) + `(#f . ,(current-seconds))))))) + +(define (db:no-sync-get-lock-timeout db keyname timeout) + (let* ((lockdat (db:no-sync-get-lock db keyname))) + (match lockdat + ((#f . lock-time) + (if (> (- (current-seconds) (if (string? lock-time)(string->number lock-time)lock-time)) timeout) + (let ((lock-time (current-seconds))) + ;; (debug:print-info 2 *default-log-port* "db:no-sync-get-lock keyname=" keyname ", lock-time=" lock-time ", exn=" exn) + (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time) + `(#t . ,lock-time)) + lockdat)) + (else lockdat)))) + +;; NOTE: This will steal the lock after timeout of waiting. +;; +(define (db:with-no-sync-lock db keyname timeout proc) + (let* ((lockdat (db:no-sync-get-lock-timeout db keyname)) + (gotlock (car lockdat)) + (locktime (cdr lockdat))) + (if gotlock + (let ((res (proc))) + (db:no-sync-del! db keyname) + res)))) + +;;====================================================================== +;; sync back functions pulled from db.scm +;;====================================================================== + +;; Get a lock from the no-sync-db for the from-db, then delta sync the from-db to the to-db, otherwise return #f +;; +(define (db:lock-and-delta-sync no-sync-db dbstruct from-db-file runid keys dbinit) + (assert (not *db-sync-in-progress*) "FATAL: db:lock-and-sync called while a sync is in progress.") + ;; (dbfile:print-err *default-log-port* "db:lock-and-delta-sync") + (let* ((lock-file (conc from-db-file ".lock"))) + (if (common:simple-file-lock lock-file) + (begin + (dbfile:print-err "INFO: db:lock-and-delta-sync copying db "runid" at "(current-seconds)) + (set! *db-sync-in-progress* #t) + (db:sync-touched dbstruct runid keys dbinit) + (set! *db-sync-in-progress* #f) + (delete-file* lock-file) + #t) + (begin + (if (common:low-noise-print 120 (conc "no lock "from-db-file)) + (dbfile:print-err "INFO: could not get lock for " from-db-file ", sync likely in progress.")) + #f + )))) + +;; ;; Get a lock from the no-sync-db for the from-db, then delta sync the from-db to the to-db, otherwise return #f +;; ;; +;; (define (db:lock-and-delta-sync-orig no-sync-db dbstruct from-db-file runid keys dbinit) +;; (assert (not *db-sync-in-progress*) "FATAL: db:lock-and-sync called while a sync is in progress.") +;; ;; (dbfile:print-err *default-log-port* "db:lock-and-delta-sync") +;; (let* ((lockdat (db:no-sync-get-lock-timeout no-sync-db from-db-file 60)) +;; (gotlock (car lockdat)) +;; (locktime (cdr lockdat))) +;; ;; (debug:print-info 3 *default-log-port* "db:lock-and-delta-sync: got lock?") +;; +;; (if gotlock +;; (begin +;; (dbfile:print-err "INFO: db:lock-and-delta-sync copying db "runid" at "(current-seconds)) +;; (set! *db-sync-in-progress* #t) +;; (db:sync-touched dbstruct runid keys dbinit) +;; (set! *db-sync-in-progress* #f) +;; (db:no-sync-del! no-sync-db from-db-file) +;; #t) +;; (begin +;; (dbfile:print-err "ERROR: could not get lock for " from-db-file " from no-sync-db") +;; #f +;; )))) + +;; sync run from tmp disk to nfs disk if touched +;; +;; call with dbinit=db:initialize-main-db +;; +(define (db:sync-touched dbstruct run-id keys #!key dbinit (force-sync #f)) + (dbfile:print-err "db:sync-touched Syncing: " (conc (if run-id run-id "main") ".db")) + (let* (;; the subdb is needed to access the mtdbdat + (subdb (or (dbfile:get-subdb dbstruct run-id) + (dbfile:init-subdb dbstruct run-id dbinit))) + (tmpdbfile (dbr:subdb-tmpdbfile subdb)) + (mtdb (dbr:subdb-mtdbdat subdb)) + (tmpdb (db:open-db dbstruct run-id dbinit)) ;; sqlite3-db tmpdbfile #f)) + (start-t (current-seconds))) + (mutex-lock! *db-multi-sync-mutex*) + (let ((update_info (cons "last_update" (if force-sync 0 *db-last-sync*) ))) + (mutex-unlock! *db-multi-sync-mutex*) + (db:sync-tables (db:sync-all-tables-list dbstruct keys) update_info tmpdb mtdb)) + (mutex-lock! *db-multi-sync-mutex*) + (set! *db-last-sync* start-t) + (set! *db-last-access* start-t) + (mutex-unlock! *db-multi-sync-mutex*) + (dbfile:add-dbdat dbstruct run-id tmpdb) + #t)) + +;; just tests, test_steps and test_data tables +(define db:sync-tests-only + (list + ;; (list "strs" + ;; '("id" #f) + ;; '("str" #f)) + (list "tests" + '("id" #f) + '("run_id" #f) + '("testname" #f) + '("host" #f) + '("cpuload" #f) + '("diskfree" #f) + '("uname" #f) + '("rundir" #f) + '("shortdir" #f) + '("item_path" #f) + '("state" #f) + '("status" #f) + '("attemptnum" #f) + '("final_logf" #f) + '("logdat" #f) + '("run_duration" #f) + '("comment" #f) + '("event_time" #f) + '("fail_count" #f) + '("pass_count" #f) + '("archived" #f) + '("last_update" #f)) + (list "test_steps" + '("id" #f) + '("test_id" #f) + '("stepname" #f) + '("state" #f) + '("status" #f) + '("event_time" #f) + '("comment" #f) + '("logfile" #f) + '("last_update" #f)) + (list "test_data" + '("id" #f) + '("test_id" #f) + '("category" #f) + '("variable" #f) + '("value" #f) + '("expected" #f) + '("tol" #f) + '("units" #f) + '("comment" #f) + '("status" #f) + '("type" #f) + '("last_update" #f)))) + +;; needs db to get keys, this is for syncing all tables +;; +(define (db:sync-main-list dbstruct keys) + (let ((keys keys)) ;; (db:get-keys dbstruct))) + (list + (list "keys" + '("id" #f) + '("fieldname" #f) + '("fieldtype" #f)) + (list "metadat" '("var" #f) '("val" #f)) + (append (list "runs" + '("id" #f)) + (map (lambda (k)(list k #f)) + (append keys + (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update")))) + (list "archive_disks" + '("id" #f) + '("archive_area_name" #f) + '("disk_path" #f) + '("last_df" #f) + '("last_df_time" #f) + '("creation_time" #f)) + + (list "archive_blocks" + '("id" #f) + '("archive_disk_id" #f) + '("disk_path" #f) + '("last_du" #f) + '("last_du_time" #f) + '("creation_time" #f)) + + (list "test_meta" + '("id" #f) + '("testname" #f) + '("owner" #f) + '("description" #f) + '("reviewed" #f) + '("iterated" #f) + '("avg_runtime" #f) + '("avg_disk" #f) + '("tags" #f) + '("jobgroup" #f)) + + + (list "tasks_queue" + '("id" #f) + '("action" #f) + '("owner" #f) + '("state" #f) + '("target" #f) + '("name" #f) + '("testpatt" #f) + '("keylock" #f) + '("params" #f) + '("creation_time" #f) + '("execution_time" #f)) + ))) + +(define (db:sync-all-tables-list dbstruct keys) + (append (db:sync-main-list dbstruct keys) + db:sync-tests-only)) + +;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) ) +;; db's are dbdat's +;; +;; if last-update specified ("field-name" . time-in-seconds) +;; then sync only records where field-name >= time-in-seconds +;; IFF field-name exists +;; +(define (db:sync-tables tbls last-update fromdb todb . slave-dbs) + (handle-exceptions + exn + (begin + (dbfile:print-err "EXCEPTION: database probably overloaded or unreadable in db:sync-tables.") + (print-call-chain (current-error-port)) + (dbfile:print-err " message: " ((condition-property-accessor 'exn 'message) exn)) + (dbfile:print-err "exn=" (condition->list exn)) + (dbfile:print-err " status: " ((condition-property-accessor 'sqlite3 'status) exn)) + (dbfile:print-err " src db: " (dbr:dbdat-dbfile fromdb)) + (for-each (lambda (dbdat) + (let ((dbpath (dbr:dbdat-dbfile dbdat))) + (dbfile:print-err " dbpath: " dbpath) + (if #t ;; (not (db:repair-db dbdat)) + (begin + (dbfile:print-err "Failed to rebuild (repair is turned off) " dbpath ", exiting now.") + (exit))))) + (cons todb slave-dbs)) + + 0) + + ;; this is the work to be done") + (cond + ((not fromdb) (dbfile:print-err "WARNING: db:sync-tables called with fromdb missing") + -1) + ((not todb) (dbfile:print-err "WARNING: db:sync-tables called with todb missing") + -2) + ((not (sqlite3:database? (dbr:dbdat-dbh fromdb))) + (dbfile:print-err "db:sync-tables called with fromdb not a database " fromdb) + -3) + ((not (sqlite3:database? (dbr:dbdat-dbh todb))) + (dbfile:print-err "db:sync-tables called with todb not a database " todb) + -4) + + ((not (file-write-access? (dbr:dbdat-dbfile todb))) + (dbfile:print-err "db:sync-tables called with todb not a read-only database " todb) + -5) + ((not (null? (let ((readonly-slave-dbs + (filter + (lambda (dbdat) + (not (file-write-access? (dbr:dbdat-dbfile todb)))) + slave-dbs))) + (for-each + (lambda (bad-dbdat) + (dbfile:print-err "db:sync-tables called with todb not a read-only database " bad-dbdat)) + readonly-slave-dbs) + readonly-slave-dbs))) -6) + (else + ;; (dbfile:print-err "db:sync-tables: args are good") + + (let ((stmts (make-hash-table)) ;; table-field => stmt + (all-stmts '()) ;; ( ( stmt1 value1 ) ( stml2 value2 )) + (numrecs (make-hash-table)) + (start-time (current-milliseconds)) + (tot-count 0)) + (for-each ;; table + (lambda (tabledat) + (let* ((tablename (car tabledat)) + (fields (cdr tabledat)) + (has-last-update (member "last_update" fields)) + (use-last-update (cond + ((and has-last-update + (member "last_update" fields)) + #t) ;; if given a number, just use it for all fields + ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table + ((and (pair? last-update) + (member (car last-update) ;; last-update field name + (map car fields))) + #t) + ((and last-update (not (pair? last-update)) (not (number? last-update))) + (dbfile:print-err "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update);; found in fields + #f) + (else + #f))) + (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for + (if (number? last-update) + last-update + (cdr last-update)) + #f)) + (last-update-field (if use-last-update + (if (number? last-update) + "last_update" + (car last-update)) + #f)) + (num-fields (length fields)) + (field->num (make-hash-table)) + (num->field (apply vector (map car fields))) ;; BBHERE + (full-sel (conc "SELECT " (string-intersperse (map car fields) ",") + " FROM " tablename (if use-last-update ;; apply last-update criteria + (conc " WHERE " last-update-field " >= " last-update-value) + "") + ";")) + (full-ins (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) " + " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );")) + (fromdat '()) + (fromdats '()) + (totrecords 0) + (batch-len 100) ;; (string->number (or (configf:lookup *configdat* "sync" "batchsize") "100"))) + (todat (make-hash-table)) + (count 0) + (field-names (map car fields)) + (delay-handicap 0) ;; (string->number (or (configf:lookup *configdat* "sync" "delay-handicap") "0"))) + ) + + ;; set up the field->num table + (for-each + (lambda (field) + (hash-table-set! field->num field count) + (set! count (+ count 1))) + fields) + + ;; read the source table + ;; store a list of all rows in the table in fromdat, up to batch-len. + ;; Then add fromdat to the fromdats list, clear fromdat and repeat. + (sqlite3:for-each-row + (lambda (a . b) + (set! fromdat (cons (apply vector a b) fromdat)) + (if (> (length fromdat) batch-len) + (begin + (set! fromdats (cons fromdat fromdats)) + (set! fromdat '()) + (set! totrecords (+ totrecords 1))) + ) + ) + (dbr:dbdat-dbh fromdb) + full-sel) + + ;; Count less than batch-len as a record + (if (> (length fromdat) 0) + (set! totrecords (+ totrecords 1))) + + ;; tack on remaining records in fromdat + (if (not (null? fromdat)) + (set! fromdats (cons fromdat fromdats))) + + (sqlite3:for-each-row + (lambda (a . b) + (hash-table-set! todat a (apply vector a b))) + (dbr:dbdat-dbh todb) + full-sel) + + (when (and delay-handicap (> delay-handicap 0)) + (dbfile:print-err "imposing synthetic sync delay of "delay-handicap" seconds since sync/delay-handicap is configured") + (thread-sleep! delay-handicap) + (dbfile:print-err "synthetic sync delay of "delay-handicap" seconds completed") + ) + + ;; first pass implementation, just insert all changed rows + + (for-each + (lambda (targdb) + (let* ((db (dbr:dbdat-dbh targdb)) + (drp-trigger (if (member "last_update" field-names) + (db:drop-trigger db tablename) + #f)) + (has-last-update (member "last_update" field-names)) + (is-trigger-dropped (if has-last-update + (db:is-trigger-dropped db tablename) + #f)) + (stmth (sqlite3:prepare db full-ins)) + (changed-rows 0)) + (for-each + (lambda (fromdat-lst) + (sqlite3:with-transaction + db + (lambda () + (for-each ;; + (lambda (fromrow) + (let* ((a (vector-ref fromrow 0)) + (curr (hash-table-ref/default todat a #f)) + (same #t)) + (let loop ((i 0)) + (if (or (not curr) + (not (equal? (vector-ref fromrow i)(vector-ref curr i)))) + (set! same #f)) + (if (and same + (< i (- num-fields 1))) + (loop (+ i 1)))) + (if (not same) + (begin + (apply sqlite3:execute stmth (vector->list fromrow)) + (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0))) + (set! changed-rows (+ changed-rows 1)) + ) + ) + )) + fromdat-lst)))) + fromdats) + + (sqlite3:finalize! stmth) + (if (member "last_update" field-names) + (db:create-trigger db tablename)))) + (append (list todb) slave-dbs) + ) + ) + ) + tbls) + (let* ((runtime (- (current-milliseconds) start-time)) + (should-print (or ;; (debug:debug-mode 12) + (common:low-noise-print 120 "db sync") + (> runtime 500)))) ;; low and high sync times treated as separate. + (for-each + (lambda (dat) + (let ((tblname (car dat)) + (count (cdr dat))) + (set! tot-count (+ tot-count count)) + )) + (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b)))))) + tot-count))))) + +;;====================================================================== +;; trigger setup/takedown +;;====================================================================== + +(define db:trigger-list + (list (list "update_runs_trigger" "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs + FOR EACH ROW + BEGIN + UPDATE runs SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;" ) + (list "update_run_stats_trigger" "CREATE TRIGGER IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats + FOR EACH ROW + BEGIN + UPDATE run_stats SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;" ) + (list "update_tests_trigger" "CREATE TRIGGER IF NOT EXISTS update_tests_trigger AFTER UPDATE ON tests + FOR EACH ROW + BEGIN + UPDATE tests SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;" ) + (list "update_teststeps_trigger" "CREATE TRIGGER IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps + FOR EACH ROW + BEGIN + UPDATE test_steps SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;" ) + (list "update_test_data_trigger" "CREATE TRIGGER IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data + FOR EACH ROW + BEGIN + UPDATE test_data SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;" ))) +;; +;; ADD run-id SUPPORT +;; +(define (db:create-all-triggers dbstruct) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (db:create-triggers db)))) + +(define (db:create-triggers db) + (for-each (lambda (key) + (sqlite3:execute db (cadr key))) + db:trigger-list)) + +(define (db:drop-all-triggers dbstruct) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (db:drop-triggers db)))) + +(define (db:is-trigger-dropped db tbl-name) + (let* ((trigger-name (if (equal? tbl-name "test_steps") + "update_teststeps_trigger" + (conc "update_" tbl-name "_trigger"))) + (res #f)) + (sqlite3:for-each-row + (lambda (name) + (if (equal? name trigger-name) + (set! res #t))) + db + "SELECT name FROM sqlite_master WHERE type = 'trigger' ;") + res)) + +(define (db:drop-triggers db) + (for-each + (lambda (key) + (sqlite3:execute db (conc "drop trigger if exists " (car key)))) + db:trigger-list)) + +(define (db:drop-trigger db tbl-name) + (let* ((trigger-name (if (equal? tbl-name "test_steps") + "update_teststeps_trigger" + (conc "update_" tbl-name "_trigger")))) + (for-each + (lambda (key) + (if (equal? (car key) trigger-name) + (sqlite3:execute db (conc "drop trigger if exists " trigger-name)))) + db:trigger-list))) + +(define (db:create-trigger db tbl-name) + (let* ((trigger-name (if (equal? tbl-name "test_steps") + "update_teststeps_trigger" + (conc "update_" tbl-name "_trigger")))) + (for-each (lambda (key) + (if (equal? (car key) trigger-name) + (sqlite3:execute db (cadr key)))) + db:trigger-list))) + +;;====================================================================== +;; db access stuff +;;====================================================================== + +;; call with dbinit=db:initialize-main-db +;; +(define (db:open-db dbstruct run-id dbinit) + ;; (mutex-lock! *db-open-mutex*) + (let* ((dbdat (dbfile:open-db dbstruct run-id dbinit))) + (set! *db-write-access* (not (dbr:dbdat-read-only dbdat))) + ;; (mutex-unlock! *db-open-mutex*) + dbdat)) + +(define dbfile:db-init-proc (make-parameter #f)) + +;; in xmaxima this gives a curve close to what I want: +;; plot2d ((exp(x/1.2)-1)/300, [x, 0, 10])$ +;; plot2d ((exp(x/1.5)-1)/40, [x, 0, 10])$ +;; plot2d ((exp(x/5)-1)/40, [x, 0, 20])$ +(define (dbfile:droop x) + (/ (- (exp (/ x 5)) 1) 40)) + ;; (* numqrys (/ 1 (qif-slope)))) + +;; create a dropping near the db file in a qif dir +;; use count of such files to gate queries (queries in flight) +;; +(define (dbfile:wait-for-qif fname run-id params) + (let* ((thedir (pathname-directory fname)) + (dbnum (dbfile:run-id->dbnum run-id)) + (destdir (conc thedir"/qif-"dbnum)) + (uniqn (get-area-path-signature (conc dbnum params))) + (crumbn (conc destdir"/"(current-seconds)"-"uniqn"."(current-process-id)))) + (if (not (file-exists? destdir))(create-directory (conc destdir"/attic") #t)) + (let loop ((count 0)) + (let* ((currlks (glob (conc destdir"/*"))) + (numqrys (length currlks)) + (delayval (cond ;; do a droopish curve + ((> numqrys 25) + (for-each + (lambda (f) + (if (> (- (current-seconds) + (handle-exceptions + exn + (current-seconds) ;; file is likely gone, just fake out + (file-modification-time f))) + (keep-age-param)) + (let* ((basedir (pathname-directory f)) + (filen (pathname-file f)) + (destf (conc basedir"/attic/"filen))) + (dbfile:print-err "Moving qif file "f" older than 10 seconds to "destf) + ;; (delete-file* f) + (handle-exceptions + exn + #t + (file-move f destf #t))))) + currlks) + 4) + ((> numqrys 0) (dbfile:droop numqrys)) ;; slope of 1/100 + (else #f)))) + (if (and delayval + (< count 5)) + (begin + (thread-sleep! delayval) + (loop (+ count 1)))))) + (with-output-to-file crumbn + (lambda () + (print fname" run-id="run-id" params="params) + )) + crumbn)) + +(define no-condition-db-with-db (make-parameter #t)) + +;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;") +;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no +;; +(define (db:with-db dbstruct run-id r/w proc . params) + (assert dbstruct "FATAL: db:with-db called with dbstruct "#f) + (assert (dbr:dbstruct? dbstruct) "FATAL: dbstruct is "dbstruct) + (let* ((use-mutex (> *api-process-request-count* 25)) ;; risk of db corruption + (have-struct (dbr:dbstruct? dbstruct)) + (dbdat (if have-struct ;; this stuff just allows us to call with a db handle directly + (db:open-db dbstruct run-id (dbfile:db-init-proc)) ;; (dbfile:get-subdb dbstruct run-id) + #f)) + (db (if have-struct ;; this stuff just allows us to call with a db handle directly + (dbr:dbdat-dbh dbdat) + dbstruct)) + (fname (if dbdat + (dbr:dbdat-dbfile dbdat) + "nofilenameavailable")) + (jfile (conc fname"-journal")) + (qryproc (lambda () + (if use-mutex (mutex-lock! *db-with-db-mutex*)) + (let ((res (apply proc dbdat db params))) ;; the actual call is here. + (if use-mutex (mutex-unlock! *db-with-db-mutex*)) + ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w)) + (if dbdat + (dbfile:add-dbdat dbstruct run-id dbdat)) + ;; (delete-file* crumbfile) + res)))) + + (assert (sqlite3:database? db) "FATAL: db:with-db, db is not a database, db="db", fname="fname) + (if (file-exists? jfile) + (begin + (dbfile:print-err "INFO: "jfile" exists, delaying to reduce database load") + (thread-sleep! 0.2))) + (if (and use-mutex + (common:low-noise-print 120 "over-50-parallel-api-requests")) + (dbfile:print-err *api-process-request-count* " parallel api requests being processed in process " + (current-process-id))) ;; ", throttling access")) + (if (no-condition-db-with-db) + (qryproc) + (condition-case + (qryproc) + (exn (io-error) + (db:generic-error-printout exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")) + (exn (corrupt) + (db:generic-error-printout exn "ERROR: database " fname " is corrupt. Repair it to proceed.")) + (exn (busy) + (db:generic-error-printout exn "ERROR: database " fname + " is locked. Try copying to another location, remove original and copy back.")) + (exn (permission)(db:generic-error-printout exn "ERROR: database " fname " has some permissions problem.")) + (exn () + (db:generic-error-printout exn "ERROR: Unknown error with database " fname " message: " + ((condition-property-accessor 'exn 'message) exn))))))) + +;;====================================================================== +;; another attempt at a transactionized queue +;;====================================================================== + +;; ;; ;; (define *transaction-queues* (make-hash-table)) +;; ;; ;; +;; ;; ;; (define (db:get-queue run-id) +;; ;; ;; (let* ((res (hash-table-ref/default *transaction-queues* run-id #f))) +;; ;; ;; (if res +;; ;; ;; res +;; ;; ;; (let* ((newq (make-queue))) +;; ;; ;; (hash-table-set! *transaction-queues* run-id newq) +;; ;; ;; newq)))) +;; ;; ;; +;; ;; ;; (define (db:add-to-transaction-queue dbstruct proc params) +;; ;; ;; (let* ((mbox (make-mailbox)) +;; ;; ;; (q (db:get-queue run-id))) +;; ;; ;; (queue-add! *transaction-queue* (list dbstruct proc mbox)) +;; ;; ;; (mailbox-receive mbox))) +;; ;; ;; +;; ;; ;; (define (db:process-transaction-queue *dbstruct-dbs*) +;; ;; ;; (for-each +;; ;; ;; (lambda (run-id) +;; ;; ;; (let* ((q (hash-table-ref *transaction-queue* run-id))) +;; ;; ;; ;; with-transaction +;; ;; ;; ;; dbstruct +;; ;; ;; ;; pop items from queue and execute them, return results via mailbox +;; ;; ;; q +;; ;; ;; ;; pop +;; ;; ;; )) +;; ;; ;; (hash-table-keys *transaction-queues*))) + +;;====================================================================== +;; file utils +;;====================================================================== + +;;====================================================================== +;; lazy-safe get file mod time. on any error (file not existing etc.) return 0 +;; +(define (dbfile:lazy-modification-time fpath) + (handle-exceptions + exn + (begin + (dbfile:print-err "Failed to get modification time for " fpath ", treating it as zero. exn=" exn) + 0) + (if (file-exists? fpath) + (file-modification-time fpath) + 0))) + +;;====================================================================== +;; find timestamp of newest file associated with a sqlite db file +(define (dbfile:lazy-sqlite-db-modification-time fpath) + (let* ((glob-list (handle-exceptions + exn + (begin + (dbfile:print-err "Failed to glob " fpath "*, exn=" exn) + `(,(conc "/no/such/file, message: " ((condition-property-accessor 'exn 'message) exn)))) + (glob (conc fpath "*")))) + (file-list (if (eq? 0 (length glob-list)) + '("/no/such/file") + glob-list))) + (apply max + (map + dbfile:lazy-modification-time + file-list)))) + +;; dot-locking egg seems not to work, using this for now +;; if lock is older than expire-time then remove it and try again +;; to get the lock +;; +(define (dbfile:simple-file-lock fname #!key (expire-time 300)) + (let ((fmod-time (handle-exceptions + ext + (current-seconds) + (file-modification-time fname)))) + (if (file-exists? fname) + (if (> (- (current-seconds) fmod-time) expire-time) + (begin + (handle-exceptions exn #f (delete-file* fname)) + (dbfile:simple-file-lock fname expire-time: expire-time)) + #f) + (let ((key-string (conc (get-host-name) "-" (current-process-id))) + (oup (open-output-file fname))) + (with-output-to-port + oup + (lambda () + (print key-string))) + (close-output-port oup) + #;(with-output-to-file fname ;; bizarre. with-output-to-file does not seem to be cleaning up after itself. + (lambda () + (print key-string))) + (thread-sleep! 0.25) + (if (file-exists? fname) + (handle-exceptions exn + #f + (with-input-from-file fname + (lambda () + (equal? key-string (read-line))))) + #f) + ) + ) + ) +) + +(define (dbfile:simple-file-lock-and-wait fname #!key (expire-time 300)) + (let ((end-time (+ expire-time (current-seconds)))) + (let loop ((got-lock (dbfile:simple-file-lock fname expire-time: expire-time))) + (if got-lock + #t + (if (> end-time (current-seconds)) + (begin + (thread-sleep! 3) + (loop (dbfile:simple-file-lock fname expire-time: expire-time))) + #f))))) + +(define (dbfile:simple-file-release-lock fname) + (handle-exceptions + exn + #f ;; I don't really care why this failed (at least for now) + (delete-file* fname))) + +(define (dbfile:with-simple-file-lock fname proc #!key (expire-time 300)) + (let ((gotlock (dbfile:simple-file-lock-and-wait fname expire-time: expire-time))) + (if gotlock + (let ((res (proc))) + (dbfile:simple-file-release-lock fname) + res) + (assert #t "FATAL: simple file lock never got a lock.")))) + +(define (db:get-cache-stmth dbdat db stmt) + (let* (;; (dbdat (dbfile:get-dbdat dbstruct run-id)) + (stmt-cache (dbr:dbdat-stmt-cache dbdat)) + ;; (stmth (db:hoh-get stmt-cache db stmt)) + (stmth (hash-table-ref/default stmt-cache stmt #f))) + (or stmth + (let* ((newstmth (sqlite3:prepare db stmt))) + ;; (db:hoh-set! stmt-cache db stmt newstmth) + (hash-table-set! stmt-cache stmt newstmth) + newstmth)))) + +(define (db:have-incompletes? dbstruct run-id ovr-deadtime) + (let* ((incompleted '()) + (oldlaunched '()) + (toplevels '()) + ;; (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) + (deadtime (or ovr-deadtime 72000))) ;; twenty hours + (db:with-db + dbstruct run-id #f + (lambda (dbdat db) + + ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes + ;; + ;; HOWEVER: this code in run:test seems to work fine + ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) + ;; (db:test-get-run_duration testdat))) + ;; 600) + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (begin + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels))) + ;; (print-info "Found old toplevel test in RUNNING state, test-id=" test-id)) + (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) + (db:get-cache-stmth dbdat db + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');") + run-id deadtime) + + ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config + ;; + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) + (db:get-cache-stmth dbdat db + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');") + run-id) + + ;; (print-info "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") + (if (and (null? incompleted) + (null? oldlaunched) + (null? toplevels)) + #f + #t))))) + + +) Index: dbmod.scm ================================================================== --- dbmod.scm +++ dbmod.scm @@ -17,40 +17,270 @@ ;; along with Megatest. If not, see . ;;====================================================================== (declare (unit dbmod)) +(declare (uses dbfile)) +(declare (uses commonmod)) +(declare (uses debugprint)) (module dbmod * -(import scheme chicken data-structures extras) -(import (prefix sqlite3 sqlite3:) - posix typed-records srfi-18 - srfi-69) - -(define (db:run-id->dbname run-id) - (cond - ((number? run-id)(conc run-id ".db")) - ((not run-id) "main.db") - (else run-id))) - - -;;====================================================================== -;; hash of hashs -;;====================================================================== - - -(define (db:hoh-set! dat key1 key2 val) - (let* ((subhash (hash-table-ref/default dat key1 #f))) - (if subhash - (hash-table-set! subhash key2 val) - (begin - (hash-table-set! dat key1 (make-hash-table)) - (db:hoh-set! dat key1 key2 val))))) - -(define (db:hoh-get dat key1 key2) - (let* ((subhash (hash-table-ref/default dat key1 #f))) - (and subhash - (hash-table-ref/default subhash key2 #f)))) +(import scheme + chicken + data-structures + extras + + (prefix sqlite3 sqlite3:) + posix + typed-records + srfi-1 + srfi-18 + srfi-69 + + commonmod + dbfile + debugprint + ) + +;; NOTE: This returns only the name "1.db", "main.db", not the path +;; +(define (dbmod:run-id->dbfname run-id) + (conc (dbfile:run-id->dbnum run-id)".db")) + +(define (dbmod:get-dbdir dbstruct run-id) + (let* ((areapath (dbr:dbstruct-areapath dbstruct))) + (conc areapath"/.megatest"))) + +(define (dbmod:run-id->full-dbfname dbstruct run-id) + (conc (dbmod:get-dbdir dbstruct run-id)"/"(dbmod:run-id->dbfname run-id))) + +;;====================================================================== +;; The inmem one-db file per server method goes in here +;;====================================================================== + +(define (dbmod:with-db dbstruct run-id r/w proc params) + (let* ((dbdat (dbmod:open-db dbstruct run-id (dbfile:db-init-proc))) + (dbh (dbr:dbdat-dbh dbdat)) + (dbfile (dbr:dbdat-dbfile dbdat))) + (apply proc dbdat dbh params))) + +(define (dbmod:open-inmem-db initproc) + (let* ((db (sqlite3:open-database ":memory:")) + (handler (sqlite3:make-busy-timeout 3600))) + (sqlite3:set-busy-handler! db handler) + (initproc db) + db)) + +(define (dbmod:open-db dbstruct run-id dbinit) + (or (dbr:dbstruct-dbdat dbstruct) + (let* ((dbdat (make-dbr:dbdat + dbfile: (dbr:dbstruct-dbfile dbstruct) + dbh: (dbr:dbstruct-inmem dbstruct) + ))) + (dbr:dbstruct-dbdat-set! dbstruct dbdat) + dbdat))) + +;; Open the inmem db and the on-disk db +;; populate the inmem db with data +;; +;; Updates fields in dbstruct +;; Returns dbstruct +;; +;; * This routine creates the db if not found +;; * Probably can get rid of the dbstruct-in +;; +(define (dbmod:open-dbmoddb areapath run-id init-proc keys #!key (dbstruct-in #f)) ;; (conc *toppath* "/megatest.db") (car *configinfo*))) + (let* ((dbstruct (or dbstruct-in (make-dbr:dbstruct areapath: areapath))) + (dbfname (dbmod:run-id->dbfname run-id)) + (dbpath (dbmod:get-dbdir dbstruct run-id)) ;; directory where all the .db files are kept + (dbfullname (dbmod:run-id->full-dbfname dbstruct run-id)) + (dbexists (file-exists? dbfullname)) + (inmem (dbmod:open-inmem-db init-proc)) + (write-access (file-write-access? dbpath)) + (db (dbfile:with-simple-file-lock + (conc dbfullname".lock") + (lambda () + (let* ((db (sqlite3:open-database dbfullname)) + (handler (sqlite3:make-busy-timeout 136000))) + (sqlite3:set-busy-handler! db handler) + (if write-access + (init-proc db)) + db)))) + (tables (db:sync-all-tables-list keys))) + (dbr:dbstruct-inmem-set! dbstruct inmem) + (dbr:dbstruct-ondiskdb-set! dbstruct db) + (dbr:dbstruct-dbfile-set! dbstruct dbfullname) + (dbr:dbstruct-syncback-proc-set! dbstruct (lambda (last-update) + (dbmod:sync-tables tables last-update inmem db))) + (dbmod:sync-tables tables #f db inmem) + (dbr:dbstruct-last-update-set! dbstruct (current-seconds)) ;; should this be offset back in time by one second? + dbstruct)) + +(define (dbmod:close-db dbstruct) + ;; do final sync to disk file + ;; (do-sync ...) + (sqlite3:finalize! (dbr:dbstruct-ondiskdb dbstruct))) + +;;====================================================================== +;; Sync db +;;====================================================================== + +(define (dbmod:calc-use-last-update has-last-update fields last-update) + (cond + ((and has-last-update + (member "last_update" fields)) + #t) ;; if given a number, just use it for all fields + ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table + ((and (pair? last-update) + (member (car last-update) ;; last-update field name + (map car fields))) + #t) + ((and last-update (not (pair? last-update)) (not (number? last-update))) + (debug:print 0 *default-log-port* "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update);; found in fields + #f) + (else + #f))) + +;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) ) +;; dbs are sqlite3 db handles +;; +;; if last-update specified ("field-name" . time-in-seconds) +;; then sync only records where field-name >= time-in-seconds +;; IFF field-name exists +;; +;; Use (db:sync-all-tables-list keys) to get the tbls input +;; +(define (dbmod:sync-tables tbls last-update fromdb todb) + (let ((stmts (make-hash-table)) ;; table-field => stmt + (all-stmts '()) ;; ( ( stmt1 value1 ) ( stml2 value2 )) + (numrecs (make-hash-table)) + (start-time (current-milliseconds)) + (tot-count 0)) + (for-each ;; table + (lambda (tabledat) + (let* ((tablename (car tabledat)) + (fields (cdr tabledat)) + (has-last-update (member "last_update" fields)) + (use-last-update (dbmod:calc-use-last-update has-last-update fields last-update)) + (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for + (if (number? last-update) + last-update + (cdr last-update)) + #f)) + (last-update-field (if use-last-update + (if (number? last-update) + "last_update" + (car last-update)) + #f)) + (num-fields (length fields)) + (field->num (make-hash-table)) + (num->field (apply vector (map car fields))) ;; BBHERE + (full-sel (conc "SELECT " (string-intersperse (map car fields) ",") + " FROM " tablename (if use-last-update ;; apply last-update criteria + (conc " WHERE " last-update-field " >= " last-update-value) + "") + ";")) + (full-ins (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) " + " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );")) + (fromdat '()) + (fromdats '()) + (totrecords 0) + (batch-len 100) ;; (string->number (or (configf:lookup *configdat* "sync" "batchsize") "100"))) + (todat (make-hash-table)) + (count 0) + (field-names (map car fields))) + + ;; set up the field->num table + (for-each + (lambda (field) + (hash-table-set! field->num field count) + (set! count (+ count 1))) + fields) + + ;; read the source table + ;; store a list of all rows in the table in fromdat, up to batch-len. + ;; Then add fromdat to the fromdats list, clear fromdat and repeat. + (sqlite3:for-each-row + (lambda (a . b) + (set! fromdat (cons (apply vector a b) fromdat)) + (if (> (length fromdat) batch-len) + (begin + (set! fromdats (cons fromdat fromdats)) + (set! fromdat '()) + (set! totrecords (+ totrecords 1))))) + fromdb + full-sel) + + ;; Count less than batch-len as a record + (if (> (length fromdat) 0) + (set! totrecords (+ totrecords 1))) + + ;; tack on remaining records in fromdat + (if (not (null? fromdat)) + (set! fromdats (cons fromdat fromdats))) + + (sqlite3:for-each-row + (lambda (a . b) + (hash-table-set! todat a (apply vector a b))) + todb + full-sel) + + ;; first pass implementation, just insert all changed rows + (let* ((db todb) + (drp-trigger (if (member "last_update" field-names) + (db:drop-trigger db tablename) + #f)) + (has-last-update (member "last_update" field-names)) + (is-trigger-dropped (if has-last-update + (db:is-trigger-dropped db tablename) + #f)) + (stmth (sqlite3:prepare db full-ins)) + (changed-rows 0)) + (for-each + (lambda (fromdat-lst) + (sqlite3:with-transaction + db + (lambda () + (for-each ;; + (lambda (fromrow) + (let* ((a (vector-ref fromrow 0)) + (curr (hash-table-ref/default todat a #f)) + (same #t)) + (let loop ((i 0)) + (if (or (not curr) + (not (equal? (vector-ref fromrow i)(vector-ref curr i)))) + (set! same #f)) + (if (and same + (< i (- num-fields 1))) + (loop (+ i 1)))) + (if (not same) + (begin + (apply sqlite3:execute stmth (vector->list fromrow)) + (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0))) + (set! changed-rows (+ changed-rows 1)))))) + fromdat-lst)))) + fromdats) + + (sqlite3:finalize! stmth) + (if (member "last_update" field-names) + (db:create-trigger db tablename))))) + tbls) + (let* ((runtime (- (current-milliseconds) start-time)) + (should-print (or ;; (debug:debug-mode 12) + (common:low-noise-print 120 "db sync") + (> runtime 500)))) ;; low and high sync times treated as separate. + (for-each + (lambda (dat) + (let ((tblname (car dat)) + (count (cdr dat))) + (set! tot-count (+ tot-count count)))) + (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b)))))) + tot-count)) + +;;====================================================================== +;; Moved from dbfile +;;====================================================================== + ) Index: dcommon.scm ================================================================== --- dcommon.scm +++ dcommon.scm @@ -708,11 +708,13 @@ #:numlin-visible 5 )) (colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId")) (updater (lambda () (if (dashboard:monitor-changed? commondat tabdat) - (let ((servers (server:choose-server *toppath* 'all-valid))) ;; (server:get-list *toppath* limit: 10))) + (let ((servers (case (rmt:transport-mode) + ((http)(server:choose-server *toppath* 'all-valid)) ;; (server:get-list *toppath* limit: 10))) + (else '())))) (iup:attribute-set! servers-matrix "NUMLIN" (length servers)) ;; (set! colnum 0) ;; (for-each (lambda (colname) ;; ;; (print "colnum: " colnum " colname: " colname) ;; (iup:attribute-set! servers-matrix (conc "0:" colnum) colname) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -33,17 +33,19 @@ (declare (uses common)) (declare (uses commonmod)) (declare (uses configf)) (declare (uses db)) (declare (uses ezsteps)) +(declare (uses dbfile)) (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") (include "megatest-fossil-hash.scm") -(import commonmod) +(import commonmod + dbfile) ;;====================================================================== ;; ezsteps ;;====================================================================== @@ -1143,11 +1145,14 @@ (setenv "MT_TESTSUITENAME" (common:get-testsuite-name))) (begin (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.") (set! *toppath* #f) ;; force it to be false so we return #f #f)) - + + ;; needed by various transport and db modules + (dbfile:testsuite-name (common:get-testsuite-name)) ;; (get-testsuite-name *toppath* *configdat*)) + ;; one more attempt to cache the configs for future reading (let* ((cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) (mtcachef (car cachefiles)) (rccachef (cdr cachefiles))) @@ -1579,11 +1584,11 @@ ;; (list 'serverinf *server-info*) #;(list 'homehost (let* ((hhdat (server:get-homehost))) (if hhdat (car hhdat) #f))) - (list 'serverurl (if *runremote* + #;(list 'serverurl (if *runremote* ;; would like to add this back ... WORK NEEDED (remote-server-url *runremote*) #f)) ;; (list 'areaname (common:get-testsuite-name)) (list 'toppath *toppath*) (list 'work-area work-area) Index: margs.scm ================================================================== --- margs.scm +++ margs.scm @@ -23,10 +23,20 @@ (define (args:get-arg arg . default) (if (null? default) (hash-table-ref/default args:arg-hash arg #f) (hash-table-ref/default args:arg-hash arg (car default)))) + +;; get an arg as a number +(define (args:get-arg-number arg . default) + (let* ((val-str (args:get-arg arg)) + (val (if val-str (string->number val-str) #f))) + (if val + val + (if (null? default) + #f + default)))) (define (args:any? . args) (not (null? (filter (lambda (x) x) (map args:get-arg args))))) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -23,10 +23,18 @@ (define (toplevel-command . a) #f) (declare (uses common)) ;; (declare (uses megatest-version)) (declare (uses margs)) +(declare (uses mtargs)) +(declare (uses mtargs.import)) +(declare (uses commonmod)) +(declare (uses commonmod.import)) +(declare (uses mtargs)) +(declare (uses mtargs.import)) +(declare (uses debugprint)) +(declare (uses debugprint.import)) (declare (uses runs)) (declare (uses launch)) (declare (uses server)) (declare (uses client)) (declare (uses tests)) @@ -41,27 +49,29 @@ (declare (uses api)) (declare (uses tasks)) ;; only used for debugging. (declare (uses env)) (declare (uses diff-report)) (declare (uses db)) +(declare (uses dbfile)) +(declare (uses dbfile.import)) (declare (uses dbmod)) (declare (uses dbmod.import)) -(declare (uses commonmod)) -(declare (uses commonmod.import)) -(declare (uses dbfile)) -(declare (uses dbfile.import)) +(declare (uses tcp-transportmod)) +(declare (uses tcp-transportmod.import)) ;; (declare (uses debugprint)) ;; (declare (uses debugprint.import)) -;; (declare (uses mtargs)) -;; (declare (uses mtargs.import)) ;; (declare (uses ftail)) ;; (import ftail) -(import dbmod +(import mtargs + debugprint + dbmod commonmod - dbfile) + dbfile + tcp-transportmod + ) (define *db* #f) ;; this is only for the repl, do not use in general!!!! (include "common_records.scm") (include "key_records.scm") @@ -69,11 +79,11 @@ (include "run_records.scm") (include "megatest-fossil-hash.scm") (use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (prefix base64 base64:) readline apropos json http-client directory-utils typed-records - http-client srfi-18 extras format) + http-client srfi-18 extras format tcp-server tcp) ;; Added for csv stuff - will be removed ;; (use sparse-vectors) @@ -80,10 +90,14 @@ (require-library mutils) (define *usage-log-file* #f) ;; put path to file for logging usage in this var in the ~/.megatestrc file (define *usage-use-seconds* #t) ;; for Epoc seconds in usage logging change this to #t in ~/.megatestrc file +;; set some parameters here - these need to be put in something that can be loaded from other +;; executables such as dashboard and mtutil +;; +(include "transport-mode.scm") (dbfile:db-init-proc db:initialize-main-db) ;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file ;; (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc"))) @@ -373,10 +387,11 @@ "-load" ;; load and exectute a scheme file "-section" "-var" "-dumpmode" "-run-id" + "-db" "-ping" "-refdb2dat" "-o" "-log" "-sync-log" @@ -585,13 +600,12 @@ ;; where (launch:setup) returns #f? ;; (if (or (args:get-arg "-log")(args:get-arg "-server")) ;; redirect the log always when a server (handle-exceptions exn - (begin - (print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) - ) + (begin + (print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) (let* ((tl (or (args:get-arg "-log")(launch:setup))) ;; run launch:setup if -server, ensure we do NOT run launch:setup if -log specified (logf (or (args:get-arg "-log") ;; use -log unless we are a server, then craft a logfile name (conc tl "/logs/server-" (current-process-id) "-" (get-host-name) ".log"))) (oup (open-logfile logf))) (if (not (args:get-arg "-log")) @@ -921,13 +935,24 @@ ;;====================================================================== ;; Server? Start up here. ;; (if (args:get-arg "-server") - (let ((tl (launch:setup))) - ;; (server:launch 0 'http) - (http-transport:launch) + (let* ((run-id (args:get-arg "-run-id")) + (dbfname (args:get-arg "-db")) + (tl (launch:setup)) + (keys (keys:config-get-fields *configdat*))) + (case (rmt:transport-mode) + ((http)(http-transport:launch)) + ((tcp) + (debug:print 0 *default-log-port* "INFO: Running using tcp method.") + (if run-id + (tt:start-server tl run-id dbfname api:tcp-dispatch-request-make-handler keys) + (begin + (debug:print 0 *default-log-port* "ERROR: transport mode is tcp - -run-id is required.") + (exit 1)))) + (else (debug:print 0 *default-log-port* "ERROR: rmt:transport-mode value not recognised "(rmt:transport-mode)))) (set! *didsomething* #t))) ;; The adjutant is a bit different, it does NOT run (launch:setup) as it is not necessarily tied to ;; a specific Megatest area. Detail are being hashed out and this may change. ;; @@ -2370,11 +2395,13 @@ (if (or (getenv "MT_RUNSCRIPT") (args:get-arg "-repl") (args:get-arg "-load")) (let* ((toppath (launch:setup)) (dbstructs (if (and toppath - (server:choose-server toppath 'home?)) + ;; NOTE: server:choose-server is starting a server + ;; either add equivalent for tcp mode or ???? + #;(server:choose-server toppath 'home?)) (db:setup #t) #f))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f))) (if *toppath* (cond ((getenv "MT_RUNSCRIPT") Index: mtargs/mtargs.scm ================================================================== --- mtargs/mtargs.scm +++ mtargs/mtargs.scm @@ -18,16 +18,17 @@ (module mtargs ( arg-hash get-arg + get-arg-number get-arg-from get-args usage print-args any-defined? - ) + ) (import scheme) ;; gives us cond-expand in chicken-4 (cond-expand (chicken-5 @@ -42,10 +43,20 @@ (define (get-arg arg . default) (if (null? default) (hash-table-ref/default arg-hash arg #f) (hash-table-ref/default arg-hash arg (car default)))) + +;; get an arg as a number +(define (get-arg-number arg . default) + (let* ((val-str (get-arg arg)) + (val (if val-str (string->number val-str) #f))) + (if val + val + (if (null? default) + #f + default)))) (define (any-defined? . args) (not (null? (filter (lambda (x) x) (map get-arg args))))) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -21,16 +21,25 @@ (use format typed-records) ;; RADT => purpose of json format?? (declare (unit rmt)) (declare (uses api)) (declare (uses http-transport)) +(declare (uses commonmod)) (declare (uses dbfile)) +(declare (uses dbmemmod)) +(declare (uses tcp-transportmod)) (include "common_records.scm") ;; (declare (uses rmtmod)) +;; used by http-transport (import dbfile) ;; rmtmod) +(import commonmod + dbmemmod + tcp-transportmod) + +(define rmt:transport-mode (make-parameter 'http)) ;; ;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!! ;; ;; generate entries for ~/.megatestrc with the following @@ -61,42 +70,39 @@ (cdr hh-dat) (begin (debug:print-info 0 *default-log-port* "hh-dat="hh-dat) #f)))) +(define (make-and-init-remote areapath) + (case (rmt:transport-mode) + ((http)(make-remote)) + ((tcp) (tt:make-remote areapath)) + (else #f))) ;;====================================================================== (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected - #;(common:telemetry-log (conc "rmt:"(->string cmd)) - payload: `((rid . ,rid) - (params . ,params))) - (if (> attemptnum 2) (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum)) - + (cond ((> attemptnum 2) (thread-sleep! 0.05)) ((> attemptnum 10) (thread-sleep! 0.5)) ((> attemptnum 20) (thread-sleep! 1))) + + ;; I'm turning this off, it may make sense to move it + ;; into http-transport-handler (if (and (> attemptnum 5) (= 0 (modulo attemptnum 15))) - (begin (server:run *toppath*) (thread-sleep! 3))) - - - ;;DOT digraph megatest_state_status { - ;;DOT ranksep=0; - ;;DOT // rankdir=LR; - ;;DOT node [shape="box"]; - ;;DOT "rmt:send-receive" -> MUTEXLOCK; - ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; } - ;; do all the prep locked under the rmt-mutex - (mutex-lock! *rmt-mutex*) + (begin + (debug:print 0 *default-log-port* "ERROR: can't connect to server, trying to start a server.") + (server:run *toppath*) + (thread-sleep! 3))) ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. ;; 3. do the query, if on homehost use local access ;; @@ -103,79 +109,69 @@ (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas (runremote (or area-dat *runremote*)) (attemptnum (+ 1 attemptnum)) - (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*))) - - ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity - ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"]; - ;; DOT INIT_RUNREMOTE -> MUTEXLOCK; - ;; ensure we have a record for our connection for given area - (if (not runremote) ;; can remove this one. should never get here. - (begin - (set! *runremote* (make-remote)) - (let* ((server-info (remote-server-info *runremote*))) - (if server-info - (begin - (remote-server-url-set! *runremote* (server:record->url server-info)) - (remote-server-id-set! *runremote* (server:record->id server-info))))) - (set! runremote *runremote*))) ;; new runremote will come from this on next iteration - - (http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode))) - + (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*)) + (testsuite (common:get-testsuite-name)) + (mtexe (common:find-local-megatest))) + + (case (rmt:transport-mode) + ((http)(http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode)) + ((tcp) (tcp-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode testsuite mtexe))))) + +(define (tcp-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode testsuite mtexe) + (if (not runremote) + (let* ((newremote (make-and-init-remote areapath))) + (set! *runremote* newremote) + (set! runremote newremote))) + (let* ((dbfname (conc (dbfile:run-id->dbnum rid)".db"))) ;;(dbfile:run-id->path areapath run-id))) + (tt:handler runremote cmd rid params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe))) + (define (http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode) - ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity - ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"]; - ;; DOT SET_HOMEHOST -> MUTEXLOCK; + ;; do all the prep locked under the rmt-mutex + (mutex-lock! *rmt-mutex*) + + ;; ensure we have a record for our connection for given area + (if (not runremote) ;; can remove this one. should never get here. + (begin + (set! *runremote* (make-and-init-remote areapath)) + (let* ((server-info (remote-server-info *runremote*))) + (if server-info + (begin + (remote-server-url-set! *runremote* (server:record->url server-info)) + (remote-server-id-set! *runremote* (server:record->id server-info))))) + (set! runremote *runremote*))) ;; new runremote will come from this on next iteration + ;; ensure we have a homehost record (if (or (not (pair? (remote-hh-dat runremote))) ;; not on homehost (not (cdr (remote-hh-dat runremote)))) ;; not on homehost (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little (let ((hh-data (server:choose-server areapath 'homehost))) (remote-hh-dat-set! runremote (or hh-data (cons #f #f))))) - ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) (cond - #;((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds - (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.") - (set! *runremote* #f) - ;; BUG: close-connections should go here? - (mutex-unlock! *rmt-mutex*) - (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat)) - - ;;DOT EXIT; - ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" } ;; give up if more than 150 attempts ((> attemptnum 150) (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.") (exit 1)) - ;;DOT CASE2 [label="local\nreadonly\nquery"]; - ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2} - ;;DOT CASE2 -> "rmt:open-qry-close-locally"; ;; readonly mode, read request- handle it - case 2 ((and readonly-mode (member cmd api:read-only-queries)) (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2") (rmt:open-qry-close-locally cmd 0 params) ) - ;;DOT CASE3 [label="write in\nread-only mode"]; - ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3} - ;;DOT CASE3 -> "#f"; ;; readonly mode, write request. Do nothing, return #f (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params)) ;; This block was for pre-emptively resetting the connection if there had been no communication for some time. ;; I don't think it adds any value. If the server is not there, just fail and start a new connection. ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout) ;; - ;;DOT CASE4 [label="reset\nconnection"]; - ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4} - ;;DOT CASE4 -> "rmt:send-receive"; ;; reset the connection if it has been unused too long ((and runremote (remote-api-url runremote) (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on (+ (remote-last-access runremote) @@ -185,61 +181,27 @@ ;; moving this setting of runremote conndat to #f to inside the http-transport:close-connections ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection. (mutex-unlock! *rmt-mutex*) (rmt:send-receive cmd rid params attemptnum: attemptnum)) - ;;DOT CASE5 [label="local\nread"]; - ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5}; - ;;DOT CASE5 -> "rmt:open-qry-close-locally"; - ;; on homehost and this is a read ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required (rmt:on-homehost? runremote) (member cmd api:read-only-queries)) ;; this is a read (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") (rmt:open-qry-close-locally cmd 0 params)) - ;;DOT CASE6 [label="init\nremote"]; - ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6}; - ;;DOT CASE6 -> "rmt:send-receive"; - ;; on homehost and this is a write, we already have a server, but server has died - - ;; reinstate this keep-alive section but inject a time condition into the (add ... - ;; - ;; ((and (cdr (remote-hh-dat runremote)) ;; on homehost - ;; (not (member cmd api:read-only-queries)) ;; this is a write - ;; (remote-server-url runremote) ;; have a server - ;; (not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. - ;; (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6") - ;; (http-transport:close-connections area-dat: runremote) ;; make sure to clean up - ;; (set! *runremote* (make-remote)) - ;; (let* ((server-info (remote-server-info *runremote*))) - ;; (if server-info - ;; (begin - ;; (remote-server-url-set! *runremote* (server:record->url server-info)) - ;; (remote-server-id-set! *runremote* (server:record->id server-info))))) - ;; (remote-force-server-set! runremote (common:force-server?)) - ;; (mutex-unlock! *rmt-mutex*) - ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6") - ;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) - - ;;DOT CASE7 [label="homehost\nwrite"]; - ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7}; - ;;DOT CASE7 -> "rmt:open-qry-close-locally"; ;; on homehost and this is a write, we already have a server ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required (cdr (remote-hh-dat runremote)) ;; on homehost (not (member cmd api:read-only-queries)) ;; this is a write (remote-server-url runremote)) ;; have a server (needed to sync written data back) (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") (rmt:open-qry-close-locally cmd 0 params)) - ;;DOT CASE8 [label="force\nserver"]; - ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8}; - ;;DOT CASE8 -> "rmt:open-qry-close-locally"; ;; on homehost, no server contact made and this is a write, passively start a server ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required (cdr (remote-hh-dat runremote)) ;; have homehost (not (remote-server-url runremote)) ;; no connection yet (not (member cmd api:read-only-queries))) ;; not a read-only query @@ -286,11 +248,10 @@ ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11}; ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"]; ;;DOT CASE11 -> "RESULT" [label="call succeeded"]; ;; not on homehost, do server query (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)))) -;;DOT } ;; bunch of small functions factored out of send-receive to make debug easier ;; (define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid) @@ -1021,23 +982,36 @@ (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id))) (define (rmt:test-get-archive-block-info archive-block-id) (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id))) - (define (rmtmod:calc-ro-mode runremote *toppath*) - (if (and runremote - (remote-ro-mode-checked runremote)) - (remote-ro-mode runremote) - (let* ((mtcfgfile (conc *toppath* "/megatest.config")) - (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future - (if runremote - (begin - (remote-ro-mode-set! runremote ro-mode) - (remote-ro-mode-checked-set! runremote #t) - ro-mode) - ro-mode)))) + (case (rmt:transport-mode) + ((http) + (if (and runremote + (remote-ro-mode-checked runremote)) + (remote-ro-mode runremote) + (let* ((mtcfgfile (conc *toppath* "/megatest.config")) + (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future + (if runremote + (begin + (remote-ro-mode-set! runremote ro-mode) + (remote-ro-mode-checked-set! runremote #t) + ro-mode) + ro-mode)))) + ((tcp) + (if (and runremote + (tt-ro-mode-checked runremote)) + (tt-ro-mode runremote) + (let* ((mtcfgfile (conc *toppath* "/megatest.config")) + (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future + (if runremote + (begin + (tt-ro-mode-set! runremote ro-mode) + (tt-ro-mode-checked-set! runremote #t) + ro-mode) + ro-mode)))))) (define (extras-readonly-mode rmt-mutex log-port cmd params) (mutex-unlock! rmt-mutex) (debug:print-info 12 log-port "rmt:send-receive, case 3") (debug:print 0 log-port "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -22,10 +22,11 @@ sxml-modifications matchable) (declare (unit runs)) (declare (uses db)) (declare (uses common)) +(declare (uses commonmod)) (declare (uses items)) (declare (uses runconfig)) (declare (uses tests)) (declare (uses server)) (declare (uses mt)) @@ -37,10 +38,12 @@ (include "db_records.scm") (include "run_records.scm") (include "test_records.scm") ;; (include "debugger.scm") + +(import commonmod) ;; use this struct to facilitate refactoring ;; (defstruct runs:dat Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -101,24 +101,24 @@ (if *server-id* *server-id* (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic (set! *server-id* sig) *server-id*))) -;; When using zmq this would send the message back (two step process) -;; with spiffy or rpc this simply returns the return data to be returned -;; -(define (server:reply return-addr query-sig success/fail result) - (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result) - ;; (send-message pubsock target send-more: #t) - ;; (send-message pubsock - (case (server:get-transport) - ((rpc) (db:obj->string (vector success/fail query-sig result))) - ((http) (db:obj->string (vector success/fail query-sig result))) - ((fs) result) - (else - (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*) - result))) +;; ;; When using zmq this would send the message back (two step process) +;; ;; with spiffy or rpc this simply returns the return data to be returned +;; ;; +;; (define (server:reply return-addr query-sig success/fail result) +;; (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result) +;; ;; (send-message pubsock target send-more: #t) +;; ;; (send-message pubsock +;; (case (server:get-transport) +;; ((rpc) (db:obj->string (vector success/fail query-sig result))) +;; ((http) (db:obj->string (vector success/fail query-sig result))) +;; ((fs) result) +;; (else +;; (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*) +;; result))) ;; Given an area path, start a server process ### NOTE ### > file 2>&1 ;; if the target-host is set ;; try running on that host ;; incidental: rotate logs in logs/ dir. @@ -676,11 +676,11 @@ (else #f)))) (cond ((and (list? host-port) (eq? (length host-port) 2)) - (let* ((myrunremote (make-remote)) + (let* ((myrunremote (make-and-init-remote *toppath*)) (iface (car host-port)) (port (cadr host-port)) (server-dat (client:connect iface port server-id myrunremote)) (login-res (rmt:login-no-auto-client-setup myrunremote))) (http-transport:close-connections myrunremote) ADDED tcp-transportmod.scm Index: tcp-transportmod.scm ================================================================== --- /dev/null +++ tcp-transportmod.scm @@ -0,0 +1,586 @@ +;;====================================================================== +;; Copyright 2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +(declare (unit tcp-transportmod)) +(declare (uses debugprint)) +(declare (uses commonmod)) +(declare (uses dbfile)) +(declare (uses dbmod)) + +(use address-info) + +(module tcp-transportmod + * + + (import scheme + (prefix sqlite3 sqlite3:) + chicken + data-structures + + address-info + directory-utils + extras + files + hostinfo + matchable + md5 + message-digest + ports + posix + regex + regex-case + s11n + srfi-1 + srfi-18 + srfi-4 + srfi-69 + stack + typed-records + tcp-server + tcp + + debugprint + commonmod + dbfile + dbmod + ) + +;;====================================================================== +;; client +;;====================================================================== + +;; (define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic + +;; Used ONLY for client +;; +(defstruct tt-conn + host + port + host-port + dbfname + server-id + server-start + pid +) + +;; Used for BOTH clients and servers +(defstruct tt + ;; client related + (conns (make-hash-table)) ;; dbfname -> conn + + ;; server related + (areapath #f) + (host #f) + (port #f) + (conn #f) + (cleanup-proc #f) + (handler #f) ;; receives data and responds + (socket #f) + (thread #f) + (host-port #f) + (cmd-thread #f) + (ro-mode #f) + (ro-mode-checked #f) + (last-access (current-seconds)) + ) + +(define (tt:make-remote areapath) + (make-tt areapath: areapath)) + +;; do all the busy work of finding and setting up conn for +;; connecting to a server +;; +(define (tt:client-connect-to-server ttdat dbfname run-id ) + (let* ((conn (hash-table-ref/default (tt-conns ttdat) dbfname #f)) + (server-start-proc (lambda () + (tt:server-process-run + (tt-areapath ttdat) + (dbfile:testsuite-name) + (common:find-local-megatest) + run-id)))) + (if conn + conn ;; we are already connected to the server + (let* ((sdat (tt:get-current-server-info ttdat dbfname run-id))) + (match sdat + ((host port start-time server-id pid dbfname2) + (assert (equal? dbfname dbfname2) "FATAL: read server info from wrong file.") + (let* ((host-port (conc host":"port)) + (conn (make-tt-conn + host: host + port: port + host-port: host-port + dbfname: dbfname + server-id: server-id + server-start: start-time + pid: pid))) + (hash-table-set! (tt-conns ttdat) dbfname conn) + ;; verify we can talk to this server + (if (tt:ping host port server-id) + conn + (begin + ;; rm the (last server) would go here + (server-start-proc) + (thread-sleep! 1) + (tt:client-connect-to-server ttdat dbfname run-id))))) + (else + (server-start-proc) + (thread-sleep! 1) + (tt:client-connect-to-server ttdat dbfname run-id))))))) + +(define (tt:ping host port server-id) + (let* ((res (tt:send-receive-direct host port `(ping #f #f #f)))) ;; please send me your server-id + ;; + ;; need two threads, one a 5 second timer + ;; + (match res + ((status errmsg result meta) + (if (equal? result server-id) + #t ;; then we are good + (begin + (debug:print 0 *default-log-port* "WARNING: server-id does not match, expected: "server-id", got: "result) + #f))) + (else + (debug:print 0 *default-log-port* "res not in form (status errmsg resutl meta), got: "res) + #f)))) + +;; client side handler +;; +(define (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe) + ;; NOTE: areapath is passed in and in tt struct. We'll use passed in value for now. + (let* ((conn (tt:client-connect-to-server ttdat dbfname run-id))) ;; (hash-table-ref/default (tt-conns ttdat) dbfname #f))) + (if conn + ;; have connection, call the server + (let* ((res (tt:send-receive ttdat conn cmd run-id params))) + ;; res is (status errmsg result meta) + (match res + ((status errmsg result meta) + (case status + ((busy) + (debug:print 0 *default-log-port* "WARNING: server is overloaded, will try again in few seconds.") + (thread-sleep! 2) + (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe)) + ((loaded) + (debug:print 0 *default-log-port* "WARNING: server is loaded, will try again in a second.") + (thread-sleep! 1) + (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe)) + (else + result))) + (else + (if (not res) + (begin ;; server likely died + (hash-table-set! (tt-conns ttdat) dbfname #f) + (debug:print 0 *default-log-port* "INFO: connection to server broken, reconnecting.") + (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe)) + (assert #f "FATAL: tt:handler received bad data "res))))) + (begin + (thread-sleep! 1) ;; give it a rest and try again + (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe))))) + + ;; no conn yet, find and or start and find a server +;; (let* ((server (tt:find-server ttdat dbfname))) +;; (if server +;; (let* ((conn (tt:client-connect-to-server server))) +;; (hash-table-set! (tt-conns ttdat) dbfname conn) +;; (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode +;; dbfname testsuite mtexe)) +;; ;; no server, try to start a server process +;; (begin +;; (tt:server-process-run areapath testsuite mtexe run-id) ;; #!key (profile-mode "")) +;; (thread-sleep! 1) +;; (tt:handler ttdat cmd run-id params attemptnum area-dat areapath +;; readonly-mode dbfname testsuite mtexe))))))) + +(define (tt:bid-for-servership run-id) + #f) + +(define (tt:get-current-server-info ttdat dbfname run-id) + (assert (tt-areapath ttdat) "FATAL: areapath not set in ttdat.") + (let* ((areapath (tt-areapath ttdat)) + (sfiles (tt:find-server areapath dbfname)) + (sdats (filter car (map tt:server-get-info sfiles))) ;; first element is #f if the file disappeared while being read + (sorted (sort sdats (lambda (a b) + (< (list-ref a 2)(list-ref b 2)))))) + (if (null? sorted) + #f ;; we'll want to wait until extra servers have exited + (car sorted)))) + +(define (tt:send-receive ttdat conn cmd run-id params) + (let* ((host-port (tt-conn-host-port conn)) ;; (conc (tt-conn-host conn)":"(tt-conn-port conn))) + (host (tt-conn-host conn)) + (port (tt-conn-port conn)) + (dat (list cmd run-id params #f))) ;; no meta data yet + (tt:send-receive-direct host port dat))) + +(define (tt:send-receive-direct host port dat) + (assert (number? port) "FATAL: tt:send-receive-direct called with port not a number "port) + (handle-exceptions + exn + #f ;; Add condition-case or better handling here + (let-values (((inp oup)(tcp-connect host port))) + (let ((res (if (and inp oup) + (begin + (serialize dat oup) + (close-output-port oup) + (deserialize inp)) + ))) + (close-input-port inp) + res)))) + + + +;;====================================================================== +;; server +;;====================================================================== + +(define (tt:sync-dbs ttdat) + #f) + +;; start the listener and start responding to requests +;; +;; NOTE: organise by dbfname, not run-id so we don't need +;; to pull in more modules +;; +;; This is the routine called in megatest.scm to start a server +;; +(define (tt:start-server areapath run-id dbfname handler keys) + (assert areapath "FATAL: areapath not provided for tt:start-server") + ;; is there already a server for this dbfile? Then exit. + (let* ((ttdat (make-tt areapath: areapath)) + (servers (tt:find-server areapath dbfname))) ;; should use tt:get-current-server-info instead + (if (null? servers) + (let* ((dbstruct (dbmod:open-dbmoddb areapath run-id (dbfile:db-init-proc) keys))) + (tt-handler-set! ttdat (handler dbstruct)) + (let* ((tcp-thread (make-thread + (lambda () + (tt:start-tcp-server ttdat)) ;; start the tcp-server which applies handler to incoming data + "tcp-server-thread")) + (run-thread (make-thread + (lambda () + (tt:keep-running ttdat dbfname dbstruct))))) + (thread-start! tcp-thread) + (thread-start! run-thread) + (thread-join! run-thread) ;; run thread will exit on timeout or other conditions + ;; + ;; set a flag here to tell tcp-thread to stop running + ;; + ;; (thread-join! tcp-thread) ;; can't wait + ;; + ;; remove the servinfo file + ;; + ;; close the database, remove lock in on-disk db + ;; + ;; close the listener ports + ;; + (exit))) + (begin + (debug:print 0 *default-log-port* "INFO: found server(s) already running for db "dbfname", "(string-intersperse servers ",")" Exiting.") + (exit))))) + +(define (tt:keep-running ttdat dbfname dbstruct) + ;; verfiy conn for ready + ;; listener socket has been started by this stage + (thread-sleep! 1) + (let loop ((count 0)) + (if (> count 60) + (begin + (debug:print 0 *default-log-port* "FATAL: Could not start a tcp server, giving up.") + (exit 1)) + (if (not (tt-port ttdat)) ;; no connection yet + (let* ((last-update (dbr:dbstruct-last-update dbstruct)) + (curr-secs (current-seconds))) + (if (> (- curr-secs last-update) 3) ;; every 3-4 seconds + (begin + ((dbr:dbstruct-syncback-proc) last-update) + (dbr:dbstruct-last-update-set! curr-secs))) + (thread-sleep! 1) + (loop (+ count 1)))))) + + (tt:create-server-registration-file ttdat dbfname) + ;; now start watching the last-access, if it hasn't been touched + ;; in over ten seconds we exit + (let loop () + (if (< (- (current-seconds) (tt-last-access ttdat)) 60) + (begin + (thread-sleep! 2) + (loop)))) + (if (tt-cleanup-proc ttdat) + ((tt-cleanup-proc ttdat))) + (debug:print 0 *default-log-port* "INFO: Server timed out, exiting.")) + +;; ;; given an already set up uconn start the cmd-loop +;; ;; +;; (define (tt:cmd-loop ttdat) +;; (let* ((serv-listener (-socket uconn)) +;; (listener (lambda () +;; (let loop ((state 'start)) +;; (let-values (((inp oup)(tcp-accept serv-listener))) +;; ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP +;; (let* ((rdat (deserialize inp)) ;; '(my-host-port qrykey cmd params) +;; (resp (ulex-handler uconn rdat))) +;; (serialize resp oup) +;; (close-input-port inp) +;; (close-output-port oup) +;; ;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP +;; ) +;; (loop state)))))) +;; ;; start N of them +;; (let loop ((thnum 0) +;; (threads '())) +;; (if (< thnum 100) +;; (let* ((th (make-thread listener (conc "listener" thnum)))) +;; (thread-start! th) +;; (loop (+ thnum 1) +;; (cons th threads))) +;; (map thread-join! threads))))) +;; +;; +;; +;; (define (wait-and-close uconn) +;; (thread-join! (udat-cmd-thread uconn)) +;; (tcp-close (udat-socket uconn))) +;; +;; + +(define (tt:shutdown-server ttdat) + (let* ((cleanproc (tt-cleanup-proc ttdat))) + (if cleanproc (cleanproc)) + (tcp-close (tt-socket ttdat)) ;; close up ports here + )) + +;; (define (wait-and-close uconn) +;; (thread-join! (tt-cmd-thread uconn)) +;; (tcp-close (tt-socket uconn))) + +;; return servid +;; side-effects: +;; ttdat-cleanup-proc is populated with function to remove the serverinfo file +(define (tt:create-server-registration-file ttdat dbfname) + (let* ((areapath (tt-areapath ttdat)) + (servdir (tt:get-servinfo-dir areapath)) + (host (tt-host ttdat)) + (port (tt-port ttdat)) + (servinf (conc servdir"/"host":"port"-"(current-process-id)":"dbfname)) + (serv-id (tt:mk-signature areapath)) + (clean-proc (lambda () + (delete-file* servinf)))) + (assert (and host port) "FATAL: tt:create-server-registration-file called with no conn, dbfname="dbfname) + (tt-cleanup-proc-set! ttdat clean-proc) + (with-output-to-file servinf + (lambda () + (print "SERVER STARTED: "host":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)" dbfname: "dbfname))) + serv-id)) + +;; find valid server +;; get servers listed, last part of name must match : +;; if more than one, wait one second and look again +;; future: ping oldest, if alive remove other : files +;; +(define (tt:find-server areapath dbfname) + (let* ((servdir (tt:get-servinfo-dir areapath)) + (sfiles (glob (conc servdir"/*:"dbfname)))) + sfiles)) + +;; given a path to a server info file return: host port startseconds server-id +;; example of what it's looking for in the log file: +;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 +;; +(define (tt:server-get-info logf) + (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+) dbfname: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id + (dbprep-rx (regexp "^SERVER: dbprep")) + (dbprep-found 0) + (bad-dat (list #f #f #f #f #f #f))) + (handle-exceptions + exn + (begin + ;; WARNING: this is potentially dangerous to blanket ignore the errors + (if (file-exists? logf) + (debug:print-info 2 *default-log-port* "Unable to get server info from "logf", exn=" exn)) + bad-dat) ;; no idea what went wrong, call it a bad server + (with-input-from-file + logf + (lambda () + (let loop ((inl (read-line)) + (lnum 0)) + (if (not (eof-object? inl)) + (let ((mlst (string-match server-rx inl)) + (dbprep (string-match dbprep-rx inl))) + (if dbprep (set! dbprep-found 1)) + (if (not mlst) + (if (< lnum 500) ;; give up if more than 500 lines of server log read + (loop (read-line)(+ lnum 1)) + (begin + (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf ) + bad-dat)) + (match mlst + ((_ host port start server-id pid dbfname) + (list host + (string->number port) + (string->number start) + server-id + (string->number pid) + dbfname)) + (else + (debug:print 0 *default-log-port* "ERROR: did not recognise SERVER line info "mlst) + bad-dat)))) + (begin + (if dbprep-found + (begin + (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time)) + (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting? + (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds)))) + bad-dat)))))))) + +;; Given an area path, start a server process ### NOTE ### > file 2>&1 +;; if the target-host is set +;; try running on that host +;; incidental: rotate logs in logs/ dir. +;; +(define (tt:server-process-run areapath testsuite mtexe run-id #!key (profile-mode "")) ;; areapath is *toppath* for a given testsuite area + (let* ((logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log")) + (cmdln (conc + mtexe + " -server - ";; (or target-host "-") + " -m testsuite:" testsuite + " -run-id " (or run-id "main") + " -db " (dbmod:run-id->dbfname run-id) + " " profile-mode + ))) ;; (conc " >> " logfile " 2>&1 &"))))) + ;; we want the remote server to start in *toppath* so push there + (push-directory areapath) + (debug:print 0 *default-log-port* "INFO: Trying to start server in tcp mode (" cmdln ") ...") + (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time)) + (system (conc "nbfake " cmdln)) + (pop-directory))) + +;;====================================================================== +;; tcp connection stuff +;;====================================================================== + +;; find a port and start tcp-server. This only starts the tcp portion of +;; the server, look at (tt:start-server ...) above for the entry point +;; for the entire server system +;; +(define (tt:start-tcp-server ttdat) + (setup-listener ttdat) + (let* ((socket (tt-socket ttdat)) + (handler (tt-handler ttdat))) + ((make-tcp-server socket handler) + #t ;; yes, send error messages to std-err + ))) + +;; create a tcp listener and return a populated udat struct with +;; my port, address, hostname, pid etc. +;; return #f if fail to find a port to allocate. +;; +;; if udata-in is #f create the record +;; if there is already a serv-listener return the udata +;; +(define (setup-listener uconn #!optional (port 4242)) + (assert (tt? uconn) "FATAL: setup-listener called with wrong struct "uconn) + (handle-exceptions + exn + (if (< port 65535) + (setup-listener uconn (+ port 1)) + #f) + (connect-listener uconn port))) + +(define (connect-listener uconn port) + ;; (tcp-listener-socket LISTENER)(socket-name so) + ;; sockaddr-address, sockaddr-port, sockaddr->string + (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]]) + (addr (tt:get-best-guess-address (get-host-name)))) ;; (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname))) + (tt-port-set! uconn port) + (tt-host-set! uconn addr) + (tt-host-port-set! uconn (conc addr":"port)) + (tt-socket-set! uconn tlsn) + uconn)) + + + +;;====================================================================== +;; utils +;;====================================================================== + +;; Generate a unique signature for this server +(define (tt:mk-signature areapath) + (message-digest-string (md5-primitive) + (with-output-to-string + (lambda () + (write (list areapath + (current-process-id) + (argv))))))) + + +(define (tt:get-best-guess-address hostname) + (let ((res #f)) + (for-each + (lambda (adr) + (if (not (eq? (u8vector-ref adr 0) 127)) + (set! res adr))) + ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME + (vector->list (hostinfo-addresses (hostname->hostinfo hostname)))) + (string-intersperse + (map number->string + (u8vector->list + (if res res (hostname->ip hostname)))) "."))) + +(define (tt:get-servinfo-dir areapath) + (let* ((spath (conc areapath"/.servinfo"))) + (if (not (file-exists? spath)) + (create-directory spath #t)) + spath)) + +;;====================================================================== +;; network utilities +;;====================================================================== + +;; NOTE: Look at address-info egg as alternative to some of this + +(define (rate-ip ipaddr) + (regex-case ipaddr + ( "^127\\..*" _ 0 ) + ( "^(10\\.0|192\\.168)\\..*" _ 1 ) + ( else 2 ) )) + +;; Change this to bias for addresses with a reasonable broadcast value? +;; +(define (ip-pref-less? a b) + (> (rate-ip a) (rate-ip b))) + +(define (get-my-best-address) + (let ((all-my-addresses (get-all-ips))) + (cond + ((null? all-my-addresses) + (get-host-name)) ;; no interfaces? + ((eq? (length all-my-addresses) 1) + (car all-my-addresses)) ;; only one to choose from, just go with it + (else + (car (sort all-my-addresses ip-pref-less?)))))) + +(define (get-all-ips-sorted) + (sort (get-all-ips) ip-pref-less?)) + +(define (get-all-ips) + (map address-info-host + (filter (lambda (x) + (equal? (address-info-type x) "tcp")) + (address-infos (get-host-name))))) + +) ADDED transport-mode.scm.template Index: transport-mode.scm.template ================================================================== --- /dev/null +++ transport-mode.scm.template @@ -0,0 +1,3 @@ +;; 'http or 'tcp +;; (rmt:transport-mode 'tcp) +(rmt:transport-mode 'http)