Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -279,11 +279,13 @@ (conndat #f) (transport *transport-type*) (server-timeout (server:expiration-timeout)) (force-server #f) (ro-mode #f) - (ro-mode-checked #f)) ;; flag that indicates we have checked for ro-mode + (ro-mode-checked #f) ;; flag that indicates we have checked for ro-mode + (ulex:conn #f) ;; ulex db conn is not exactly a db connector, more like a network connector + ) ;; launching and hosts (defstruct host (reachable #f) (last-update 0) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -304,13 +304,19 @@ ;if wal mode is on -WAL and -shm file get created when db:open-megatest-db is called. modtimedelta will always be < 10 so db in tmp not get synced ;(tmpdbmodtime (if dbfexists (db:get-last-update-time (car tmpdb)) #f)) ;(fmt (file-modification-time tmpdbfname)) (modtimedelta (and mtdbmodtime tmpdbmodtime (- mtdbmodtime tmpdbmodtime)))) - (when write-access - (sqlite3:execute (car mtdb) "drop trigger if exists update_tests_trigger") - (sqlite3:execute (car mtdb) "drop trigger if exists update_runs_trigger")) + (handle-exceptions + exn + (let ((call-chain (get-call-chain)) + (msg ((condition-property-accessor 'exn 'message) exn))) + (debug:print 0 *default-log-port* "ERROR: attempted to drop triggers on MTRA/megatest.db but failed. Error is " msg) + (set! write-access #f)) ;; if we failed to drop the triggers then we probably don't have write access + (when write-access + (sqlite3:execute (car mtdb) "drop trigger if exists update_tests_trigger") + (sqlite3:execute (car mtdb) "drop trigger if exists update_runs_trigger"))) ;(print "mtdbmodtime " mtdbmodtime " tmpdbmodtime " tmpdbmodtime " mtdbpath " mtdbpath " " (conc *toppath* "/megatest.db")) ;;(debug:print-info 13 *default-log-port* "db:open-db>> mtdbpath="mtdbpath" mtdbexists="mtdbexists" and write-access="write-access) (if (and dbexists (not write-access)) (begin Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -26,10 +26,14 @@ (include "common_records.scm") (declare (uses rmtmod)) (import rmtmod) +(define (member:print msg x lst) + (print "member: " msg " x=" x " lst=" lst) + (member x lst)) + ;; ;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!! ;; ;; generate entries for ~/.megatestrc with the following @@ -54,189 +58,160 @@ (client:setup areapath) #f)))) (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id + + +;; this entry point can decide based on cmd whether to dispatch to old api calls via remote or via ulex +;; +(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) + (let* ((areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas + (runremote (or area-dat + *runremote*))) + ;; ensure we have a record for our connection for given area + (if (not runremote) ;; can remove this one. should never get here. + (begin + (set! *runremote* (make-remote)) + (set! runremote *runremote*))) ;; new runremote will come from this on next iteration + + (if (member:print "#2" cmd '(blah)) + (begin + (mutex-lock! *send-receive-mutex*) + (if (not *runremote*)(set! *runremote* (make-remote))) + (let ((ulex:conn (remote-ulex:conn *runremote*))) + (if (not ulex:conn)(remote-ulex:conn-set! *runremote* (rmtmod:setup-ulex *toppath*))) + (rmtmod:send-receive-ulex ulex:conn cmd rid params attemptnum area-dat))) + (rmt:send-receive-orig *default-log-port* runremote *rmt-mutex* areapath *db-multi-sync-mutex* cmd rid params attemptnum: attemptnum area-dat: area-dat ro-queries: api:read-only-queries)))) + ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; -(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected +;; add multi-sync-mutex +;; +(define (rmt:send-receive-orig log-port runremote rmt-mutex toppath multi-sync-mutex cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected - (common:telemetry-log (conc "rmt:"(->string cmd)) + #;(common:telemetry-log (conc "rmt:"(->string cmd)) payload: `((rid . ,rid) (params . ,params))) - ;;DOT digraph megatest_state_status { - ;;DOT ranksep=0; - ;;DOT // rankdir=LR; - ;;DOT node [shape="box"]; - ;;DOT "rmt:send-receive" -> MUTEXLOCK; - ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; } ;; do all the prep locked under the rmt-mutex - (mutex-lock! *rmt-mutex*) + ;;(mutex-lock! rmt-mutex) ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. ;; 3. do the query, if on homehost use local access ;; (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value - (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas - (runremote (or area-dat - *runremote*)) - (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*))) - - ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity - ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"]; - ;; DOT INIT_RUNREMOTE -> MUTEXLOCK; - ;; ensure we have a record for our connection for given area - (if (not runremote) ;; can remove this one. should never get here. - (begin - (set! *runremote* (make-remote)) - (set! runremote *runremote*))) ;; new runremote will come from this on next iteration - - ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity - ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"]; - ;; DOT SET_HOMEHOST -> MUTEXLOCK; + + (readonly-mode (rmtmod:calc-ro-mode runremote toppath))) + ;; ensure we have a homehost record (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little (remote-hh-dat-set! runremote (common:get-homehost))) ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) (cond - ;;DOT EXIT; - ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" } ;; give up if more than 15 attempts ((> attemptnum 15) - (debug:print 0 *default-log-port* "ERROR: 15 tries to start/connect to server. Giving up.") + (debug:print 0 log-port "ERROR: 15 tries to start/connect to server. Giving up.") (exit 1)) - ;;DOT CASE2 [label="local\nreadonly\nquery"]; - ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2} - ;;DOT CASE2 -> "rmt:open-qry-close-locally"; ;; readonly mode, read request- handle it - case 2 ((and readonly-mode - (member cmd api:read-only-queries)) - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2") - (rmt:open-qry-close-locally cmd 0 params) + (member:print "#3" cmd api:read-only-queries)) + ;; (mutex-unlock! rmt-mutex) + (debug:print-info 12 log-port "rmt:send-receive, case 2") + (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params ro-queries: api:read-only-queries) ) - ;;DOT CASE3 [label="write in\nread-only mode"]; - ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3} - ;;DOT CASE3 -> "#f"; ;; readonly mode, write request. Do nothing, return #f - (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params)) + (readonly-mode (extras-readonly-mode rmt-mutex log-port cmd params)) ;; This block was for pre-emptively resetting the connection if there had been no communication for some time. ;; I don't think it adds any value. If the server is not there, just fail and start a new connection. ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout) ;; - ;;DOT CASE4 [label="reset\nconnection"]; - ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4} - ;;DOT CASE4 -> "rmt:send-receive"; ;; reset the connection if it has been unused too long ((and runremote (remote-conndat runremote) (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on (+ (http-transport:server-dat-get-last-access (remote-conndat runremote)) (remote-server-timeout runremote)))) - (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.") + (debug:print-info 0 log-port "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.") (http-transport:close-connections area-dat: runremote) (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection. - (mutex-unlock! *rmt-mutex*) - (rmt:send-receive cmd rid params attemptnum: attemptnum)) + ;; (mutex-unlock! rmt-mutex) + (rmt:send-receive-orig log-port runremote rmt-mutex toppath multi-sync-mutex cmd rid params attemptnum: attemptnum)) - ;;DOT CASE5 [label="local\nread"]; - ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5}; - ;;DOT CASE5 -> "rmt:open-qry-close-locally"; ;; on homehost and this is a read ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required (cdr (remote-hh-dat runremote)) ;; on homehost - (member cmd api:read-only-queries)) ;; this is a read - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") - (rmt:open-qry-close-locally cmd 0 params)) - - ;;DOT CASE6 [label="init\nremote"]; - ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6}; - ;;DOT CASE6 -> "rmt:send-receive"; + (member:print "#4" cmd api:read-only-queries)) ;; this is a read + ;; (mutex-unlock! rmt-mutex) + (debug:print-info 12 log-port "rmt:send-receive, case 5") + (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params ro-queries: api:read-only-queries)) + ;; on homehost and this is a write, we already have a server, but server has died ((and (cdr (remote-hh-dat runremote)) ;; on homehost - (not (member cmd api:read-only-queries)) ;; this is a write + (not (member:print "#5" cmd api:read-only-queries)) ;; this is a write (remote-server-url runremote) ;; have a server (not (server:ping (remote-server-url runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. (set! *runremote* (make-remote)) (remote-force-server-set! runremote (common:force-server?)) - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6") - (rmt:send-receive cmd rid params attemptnum: attemptnum)) + ;; (mutex-unlock! rmt-mutex) + (debug:print-info 12 log-port "rmt:send-receive, case 6") + (rmt:send-receive-orig log-port runremote rmt-mutex toppath multi-sync-mutex cmd rid params attemptnum: attemptnum)) - ;;DOT CASE7 [label="homehost\nwrite"]; - ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7}; - ;;DOT CASE7 -> "rmt:open-qry-close-locally"; ;; on homehost and this is a write, we already have a server ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required (cdr (remote-hh-dat runremote)) ;; on homehost - (not (member cmd api:read-only-queries)) ;; this is a write + (not (member:print "#6" cmd api:read-only-queries)) ;; this is a write (remote-server-url runremote)) ;; have a server - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") - (rmt:open-qry-close-locally cmd 0 params)) + ;;(mutex-unlock! rmt-mutex) + (debug:print-info 12 log-port "rmt:send-receive, case 4.1") + (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params ro-queries: api:read-only-queries)) - ;;DOT CASE8 [label="force\nserver"]; - ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8}; - ;;DOT CASE8 -> "rmt:open-qry-close-locally"; ;; on homehost, no server contact made and this is a write, passively start a server ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required (cdr (remote-hh-dat runremote)) ;; have homehost (not (remote-server-url runremote)) ;; no connection yet - (not (member cmd api:read-only-queries))) ;; not a read-only query - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8") - (let ((server-url (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call + (not (member:print "#7" cmd api:read-only-queries))) ;; not a read-only query + (debug:print-info 12 log-port "rmt:send-receive, case 8") + (let ((server-url (server:check-if-running toppath))) ;; (server:read-dotserver->url toppath))) ;; (server:check-if-running toppath))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call (if server-url (remote-server-url-set! runremote server-url) ;; the string can be consumed by the client setup if needed (if (common:force-server?) - (server:start-and-wait *toppath*) - (server:kind-run *toppath*)))) + (server:start-and-wait toppath) + (server:kind-run toppath)))) (remote-force-server-set! runremote (common:force-server?)) - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8.1") - (rmt:open-qry-close-locally cmd 0 params)) + ;; (mutex-unlock! rmt-mutex) + (debug:print-info 12 log-port "rmt:send-receive, case 8.1") + (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params ro-queries: api:read-only-queries)) - ;;DOT CASE9 [label="force server\nnot on homehost"]; - ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9}; - ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive"; ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one (not (remote-conndat runremote))) (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost (not (remote-conndat runremote)))) ;; and no connection - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote)) - (mutex-unlock! *rmt-mutex*) - (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up? - (server:start-and-wait *toppath*)) - (remote-conndat-set! runremote (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http - (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as - - ;;DOT CASE10 [label="on homehost"]; - ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10}; - ;;DOT CASE10 -> "rmt:open-qry-close-locally"; + (debug:print-info 12 log-port "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote)) + ;;(mutex-unlock! rmt-mutex) + (if (not (server:check-if-running toppath)) ;; who knows, maybe one has started up? + (server:start-and-wait toppath)) + (remote-conndat-set! runremote (rmt:get-connection-info toppath)) ;; calls client:setup which calls client:setup-http + (rmt:send-receive-orig log-port runremote rmt-mutex toppath multi-sync-mutex cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as + ;; all set up if get this far, dispatch the query ((and (not (remote-force-server runremote)) (cdr (remote-hh-dat runremote))) ;; we are on homehost - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10") - (rmt:open-qry-close-locally cmd (if rid rid 0) params)) - - ;;DOT CASE11 [label="send_receive"]; - ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11}; - ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"]; - ;;DOT CASE11 -> "RESULT" [label="call succeeded"]; + ;;(mutex-unlock! rmt-mutex) + (debug:print-info 12 log-port "rmt:send-receive, case 10") + (rmt:open-qry-close-locally log-port multi-sync-mutex cmd (if rid rid 0) params ro-queries: api:read-only-queries)) + ;; not on homehost, do server query - (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid))))) - ;;DOT } + (else (extras-case-11 log-port runremote cmd params attemptnum rid))))) ;; bunch of small functions factored out of send-receive to make debug easier ;; (define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid) @@ -331,53 +306,10 @@ (cons 'none 0)) (loop (car tal)(cdr tal) newmax-cmd currmax))))))) (mutex-unlock! *db-stats-mutex*) res)) -(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5)) - (let* ((qry-is-write (not (member cmd api:read-only-queries))) - (db-file-path (db:dbfile-path)) ;; 0)) - (dbstruct-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t))) - (read-only (not (file-write-access? db-file-path))) - (start (current-milliseconds)) - (resdat (if (not (and read-only qry-is-write)) - (let ((v (api:execute-requests dbstruct-local (vector (symbol->string cmd) params)))) - (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong.. - exn ;; This is an attempt to detect that situation and recover gracefully - (begin - (debug:print0 *default-log-port* "ERROR: bad data from server " v " message: " ((condition-property-accessor 'exn 'message) exn)) - (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy - (if (and (vector? v) - (> (vector-length v) 1)) - (let ((newvec (vector (vector-ref v 0)(vector-ref v 1)))) - newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record - (vector #t '())))) ;; we could also check that the returned types are valid - (vector #t '()))) - (success (vector-ref resdat 0)) - (res (vector-ref resdat 1)) - (duration (- (current-milliseconds) start))) - (if (and read-only qry-is-write) - (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd)) - (if (not success) - (if (> remretries 0) - (begin - (debug:print-error 0 *default-log-port* "local query failed. Trying again.") - (thread-sleep! (/ (random 5000) 1000)) ;; some random delay - (rmt:open-qry-close-locally cmd run-id params remretries: (- remretries 1))) - (begin - (debug:print-error 0 *default-log-port* "too many retries in rmt:open-qry-close-locally, giving up") - #f)) - (begin - ;; (rmt:update-db-stats run-id cmd params duration) - ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it - (if qry-is-write - (let ((start-time (current-seconds))) - (mutex-lock! *db-multi-sync-mutex*) -/ (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client) - (mutex-unlock! *db-multi-sync-mutex*))))) - res)) - (define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params) (let* ((run-id (if run-id run-id 0)) (res (handle-exceptions exn #f @@ -568,11 +500,11 @@ '() (let loop ((hed (car run-id-list)) (tal (cdr run-id-list)) (threads '())) (if (> (length threads) 5) - (loop hed tal (filter (lambda (th)(not (member (thread-state th) '(terminated dead)))) threads)) + (loop hed tal (filter (lambda (th)(not (member:print "#8" (thread-state th) '(terminated dead)))) threads)) (let* ((newthread (make-thread (lambda () (let ((res (rmt:send-receive 'get-tests-for-run-mindata hed (list hed testpatt states status not-in)))) (if (list? res) (begin @@ -934,7 +866,10 @@ (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id))) (set-functions rmt:send-receive remote-server-url-set! http-transport:close-connections remote-conndat-set! debug:print debug:print-info + debug:print-error remote-ro-mode remote-ro-mode-set! - remote-ro-mode-checked-set! remote-ro-mode-checked) + remote-ro-mode-checked-set! remote-ro-mode-checked + db:dbfile-path db:setup + api:execute-requests api:read-only-queries) Index: rmtmod.scm ================================================================== --- rmtmod.scm +++ rmtmod.scm @@ -1,7 +1,7 @@ ;;====================================================================== -;; Copyright 2017, Matthew Welland. +;; Copyright 2019, Matthew Welland. ;; ;; This file is part of Megatest. ;; ;; Megatest is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by @@ -25,10 +25,15 @@ * (import scheme chicken data-structures extras) (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18) (import commonmod) +(use (prefix ulex ulex:)) + +(define (member:print msg x lst) + (print "member-" msg ": x=" x " lst=" lst) + (member x lst)) ;; Hack to make these functions visible to the refactored code, goal is to eliminate these over time. (define (rmt:send-receive . params) #f) (define (http-transport:close-connections . params) #f) ;; from remote defstruct in common.scm @@ -38,54 +43,115 @@ (define (remote-ro-mode-set! . params) #f) (define (remote-ro-mode-checked-set! . params) #f) (define (remote-ro-mode-checked . params) #f) (define (debug:print . params) #f) (define (debug:print-info . params) #f) +(define (debug:print-error . params) #f) +(define (db:dbfile-path . params) #f) +(define (db:setup . params) #f) +(define (api:execute-requests . params) #f) (define (set-functions send-receive rsus close-connections rcs dbgp dbgpinfo + dbgperr ro-mode ro-mode-set ro-mode-checked-set ro-mode-checked + + dbfile-path dbsetup + exec-req read-only-queries ) - (set! rmt:send-receive send-receive) + (set! rmt:send-receive send-receive) (set! remote-server-url-set! rsus) (set! http-transport:close-connections close-connections) (set! remote-conndat-set! rcs) + ;; print stuff (set! debug:print dbgp) (set! debug:print-info dbgpinfo) + (set! debug:print-error dbgperr) + ;; (set! remote-ro-mode ro-mode) (set! remote-ro-mode-set! ro-mode-set) (set! remote-ro-mode-checked-set! ro-mode-checked-set) - (set! remote-ro-mode-checked ro-mode-checked)) + (set! remote-ro-mode-checked ro-mode-checked) + ;; db stuff for local db access + (set! db:dbfile-path dbfile-path) + (set! db:setup dbsetup) + (set! apt:execute-requests exec-req) + ) + +(define (rmt:open-qry-close-locally log-port multi-sync-mutex cmd run-id params #!key (ro-queries '())(remretries 5)) + (let* ((qry-is-write (not (member:print "#1" cmd ro-queries))) + (db-file-path (db:dbfile-path)) ;; 0)) + (dbstruct-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t))) + (read-only (not (file-write-access? db-file-path))) + (start (current-milliseconds)) + (resdat (if (not (and read-only qry-is-write)) + (let ((v (api:execute-requests dbstruct-local (vector (symbol->string cmd) params)))) + (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong.. + exn ;; This is an attempt to detect that situation and recover gracefully + (begin + (debug:print 0 log-port "ERROR: bad data from server " v " message: " ((condition-property-accessor 'exn 'message) exn)) + (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy + (if (and (vector? v) + (> (vector-length v) 1)) + (let ((newvec (vector (vector-ref v 0)(vector-ref v 1)))) + newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record + (vector #t '())))) ;; we could also check that the returned types are valid + (vector #t '()))) + (success (vector-ref resdat 0)) + (res (vector-ref resdat 1)) + (duration (- (current-milliseconds) start))) + (if (and read-only qry-is-write) + (debug:print 0 log-port "ERROR: attempt to write to read-only database ignored. cmd=" cmd)) + (if (not success) + (if (> remretries 0) + (begin + (debug:print-error 0 log-port "local query failed. Trying again.") + (thread-sleep! (/ (random 5000) 1000)) ;; some random delay + (rmt:open-qry-close-locally log-port multi-sync-mutex cmd run-id params ro-queries: ro-queries remretries: (- remretries 1))) + (begin + (debug:print-error 0 log-port "too many retries in rmt:open-qry-close-locally, giving up") + #f)) + (begin + ;; (rmt:update-db-stats run-id cmd params duration) + ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it + #;(if qry-is-write + (let ((start-time (current-seconds))) + (mutex-lock! multi-sync-mutex) + (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client) + (mutex-unlock! multi-sync-mutex))))) + res)) + + -(define (rmtmod:calc-ro-mode runremote *toppath*) +(define (rmtmod:calc-ro-mode runremote toppath) (if (and runremote (remote-ro-mode-checked runremote)) (remote-ro-mode runremote) - (let* ((dbfile (conc *toppath* "/megatest.db")) + (let* ((dbfile (conc toppath "/megatest.db")) (ro-mode (not (file-write-access? dbfile)))) ;; TODO: use dbstruct or runremote to figure this out in future (if runremote (begin (remote-ro-mode-set! runremote ro-mode) (remote-ro-mode-checked-set! runremote #t) ro-mode) ro-mode)))) (define (extras-readonly-mode rmt-mutex log-port cmd params) - (mutex-unlock! rmt-mutex) + ;;(mutex-unlock! rmt-mutex) (debug:print-info 12 log-port "rmt:send-receive, case 3") (debug:print 0 log-port "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params) #f) (define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params) (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum) - (mutex-lock! *rmt-mutex*) + ;;(mutex-lock! *rmt-mutex*) (remote-conndat-set! runremote #f) (http-transport:close-connections area-dat: runremote) (remote-server-url-set! runremote #f) - (mutex-unlock! *rmt-mutex*) + ;;(mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1") (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) (define (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) (if (and (vector? res) @@ -106,14 +172,32 @@ ;; overloaded and we ;; want to ease off ;; the queries (let ((wait-delay (+ attemptnum (* attemptnum 10)))) (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.") - (mutex-lock! *rmt-mutex*) + ;;(mutex-lock! *rmt-mutex*) (http-transport:close-connections area-dat: runremote) (set! *runremote* #f) ;; force starting over - (mutex-unlock! *rmt-mutex*) + ;;(mutex-unlock! *rmt-mutex*) (thread-sleep! wait-delay) (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) res)) ;; All good, return res +;;====================================================================== +;; ulex and steps stuff +;;====================================================================== + +(define (rmtmod:setup-ulex toppath) + (ulex:make-area + dbdir: (conc toppath "/ulexdb") + pktsdir: (conc toppath "/pkts") + )) + + + +(define (rmtmod:send-receive-ulex ulex:conn cmd rid params attemptnum area-dat) + #f) + +(use trace)(trace-call-sites #t) +(trace member rmtmod:calc-ro-mode rmt:open-qry-close-locally member:print) + )