Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -55,24 +55,17 @@ #f)))) (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) +;; start attemptnum at 1 so the modulo below works as expected ;; -(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected +(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) (common:telemetry-log (conc "rmt:"(->string cmd)) payload: `((rid . ,rid) (params . ,params))) - - - ;;DOT digraph megatest_state_status { - ;;DOT ranksep=0; - ;;DOT // rankdir=LR; - ;;DOT node [shape="box"]; - ;;DOT "rmt:send-receive" -> MUTEXLOCK; - ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; } ;; do all the prep locked under the rmt-mutex (mutex-lock! *rmt-mutex*) ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. @@ -82,60 +75,43 @@ (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas (runremote (or area-dat *runremote*)) (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*))) - ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity - ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"]; - ;; DOT INIT_RUNREMOTE -> MUTEXLOCK; ;; ensure we have a record for our connection for given area (if (not runremote) ;; can remove this one. should never get here. (begin (set! *runremote* (make-remote)) (set! runremote *runremote*))) ;; new runremote will come from this on next iteration - ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity - ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"]; - ;; DOT SET_HOMEHOST -> MUTEXLOCK; ;; ensure we have a homehost record (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little (remote-hh-dat-set! runremote (common:get-homehost))) ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) (cond - ;;DOT EXIT; - ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" } ;; give up if more than 15 attempts ((> attemptnum 15) (debug:print 0 *default-log-port* "ERROR: 15 tries to start/connect to server. Giving up.") (exit 1)) - ;;DOT CASE2 [label="local\nreadonly\nquery"]; - ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2} - ;;DOT CASE2 -> "rmt:open-qry-close-locally"; ;; readonly mode, read request- handle it - case 2 ((and readonly-mode (member cmd api:read-only-queries)) (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2") (rmt:open-qry-close-locally cmd 0 params) ) - ;;DOT CASE3 [label="write in\nread-only mode"]; - ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3} - ;;DOT CASE3 -> "#f"; ;; readonly mode, write request. Do nothing, return #f (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params)) ;; This block was for pre-emptively resetting the connection if there had been no communication for some time. ;; I don't think it adds any value. If the server is not there, just fail and start a new connection. ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout) ;; - ;;DOT CASE4 [label="reset\nconnection"]; - ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4} - ;;DOT CASE4 -> "rmt:send-receive"; ;; reset the connection if it has been unused too long ((and runremote (remote-conndat runremote) (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on (+ (http-transport:server-dat-get-last-access (remote-conndat runremote)) @@ -143,26 +119,19 @@ (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.") (http-transport:close-connections area-dat: runremote) (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection. (mutex-unlock! *rmt-mutex*) (rmt:send-receive cmd rid params attemptnum: attemptnum)) - - ;;DOT CASE5 [label="local\nread"]; - ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5}; - ;;DOT CASE5 -> "rmt:open-qry-close-locally"; ;; on homehost and this is a read ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required (cdr (remote-hh-dat runremote)) ;; on homehost (member cmd api:read-only-queries)) ;; this is a read (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") (rmt:open-qry-close-locally cmd 0 params)) - ;;DOT CASE6 [label="init\nremote"]; - ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6}; - ;;DOT CASE6 -> "rmt:send-receive"; ;; on homehost and this is a write, we already have a server, but server has died ((and (cdr (remote-hh-dat runremote)) ;; on homehost (not (member cmd api:read-only-queries)) ;; this is a write (remote-server-url runremote) ;; have a server (not (server:ping (remote-server-url runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. @@ -170,25 +139,19 @@ (remote-force-server-set! runremote (common:force-server?)) (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6") (rmt:send-receive cmd rid params attemptnum: attemptnum)) - ;;DOT CASE7 [label="homehost\nwrite"]; - ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7}; - ;;DOT CASE7 -> "rmt:open-qry-close-locally"; ;; on homehost and this is a write, we already have a server ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required (cdr (remote-hh-dat runremote)) ;; on homehost (not (member cmd api:read-only-queries)) ;; this is a write (remote-server-url runremote)) ;; have a server (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") (rmt:open-qry-close-locally cmd 0 params)) - ;;DOT CASE8 [label="force\nserver"]; - ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8}; - ;;DOT CASE8 -> "rmt:open-qry-close-locally"; ;; on homehost, no server contact made and this is a write, passively start a server ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required (cdr (remote-hh-dat runremote)) ;; have homehost (not (remote-server-url runremote)) ;; no connection yet (not (member cmd api:read-only-queries))) ;; not a read-only query @@ -202,13 +165,10 @@ (remote-force-server-set! runremote (common:force-server?)) (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8.1") (rmt:open-qry-close-locally cmd 0 params)) - ;;DOT CASE9 [label="force server\nnot on homehost"]; - ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9}; - ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive"; ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one (not (remote-conndat runremote))) (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost (not (remote-conndat runremote)))) ;; and no connection (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote)) @@ -216,88 +176,23 @@ (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up? (server:start-and-wait *toppath*)) (remote-conndat-set! runremote (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as - ;;DOT CASE10 [label="on homehost"]; - ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10}; - ;;DOT CASE10 -> "rmt:open-qry-close-locally"; ;; all set up if get this far, dispatch the query ((and (not (remote-force-server runremote)) (cdr (remote-hh-dat runremote))) ;; we are on homehost (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10") (rmt:open-qry-close-locally cmd (if rid rid 0) params)) - ;;DOT CASE11 [label="send_receive"]; - ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11}; - ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"]; - ;;DOT CASE11 -> "RESULT" [label="call succeeded"]; ;; not on homehost, do server query - (else (extras-case-11 *default-log-port* runremote cmd params attemptnum))))) - ;;DOT } + (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid *rmt-mutex*))))) ;; bunch of small functions factored out of send-receive to make debug easier ;; -(define (extras-case-11 *default-log-port* runremote cmd params attemptnum) - ;; (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9") - ;; (mutex-lock! *rmt-mutex*) - (let* ((conninfo (remote-conndat runremote)) - (dat (case (remote-transport runremote) - ((http) (condition-case ;; handling here has - ;; caused a lot of - ;; problems. However it - ;; is needed to deal with - ;; attemtped - ;; communication to - ;; servers that have gone - ;; away - (http-transport:client-api-send-receive 0 conninfo cmd params) - ((commfail)(vector #f "communications fail")) - ((exn)(vector #f "other fail" (print-call-chain))))) - (else - (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported") - (exit)))) - (success (if (vector? dat) (vector-ref dat 0) #f)) - (res (if (vector? dat) (vector-ref dat 1) #f))) - (if (and (vector? conninfo) (< 5 (vector-length conninfo))) - (http-transport:server-dat-update-last-access conninfo) ;; refresh access time - (begin - (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo) - (set! conninfo #f) - (remote-conndat-set! *runremote* #f) ;; NOTE: *runremote* is global copy of runremote. Purpose: factor out global. - (http-transport:close-connections area-dat: runremote))) - (debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote) - (mutex-unlock! *rmt-mutex*) - (if success ;; success only tells us that the transport was - ;; successful, have to examine the data to see if - ;; there was a detected issue at the other end - (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res) - (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params) - ))) - -;; (define (rmt:update-db-stats run-id rawcmd params duration) -;; (mutex-lock! *db-stats-mutex*) -;; (handle-exceptions -;; exn -;; (begin -;; (debug:print 0 *default-log-port* "WARNING: stats collection failed in update-db-stats") -;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) -;; (print "exn=" (condition->list exn)) -;; #f) ;; if this fails we don't care, it is just stats -;; (let* ((cmd (conc "run-id=" run-id " " (if (eq? rawcmd 'general-call) (car params) rawcmd))) -;; (stat-vec (hash-table-ref/default *db-stats* cmd #f))) -;; (if (not (vector? stat-vec)) -;; (let ((newvec (vector 0 0))) -;; (hash-table-set! *db-stats* cmd newvec) -;; (set! stat-vec newvec))) -;; (vector-set! stat-vec 0 (+ (vector-ref stat-vec 0) 1)) -;; (vector-set! stat-vec 1 (+ (vector-ref stat-vec 1) duration)))) -;; (mutex-unlock! *db-stats-mutex*)) - (define (rmt:print-db-stats) (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f" (debug:print 18 *default-log-port* "DB Stats\n========") (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg")) (for-each (lambda (cmd) @@ -935,6 +830,10 @@ (set-functions rmt:send-receive remote-server-url-set! http-transport:close-connections remote-conndat-set! debug:print debug:print-info remote-ro-mode remote-ro-mode-set! - remote-ro-mode-checked-set! remote-ro-mode-checked) + remote-ro-mode-checked-set! remote-ro-mode-checked + remote-access remote-conndat + remote-transport + http-transport:server-dat-update-last-access + http-transport:client-api-send-receive) Index: rmtmod.scm ================================================================== --- rmtmod.scm +++ rmtmod.scm @@ -26,39 +26,58 @@ (import scheme chicken data-structures extras) (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18) (import commonmod) +;; ;; Hack to make these functions visible to the refactored code, goal is to eliminate these over time. -(define (rmt:send-receive . params) #f) -(define (http-transport:close-connections . params) #f) +;; +(define (rmt:send-receive . params) #f) +(define (http-transport:close-connections . params) #f) +(define (http-transport:server-dat-update-last-access . params) #f) +(define (http-transport:client-api-send-receive . params) #f) + ;; from remote defstruct in common.scm (define (remote-conndat-set! . params) #f) (define (remote-server-url-set! . params) #f) (define (remote-ro-mode . params) #f) (define (remote-ro-mode-set! . params) #f) (define (remote-ro-mode-checked-set! . params) #f) (define (remote-ro-mode-checked . params) #f) +(define (remote-access . params) #f) +(define (remote-conndat . params) #f) +(define (remote-transport . params) #f) + (define (debug:print . params) #f) (define (debug:print-info . params) #f) (define (set-functions send-receive rsus close-connections rcs dbgp dbgpinfo ro-mode ro-mode-set ro-mode-checked-set ro-mode-checked - ) + access conndat + transport + update-last-access api-send-receive) (set! rmt:send-receive send-receive) - (set! remote-server-url-set! rsus) + (set! http-transport:close-connections close-connections) + (set! http-transport:server-dat-update-last-access update-last-access) + (set! http-transport:client-api-send-receive api-send-receive) + + (set! remote-server-url-set! rsus) (set! remote-conndat-set! rcs) - (set! debug:print dbgp) - (set! debug:print-info dbgpinfo) + (set! remote-conndat conndat) + (set! remote-access access) (set! remote-ro-mode ro-mode) (set! remote-ro-mode-set! ro-mode-set) (set! remote-ro-mode-checked-set! ro-mode-checked-set) - (set! remote-ro-mode-checked ro-mode-checked)) + (set! remote-ro-mode-checked ro-mode-checked) + (set! remote-transport transport) + (set! debug:print dbgp) + (set! debug:print-info dbgpinfo) + ) (define (rmtmod:calc-ro-mode runremote *toppath*) (if (and runremote (remote-ro-mode-checked runremote)) (remote-ro-mode runremote) @@ -113,7 +132,45 @@ (set! *runremote* #f) ;; force starting over (mutex-unlock! *rmt-mutex*) (thread-sleep! wait-delay) (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) res)) ;; All good, return res + +(define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid rmt-mutex) + ;; (mutex-unlock! *rmt-mutex*) + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9") + ;; (mutex-lock! *rmt-mutex*) + (let* ((conninfo (remote-conndat runremote)) + (dat (case (remote-transport runremote) + ((http) (condition-case ;; handling here has + ;; caused a lot of + ;; problems. However it + ;; is needed to deal with + ;; attemtped + ;; communication to + ;; servers that have gone + ;; away + (http-transport:client-api-send-receive 0 conninfo cmd params) + ((commfail)(vector #f "communications fail")) + ((exn)(vector #f "other fail" (print-call-chain))))) + (else + (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported") + (exit)))) + (success (if (vector? dat) (vector-ref dat 0) #f)) + (res (if (vector? dat) (vector-ref dat 1) #f))) + (if (and (vector? conninfo) (< 5 (vector-length conninfo))) + (http-transport:server-dat-update-last-access conninfo) ;; refresh access time + (begin + (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo) + (set! conninfo #f) + (remote-conndat-set! runremote #f) ;; NOTE: *runremote* is global copy of runremote. Purpose: factor out global. + (http-transport:close-connections area-dat: runremote))) + (debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote) + (mutex-unlock! rmt-mutex) + (if success ;; success only tells us that the transport was + ;; successful, have to examine the data to see if + ;; there was a detected issue at the other end + (extras-transport-succeded *default-log-port* rmt-mutex attemptnum runremote res) + (extras-transport-failed *default-log-port* rmt-mutex attemptnum runremote cmd rid params) + ))) )