Index: commonmod.scm ================================================================== --- commonmod.scm +++ commonmod.scm @@ -211,11 +211,12 @@ (define *no-sync-db* #f) ;; SERVER (define *my-client-signature* #f) (define *transport-type* 'http) ;; override with [server] transport http|rpc|nmsg -(define *runremote* #f) ;; if set up for server communication this will hold +;; replaced by *rmt:remote* +;; (define *runremote* #f) ;; if set up for server communication this will hold ;; (define *max-cache-size* 0) (define *logged-in-clients* (make-hash-table)) (define *server-id* #f) (define *server-info* #f) ;; good candidate for easily convert to non-global ;; (define *time-to-exit* #f) @@ -611,22 +612,24 @@ (else "FAIL"))) (define (common:logpro-exit-code->test-status exit-code) (status-sym->string (common:logpro-exit-code->status-sym exit-code))) -(defstruct remote - (hh-dat #f) ;; (common:get-homehost)) ;; homehost record ( addr . hhflag ) - (server-url #f) ;; (server:check-if-running *toppath*) #f)) - (server-id #f) - (server-info #f) ;; (if *toppath* (server:check-if-running *toppath*) #f)) - (last-server-check 0) ;; last time we checked to see if the server was alive - (conndat #f) - ;; (transport *transport-type*) - (server-timeout #f) ;; (server:expiration-timeout)) - (force-server #f) - (ro-mode #f) - (ro-mode-checked #f)) ;; flag that indicates we have checked for ro-mode +;; REPLACED BY rmt:remote in rmtmod.scm +;; +;; (defstruct remote +;; (hh-dat #f) ;; (common:get-homehost)) ;; homehost record ( addr . hhflag ) +;; (server-url #f) ;; (server:check-if-running *toppath*) #f)) +;; (server-id #f) +;; (server-info #f) ;; (if *toppath* (server:check-if-running *toppath*) #f)) +;; (last-server-check 0) ;; last time we checked to see if the server was alive +;; (conndat #f) +;; ;; (transport *transport-type*) +;; (server-timeout #f) ;; (server:expiration-timeout)) +;; (force-server #f) +;; (ro-mode #f) +;; (ro-mode-checked #f)) ;; flag that indicates we have checked for ro-mode ;; launching and hosts (defstruct host (reachable #f) (last-update 0) Index: http-transportmod.scm ================================================================== --- http-transportmod.scm +++ http-transportmod.scm @@ -297,39 +297,31 @@ (define (http-transport:inc-requests-and-prep-to-close-all-connections) (mutex-lock! *http-mutex*) (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*))) -;; Turn off proxy handling -(define (http-transport:client-turn-off-proxy) - (determine-proxy (constantly #f))) ;; From (chicken base) - ;; serverdat contains uuid to be used for connection validation ;; -;; NOTE: serverdat must be initialized once by servdat-init +;; NOTE: serverdat must be initialized or created by servdat-init ;; -(define (http-transport:send-receive serverdat cmd params #!key (numretries 3)) - (let* ((fullurl (servdat-api-uri serverdat)) ;; gets uri for /api - (res #f) +(define (http-transport:send-receive sdat qry-key cmd params #!key (numretries 3)) + (let* ((res #f) (success #t) - (server-id (servdat-uuid serverdat))) - ;; set up the http-client here - (max-retry-attempts 1) - ;; consider all requests indempotent - (retry-request? (lambda (request) - #f)) + (sparams (with-output-to-string + (lambda ()(write params))))) ;; send the data and get the response extract the needed info from ;; the http data and process and return it. (let* ((send-recieve (lambda () (set! res (vector #t ;; success (with-input-from-request - fullurl - (list (cons 'key server-id) + (servdat-api-uri sdat) + (list (cons 'key qry-key) + (cons 'srvid (servdat-uuid sdat)) (cons 'cmd cmd) - (cons 'params params)) + (cons 'params sparams)) read))))) (time-out (lambda () (thread-sleep! 45) (debug:print 0 *default-log-port* "WARNING: send-receive took more than 45 seconds!!") #f)) @@ -338,30 +330,34 @@ (thread-start! th1) (thread-start! th2) (thread-join! th1) (close-idle-connections!) (thread-terminate! th2) - res))) + (if (string? res) + (with-input-from-string res + (lambda () read)) + res)))) ;; careful closing of connections stored in *runremote* ;; (define (http-transport:close-connections #!key (area-dat #f)) - (let* ((runremote (or area-dat *runremote*)) - (server-dat (if runremote - (remote-conndat runremote) - #f))) ;; (hash-table-ref/default *runremote* run-id #f))) - (if (vector? server-dat) - (let ((api-dat (http-transport:server-dat-get-api-uri server-dat))) - (handle-exceptions - exn - (begin - (print-call-chain *default-log-port*) - (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) - (close-connection! api-dat) - ;;(close-idle-connections!) - #t)) - #f))) + (debug:print-info 0 *default-log-port* "http-transport:close-connections doesn't do anything now!")) +;; (let* ((runremote (or area-dat *runremote*)) +;; (server-dat (if runremote +;; (remote-conndat runremote) +;; #f))) ;; (hash-table-ref/default *runremote* run-id #f))) +;; (if (vector? server-dat) +;; (let ((api-dat (http-transport:server-dat-get-api-uri server-dat))) +;; (handle-exceptions +;; exn +;; (begin +;; (print-call-chain *default-log-port*) +;; (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) +;; (close-connection! api-dat) +;; ;;(close-idle-connections!) +;; #t)) +;; #f))) (define (make-http-transport:server-dat)(make-vector 6)) (define (http-transport:server-dat-get-iface vec) (vector-ref vec 0)) (define (http-transport:server-dat-get-port vec) (vector-ref vec 1)) @@ -386,31 +382,29 @@ (vector-set! vec 5 (current-seconds)) (begin (print-call-chain (current-error-port)) (debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!")))) -;; -;; connect -;; -(define (http-transport:client-connect iface port server-id) - (let* ((api-url (conc "http://" iface ":" port "/api")) - (api-uri (uri-reference (conc "http://" iface ":" port "/api"))) - (api-req (make-request method: 'POST uri: api-uri)) - (server-dat (vector iface port api-uri api-url api-req (current-seconds) server-id))) - server-dat)) - -;; initialize servdat for client side +;; initialize servdat for client side, setup needed parameters +;; pass in #f as sdat-in to create sdat +;; (define (servdat-init sdat-in iface port uuid) (let* ((sdat (or sdat-in (make-servdat)))) (if uuid (servdat-uuid-set! sdat uuid)) (servdat-host-set! sdat iface) (servdat-port-set! sdat port) (servdat-api-url-set! sdat (conc "http://" iface ":" port "/api")) (servdat-api-uri-set! sdat (uri-reference (servdat->url sdat))) (servdat-api-req-set! sdat (make-request method: 'POST uri: (servdat-api-uri sdat))) - sdat)) + ;; set up the http-client parameters + (max-retry-attempts 1) + ;; consider all requests indempotent + (retry-request? (lambda (request) + #f)) + (determine-proxy (constantly #f)) + sdat)) ;;====================================================================== ;; NEW SERVER METHOD ;;====================================================================== @@ -472,14 +466,15 @@ (define (server-address srv-pkt) (conc (alist-ref 'host srv-pkt) ":" (alist-ref 'port srv-pkt))) -(define (server-ready? server-address) +(define (server-ready? host port) ;; server-address is host:port ;; ping the server and ask it ;; if it ready - #f) + (let* ((sdat (servdat-init #f host port #f))) + (http-transport:send-receive sdat 'ping '()))) ;; from the pkts return servers associated with dbpath ;; NOTE: Only one can be alive - have to check on each ;; in the list of pkts returned ;; @@ -499,12 +494,14 @@ (define (get-the-server serv-pkts dbpath) (let loop ((tail serv-pkts)) (if (null? tail) #f (let* ((spkt (car tail)) + (host (alist-ref 'host spkt)) + (port (alist-ref 'port spkt)) (addr (server-address spkt))) - (if (server-ready? addr) + (if (server-ready? host port) spkt (loop (cdr tail))))))) ;; am I the "first" in line server? I.e. my D card is smallest ;; use Z card as tie breaker @@ -578,34 +575,35 @@ (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f))) (debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key) ;; am I the best-srv, compare server-keys to know (if (equal? best-srv-key server-key) (if (get-lock-db sdat db-file) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id) - (debug:print 0 *default-log-port* "I'm the server!") - (servdat-dbfile-set! sdat db-file)) - (begin - (debug:print 0 *default-log-port* "I'm not the server, exiting.") - (bdat-time-to-exit-set! *bdat* #t) - (thread-sleep! 0.2) - (exit))) - (begin - (debug:print 0 *default-log-port* - "Keys do not match "best-srv-key", "server-key", exiting.") - (bdat-time-to-exit-set! *bdat* #t) - (thread-sleep! 0.2) - (exit))) - sdat)) - (begin - (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) - (sleep 4) - (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes - (begin - (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server") - (exit)) - (loop start-time - (equal? sdat last-sdat) - sdat))))))) + (begin + (debug:print 0 *default-log-port* "I'm the server!") + (servdat-dbfile-set! sdat db-file)) + (begin + (debug:print 0 *default-log-port* "I'm not the server, exiting.") + (bdat-time-to-exit-set! *bdat* #t) + (thread-sleep! 0.2) + (exit))) + (begin + (debug:print 0 *default-log-port* + "Keys do not match "best-srv-key", "server-key", exiting.") + (bdat-time-to-exit-set! *bdat* #t) + (thread-sleep! 0.2) + (exit))) + sdat)) + (begin ;; sdat not yet contains server info + (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) + (sleep 4) + (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes + (begin + (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server") + (exit)) + (loop start-time + (equal? sdat last-sdat) + sdat)))))))) ;; run http-transport:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; (define (http-transport:keep-running dbname) Index: launchmod.scm ================================================================== --- launchmod.scm +++ launchmod.scm @@ -1448,13 +1448,13 @@ ;; (list 'serverinf *server-info*) #;(list 'homehost (let* ((hhdat (common:get-homehost))) (if hhdat (car hhdat) #f))) - (list 'serverurl (if *runremote* - (remote-server-url *runremote*) - #f)) ;; + ;; (list 'serverurl (if *runremote* + ;; (remote-server-url *runremote*) + ;; #f)) ;; (list 'areaname (common:get-area-name)) (list 'toppath *toppath*) (list 'work-area work-area) (list 'test-name test-name) (list 'runscript runscript) Index: rmtmod.scm ================================================================== --- rmtmod.scm +++ rmtmod.scm @@ -134,23 +134,23 @@ ;;====================================================================== ;; S U P P O R T F U N C T I O N S ;;====================================================================== -;; if a server is either running or in the process of starting call client:setup -;; else return #f to let the calling proc know that there is no server available -;; -(define (rmt:get-connection-info areapath #!key (area-dat #f)) ;; TODO: push areapath down. - (let* ((runremote (or area-dat *runremote*)) - (cinfo (if (remote? runremote) - (remote-conndat runremote) - #f))) - (if cinfo - cinfo - (if (server:check-if-running areapath) - (client:setup areapath) - #f)))) +;; ;; if a server is either running or in the process of starting call client:setup +;; ;; else return #f to let the calling proc know that there is no server available +;; ;; +;; (define (rmt:get-connection-info areapath #!key (area-dat #f)) ;; TODO: push areapath down. +;; (let* ((runremote (or area-dat *runremote*)) +;; (cinfo (if (remote? runremote) +;; (remote-conndat runremote) +;; #f))) +;; (if cinfo +;; cinfo +;; (if (server:check-if-running areapath) +;; (client:setup areapath) +;; #f)))) (defstruct rmt:remote (conns (make-hash-table)) ;; apath/dbname => rmt:conn ) @@ -160,23 +160,31 @@ (fullname #f) (hostport #f) (lastmsg 0) (expires 0)) +;; replaces *runremote* (define *rmt:remote* (make-rmt:remote)) ;; do we have a connection to apath dbname and ;; is it not expired? then return it ;; -(define (rmt:get-existing-live-conn remote apath dbname) +;; else setup a connection +;; +;; if that fails, return '(#f "some reason") ;; NB// convert to raising an exception +;; +(define (rmt:get-connection remote apath dbname) (let* ((fullname (db:dbname->path apath dbname)) (conn (hash-table-ref/default (rmt:remote-conns remote) fullname #f))) (if (and conn (> (current-seconds) (rmt:conn-expires conn))) conn #f))) + +;; (rmt:general-open-connection remote apath dbname)))) + ;; looks for a connection to main ;; connections for other servers happens by requesting from main ;; (define (rmt:open-main-connection remote apath) (let* ((pktsdir (get-pkts-dir apath)) @@ -206,20 +214,27 @@ expires: (+ (current-seconds) 60) ;; this needs to be gathered during the ping )) (start-main-srv))) (start-main-srv)))) +;; NB// remote is a rmt:remote struct +;; (define (rmt:general-open-connection remote apath dbname) - (let ((mainconn (rmt:get-existing-live-conn remote apath (db:run-id->dbname #f)))) - (if (not mainconn)(rmt:open-main-connection remote apath)) - ;; TODO - call main for connection info - )) - - + (let ((mainconn (rmt:get-connection remote apath (db:run-id->dbname #f)))) + (if (not mainconn) + (begin + (rmt:open-main-connection remote apath) + (thread-sleep! 1) + (rmt:general-open-connection remote apath dbname)) + ;; we have a connection to main, ask for contact info for dbname + (let* ((res (http-transport:send-receive mainconn "x" 'get-server `(,apath ,dbname)))) + (print "rmt:general-open-connection got res="res))))) + + ;;====================================================================== -;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) +;; Defaults to ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) (if (not *rmt:remote*)(set! *rmt:remote* (make-rmt:remote))) (let* ((apath *toppath*) (conns *rmt:remote*) @@ -228,286 +243,15 @@ ;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed ;; sometime in the future ;; (define (rmt:send-receive-real remote apath dbname rid params) - ;; do we have a connection to the needed db? - ;; has the connection expired? - (let connloop ((conn (rmt:get-existing-live-conn remote apath dbname))) - (if (not conn) - (connloop (rmt:general-open-connection remote apath dbname)) - (begin - #t ;; here we do the actual connection work - )))) - - -;; ;; ;; start attemptnum at 1 so the modulo below works as expected -;; ;; #;(common:telemetry-log (conc "rmt:"(->string cmd)) -;; ;; payload: `((rid . ,rid) -;; ;; (params . ,params))) -;; ;; -;; ;; (if (> attemptnum 2) -;; ;; (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum)) -;; ;; -;; ;; (cond -;; ;; ((> attemptnum 2) (thread-sleep! 0.053)) -;; ;; ((> attemptnum 10) (thread-sleep! 0.5)) -;; ;; ((> attemptnum 20) (thread-sleep! 1))) -;; ;; (if (and (> attemptnum 5) (= 0 (modulo attemptnum 15))) -;; ;; (begin (server:run *toppath*) (thread-sleep! 3))) -;; ;; -;; ;; -;; ;; ;;DOT digraph megatest_state_status { -;; ;; ;;DOT ranksep=0; -;; ;; ;;DOT // rankdir=LR; -;; ;; ;;DOT node [shape="box"]; -;; ;; ;;DOT "rmt:send-receive" -> MUTEXLOCK; -;; ;; ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; } -;; ;; ;; do all the prep locked under the rmt-mutex -;; ;; (mutex-lock! *rmt-mutex*) -;; ;; -;; ;; ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote -;; ;; ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. -;; ;; ;; 3. do the query, if on homehost use local access -;; ;; ;; -;; ;; (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value -;; ;; (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas -;; ;; (runremote (or area-dat -;; ;; *runremote*)) -;; ;; (attemptnum (+ 1 attemptnum)) -;; ;; (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*))) -;; ;; -;; ;; ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity -;; ;; ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"]; -;; ;; ;; DOT INIT_RUNREMOTE -> MUTEXLOCK; -;; ;; ;; ensure we have a record for our connection for given area -;; ;; (if (not runremote) ;; can remove this one. should never get here. -;; ;; (begin -;; ;; (set! *runremote* (make-and-init-remote)) -;; ;; (let* ((server-info (remote-server-info *runremote*))) -;; ;; (if server-info -;; ;; (begin -;; ;; (remote-server-url-set! *runremote* (server:record->url server-info)) -;; ;; (remote-server-id-set! *runremote* (server:record->id server-info))))) -;; ;; (set! runremote *runremote*))) ;; new runremote will come from this on next iteration -;; ;; -;; ;; ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity -;; ;; ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"]; -;; ;; ;; DOT SET_HOMEHOST -> MUTEXLOCK; -;; ;; ;; ensure we have a homehost record -;; ;; (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost -;; ;; (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little -;; ;; (remote-hh-dat-set! runremote (common:get-homehost))) -;; ;; -;; ;; ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) -;; ;; (cond -;; ;; ;;DOT EXIT; -;; ;; ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" } -;; ;; ;; give up if more than 150 attempts -;; ;; ((> attemptnum 150) -;; ;; (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.") -;; ;; (exit 1)) -;; ;; -;; ;; ;;DOT CASE2 [label="local\nreadonly\nquery"]; -;; ;; ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2} -;; ;; ;;DOT CASE2 -> "rmt:open-qry-close-locally"; -;; ;; ;; readonly mode, read request- handle it - case 2 -;; ;; ((and readonly-mode -;; ;; (member cmd api:read-only-queries)) -;; ;; (mutex-unlock! *rmt-mutex*) -;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2") -;; ;; (rmt:open-qry-close-locally cmd 0 params) -;; ;; ) -;; ;; -;; ;; ;;DOT CASE3 [label="write in\nread-only mode"]; -;; ;; ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3} -;; ;; ;;DOT CASE3 -> "#f"; -;; ;; ;; readonly mode, write request. Do nothing, return #f -;; ;; (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params)) -;; ;; -;; ;; ;; This block was for pre-emptively resetting the connection if there had been no communication for some time. -;; ;; ;; I don't think it adds any value. If the server is not there, just fail and start a new connection. -;; ;; ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout) -;; ;; ;; -;; ;; ;;DOT CASE4 [label="reset\nconnection"]; -;; ;; ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4} -;; ;; ;;DOT CASE4 -> "rmt:send-receive"; -;; ;; ;; reset the connection if it has been unused too long -;; ;; ((and runremote -;; ;; (remote-conndat runremote) -;; ;; (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on -;; ;; (+ (http-transport:server-dat-get-last-access (remote-conndat runremote)) -;; ;; (remote-server-timeout runremote)))) -;; ;; (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.") -;; ;; (http-transport:close-connections area-dat: runremote) -;; ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection. -;; ;; (mutex-unlock! *rmt-mutex*) -;; ;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) -;; ;; -;; ;; ;;DOT CASE5 [label="local\nread"]; -;; ;; ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5}; -;; ;; ;;DOT CASE5 -> "rmt:open-qry-close-locally"; -;; ;; -;; ;; ;; on homehost and this is a read -;; ;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required -;; ;; (cdr (remote-hh-dat runremote)) ;; on homehost -;; ;; (member cmd api:read-only-queries)) ;; this is a read -;; ;; (mutex-unlock! *rmt-mutex*) -;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") -;; ;; (rmt:open-qry-close-locally cmd 0 params)) -;; ;; -;; ;; ;;DOT CASE6 [label="init\nremote"]; -;; ;; ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6}; -;; ;; ;;DOT CASE6 -> "rmt:send-receive"; -;; ;; ;; on homehost and this is a write, we already have a server, but server has died -;; ;; ((and (cdr (remote-hh-dat runremote)) ;; on homehost -;; ;; (not (member cmd api:read-only-queries)) ;; this is a write -;; ;; (remote-server-url runremote) ;; have a server -;; ;; (not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. -;; ;; (set! *runremote* (make-and-init-remote)) -;; ;; (let* ((server-info (remote-server-info *runremote*))) -;; ;; (if server-info -;; ;; (begin -;; ;; (remote-server-url-set! *runremote* (server:record->url server-info)) -;; ;; (remote-server-id-set! *runremote* (server:record->id server-info))))) -;; ;; (remote-force-server-set! runremote (common:force-server?)) -;; ;; (mutex-unlock! *rmt-mutex*) -;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6") -;; ;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) -;; ;; -;; ;; ;;DOT CASE7 [label="homehost\nwrite"]; -;; ;; ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7}; -;; ;; ;;DOT CASE7 -> "rmt:open-qry-close-locally"; -;; ;; ;; on homehost and this is a write, we already have a server -;; ;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required -;; ;; (cdr (remote-hh-dat runremote)) ;; on homehost -;; ;; (not (member cmd api:read-only-queries)) ;; this is a write -;; ;; (remote-server-url runremote)) ;; have a server -;; ;; (mutex-unlock! *rmt-mutex*) -;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") -;; ;; (rmt:open-qry-close-locally cmd 0 params)) -;; ;; -;; ;; ;;DOT CASE8 [label="force\nserver"]; -;; ;; ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8}; -;; ;; ;;DOT CASE8 -> "rmt:open-qry-close-locally"; -;; ;; ;; on homehost, no server contact made and this is a write, passively start a server -;; ;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required -;; ;; (cdr (remote-hh-dat runremote)) ;; have homehost -;; ;; (not (remote-server-url runremote)) ;; no connection yet -;; ;; (not (member cmd api:read-only-queries))) ;; not a read-only query -;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8") -;; ;; (let ((server-info (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call -;; ;; (if server-info -;; ;; (begin -;; ;; (remote-server-url-set! runremote (server:record->url server-info)) ;; the string can be consumed by the client setup if needed -;; ;; (remote-server-id-set! runremote (server:record->id server-info))) -;; ;; (if (common:force-server?) -;; ;; (server:start-and-wait *toppath*) -;; ;; (server:kind-run *toppath*))) -;; ;; (remote-force-server-set! runremote (common:force-server?)) -;; ;; (mutex-unlock! *rmt-mutex*) -;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8.1") -;; ;; (rmt:open-qry-close-locally cmd 0 params))) -;; ;; -;; ;; ;;DOT CASE9 [label="force server\nnot on homehost"]; -;; ;; ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9}; -;; ;; ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive"; -;; ;; ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one -;; ;; (not (remote-conndat runremote))) -;; ;; (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost -;; ;; (not (remote-conndat runremote)))) ;; and no connection -;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote)) -;; ;; (mutex-unlock! *rmt-mutex*) -;; ;; (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up? -;; ;; (server:start-and-wait *toppath*)) -;; ;; (remote-conndat-set! runremote (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http -;; ;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as -;; ;; -;; ;; ;;DOT CASE10 [label="on homehost"]; -;; ;; ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10}; -;; ;; ;;DOT CASE10 -> "rmt:open-qry-close-locally"; -;; ;; ;; all set up if get this far, dispatch the query -;; ;; ((and (not (remote-force-server runremote)) -;; ;; (cdr (remote-hh-dat runremote))) ;; we are on homehost -;; ;; (mutex-unlock! *rmt-mutex*) -;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10") -;; ;; (rmt:open-qry-close-locally cmd (if rid rid 0) params)) -;; ;; -;; ;; ;;DOT CASE11 [label="send_receive"]; -;; ;; ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11}; -;; ;; ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"]; -;; ;; ;;DOT CASE11 -> "RESULT" [label="call succeeded"]; -;; ;; ;; not on homehost, do server query -;; ;; (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid))))) -;; ;; ;;DOT } - -;; bunch of small functions factored out of send-receive to make debug easier -;; - -;; No Title -;; Error: (vector-ref) out of range -;; #(# (#("db.scm:3740: regex#regexp" #f #f) #("db.scm:3739: regex#string-substitute" #f #f) #("db.scm:3738: base64#base64-decode" #f #f) #("db.scm:3737: z3#z3:decode-buffer" #f #f) #("db.scm:3736: with-input-from-string" #f #f) #("db.scm:3741: s11n#deserialize" #f #f) #("api.scm:374: api:execute-requests" #f #f) #("api.scm:139: call-with-current-continuation" #f #f) #("api.scm:139: with-exception-handler" #f #f) #("api.scm:139: ##sys#call-with-values" #f #f) #("api.scm:158: string->symbol" #f #f) #("api.scm:160: current-milliseconds" #f #f) #("api.scm:161: dbr:dbstruct-read-only" #f #f) #("api.scm:139: k15" #f #f) #("api.scm:139: g19" #f #f) #("api.scm:142: get-call-chain" #f #f)) #("get-test-info-by-id" (1102 507299))) -;; 6 -;; -;; Call history: -;; -;; http-transport.scm:306: thread-terminate! -;; http-transport.scm:307: debug:print-info -;; common_records.scm:235: debug:debug-mode -;; rmt.scm:259: k587 -;; rmt.scm:259: g591 -;; rmt.scm:276: http-transport:server-dat-update-last-access -;; http-transport.scm:364: current-seconds -;; rmt.scm:282: debug:print-info -;; common_records.scm:235: debug:debug-mode -;; rmt.scm:283: mutex-unlock! -;; rmt.scm:287: extras-transport-succeded <-- -;; +-----------------------------------------------------------------------------+ -;; | Exit Status : 70 -;; - -(define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid) - ;; (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9") - ;; (mutex-lock! *rmt-mutex*) - (let* ((conninfo (remote-conndat runremote)) - (dat-in (condition-case ;; handling here has - ;; caused a lot of - ;; problems. However it - ;; is needed to deal with - ;; attemtped - ;; communication to - ;; servers that have gone - ;; away - (http-transport:send-receive 0 conninfo cmd params) - ((servermismatch) (vector #f "Server id mismatch" )) - ((commfail)(vector #f "communications fail")) - ((exn)(vector #f "other fail" (print-call-chain))))) - (dat (if (and (vector? dat-in) ;; ... check it is a correct size - (> (vector-length dat-in) 1)) - dat-in - (vector #f (conc "communications fail (type 2), dat-in=" dat-in)))) - (success (if (vector? dat) (vector-ref dat 0) #f)) - (res (if (vector? dat) (vector-ref dat 1) #f))) - (if (and (vector? conninfo) (< 5 (vector-length conninfo))) - (http-transport:server-dat-update-last-access conninfo) ;; refresh access time - (begin - (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo) - (set! conninfo #f) - (remote-conndat-set! *runremote* #f) ;; NOTE: *runremote* is global copy of runremote. Purpose: factor out global. - (http-transport:close-connections area-dat: runremote))) - (debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote) - (mutex-unlock! *rmt-mutex*) - (if success ;; success only tells us that the transport was - ;; successful, have to examine the data to see if - ;; there was a detected issue at the other end - (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) - (begin - (debug:print-error 0 *default-log-port* " dat=" dat) - (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params)) - ))) - + (let* ((conn (rmt:get-connection remote apath dbname))) + (assert conn "FATAL: Unable to connect to db "apath"/"dbname) + #t ;; here we do the actual connection work + )) + (define (rmt:print-db-stats) (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f" (debug:print 18 *default-log-port* "DB Stats\n========") (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg")) (for-each (lambda (cmd) @@ -541,52 +285,10 @@ (cons 'none 0)) (loop (car tal)(cdr tal) newmax-cmd currmax))))))) (mutex-unlock! *db-stats-mutex*) res)) -(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5)) - (let* ((qry-is-write (not (member cmd api:read-only-queries))) - (db-file-path (common:get-db-tmp-area)) ;; 0)) - (dbstruct-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t))) - (read-only (not (file-writable? db-file-path))) - (start (current-milliseconds)) - (resdat (if (not (and read-only qry-is-write)) - (let ((v (api:execute-requests dbstruct-local (vector (symbol->string cmd) params)))) - (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong.. - exn ;; This is an attempt to detect that situation and recover gracefully - (begin - (debug:print 0 *default-log-port* "ERROR: bad data from server " v " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) - (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy - (if (and (vector? v) - (> (vector-length v) 1)) - (let ((newvec (vector (vector-ref v 0)(vector-ref v 1)))) - newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record - (vector #t '())))) ;; we could also check that the returned types are valid - (vector #t '()))) - (success (vector-ref resdat 0)) - (res (vector-ref resdat 1)) - (duration (- (current-milliseconds) start))) - (if (and read-only qry-is-write) - (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd)) - (if (not success) - (if (> remretries 0) - (begin - (debug:print-error 0 *default-log-port* "local query failed. Trying again.") - (thread-sleep! (/ (pseudo-random-integer 5000) 1000)) ;; some random delay - (rmt:open-qry-close-locally cmd run-id params remretries: (- remretries 1))) - (begin - (debug:print-error 0 *default-log-port* "too many retries in rmt:open-qry-close-locally, giving up") - #f)) - (begin - ;; (rmt:update-db-stats run-id cmd params duration) - ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it - (if qry-is-write - (let ((start-time (current-seconds))) - (mutex-lock! *db-multi-sync-mutex*) - (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client) - (mutex-unlock! *db-multi-sync-mutex*))))) - res)) ;;====================================================================== ;; ;; A C T U A L A P I C A L L S ;; @@ -1124,75 +826,10 @@ (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id))) (define (rmt:test-get-archive-block-info archive-block-id) (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id))) - -(define (rmtmod:calc-ro-mode runremote *toppath*) - (if (and runremote - (remote-ro-mode-checked runremote)) - (remote-ro-mode runremote) - (let* ((dbfile (conc *toppath* "/megatest.db")) - (ro-mode (not (file-writable? dbfile)))) ;; TODO: use dbstruct or runremote to figure this out in future - (if runremote - (begin - (remote-ro-mode-set! runremote ro-mode) - (remote-ro-mode-checked-set! runremote #t) - ro-mode) - ro-mode)))) - -(define (extras-readonly-mode rmt-mutex log-port cmd params) - (mutex-unlock! rmt-mutex) - (debug:print-info 12 log-port "rmt:send-receive, case 3") - (debug:print 0 log-port "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params) - #f) - -(define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params) - (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum) - (mutex-lock! *rmt-mutex*) - (remote-conndat-set! runremote #f) - (http-transport:close-connections area-dat: runremote) - (remote-server-url-set! runremote #f) - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1") - (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) - -(define (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) - (if (and (vector? res) - (eq? (vector-length res) 2) - (eq? (vector-ref res 1) 'overloaded)) ;; since we are - ;; looking at the - ;; data to carry the - ;; error we'll use a - ;; fairly obtuse - ;; combo to minimise - ;; the chances of - ;; some sort of - ;; collision. this - ;; is the case where - ;; the returned data - ;; is bad or the - ;; server is - ;; overloaded and we - ;; want to ease off - ;; the queries - (let ((wait-delay (+ attemptnum (* attemptnum 10)))) - (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.") - (mutex-lock! *rmt-mutex*) - (http-transport:close-connections area-dat: runremote) - (set! *runremote* #f) ;; force starting over - (mutex-unlock! *rmt-mutex*) - (thread-sleep! wait-delay) - (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) - res)) ;; All good, return res - -#;(set-functions rmt:send-receive remote-server-url-set! - http-transport:close-connections remote-conndat-set! - debug:print debug:print-info - remote-ro-mode remote-ro-mode-set! - remote-ro-mode-checked-set! remote-ro-mode-checked) - ;; gets mtpg-run-id and syncs the record if different ;; (define (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time) (let* ((runs-ht (hash-table-ref cached-info 'runs)) (runinf (hash-table-ref/default runs-ht run-id #f)) @@ -1771,11 +1408,11 @@ (string-match ".*/main.db$" dbfile)) (begin (debug:print-info 0 *default-log-port* "Releasing lock for "dbfile) (db:with-lock-db (servdat-dbfile *server-info*) (lambda (dbh dbfile) - (db:release-lock dbh))))))) + (db:release-lock dbh dbfile))))))) (if (bdat-task-db *bdat*) ;; TODO: Check that this is correct for task db (let ((db (cdr (bdat-task-db *bdat*)))) (if (sqlite3:database? db) (begin (sqlite3:interrupt! db) @@ -1872,18 +1509,10 @@ (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one (server:kind-run areapath)) (thread-sleep! 5) (loop (server:check-if-running areapath) (+ try-num 1))))))) - -(define (make-and-init-remote) - (make-remote ;; hh-dat: (common:get-homehost) - server-info: (if *toppath* (server:check-if-running *toppath*) #f) - server-timeout: (server:expiration-timeout))) - - - ;; called in megatest.scm, host-port is string hostname:port ;; ;; NOTE: This is NOT called directly from clients as not all transports support a client running ;; in the same process as the server.