Index: clientmod.scm ================================================================== --- clientmod.scm +++ clientmod.scm @@ -23,11 +23,36 @@ ;;(use srfi-18 extras tcp s11n srfi-1 posix regex srfi-69 hostinfo md5 ;; message-digest matchable spiffy uri-common intarweb http-client ;; spiffy-request-vars uri-common intarweb directory-utils) (declare (unit clientmod)) +(declare (uses servermod)) +(declare (uses artifacts)) (module clientmod * +(import scheme + posix + data-structures + srfi-18 + + artifacts + servermod + ) + +(define (client:find-server areapath) + (let* ((sdir (conc areapath"/.server")) + (sarfs (glob (conc sdir"/*.artifact")))) ;; server artifacts + (if (null? sarfs) + (begin + (server:launch areapath) + (thread-sleep! 1) + (client:find-server areapath)) + (let* ((sarf (car sarfs)) + (sdat (read-artifact->alist sarf)) + (srvdir (alist-ref 'd sdat))) + srvdir)))) + + ) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -23,15 +23,15 @@ (declare (unit rmt)) (declare (uses api)) (declare (uses http-transport)) (declare (uses dbfile)) -;; (include "common_records.scm") -;; ;; (declare (uses rmtmod)) -;; -;; (import dbfile) ;; rmtmod) -;; +(include "common_records.scm") +;; (declare (uses rmtmod)) + +(import dbfile) ;; rmtmod) + ;; ;; ;; ;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!! ;; ;; ;; ;; ;; generate entries for ~/.megatestrc with the following @@ -66,270 +66,271 @@ ;; ;; ;;====================================================================== ;; ;; (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id ;; +;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected +;; ;; ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; ;; -;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected -;; -;; #;(common:telemetry-log (conc "rmt:"(->string cmd)) -;; payload: `((rid . ,rid) -;; (params . ,params))) -;; -;; (if (> attemptnum 2) -;; (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum)) -;; -;; (cond -;; ((> attemptnum 2) (thread-sleep! 0.05)) -;; ((> attemptnum 10) (thread-sleep! 0.5)) -;; ((> attemptnum 20) (thread-sleep! 1))) -;; (if (and (> attemptnum 5) (= 0 (modulo attemptnum 15))) -;; (begin (server:run *toppath*) (thread-sleep! 3))) -;; -;; -;; ;;DOT digraph megatest_state_status { -;; ;;DOT ranksep=0; -;; ;;DOT // rankdir=LR; -;; ;;DOT node [shape="box"]; -;; ;;DOT "rmt:send-receive" -> MUTEXLOCK; -;; ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; } -;; ;; do all the prep locked under the rmt-mutex -;; (mutex-lock! *rmt-mutex*) -;; -;; ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote -;; ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. -;; ;; 3. do the query, if on homehost use local access -;; ;; -;; (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value -;; (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas -;; (runremote (or area-dat -;; *runremote*)) -;; (attemptnum (+ 1 attemptnum)) -;; (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*))) -;; -;; ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity -;; ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"]; -;; ;; DOT INIT_RUNREMOTE -> MUTEXLOCK; -;; ;; ensure we have a record for our connection for given area -;; (if (not runremote) ;; can remove this one. should never get here. -;; (begin -;; (set! *runremote* (make-remote)) -;; (let* ((server-info (remote-server-info *runremote*))) -;; (if server-info -;; (begin -;; (remote-server-url-set! *runremote* (server:record->url server-info)) -;; (remote-server-id-set! *runremote* (server:record->id server-info))))) -;; (set! runremote *runremote*))) ;; new runremote will come from this on next iteration -;; -;; ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity -;; ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"]; -;; ;; DOT SET_HOMEHOST -> MUTEXLOCK; -;; ;; ensure we have a homehost record -;; (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost -;; (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little -;; (let ((hh-data (server:choose-server areapath 'homehost))) -;; (remote-hh-dat-set! runremote (or hh-data (cons #f #f))))) -;; -;; ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) -;; (cond -;; #;((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds -;; (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.") -;; (set! *runremote* #f) -;; ;; BUG: close-connections should go here? -;; (mutex-unlock! *rmt-mutex*) -;; (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat)) -;; -;; ;;DOT EXIT; -;; ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" } -;; ;; give up if more than 150 attempts -;; ((> attemptnum 150) -;; (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.") -;; (exit 1)) -;; -;; ;;DOT CASE2 [label="local\nreadonly\nquery"]; -;; ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2} -;; ;;DOT CASE2 -> "rmt:open-qry-close-locally"; -;; ;; readonly mode, read request- handle it - case 2 -;; ((and readonly-mode -;; (member cmd api:read-only-queries)) -;; (mutex-unlock! *rmt-mutex*) -;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2") -;; (rmt:open-qry-close-locally cmd 0 params) -;; ) -;; -;; ;;DOT CASE3 [label="write in\nread-only mode"]; -;; ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3} -;; ;;DOT CASE3 -> "#f"; -;; ;; readonly mode, write request. Do nothing, return #f -;; (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params)) -;; -;; ;; This block was for pre-emptively resetting the connection if there had been no communication for some time. -;; ;; I don't think it adds any value. If the server is not there, just fail and start a new connection. -;; ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout) -;; ;; -;; ;;DOT CASE4 [label="reset\nconnection"]; -;; ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4} -;; ;;DOT CASE4 -> "rmt:send-receive"; -;; ;; reset the connection if it has been unused too long -;; ((and runremote -;; ;; (remote-conndat runremote) -;; (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on -;; (+ (remote-last-access runremote) -;; (remote-server-timeout runremote)))) -;; (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.") -;; (http-transport:close-connections runremote) -;; ;; moving this setting of runremote conndat to #f to inside the http-transport:close-connections -;; ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection. -;; (mutex-unlock! *rmt-mutex*) -;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) -;; -;; ;;DOT CASE5 [label="local\nread"]; -;; ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5}; -;; ;;DOT CASE5 -> "rmt:open-qry-close-locally"; -;; -;; ;; on homehost and this is a read -;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required -;; (rmt:on-homehost? runremote) -;; (member cmd api:read-only-queries)) ;; this is a read -;; (mutex-unlock! *rmt-mutex*) -;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") -;; (rmt:open-qry-close-locally cmd 0 params)) -;; -;; ;;DOT CASE6 [label="init\nremote"]; -;; ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6}; -;; ;;DOT CASE6 -> "rmt:send-receive"; -;; ;; on homehost and this is a write, we already have a server, but server has died -;; -;; ;; reinstate this keep-alive section but inject a time condition into the (add ... -;; ;; -;; ;; ((and (cdr (remote-hh-dat runremote)) ;; on homehost -;; ;; (not (member cmd api:read-only-queries)) ;; this is a write -;; ;; (remote-server-url runremote) ;; have a server -;; ;; (not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. -;; ;; (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6") -;; ;; (http-transport:close-connections area-dat: runremote) ;; make sure to clean up -;; ;; (set! *runremote* (make-remote)) -;; ;; (let* ((server-info (remote-server-info *runremote*))) -;; ;; (if server-info -;; ;; (begin -;; ;; (remote-server-url-set! *runremote* (server:record->url server-info)) -;; ;; (remote-server-id-set! *runremote* (server:record->id server-info))))) -;; ;; (remote-force-server-set! runremote (common:force-server?)) -;; ;; (mutex-unlock! *rmt-mutex*) -;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6") -;; ;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) -;; -;; ;;DOT CASE7 [label="homehost\nwrite"]; -;; ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7}; -;; ;;DOT CASE7 -> "rmt:open-qry-close-locally"; -;; ;; on homehost and this is a write, we already have a server -;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required -;; (cdr (remote-hh-dat runremote)) ;; on homehost -;; (not (member cmd api:read-only-queries)) ;; this is a write -;; (remote-server-url runremote)) ;; have a server -;; (mutex-unlock! *rmt-mutex*) -;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") -;; (rmt:open-qry-close-locally cmd 0 params)) -;; -;; ;;DOT CASE8 [label="force\nserver"]; -;; ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8}; -;; ;;DOT CASE8 -> "rmt:open-qry-close-locally"; -;; ;; on homehost, no server contact made and this is a write, passively start a server -;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required -;; (cdr (remote-hh-dat runremote)) ;; have homehost -;; (not (remote-server-url runremote)) ;; no connection yet -;; (not (member cmd api:read-only-queries))) ;; not a read-only query -;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8") -;; (let ((server-info (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call -;; (if server-info -;; (begin -;; (remote-server-url-set! runremote (server:record->url server-info)) ;; the string can be consumed by the client setup if needed -;; (remote-server-id-set! runremote (server:record->id server-info))) -;; (if (common:force-server?) -;; (server:start-and-wait *toppath*) -;; (server:kind-run *toppath*))) -;; (remote-force-server-set! runremote (common:force-server?)) -;; (mutex-unlock! *rmt-mutex*) -;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8.1") -;; (rmt:open-qry-close-locally cmd 0 params))) -;; -;; ;;DOT CASE9 [label="force server\nnot on homehost"]; -;; ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9}; -;; ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive"; -;; ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one -;; (not (remote-conndat runremote))) -;; (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost -;; (not (remote-conndat runremote)))) ;; and no connection -;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote)) -;; (mutex-unlock! *rmt-mutex*) -;; (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up? -;; (server:start-and-wait *toppath*)) -;; (remote-conndat-set! runremote (rmt:get-connection-info *toppath* runremote)) ;; calls client:setup which calls client:setup-http -;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as -;; -;; ;;DOT CASE10 [label="on homehost"]; -;; ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10}; -;; ;;DOT CASE10 -> "rmt:open-qry-close-locally"; -;; ;; all set up if get this far, dispatch the query -;; ((and (not (remote-force-server runremote)) -;; (cdr (remote-hh-dat runremote))) ;; we are on homehost -;; (mutex-unlock! *rmt-mutex*) -;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10") -;; (rmt:open-qry-close-locally cmd (if rid rid 0) params)) -;; -;; ;;DOT CASE11 [label="send_receive"]; -;; ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11}; -;; ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"]; -;; ;;DOT CASE11 -> "RESULT" [label="call succeeded"]; -;; ;; not on homehost, do server query -;; (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid))))) -;; ;;DOT } -;; -;; ;; bunch of small functions factored out of send-receive to make debug easier -;; ;; -;; -;; (define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid) -;; ;; (mutex-unlock! *rmt-mutex*) -;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9") -;; ;; (mutex-lock! *rmt-mutex*) -;; (let* ((conninfo (remote-conndat runremote)) -;; (dat-in (condition-case ;; handling here has -;; ;; caused a lot of -;; ;; problems. However it -;; ;; is needed to deal with -;; ;; attemtped -;; ;; communication to -;; ;; servers that have gone -;; ;; away -;; (http-transport:client-api-send-receive 0 runremote cmd params) -;; ;; (http-transport:client-api-send-receive 0 conninfo cmd params runremote) -;; ((servermismatch) (vector #f "Server id mismatch" )) -;; ((commfail)(vector #f "communications fail")) -;; ((exn)(vector #f "other fail" (print-call-chain))))) -;; (dat (if (and (vector? dat-in) ;; ... check it is a correct size -;; (> (vector-length dat-in) 1)) -;; dat-in -;; (vector #f (conc "communications fail (type 2), dat-in=" dat-in)))) -;; (success (if (vector? dat) (vector-ref dat 0) #f)) -;; (res (if (vector? dat) (vector-ref dat 1) #f))) -;; (if (and (vector? conninfo) (< 5 (vector-length conninfo))) -;; (remote-last-access-set! runremote (current-seconds)) ;; refresh access time -;; (begin -;; (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo) -;; (set! conninfo #f) -;; (http-transport:close-connections runremote))) -;; (debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote) -;; (mutex-unlock! *rmt-mutex*) -;; (if success ;; success only tells us that the transport was -;; ;; successful, have to examine the data to see if -;; ;; there was a detected issue at the other end -;; (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) -;; (begin -;; (debug:print-error 0 *default-log-port* " dat=" dat) -;; (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params)) -;; ))) +;; ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected +;; ;; #;(common:telemetry-log (conc "rmt:"(->string cmd)) +;; ;; payload: `((rid . ,rid) +;; ;; (params . ,params))) +;; ;; +;; ;; (if (> attemptnum 2) +;; ;; (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum)) +;; ;; +;; ;; (cond +;; ;; ((> attemptnum 2) (thread-sleep! 0.05)) +;; ;; ((> attemptnum 10) (thread-sleep! 0.5)) +;; ;; ((> attemptnum 20) (thread-sleep! 1))) +;; ;; (if (and (> attemptnum 5) (= 0 (modulo attemptnum 15))) +;; ;; (begin (server:run *toppath*) (thread-sleep! 3))) +;; ;; +;; ;; +;; ;; ;;DOT digraph megatest_state_status { +;; ;; ;;DOT ranksep=0; +;; ;; ;;DOT // rankdir=LR; +;; ;; ;;DOT node [shape="box"]; +;; ;; ;;DOT "rmt:send-receive" -> MUTEXLOCK; +;; ;; ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; } +;; ;; ;; do all the prep locked under the rmt-mutex +;; ;; (mutex-lock! *rmt-mutex*) +;; ;; +;; ;; ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote +;; ;; ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. +;; ;; ;; 3. do the query, if on homehost use local access +;; ;; ;; +;; ;; (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value +;; ;; (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas +;; ;; (runremote (or area-dat +;; ;; *runremote*)) +;; ;; (attemptnum (+ 1 attemptnum)) +;; ;; (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*))) +;; ;; +;; ;; ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity +;; ;; ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"]; +;; ;; ;; DOT INIT_RUNREMOTE -> MUTEXLOCK; +;; ;; ;; ensure we have a record for our connection for given area +;; ;; (if (not runremote) ;; can remove this one. should never get here. +;; ;; (begin +;; ;; (set! *runremote* (make-remote)) +;; ;; (let* ((server-info (remote-server-info *runremote*))) +;; ;; (if server-info +;; ;; (begin +;; ;; (remote-server-url-set! *runremote* (server:record->url server-info)) +;; ;; (remote-server-id-set! *runremote* (server:record->id server-info))))) +;; ;; (set! runremote *runremote*))) ;; new runremote will come from this on next iteration +;; ;; +;; ;; ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity +;; ;; ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"]; +;; ;; ;; DOT SET_HOMEHOST -> MUTEXLOCK; +;; ;; ;; ensure we have a homehost record +;; ;; (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost +;; ;; (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little +;; ;; (let ((hh-data (server:choose-server areapath 'homehost))) +;; ;; (remote-hh-dat-set! runremote (or hh-data (cons #f #f))))) +;; ;; +;; ;; ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) +;; ;; (cond +;; ;; #;((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds +;; ;; (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.") +;; ;; (set! *runremote* #f) +;; ;; ;; BUG: close-connections should go here? +;; ;; (mutex-unlock! *rmt-mutex*) +;; ;; (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat)) +;; ;; +;; ;; ;;DOT EXIT; +;; ;; ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" } +;; ;; ;; give up if more than 150 attempts +;; ;; ((> attemptnum 150) +;; ;; (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.") +;; ;; (exit 1)) +;; ;; +;; ;; ;;DOT CASE2 [label="local\nreadonly\nquery"]; +;; ;; ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2} +;; ;; ;;DOT CASE2 -> "rmt:open-qry-close-locally"; +;; ;; ;; readonly mode, read request- handle it - case 2 +;; ;; ((and readonly-mode +;; ;; (member cmd api:read-only-queries)) +;; ;; (mutex-unlock! *rmt-mutex*) +;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2") +;; ;; (rmt:open-qry-close-locally cmd 0 params) +;; ;; ) +;; ;; +;; ;; ;;DOT CASE3 [label="write in\nread-only mode"]; +;; ;; ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3} +;; ;; ;;DOT CASE3 -> "#f"; +;; ;; ;; readonly mode, write request. Do nothing, return #f +;; ;; (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params)) +;; ;; +;; ;; ;; This block was for pre-emptively resetting the connection if there had been no communication for some time. +;; ;; ;; I don't think it adds any value. If the server is not there, just fail and start a new connection. +;; ;; ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout) +;; ;; ;; +;; ;; ;;DOT CASE4 [label="reset\nconnection"]; +;; ;; ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4} +;; ;; ;;DOT CASE4 -> "rmt:send-receive"; +;; ;; ;; reset the connection if it has been unused too long +;; ;; ((and runremote +;; ;; ;; (remote-conndat runremote) +;; ;; (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on +;; ;; (+ (remote-last-access runremote) +;; ;; (remote-server-timeout runremote)))) +;; ;; (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.") +;; ;; (http-transport:close-connections runremote) +;; ;; ;; moving this setting of runremote conndat to #f to inside the http-transport:close-connections +;; ;; ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection. +;; ;; (mutex-unlock! *rmt-mutex*) +;; ;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) +;; ;; +;; ;; ;;DOT CASE5 [label="local\nread"]; +;; ;; ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5}; +;; ;; ;;DOT CASE5 -> "rmt:open-qry-close-locally"; +;; ;; +;; ;; ;; on homehost and this is a read +;; ;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required +;; ;; (rmt:on-homehost? runremote) +;; ;; (member cmd api:read-only-queries)) ;; this is a read +;; ;; (mutex-unlock! *rmt-mutex*) +;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") +;; ;; (rmt:open-qry-close-locally cmd 0 params)) +;; ;; +;; ;; ;;DOT CASE6 [label="init\nremote"]; +;; ;; ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6}; +;; ;; ;;DOT CASE6 -> "rmt:send-receive"; +;; ;; ;; on homehost and this is a write, we already have a server, but server has died +;; ;; +;; ;; ;; reinstate this keep-alive section but inject a time condition into the (add ... +;; ;; ;; +;; ;; ;; ((and (cdr (remote-hh-dat runremote)) ;; on homehost +;; ;; ;; (not (member cmd api:read-only-queries)) ;; this is a write +;; ;; ;; (remote-server-url runremote) ;; have a server +;; ;; ;; (not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. +;; ;; ;; (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6") +;; ;; ;; (http-transport:close-connections area-dat: runremote) ;; make sure to clean up +;; ;; ;; (set! *runremote* (make-remote)) +;; ;; ;; (let* ((server-info (remote-server-info *runremote*))) +;; ;; ;; (if server-info +;; ;; ;; (begin +;; ;; ;; (remote-server-url-set! *runremote* (server:record->url server-info)) +;; ;; ;; (remote-server-id-set! *runremote* (server:record->id server-info))))) +;; ;; ;; (remote-force-server-set! runremote (common:force-server?)) +;; ;; ;; (mutex-unlock! *rmt-mutex*) +;; ;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6") +;; ;; ;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) +;; ;; +;; ;; ;;DOT CASE7 [label="homehost\nwrite"]; +;; ;; ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7}; +;; ;; ;;DOT CASE7 -> "rmt:open-qry-close-locally"; +;; ;; ;; on homehost and this is a write, we already have a server +;; ;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required +;; ;; (cdr (remote-hh-dat runremote)) ;; on homehost +;; ;; (not (member cmd api:read-only-queries)) ;; this is a write +;; ;; (remote-server-url runremote)) ;; have a server +;; ;; (mutex-unlock! *rmt-mutex*) +;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") +;; ;; (rmt:open-qry-close-locally cmd 0 params)) +;; ;; +;; ;; ;;DOT CASE8 [label="force\nserver"]; +;; ;; ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8}; +;; ;; ;;DOT CASE8 -> "rmt:open-qry-close-locally"; +;; ;; ;; on homehost, no server contact made and this is a write, passively start a server +;; ;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required +;; ;; (cdr (remote-hh-dat runremote)) ;; have homehost +;; ;; (not (remote-server-url runremote)) ;; no connection yet +;; ;; (not (member cmd api:read-only-queries))) ;; not a read-only query +;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8") +;; ;; (let ((server-info (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call +;; ;; (if server-info +;; ;; (begin +;; ;; (remote-server-url-set! runremote (server:record->url server-info)) ;; the string can be consumed by the client setup if needed +;; ;; (remote-server-id-set! runremote (server:record->id server-info))) +;; ;; (if (common:force-server?) +;; ;; (server:start-and-wait *toppath*) +;; ;; (server:kind-run *toppath*))) +;; ;; (remote-force-server-set! runremote (common:force-server?)) +;; ;; (mutex-unlock! *rmt-mutex*) +;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8.1") +;; ;; (rmt:open-qry-close-locally cmd 0 params))) +;; ;; +;; ;; ;;DOT CASE9 [label="force server\nnot on homehost"]; +;; ;; ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9}; +;; ;; ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive"; +;; ;; ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one +;; ;; (not (remote-conndat runremote))) +;; ;; (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost +;; ;; (not (remote-conndat runremote)))) ;; and no connection +;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote)) +;; ;; (mutex-unlock! *rmt-mutex*) +;; ;; (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up? +;; ;; (server:start-and-wait *toppath*)) +;; ;; (remote-conndat-set! runremote (rmt:get-connection-info *toppath* runremote)) ;; calls client:setup which calls client:setup-http +;; ;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as +;; ;; +;; ;; ;;DOT CASE10 [label="on homehost"]; +;; ;; ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10}; +;; ;; ;;DOT CASE10 -> "rmt:open-qry-close-locally"; +;; ;; ;; all set up if get this far, dispatch the query +;; ;; ((and (not (remote-force-server runremote)) +;; ;; (cdr (remote-hh-dat runremote))) ;; we are on homehost +;; ;; (mutex-unlock! *rmt-mutex*) +;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10") +;; ;; (rmt:open-qry-close-locally cmd (if rid rid 0) params)) +;; ;; +;; ;; ;;DOT CASE11 [label="send_receive"]; +;; ;; ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11}; +;; ;; ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"]; +;; ;; ;;DOT CASE11 -> "RESULT" [label="call succeeded"]; +;; ;; ;; not on homehost, do server query +;; ;; (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid))))) +;; ;; ;;DOT } +;; ;; +;; ;; ;; bunch of small functions factored out of send-receive to make debug easier +;; ;; ;; +;; ;; +;; ;; (define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid) +;; ;; ;; (mutex-unlock! *rmt-mutex*) +;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9") +;; ;; ;; (mutex-lock! *rmt-mutex*) +;; ;; (let* ((conninfo (remote-conndat runremote)) +;; ;; (dat-in (condition-case ;; handling here has +;; ;; ;; caused a lot of +;; ;; ;; problems. However it +;; ;; ;; is needed to deal with +;; ;; ;; attemtped +;; ;; ;; communication to +;; ;; ;; servers that have gone +;; ;; ;; away +;; ;; (http-transport:client-api-send-receive 0 runremote cmd params) +;; ;; ;; (http-transport:client-api-send-receive 0 conninfo cmd params runremote) +;; ;; ((servermismatch) (vector #f "Server id mismatch" )) +;; ;; ((commfail)(vector #f "communications fail")) +;; ;; ((exn)(vector #f "other fail" (print-call-chain))))) +;; ;; (dat (if (and (vector? dat-in) ;; ... check it is a correct size +;; ;; (> (vector-length dat-in) 1)) +;; ;; dat-in +;; ;; (vector #f (conc "communications fail (type 2), dat-in=" dat-in)))) +;; ;; (success (if (vector? dat) (vector-ref dat 0) #f)) +;; ;; (res (if (vector? dat) (vector-ref dat 1) #f))) +;; ;; (if (and (vector? conninfo) (< 5 (vector-length conninfo))) +;; ;; (remote-last-access-set! runremote (current-seconds)) ;; refresh access time +;; ;; (begin +;; ;; (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo) +;; ;; (set! conninfo #f) +;; ;; (http-transport:close-connections runremote))) +;; ;; (debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote) +;; ;; (mutex-unlock! *rmt-mutex*) +;; ;; (if success ;; success only tells us that the transport was +;; ;; ;; successful, have to examine the data to see if +;; ;; ;; there was a detected issue at the other end +;; ;; (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) +;; ;; (begin +;; ;; (debug:print-error 0 *default-log-port* " dat=" dat) +;; ;; (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params)) +;; ;; ))) ;; ;; (define (rmt:print-db-stats) ;; (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f" ;; (debug:print 18 *default-log-port* "DB Stats\n========") ;; (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg")) @@ -1074,6 +1075,7 @@ ;; #;(set-functions rmt:send-receive remote-server-url-set! ;; http-transport:close-connections remote-conndat-set! ;; debug:print debug:print-info ;; remote-ro-mode remote-ro-mode-set! ;; remote-ro-mode-checked-set! remote-ro-mode-checked) +;; ;; Index: rmtmod.scm ================================================================== --- rmtmod.scm +++ rmtmod.scm @@ -18,10 +18,1068 @@ ;;====================================================================== (declare (unit rmtmod)) (declare (uses commonmod)) +(declare (uses clientmod)) (module rmtmod - * +* + +(import scheme + + clientmod + ) + +;; +;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!! +;; + +;; generate entries for ~/.megatestrc with the following +;; +;; grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u + +;;====================================================================== +;; S U P P O R T F U N C T I O N S +;;====================================================================== + +;; if a server is either running or in the process of starting call client:setup +;; else return #f to let the calling proc know that there is no server available +;; +(define (rmt:get-connection-info areapath runremote) ;; TODO: push areapath down. + (client:find-server areapath) + #;(let* ((cinfo (if (remote? runremote) + (remote-conndat runremote) + #f))) + (if cinfo + cinfo + (if (server:check-if-running areapath) + (client:setup areapath runremote) + #f)))) + +(define (rmt:on-homehost? runremote) + #t + ) + #;(let* ((hh-dat (remote-hh-dat runremote))) + (if (pair? hh-dat) + (cdr hh-dat) + (begin + (debug:print-info 0 *default-log-port* "hh-dat="hh-dat) + #f)))) + + +;;====================================================================== + +(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id + +(define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected + +;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) +;; +;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected +;; #;(common:telemetry-log (conc "rmt:"(->string cmd)) +;; payload: `((rid . ,rid) +;; (params . ,params))) +;; +;; (if (> attemptnum 2) +;; (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum)) +;; +;; (cond +;; ((> attemptnum 2) (thread-sleep! 0.05)) +;; ((> attemptnum 10) (thread-sleep! 0.5)) +;; ((> attemptnum 20) (thread-sleep! 1))) +;; (if (and (> attemptnum 5) (= 0 (modulo attemptnum 15))) +;; (begin (server:run *toppath*) (thread-sleep! 3))) +;; +;; +;; ;;DOT digraph megatest_state_status { +;; ;;DOT ranksep=0; +;; ;;DOT // rankdir=LR; +;; ;;DOT node [shape="box"]; +;; ;;DOT "rmt:send-receive" -> MUTEXLOCK; +;; ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; } +;; ;; do all the prep locked under the rmt-mutex +;; (mutex-lock! *rmt-mutex*) +;; +;; ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote +;; ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. +;; ;; 3. do the query, if on homehost use local access +;; ;; +;; (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value +;; (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas +;; (runremote (or area-dat +;; *runremote*)) +;; (attemptnum (+ 1 attemptnum)) +;; (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*))) +;; +;; ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity +;; ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"]; +;; ;; DOT INIT_RUNREMOTE -> MUTEXLOCK; +;; ;; ensure we have a record for our connection for given area +;; (if (not runremote) ;; can remove this one. should never get here. +;; (begin +;; (set! *runremote* (make-remote)) +;; (let* ((server-info (remote-server-info *runremote*))) +;; (if server-info +;; (begin +;; (remote-server-url-set! *runremote* (server:record->url server-info)) +;; (remote-server-id-set! *runremote* (server:record->id server-info))))) +;; (set! runremote *runremote*))) ;; new runremote will come from this on next iteration +;; +;; ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity +;; ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"]; +;; ;; DOT SET_HOMEHOST -> MUTEXLOCK; +;; ;; ensure we have a homehost record +;; (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost +;; (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little +;; (let ((hh-data (server:choose-server areapath 'homehost))) +;; (remote-hh-dat-set! runremote (or hh-data (cons #f #f))))) +;; +;; ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) +;; (cond +;; #;((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds +;; (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.") +;; (set! *runremote* #f) +;; ;; BUG: close-connections should go here? +;; (mutex-unlock! *rmt-mutex*) +;; (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat)) +;; +;; ;;DOT EXIT; +;; ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" } +;; ;; give up if more than 150 attempts +;; ((> attemptnum 150) +;; (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.") +;; (exit 1)) +;; +;; ;;DOT CASE2 [label="local\nreadonly\nquery"]; +;; ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2} +;; ;;DOT CASE2 -> "rmt:open-qry-close-locally"; +;; ;; readonly mode, read request- handle it - case 2 +;; ((and readonly-mode +;; (member cmd api:read-only-queries)) +;; (mutex-unlock! *rmt-mutex*) +;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2") +;; (rmt:open-qry-close-locally cmd 0 params) +;; ) +;; +;; ;;DOT CASE3 [label="write in\nread-only mode"]; +;; ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3} +;; ;;DOT CASE3 -> "#f"; +;; ;; readonly mode, write request. Do nothing, return #f +;; (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params)) +;; +;; ;; This block was for pre-emptively resetting the connection if there had been no communication for some time. +;; ;; I don't think it adds any value. If the server is not there, just fail and start a new connection. +;; ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout) +;; ;; +;; ;;DOT CASE4 [label="reset\nconnection"]; +;; ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4} +;; ;;DOT CASE4 -> "rmt:send-receive"; +;; ;; reset the connection if it has been unused too long +;; ((and runremote +;; ;; (remote-conndat runremote) +;; (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on +;; (+ (remote-last-access runremote) +;; (remote-server-timeout runremote)))) +;; (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.") +;; (http-transport:close-connections runremote) +;; ;; moving this setting of runremote conndat to #f to inside the http-transport:close-connections +;; ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection. +;; (mutex-unlock! *rmt-mutex*) +;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) +;; +;; ;;DOT CASE5 [label="local\nread"]; +;; ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5}; +;; ;;DOT CASE5 -> "rmt:open-qry-close-locally"; +;; +;; ;; on homehost and this is a read +;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required +;; (rmt:on-homehost? runremote) +;; (member cmd api:read-only-queries)) ;; this is a read +;; (mutex-unlock! *rmt-mutex*) +;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") +;; (rmt:open-qry-close-locally cmd 0 params)) +;; +;; ;;DOT CASE6 [label="init\nremote"]; +;; ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6}; +;; ;;DOT CASE6 -> "rmt:send-receive"; +;; ;; on homehost and this is a write, we already have a server, but server has died +;; +;; ;; reinstate this keep-alive section but inject a time condition into the (add ... +;; ;; +;; ;; ((and (cdr (remote-hh-dat runremote)) ;; on homehost +;; ;; (not (member cmd api:read-only-queries)) ;; this is a write +;; ;; (remote-server-url runremote) ;; have a server +;; ;; (not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. +;; ;; (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6") +;; ;; (http-transport:close-connections area-dat: runremote) ;; make sure to clean up +;; ;; (set! *runremote* (make-remote)) +;; ;; (let* ((server-info (remote-server-info *runremote*))) +;; ;; (if server-info +;; ;; (begin +;; ;; (remote-server-url-set! *runremote* (server:record->url server-info)) +;; ;; (remote-server-id-set! *runremote* (server:record->id server-info))))) +;; ;; (remote-force-server-set! runremote (common:force-server?)) +;; ;; (mutex-unlock! *rmt-mutex*) +;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6") +;; ;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) +;; +;; ;;DOT CASE7 [label="homehost\nwrite"]; +;; ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7}; +;; ;;DOT CASE7 -> "rmt:open-qry-close-locally"; +;; ;; on homehost and this is a write, we already have a server +;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required +;; (cdr (remote-hh-dat runremote)) ;; on homehost +;; (not (member cmd api:read-only-queries)) ;; this is a write +;; (remote-server-url runremote)) ;; have a server +;; (mutex-unlock! *rmt-mutex*) +;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") +;; (rmt:open-qry-close-locally cmd 0 params)) +;; +;; ;;DOT CASE8 [label="force\nserver"]; +;; ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8}; +;; ;;DOT CASE8 -> "rmt:open-qry-close-locally"; +;; ;; on homehost, no server contact made and this is a write, passively start a server +;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required +;; (cdr (remote-hh-dat runremote)) ;; have homehost +;; (not (remote-server-url runremote)) ;; no connection yet +;; (not (member cmd api:read-only-queries))) ;; not a read-only query +;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8") +;; (let ((server-info (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call +;; (if server-info +;; (begin +;; (remote-server-url-set! runremote (server:record->url server-info)) ;; the string can be consumed by the client setup if needed +;; (remote-server-id-set! runremote (server:record->id server-info))) +;; (if (common:force-server?) +;; (server:start-and-wait *toppath*) +;; (server:kind-run *toppath*))) +;; (remote-force-server-set! runremote (common:force-server?)) +;; (mutex-unlock! *rmt-mutex*) +;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8.1") +;; (rmt:open-qry-close-locally cmd 0 params))) +;; +;; ;;DOT CASE9 [label="force server\nnot on homehost"]; +;; ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9}; +;; ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive"; +;; ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one +;; (not (remote-conndat runremote))) +;; (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost +;; (not (remote-conndat runremote)))) ;; and no connection +;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote)) +;; (mutex-unlock! *rmt-mutex*) +;; (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up? +;; (server:start-and-wait *toppath*)) +;; (remote-conndat-set! runremote (rmt:get-connection-info *toppath* runremote)) ;; calls client:setup which calls client:setup-http +;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as +;; +;; ;;DOT CASE10 [label="on homehost"]; +;; ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10}; +;; ;;DOT CASE10 -> "rmt:open-qry-close-locally"; +;; ;; all set up if get this far, dispatch the query +;; ((and (not (remote-force-server runremote)) +;; (cdr (remote-hh-dat runremote))) ;; we are on homehost +;; (mutex-unlock! *rmt-mutex*) +;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10") +;; (rmt:open-qry-close-locally cmd (if rid rid 0) params)) +;; +;; ;;DOT CASE11 [label="send_receive"]; +;; ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11}; +;; ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"]; +;; ;;DOT CASE11 -> "RESULT" [label="call succeeded"]; +;; ;; not on homehost, do server query +;; (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid))))) +;; ;;DOT } +;; +;; ;; bunch of small functions factored out of send-receive to make debug easier +;; ;; +;; +;; (define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid) +;; ;; (mutex-unlock! *rmt-mutex*) +;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9") +;; ;; (mutex-lock! *rmt-mutex*) +;; (let* ((conninfo (remote-conndat runremote)) +;; (dat-in (condition-case ;; handling here has +;; ;; caused a lot of +;; ;; problems. However it +;; ;; is needed to deal with +;; ;; attemtped +;; ;; communication to +;; ;; servers that have gone +;; ;; away +;; (http-transport:client-api-send-receive 0 runremote cmd params) +;; ;; (http-transport:client-api-send-receive 0 conninfo cmd params runremote) +;; ((servermismatch) (vector #f "Server id mismatch" )) +;; ((commfail)(vector #f "communications fail")) +;; ((exn)(vector #f "other fail" (print-call-chain))))) +;; (dat (if (and (vector? dat-in) ;; ... check it is a correct size +;; (> (vector-length dat-in) 1)) +;; dat-in +;; (vector #f (conc "communications fail (type 2), dat-in=" dat-in)))) +;; (success (if (vector? dat) (vector-ref dat 0) #f)) +;; (res (if (vector? dat) (vector-ref dat 1) #f))) +;; (if (and (vector? conninfo) (< 5 (vector-length conninfo))) +;; (remote-last-access-set! runremote (current-seconds)) ;; refresh access time +;; (begin +;; (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo) +;; (set! conninfo #f) +;; (http-transport:close-connections runremote))) +;; (debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote) +;; (mutex-unlock! *rmt-mutex*) +;; (if success ;; success only tells us that the transport was +;; ;; successful, have to examine the data to see if +;; ;; there was a detected issue at the other end +;; (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) +;; (begin +;; (debug:print-error 0 *default-log-port* " dat=" dat) +;; (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params)) +;; ))) + +(define (rmt:print-db-stats) + (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f" + (debug:print 18 *default-log-port* "DB Stats\n========") + (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg")) + (for-each (lambda (cmd) + (let ((cmd-dat (hash-table-ref *db-stats* cmd))) + (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0)))))) + (sort (hash-table-keys *db-stats*) + (lambda (a b) + (> (vector-ref (hash-table-ref *db-stats* a) 0) + (vector-ref (hash-table-ref *db-stats* b) 0))))))) + +(define (rmt:get-max-query-average run-id) + (mutex-lock! *db-stats-mutex*) + (let* ((runkey (conc "run-id=" run-id " ")) + (cmds (filter (lambda (x) + (substring-index runkey x)) + (hash-table-keys *db-stats*))) + (res (if (null? cmds) + (cons 'none 0) + (let loop ((cmd (car cmds)) + (tal (cdr cmds)) + (max-cmd (car cmds)) + (res 0)) + (let* ((cmd-dat (hash-table-ref *db-stats* cmd)) + (tot (vector-ref cmd-dat 0)) + (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction + (currmax (max res curravg)) + (newmax-cmd (if (> curravg res) cmd max-cmd))) + (if (null? tal) + (if (> tot 10) + (cons newmax-cmd currmax) + (cons 'none 0)) + (loop (car tal)(cdr tal) newmax-cmd currmax))))))) + (mutex-unlock! *db-stats-mutex*) + res)) + +(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5)) + (let* ((qry-is-write (not (member cmd api:read-only-queries))) + (db-file-path (db:dbfile-path)) ;; 0)) + (dbstructs-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t))) + (read-only (not (file-write-access? db-file-path))) + (start (current-milliseconds)) + (resdat (if (not (and read-only qry-is-write)) + (let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params)))) + ;; (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong.. + ;; exn ;; This is an attempt to detect that situation and recover gracefully + ;; (begin + ;; (debug:print 0 *default-log-port* "ERROR: bad data from server " v " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) + ;; (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy + (if (and (vector? v) + (> (vector-length v) 1)) + (let ((newvec (vector (vector-ref v 0)(vector-ref v 1)))) + newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record + (vector #t '()))) ;; ) ;; we could also check that the returned types are valid + (vector #t '()))) + (success (vector-ref resdat 0)) + (res (vector-ref resdat 1)) + (duration (- (current-milliseconds) start))) + (if (and read-only qry-is-write) + (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd)) + (if (not success) + (if (> remretries 0) + (begin + (debug:print-error 0 *default-log-port* "local query failed. Trying again.") + (thread-sleep! (/ (random 5000) 1000)) ;; some random delay + (rmt:open-qry-close-locally cmd run-id params remretries: (- remretries 1))) + (begin + (debug:print-error 0 *default-log-port* "too many retries in rmt:open-qry-close-locally, giving up") + #f)) + (begin + ;; (rmt:update-db-stats run-id cmd params duration) + ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it + (if qry-is-write + (let ((start-time (current-seconds))) + (mutex-lock! *db-multi-sync-mutex*) +/ (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client) + (mutex-unlock! *db-multi-sync-mutex*))))) + res)) + +(define (rmt:send-receive-no-auto-client-setup runremote cmd run-id params) + (let* ((run-id (if run-id run-id 0)) + (res (http-transport:client-api-send-receive run-id runremote cmd params))) + (if (and res (vector-ref res 0)) + (vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!! + #f))) + +;;====================================================================== +;; +;; A C T U A L A P I C A L L S +;; +;;====================================================================== + +;;====================================================================== +;; S E R V E R +;;====================================================================== + +(define (rmt:kill-server run-id) + (rmt:send-receive 'kill-server run-id (list run-id))) + +(define (rmt:start-server run-id) + (rmt:send-receive 'start-server 0 (list run-id))) + +;;====================================================================== +;; M I S C +;;====================================================================== + +(define (rmt:login run-id) + (rmt:send-receive 'login run-id (list *toppath* megatest-version (client:get-signature)))) + +;; This login does no retries under the hood - it acts a bit like a ping. +;; Deprecated for nmsg-transport. +;; +(define (rmt:login-no-auto-client-setup runremote) + (rmt:send-receive-no-auto-client-setup runremote 'login 0 (list *toppath* megatest-version (client:get-signature)))) + +;; hand off a call to one of the db:queries statements +;; added run-id to make looking up the correct db possible +;; +(define (rmt:general-call stmtname run-id . params) + (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params))) + + +;; given a hostname, return a pair of cpu load and update time representing latest intelligence from tests running on that host +(define (rmt:get-latest-host-load hostname) + (rmt:send-receive 'get-latest-host-load 0 (list hostname))) + +(define (rmt:sdb-qry qry val run-id) + ;; add caching if qry is 'getid or 'getstr + (rmt:send-receive 'sdb-qry run-id (list qry val))) + +;; NOT COMPLETED +(define (rmt:runtests user run-id testpatt params) + (rmt:send-receive 'runtests run-id testpatt)) + +(define (rmt:get-run-record-ids target run keynames test-patt) + (rmt:send-receive 'get-run-record-ids #f (list target run keynames test-patt))) + +(define (rmt:get-changed-record-ids since-time) + (rmt:send-receive 'get-changed-record-ids #f (list since-time)) ) + +(define (rmt:drop-all-triggers) + (rmt:send-receive 'drop-all-triggers #f '())) + +(define (rmt:create-all-triggers) + (rmt:send-receive 'create-all-triggers #f '())) + +;;====================================================================== +;; T E S T M E T A +;;====================================================================== + +(define (rmt:get-tests-tags) + (rmt:send-receive 'get-tests-tags #f '())) + +;;====================================================================== +;; K E Y S +;;====================================================================== + +;; These require run-id because the values come from the run! +;; +(define (rmt:get-key-val-pairs run-id) + (rmt:send-receive 'get-key-val-pairs run-id (list run-id))) + +(define (rmt:get-keys) + (if *db-keys* *db-keys* + (let ((res (rmt:send-receive 'get-keys #f '()))) + (set! *db-keys* res) + res))) + +(define (rmt:get-keys-write) ;; dummy query to force server start + (let ((res (rmt:send-receive 'get-keys-write #f '()))) + (set! *db-keys* res) + res)) + +;; we don't reuse run-id's (except possibly *after* a db cleanup) so it is safe +;; to cache the resuls in a hash +;; +(define (rmt:get-key-vals run-id) + (or (hash-table-ref/default *keyvals* run-id #f) + (let ((res (rmt:send-receive 'get-key-vals #f (list run-id)))) + (hash-table-set! *keyvals* run-id res) + res))) + +(define (rmt:get-targets) + (rmt:send-receive 'get-targets #f '())) + +(define (rmt:get-target run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-target run-id (list run-id))) + +(define (rmt:get-run-times runpatt targetpatt) + (rmt:send-receive 'get-run-times #f (list runpatt targetpatt ))) + + +;;====================================================================== +;; T E S T S +;;====================================================================== + +;; Just some syntatic sugar +(define (rmt:register-test run-id test-name item-path) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:general-call 'register-test run-id run-id test-name item-path)) + +(define (rmt:get-test-id run-id testname item-path) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-test-id run-id (list run-id testname item-path))) + +;; run-id is NOT used +;; +(define (rmt:get-test-info-by-id run-id test-id) + (if (number? test-id) + (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id)) + (begin + (debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id) + (print-call-chain (current-error-port)) + #f))) + +(define (rmt:test-get-rundir-from-test-id run-id test-id) + (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id))) + +(define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f)) + (assert (number? run-id) "FATAL: Run id required.") + (let* ((test-path (if (string? work-area) + work-area + (rmt:test-get-rundir-from-test-id run-id test-id)))) + (debug:print 3 *default-log-port* "TEST PATH: " test-path) + (open-test-db test-path))) + +;; WARNING: This currently bypasses the transaction wrapped writes system +(define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment))) + +(define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus))) + +(define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode) + (assert (number? run-id) "FATAL: Run id required.") + ;; (if (number? run-id) + (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode))) + ;; (begin + ;; (debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id) + ;; (print-call-chain (current-error-port)) + ;; '()))) + +(define (rmt:get-tests-for-run-state-status run-id testpatt last-update) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-tests-for-run-state-status run-id (list run-id testpatt last-update))) + +;; get stuff via synchash +(define (rmt:synchash-get run-id proc synckey keynum params) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'synchash-get run-id (list run-id proc synckey keynum params))) + +(define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in))) + +;; IDEA: Threadify these - they spend a lot of time waiting ... +;; +(define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in) + (let ((multi-run-mutex (make-mutex)) + (run-id-list (if run-ids + run-ids + (rmt:get-all-run-ids))) + (result '())) + (if (null? run-id-list) + '() + (let loop ((hed (car run-id-list)) + (tal (cdr run-id-list)) + (threads '())) + (if (> (length threads) 5) + (loop hed tal (filter (lambda (th)(not (member (thread-state th) '(terminated dead)))) threads)) + (let* ((newthread (make-thread + (lambda () + (let ((res (rmt:send-receive 'get-tests-for-run-mindata hed (list hed testpatt states status not-in)))) + (if (list? res) + (begin + (mutex-lock! multi-run-mutex) + (set! result (append result res)) + (mutex-unlock! multi-run-mutex)) + (debug:print-error 0 *default-log-port* "get-tests-for-run-mindata failed for run-id " hed ", testpatt " testpatt ", states " states ", status " status ", not-in " not-in)))) + (conc "multi-run-thread for run-id " hed))) + (newthreads (cons newthread threads))) + (thread-start! newthread) + (thread-sleep! 0.05) ;; give that thread some time to start + (if (null? tal) + newthreads + (loop (car tal)(cdr tal) newthreads)))))) + result)) + +;; ;; IDEA: Threadify these - they spend a lot of time waiting ... +;; ;; +;; (define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in) +;; (let ((run-id-list (if run-ids +;; run-ids +;; (rmt:get-all-run-ids)))) +;; (apply append (map (lambda (run-id) +;; (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-ids testpatt states status not-in))) +;; run-id-list)))) + +(define (rmt:delete-test-records run-id test-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'delete-test-records run-id (list run-id test-id))) + +(define (rmt:test-set-state-status run-id test-id state status msg) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'test-set-state-status run-id (list run-id test-id state status msg))) + +(define (rmt:test-toplevel-num-items run-id test-name) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'test-toplevel-num-items run-id (list run-id test-name))) + +;; (define (rmt:get-previous-test-run-record run-id test-name item-path) +;; (rmt:send-receive 'get-previous-test-run-record run-id (list run-id test-name item-path))) + +(define (rmt:get-matching-previous-test-run-records run-id test-name item-path) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-matching-previous-test-run-records run-id (list run-id test-name item-path))) + +(define (rmt:test-get-logfile-info run-id test-name) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'test-get-logfile-info run-id (list run-id test-name))) + +(define (rmt:test-get-records-for-index-file run-id test-name) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'test-get-records-for-index-file run-id (list run-id test-name))) + +(define (rmt:get-testinfo-state-status run-id test-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-testinfo-state-status run-id (list run-id test-id))) + +(define (rmt:test-set-log! run-id test-id logf) + (assert (number? run-id) "FATAL: Run id required.") + (if (string? logf)(rmt:general-call 'test-set-log run-id logf test-id))) + +(define (rmt:test-set-top-process-pid run-id test-id pid) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'test-set-top-process-pid run-id (list run-id test-id pid))) + +(define (rmt:test-get-top-process-pid run-id test-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'test-get-top-process-pid run-id (list run-id test-id))) + +(define (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt) + (rmt:send-receive 'get-run-ids-matching-target #f (list keynames target res runname testpatt statepatt statuspatt))) + +;; NOTE: This will open and access ALL run databases. +;; +(define (rmt:test-get-paths-matching-keynames-target-new keynames target res testpatt statepatt statuspatt runname) + (let ((run-ids (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt))) + (apply append + (map (lambda (run-id) + (rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list run-id keynames target res testpatt statepatt statuspatt runname))) + run-ids)))) + +(define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f)) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps))) + +(define (rmt:get-count-tests-running-for-run-id run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id))) + +(define (rmt:get-not-completed-cnt run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-not-completed-cnt run-id (list run-id))) + + +;; Statistical queries + +(define (rmt:get-count-tests-running run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-count-tests-running run-id (list run-id))) + +(define (rmt:get-count-tests-running-for-testname run-id testname) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-count-tests-running-for-testname run-id (list run-id testname))) + +(define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id (list run-id jobgroup))) + +;; state and status are extra hints not usually used in the calculation +;; +(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment))) + +(define (rmt:set-state-status-and-roll-up-run run-id state status) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'set-state-status-and-roll-up-run run-id (list run-id state status))) + + +(define (rmt:update-pass-fail-counts run-id test-name) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:general-call 'update-pass-fail-counts run-id test-name test-name test-name)) + +(define (rmt:top-test-set-per-pf-counts run-id test-name) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'top-test-set-per-pf-counts run-id (list run-id test-name))) + +(define (rmt:get-raw-run-stats run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-raw-run-stats run-id (list run-id))) + +(define (rmt:get-test-times runname target) + (rmt:send-receive 'get-test-times #f (list runname target ))) + +;;====================================================================== +;; R U N S +;;====================================================================== + +;; BUG - LOOK AT HOW THIS WORKS!!! +;; +(define (rmt:get-run-info run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-run-info #f (list run-id))) + +(define (rmt:get-num-runs runpatt) + (rmt:send-receive 'get-num-runs #f (list runpatt))) + +(define (rmt:get-runs-cnt-by-patt runpatt targetpatt keys) + (rmt:send-receive 'get-runs-cnt-by-patt #f (list runpatt targetpatt keys))) + +;; Use the special run-id == #f scenario here since there is no run yet +(define (rmt:register-run keyvals runname state status user contour) + (rmt:send-receive 'register-run #f (list keyvals runname state status user contour))) + +(define (rmt:get-run-name-from-id run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-run-name-from-id #f (list run-id))) + +(define (rmt:delete-run run-id) + (rmt:send-receive 'delete-run #f (list run-id))) + +(define (rmt:update-run-stats run-id stats) + (rmt:send-receive 'update-run-stats #f (list run-id stats))) + +(define (rmt:delete-old-deleted-test-records) + (rmt:send-receive 'delete-old-deleted-test-records #f '())) + +(define (rmt:get-runs runpatt count offset keypatts) + (rmt:send-receive 'get-runs #f (list runpatt count offset keypatts))) + +(define (rmt:simple-get-runs runpatt count offset target last-update) + (rmt:send-receive 'simple-get-runs #f (list runpatt count offset target last-update))) + +(define (rmt:get-all-run-ids) + (rmt:send-receive 'get-all-run-ids #f '())) + +(define (rmt:get-prev-run-ids run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-prev-run-ids #f (list run-id))) + +(define (rmt:lock/unlock-run run-id lock unlock user) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'lock/unlock-run #f (list run-id lock unlock user))) + +;; set/get status +(define (rmt:get-run-status run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-run-status #f (list run-id))) + +(define (rmt:get-run-state run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-run-state #f (list run-id))) + + +(define (rmt:set-run-status run-id run-status #!key (msg #f)) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'set-run-status #f (list run-id run-status msg))) + +(define (rmt:set-run-state-status run-id state status ) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'set-run-state-status #f (list run-id state status))) + +(define (rmt:update-tesdata-on-repilcate-db old-lt new-lt) +(rmt:send-receive 'update-tesdata-on-repilcate-db #f (list old-lt new-lt))) + +(define (rmt:update-run-event_time run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'update-run-event_time #f (list run-id))) + +(define (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-runs-update #!key (sort-order "asc")) ;; fields of #f uses default + (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit fields last-runs-update sort-order))) + +(define (rmt:find-and-mark-incomplete run-id ovr-deadtime) + (assert (number? run-id) "FATAL: Run id required.") + ;; (if (rmt:send-receive 'have-incompletes? run-id (list run-id ovr-deadtime)) + (rmt:send-receive 'mark-incomplete run-id (list run-id ovr-deadtime))) ;; ) + +(define (rmt:get-main-run-stats run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-main-run-stats #f (list run-id))) + +(define (rmt:get-var varname) + (rmt:send-receive 'get-var #f (list varname))) + +(define (rmt:del-var varname) + (rmt:send-receive 'del-var #f (list varname))) + +(define (rmt:set-var varname value) + (rmt:send-receive 'set-var #f (list varname value))) + +(define (rmt:inc-var varname) + (rmt:send-receive 'inc-var #f (list varname))) + +(define (rmt:dec-var varname) + (rmt:send-receive 'dec-var #f (list varname))) + +(define (rmt:add-var varname value) + (rmt:send-receive 'add-var #f (list varname value))) + +;;====================================================================== +;; M U L T I R U N Q U E R I E S +;;====================================================================== + +;; Need to move this to multi-run section and make associated changes +(define (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f)) + (let ((run-ids (rmt:get-all-run-ids))) + (for-each (lambda (run-id) + (rmt:find-and-mark-incomplete run-id ovr-deadtime)) + run-ids))) + +;; get the previous record for when this test was run where all keys match but runname +;; returns #f if no such test found, returns a single test record if found +;; +;; Run this at the client end since we have to connect to multiple run-id dbs +;; +(define (rmt:get-previous-test-run-record run-id test-name item-path) + (let* ((keyvals (rmt:get-key-val-pairs run-id)) + (keys (rmt:get-keys)) + (selstr (string-intersperse keys ",")) + (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND "))) + (if (not keyvals) + #f + (let ((prev-run-ids (rmt:get-prev-run-ids run-id))) + ;; for each run starting with the most recent look to see if there is a matching test + ;; if found then return that matching test record + (debug:print 4 *default-log-port* "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals ", previous run ids found: " prev-run-ids) + (if (null? prev-run-ids) #f + (let loop ((hed (car prev-run-ids)) + (tal (cdr prev-run-ids))) + (let ((results (rmt:get-tests-for-run hed (conc test-name "/" item-path) '() '() ;; run-id testpatt states statuses + #f #f #f ;; offset limit not-in hide/not-hide + #f #f #f #f 'normal))) ;; sort-by sort-order qryvals last-update mode + (debug:print 4 *default-log-port* "Got tests for run-id " run-id ", test-name " test-name ", item-path " item-path ": " results) + (if (and (null? results) + (not (null? tal))) + (loop (car tal)(cdr tal)) + (if (null? results) #f + (car results)))))))))) + +(define (rmt:get-run-stats) + (rmt:send-receive 'get-run-stats #f '())) + +;;====================================================================== +;; S T E P S +;;====================================================================== + +;; Getting steps is more complicated. +;; +;; If given work area +;; 1. Find the testdat.db file +;; 2. Open the testdat.db file and do the query +;; If not given the work area +;; 1. Do a remote call to get the test path +;; 2. Continue as above +;; +;;(define (rmt:get-steps-for-test run-id test-id) +;; (rmt:send-receive 'get-steps-data run-id (list test-id))) + +(define (rmt:teststep-set-status! run-id test-id teststep-name state-in status-in comment logfile) + (assert (number? run-id) "FATAL: Run id required.") + (let* ((state (items:check-valid-items "state" state-in)) + (status (items:check-valid-items "status" status-in))) + (if (or (not state)(not status)) + (debug:print 3 *default-log-port* "WARNING: Invalid " (if status "status" "state") + " value \"" (if status state-in status-in) "\", update your validvalues section in megatest.config")) + (rmt:send-receive 'teststep-set-status! run-id (list run-id test-id teststep-name state-in status-in comment logfile)))) + + +(define (rmt:delete-steps-for-test! run-id test-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'delete-steps-for-test! run-id (list run-id test-id))) + +(define (rmt:get-steps-for-test run-id test-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-steps-for-test run-id (list run-id test-id))) + +(define (rmt:get-steps-info-by-id run-id test-step-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-steps-info-by-id #f (list run-id test-step-id))) + +;;====================================================================== +;; T E S T D A T A +;;====================================================================== + +(define (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f)) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'read-test-data run-id (list run-id test-id categorypatt))) + +(define (rmt:read-test-data-varpatt run-id test-id categorypatt varpatt #!key (work-area #f)) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'read-test-data-varpatt run-id (list run-id test-id categorypatt varpatt))) + +(define (rmt:get-data-info-by-id run-id test-data-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-data-info-by-id #f (list run-id test-data-id))) + +(define (rmt:testmeta-add-record testname) + (rmt:send-receive 'testmeta-add-record #f (list testname))) + +(define (rmt:testmeta-get-record testname) + (rmt:send-receive 'testmeta-get-record #f (list testname))) + +(define (rmt:testmeta-update-field test-name fld val) + (rmt:send-receive 'testmeta-update-field #f (list test-name fld val))) + +(define (rmt:test-data-rollup run-id test-id status) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'test-data-rollup run-id (list run-id test-id status))) + +(define (rmt:csv->test-data run-id test-id csvdata) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'csv->test-data run-id (list run-id test-id csvdata))) + +;;====================================================================== +;; T A S K S +;;====================================================================== + +(define (rmt:tasks-find-task-queue-records target run-name test-patt state-patt action-patt) + (rmt:send-receive 'find-task-queue-records #f (list target run-name test-patt state-patt action-patt))) + +(define (rmt:tasks-add action owner target runname testpatt params) + (rmt:send-receive 'tasks-add #f (list action owner target runname testpatt params))) + +(define (rmt:tasks-set-state-given-param-key param-key new-state) + (rmt:send-receive 'tasks-set-state-given-param-key #f (list param-key new-state))) + +(define (rmt:tasks-get-last target runname) + (rmt:send-receive 'tasks-get-last #f (list target runname))) + +;;====================================================================== +;; N O S Y N C D B +;;====================================================================== + +(define (rmt:no-sync-set var val) + (rmt:send-receive 'no-sync-set #f `(,var ,val))) + +(define (rmt:no-sync-get/default var default) + (rmt:send-receive 'no-sync-get/default #f `(,var ,default))) + +(define (rmt:no-sync-del! var) + (rmt:send-receive 'no-sync-del! #f `(,var))) + +(define (rmt:no-sync-get-lock keyname) + (rmt:send-receive 'no-sync-get-lock #f `(,keyname))) + +;;====================================================================== +;; A R C H I V E S +;;====================================================================== + +(define (rmt:archive-get-allocations testname itempath dneeded) + (rmt:send-receive 'archive-get-allocations #f (list testname itempath dneeded))) + +(define (rmt:archive-register-block-name bdisk-id archive-path) + (rmt:send-receive 'archive-register-block-name #f (list bdisk-id archive-path))) + +(define (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey) + (rmt:send-receive 'archive-allocate-test-to-block #f (list block-id testsuite-name areakey))) + +(define (rmt:archive-register-disk bdisk-name bdisk-path df) + (rmt:send-receive 'archive-register-disk #f (list bdisk-name bdisk-path df))) + +(define (rmt:test-set-archive-block-id run-id test-id archive-block-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id))) + +(define (rmt:test-get-archive-block-info archive-block-id) + (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id))) + + +(define (rmtmod:calc-ro-mode runremote *toppath*) + (if (and runremote + (remote-ro-mode-checked runremote)) + (remote-ro-mode runremote) + (let* ((mtcfgfile (conc *toppath* "/megatest.config")) + (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future + (if runremote + (begin + (remote-ro-mode-set! runremote ro-mode) + (remote-ro-mode-checked-set! runremote #t) + ro-mode) + ro-mode)))) + +(define (extras-readonly-mode rmt-mutex log-port cmd params) + (mutex-unlock! rmt-mutex) + (debug:print-info 12 log-port "rmt:send-receive, case 3") + (debug:print 0 log-port "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params) + #f) + +(define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params) + (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum) + (mutex-lock! *rmt-mutex*) + (http-transport:close-connections runremote) + (remote-server-url-set! runremote #f) + (mutex-unlock! *rmt-mutex*) + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1") + (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) + +(define (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) + (if (and (vector? res) + (eq? (vector-length res) 2) + (eq? (vector-ref res 1) 'overloaded)) ;; since we are + ;; looking at the + ;; data to carry the + ;; error we'll use a + ;; fairly obtuse + ;; combo to minimise + ;; the chances of + ;; some sort of + ;; collision. this + ;; is the case where + ;; the returned data + ;; is bad or the + ;; server is + ;; overloaded and we + ;; want to ease off + ;; the queries + (let ((wait-delay (+ attemptnum (* attemptnum 10)))) + (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.") + (mutex-lock! *rmt-mutex*) + (http-transport:close-connections runremote) + (set! *runremote* #f) ;; force starting over + (mutex-unlock! *rmt-mutex*) + (thread-sleep! wait-delay) + (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) + res)) ;; All good, return res + +#;(set-functions rmt:send-receive remote-server-url-set! + http-transport:close-connections remote-conndat-set! + debug:print debug:print-info + remote-ro-mode remote-ro-mode-set! + remote-ro-mode-checked-set! remote-ro-mode-checked) + ) Index: servermod.scm ================================================================== --- servermod.scm +++ servermod.scm @@ -43,11 +43,12 @@ (defstruct srv (areapath #f) (host #f) (pid #f) (type #f) - (dir #f) + (sdir #f) ;; .server directory + (hdir #f) ;; .server/host.pid directory (incoming #f) (dbstruct #f) (handler #f) (obj-to-str #f) (str-to-obj #f) @@ -92,17 +93,18 @@ (define (server:setup areapath) (let* ((srvdat (make-srv areapath: areapath host: (get-host-name) ;; likely need to replace with ip address pid: (current-process-id) + sdir: (conc areapath"/.server") ;; put server artifacts here )) - (srvdir (conc areapath"/.server/"(get-host.pid srvdat)))) - (srv-dir-set! srvdat srvdir) - (srv-incoming-set! srvdat (conc srvdir"/incoming")) - (create-directory srvdir #t) + (hdir (conc (srv-sdir srvdat)"/"(get-host.pid srvdat)))) + (srv-hdir-set! srvdat hdir) + (srv-incoming-set! srvdat (conc hdir"/incoming")) + (create-directory hdir #t) (for-each (lambda (d) - (create-directory (conc srvdir"/"d))) + (create-directory (conc hdir"/"d))) '("incoming" "responses")) srvdat)) (define *server-keep-running* #f) @@ -118,25 +120,25 @@ ;; foreach bundle ;; process the request ;; create results arf and write it to clients dir ;; remove in-arf from incoming (let* ((areapath (srv-areapath srvdat)) - (srvdir (srv-dir srvdat)) ;; (server:get-servinfo-dir areapath)) + (sdir (srv-sdir srvdat)) + (hdir (srv-hdir srvdat)) (myarf `((h . ,(srv-host srvdat)) (i . ,(srv-pid srvdat)) - (d . ,srvdir))) ;; (srv->alist srvdat)) - (myuuid (write-alist->artifact srvdir myarf ptype: 'S)) - (arf-fname (get-artifact-fname srvdir myuuid)) + (d . ,hdir))) + (myuuid (write-alist->artifact sdir myarf ptype: 'S)) + (arf-fname (get-artifact-fname sdir myuuid)) (dbstruct (srv-dbstruct srvdat))) (set! *server-keep-running* #t) (let loop ((last-access (current-seconds))) (let* ((start (current-milliseconds)) (res (server:process-incoming srvdat)) (delta (- (current-milliseconds) start)) (timed-out (> (- (current-seconds) last-access) - 60)) ;; accessed in last 60 seconds - ) + 60))) ;; accessed in last 60 seconds (if timed-out (begin (print "INFO: server has not been accessed in 60 seconds, exiting shortly.") (set! *server-keep-running* #f)) (thread-sleep! (if (> delta 500) @@ -153,11 +155,12 @@ ;; read arfs from incoming, process them and put result arfs in proper dirs ;; return number requests found and processed ;; (define (server:process-incoming srvdat) - (let* ((srvdir (srv-dir srvdat)) + (let* ((sdir (srv-sdir srvdat)) + (hdir (srv-hdir srvdat)) (indir (srv-incoming srvdat)) (arfs (glob (conc indir"/*.artifacts"))) (handler (srv-handler srvdat)) (obj->string (srv-obj-to-str srvdat)) (dbstruct (srv-dbstruct srvdat))) @@ -166,11 +169,11 @@ (let* ((arf (car rem)) (dat (read-artifact->alist arf)) (ruuid (alist-ref 'Z dat)) (host (alist-ref 'h dat)) (pid (alist-ref 'i dat)) - (dest (conc srvdir"/"host"."pid"/responses")) + (dest (conc sdir"/"host"."pid"/responses")) ;; the calling host area (cmd (alist-ref 'c dat)) (params (alist-ref 'p dat)) (res (handler dbstruct cmd params)) (narf `((r . ,(obj->string res)) (P . ,ruuid)))) @@ -186,11 +189,13 @@ (let* ((logd (conc areapath"/logs")) (logf (conc logd"/from-"(get-host.pid #f)".log"))) (if (not (file-exists? logd))(create-directory logd #t)) (setenv "NBFAKE_LOG" logf) (system (conc "nbfake mtserve -start-dir "areapath)))) - + + + ;;====================================================================== ;; OLD SERVER STUFF BELOW HERE ;;====================================================================== ;; ;; servers start by setting up fs transport