Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -2385,11 +2385,13 @@ (if (or (getenv "MT_RUNSCRIPT") (args:get-arg "-repl") (args:get-arg "-load")) (let* ((toppath (launch:setup)) (dbstructs (if (and toppath - (server:choose-server toppath 'home?)) + ;; NOTE: server:choose-server is starting a server + ;; either add equivalent for tcp mode or ???? + #;(server:choose-server toppath 'home?)) (db:setup #t) #f))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f))) (if *toppath* (cond ((getenv "MT_RUNSCRIPT") Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -23,17 +23,19 @@ (declare (unit rmt)) (declare (uses api)) (declare (uses http-transport)) (declare (uses dbfile)) (declare (uses dbmemmod)) +(declare (uses tcp-transportmod)) (include "common_records.scm") ;; (declare (uses rmtmod)) ;; used by http-transport (import dbfile) ;; rmtmod) -(import dbmemmod) +(import dbmemmod + tcp-transportmod) (define rmt:transport-mode (make-parameter 'http)) ;; ;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!! ;; @@ -83,119 +85,94 @@ (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected #;(common:telemetry-log (conc "rmt:"(->string cmd)) payload: `((rid . ,rid) (params . ,params))) - + (if (> attemptnum 2) (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum)) - + (cond ((> attemptnum 2) (thread-sleep! 0.05)) ((> attemptnum 10) (thread-sleep! 0.5)) ((> attemptnum 20) (thread-sleep! 1))) + + ;; I'm turning this off, it may make sense to move it + ;; into http-transport-handler (if (and (> attemptnum 5) (= 0 (modulo attemptnum 15))) - (begin (server:run *toppath*) (thread-sleep! 3))) - - - ;;DOT digraph megatest_state_status { - ;;DOT ranksep=0; - ;;DOT // rankdir=LR; - ;;DOT node [shape="box"]; - ;;DOT "rmt:send-receive" -> MUTEXLOCK; - ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; } - ;; do all the prep locked under the rmt-mutex - (mutex-lock! *rmt-mutex*) - + (begin + (server:run *toppath*) + (thread-sleep! 3))) + ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. ;; 3. do the query, if on homehost use local access ;; (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas (runremote (or area-dat *runremote*)) (attemptnum (+ 1 attemptnum)) - (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*))) + (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*)) + (testsuite (common:get-testsuite-name)) + (mtexe (common:find-local-megatest))) (case (rmt:transport-mode) ((http)(http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode)) - ((tcp) (tcp-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode))))) + ((tcp) (tcp-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode testsuite mtexe))))) -(define (tcp-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode) +(define (tcp-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode testsuite mtexe) (if (not runremote) - (let* ((newremote (make-and-init-remote))) + (let* ((newremote (make-and-init-remote areapath))) (set! *runremote* newremote) (set! runremote newremote))) - (let* ((dbfname (conc (dbfile:run-id->dbnum run-id)".db"))) ;;(dbfile:run-id->path areapath run-id))) - (tt:handler runremote cmd rid params attemptnum area-dat areapath readonly-mode dbfname))) - -(define (http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode) - ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity - ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"]; - ;; DOT INIT_RUNREMOTE -> MUTEXLOCK; - ;; ensure we have a record for our connection for given area - (if (not runremote) ;; can remove this one. should never get here. - (begin - (set! *runremote* (make-and-init-remote areapath)) - (let* ((server-info (remote-server-info *runremote*))) - (if server-info - (begin - (remote-server-url-set! *runremote* (server:record->url server-info)) - (remote-server-id-set! *runremote* (server:record->id server-info))))) - (set! runremote *runremote*))) ;; new runremote will come from this on next iteration - - ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity - ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"]; - ;; DOT SET_HOMEHOST -> MUTEXLOCK; + (let* ((dbfname (conc (dbfile:run-id->dbnum rid)".db"))) ;;(dbfile:run-id->path areapath run-id))) + (tt:handler runremote cmd rid params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe))) + +(define (http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode testsuite mtexe) + ;; do all the prep locked under the rmt-mutex + (mutex-lock! *rmt-mutex*) + + ;; ensure we have a record for our connection for given area + (if (not runremote) ;; can remove this one. should never get here. + (begin + (set! *runremote* (make-and-init-remote areapath)) + (let* ((server-info (remote-server-info *runremote*))) + (if server-info + (begin + (remote-server-url-set! *runremote* (server:record->url server-info)) + (remote-server-id-set! *runremote* (server:record->id server-info))))) + (set! runremote *runremote*))) ;; new runremote will come from this on next iteration + ;; ensure we have a homehost record (if (or (not (pair? (remote-hh-dat runremote))) ;; not on homehost (not (cdr (remote-hh-dat runremote)))) ;; not on homehost (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little (let ((hh-data (server:choose-server areapath 'homehost))) (remote-hh-dat-set! runremote (or hh-data (cons #f #f))))) - ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) (cond - #;((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds - (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.") - (set! *runremote* #f) - ;; BUG: close-connections should go here? - (mutex-unlock! *rmt-mutex*) - (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat)) - - ;;DOT EXIT; - ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" } ;; give up if more than 150 attempts ((> attemptnum 150) (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.") (exit 1)) - ;;DOT CASE2 [label="local\nreadonly\nquery"]; - ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2} - ;;DOT CASE2 -> "rmt:open-qry-close-locally"; ;; readonly mode, read request- handle it - case 2 ((and readonly-mode (member cmd api:read-only-queries)) (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2") (rmt:open-qry-close-locally cmd 0 params) ) - ;;DOT CASE3 [label="write in\nread-only mode"]; - ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3} - ;;DOT CASE3 -> "#f"; ;; readonly mode, write request. Do nothing, return #f (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params)) ;; This block was for pre-emptively resetting the connection if there had been no communication for some time. ;; I don't think it adds any value. If the server is not there, just fail and start a new connection. ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout) ;; - ;;DOT CASE4 [label="reset\nconnection"]; - ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4} - ;;DOT CASE4 -> "rmt:send-receive"; ;; reset the connection if it has been unused too long ((and runremote (remote-api-url runremote) (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on (+ (remote-last-access runremote) @@ -205,61 +182,27 @@ ;; moving this setting of runremote conndat to #f to inside the http-transport:close-connections ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection. (mutex-unlock! *rmt-mutex*) (rmt:send-receive cmd rid params attemptnum: attemptnum)) - ;;DOT CASE5 [label="local\nread"]; - ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5}; - ;;DOT CASE5 -> "rmt:open-qry-close-locally"; - ;; on homehost and this is a read ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required (rmt:on-homehost? runremote) (member cmd api:read-only-queries)) ;; this is a read (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") (rmt:open-qry-close-locally cmd 0 params)) - ;;DOT CASE6 [label="init\nremote"]; - ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6}; - ;;DOT CASE6 -> "rmt:send-receive"; - ;; on homehost and this is a write, we already have a server, but server has died - - ;; reinstate this keep-alive section but inject a time condition into the (add ... - ;; - ;; ((and (cdr (remote-hh-dat runremote)) ;; on homehost - ;; (not (member cmd api:read-only-queries)) ;; this is a write - ;; (remote-server-url runremote) ;; have a server - ;; (not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. - ;; (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6") - ;; (http-transport:close-connections area-dat: runremote) ;; make sure to clean up - ;; (set! *runremote* (make-remote)) - ;; (let* ((server-info (remote-server-info *runremote*))) - ;; (if server-info - ;; (begin - ;; (remote-server-url-set! *runremote* (server:record->url server-info)) - ;; (remote-server-id-set! *runremote* (server:record->id server-info))))) - ;; (remote-force-server-set! runremote (common:force-server?)) - ;; (mutex-unlock! *rmt-mutex*) - ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6") - ;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) - - ;;DOT CASE7 [label="homehost\nwrite"]; - ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7}; - ;;DOT CASE7 -> "rmt:open-qry-close-locally"; ;; on homehost and this is a write, we already have a server ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required (cdr (remote-hh-dat runremote)) ;; on homehost (not (member cmd api:read-only-queries)) ;; this is a write (remote-server-url runremote)) ;; have a server (needed to sync written data back) (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") (rmt:open-qry-close-locally cmd 0 params)) - ;;DOT CASE8 [label="force\nserver"]; - ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8}; - ;;DOT CASE8 -> "rmt:open-qry-close-locally"; ;; on homehost, no server contact made and this is a write, passively start a server ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required (cdr (remote-hh-dat runremote)) ;; have homehost (not (remote-server-url runremote)) ;; no connection yet (not (member cmd api:read-only-queries))) ;; not a read-only query @@ -306,11 +249,10 @@ ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11}; ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"]; ;;DOT CASE11 -> "RESULT" [label="call succeeded"]; ;; not on homehost, do server query (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)))) -;;DOT } ;; bunch of small functions factored out of send-receive to make debug easier ;; (define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid) Index: tcp-transportmod.scm ================================================================== --- tcp-transportmod.scm +++ tcp-transportmod.scm @@ -59,26 +59,20 @@ ;; client ;;====================================================================== ;; (define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic -;; the client side struct -;; -(defstruct tt - ;; all - (areapath #f) - ;; client related - (conns (make-hash-table)) ;; dbfname -> conn - ) - (defstruct tt-conn host port dbfname ) -(defstruct tt-srv +(defstruct tt + ;; client related + (conns (make-hash-table)) ;; dbfname -> conn + ;; server related (areapath #f) (host #f) (port #f) (conn #f) @@ -96,33 +90,35 @@ (define (tt:client-connect-to-server ttdat) #f) ;; client side handler ;; -(define (tt:handler runremote cmd rid params attemptnum area-dat areapath readonly-mode dbfname) +(define (tt:handler runremote cmd rid params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe) ;; NOTE: areapath is passed in and in tt struct. We'll use passed in value for now. (let* ((conn (hash-table-ref/default (tt-conns runremote) dbfname #f))) (if conn ;; have connection, call the server (let* ((res (tt:send-receive runremote conn cmd rid params))) (cond ((member res '(busy starting)) (thread-sleep! 1) - (tt:handler runremote cmd rid params attemptnum area-dat areapath readonly-mode dbfname)) + (tt:handler runremote cmd rid params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe)) (else res))) ;; no conn yet, find and or start and find a server - (let* ((server (tt:find-server areapath dbfname))) + (let* ((server (tt:find-server runremote dbfname))) (if server (let* ((conn (tt:client-connect-to-server server))) (hash-table-set! (tt-conns runremote) dbfname conn) - (tt:handler runremote cmd rid params attemptnum area-dat areapath readonly-mode dbfname)) - ;; no server, try to start one + (tt:handler runremote cmd rid params attemptnum area-dat areapath readonly-mode + dbfname testsuite mtexe)) + ;; no server, try to start a server process (begin - (tt:start-server areapath dbfname) + (tt:server-process-run areapath testsuite mtexe rid) ;; #!key (profile-mode "")) (thread-sleep! 1) - (tt:handler runremote cmd rid params attemptnum area-dat areapath readonly-mode dbfname))))))) + (tt:handler runremote cmd rid params attemptnum area-dat areapath + readonly-mode dbfname testsuite mtexe))))))) (define (tt:bid-for-servership run-id) #f) (define (tt:get-current-server run-id) @@ -143,13 +139,13 @@ ;; NOTE: organise by dbfname, not run-id so we don't need ;; to pull in more modules ;; (define (tt:start-server areapath dbfname handler) ;; is there already a server for this dbfile? Then exit. - (let* ((ttdat (make-tt-srv areapath: areapath)) + (let* ((ttdat (make-tt areapath: areapath)) (servers (tt:find-server ttdat dbfname))) - (tt-srv-handler-set! ttdat handler) + (tt-handler-set! ttdat handler) (if (null? servers) (begin (tt:start-tcp-server ttdat) ;; start the tcp-server which applies handler to incoming data (tt:keep-running ttdat dbfname)) (begin @@ -158,12 +154,12 @@ ;; find a port and start tcp-server ;; (define (tt:start-tcp-server ttdat) (setup-listener ttdat) - (let* ((socket (tt-srv-socket ttdat)) - (handler (tt-srv-handler ttdat))) + (let* ((socket (tt-socket ttdat)) + (handler (tt-handler ttdat))) ((make-tcp-server socket handler) #t ;; yes, send error messages to std-err ))) (define (tt:keep-running ttdat dbfile) @@ -204,18 +200,18 @@ ;; (tcp-close (udat-socket uconn))) ;; ;; (define (tt:shutdown-server ttdat) - (let* ((cleanproc (tt-srv-cleanup-proc ttdat))) + (let* ((cleanproc (tt-cleanup-proc ttdat))) (if cleanproc (cleanproc)) - (tcp-close (tt-srv-socket ttdat)) ;; close up ports here + (tcp-close (tt-socket ttdat)) ;; close up ports here )) ;; (define (wait-and-close uconn) -;; (thread-join! (tt-srv-cmd-thread uconn)) -;; (tcp-close (tt-srv-socket uconn))) +;; (thread-join! (tt-cmd-thread uconn)) +;; (tcp-close (tt-socket uconn))) ;; return servid ;; side-effects: ;; ttdat-cleanup-proc is populated with function to remove the serverinfo file (define (tt:create-server-registration-file ttdat dbfname) @@ -227,11 +223,11 @@ (port (tt-conn-port conn)) (servinf (conc servdir"/"host":"port"-"(current-process-id)":"dbfname)) (serv-id (tt:mk-signature areapath)) (clean-proc (lambda () (delete-file* servinf)))) - (tt-srv-cleanup-proc-set! ttdat clean-proc) + (tt-cleanup-proc-set! ttdat clean-proc) (with-output-to-file servinf (lambda () (print "SERVER STARTED: "host":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)" dbfname: "dbfname))) serv-id))) @@ -249,16 +245,17 @@ ;; Given an area path, start a server process ### NOTE ### > file 2>&1 ;; if the target-host is set ;; try running on that host ;; incidental: rotate logs in logs/ dir. ;; -(define (tt:server-process-run areapath testsuite mtexe #!key (profile-mode "")) ;; areapath is *toppath* for a given testsuite area +(define (tt:server-process-run areapath testsuite mtexe run-id #!key (profile-mode "")) ;; areapath is *toppath* for a given testsuite area (let* ((logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log")) (cmdln (conc mtexe " -server - ";; (or target-host "-") " -m testsuite:" testsuite + " -run-id " run-id " " profile-mode ))) ;; (conc " >> " logfile " 2>&1 &"))))) ;; we want the remote server to start in *toppath* so push there (push-directory areapath) (debug:print 0 *default-log-port* "INFO: Trying to start server in tcp mode (" cmdln ") ...") @@ -276,11 +273,11 @@ ;; ;; if udata-in is #f create the record ;; if there is already a serv-listener return the udata ;; (define (setup-listener uconn #!optional (port 4242)) - (assert (tt-srv? uconn) "FATAL: setup-listener called with wrong struct "uconn) + (assert (tt? uconn) "FATAL: setup-listener called with wrong struct "uconn) (handle-exceptions exn (if (< port 65535) (setup-listener uconn (+ port 1)) #f) @@ -289,13 +286,13 @@ (define (connect-listener uconn port) ;; (tcp-listener-socket LISTENER)(socket-name so) ;; sockaddr-address, sockaddr-port, sockaddr->string (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]]) (addr (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname))) - (tt-srv-port-set! uconn port) - (tt-srv-host-port-set! uconn (conc addr":"port)) - (tt-srv-socket-set! uconn tlsn) + (tt-port-set! uconn port) + (tt-host-port-set! uconn (conc addr":"port)) + (tt-socket-set! uconn tlsn) uconn)) ;;======================================================================