Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -41,10 +41,109 @@ (define (client:logout serverdat) (let ((ok (and (socket? serverdat) (cdb:logout serverdat *toppath* (client:get-signature))))) ok)) +(define (client:connect iface port) + (case (server:get-transport) + ((rpc) (rpc:client-connect iface port)) + ((http) (http:client-connect iface port)) + ((zmq) (zmq:client-connect iface port)) + (else (rpc:client-connect iface port)))) + +(define (client:login-no-auto-setup server-info run-id) + (case (server:get-transport) + ((rpc) (rpc:login-no-auto-client-setup server-info run-id)) + ((http) (rmt:login-no-auto-client-setup server-info run-id)) + (else (rpc:login-no-auto-client-setup server-info run-id)))) + +(define (client:setup run-id #!key (remaining-tries 10) (failed-connects 0)) + (case (server:get-transport) + ((rpc) (client:setup-rpc run-id)) + ((http)(client:setup-http run-id)) + (else (client:setup-rpc run-id)))) + +(define (client:setup-rpc run-id) + (debug:print 0 "INFO: client:setup remaining-tries=" remaining-tries) + (if (<= remaining-tries 0) + (begin + (debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id) + (exit 1)) + (let ((host-info (hash-table-ref/default *runremote* run-id #f))) + (debug:print-info 0 "client:setup host-info=" host-info ", remaining-tries=" remaining-tries) + (if host-info + (let* ((iface (car host-info)) + (port (cadr host-info)) + (start-res (client:connect iface port)) + ;; (ping-res (server:ping-server run-id iface port)) + (ping-res (client:login-no-auto-setup start-res run-id))) + (if ping-res ;; sucessful login? + (begin + (hash-table-set! *runremote* run-id start-res) + start-res) ;; return the server info + (if (member remaining-tries '(3 4 6)) + (begin ;; login failed + (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" host-info) + (hash-table-delete! *runremote* run-id) + (open-run-close tasks:server-force-clean-run-record + tasks:open-db + run-id + (car host-info) + (cadr host-info) + " client:setup (host-info=#t)") + (thread-sleep! 5) + (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1))) + (begin + (debug:print 25 "INFO: client:setup failed to connect, start-res=" start-res ", run-id=" run-id ", host-info=" host-info) + (thread-sleep! 5) + (client:setup run-id remaining-tries: (- remaining-tries 1)))))) + ;; YUK: rename server-dat here + (let* ((server-dat (open-run-close tasks:get-server tasks:open-db run-id))) + (debug:print-info 0 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) + (if server-dat + (let* ((iface (tasks:hostinfo-get-interface server-dat)) + (port (tasks:hostinfo-get-port server-dat)) + (start-res (http-transport:client-connect iface port)) + ;; (ping-res (server:ping-server run-id iface port)) + (ping-res (rmt:login-no-auto-client-setup start-res run-id))) + (if start-res + (begin + (hash-table-set! *runremote* run-id start-res) + start-res) + (if (member remaining-tries '(2 5)) + (begin ;; login failed + (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) + (hash-table-delete! *runremote* run-id) + (open-run-close tasks:server-force-clean-run-record + tasks:open-db + run-id + (tasks:hostinfo-get-interface server-dat) + (tasks:hostinfo-get-port server-dat) + " client:setup (server-dat = #t)") + (thread-sleep! 2) + (server:try-running run-id) + (thread-sleep! 10) ;; give server a little time to start up + (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1))) + (begin + (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) + (thread-sleep! 5) + (client:setup run-id remaining-tries: (- remaining-tries 1)))))) + (begin ;; no server registered + (if (eq? remaining-tries 2) + (begin + ;; (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)") + (client:setup run-id remaining-tries: 10)) + (begin + (thread-sleep! 2) + (debug:print 25 "INFO: client:setup start-res (not defined here), run-id=" run-id ", server-dat=" server-dat) + (if (< (open-run-close tasks:num-in-available-state tasks:open-db run-id) 3) + (begin + ;; (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)") + (server:try-running run-id))) + (thread-sleep! 10) ;; give server a little time to start up + (client:setup run-id remaining-tries: (- remaining-tries 1))))))))))) + ;; Do all the connection work, look up the transport type and set up the ;; connection if required. ;; ;; There are two scenarios. ;; 1. We are a test manager and we received *transport-type* and *runremote* via cmdline @@ -53,11 +152,11 @@ ;; ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; -(define (client:setup run-id #!key (remaining-tries 10) (failed-connects 0)) +(define (client:setup-http run-id #!key (remaining-tries 10) (failed-connects 0)) (debug:print 0 "INFO: client:setup remaining-tries=" remaining-tries) (if (<= remaining-tries 0) (begin (debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id) (exit 1)) @@ -64,13 +163,13 @@ (let ((host-info (hash-table-ref/default *runremote* run-id #f))) (debug:print-info 0 "client:setup host-info=" host-info ", remaining-tries=" remaining-tries) (if host-info (let* ((iface (car host-info)) (port (cadr host-info)) - (start-res (http-transport:client-connect iface port)) + (start-res (client:connect iface port)) ;; (ping-res (server:ping-server run-id iface port)) - (ping-res (rmt:login-no-auto-client-setup start-res run-id))) + (ping-res (client:login-no-auto-setup start-res run-id))) (if ping-res ;; sucessful login? (begin (hash-table-set! *runremote* run-id start-res) start-res) ;; return the server info (if (member remaining-tries '(3 4 6)) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -32,10 +32,18 @@ ;;====================================================================== ;; S U P P O R T F U N C T I O N S ;;====================================================================== +(define (rmt:call-transport run-id connection-info cmd jparams) + (case (server:get-transport) + ((rpc) ( rpc-transport:client-api-send-receive run-id connection-info cmd jparams)) + ((http) (http-transport:client-api-send-receive run-id connection-info cmd jparams)) + ((fs) ( fs-transport:client-api-send-receive run-id connection-info cmd jparams)) + ((zmq) (zmq-transport:client-api-send-receive run-id connection-info cmd jparams)) + (else ( rpc-transport:client-api-send-receive run-id connection-info cmd jparams)))) + ;; cmd is a symbol ;; vars is a json string encoding the parameters for the call ;; (define (rmt:send-receive cmd rid params) (let* ((run-id (if rid rid 0)) @@ -52,11 +60,11 @@ (loop (- numtries 1))) (begin (debug:print 0 "ERROR: 100 tries and no server, giving up") (exit 1))))))))) (jparams (db:obj->string params)) - (res (http-transport:client-api-send-receive run-id connection-info cmd jparams))) + (res (rmt:call-transport connection-info cmd jparams))) (if res (db:string->obj res) ;; (rmt:json-str->dat res) (let ((new-connection-info (client:setup run-id))) (debug:print 0 "WARNING: Communication failed, trying call to http-transport:client-api-send-receive again.") (rmt:send-receive cmd run-id params))))) Index: rpc-transport.scm ================================================================== --- rpc-transport.scm +++ rpc-transport.scm @@ -193,11 +193,11 @@ (begin (debug:print-info 0 "Server continuing, tests running: " numrunning ", seconds since last db access: " (- (current-seconds) *last-db-access*)) (loop (+ 1 count))) (begin (debug:print-info 0 "Starting to shutdown the server side") - (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id ipaddrstr portnum " rpc-transport:try-start-server stop") + (open-run-close tasks:server-delete-record tasks:open-db server-id " rpc-transport:try-start-server stop") (thread-sleep! 10) (debug:print-info 0 "Max cached queries was " *max-cache-size*) (debug:print-info 0 "Server shutdown complete. Exiting") )))))