Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -54,31 +54,31 @@ ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; (define (client:setup run-id #!key (remaining-tries 10) (failed-connects 0)) - (debug:print 0 "INFO: client:setup remaining-tries=" remaining-tries) + (debug:print-info 0 "client:setup remaining-tries=" remaining-tries) (if (<= remaining-tries 0) (begin (debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id) (exit 1)) (let ((host-info (hash-table-ref/default *runremote* run-id #f))) - (debug:print-info 0 "client:setup host-info=" host-info ", remaining-tries=" remaining-tries) (if host-info (let* ((iface (http-transport:server-dat-get-iface host-info)) (port (http-transport:server-dat-get-port host-info)) (start-res (http-transport:client-connect iface port)) - ;; (ping-res (server:ping-server run-id iface port)) (ping-res (rmt:login-no-auto-client-setup start-res run-id))) (if ping-res ;; sucessful login? (begin - (http-transport:close-connections run-id) + (debug:print-info 0 "client:setup, ping is good using host-info=" host-info ", remaining-tries=" remaining-tries) + ;; Why add the close-connections here? + ;; (http-transport:close-connections run-id) (hash-table-set! *runremote* run-id start-res) start-res) ;; return the server info (if (member remaining-tries '(9 6 4 2)) (begin ;; login failed - (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" host-info) + (debug:print-info 0 "client:setup, ping is bad for start-res=" start-res " and *runremote*=" host-info) (http-transport:close-connections run-id) (hash-table-delete! *runremote* run-id) (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id @@ -98,19 +98,20 @@ (debug:print-info 0 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) (if server-dat (let* ((iface (tasks:hostinfo-get-interface server-dat)) (port (tasks:hostinfo-get-port server-dat)) (start-res (http-transport:client-connect iface port)) - ;; (ping-res (server:ping-server run-id iface port)) (ping-res (rmt:login-no-auto-client-setup start-res run-id))) - (if start-res + (if (and start-res + ping-res) (begin (hash-table-set! *runremote* run-id start-res) + (debug:print-info 0 "connected to " (http-transport:server-dat-make-url start-res))) start-res) (if (member remaining-tries '(2 5)) (begin ;; login failed - (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) + (debug:print-info 0 "client:setup, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) (http-transport:close-connections run-id) (hash-table-delete! *runremote* run-id) (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id @@ -122,20 +123,21 @@ (thread-sleep! 10) ;; give server a little time to start up (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1))) (begin (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) (thread-sleep! 5) - (client:setup run-id remaining-tries: (- remaining-tries 1)))))) - (begin ;; no server registered - (if (eq? remaining-tries 2) - (begin - ;; (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)") - (client:setup run-id remaining-tries: 10)) - (begin - (thread-sleep! 2) - (debug:print 25 "INFO: client:setup start-res (not defined here), run-id=" run-id ", server-dat=" server-dat) - (if (< (open-run-close tasks:num-in-available-state tasks:open-db run-id) 3) + (client:setup run-id remaining-tries: (- remaining-tries 1))))) + (begin ;; no server registered + (if (member remaining-tries '(2)) + (begin + (debug:print-info 0 "no server registered, remaining-tries=" remaining-tries ", try running client:setup again") + ;; (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)") + (client:setup run-id remaining-tries: remaining-tries)) + (let ((num-available (open-run-close tasks:num-in-available-state tasks:open-db run-id))) + (thread-sleep! 2) + (debug:print-info 0 "client:setup, no server registered, remaining-tries=" remaining-tries " num-available=" num-available) + (if (< num-available 2) (begin ;; (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)") (server:try-running run-id))) (thread-sleep! 10) ;; give server a little time to start up (client:setup run-id remaining-tries: (- remaining-tries 1))))))))))) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -290,10 +290,19 @@ (define (http-transport:server-dat-get-iface vec) (vector-ref vec 0)) (define (http-transport:server-dat-get-port vec) (vector-ref vec 1)) (define (http-transport:server-dat-get-api-uri vec) (vector-ref vec 2)) (define (http-transport:server-dat-get-api-url vec) (vector-ref vec 3)) (define (http-transport:server-dat-get-api-req vec) (vector-ref vec 4)) + +(define (http-transport:server-dat-make-url vec) + (if (and (http-transport:server-dat-get-iface vec) + (http-transport:server-dat-get-port vec)) + (conc "http://" + (http-transport:server-dat-get-iface vec) + ":" + (http-transport:server-dat-get-port vec)) + #f)) ;; ;; connect ;; (define (http-transport:client-connect iface port) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -344,27 +344,30 @@ (common:get-disks) ) "\n")) (set! *didsomething* #t))) (if (args:get-arg "-ping") - (let* ((run-id (string->number (args:get-arg "-run-id"))) - (host-port (let ((slst (string-split (args:get-arg "-ping") ":"))) - (if (eq? (length slst) 2) - (list (car slst)(string->number (cadr slst))) - #f))) - (toppath (setup-for-run))) + (let* ((run-id (string->number (args:get-arg "-run-id"))) + (host-port (let ((slst (string-split (args:get-arg "-ping") ":"))) + (if (eq? (length slst) 2) + (list (car slst)(string->number (cadr slst))) + #f))) + (toppath (setup-for-run)) + (server-db-dat (if (not host-port)(open-run-close tasks:get-server tasks:open-db run-id) #f))) (if (not run-id) (begin (debug:print 0 "ERROR: must specify run-id when doing ping, -run-id n") (print "ERROR: No run-id") (exit 1)) - (if (not host-port) + (if (and (not host-port) + (not server-db-dat)) (begin - (debug:print 0 "ERROR: argument to -ping is host:port, got " (args:get-arg "-ping")) (print "ERROR: bad host:port") (exit 1)) - (let* ((server-dat (http-transport:client-connect (car host-port)(cadr host-port))) + (let* ((iface (if host-port (car host-port) (tasks:hostinfo-get-interface server-db-dat))) + (port (if host-port (cadr host-port)(tasks:hostinfo-get-port server-db-dat))) + (server-dat (http-transport:client-connect iface port)) (login-res (rmt:login-no-auto-client-setup server-dat run-id))) (if (and (list? login-res) (car login-res)) (begin (print "LOGIN_OK") @@ -797,11 +800,10 @@ (if (or (args:get-arg "-test-files")(args:get-arg "-test-paths")) ;; if we are in a test use the MT_CMDINFO data (if (getenv "MT_CMDINFO") (let* ((startingdir (current-directory)) (cmdinfo (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO"))))) - ;; (runremote (assoc/default 'runremote cmdinfo)) (testpath (assoc/default 'testpath cmdinfo)) (test-name (assoc/default 'test-name cmdinfo)) (runscript (assoc/default 'runscript cmdinfo)) (db-host (assoc/default 'db-host cmdinfo)) (run-id (assoc/default 'run-id cmdinfo)) @@ -845,11 +847,10 @@ (if (args:get-arg "-archive") ;; if we are in a test use the MT_CMDINFO data (if (getenv "MT_CMDINFO") (let* ((startingdir (current-directory)) (cmdinfo (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO"))))) - ;; (runremote (assoc/default 'runremote cmdinfo)) (testpath (assoc/default 'testpath cmdinfo)) (test-name (assoc/default 'test-name cmdinfo)) (runscript (assoc/default 'runscript cmdinfo)) (db-host (assoc/default 'db-host cmdinfo)) (run-id (assoc/default 'run-id cmdinfo)) @@ -924,11 +925,10 @@ (if (not (getenv "MT_CMDINFO")) (begin (debug:print 0 "ERROR: MT_CMDINFO env var not set, -step must be called *inside* a megatest invoked environment!") (exit 5)) (let* ((cmdinfo (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO"))))) - ;; (runremote (assoc/default 'runremote cmdinfo)) (testpath (assoc/default 'testpath cmdinfo)) (test-name (assoc/default 'test-name cmdinfo)) (runscript (assoc/default 'runscript cmdinfo)) (db-host (assoc/default 'db-host cmdinfo)) (run-id (assoc/default 'run-id cmdinfo)) @@ -971,11 +971,10 @@ (begin (debug:print 0 "ERROR: MT_CMDINFO env var not set, commands -test-status, -runstep and -setlog must be called *inside* a megatest environment!") (exit 5)) (let* ((startingdir (current-directory)) (cmdinfo (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO"))))) - ;; (runremote (assoc/default 'runremote cmdinfo)) (testpath (assoc/default 'testpath cmdinfo)) (test-name (assoc/default 'test-name cmdinfo)) (runscript (assoc/default 'runscript cmdinfo)) (db-host (assoc/default 'db-host cmdinfo)) (run-id (assoc/default 'run-id cmdinfo)) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -54,22 +54,21 @@ (debug:print 0 "ERROR: 100 tries and no server, giving up") (exit 1))))))))) (jparams (db:obj->string params)) (res (http-transport:client-api-send-receive run-id connection-info cmd jparams))) (if res - (db:string->obj res) ;; (rmt:json-str->dat res) + (db:string->obj res) (let ((new-connection-info (client:setup run-id))) (debug:print 0 "WARNING: Communication failed, trying call to http-transport:client-api-send-receive again.") (rmt:send-receive cmd run-id params))))) (define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params) (let* ((run-id (if run-id run-id 0)) (jparams (db:obj->string params)) ;; (rmt:dat->json-str params)) (res (http-transport:client-api-send-receive run-id connection-info cmd jparams))) (if res - (db:string->obj res) ;; (rmt:json-str->dat res) - ;; this one does NOT keep trying + (db:string->obj res) res))) ;; Wrap json library for strings (why the ports crap in the first place?) (define (rmt:dat->json-str dat) (with-output-to-string Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -122,15 +122,18 @@ ;; note: client:start will set *runremote*. this needs to be changed ;; also, client:start will login to the server, also need to change that. ;; ;; client:start returns #t if login was successful. ;; - (let ((res (server:ping-server run-id (vector-ref server 1)(vector-ref server 0)))) + (let ((res (server:ping-server run-id + (tasks:hostinfo-get-interface server) + (tasks:hostinfo-get-port server)))) ;; if the server didn't respond we must remove the record (if res #t (begin + (debug:print-info 0 "server at " server " not responding, removing record") (open-run-close tasks:server-force-clean-running-records-for-run-id tasks:open-db run-id " server:check-if-running") res))) #f)))