Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -170,28 +170,28 @@ (start-res (case *transport-type* ((http)(http-transport:client-connect iface port)) ;;((nmsg)(nmsg-transport:client-connect hostname port)) )) (ping-res (case *transport-type* - ((http)(rmt:login-no-auto-client-setup start-res run-id)) + ((http)(rmt:login-no-auto-client-setup start-res)) ;; ((nmsg)(let ((logininfo (rmt:login-no-auto-client-setup start-res run-id))) ;; (if logininfo ;; (car (vector-ref logininfo 1)) ;; #f))) - + ))) (if (and start-res ping-res) (begin - (set! *runremote* start-res) ;; (hash-table-set! *runremote* run-id start-res) + (remote-conndat-set! *runremote* start-res) ;; (hash-table-set! *runremote* run-id start-res) (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res)) start-res) (begin ;; login failed but have a server record, clean out the record and try again (debug:print-info 0 *default-log-port* "client:setup, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) (case *transport-type* ((http)(http-transport:close-connections run-id))) - (set! *runremote* #f) ;; (hash-table-delete! *runremote* run-id) + (remote-conndat-set! *runremote* #f) ;; (hash-table-delete! *runremote* run-id) (tasks:kill-server-run-id run-id) (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat) run-id (tasks:hostinfo-get-interface server-dat) (tasks:hostinfo-get-port server-dat) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -26,10 +26,11 @@ (declare (uses tests)) (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. (declare (uses server)) (declare (uses daemon)) (declare (uses portlogger)) +(declare (uses rmt)) (include "common_records.scm") (include "db_records.scm") (define (http-transport:make-server-url hostport) @@ -219,28 +220,10 @@ (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info") (exit 1)))) (res #f) (success #t) (sparams (db:obj->string params transport: 'http))) -;; (condition-case -;; handle-exceptions -;; exn -;; (if (> numretries 0) -;; (begin -;; (mutex-unlock! *http-mutex*) -;; (thread-sleep! 1) -;; (handle-exceptions -;; exn -;; (debug:print 0 *default-log-port* "WARNING: closing connections failed. Server at " fullurl " almost certainly dead") -;; (close-all-connections!)) -;; (debug:print 0 *default-log-port* "WARNING: Failed to communicate with server, trying again, numretries left: " numretries) -;; (http-transport:client-api-send-receive run-id serverdat cmd sparams numretries: (- numretries 1))) -;; (begin -;; (mutex-unlock! *http-mutex*) -;; (tasks:kill-server-run-id run-id) -;; #f)) -;; (begin (debug:print-info 11 *default-log-port* "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n") ;; set up the http-client here (max-retry-attempts 1) ;; consider all requests indempotent (retry-request? (lambda (request) @@ -259,11 +242,12 @@ exn (begin (set! success #f) (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".") (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (set! *runremote* #f) ;; (hash-table-delete! *runremote* run-id) + (if *runremote* + (remote-conndat-set! *runremote* #f)) ;; Killing associated server to allow clean retry.") ;; (tasks:kill-server-run-id run-id) ;; better to kill the server in the logic that called this routine? (mutex-unlock! *http-mutex*) ;;; (signal (make-composite-condition ;;; (make-property-condition 'commfail 'message "failed to connect to server"))) @@ -306,11 +290,13 @@ 'message "nmsg-transport:client-api-send-receive-raw timed out talking to server"))))))) ;; careful closing of connections stored in *runremote* ;; (define (http-transport:close-connections run-id) - (let* ((server-dat *runremote*)) ;; (hash-table-ref/default *runremote* run-id #f))) + (let* ((server-dat (if *runremote* + (remote-conndat *runremote*) + #f))) ;; (hash-table-ref/default *runremote* run-id #f))) (if (vector? server-dat) (let ((api-dat (http-transport:server-dat-get-api-uri server-dat))) (close-connection! api-dat) #t) #f))) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -1863,12 +1863,15 @@ (if (or (getenv "MT_RUNSCRIPT") (args:get-arg "-repl") (args:get-arg "-load")) (let* ((toppath (launch:setup)) - (dbstruct (if toppath (db:setup)))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f))) - (if dbstruct + (dbstruct (if (and toppath + (common:on-homehost?)) + (db:setup) + #f))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f))) + (if *toppath* (cond ((getenv "MT_RUNSCRIPT") ;; How to run megatest scripts ;; ;; #!/bin/bash Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -40,16 +40,11 @@ ;; if a server is either running or in the process of starting call client:setup ;; else return #f to let the calling proc know that there is no server available ;; (define (rmt:get-connection-info run-id) - (let ((cinfo *runremote*)) ;; (hash-table-ref/default *runremote* run-id #f))) -;; how about if rrr is a defstruct and we use a wrapper to access it (even better would be a macro) -;; look in common_records for with-mutex -;; -;; (with-mutex *rrr-mutex* rmt:dat-host *runremote*) => returns value -;; (with-mutex *rrr-mutex* rmt: + (let ((cinfo (remote-conndat *runremote*))) (if cinfo cinfo (if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id) (client:setup run-id) #f)))) @@ -75,31 +70,36 @@ (exit 1)) ;; ensure we have a record for our connection for given area ((not *runremote*) (set! *runremote* (make-remote)) (mutex-unlock! *rmt-mutex*) + ;; (print "case 1") (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; ensure we have a homehost record ((not (pair? (remote-hh-dat *runremote*))) ;; have a homehost record? (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little (remote-hh-dat-set! *runremote* (common:get-homehost)) (mutex-unlock! *rmt-mutex*) + ;; (print "case 2") (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; on homehost and this is a read ((and (cdr (remote-hh-dat *runremote*)) ;; on homehost (member cmd api:read-only-queries)) ;; this is a read (mutex-unlock! *rmt-mutex*) + ;; (print "case 3") (rmt:open-qry-close-locally cmd 0 params)) ;; on homehost and this is a write, we already have a server ((and (cdr (remote-hh-dat *runremote*)) ;; on homehost (not (member cmd api:read-only-queries)) ;; this is a write (remote-server-url *runremote*)) ;; have a server (mutex-unlock! *rmt-mutex*) + ;; (print "case 4") (rmt:open-qry-close-locally cmd 0 params)) - ;; no server contact made and this is a write, try starting a server + ;; no server contact made and this is a write, passively start a server ((and (not (remote-server-url *runremote*)) (not (member cmd api:read-only-queries))) + ;; (print "case 5") (let ((serverconn (server:check-if-running *toppath*))) (if serverconn (remote-server-url-set! *runremote* serverconn) ;; the string can be consumed by the client setup if needed (if (not (server:start-attempted? *toppath*)) (server:kind-run *toppath*)))) @@ -109,52 +109,58 @@ (rmt:open-qry-close-locally cmd 0 params)) (begin (mutex-unlock! *rmt-mutex*) (rmt:send-receive cmd rid params attemptnum: attemptnum)))) ;; if not on homehost ensure we have a connection to a live server - ((or (not (pair? (remote-hh-dat *runremote*))) ;; have a homehost record? - (not (cdr (remote-hh-dat *runremote*))) ;; have record, are we on a homehost? - (not (remote-conndat *runremote*))) ;; do we not have a connection? - (remote-hh-dat-set! *runremote* (common:get-homehost)) - (remote-conndat-set! *runremote* (rmt:get-connection-info 0)) + ;; NOTE: we *have* a homehost record by now + ((and (not (cdr (remote-hh-dat *runremote*))) ;; are we on a homehost? + (not (remote-conndat *runremote*))) ;; and no connection + ;; (print "case 6 hh-dat: " (remote-hh-dat *runremote*) " conndat: " (remote-conndat *runremote*)) (mutex-unlock! *rmt-mutex*) - (server:kind-run *toppath*) ;; we need a sever + (tasks:start-and-wait-for-server (tasks:open-db) 0 15) + (remote-conndat-set! *runremote* (rmt:get-connection-info 0)) (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; all set up if get this far, dispatch the query ((cdr (remote-hh-dat *runremote*)) ;; we are on homehost (mutex-unlock! *rmt-mutex*) + ;; (print "case 7") (rmt:open-qry-close-locally cmd (if rid rid 0) params)) ;; reset the connection if it has been unused too long ((and (remote-conndat *runremote*) (let ((expire-time (- start-time (remote-server-timeout *runremote*)))) - (< (http-transport:server-dat-get-last-access connection) expire-time))) - (remote-conndatr *runremote* #f)) + (< (http-transport:server-dat-get-last-access (remote-conndat *runremote*)) expire-time))) + ;; (print "case 8") + (remote-conndat-set! *runremote* #f)) ;; not on homehost, do server query (else (mutex-unlock! *rmt-mutex*) + ;; (print "case 9") (let* ((conninfo (remote-conndat *runremote*)) (dat (case (remote-transport *runremote*) - ((http)(condition-case - (http-transport:client-api-send-receive run-id conninfo cmd params) - ((commfail)(vector #f "communications fail")) - ((exn)(vector #f "other fail")))) + ((http) ;; (condition-case ;; handling here has caused a lot of problems. + (http-transport:client-api-send-receive 0 conninfo cmd params) + ;; ((commfail)(vector #f "communications fail")) + ;; ((exn)(vector #f "other fail" (print-call-chain))))) + ) (else (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport *runremote*) " not supported") (exit)))) (success (if (vector? dat) (vector-ref dat 0) #f)) (res (if (vector? dat) (vector-ref dat 1) #f))) (if (vector? conninfo)(http-transport:server-dat-update-last-access conninfo)) ;; refresh access time - (if (and success res) + ;; (print "case 9. conninfo=" conninfo " dat=" dat) + (if success (case (remote-transport *runremote*) ((http) res) (else (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport *runremote*) " is unknown") (exit 1))) (begin (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum) - (remote-conndat-set! *runremote* #f) - (server-url-set! *runremote* #f) + (remote-conndat-set! *runremote* #f) + (remote-server-url-set! *runremote* #f) + ;; (print "case 9.1") (tasks:start-and-wait-for-server (tasks:open-db) 0 15) (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))))))))) (define (rmt:update-db-stats run-id rawcmd params duration) (mutex-lock! *db-stats-mutex*) @@ -246,23 +252,17 @@ (mutex-unlock! *db-multi-sync-mutex*))))) res)) (define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params) (let* ((run-id (if run-id run-id 0)) - ;; (jparams (db:obj->string params)) ;; (rmt:dat->json-str params)) (res (handle-exceptions exn #f (http-transport:client-api-send-receive run-id connection-info cmd params)))) -;; ((commfail) (vector #f "communications fail"))))) (if (and res (vector-ref res 0)) (vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!! #f))) -;; (db:string->obj (vector-ref dat 1)) -;; (begin -;; (debug:print-error 0 *default-log-port* "rmt:send-receive-no-auto-client-setup failed, attempting to continue. Got " dat) -;; dat)))) ;; ;; Wrap json library for strings (why the ports crap in the first place?) ;; (define (rmt:dat->json-str dat) ;; (with-output-to-string ;; (lambda ()