Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -62,30 +62,33 @@ (debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id) (exit 1)) (let ((host-info (hash-table-ref/default *runremote* run-id #f))) (debug:print-info 0 "client:setup host-info=" host-info ", remaining-tries=" remaining-tries) (if host-info - (let* ((iface (car host-info)) - (port (cadr host-info)) + (let* ((iface (http-transport:server-dat-get-iface host-info)) + (port (http-transport:server-dat-get-port host-info)) (start-res (http-transport:client-connect iface port)) ;; (ping-res (server:ping-server run-id iface port)) (ping-res (rmt:login-no-auto-client-setup start-res run-id))) (if ping-res ;; sucessful login? (begin + (http-transport:close-connections run-id) (hash-table-set! *runremote* run-id start-res) start-res) ;; return the server info - (if (member remaining-tries '(3 4 6)) + (if (member remaining-tries '(9 6 4 2)) (begin ;; login failed (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" host-info) + (http-transport:close-connections run-id) (hash-table-delete! *runremote* run-id) (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id - (car host-info) - (cadr host-info) + iface + port " client:setup (host-info=#t)") - (thread-sleep! 5) + (if (< remaining-tries 8) + (thread-sleep! 5)) (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1))) (begin (debug:print 25 "INFO: client:setup failed to connect, start-res=" start-res ", run-id=" run-id ", host-info=" host-info) (thread-sleep! 5) (client:setup run-id remaining-tries: (- remaining-tries 1)))))) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -93,29 +93,29 @@ (mutex-lock! *heartbeat-mutex*) (set! *last-db-access* (current-seconds)) (mutex-unlock! *heartbeat-mutex*)) ;; This is the /ctrl path where data is handed to the server and ;; responses - ((equal? (uri-path (request-uri (current-request))) - '(/ "ctrl")) - (let* ((packet (db:string->obj dat)) - (qtype (cdb:packet-get-qtype packet))) - (debug:print-info 12 "server=> received packet=" packet) - (if (not (member qtype '(sync ping))) - (begin - (mutex-lock! *heartbeat-mutex*) - (set! *last-db-access* (current-seconds)) - (mutex-unlock! *heartbeat-mutex*))) - ;; (mutex-lock! *db:process-queue-mutex*) ;; trying a mutex - ;; (set! res (open-run-close db:process-queue-item open-db packet)) - (set! res (db:process-queue-item db packet)) - ;; (mutex-unlock! *db:process-queue-mutex*) - (debug:print-info 11 "Return value from db:process-queue-item is " res) - (send-response body: (conc "ctrl data\n" - res - "") - headers: '((content-type text/plain))))) + ;; ((equal? (uri-path (request-uri (current-request))) + ;; '(/ "ctrl")) + ;; (let* ((packet (db:string->obj dat)) + ;; (qtype (cdb:packet-get-qtype packet))) + ;; (debug:print-info 12 "server=> received packet=" packet) + ;; (if (not (member qtype '(sync ping))) + ;; (begin + ;; (mutex-lock! *heartbeat-mutex*) + ;; (set! *last-db-access* (current-seconds)) + ;; (mutex-unlock! *heartbeat-mutex*))) + ;; ;; (mutex-lock! *db:process-queue-mutex*) ;; trying a mutex + ;; ;; (set! res (open-run-close db:process-queue-item open-db packet)) + ;; (set! res (db:process-queue-item db packet)) + ;; ;; (mutex-unlock! *db:process-queue-mutex*) + ;; (debug:print-info 11 "Return value from db:process-queue-item is " res) + ;; (send-response body: (conc "ctrl data\n" + ;; res + ;; "") + ;; headers: '((content-type text/plain))))) ((equal? (uri-path (request-uri (current-request))) '(/ "")) (send-response body: (http-transport:main-page))) ((equal? (uri-path (request-uri (current-request))) '(/ "runs")) @@ -221,22 +221,22 @@ (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*))) ;; Send "cmd" with json payload "params" to serverdat and receive result ;; (define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3)) - (let* ((fullurl (if (list? serverdat) - (list-ref serverdat 4) ;; (cadddr serverdat) ;; this is the uri for /api + (let* ((fullurl (if (vector? serverdat) + (http-transport:server-dat-get-api-req serverdat) (begin (debug:print 0 "FATAL ERROR: http-transport:client-api-send-receive called with no server info") (exit 1)))) (res #f)) (handle-exceptions exn (if (> numretries 0) (begin (mutex-unlock! *http-mutex*) - (thread-sleep! 2) + (thread-sleep! 1) (close-all-connections!) (debug:print 0 "WARNING: Failed to communicate with server, trying again, numretries left: " numretries) (http-transport:client-api-send-receive run-id serverdat cmd params numretries: (- numretries 1))) (begin (mutex-unlock! *http-mutex*) @@ -273,32 +273,37 @@ (thread-join! th1) (thread-terminate! th2) (debug:print-info 11 "got res=" res) res))))) +;; careful closing of connections stored in *runremote* +;; +(define (http-transport:close-connections run-id) + (let* ((server-dat (hash-table-ref/default *runremote* run-id #f))) + (if (vector? server-dat) + (let ((api-dat (http-transport:server-dat-get-api-uri server-dat))) + (close-connection! api-dat) + #t) + #f))) + + +(define (make-http-transport:server-dat)(make-vector 5)) +(define (http-transport:server-dat-get-iface vec) (vector-ref vec 0)) +(define (http-transport:server-dat-get-port vec) (vector-ref vec 1)) +(define (http-transport:server-dat-get-api-uri vec) (vector-ref vec 2)) +(define (http-transport:server-dat-get-api-url vec) (vector-ref vec 3)) +(define (http-transport:server-dat-get-api-req vec) (vector-ref vec 4)) + ;; ;; connect ;; (define (http-transport:client-connect iface port) - (let* ((api-url (conc "http://" iface ":" port "/api")) - (uri-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/ctrl")))) - ;; (uri-dat (make-request method: 'GET uri: (uri-reference (conc "http://" iface ":" port "/ctrl")))) - (uri-api-dat (make-request method: 'POST uri: api-url)) ;; (uri-reference (conc "http://" iface ":" port "/api")))) - ;; (uri-api-dat (make-request method: 'GET uri: (uri-reference (conc "http://" iface ":" port "/api")))) - (server-dat (list iface port uri-dat uri-api-dat api-url))) -;; (login-res (server:ping-server run-id server-dat))) ;; login-no-auto-client-setup server-dat run-id))) + (let* ((api-url (conc "http://" iface ":" port "/api")) + (api-uri (uri-reference (conc "http://" iface ":" port "/api"))) + (api-req (make-request method: 'POST uri: api-uri)) + (server-dat (vector iface port api-uri api-url api-req))) server-dat)) -;; (if (and (list? login-res) -;; (car login-res)) -;; (begin -;; (hash-table-set! *runremote* run-id server-dat) -;; (debug:print-info 2 "Logged in and connected to " iface ":" port) -;; (hash-table-set! *runremote* run-id server-dat) -;; server-dat) -;; (begin -;; (debug:print-info 0 "ERROR: Failed to login or connect to " iface ":" port) -;; #f)))) ;; run http-transport:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; (define (http-transport:keep-running server-id) Index: tests/fullrun/config/mt_include_1.config ================================================================== --- tests/fullrun/config/mt_include_1.config +++ tests/fullrun/config/mt_include_1.config @@ -1,19 +1,19 @@ [setup] # exectutable /path/to/megatest -max_concurrent_jobs 500 +max_concurrent_jobs 20 linktree #{getenv MT_RUN_AREA_HOME}/tmp/mt_links [jobtools] useshell yes # ## launcher launches jobs, the job is managed on the target host ## by megatest, comment out launcher to run local # workhosts localhost hermes # launcher exec nbfake -# launcher nbfake -launcher loadrunner +launcher nbfake +# launcher loadrunner # launcher echo # launcher nbfind # launcher nodanggood # launcher nbload Index: tests/fullrun/megatest.config ================================================================== --- tests/fullrun/megatest.config +++ tests/fullrun/megatest.config @@ -116,11 +116,11 @@ port 8080 # This server will keep running this number of hours after last access. # Three minutes is 0.05 hours # timeout 0.025 -timeout 0.25 +timeout 0.025 ## disks are: ## name host:/path/to/area ## -or- ## name /path/to/area