Megatest

Check-in [7a1479ff37]
Login
Overview
Comment:Couple fixes to rmt connections
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.64
Files: files | file ages | folders
SHA1: 7a1479ff374df0d1c317a97d35d3b0af6644ee80
User & Date: mrwellan on 2017-07-14 12:30:56
Other Links: branch diff | manifest | tags
Context
2017-07-14
17:21
Added get-file-descriptor-count function to common.scm check-in: 09e3c49fcb user: jmoon18 tags: v1.64
12:30
Couple fixes to rmt connections check-in: 7a1479ff37 user: mrwellan tags: v1.64
10:19
Added mechanism for tracking calls to megatest. Fixed undefined variable. Backoff sync rate when system load is high. Run external sync only if internal sync is taking more than 4 seconds. check-in: 5852022e7d user: mrwellan tags: v1.64, v1.6425
Changes

Modified rmt.scm from [efb226ceca] to [6f288ac3e2].

242
243
244
245
246
247
248
249
250
251
252
253
254
255

256
257

258
259
260
261
262
263
264


265
266
267
268
269
270
271
272
273
274
275

276
277
278

279
280
281
282
283
284
285
286
                                  ((commfail)(vector #f "communications fail"))
                                  ((exn)(vector #f "other fail" (print-call-chain)))))
			 (else
			  (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported")
			  (exit))))
	     (success  (if (vector? dat) (vector-ref dat 0) #f))
	     (res      (if (vector? dat) (vector-ref dat 1) #f)))
	(if (and (vector? conninfo) (> 5 (vector-length conninfo)))

	    
            (http-transport:server-dat-update-last-access conninfo) ;; refresh access time


	    (begin

              (set! conninfo #f)
              (remote-conndat-set! runremote #f))) 

	;; (mutex-unlock! *rmt-mutex*)
        (debug:print-info 13 *default-log-port* "rmt:send-receive, case  9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
	(mutex-unlock! *rmt-mutex*)
	(if success ;; success only tells us that the transport was successful, have to examine the data to see if there was a detected issue at the other end
	    (if (and (vector? res)
		     (eq? (vector-length res) 2)
		     (eq? (vector-ref res 1) 'overloaded)) ;; since we are looking at the data to carry the error we'll use a fairly obtuse combo to minimise the chances of some sort of collision.


		(let ((wait-delay (+ attemptnum (* attemptnum 10))))
		  (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
		  (mutex-lock! *rmt-mutex*)
		  (http-transport:close-connections area-dat: runremote)
		  (set! *runremote* #f) ;; force starting over
		  (mutex-unlock! *rmt-mutex*)
		  (thread-sleep! wait-delay)
		  (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
		res) ;; All good, return res
	    (begin
	      (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)

	      (remote-conndat-set!    runremote #f)
	      (http-transport:close-connections area-dat: runremote)
	      (remote-server-url-set! runremote #f)

	      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9.1")
	      ;; (if (not (server:check-if-running *toppath*))
	      ;; 	  (server:start-and-wait *toppath*))
	      (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))))))))

    ;;DOT }
    
;; (define (rmt:update-db-stats run-id rawcmd params duration)







|
<
<

<
<

>

|
>







>
>











>
|


>
|







242
243
244
245
246
247
248
249


250


251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
                                  ((commfail)(vector #f "communications fail"))
                                  ((exn)(vector #f "other fail" (print-call-chain)))))
			 (else
			  (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported")
			  (exit))))
	     (success  (if (vector? dat) (vector-ref dat 0) #f))
	     (res      (if (vector? dat) (vector-ref dat 1) #f)))
	(if (and (vector? conninfo) (< 5 (vector-length conninfo)))


            (http-transport:server-dat-update-last-access conninfo) ;; refresh access time


	    (begin
              (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo)
              (set! conninfo #f)
              (remote-conndat-set! *runremote* #f)
              (http-transport:close-connections  area-dat: runremote)))
	;; (mutex-unlock! *rmt-mutex*)
        (debug:print-info 13 *default-log-port* "rmt:send-receive, case  9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
	(mutex-unlock! *rmt-mutex*)
	(if success ;; success only tells us that the transport was successful, have to examine the data to see if there was a detected issue at the other end
	    (if (and (vector? res)
		     (eq? (vector-length res) 2)
		     (eq? (vector-ref res 1) 'overloaded)) ;; since we are looking at the data to carry the error we'll use a fairly obtuse combo to minimise the chances of some sort of collision.
                ;; this is the case where the returned data is bad or the server is overloaded and we want
                ;; to ease off the queries
		(let ((wait-delay (+ attemptnum (* attemptnum 10))))
		  (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
		  (mutex-lock! *rmt-mutex*)
		  (http-transport:close-connections area-dat: runremote)
		  (set! *runremote* #f) ;; force starting over
		  (mutex-unlock! *rmt-mutex*)
		  (thread-sleep! wait-delay)
		  (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
		res) ;; All good, return res
	    (begin
	      (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)
	      (mutex-lock! *rmt-mutex*)
              (remote-conndat-set!    runremote #f)
	      (http-transport:close-connections area-dat: runremote)
	      (remote-server-url-set! runremote #f)
	      (mutex-unlock! *rmt-mutex*)
              (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9.1")
	      ;; (if (not (server:check-if-running *toppath*))
	      ;; 	  (server:start-and-wait *toppath*))
	      (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))))))))

    ;;DOT }
    
;; (define (rmt:update-db-stats run-id rawcmd params duration)