Megatest

Check-in [676fe7701a]
Login
Overview
Comment: Fixed old conninfo record not being properly removed from runremote
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.80
Files: files | file ages | folders
SHA1: 676fe7701aa95de7e763d29f7592d7eaf0d2e4ba
User & Date: matt on 2023-01-31 21:17:57
Original Comment: Fixed TCP leak issue due to old conninfo record not being properly removed from runremote
Other Links: branch diff | manifest | tags
Context
2023-02-01
06:11
Pulled in ulex from v2.0 check-in: 69eb6e4304 user: matt tags: v1.80
2023-01-31
21:17
Fixed old conninfo record not being properly removed from runremote check-in: 676fe7701a user: matt tags: v1.80
2023-01-26
04:01
Added some reference docs on servers check-in: f6037c5e9e user: matt tags: v1.80
Changes

Modified common.scm from [c2a1a4f762] to [fc85f532b1].

325
326
327
328
329
330
331
332

333
334
335
336
337
338
339
325
326
327
328
329
330
331

332
333
334
335
336
337
338
339







-
+







		       res))
  (server-url        #f) ;; (server:check-if-running *toppath*) #f))
  (server-id         #f)
  (server-info       #f) ;; (if *toppath* (server:check-if-running *toppath*) #f))
  (last-server-check 0)  ;; last time we checked to see if the server was alive
  (connect-time      (current-seconds)) ;; when we first connected
  (last-access       (current-seconds)) ;; last time we talked to server
  (conndat           #f) ;; iface port api-uri api-url api-req seconds server-id
  ;; (conndat           #f) ;; iface port api-uri api-url api-req seconds server-id
  (server-timeout    (server:expiration-timeout))
  (force-server      #f)
  (ro-mode           #f)  
  (ro-mode-checked   #f) ;; flag that indicates we have checked for ro-mode

  ;; conndat stuff
  (iface             #f) ;; TODO: Consolidate this data with server-url and server-info above

Modified http-transport.scm from [c450806bbf] to [c61c306ba3].

338
339
340
341
342
343
344
345





346
347
348
349
350
351
352
338
339
340
341
342
343
344

345
346
347
348
349
350
351
352
353
354
355
356







-
+
+
+
+
+







	    exn
	  (begin
	    (print-call-chain *default-log-port*)
	    (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
	  (if (args:any-defined? "-server" "-execute" "-run")
	      (debug:print-info 0 *default-log-port* "Closing connections to "api-dat))
	  (if api-dat (close-connection! api-dat))
	  (remote-conndat-set! runremote #f)

	  ;; Would it be better to set *runremote* to #f? I don't think so. But we may
	  ;; need to clear more of the runremote fields
	  (remote-api-url-set! runremote #f) ;; used as a flag for connection up and running
	  
	  #t))
      #f))

;; Run http-transport:keep-running in a parallel thread to monitor that the db is being
;; used, and to shut down after some time if it is not.
;;
(define (http-transport:keep-running) 

Modified rmt.scm from [a0fac94067] to [8ad8a6d094].

41
42
43
44
45
46
47
48
49



50
51
52
53
54
55





56
57
58
59
60
61
62
41
42
43
44
45
46
47


48
49
50
51





52
53
54
55
56
57
58
59
60
61
62
63







-
-
+
+
+

-
-
-
-
-
+
+
+
+
+







;;  S U P P O R T   F U N C T I O N S
;;======================================================================

;; if a server is either running or in the process of starting call client:setup
;; else return #f to let the calling proc know that there is no server available
;;
(define (rmt:get-connection-info areapath runremote) ;; TODO: push areapath down.
  (let* ((cinfo     (if (remote? runremote)
			(remote-conndat runremote)
  (let* ((cinfo     (if (and (remote? runremote)
			     (remote-api-url runremote)) ;; we have a connection
			runremote
			#f)))
	  (if cinfo
	      cinfo
	      (if (server:check-if-running areapath)
		  (client:setup areapath runremote)
		  #f))))
    (if cinfo
	cinfo
	(if (server:check-if-running areapath)
	    (client:setup areapath runremote)
	    #f))))

(define (rmt:on-homehost? runremote)
  (let* ((hh-dat (remote-hh-dat runremote)))
    (if (pair? hh-dat)
	(cdr hh-dat)
	(begin
	  (debug:print-info 0 *default-log-port* "hh-dat="hh-dat)
120
121
122
123
124
125
126
127


128
129
130
131
132
133
134
121
122
123
124
125
126
127

128
129
130
131
132
133
134
135
136







-
+
+







			(remote-server-id-set! *runremote* (server:record->id server-info)))))  
	  (set! runremote   *runremote*))) ;; new runremote will come from this on next iteration
    
    ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity
    ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"];
    ;; DOT SET_HOMEHOST -> MUTEXLOCK;
    ;; ensure we have a homehost record
    (if (not (pair? (remote-hh-dat runremote)))  ;; not on homehost
    (if (or (not (pair? (remote-hh-dat runremote)))  ;; not on homehost
	    (not (cdr (remote-hh-dat runremote))))   ;; not on homehost
	(thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
	(let ((hh-data (server:choose-server areapath 'homehost)))
	  (remote-hh-dat-set! runremote (or hh-data (cons #f #f)))))
    
    ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
    (cond
     #;((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds
167
168
169
170
171
172
173
174

175
176
177
178
179
180
181
169
170
171
172
173
174
175

176
177
178
179
180
181
182
183







-
+







     ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
     ;;
     ;;DOT CASE4 [label="reset\nconnection"];
     ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4}
     ;;DOT CASE4 -> "rmt:send-receive";
     ;; reset the connection if it has been unused too long
     ((and runremote
           (remote-conndat runremote)
           (remote-api-url runremote)
	   (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
	      (+ (remote-last-access runremote)
		 (remote-server-timeout runremote))))
      (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses in " (remote-server-timeout runremote) " seconds, forcing new connection.")
      (http-transport:close-connections runremote)
      ;; moving this setting of runremote conndat to #f to inside the http-transport:close-connections
      ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
252
253
254
255
256
257
258
259

260
261
262


263
264
265
266


267
268
269
270
271
272
273
254
255
256
257
258
259
260

261
262


263
264
265
266
267

268
269
270
271
272
273
274
275
276







-
+

-
-
+
+



-
+
+







      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  8.1")
      (rmt:open-qry-close-locally cmd 0 params)))

     ;;DOT CASE9 [label="force server\nnot on homehost"];
     ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9};
     ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive";
     ((or (and (remote-force-server runremote)              ;; we are forcing a server and don't yet have a connection to one
	       (not (remote-conndat runremote)))
	       (not (remote-api-url runremote)))
	  (and (not (cdr (remote-hh-dat runremote)))        ;; not on a homehost 
	       (not (remote-conndat runremote))))           ;; and no connection
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote))
	       (not (remote-api-url runremote))))           ;; and no connection
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " runremote: " (remote->alist runremote))
      (mutex-unlock! *rmt-mutex*)
      (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up?
	  (server:start-and-wait *toppath*))
      (remote-conndat-set! runremote (rmt:get-connection-info *toppath* runremote)) ;; calls client:setup which calls client:setup-http
      ;; was: (remote-conndat-set! runremote (rmt:get-connection-info *toppath* runremote)) ;; calls client:setup which calls client:setup-http
      (set! runremote (rmt:get-connection-info *toppath* runremote)) ;; calls client:setup which calls client:setup-http
      (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as

     ;;DOT CASE10 [label="on homehost"];
     ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10};
     ;;DOT CASE10 -> "rmt:open-qry-close-locally";
     ;; all set up if get this far, dispatch the query
     ((and (not (remote-force-server runremote))
287
288
289
290
291
292
293
294

295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313

314

315
316
317
318


319
320

321
322
323
324
325
326
327
290
291
292
293
294
295
296

297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317

318
319
320


321
322
323

324
325
326
327
328
329
330
331







-
+



















+
-
+


-
-
+
+

-
+







;; bunch of small functions factored out of send-receive to make debug easier
;;

(define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)
  ;; (mutex-unlock! *rmt-mutex*)
  (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9")
  ;; (mutex-lock! *rmt-mutex*)
  (let* ((conninfo (remote-conndat runremote))
  (let* (;; (conninfo (remote-conndat runremote))
	 (dat-in  (condition-case ;; handling here has
			     ;; caused a lot of
			     ;; problems. However it
			     ;; is needed to deal with
			     ;; attemtped
			     ;; communication to
			     ;; servers that have gone
			     ;; away
			     (http-transport:client-api-send-receive 0 runremote cmd params)
			     ;; (http-transport:client-api-send-receive 0 conninfo cmd params runremote)
			     ((servermismatch)  (vector #f "Server id mismatch" ))
			     ((commfail)(vector #f "communications fail"))
			     ((exn)(vector #f "other fail" (print-call-chain)))))
	 (dat      (if (and (vector? dat-in) ;; ... check it is a correct size
			    (> (vector-length dat-in) 1))
		       dat-in
		       (vector #f (conc "communications fail (type 2), dat-in=" dat-in))))
	 (success  (if (vector? dat) (vector-ref dat 0) #f))
	 (res      (if (vector? dat) (vector-ref dat 1) #f)))
    (if (and (remote? runremote)
    (if (and (vector? conninfo) (< 5 (vector-length conninfo)))
	     (remote-api-url runremote)) ;; (and (vector? conninfo) (< 5 (vector-length conninfo)))
	(remote-last-access-set! runremote (current-seconds)) ;; refresh access time
	(begin
	  (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo)
	  (set! conninfo #f)
	  (debug:print 0 *default-log-port* "INFO: Should not get here! runremote="(remote->alist runremote))
	  ;; (set! conninfo #f)
	  (http-transport:close-connections runremote)))
    (debug:print-info 13 *default-log-port* "rmt:send-receive, case  9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
    (debug:print-info 13 *default-log-port* "rmt:send-receive, case  9. runremote=" (remote->alist runremote) " dat=" dat " runremote = " runremote)
    (mutex-unlock! *rmt-mutex*)
    (if success ;; success only tells us that the transport was
	;; successful, have to examine the data to see if
	;; there was a detected issue at the other end
	(extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
	(begin
           (debug:print-error 0 *default-log-port* " dat=" dat)