Megatest

Check-in [08646db430]
Login
Overview
Comment:Remove case 6 from rmt:send-receive. This was pinging the server constantly and opening addtional connections that don't appear to being closed. Also, just let the connection die, it should retry.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.70-nohomehost
Files: files | file ages | folders
SHA1: 08646db430e863506fed170998c073b42ad25647
User & Date: matt on 2022-11-22 06:36:06
Other Links: branch diff | manifest | tags
Context
2022-11-22
07:49
Keep more servers around but let them also expire quickly if not used check-in: c0ef1c5bfa user: matt tags: v1.70-nohomehost
06:36
Remove case 6 from rmt:send-receive. This was pinging the server constantly and opening addtional connections that don't appear to being closed. Also, just let the connection die, it should retry. check-in: 08646db430 user: matt tags: v1.70-nohomehost
02:57
sixyfivek is running pretty good check-in: d1ac2665ef user: matt tags: v1.70-nohomehost
Changes

Modified db.scm from [32f96d6d77] to [e18bcc992d].

4610
4611
4612
4613
4614
4615
4616




4617
4618
4619
4620
4621
4622
4623
4624
				  (let ((db (cdr *task-db*)))
				    (if (sqlite3:database? db)
					(begin
					  (sqlite3:interrupt! db)
					  (sqlite3:finalize! db #t)
					  ;; (vector-set! *task-db* 0 #f)
					  (set! *task-db* #f)))))




                              (http-client#close-all-connections!)
                              ;; (if (and *runremote*
                              ;;          (remote-conndat *runremote*))
                              ;;     (begin
                              ;;       (http-client#close-all-connections!))) ;; for http-client
                              (if (not (eq? *default-log-port* (current-error-port)))
                                  (close-output-port *default-log-port*))
			      (set! *default-log-port* (current-error-port))) "Cleanup db exit thread"))







>
>
>
>
|







4610
4611
4612
4613
4614
4615
4616
4617
4618
4619
4620
4621
4622
4623
4624
4625
4626
4627
4628
				  (let ((db (cdr *task-db*)))
				    (if (sqlite3:database? db)
					(begin
					  (sqlite3:interrupt! db)
					  (sqlite3:finalize! db #t)
					  ;; (vector-set! *task-db* 0 #f)
					  (set! *task-db* #f)))))
			      (if (and (not (args:get-arg "-server"))
				       *runremote*)
				  (begin
				    (debug:print-info 0 *default-log-port* "Closing all client connections...")
				    (http-client#close-all-connections!)))
                              ;; (if (and *runremote*
                              ;;          (remote-conndat *runremote*))
                              ;;     (begin
                              ;;       (http-client#close-all-connections!))) ;; for http-client
                              (if (not (eq? *default-log-port* (current-error-port)))
                                  (close-output-port *default-log-port*))
			      (set! *default-log-port* (current-error-port))) "Cleanup db exit thread"))

Modified http-transport.scm from [acc168fde7] to [9bfccef351].

283
284
285
286
287
288
289

290
291
292
293
294
295
296
                                                (if (debug:debug-mode 1)
                                                    (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...")
                                                    (begin
                                                      (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
                                                      (debug:print 0 *default-log-port* " message: " msg ", exn=" exn)
                                                      (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey"))
                                                      (debug:print 0 *default-log-port* " call-chain: " call-chain)))

						(set! *runremote* #f)
						(set! runremote #f)
						;; (if runremote
						;;    (remote-conndat-set! runremote #f))
						;; Killing associated server to allow clean retry.")
						;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
						(mutex-unlock! *http-mutex*)







>







283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
                                                (if (debug:debug-mode 1)
                                                    (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...")
                                                    (begin
                                                      (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
                                                      (debug:print 0 *default-log-port* " message: " msg ", exn=" exn)
                                                      (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey"))
                                                      (debug:print 0 *default-log-port* " call-chain: " call-chain)))
						;; what if another thread is communicating ok? Can't happen due to mutex
						(set! *runremote* #f)
						(set! runremote #f)
						;; (if runremote
						;;    (remote-conndat-set! runremote #f))
						;; Killing associated server to allow clean retry.")
						;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
						(mutex-unlock! *http-mutex*)
389
390
391
392
393
394
395
396

397
398
399
400
401
402
403
      (begin
	(print-call-chain (current-error-port))
	(debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!"))))

;;
;; connect
;;
(define (http-transport:client-connect iface port server-id) 

  (let* ((api-url      (conc "http://" iface ":" port "/api"))
	 (api-uri      (uri-reference (conc "http://" iface ":" port "/api")))
	 (api-req      (make-request method: 'POST uri: api-uri))
	 (server-dat   (vector iface port api-uri api-url api-req (current-seconds) server-id)))
    server-dat))









|
>







390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
      (begin
	(print-call-chain (current-error-port))
	(debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!"))))

;;
;; connect
;;
(define (http-transport:client-connect iface port server-id)
  (debug:print-info 0 *default-log-port* "Connecting to client at "iface":"port", with server-id "server-id)
  (let* ((api-url      (conc "http://" iface ":" port "/api"))
	 (api-uri      (uri-reference (conc "http://" iface ":" port "/api")))
	 (api-req      (make-request method: 'POST uri: api-uri))
	 (server-dat   (vector iface port api-uri api-url api-req (current-seconds) server-id)))
    server-dat))


Modified rmt.scm from [f32646f514] to [91ffe1108a].

194
195
196
197
198
199
200



201
202
203
204
205
206
207
208
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  5")
      (rmt:open-qry-close-locally cmd 0 params))

     ;;DOT CASE6 [label="init\nremote"];
     ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6};
     ;;DOT CASE6 -> "rmt:send-receive";
     ;; on homehost and this is a write, we already have a server, but server has died



     ((and (cdr (remote-hh-dat runremote))           ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url runremote)             ;; have a server
           (not (server:ping (remote-server-url runremote) (remote-server-id runremote))))  ;; server has died. NOTE: this is not a cheap call! Need better approach.
      (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6")
      (http-transport:close-connections area-dat: runremote) ;; make sure to clean up
      (set! *runremote* (make-remote))
      (let* ((server-info (remote-server-info *runremote*))) 







>
>
>
|







194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  5")
      (rmt:open-qry-close-locally cmd 0 params))

     ;;DOT CASE6 [label="init\nremote"];
     ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6};
     ;;DOT CASE6 -> "rmt:send-receive";
     ;; on homehost and this is a write, we already have a server, but server has died

     ;; reinstate this keep-alive section but inject a time condition into the (add ...
     
     #;((and (cdr (remote-hh-dat runremote))           ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url runremote)             ;; have a server
           (not (server:ping (remote-server-url runremote) (remote-server-id runremote))))  ;; server has died. NOTE: this is not a cheap call! Need better approach.
      (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6")
      (http-transport:close-connections area-dat: runremote) ;; make sure to clean up
      (set! *runremote* (make-remote))
      (let* ((server-info (remote-server-info *runremote*)))