Megatest

Check-in [3e016c5af5]
Login
Overview
Comment:More close-connections fixes.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.80
Files: files | file ages | folders
SHA1: 3e016c5af52582f6784dcde42ed5b1a20863409e
User & Date: matt on 2023-01-12 20:02:19
Other Links: branch diff | manifest | tags
Context
2023-01-12
21:49
Calling db:with-db inside a transaction was bad. Disallowed calling db:with-db with db and factored out few calls to take db. TCP connections seems stable. Fingers crossed. check-in: fb48802dd5 user: matt tags: v1.80
20:02
More close-connections fixes. check-in: 3e016c5af5 user: matt tags: v1.80
17:48
Added tasks_queue to db:sync-main-list to sync this table to NFS check-in: d461a68372 user: mmgraham tags: v1.80
Changes

Modified client.scm from [2a6738b25e] to [04727fe923].

105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
			       (begin
				 (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res)
				 (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
				 start-res)
			       (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))))
			 (begin    ;; login failed but have a server record, clean out the record and try again
			   (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid.  Fixes part of Randy;s ticket 1405717332
			   (case *transport-type* 
			     ((http)(http-transport:close-connections)))
                           (if *runremote* 
			       (remote-conndat-set! runremote #f)  ;; (hash-table-delete! runremote run-id)
                               )
			   (thread-sleep! 1)
			   (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))
			   )))
		   (begin    ;; no server registered
		     ;; (server:kind-run areapath)
		     (server:start-and-wait areapath)
		     (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries)







<
|
<
<
<







105
106
107
108
109
110
111

112



113
114
115
116
117
118
119
			       (begin
				 (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res)
				 (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
				 start-res)
			       (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))))
			 (begin    ;; login failed but have a server record, clean out the record and try again
			   (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid.  Fixes part of Randy;s ticket 1405717332

			   (http-transport:close-connections *runremote*)



			   (thread-sleep! 1)
			   (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))
			   )))
		   (begin    ;; no server registered
		     ;; (server:kind-run areapath)
		     (server:start-and-wait areapath)
		     (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries)

Modified http-transport.scm from [01982440a6] to [4037b30aac].

343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
	     (signal (make-composite-condition
		      (make-property-condition 
		       'timeout
		       'message "nmsg-transport:client-api-send-receive-raw timed out talking to server")))))))

;; careful closing of connections stored in *runremote*
;;
(define (http-transport:close-connections #!key (area-dat #f))
  (let* ((runremote  (or area-dat *runremote*))
	 (server-dat (if runremote
                         (remote-conndat runremote)
                         #f))) ;; (hash-table-ref/default *runremote* run-id #f)))
    (if (vector? server-dat)
	(let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
	  (handle-exceptions
	    exn







|
|







343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
	     (signal (make-composite-condition
		      (make-property-condition 
		       'timeout
		       'message "nmsg-transport:client-api-send-receive-raw timed out talking to server")))))))

;; careful closing of connections stored in *runremote*
;;
(define (http-transport:close-connections runremote)
  (let* (;; (runremote  (or area-dat *runremote*))
	 (server-dat (if runremote
                         (remote-conndat runremote)
                         #f))) ;; (hash-table-ref/default *runremote* run-id #f)))
    (if (vector? server-dat)
	(let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
	  (handle-exceptions
	    exn

Modified rmt.scm from [d02e4b3389] to [53cc395259].

173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
     ;; reset the connection if it has been unused too long
     ((and runremote
           (remote-conndat runremote)
	   (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
	      (+ (http-transport:server-dat-get-last-access (remote-conndat runremote))
		 (remote-server-timeout runremote))))
      (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.")
      (http-transport:close-connections area-dat: runremote)
      ;; moving this setting of runremote conndat to #f to inside the http-transport:close-connections
      ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
      (mutex-unlock! *rmt-mutex*)
      (rmt:send-receive cmd rid params attemptnum: attemptnum))
     
     ;;DOT CASE5 [label="local\nread"];
     ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5};







|







173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
     ;; reset the connection if it has been unused too long
     ((and runremote
           (remote-conndat runremote)
	   (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
	      (+ (http-transport:server-dat-get-last-access (remote-conndat runremote))
		 (remote-server-timeout runremote))))
      (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.")
      (http-transport:close-connections runremote)
      ;; moving this setting of runremote conndat to #f to inside the http-transport:close-connections
      ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
      (mutex-unlock! *rmt-mutex*)
      (rmt:send-receive cmd rid params attemptnum: attemptnum))
     
     ;;DOT CASE5 [label="local\nread"];
     ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5};
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
	 (success  (if (vector? dat) (vector-ref dat 0) #f))
	 (res      (if (vector? dat) (vector-ref dat 1) #f)))
    (if (and (vector? conninfo) (< 5 (vector-length conninfo)))
	(http-transport:server-dat-update-last-access conninfo) ;; refresh access time
	(begin
	  (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo)
	  (set! conninfo #f)
	  (remote-conndat-set! *runremote* #f) ;; NOTE: *runremote* is global copy of runremote. Purpose: factor out global.
	  (http-transport:close-connections  area-dat: runremote)))
    (debug:print-info 13 *default-log-port* "rmt:send-receive, case  9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
    (mutex-unlock! *rmt-mutex*)
    (if success ;; success only tells us that the transport was
	;; successful, have to examine the data to see if
	;; there was a detected issue at the other end
	(extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
	(begin







<
|







338
339
340
341
342
343
344

345
346
347
348
349
350
351
352
	 (success  (if (vector? dat) (vector-ref dat 0) #f))
	 (res      (if (vector? dat) (vector-ref dat 1) #f)))
    (if (and (vector? conninfo) (< 5 (vector-length conninfo)))
	(http-transport:server-dat-update-last-access conninfo) ;; refresh access time
	(begin
	  (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo)
	  (set! conninfo #f)

	  (http-transport:close-connections runremote)))
    (debug:print-info 13 *default-log-port* "rmt:send-receive, case  9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
    (mutex-unlock! *rmt-mutex*)
    (if success ;; success only tells us that the transport was
	;; successful, have to examine the data to see if
	;; there was a detected issue at the other end
	(extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
	(begin
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
  (debug:print-info 12 log-port "rmt:send-receive, case 3")
  (debug:print 0 log-port "WARNING: write transaction requested on a readonly area.  cmd="cmd" params="params)
  #f)

(define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params)
  (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)
  (mutex-lock! *rmt-mutex*)
  (remote-conndat-set!    runremote #f)
  (http-transport:close-connections area-dat: runremote)
  (remote-server-url-set! runremote #f)
  (mutex-unlock! *rmt-mutex*)
  (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9.1")
  (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
  
(define (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
  (if (and (vector? res)







<
|







1065
1066
1067
1068
1069
1070
1071

1072
1073
1074
1075
1076
1077
1078
1079
  (debug:print-info 12 log-port "rmt:send-receive, case 3")
  (debug:print 0 log-port "WARNING: write transaction requested on a readonly area.  cmd="cmd" params="params)
  #f)

(define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params)
  (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)
  (mutex-lock! *rmt-mutex*)

  (http-transport:close-connections runremote)
  (remote-server-url-set! runremote #f)
  (mutex-unlock! *rmt-mutex*)
  (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9.1")
  (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
  
(define (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
  (if (and (vector? res)
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
						 ;; server is
						 ;; overloaded and we
						 ;; want to ease off
						 ;; the queries
      (let ((wait-delay (+ attemptnum (* attemptnum 10))))
	(debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
	(mutex-lock! *rmt-mutex*)
	(http-transport:close-connections area-dat: runremote)
	(set! *runremote* #f) ;; force starting over
	(mutex-unlock! *rmt-mutex*)
	(thread-sleep! wait-delay)
	(rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
      res)) ;; All good, return res

#;(set-functions rmt:send-receive                       remote-server-url-set!
	       http-transport:close-connections	      remote-conndat-set!
	       debug:print                            debug:print-info
	       remote-ro-mode                         remote-ro-mode-set!
	       remote-ro-mode-checked-set!            remote-ro-mode-checked)







|











1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
						 ;; server is
						 ;; overloaded and we
						 ;; want to ease off
						 ;; the queries
      (let ((wait-delay (+ attemptnum (* attemptnum 10))))
	(debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
	(mutex-lock! *rmt-mutex*)
	(http-transport:close-connections runremote)
	(set! *runremote* #f) ;; force starting over
	(mutex-unlock! *rmt-mutex*)
	(thread-sleep! wait-delay)
	(rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
      res)) ;; All good, return res

#;(set-functions rmt:send-receive                       remote-server-url-set!
	       http-transport:close-connections	      remote-conndat-set!
	       debug:print                            debug:print-info
	       remote-ro-mode                         remote-ro-mode-set!
	       remote-ro-mode-checked-set!            remote-ro-mode-checked)