Megatest

Check-in [386b7b1848]
Login
Overview
Comment:Removed more globals from rmt:send-receive-orig
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.65-wip
Files: files | file ages | folders
SHA1: 386b7b1848ab5f0babbbc73fa424b2514472e359
User & Date: matt on 2019-10-01 22:35:30
Other Links: branch diff | manifest | tags
Context
2019-10-01
22:54
Broke it with mutex deadlock check-in: 71a141e42e user: matt tags: v1.65-wip
22:35
Removed more globals from rmt:send-receive-orig check-in: 386b7b1848 user: matt tags: v1.65-wip
22:24
Removed globals from rmt:send-receive-orig check-in: 2d1ae5e508 user: matt tags: v1.65-wip
Changes

Modified rmt.scm from [caaecdf3ba] to [97df11d244].

74
75
76
77
78
79
80
81
82
83
84


85
86
87
88
89
90
91
92
    (if (member cmd '(blah))
      (begin
	(mutex-lock! *send-receive-mutex*)
	(if (not *runremote*)(set! *runremote* (make-remote)))
	(let ((ulex:conn (remote-ulex:conn *runremote*)))
	  (if (not ulex:conn)(remote-ulex:conn-set! *runremote* (rmtmod:setup-ulex *toppath*)))
	  (rmtmod:send-receive-ulex ulex:conn cmd rid params attemptnum area-dat)))
      (rmt:send-receive-orig *default-log-port* runremote *rmt-mutex* areapath cmd rid params attemptnum: attemptnum area-dat: area-dat))))

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;


(define (rmt:send-receive-orig log-port runremote rmt-mutex toppath cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected

  #;(common:telemetry-log (conc "rmt:"(->string cmd))
                        payload: `((rid . ,rid)
                                   (params . ,params)))
                          
  
  ;; do all the prep locked under the rmt-mutex







|



>
>
|







74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
    (if (member cmd '(blah))
      (begin
	(mutex-lock! *send-receive-mutex*)
	(if (not *runremote*)(set! *runremote* (make-remote)))
	(let ((ulex:conn (remote-ulex:conn *runremote*)))
	  (if (not ulex:conn)(remote-ulex:conn-set! *runremote* (rmtmod:setup-ulex *toppath*)))
	  (rmtmod:send-receive-ulex ulex:conn cmd rid params attemptnum area-dat)))
      (rmt:send-receive-orig *default-log-port* runremote *rmt-mutex* areapath *db-multi-sync-mutex* cmd rid params attemptnum: attemptnum area-dat: area-dat))))

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
;;  add multi-sync-mutex 
;;
(define (rmt:send-receive-orig log-port runremote rmt-mutex toppath multi-sync-mutex cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected

  #;(common:telemetry-log (conc "rmt:"(->string cmd))
                        payload: `((rid . ,rid)
                                   (params . ,params)))
                          
  
  ;; do all the prep locked under the rmt-mutex
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
      (exit 1))

     ;; readonly mode, read request-  handle it - case 2
     ((and readonly-mode
           (member cmd api:read-only-queries)) 
      (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case 2")
      (rmt:open-qry-close-locally cmd 0 params)
      )

     ;; readonly mode, write request.  Do nothing, return #f
     (readonly-mode (extras-readonly-mode rmt-mutex log-port cmd params))

     ;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
     ;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
     ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
     ;;
     ;; reset the connection if it has been unused too long
     ((and runremote
           (remote-conndat runremote)
	   (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
	      (+ (http-transport:server-dat-get-last-access (remote-conndat runremote))
		 (remote-server-timeout runremote))))
      (debug:print-info 0 log-port "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.")
      (http-transport:close-connections area-dat: runremote)
      (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
      (mutex-unlock! rmt-mutex)
      (rmt:send-receive-orig log-port runremote rmt-mutex toppath cmd rid params attemptnum: attemptnum))
     

     ;; on homehost and this is a read
     ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
	   (cdr (remote-hh-dat runremote))       ;; on homehost
           (member cmd api:read-only-queries))   ;; this is a read
      (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case  5")
      (rmt:open-qry-close-locally cmd 0 params))

     ;; on homehost and this is a write, we already have a server, but server has died
     ((and (cdr (remote-hh-dat runremote))           ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url runremote)             ;; have a server
           (not (server:ping (remote-server-url runremote))))  ;; server has died. NOTE: this is not a cheap call! Need better approach.
      (set! *runremote* (make-remote))
      (remote-force-server-set! runremote (common:force-server?))
      (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case  6")
      (rmt:send-receive-orig log-port runremote rmt-mutex toppath cmd rid params attemptnum: attemptnum))

     ;; on homehost and this is a write, we already have a server
     ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
	   (cdr (remote-hh-dat runremote))           ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url runremote))            ;; have a server
      (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case  4.1")
      (rmt:open-qry-close-locally cmd 0 params))

     ;;  on homehost, no server contact made and this is a write, passively start a server 
     ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
	   (cdr (remote-hh-dat runremote))           ;; have homehost
           (not (remote-server-url runremote))       ;; no connection yet
	   (not (member cmd api:read-only-queries))) ;; not a read-only query
      (debug:print-info 12 log-port "rmt:send-receive, case  8")
      (let ((server-url  (server:check-if-running toppath))) ;; (server:read-dotserver->url toppath))) ;; (server:check-if-running toppath))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
	(if server-url
	    (remote-server-url-set! runremote server-url) ;; the string can be consumed by the client setup if needed
	    (if (common:force-server?)
		(server:start-and-wait toppath)
		(server:kind-run toppath))))
      (remote-force-server-set! runremote (common:force-server?))
      (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case  8.1")
      (rmt:open-qry-close-locally cmd 0 params))

     ((or (and (remote-force-server runremote)              ;; we are forcing a server and don't yet have a connection to one
	       (not (remote-conndat runremote)))
	  (and (not (cdr (remote-hh-dat runremote)))        ;; not on a homehost 
	       (not (remote-conndat runremote))))           ;; and no connection
      (debug:print-info 12 log-port "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote))
      (mutex-unlock! rmt-mutex)
      (if (not (server:check-if-running toppath)) ;; who knows, maybe one has started up?
	  (server:start-and-wait toppath))
      (remote-conndat-set! runremote (rmt:get-connection-info toppath)) ;; calls client:setup which calls client:setup-http
      (rmt:send-receive-orig log-port runremote rmt-mutex toppath cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as

     ;; all set up if get this far, dispatch the query
     ((and (not (remote-force-server runremote))
	   (cdr (remote-hh-dat runremote))) ;; we are on homehost
      (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case 10")
      (rmt:open-qry-close-locally cmd (if rid rid 0) params))

     ;; not on homehost, do server query
     (else (extras-case-11 log-port runremote cmd params attemptnum rid)))))

;; bunch of small functions factored out of send-receive to make debug easier
;;








|



















|








|










|








|
















|










|






|







115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
      (exit 1))

     ;; readonly mode, read request-  handle it - case 2
     ((and readonly-mode
           (member cmd api:read-only-queries)) 
      (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case 2")
      (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params)
      )

     ;; readonly mode, write request.  Do nothing, return #f
     (readonly-mode (extras-readonly-mode rmt-mutex log-port cmd params))

     ;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
     ;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
     ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
     ;;
     ;; reset the connection if it has been unused too long
     ((and runremote
           (remote-conndat runremote)
	   (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
	      (+ (http-transport:server-dat-get-last-access (remote-conndat runremote))
		 (remote-server-timeout runremote))))
      (debug:print-info 0 log-port "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.")
      (http-transport:close-connections area-dat: runremote)
      (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
      (mutex-unlock! rmt-mutex)
      (rmt:send-receive-orig log-port runremote rmt-mutex toppath multi-sync-mutex cmd rid params attemptnum: attemptnum))
     

     ;; on homehost and this is a read
     ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
	   (cdr (remote-hh-dat runremote))       ;; on homehost
           (member cmd api:read-only-queries))   ;; this is a read
      (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case  5")
      (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params))

     ;; on homehost and this is a write, we already have a server, but server has died
     ((and (cdr (remote-hh-dat runremote))           ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url runremote)             ;; have a server
           (not (server:ping (remote-server-url runremote))))  ;; server has died. NOTE: this is not a cheap call! Need better approach.
      (set! *runremote* (make-remote))
      (remote-force-server-set! runremote (common:force-server?))
      (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case  6")
      (rmt:send-receive-orig log-port runremote rmt-mutex toppath  multi-sync-mutex cmd rid params attemptnum: attemptnum))

     ;; on homehost and this is a write, we already have a server
     ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
	   (cdr (remote-hh-dat runremote))           ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url runremote))            ;; have a server
      (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case  4.1")
      (rmt:open-qry-close-locally  log-port multi-sync-mutex cmd 0 params))

     ;;  on homehost, no server contact made and this is a write, passively start a server 
     ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
	   (cdr (remote-hh-dat runremote))           ;; have homehost
           (not (remote-server-url runremote))       ;; no connection yet
	   (not (member cmd api:read-only-queries))) ;; not a read-only query
      (debug:print-info 12 log-port "rmt:send-receive, case  8")
      (let ((server-url  (server:check-if-running toppath))) ;; (server:read-dotserver->url toppath))) ;; (server:check-if-running toppath))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
	(if server-url
	    (remote-server-url-set! runremote server-url) ;; the string can be consumed by the client setup if needed
	    (if (common:force-server?)
		(server:start-and-wait toppath)
		(server:kind-run toppath))))
      (remote-force-server-set! runremote (common:force-server?))
      (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case  8.1")
      (rmt:open-qry-close-locally  log-port multi-sync-mutex cmd 0 params))

     ((or (and (remote-force-server runremote)              ;; we are forcing a server and don't yet have a connection to one
	       (not (remote-conndat runremote)))
	  (and (not (cdr (remote-hh-dat runremote)))        ;; not on a homehost 
	       (not (remote-conndat runremote))))           ;; and no connection
      (debug:print-info 12 log-port "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote))
      (mutex-unlock! rmt-mutex)
      (if (not (server:check-if-running toppath)) ;; who knows, maybe one has started up?
	  (server:start-and-wait toppath))
      (remote-conndat-set! runremote (rmt:get-connection-info toppath)) ;; calls client:setup which calls client:setup-http
      (rmt:send-receive-orig log-port runremote rmt-mutex toppath multi-sync-mutex cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as

     ;; all set up if get this far, dispatch the query
     ((and (not (remote-force-server runremote))
	   (cdr (remote-hh-dat runremote))) ;; we are on homehost
      (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case 10")
      (rmt:open-qry-close-locally  log-port multi-sync-mutex cmd (if rid rid 0) params))

     ;; not on homehost, do server query
     (else (extras-case-11 log-port runremote cmd params attemptnum rid)))))

;; bunch of small functions factored out of send-receive to make debug easier
;;

299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
			     (if (> tot 10)
				 (cons newmax-cmd currmax)
				 (cons 'none 0))
			     (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
    (mutex-unlock! *db-stats-mutex*)
    res))

;; Execute the query CMD with PARAMS directly against the locally opened
;; database (via db:setup / api:execute-requests) instead of going through a
;; server.
;;
;;   cmd        - symbol naming the query; it is converted to a string and
;;                handed to api:execute-requests together with params
;;   run-id     - not used by the query itself, only passed along on retries
;;   params     - parameters for the query
;;   remretries - number of retries remaining when the query reports failure
;;
;; Returns the payload (element 1) of the result vector on success. If the
;; query reports failure it is retried after a random delay of up to five
;; seconds and the value of the retry is returned; once the retries are
;; exhausted #f is returned.
;;
;; A write against a database file that is not writable is not executed; an
;; error is logged and an empty list is returned.
;;
(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
  (let* ((qry-is-write   (not (member cmd api:read-only-queries)))
         (db-file-path   (db:dbfile-path))
         (dbstruct-local (db:setup #t))
         (read-only      (not (file-write-access? db-file-path)))
         (resdat         (if (not (and read-only qry-is-write))
                             (let ((v (api:execute-requests dbstruct-local (vector (symbol->string cmd) params))))
                               ;; There has been a long history of strange errors from the values
                               ;; returned when things go wrong. Copying the vector inside the
                               ;; handler is an attempt to force detection of a corrupted record
                               ;; and to recover gracefully with a dummy result.
                               (handle-exceptions
                                exn
                                (begin
                                  (debug:print 0 *default-log-port* "ERROR: bad data from server " v " message: "  ((condition-property-accessor 'exn 'message) exn))
                                  (vector #t '()))
                                (if (and (vector? v)
                                         (> (vector-length v) 1))
                                    (vector (vector-ref v 0) (vector-ref v 1))
                                    (vector #t '())))) ;; not a usable vector, return a dummy
                             (vector #t '()))) ;; write to a read-only db is skipped
         (success        (vector-ref resdat 0))
         (res            (vector-ref resdat 1)))
    (if (and read-only qry-is-write)
        (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd))
    (cond
     (success
      ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it
      (if qry-is-write
          (let ((start-time (current-seconds)))
            (mutex-lock! *db-multi-sync-mutex*)
            (set! *db-last-access* start-time)  ;; THIS IS PROBABLY USELESS? (we are on a client)
            (mutex-unlock! *db-multi-sync-mutex*)))
      res)
     ((> remretries 0)
      (debug:print-error 0 *default-log-port* "local query failed. Trying again.")
      (thread-sleep! (/ (random 5000) 1000)) ;; some random delay
      ;; the retry is in tail position so that its result is what the caller receives
      (rmt:open-qry-close-locally cmd run-id params remretries: (- remretries 1)))
     (else
      (debug:print-error 0 *default-log-port* "too many retries in rmt:open-qry-close-locally, giving up")
      #f))))

(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
  (let* ((run-id   (if run-id run-id 0))
	 (res  	   (handle-exceptions
		    exn
		    #f







|










|











|



|

|

|






|
|
|







301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
			     (if (> tot 10)
				 (cons newmax-cmd currmax)
				 (cons 'none 0))
			     (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
    (mutex-unlock! *db-stats-mutex*)
    res))

;; Execute the query CMD with PARAMS directly against the locally opened
;; database (via db:setup / api:execute-requests) instead of going through a
;; server.
;;
;;   log-port         - port that all diagnostics from this procedure are written to
;;   multi-sync-mutex - mutex locked and released after a successful write
;;   cmd              - symbol naming the query; it is converted to a string and
;;                      handed to api:execute-requests together with params
;;   run-id           - not used by the query itself, only passed along on retries
;;   params           - parameters for the query
;;   remretries       - number of retries remaining when the query reports failure
;;
;; Returns the payload (element 1) of the result vector on success. If the
;; query reports failure it is retried after a random delay of up to five
;; seconds and the value of the retry is returned; once the retries are
;; exhausted #f is returned.
;;
;; A write against a database file that is not writable is not executed; an
;; error is logged and an empty list is returned.
;;
(define (rmt:open-qry-close-locally log-port multi-sync-mutex cmd run-id params #!key (remretries 5))
  (let* ((qry-is-write   (not (member cmd api:read-only-queries)))
         (db-file-path   (db:dbfile-path))
         (dbstruct-local (db:setup #t))
         (read-only      (not (file-write-access? db-file-path)))
         (resdat         (if (not (and read-only qry-is-write))
                             (let ((v (api:execute-requests dbstruct-local (vector (symbol->string cmd) params))))
                               ;; There has been a long history of strange errors from the values
                               ;; returned when things go wrong. Copying the vector inside the
                               ;; handler is an attempt to force detection of a corrupted record
                               ;; and to recover gracefully with a dummy result.
                               (handle-exceptions
                                exn
                                (begin
                                  (debug:print 0 log-port "ERROR: bad data from server " v " message: "  ((condition-property-accessor 'exn 'message) exn))
                                  (vector #t '()))
                                (if (and (vector? v)
                                         (> (vector-length v) 1))
                                    (vector (vector-ref v 0) (vector-ref v 1))
                                    (vector #t '())))) ;; not a usable vector, return a dummy
                             (vector #t '()))) ;; write to a read-only db is skipped
         (success        (vector-ref resdat 0))
         (res            (vector-ref resdat 1)))
    (if (and read-only qry-is-write)
        (debug:print 0 log-port "ERROR: attempt to write to read-only database ignored. cmd=" cmd))
    (cond
     (success
      ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it
      (if qry-is-write
          (begin
            ;; The critical section is currently empty (the update of
            ;; *db-last-access* was disabled); the lock/unlock pair is kept as is.
            (mutex-lock! multi-sync-mutex)
            ;; (set! *db-last-access* start-time)  ;; THIS IS PROBABLY USELESS? (we are on a client)
            (mutex-unlock! multi-sync-mutex)))
      res)
     ((> remretries 0)
      (debug:print-error 0 log-port "local query failed. Trying again.")
      (thread-sleep! (/ (random 5000) 1000)) ;; some random delay
      ;; the retry is in tail position so that its result is what the caller receives
      (rmt:open-qry-close-locally log-port multi-sync-mutex cmd run-id params remretries: (- remretries 1)))
     (else
      (debug:print-error 0 log-port "too many retries in rmt:open-qry-close-locally, giving up")
      #f))))

(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
  (let* ((run-id   (if run-id run-id 0))
	 (res  	   (handle-exceptions
		    exn
		    #f