Megatest

Check-in [0869d10a93]
Login
Overview
Comment:still broke but differently
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.65-wip
Files: files | file ages | folders
SHA1: 0869d10a938cbcba1b028d87fd7a6cc843303683
User & Date: matt on 2019-10-02 00:05:38
Other Links: branch diff | manifest | tags
Context
2019-10-02
17:46
Remove muffs from *toppath* in calc-ro-mode. Added trace to rmtmod.scm while debugging. check-in: dc7ab217fe user: mrwellan tags: v1.65-wip
00:05
still broke but differently check-in: 0869d10a93 user: matt tags: v1.65-wip
2019-10-01
22:54
Broke it with mutex deadlock check-in: 71a141e42e user: matt tags: v1.65-wip
Changes

Modified rmt.scm from [5ad0893a6b] to [a8b59bdcdd].

74
75
76
77
78
79
80
81

82
83
84
85
86
87
88
89
90
91
92
93
94
95

96
97
98
99
100
101
102
74
75
76
77
78
79
80

81
82
83
84
85
86
87
88
89
90
91
92
93
94

95
96
97
98
99
100
101
102







-
+













-
+







    (if (member cmd '(blah))
      (begin
	(mutex-lock! *send-receive-mutex*)
	(if (not *runremote*)(set! *runremote* (make-remote)))
	(let ((ulex:conn (remote-ulex:conn *runremote*)))
	  (if (not ulex:conn)(remote-ulex:conn-set! *runremote* (rmtmod:setup-ulex *toppath*)))
	  (rmtmod:send-receive-ulex ulex:conn cmd rid params attemptnum area-dat)))
      (rmt:send-receive-orig *default-log-port* runremote *rmt-mutex* areapath *db-multi-sync-mutex* cmd rid params attemptnum: attemptnum area-dat: area-dat))))
      (rmt:send-receive-orig *default-log-port* runremote *rmt-mutex* areapath *db-multi-sync-mutex* cmd rid params attemptnum: attemptnum area-dat: area-dat ro-queries: api:read-only-queries))))

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
;;  add multi-sync-mutex 
;;
(define (rmt:send-receive-orig log-port runremote rmt-mutex toppath multi-sync-mutex cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected

  #;(common:telemetry-log (conc "rmt:"(->string cmd))
                        payload: `((rid . ,rid)
                                   (params . ,params)))
                          
  
  ;; do all the prep locked under the rmt-mutex
  (mutex-lock! rmt-mutex)
  ;;(mutex-lock! rmt-mutex)
  
  ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote
  ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
  ;; 3. do the query, if on homehost use local access
  ;;
  (let* ((start-time    (current-seconds)) ;; snapshot time so all use cases get same value

113
114
115
116
117
118
119
120

121
122

123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141

142
143
144
145
146
147
148
149

150
151

152
153
154
155
156
157
158
159
160

161
162
163
164
165
166
167
168
169

170
171

172
173
174
175
176
177
178
179
180
181
182
183
184
185
186

187
188

189
190
191
192
193
194
195

196
197
198
199
200
201
202
203
204

205
206

207
208
209
210
211
212
213
113
114
115
116
117
118
119

120
121

122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140

141
142
143
144
145
146
147
148

149
150

151
152
153
154
155
156
157
158
159

160
161
162
163
164
165
166
167
168

169
170

171
172
173
174
175
176
177
178
179
180
181
182
183
184
185

186
187

188
189
190
191
192
193
194

195
196
197
198
199
200
201
202
203

204
205

206
207
208
209
210
211
212
213







-
+

-
+


















-
+







-
+

-
+








-
+








-
+

-
+














-
+

-
+






-
+








-
+

-
+







     ((> attemptnum 15)
      (debug:print 0 log-port "ERROR: 15 tries to start/connect to server. Giving up.")
      (exit 1))

     ;; readonly mode, read request-  handle it - case 2
     ((and readonly-mode
           (member cmd api:read-only-queries)) 
      (mutex-unlock! rmt-mutex)
      ;; (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case 2")
      (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params)
      (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params ro-queries: api:read-only-queries)
      )

     ;; readonly mode, write request.  Do nothing, return #f
     (readonly-mode (extras-readonly-mode rmt-mutex log-port cmd params))

     ;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
     ;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
     ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
     ;;
     ;; reset the connection if it has been unused too long
     ((and runremote
           (remote-conndat runremote)
	   (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
	      (+ (http-transport:server-dat-get-last-access (remote-conndat runremote))
		 (remote-server-timeout runremote))))
      (debug:print-info 0 log-port "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.")
      (http-transport:close-connections area-dat: runremote)
      (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
      (mutex-unlock! rmt-mutex)
      ;; (mutex-unlock! rmt-mutex)
      (rmt:send-receive-orig log-port runremote rmt-mutex toppath multi-sync-mutex cmd rid params attemptnum: attemptnum))
     

     ;; on homehost and this is a read
     ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
	   (cdr (remote-hh-dat runremote))       ;; on homehost
           (member cmd api:read-only-queries))   ;; this is a read
      (mutex-unlock! rmt-mutex)
      ;; (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case  5")
      (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params))
      (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params ro-queries: api:read-only-queries))

     ;; on homehost and this is a write, we already have a server, but server has died
     ((and (cdr (remote-hh-dat runremote))           ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url runremote)             ;; have a server
           (not (server:ping (remote-server-url runremote))))  ;; server has died. NOTE: this is not a cheap call! Need better approach.
      (set! *runremote* (make-remote))
      (remote-force-server-set! runremote (common:force-server?))
      (mutex-unlock! rmt-mutex)
      ;; (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case  6")
      (rmt:send-receive-orig log-port runremote rmt-mutex toppath  multi-sync-mutex cmd rid params attemptnum: attemptnum))

     ;; on homehost and this is a write, we already have a server
     ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
	   (cdr (remote-hh-dat runremote))           ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url runremote))            ;; have a server
      (mutex-unlock! rmt-mutex)
      ;;(mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case  4.1")
      (rmt:open-qry-close-locally  log-port multi-sync-mutex cmd 0 params))
      (rmt:open-qry-close-locally  log-port multi-sync-mutex cmd 0 params ro-queries: api:read-only-queries))

     ;;  on homehost, no server contact made and this is a write, passively start a server 
     ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
	   (cdr (remote-hh-dat runremote))           ;; have homehost
           (not (remote-server-url runremote))       ;; no connection yet
	   (not (member cmd api:read-only-queries))) ;; not a read-only query
      (debug:print-info 12 log-port "rmt:send-receive, case  8")
      (let ((server-url  (server:check-if-running toppath))) ;; (server:read-dotserver->url toppath))) ;; (server:check-if-running toppath))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
	(if server-url
	    (remote-server-url-set! runremote server-url) ;; the string can be consumed by the client setup if needed
	    (if (common:force-server?)
		(server:start-and-wait toppath)
		(server:kind-run toppath))))
      (remote-force-server-set! runremote (common:force-server?))
      (mutex-unlock! rmt-mutex)
      ;; (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case  8.1")
      (rmt:open-qry-close-locally  log-port multi-sync-mutex cmd 0 params))
      (rmt:open-qry-close-locally  log-port multi-sync-mutex cmd 0 params ro-queries: api:read-only-queries))

     ((or (and (remote-force-server runremote)              ;; we are forcing a server and don't yet have a connection to one
	       (not (remote-conndat runremote)))
	  (and (not (cdr (remote-hh-dat runremote)))        ;; not on a homehost 
	       (not (remote-conndat runremote))))           ;; and no connection
      (debug:print-info 12 log-port "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote))
      (mutex-unlock! rmt-mutex)
      ;;(mutex-unlock! rmt-mutex)
      (if (not (server:check-if-running toppath)) ;; who knows, maybe one has started up?
	  (server:start-and-wait toppath))
      (remote-conndat-set! runremote (rmt:get-connection-info toppath)) ;; calls client:setup which calls client:setup-http
      (rmt:send-receive-orig log-port runremote rmt-mutex toppath multi-sync-mutex cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as

     ;; all set up if get this far, dispatch the query
     ((and (not (remote-force-server runremote))
	   (cdr (remote-hh-dat runremote))) ;; we are on homehost
      (mutex-unlock! rmt-mutex)
      ;;(mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case 10")
      (rmt:open-qry-close-locally  log-port multi-sync-mutex cmd (if rid rid 0) params))
      (rmt:open-qry-close-locally  log-port multi-sync-mutex cmd (if rid rid 0) params ro-queries: api:read-only-queries))

     ;; not on homehost, do server query
     (else (extras-case-11 log-port runremote cmd params attemptnum rid)))))

;; bunch of small functions factored out of send-receive to make debug easier
;;

Modified rmtmod.scm from [dce20d5361] to [fd791a8953].

73
74
75
76
77
78
79
80
81


82
83
84
85
86
87
88
73
74
75
76
77
78
79


80
81
82
83
84
85
86
87
88







-
-
+
+







  ;; db stuff for local db access
  (set! db:dbfile-path                   dbfile-path)
  (set! db:setup                         dbsetup)
  (set! apt:execute-requests             exec-req)
  (set! api:read-only-queries	         read-only-queries)
  )

(define (rmt:open-qry-close-locally log-port multi-sync-mutex cmd run-id params #!key (remretries 5))
  (let* ((qry-is-write   (not (member cmd api:read-only-queries)))
(define (rmt:open-qry-close-locally log-port multi-sync-mutex cmd run-id params #!key (ro-queries '())(remretries 5))
  (let* ((qry-is-write   (not (member cmd ro-queries)))
	 (db-file-path   (db:dbfile-path)) ;;  0))
	 (dbstruct-local (db:setup #t))  ;; make-dbr:dbstruct path:  dbdir local: #t)))
	 (read-only      (not (file-write-access? db-file-path)))
	 (start          (current-milliseconds))
	 (resdat         (if (not (and read-only qry-is-write))
			     (let ((v (api:execute-requests dbstruct-local (vector (symbol->string cmd) params))))
			       (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
102
103
104
105
106
107
108
109

110
111
112
113
114
115
116
102
103
104
105
106
107
108

109
110
111
112
113
114
115
116







-
+







    (if (and read-only qry-is-write)
        (debug:print 0 log-port "ERROR: attempt to write to read-only database ignored. cmd=" cmd))
    (if (not success)
	(if (> remretries 0)
	    (begin
	      (debug:print-error 0 log-port "local query failed. Trying again.")
	      (thread-sleep! (/ (random 5000) 1000)) ;; some random delay 
	      (rmt:open-qry-close-locally log-port multi-sync-mutex cmd run-id params remretries: (- remretries 1)))
	      (rmt:open-qry-close-locally log-port multi-sync-mutex cmd run-id params ro-queries: ro-queries remretries: (- remretries 1)))
	    (begin
	      (debug:print-error 0 log-port "too many retries in rmt:open-qry-close-locally, giving up")
	      #f))
	(begin
	  ;; (rmt:update-db-stats run-id cmd params duration)
	  ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it
	  #;(if qry-is-write
132
133
134
135
136
137
138
139

140
141
142
143
144
145
146

147
148
149
150

151
152
153
154
155
156
157
132
133
134
135
136
137
138

139
140
141
142
143
144
145

146
147
148
149

150
151
152
153
154
155
156
157







-
+






-
+



-
+







	    (begin
	      (remote-ro-mode-set! runremote ro-mode)
	      (remote-ro-mode-checked-set! runremote #t)
	      ro-mode)
	    ro-mode))))

(define (extras-readonly-mode rmt-mutex log-port cmd params)
  (mutex-unlock! rmt-mutex)
  ;;(mutex-unlock! rmt-mutex)
  (debug:print-info 12 log-port "rmt:send-receive, case 3")
  (debug:print 0 log-port "WARNING: write transaction requested on a readonly area.  cmd="cmd" params="params)
  #f)

(define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params)
  (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)
  (mutex-lock! *rmt-mutex*)
  ;;(mutex-lock! *rmt-mutex*)
  (remote-conndat-set!    runremote #f)
  (http-transport:close-connections area-dat: runremote)
  (remote-server-url-set! runremote #f)
  (mutex-unlock! *rmt-mutex*)
  ;;(mutex-unlock! *rmt-mutex*)
  (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9.1")
  (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
  
(define (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
  (if (and (vector? res)
	   (eq? (vector-length res) 2)
	   (eq? (vector-ref res 1) 'overloaded)) ;; since we are
168
169
170
171
172
173
174
175

176
177
178

179
180
181
182
183
184
185
168
169
170
171
172
173
174

175
176
177

178
179
180
181
182
183
184
185







-
+


-
+







						 ;; is bad or the
						 ;; server is
						 ;; overloaded and we
						 ;; want to ease off
						 ;; the queries
      (let ((wait-delay (+ attemptnum (* attemptnum 10))))
	(debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
	(mutex-lock! *rmt-mutex*)
	;;(mutex-lock! *rmt-mutex*)
	(http-transport:close-connections area-dat: runremote)
	(set! *runremote* #f) ;; force starting over
	(mutex-unlock! *rmt-mutex*)
	;;(mutex-unlock! *rmt-mutex*)
	(thread-sleep! wait-delay)
	(rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
      res)) ;; All good, return res

;;======================================================================
;; ulex and steps stuff
;;======================================================================