Megatest

Check-in [67b18cc991]
Login
Overview
Comment:Major refactor/clean up of client connection stuff. Removed old crufty approach of having both client and server side calls in http-transport. Plan would be to eventually get rid of http-transport.

Open files slowly grows. bad argument type in http-transport. Line 285 (close-connection! fullurl) is wrong.

Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.80-refactor
Files: files | file ages | folders
SHA1: 67b18cc991f1c7fb802943e0d3f0768c260f8059
User & Date: matt on 2023-01-16 10:00:35
Original Comment: Major refactor/clean up of client connection stuff. Removed old crufty approach of having both client and server side calls in http-transport. Plan would be to eventually get rid of http-transport.
Other Links: branch diff | manifest | tags
Context
2023-01-18
21:46
Removed bad call to close-connection! check-in: 463fffbd79 user: matt tags: v1.80-refactor-fix1
2023-01-16
12:17
last of overt connection issues addressed. TCP connections growing very slowly. However now the db issues are manifesting. check-in: 22f2c33711 user: matt tags: v1.80-transport-cleanish, v1.80-refactor
10:00
Major refactor/clean up of client connection stuff. Removed old crufty approach of having both client and server side calls in http-transport. Plan would be to eventually get rid of http-transport.

Open files slowly grows. bad argument type in http-transport. Line 285 (close-connection! fullurl) is wrong. check-in: 67b18cc991 user: matt tags: v1.80-refactor

04:20
Original call to close-connections! was correct check-in: 883ba16e82 user: matt tags: v1.80
Changes

Modified client.scm from [04727fe923] to [c0f004bcfc].

26
27
28
29
30
31
32







33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64










65

66
67

68
69
70
71

72
73
74
75
76
77
78
79
80
81
82
83


84


85

86
87
88
89
90
91
92
93







94
95
96
97



98
99
100


101
102

103
104
105


106
107
108
109



110
111
112

113
114

115
116
117
118
119
120
121

122
123
124






















26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78

79
80

81
82
83
84

85
86
87
88
89
90
91
92
93
94
95


96
97
98
99
100

101
102
103
104





105
106
107
108
109
110
111
112
113


114
115
116
117


118
119


120



121
122




123
124
125
126
127

128
129

130
131
132
133
134
135
136

137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162







+
+
+
+
+
+
+
















-
-
-













+
+
+
+
+
+
+
+
+
+
-
+

-
+



-
+










-
-
+
+

+
+
-
+



-
-
-
-
-
+
+
+
+
+
+
+


-
-
+
+
+

-
-
+
+
-
-
+
-
-
-
+
+
-
-
-
-
+
+
+


-
+

-
+






-
+



+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

(declare (unit client))

(declare (uses common))
(declare (uses db))
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.

(module client
*

)

(import client)

(include "common_records.scm")
(include "db_records.scm")

;; client:get-signature
(define (client:get-signature)
  (if *my-client-signature* *my-client-signature*
      (let ((sig (conc (get-host-name) " " (current-process-id))))
	(set! *my-client-signature* sig)
	*my-client-signature*)))

;; Not currently used! But, I think it *should* be used!!!
#;(define (client:logout serverdat)
  (let ((ok (and (socket? serverdat)
		 (cdb:logout serverdat *toppath* (client:get-signature)))))
    ok))

(define (client:setup areapath #!key (remaining-tries 100) (failed-connects 0))
  (client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects))

;; Do all the connection work, look up the transport type and set up the
;; connection if required.
;;
;; There are two scenarios. 
;;   1. We are a test manager and we received *transport-type* and *runremote* via cmdline
;;   2. We are a run tests, list runs or other interactive process and we must figure out
;;      *transport-type* and *runremote* from the monitor.db
;;
;; client:setup
;;
;; lookup_server, need to remove *runremote* stuff
;;
 
;;(define (http-transport:server-dat-make-url runremote)
(define (client:get-url runremote)
  (if (and (remote-iface runremote)
	   (remote-port  runremote))
      (conc "http://" 
	    (remote-iface runremote)
	    ":"
	    (remote-port  runremote))
      #f))

(define (client:setup-http areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f))
(define (client:setup areapath runremote #!key (remaining-tries 100) (failed-connects 0)(area-dat #f))
  (mutex-lock! *rmt-mutex*)
  (let ((res (client:setup-http-baby areapath remaining-tries: remaining-tries failed-connects: failed-connects area-dat: area-dat)))
  (let ((res (client:setup-http areapath runremote remaining-tries: remaining-tries failed-connects: failed-connects area-dat: area-dat)))
    (mutex-unlock! *rmt-mutex*)
    res))

(define (client:setup-http-baby areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f))
(define (client:setup-http areapath runremote #!key (remaining-tries 100) (failed-connects 0))
  (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries)
  (server:start-and-wait areapath)
  (if (<= remaining-tries 0)
      (begin
	(debug:print-error 0 *default-log-port* "failed to start or connect to server")
	(exit 1))
      ;;
      ;; Alternatively here, we can get the list of candidate servers and work our way
      ;; through them searching for a good one.
      ;;
      (let* ((server-dat (server:choose-server areapath 'best))
	     (runremote  (or area-dat *runremote*)))
      (let* ((server-dat (server:choose-server areapath 'best))) ;; list host port start-time server-id pid
;;	     (runremote  (or area-dat *runremote*)))
	(if (not server-dat) ;; no server found
	    (begin
	      (if (< remaining-tries 99)(thread-sleep! 1)) ;; obviously it needs time
	    (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))
	      (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1)))
	    (match server-dat
	      ((host port start-time server-id pid)
	       (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
	       (if (and (not area-dat)
			(not *runremote*))
                   (begin       
		     (set! *runremote* (make-remote))
                     (let* ((server-info (remote-server-info *runremote*))) 
	       (if (not runremote)
                   (begin
		     ;; Here we are creating a runremote where there was none or it was clobbered with #f
		     ;;
		     (set! runremote (make-remote))
                     (let* ((server-info (server:check-if-running areapath)))
		       (remote-server-info-set! runremote server-info)
                       (if server-info
                           (begin
                             (remote-server-url-set! *runremote* (server:record->url server-info))
                             (remote-server-id-set! *runremote* (server:record->id server-info)))))))
                             (remote-server-url-set! runremote (server:record->url server-info))
                             (remote-server-id-set! runremote (server:record->id server-info)))))))
	       ;; at this point we have a runremote
	       (if (and host port server-id)
		   (let* ((start-res (http-transport:client-connect host port server-id))
			  (ping-res  (rmt:login-no-auto-client-setup start-res)))
		   (let* ((nada     (client:connect host port server-id runremote))
			  (ping-res (rmt:login-no-auto-client-setup runremote)))
		     (if (and start-res
			      ping-res)
		     (if ping-res
			 (let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago
			   (if runremote
			       (begin
			 (if runremote
			     (begin
				 (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res)
				 (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
				 start-res)
			       (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))))
			       (debug:print-info 2 *default-log-port* "connected to " (client:get-url runremote))
			       runremote)
			     (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1)))
			 (begin    ;; login failed but have a server record, clean out the record and try again
			   (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid.  Fixes part of Randy;s ticket 1405717332
			   (http-transport:close-connections *runremote*)
			   (http-transport:close-connections runremote)
			   (thread-sleep! 1)
			   (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))
			   (client:setup-http-baby areapath runremote remaining-tries: (- remaining-tries 1))
			   )))
		   (begin    ;; no server registered
		     ;; (server:kind-run areapath)
		     (server:start-and-wait areapath)
		     (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries)
		     (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries))))  ;; give server a little time to start up, randomize a little to avoid start storms.
		     (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)))))
		     (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1)))))
	      (else
	       (debug:print 0 *default-log-port* "ERROR: malformed server-dat="server-dat)))))))

;;
;; connect - stored in remote-condat
;;
;; (define (http-transport:client-connect iface port server-id runremote)
(define (client:connect iface port server-id runremote-in)
  (let* ((runremote (or runremote-in
			(make-runremote))))
    (debug:print-info 2 *default-log-port* "Connecting to server at "iface":"port", id "server-id)
    (let* ((api-url      (conc "http://" iface ":" port "/api"))
	   (api-uri      (uri-reference (conc "http://" iface ":" port "/api")))
	   (api-req      (make-request method: 'POST uri: api-uri)))
      ;;	 (server-dat   (vector iface port api-uri api-url api-req (current-seconds) server-id)))
      (remote-iface-set!   runremote iface)
      (remote-port-set!    runremote port)
      (remote-server-id-set! runremote server-id)
      (remote-connect-time-set! runremote (current-seconds))
      (remote-last-access-set! runremote (current-seconds))
      (remote-api-url-set! runremote api-url)
      (remote-api-uri-set! runremote api-uri)
      (remote-api-req-set! runremote api-req)
      runremote)))

Modified common.scm from [9019b001c0] to [55cc68e51c].

313
314
315
316
317
318
319

320
321
322
323
324
325
326
327

328
329
330



331
332
333
334
335








336
337
338
339
340
341
342
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327

328
329


330
331
332

333
334
335

336
337
338
339
340
341
342
343
344
345
346
347
348
349
350







+







-
+

-
-
+
+
+
-



-
+
+
+
+
+
+
+
+







    ((abort) "ABORT")
    ((skip) "SKIP")
    (else "FAIL")))

(define (common:logpro-exit-code->test-status exit-code)
  (status-sym->string (common:logpro-exit-code->status-sym exit-code)))

;; 
(defstruct remote
  (hh-dat            (let ((res (or (server:choose-server *toppath* 'homehost)
				    (cons #f #f))))
		       (assert (pair? res)(conc "FATAL: hh-dat should be a pair, got "res))
		       res))
  (server-url        #f) ;; (server:check-if-running *toppath*) #f))
  (server-id         #f)
  (server-info       (if *toppath* (server:check-if-running *toppath*) #f))
  (server-info       #f) ;; (if *toppath* (server:check-if-running *toppath*) #f))
  (last-server-check 0)  ;; last time we checked to see if the server was alive
  (connect-time      (current-seconds))
  (conndat           #f)
  (connect-time      (current-seconds)) ;; when we first connected
  (last-access       (current-seconds)) ;; last time we talked to server
  (conndat           #f) ;; iface port api-uri api-url api-req seconds server-id
  (transport         *transport-type*)
  (server-timeout    (server:expiration-timeout))
  (force-server      #f)
  (ro-mode           #f)  
  (ro-mode-checked   #f)) ;; flag that indicates we have checked for ro-mode
  (ro-mode-checked   #f) ;; flag that indicates we have checked for ro-mode

  ;; conndat stuff
  (iface             #f) ;; TODO: Consolidate this data with server-url and server-info above
  (port              #f)
  (api-url           #f)
  (api-uri           #f)
  (api-req           #f))

;; launching and hosts
(defstruct host
  (reachable    #f)
  (last-update  0)
  (last-used    0)
  (last-cpuload 1))

Modified http-transport.scm from [bf24c3b619] to [7539e23e31].

239
240
241
242
243
244
245
246

247
248
249
250

251

252
253
254
255
256

257
258
259
260
261
262
263
264
265
266
267
239
240
241
242
243
244
245

246




247

248
249
250
251


252




253
254
255
256
257
258
259







-
+
-
-
-
-
+
-
+



-
-
+
-
-
-
-








(define (http-transport:inc-requests-and-prep-to-close-all-connections)
  (mutex-lock! *http-mutex*)
  (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)))

;; Send "cmd" with json payload "params" to serverdat and receive result
;;
(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3)(area-dat #f))
(define (http-transport:client-api-send-receive run-id runremote cmd params #!key (numretries 3))
  (let* ((fullurl    (if (vector? serverdat)
			 (http-transport:server-dat-get-api-req serverdat)
			 (begin
			   (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
  (assert (remote? runremote) "FATAL: http-transport:client-api-send-receive called with serverdat="serverdat)
			   (exit 1))))
  (let* ((fullurl    (remote-api-req runremote))
	 (res        (vector #f "uninitialized"))
	 (success    #t)
	 (sparams    (db:obj->string params transport: 'http))
	 (runremote  (or area-dat *runremote*))
         (server-id   (if (vector? serverdat) 
         (server-id  (remote-server-id runremote)))
                           (http-transport:server-dat-get-server-id serverdat)
                           (begin
			     (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
			     (exit 1)))))
       (debug:print-info 11 *default-log-port* "cmd=" cmd " fullurl=" fullurl " server-id=" server-id " current time:" (current-seconds)) 

       ;; set up the http-client here
       (max-retry-attempts 1)
       ;; consider all requests indempotent
       (retry-request? (lambda (request)
			 #f))
284
285
286
287
288
289
290
291
292
293

294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
276
277
278
279
280
281
282



283



284




285
286
287
288
289
290
291







-
-
-
+
-
-
-

-
-
-
-







                                                    (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...")
                                                    (begin
                                                      (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
                                                      (debug:print 0 *default-log-port* " message: " msg ", exn=" exn)
                                                      (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey"))
                                                      (debug:print 0 *default-log-port* " call-chain: " call-chain)))
						;; what if another thread is communicating ok? Can't happen due to mutex
						(set! *runremote* #f)
						(set! runremote #f)
						;; (if runremote
						(http-transport:close-connections runremote)
						;;    (remote-conndat-set! runremote #f))
						;; Killing associated server to allow clean retry.")
						;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
						(mutex-unlock! *http-mutex*)
						;; (signal (make-composite-condition
						;;          (make-property-condition 'commfail 'message "failed to connect to server")))
						;; "communications failed"
						;; (close-all-connections!)
						(close-connection! fullurl)
						(db:obj->string #f))
					      (with-input-from-request ;; was dat
					       fullurl 
					       (list (cons 'key (or server-id   "thekey"))
						     (cons 'cmd cmd)
						     (cons 'params sparams))
345
346
347
348
349
350
351
352
353
354

355
356
357
358


359
360
361
362
363
364
365






366
367
368


369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
328
329
330
331
332
333
334



335




336
337
338






339
340
341
342
343
344



345
346




















347





















348
349
350
351
352
353
354







-
-
-
+
-
-
-
-
+
+

-
-
-
-
-
-
+
+
+
+
+
+
-
-
-
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-







		      (make-property-condition 
		       'timeout
		       'message "nmsg-transport:client-api-send-receive-raw timed out talking to server")))))))

;; careful closing of connections stored in *runremote*
;;
(define (http-transport:close-connections runremote)
  (let* (;; (runremote  (or area-dat *runremote*))
	 (server-dat (if runremote
                         (remote-conndat runremote)
  (if (remote? runremote)
                         #f))) ;; (hash-table-ref/default *runremote* run-id #f)))
    (if (vector? server-dat)
	(let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
	  (handle-exceptions
      (let ((api-dat (remote-api-uri runremote)))
	(handle-exceptions
	    exn
	    (begin
	      (print-call-chain *default-log-port*)
	      (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
	    (if (args:any-defined? "-server" "-execute" "-run")
		(debug:print-info 0 *default-log-port* "Closing connections to "api-dat))
	    (close-connection! api-dat)
	  (begin
	    (print-call-chain *default-log-port*)
	    (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
	  (if (args:any-defined? "-server" "-execute" "-run")
	      (debug:print-info 0 *default-log-port* "Closing connections to "api-dat))
	  (close-connection! api-dat)
	    ;; (close-connection! (http-transport:server-dat-make-url server-dat))
	    (remote-conndat-set! runremote #f)
	    #t))
	  (remote-conndat-set! runremote #f)
	  #t))
	#f)))


(define (make-http-transport:server-dat)(make-vector 6))
(define (http-transport:server-dat-get-iface         vec)    (vector-ref  vec 0))
(define (http-transport:server-dat-get-port          vec)    (vector-ref  vec 1))
(define (http-transport:server-dat-get-api-uri       vec)    (vector-ref  vec 2))
(define (http-transport:server-dat-get-api-url       vec)    (vector-ref  vec 3))
(define (http-transport:server-dat-get-api-req       vec)    (vector-ref  vec 4))
(define (http-transport:server-dat-get-last-access   vec)    (vector-ref  vec 5))
;(define (http-transport:server-dat-get-socket        vec)    (vector-ref  vec 6))
(define (http-transport:server-dat-get-server-id     vec)    (vector-ref  vec 6))

(define (http-transport:server-dat-make-url vec)
  (if (and (http-transport:server-dat-get-iface vec)
	   (http-transport:server-dat-get-port  vec))
      (conc "http://" 
	    (http-transport:server-dat-get-iface vec)
	    ":"
	    (http-transport:server-dat-get-port  vec))
      #f))

(define (http-transport:server-dat-update-last-access vec)
  (if (vector? vec)
      (vector-set! vec 5 (current-seconds))
      (begin
	(print-call-chain (current-error-port))
	(debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!"))))

;;
;; connect
;;
(define (http-transport:client-connect iface port server-id)
  (debug:print-info 2 *default-log-port* "Connecting to server at "iface":"port", id "server-id)
  (let* ((api-url      (conc "http://" iface ":" port "/api"))
	 (api-uri      (uri-reference (conc "http://" iface ":" port "/api")))
	 (api-req      (make-request method: 'POST uri: api-uri))
	 (server-dat   (vector iface port api-uri api-url api-req (current-seconds) server-id)))
    server-dat))




;; run http-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running) 
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown

Modified rmt.scm from [c1074b692d] to [ef90d475fb].

47
48
49
50
51
52
53
54

55
56
57
58
59
60
61
47
48
49
50
51
52
53

54
55
56
57
58
59
60
61







-
+







(define (rmt:get-connection-info areapath runremote) ;; TODO: push areapath down.
  (let* ((cinfo     (if (remote? runremote)
			(remote-conndat runremote)
			#f)))
	  (if cinfo
	      cinfo
	      (if (server:check-if-running areapath)
		  (client:setup areapath)
		  (client:setup areapath runremote)
		  #f))))

(define (rmt:on-homehost? runremote)
  (let* ((hh-dat (remote-hh-dat runremote)))
    (if (pair? hh-dat)
	(cdr hh-dat)
	(begin
167
168
169
170
171
172
173
174

175
176

177
178
179
180
181
182
183
167
168
169
170
171
172
173

174
175

176
177
178
179
180
181
182
183







-
+

-
+







     ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
     ;;
     ;;DOT CASE4 [label="reset\nconnection"];
     ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4}
     ;;DOT CASE4 -> "rmt:send-receive";
     ;; reset the connection if it has been unused too long
     ((and runremote
           (remote-conndat runremote)
           ;; (remote-conndat runremote)
	   (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
	      (+ (http-transport:server-dat-get-last-access (remote-conndat runremote))
	      (+ (remote-last-access runremote)
		 (remote-server-timeout runremote))))
      (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.")
      (http-transport:close-connections runremote)
      ;; moving this setting of runremote conndat to #f to inside the http-transport:close-connections
      ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
      (mutex-unlock! *rmt-mutex*)
      (rmt:send-receive cmd rid params attemptnum: attemptnum))
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219

















220
221
222
223
224
225
226
196
197
198
199
200
201
202

















203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+








     ;;DOT CASE6 [label="init\nremote"];
     ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6};
     ;;DOT CASE6 -> "rmt:send-receive";
     ;; on homehost and this is a write, we already have a server, but server has died

     ;; reinstate this keep-alive section but inject a time condition into the (add ...
     
     #;((and (cdr (remote-hh-dat runremote))           ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url runremote)             ;; have a server
           (not (server:ping (remote-server-url runremote) (remote-server-id runremote))))  ;; server has died. NOTE: this is not a cheap call! Need better approach.
      (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6")
      (http-transport:close-connections area-dat: runremote) ;; make sure to clean up
      (set! *runremote* (make-remote))
      (let* ((server-info (remote-server-info *runremote*))) 
            (if server-info
		(begin
		  (remote-server-url-set! *runremote* (server:record->url server-info))
                  (remote-server-id-set! *runremote* (server:record->id server-info)))))
      (remote-force-server-set! runremote (common:force-server?))
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  6")
      (rmt:send-receive cmd rid params attemptnum: attemptnum))
     ;;
     ;; ((and (cdr (remote-hh-dat runremote))           ;; on homehost
     ;;       (not (member cmd api:read-only-queries))  ;; this is a write
     ;;       (remote-server-url runremote)             ;; have a server
     ;;       (not (server:ping (remote-server-url runremote) (remote-server-id runremote))))  ;; server has died. NOTE: this is not a cheap call! Need better approach.
     ;;  (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6")
     ;;  (http-transport:close-connections area-dat: runremote) ;; make sure to clean up
     ;;  (set! *runremote* (make-remote))
     ;;  (let* ((server-info (remote-server-info *runremote*))) 
     ;;        (if server-info
     ;; 		(begin
     ;; 		  (remote-server-url-set! *runremote* (server:record->url server-info))
     ;;              (remote-server-id-set! *runremote* (server:record->id server-info)))))
     ;;  (remote-force-server-set! runremote (common:force-server?))
     ;;  (mutex-unlock! *rmt-mutex*)
     ;;  (debug:print-info 12 *default-log-port* "rmt:send-receive, case  6")
     ;;  (rmt:send-receive cmd rid params attemptnum: attemptnum))

     ;;DOT CASE7 [label="homehost\nwrite"];
     ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7};
     ;;DOT CASE7 -> "rmt:open-qry-close-locally";
     ;; on homehost and this is a write, we already have a server
     ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
	   (cdr (remote-hh-dat runremote))           ;; on homehost
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329













330
331
332
333
334
335
336
337
338
339
340

341
342
343
344
345
346
347
280
281
282
283
284
285
286






















287
288
289
290
291
292
293
294













295
296
297
298
299
300
301
302
303
304
305
306
307



308
309
310
311
312
313
314

315
316
317
318
319
320
321
322







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-








-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-







-
+







     ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11};
     ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"];
     ;;DOT CASE11 -> "RESULT" [label="call succeeded"];
     ;; not on homehost, do server query
     (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)))))
    ;;DOT }

;; No Title 
;; Error: (vector-ref) out of range
;; #(#<condition: (exn type)> (#("db.scm:3740: regex#regexp" #f #f) #("db.scm:3739: regex#string-substitute" #f #f) #("db.scm:3738: base64#base64-decode" #f #f) #("db.scm:3737: z3#z3:decode-buffer" #f #f) #("db.scm:3736: with-input-from-string" #f #f) #("db.scm:3741: s11n#deserialize" #f #f) #("api.scm:374: api:execute-requests" #f #f) #("api.scm:139: call-with-current-continuation" #f #f) #("api.scm:139: with-exception-handler" #f #f) #("api.scm:139: ##sys#call-with-values" #f #f) #("api.scm:158: string->symbol" #f #f) #("api.scm:160: current-milliseconds" #f #f) #("api.scm:161: dbr:dbstruct-read-only" #f #f) #("api.scm:139: k15" #f #f) #("api.scm:139: g19" #f #f) #("api.scm:142: get-call-chain" #f #f)) #("get-test-info-by-id" (1102 507299)))
;; 6
;; 
;; 	Call history:
;; 
;; 	http-transport.scm:306: thread-terminate!	  
;; 	http-transport.scm:307: debug:print-info	  
;; 	common_records.scm:235: debug:debug-mode	  
;; 	rmt.scm:259: k587	  
;; 	rmt.scm:259: g591	  
;; 	rmt.scm:276: http-transport:server-dat-update-last-access	  
;; 	http-transport.scm:364: current-seconds	  
;; 	rmt.scm:282: debug:print-info	  
;; 	common_records.scm:235: debug:debug-mode	  
;; 	rmt.scm:283: mutex-unlock!	  
;; 	rmt.scm:287: extras-transport-succeded	  	<--
;; +-----------------------------------------------------------------------------+
;; | Exit Status    : 70  
;;  

;; bunch of small functions factored out of send-receive to make debug easier
;;

(define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)
  ;; (mutex-unlock! *rmt-mutex*)
  (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9")
  ;; (mutex-lock! *rmt-mutex*)
  (let* ((conninfo (remote-conndat runremote))
	 (dat-in      (case (remote-transport runremote)
		     ((http) (condition-case ;; handling here has
					     ;; caused a lot of
					     ;; problems. However it
					     ;; is needed to deal with
					     ;; attemtped
					     ;; communication to
					     ;; servers that have gone
					     ;; away
			      (http-transport:client-api-send-receive 0 conninfo cmd params)
                              ((servermismatch)  (vector #f "Server id mismatch" ))
			      ((commfail)(vector #f "communications fail"))
			      ((exn)(vector #f "other fail" (print-call-chain)))))
	 (dat-in  (condition-case ;; handling here has
			     ;; caused a lot of
			     ;; problems. However it
			     ;; is needed to deal with
			     ;; attemtped
			     ;; communication to
			     ;; servers that have gone
			     ;; away
			     (http-transport:client-api-send-receive 0 runremote cmd params)
			     ;; (http-transport:client-api-send-receive 0 conninfo cmd params runremote)
			     ((servermismatch)  (vector #f "Server id mismatch" ))
			     ((commfail)(vector #f "communications fail"))
			     ((exn)(vector #f "other fail" (print-call-chain)))))
		     (else
		      (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported")
		      (exit))))
	 (dat      (if (and (vector? dat-in) ;; ... check it is a correct size
			    (> (vector-length dat-in) 1))
		       dat-in
		       (vector #f (conc "communications fail (type 2), dat-in=" dat-in))))
	 (success  (if (vector? dat) (vector-ref dat 0) #f))
	 (res      (if (vector? dat) (vector-ref dat 1) #f)))
    (if (and (vector? conninfo) (< 5 (vector-length conninfo)))
	(http-transport:server-dat-update-last-access conninfo) ;; refresh access time
	(remote-last-access-set! runremote (current-seconds)) ;; refresh access time
	(begin
	  (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo)
	  (set! conninfo #f)
	  (http-transport:close-connections runremote)))
    (debug:print-info 13 *default-log-port* "rmt:send-receive, case  9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
    (mutex-unlock! *rmt-mutex*)
    (if success ;; success only tells us that the transport was
429
430
431
432
433
434
435
436

437
438
439
440
441
442
443

444
445
446
447
448
449
450
404
405
406
407
408
409
410

411
412






413
414
415
416
417
418
419
420







-
+

-
-
-
-
-
-
+







	  (if qry-is-write
	      (let ((start-time (current-seconds)))
		(mutex-lock! *db-multi-sync-mutex*)
/		(set! *db-last-access* start-time)  ;; THIS IS PROBABLY USELESS? (we are on a client)
                (mutex-unlock! *db-multi-sync-mutex*)))))
    res))

(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
(define (rmt:send-receive-no-auto-client-setup runremote cmd run-id params)
  (let* ((run-id   (if run-id run-id 0))
	 (res  	   ;; (handle-exceptions
		   ;;     exn
		   ;;   (begin
		   ;;     (print "transport failed. exn=" exn)
		   ;;     #f)
		     (http-transport:client-api-send-receive run-id connection-info cmd params))) ;; )
	 (res  	   (http-transport:client-api-send-receive run-id runremote cmd params)))
    (if (and res (vector-ref res 0))
	(vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!!
	#f)))

;;======================================================================
;;
;; A C T U A L   A P I   C A L L S  
467
468
469
470
471
472
473
474

475
476

477
478
479
480
481
482
483
484
485
437
438
439
440
441
442
443

444


445


446
447
448
449
450
451
452







-
+
-
-
+
-
-








(define (rmt:login run-id)
  (rmt:send-receive 'login run-id (list *toppath* megatest-version (client:get-signature))))

;; This login does no retries under the hood - it acts a bit like a ping.
;; Deprecated for nmsg-transport.
;;
(define (rmt:login-no-auto-client-setup connection-info)
(define (rmt:login-no-auto-client-setup runremote)
  (case *transport-type* ;; run-id of 0 is just a placeholder
    ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login 0 (list *toppath* megatest-version (client:get-signature))))
  (rmt:send-receive-no-auto-client-setup runremote 'login 0 (list *toppath* megatest-version (client:get-signature))))
    ;;((nmsg)(nmsg-transport:client-api-send-receive run-id connection-info 'login (list *toppath* megatest-version run-id *my-client-signature*)))
    ))

;; hand off a call to one of the db:queries statements
;; added run-id to make looking up the correct db possible 
;;
(define (rmt:general-call stmtname run-id . params)
  (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params)))

Modified server.scm from [1caa4a85a3] to [1a43f0a48b].

662
663
664
665
666
667
668
669

670
671
672
673
674
675
676
677
678
679
680
681
682
683
684








685
686
687


688
689

690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706





















707
708
709
710
711
712
713
662
663
664
665
666
667
668

669















670
671
672
673
674
675
676
677



678
679


680

















681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708







-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
-
-
-
+
+
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







    (tasks:kill-server hostname pid))))

;; called in megatest.scm, host-port is string hostname:port
;;
;; NOTE: This is NOT called directly from clients as not all transports support a client running
;;       in the same process as the server.
;;
(define (server:ping host-port-in server-id #!key (do-exit #f))
(define (server:ping host:port server-id #!key (do-exit #f))
  (let ((host:port (if (not host-port-in) ;; use read-dotserver to find
		       #f ;; (server:check-if-running *toppath*)
		;; (if (number? host-port-in) ;; we were handed a server-id
		;; 	   (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in)))
		;; 	     ;; (print "srec: " srec " host-port-in: " host-port-in)
		;; 	     (if srec
		;; 		 (conc (vector-ref srec 3) ":" (vector-ref srec 4))
		;; 		 (conc "no such server-id " host-port-in)))
		       host-port-in))) ;; )
    (let* ((host-port (if host:port
			  (let ((slst (string-split   host:port ":")))
			    (if (eq? (length slst) 2)
				(list (car slst)(string->number (cadr slst)))
				#f))
			  #f)))
  (let* ((host-port (cond
		     ((string? host:port)
		      (let ((slst (string-split   host:port ":")))
			(if (eq? (length slst) 2)
			    (list (car slst)(string->number (cadr slst)))
			    #f)))
		     (else
		      #f))))
;;	   (toppath       (launch:setup)))
      ;; (print "host-port=" host-port)
      (if (not host-port)
    (cond
     ((and (list? host-port)
	  (begin
	    (if host-port-in
	   (eq? (length host-port) 2))
		(debug:print 0 *default-log-port*  "ERROR: bad host:port"))
	    (if do-exit (exit 1))
	    #f)
	  (let* ((iface      (car host-port))
		 (port       (cadr host-port))
		 (server-dat (http-transport:client-connect iface port server-id))
		 (login-res  (rmt:login-no-auto-client-setup server-dat)))
	    (if (and (list? login-res)
		     (car login-res))
		(begin
		  ;; (print "LOGIN_OK")
		  (if do-exit (exit 0))
		  #t)
		(begin
		  ;; (print "LOGIN_FAILED")
		  (if do-exit (exit 1))
		  #f)))))))
      (let* ((myrunremote (make-remote))
	     (iface       (car host-port))
	     (port        (cadr host-port))
	     (server-dat  (client:connect iface port server-id myrunremote))
	     (login-res   (rmt:login-no-auto-client-setup myrunremote)))
	(if (and (list? login-res)
		 (car login-res))
	    (begin
	      ;; (print "LOGIN_OK")
	      (if do-exit (exit 0))
	      #t)
	    (begin
	      ;; (print "LOGIN_FAILED")
	      (if do-exit (exit 1))
	      #f))))
     (else 
      (if host:port
	  (debug:print 0 *default-log-port*  "ERROR: bad host:port "host:port))
      (if do-exit
	  (exit 1)
	  #f)))))

;; run ping in separate process, safest way in some cases
;;
(define (server:ping-server ifaceport)
  (with-input-from-pipe 
   (conc (common:get-megatest-exe) " -ping " ifaceport)
   (lambda ()