Megatest

Diff
Login

Differences From Artifact [cfa1ca3a75]:

To Artifact [3618123224]:


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18


19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35







36
37
38
39
40
41
42
;;======================================================================
;; Copyright 2006-2013, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.
;;======================================================================
;;
(use json format) ;; RADT => purpose of json format??

(declare (unit rmt))
(declare (uses api))
(declare (uses tdb))
(declare (uses http-transport))
;;(declare (uses nmsg-transport))


;;
;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
;;

;; ;; For debugging add the following to ~/.megatestrc
;;
;; (require-library trace)
;; (import trace)
;; (trace
;; rmt:send-receive
;; api:execute-requests
;; )

;; generate entries for ~/.megatestrc with the following
;;
;;  grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u









;;======================================================================
;;  S U P P O R T   F U N C T I O N S
;;======================================================================

;; thread-safe interface to *runremote* hash
(define *rrr-mutex* (make-mutex))











|






>
>




<
<
<
<
<
<
<
<
<




>
>
>
>
>
>
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24









25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
;;======================================================================
;; Copyright 2006-2013, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.
;;======================================================================
;;
(use format typed-records) ;; RADT => purpose of json format??

(declare (unit rmt))
(declare (uses api))
(declare (uses tdb))
(declare (uses http-transport))
;;(declare (uses nmsg-transport))
(include "common_records.scm")

;;
;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
;;










;; generate entries for ~/.megatestrc with the following
;;
;;  grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u

(defstruct remote
  (hh-dat            (common:get-homehost)) ;; homehost record ( addr . hhflag )
  (server-url        (if *toppath* (server:check-if-running *toppath*) #f))
  (last-server-check 0)  ;; last time we checked to see if the server was alive
  (conndat           #f)
  (transport         *transport-type*)
  (server-timeout    (or (server:get-timeout) 100))) ;; default to 100 seconds

;;======================================================================
;;  S U P P O R T   F U N C T I O N S
;;======================================================================

;; thread-safe interface to *runremote* hash
(define *rrr-mutex* (make-mutex))
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83

84
85
86
87
88
89
90







91


























92

93
94
95
96
97
98



99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116


117
118
119
120
121
122

123
124
125
126


127
128
129
130
131
132
133
134
135
136
137
138
139
140
141

142
143
144

145
146
147
148


149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177

178
179
180
181
182
183

184
185
186
187
188
189
190
191
192

193


194
195
196
197
198
199


200
201
202
203
204
205
206
207



;; if a server is either running or in the process of starting call client:setup
;; else return #f to let the calling proc know that there is no server available
;;
(define (rmt:get-connection-info run-id)
  (let ((cinfo *runremote*)) ;; (hash-table-ref/default *runremote* run-id #f)))
    (if cinfo
	cinfo
	(if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id)
	    (client:setup run-id)
	    #f))))

(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected
  ;; clean out old connections

  ;; (mutex-lock! *db-multi-sync-mutex*)

  ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in *runremote*
  ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
  ;; 3. do the query, if on homehost use local access
  ;;
  (if (and ;; #f  ;; FORCE NO GO FOR RIGHT NOW







	   (not *runremote*)                         ;; we trust *runremote* to reflect that a server was found previously


























	   (not (member cmd api:read-only-queries))) ;; we don't trust so much the list of write queries

      (let ((serverconn (server:check-if-running *toppath*)))
	(if serverconn
	    (set! *runremote* serverconn) ;; the string can be consumed by the client setup if needed
	    (if (not (server:start-attempted? *toppath*))
		(server:kind-run *toppath*)))))
  



  (rmt:open-qry-close-locally cmd (if rid rid 0) params))

;;   (let ((expire-time (- (current-seconds) (server:get-timeout) 10))) ;; don't forget the 10 second margin
;;     (for-each 
;;      (lambda (run-id)
;;        (let ((connection (hash-table-ref/default *runremote* run-id #f)))
;;          (if (and (vector? connection)
;;         	  (< (http-transport:server-dat-get-last-access connection) expire-time))
;;              (begin
;;                (debug:print-info 0 *default-log-port* "Discarding connection to server for run-id " run-id ", too long between accesses")
;;                ;; bb- disabling nanomsg
;;                ;; SHOULD CLOSE THE CONNECTION HERE 
;; 	       ;; (case *transport-type*
;; 	       ;;   ((nmsg)(nn-close (http-transport:server-dat-get-socket 
;; 	       ;;  		   (hash-table-ref *runremote* run-id)))))
;;                (hash-table-delete! *runremote* run-id)))))
;;      (hash-table-keys *runremote*)))
;;   ;; (mutex-unlock! *db-multi-sync-mutex*)


;;   ;; (mutex-lock! *send-receive-mutex*)
;;   (let* ((run-id          (if rid rid 0))
;; 	 (home-host       (common:get-homehost))
;; 	 (connection-info (if (cdr home-host) ;; we are on the home-host
;; 			      #f
;; 			      (rmt:get-connection-info run-id))))

;;     (cond
;;      (home-host        (rmt:open-qry-close-locally cmd run-id params))
;;      (connection-info
;;       ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also)


;;       ;; use the server if have connection info
;;       (let* ((dat     (case *transport-type*
;; 			((http)(condition-case
;; 				(http-transport:client-api-send-receive run-id connection-info cmd params)
;; 				((commfail)(vector #f "communications fail"))
;; 				((exn)(vector #f "other fail"))))
;; 			;; ((nmsg)(condition-case
;; 			;;         (nmsg-transport:client-api-send-receive run-id connection-info cmd params)
;; 			;;         ((timeout)(vector #f "timeout talking to server"))))
;; 			(else  (exit))))
;; 	     (success (if (vector? dat) (vector-ref dat 0) #f))
;; 	     (res     (if (vector? dat) (vector-ref dat 1) #f)))
;; 	(if (vector? connection-info)(http-transport:server-dat-update-last-access connection-info))
;; 	(if success
;; 	    (begin

;; 	      ;; (mutex-unlock! *send-receive-mutex*)
;; 	      (case *transport-type* 
;; 		((http) res) ;; (db:string->obj res))

;; 		;; ((nmsg) res)
;; 		)) ;; (vector-ref res 1)))
;; 	    (begin ;; let ((new-connection-info (client:setup run-id)))
;; 	      (debug:print 0 *default-log-port* "WARNING: Communication failed, trying call to rmt:send-receive again.")


;; 	      ;; (case *transport-type*
;; 	      ;;   ((nmsg)(nn-close (http-transport:server-dat-get-socket connection-info))))
;; 	      (hash-table-delete! *runremote* run-id) ;; don't keep using the same connection
;; 	      ;; NOTE: killing server causes this process to block forever. No idea why. Dec 2. 
;; 	      ;; (if (eq? (modulo attemptnum 5) 0)
;; 	      ;;     (tasks:kill-server-run-id run-id tag: "api-send-receive-failed"))
;; 	      ;; (mutex-unlock! *send-receive-mutex*) ;; close the mutex here to allow other threads access to communications
;; 	      (tasks:start-and-wait-for-server (tasks:open-db) run-id 15)
;; 	      ;; (nmsg-transport:client-api-send-receive run-id connection-info cmd param remtries: (- remtries 1))))))
;; 	      
;; 	      ;; no longer killing the server in http-transport:client-api-send-receive
;; 	      ;; may kill it here but what are the criteria?
;; 	      ;; start with three calls then kill server
;; 	      ;; (if (eq? attemptnum 3)(tasks:kill-server-run-id run-id))
;; 	      ;; (thread-sleep! 2)
;; 	      (rmt:send-receive cmd run-id params attemptnum: (+ attemptnum 1))))))
;;      (else
;;      ;; no connection info? try to start a server, or access locally if no
;;       ;; server and the query is read-only
;;       ;;
;;       ;; Note: The tasks db was checked for a server in starting mode in the rmt:get-connection-info call
;;       ;;
;;       (if (and (< attemptnum 15)
;; 	       (member cmd api:write-queries))
;; 	  (let ((homehost  (common:get-homehost))) ;; faststart (configf:lookup *configdat* "server" "faststart")))
;; 	      (hash-table-delete! *runremote* run-id)
;; 	      ;; (mutex-unlock! *send-receive-mutex*)
;; 	      (if (not (cdr homehost)) ;; we always require a server if not on homehost ;; (and faststart (equal? faststart "no"))
;; 		  (begin

;; 		    (tasks:start-and-wait-for-server (db:delay-if-busy (tasks:open-db)) run-id 10)
;; 		    (thread-sleep! (random 5)) ;; give some time to settle and minimize collison?
;; 		    (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
;; 		  ;; NB - probably can remove the query time stuff but need to discuss it ....
;; 		  (let ((start-time (current-milliseconds))
;; 			(max-query  (string->number (or (configf:lookup *configdat* "server" "server-query-threshold")

;; 							"300")))
;; 			(newres     (rmt:open-qry-close-locally cmd run-id params)))
;; 		    (let ((delta (- (current-milliseconds) start-time)))
;; 		      (if (> delta max-query)
;; 			  (begin
;; 			    (debug:print-info 0 *default-log-port* "WARNING: long query times, you may have an overloaded homehost.") ;; Starting server as query time " delta " is over the limit of " max-query)
;; 			    ;; (server:kind-run run-id)))
;; 			    ))
;; 		      ;; return the result!

;; 		      newres)


;; 		    )))
;; 	  (begin
;; 	    ;; (debug:print-error 0 *default-log-port* "Communication failed!")
;; 	    ;; (mutex-unlock! *send-receive-mutex*)
;; 	    ;; (exit)
;; 	    (rmt:open-qry-close-locally cmd run-id params)


;; 	    ))))))

(define (rmt:update-db-stats run-id rawcmd params duration)
  (mutex-lock! *db-stats-mutex*)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 *default-log-port* "WARNING: stats collection failed in update-db-stats")







|











|
>
|





<
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>


|

|
|
>
>
>
|
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
|
>
>
|
<
<
|
|
<
>
<
<
|
<
>
>
|
<
<
|
<
<
<
|
<
<
<
<
<
<
|
>
|
|
<
>
|
<
|
<
>
>
|
<
|
<
<
<
<
<
<
|
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
|
<
|
>
|
|
|
|
|
|
>
|
<
|
|
|
|
<
<
|
>
|
>
>
|
|
|
<
|
|
>
>
|







64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90

91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136







137








138
139
140
141


142
143

144


145

146
147
148


149



150






151
152
153
154

155
156

157

158
159
160

161






162






163









164

165
166
167
168
169
170
171
172
173
174

175
176
177
178


179
180
181
182
183
184
185
186

187
188
189
190
191
192
193
194
195
196
197
198



;; if a server is either running or in the process of starting call client:setup
;; else return #f to let the calling proc know that there is no server available
;;
(define (rmt:get-connection-info run-id)
  (let ((cinfo (remote-conndat *runremote*)))
    (if cinfo
	cinfo
	(if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id)
	    (client:setup run-id)
	    #f))))

(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected

  ;; do all the prep locked under the rmt-mutex
  (mutex-lock! *rmt-mutex*)

  ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in *runremote*
  ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
  ;; 3. do the query, if on homehost use local access
  ;;

  (let* ((start-time (current-seconds))) ;; snapshot time so all use cases get same value
    (cond
     ;; give up if more than 15 attempts
     ((> attemptnum 15)
      (debug:print 0 *default-log-port* "ERROR: 15 tries to start/connect to server. Giving up.")
      (exit 1))
     ;; ensure we have a record for our connection for given area
     ((not *runremote*)                     
      (set! *runremote* (make-remote))
      (mutex-unlock! *rmt-mutex*)
      (print "case 1")
      (rmt:send-receive cmd rid params attemptnum: attemptnum))
     ;; ensure we have a homehost record
     ((not (pair? (remote-hh-dat *runremote*)))  ;; have a homehost record?
      (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
      (remote-hh-dat-set! *runremote* (common:get-homehost))
      (mutex-unlock! *rmt-mutex*)
      (print "case 2")
      (rmt:send-receive cmd rid params attemptnum: attemptnum))
     ;; on homehost and this is a read
     ((and (cdr (remote-hh-dat *runremote*))   ;; on homehost
           (member cmd api:read-only-queries)) ;; this is a read
      (mutex-unlock! *rmt-mutex*)
      (print "case 3")
      (rmt:open-qry-close-locally cmd 0 params))
     ;; on homehost and this is a write, we already have a server
     ((and (cdr (remote-hh-dat *runremote*))         ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url *runremote*))          ;; have a server
      (mutex-unlock! *rmt-mutex*)
      (print "case 4")
      (rmt:open-qry-close-locally cmd 0 params))
     ;; no server contact made and this is a write, passively start a server 
     ((and (not (remote-server-url *runremote*))
	   (not (member cmd api:read-only-queries)))
      (print "case 5")
      (let ((serverconn (server:check-if-running *toppath*)))
	(if serverconn
	    (remote-server-url-set! *runremote* serverconn) ;; the string can be consumed by the client setup if needed
	    (if (not (server:start-attempted? *toppath*))
		(server:kind-run *toppath*))))
      (if (cdr (remote-hh-dat *runremote*)) ;; we are on the homehost, just do the call
          (begin
            (mutex-unlock! *rmt-mutex*)
	    (print "case 5.1")
            (rmt:open-qry-close-locally cmd 0 params))







          (begin








            (mutex-unlock! *rmt-mutex*)
	    (print "case 5.2")
	    (tasks:start-and-wait-for-server (tasks:open-db) 0 15)
            (rmt:send-receive cmd rid params attemptnum: attemptnum))))


     ;; if not on homehost ensure we have a connection to a live server
     ;; NOTE: we *have* a homehost record by now

     ((and (not (cdr (remote-hh-dat *runremote*)))        ;; are we on a homehost?


           (not (remote-conndat *runremote*)))            ;; and no connection

      (print "case 6  hh-dat: " (remote-hh-dat *runremote*) " conndat: " (remote-conndat *runremote*))
      (mutex-unlock! *rmt-mutex*)
      (tasks:start-and-wait-for-server (tasks:open-db) 0 15)


      (remote-conndat-set! *runremote* (rmt:get-connection-info 0))



      (rmt:send-receive cmd rid params attemptnum: attemptnum))






     ;; all set up if get this far, dispatch the query
     ((cdr (remote-hh-dat *runremote*)) ;; we are on homehost
      (mutex-unlock! *rmt-mutex*)
      (print "case 7")

      (rmt:open-qry-close-locally cmd (if rid rid 0) params))
     ;; reset the connection if it has been unused too long

     ((and (remote-conndat *runremote*)

	   (let ((expire-time (- start-time (remote-server-timeout *runremote*))))
	     (< (http-transport:server-dat-get-last-access (remote-conndat *runremote*)) expire-time)))
      (print "case 8")

      (remote-conndat-set! *runremote* #f))






     ;; not on homehost, do server query






     (else









      (mutex-unlock! *rmt-mutex*)

      (print "case 9")
      (let* ((conninfo (remote-conndat *runremote*))
	     (dat      (case (remote-transport *runremote*)
			 ((http) (condition-case ;; handling here has caused a lot of problems. However it is needed to deal with attemtped communication to servers that have gone away
                                  (http-transport:client-api-send-receive 0 conninfo cmd params)
                                  ((commfail)(vector #f "communications fail"))
                                  ((exn)(vector #f "other fail" (print-call-chain)))))
			 (else
			  (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport *runremote*) " not supported")
			  (exit))))

	     (success  (if (vector? dat) (vector-ref dat 0) #f))
	     (res      (if (vector? dat) (vector-ref dat 1) #f)))
	(if (vector? conninfo)(http-transport:server-dat-update-last-access conninfo)) ;; refresh access time
        (print "case 9. conninfo=" conninfo " dat=" dat)


	(if success
	    (case (remote-transport *runremote*)
	      ((http) res)
	      (else
	       (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport *runremote*) " is unknown")
	       (exit 1)))
	    (begin
	      (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)

	      (remote-conndat-set!    *runremote* #f)
	      (remote-server-url-set! *runremote* #f)
              (print "case 9.1")
	      (tasks:start-and-wait-for-server (tasks:open-db) 0 15)
	      (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))))))))

(define (rmt:update-db-stats run-id rawcmd params duration)
  (mutex-lock! *db-stats-mutex*)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 *default-log-port* "WARNING: stats collection failed in update-db-stats")
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
		(mutex-lock! *db-multi-sync-mutex*)
		(set! *db-last-write* start-time) ;; the oldest "write"
                (mutex-unlock! *db-multi-sync-mutex*)))))
    res))

(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
  (let* ((run-id   (if run-id run-id 0))
	 ;; (jparams  (db:obj->string params)) ;; (rmt:dat->json-str params))
	 (res  	   (handle-exceptions
		    exn
		    #f
		    (http-transport:client-api-send-receive run-id connection-info cmd params))))
;;		    ((commfail) (vector #f "communications fail")))))
    (if (and res (vector-ref res 0))
	(vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!!
	#f)))
;; 	(db:string->obj (vector-ref dat 1))
;; 	(begin
;; 	  (debug:print-error 0 *default-log-port* "rmt:send-receive-no-auto-client-setup failed, attempting to continue. Got " dat)
;; 	  dat))))

;; Wrap json library for strings (why the ports crap in the first place?)
(define (rmt:dat->json-str dat)
  (with-output-to-string 
    (lambda ()
      (json-write dat))))

(define (rmt:json-str->dat json-str)
  (with-input-from-string json-str
    (lambda ()
      (json-read))))

;;======================================================================
;;
;; A C T U A L   A P I   C A L L S  
;;
;;======================================================================








<




<



<
<
<
<

|
|
|
|
|
|
|
|
|
|







278
279
280
281
282
283
284

285
286
287
288

289
290
291




292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
		(mutex-lock! *db-multi-sync-mutex*)
		(set! *db-last-write* start-time) ;; the oldest "write"
                (mutex-unlock! *db-multi-sync-mutex*)))))
    res))

(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
  (let* ((run-id   (if run-id run-id 0))

	 (res  	   (handle-exceptions
		    exn
		    #f
		    (http-transport:client-api-send-receive run-id connection-info cmd params))))

    (if (and res (vector-ref res 0))
	(vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!!
	#f)))





;; ;; Wrap json library for strings (why the ports crap in the first place?)
;; (define (rmt:dat->json-str dat)
;;   (with-output-to-string 
;;     (lambda ()
;;       (json-write dat))))
;; 
;; (define (rmt:json-str->dat json-str)
;;   (with-input-from-string json-str
;;     (lambda ()
;;       (json-read))))

;;======================================================================
;;
;; A C T U A L   A P I   C A L L S  
;;
;;======================================================================

333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
  (rmt:send-receive 'start-server 0 (list run-id)))

;;======================================================================
;;  M I S C
;;======================================================================

(define (rmt:login run-id)
  (rmt:send-receive 'login run-id (list *toppath* megatest-version run-id *my-client-signature*)))

;; This login does no retries under the hood - it acts a bit like a ping.
;; Deprecated for nmsg-transport.
;;
(define (rmt:login-no-auto-client-setup connection-info)
  (case *transport-type* ;; run-id of 0 is just a placeholder
    ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login 0 (list *toppath* megatest-version *my-client-signature*)))







|







318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
  (rmt:send-receive 'start-server 0 (list run-id)))

;;======================================================================
;;  M I S C
;;======================================================================

(define (rmt:login run-id)
  (rmt:send-receive 'login run-id (list *toppath* megatest-version *my-client-signature*)))

;; This login does no retries under the hood - it acts a bit like a ping.
;; Deprecated for nmsg-transport.
;;
(define (rmt:login-no-auto-client-setup connection-info)
  (case *transport-type* ;; run-id of 0 is just a placeholder
    ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login 0 (list *toppath* megatest-version *my-client-signature*)))