Megatest

Check-in [ce0b324459]
Login
Overview
Comment:Basic server code added back
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.62-no-rpc
Files: files | file ages | folders
SHA1: ce0b324459b408385f3773b30f45371b1a2d1587
User & Date: matt on 2016-11-30 22:52:22
Other Links: branch diff | manifest | tags
Context
2016-11-30
23:10
Basic server code, removed some junk and corrected couple typos check-in: bb626804c7 user: matt tags: v1.62-no-rpc
22:52
Basic server code added back check-in: ce0b324459 user: matt tags: v1.62-no-rpc
2016-11-28
22:06
Simplified and streamlined the rollup code check-in: 47f0625f23 user: matt tags: v1.62-no-rpc
Changes

Modified Makefile from [83b5fe2a28] to [1b85fc3382].

70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
tests.o db.o launch.o runs.o dashboard-tests.o dashboard-guimonitor.o dashboard-main.o monitor.o dashboard.o  \
archive.o megatest.o : db_records.scm
tests.o runs.o dashboard.o dashboard-tests.o dashboard-main.o  : run_records.scm
db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm
tests.o tasks.o dashboard-tasks.o : task_records.scm
runs.o : test_records.scm
megatest.o : megatest-fossil-hash.scm
client.scm common.scm configf.scm dashboard-guimonitor.scm dashboard-tests.scm dashboard.scm db.scm dcommon.scm ezsteps.scm fs-transport.scm http-transport.scm index-tree.scm items.scm keys.scm launch.scm megatest.scm monitor.scm mt.scm newdashboard.scm runconfig.scm runs.scm server.scm tdb.scm tests.scm tree.scm : common_records.scm rpc-transport.scm
common_records.scm : altdb.scm
vg.o dashboard.o : vg_records.scm
dcommon.o : run_records.scm
# Temporary while transitioning to new routine
# runs.o : run-tests-queue-classic.scm  run-tests-queue-new.scm

megatest-fossil-hash.scm : $(SRCFILES) megatest.scm *_records.scm







|







70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
tests.o db.o launch.o runs.o dashboard-tests.o dashboard-guimonitor.o dashboard-main.o monitor.o dashboard.o  \
archive.o megatest.o : db_records.scm
tests.o runs.o dashboard.o dashboard-tests.o dashboard-main.o  : run_records.scm
db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm
tests.o tasks.o dashboard-tasks.o : task_records.scm
runs.o : test_records.scm
megatest.o : megatest-fossil-hash.scm
rmt.scm client.scm common.scm configf.scm dashboard-guimonitor.scm dashboard-tests.scm dashboard.scm db.scm dcommon.scm ezsteps.scm fs-transport.scm http-transport.scm index-tree.scm items.scm keys.scm launch.scm megatest.scm monitor.scm mt.scm newdashboard.scm runconfig.scm runs.scm server.scm tdb.scm tests.scm tree.scm : common_records.scm rpc-transport.scm
common_records.scm : altdb.scm
vg.o dashboard.o : vg_records.scm
dcommon.o : run_records.scm
# Temporary while transitioning to new routine
# runs.o : run-tests-queue-classic.scm  run-tests-queue-new.scm

megatest-fossil-hash.scm : $(SRCFILES) megatest.scm *_records.scm

Modified common.scm from [e7c38eab03] to [544bde0493].

114
115
116
117
118
119
120



121
122
123
124
125
126
127
(define *server-run*        #t)
(define *run-id*            #f)
(define *server-kind-run*   (make-hash-table))
(define *home-host*         #f)
(define *total-non-write-delay* 0)
(define *heartbeat-mutex*   (make-mutex))




;; RPC transport
(define *rpc:listener*      #f)

;; KEY info
(define *target*            (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN
(define *keys*              (make-hash-table)) ;; cache the keys here
(define *keyvals*           (make-hash-table))







>
>
>







114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
(define *server-run*        #t)
(define *run-id*            #f)
(define *server-kind-run*   (make-hash-table))
(define *home-host*         #f)
(define *total-non-write-delay* 0)
(define *heartbeat-mutex*   (make-mutex))

;; client
(define *rmt-mutex*         (make-mutex))     ;; remote access calls mutex 

;; RPC transport
(define *rpc:listener*      #f)

;; KEY info
(define *target*            (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN
(define *keys*              (make-hash-table)) ;; cache the keys here
(define *keyvals*           (make-hash-table))

Modified common_records.scm from [9b8dfbfc6d] to [0e6990e6a2].

41
42
43
44
45
46
47









48
49
50
51
52
53
54
     (print-call-chain (current-error-port))
     (with-output-to-port (current-error-port)
       (lambda ()
	 (print ((condition-property-accessor 'exn 'message) exn))
	 (print "Callback error in " procname)
	 (print "Full condition info:\n" (condition->list exn)))))
   (proc)))










;; this was cached based on results from profiling but it turned out the profiling
;; somehow went wrong - perhaps too many processes writing to it. Leaving the caching
;; in for now but can probably take it out later.
;;
(define (debug:calc-verbosity vstr)
  (or (hash-table-ref/default *verbosity-cache* vstr #f)







>
>
>
>
>
>
>
>
>







41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
     (print-call-chain (current-error-port))
     (with-output-to-port (current-error-port)
       (lambda ()
	 (print ((condition-property-accessor 'exn 'message) exn))
	 (print "Callback error in " procname)
	 (print "Full condition info:\n" (condition->list exn)))))
   (proc)))

;; Need a mutex protected way to get and set values
;; or use (define-simple-syntax ??
;;
(define-inline (with-mutex mtx accessor record . val)
  (mutex-lock! mtx)
  (let ((res (apply accessor record val)))
    (mutex-unlock! mtx)
    res))

;; this was cached based on results from profiling but it turned out the profiling
;; somehow went wrong - perhaps too many processes writing to it. Leaving the caching
;; in for now but can probably take it out later.
;;
(define (debug:calc-verbosity vstr)
  (or (hash-table-ref/default *verbosity-cache* vstr #f)

Modified rmt.scm from [1582aa56da] to [6106225e0a].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18


19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35







36
37
38
39
40
41
42
43
44
45





46
47
48
49
50
51
52
53
54
55
56
57

58
59
60
61
62
63
64






65





66
67
68
69
70
71
72













73





























74

75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
;;======================================================================
;; Copyright 2006-2013, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.
;;======================================================================

(use json format) ;; RADT => purpose of json format??

(declare (unit rmt))
(declare (uses api))
(declare (uses tdb))
(declare (uses http-transport))
;;(declare (uses nmsg-transport))


;;
;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
;;

;; ;; For debugging add the following to ~/.megatestrc
;;
;; (require-library trace)
;; (import trace)
;; (trace
;; rmt:send-receive
;; api:execute-requests
;; )

;; generate entries for ~/.megatestrc with the following
;;
;;  grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u









;;======================================================================
;;  S U P P O R T   F U N C T I O N S
;;======================================================================

;; if a server is either running or in the process of starting call client:setup
;; else return #f to let the calling proc know that there is no server available
;;
(define (rmt:get-connection-info run-id)
  (let ((cinfo *runremote*)) ;; (hash-table-ref/default *runremote* run-id #f)))





    (if cinfo
	cinfo
	(if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id)
	    (client:setup run-id)
	    #f))))

(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected
  ;; clean out old connections

  ;; (mutex-lock! *db-multi-sync-mutex*)

  ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in *runremote*
  ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
  ;; 3. do the query, if on homehost use local access
  ;;
  (if (and ;; #f  ;; FORCE NO GO FOR RIGHT NOW






	   (not *runremote*)                         ;; we trust *runremote* to reflect that a server was found previously





	   (not (member cmd api:read-only-queries))) ;; we don't trust so much the list of write queries
      (let ((serverconn (server:check-if-running *toppath*)))
	(if serverconn
	    (set! *runremote* serverconn) ;; the string can be consumed by the client setup if needed
	    (if (not (server:start-attempted? *toppath*))
		(server:kind-run *toppath*)))))
  













  (rmt:open-qry-close-locally cmd (if rid rid 0) params))































;;   (let ((expire-time (- (current-seconds) (server:get-timeout) 10))) ;; don't forget the 10 second margin
;;     (for-each 
;;      (lambda (run-id)
;;        (let ((connection (hash-table-ref/default *runremote* run-id #f)))
;;          (if (and (vector? connection)
;;         	  (< (http-transport:server-dat-get-last-access connection) expire-time))
;;              (begin
;;                (debug:print-info 0 *default-log-port* "Discarding connection to server for run-id " run-id ", too long between accesses")
;;                ;; bb- disabling nanomsg
;;                ;; SHOULD CLOSE THE CONNECTION HERE 
;; 	       ;; (case *transport-type*
;; 	       ;;   ((nmsg)(nn-close (http-transport:server-dat-get-socket 
;; 	       ;;  		   (hash-table-ref *runremote* run-id)))))
;;                (hash-table-delete! *runremote* run-id)))))
;;      (hash-table-keys *runremote*)))
;;   ;; (mutex-unlock! *db-multi-sync-mutex*)
;;   ;; (mutex-lock! *send-receive-mutex*)
;;   (let* ((run-id          (if rid rid 0))
;; 	 (home-host       (common:get-homehost))
;; 	 (connection-info (if (cdr home-host) ;; we are on the home-host
;; 			      #f
;; 			      (rmt:get-connection-info run-id))))
;;     (cond
;;      (home-host        (rmt:open-qry-close-locally cmd run-id params))
;;      (connection-info
;;       ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also)
;;       ;; use the server if have connection info
;;       (let* ((dat     (case *transport-type*
;; 			((http)(condition-case
;; 				(http-transport:client-api-send-receive run-id connection-info cmd params)
;; 				((commfail)(vector #f "communications fail"))
;; 				((exn)(vector #f "other fail"))))
;; 			;; ((nmsg)(condition-case
;; 			;;         (nmsg-transport:client-api-send-receive run-id connection-info cmd params)
;; 			;;         ((timeout)(vector #f "timeout talking to server"))))
;; 			(else  (exit))))
;; 	     (success (if (vector? dat) (vector-ref dat 0) #f))
;; 	     (res     (if (vector? dat) (vector-ref dat 1) #f)))
;; 	(if (vector? connection-info)(http-transport:server-dat-update-last-access connection-info))
;; 	(if success
;; 	    (begin
;; 	      ;; (mutex-unlock! *send-receive-mutex*)
;; 	      (case *transport-type* 
;; 		((http) res) ;; (db:string->obj res))
;; 		;; ((nmsg) res)
;; 		)) ;; (vector-ref res 1)))
;; 	    (begin ;; let ((new-connection-info (client:setup run-id)))
;; 	      (debug:print 0 *default-log-port* "WARNING: Communication failed, trying call to rmt:send-receive again.")
;; 	      ;; (case *transport-type*
;; 	      ;;   ((nmsg)(nn-close (http-transport:server-dat-get-socket connection-info))))
;; 	      (hash-table-delete! *runremote* run-id) ;; don't keep using the same connection
;; 	      ;; NOTE: killing server causes this process to block forever. No idea why. Dec 2. 
;; 	      ;; (if (eq? (modulo attemptnum 5) 0)
;; 	      ;;     (tasks:kill-server-run-id run-id tag: "api-send-receive-failed"))
;; 	      ;; (mutex-unlock! *send-receive-mutex*) ;; close the mutex here to allow other threads access to communications
;; 	      (tasks:start-and-wait-for-server (tasks:open-db) run-id 15)
;; 	      ;; (nmsg-transport:client-api-send-receive run-id connection-info cmd param remtries: (- remtries 1))))))
;; 	      
;; 	      ;; no longer killing the server in http-transport:client-api-send-receive
;; 	      ;; may kill it here but what are the criteria?
;; 	      ;; start with three calls then kill server
;; 	      ;; (if (eq? attemptnum 3)(tasks:kill-server-run-id run-id))
;; 	      ;; (thread-sleep! 2)
;; 	      (rmt:send-receive cmd run-id params attemptnum: (+ attemptnum 1))))))
;;      (else
;;      ;; no connection info? try to start a server, or access locally if no
;;       ;; server and the query is read-only
;;       ;;
;;       ;; Note: The tasks db was checked for a server in starting mode in the rmt:get-connection-info call
;;       ;;
;;       (if (and (< attemptnum 15)
;; 	       (member cmd api:write-queries))
;; 	  (let ((homehost  (common:get-homehost))) ;; faststart (configf:lookup *configdat* "server" "faststart")))
;; 	      (hash-table-delete! *runremote* run-id)
;; 	      ;; (mutex-unlock! *send-receive-mutex*)
;; 	      (if (not (cdr homehost)) ;; we always require a server if not on homehost ;; (and faststart (equal? faststart "no"))
;; 		  (begin
;; 		    (tasks:start-and-wait-for-server (db:delay-if-busy (tasks:open-db)) run-id 10)
;; 		    (thread-sleep! (random 5)) ;; give some time to settle and minimize collison?
;; 		    (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
;; 		  ;; NB - probably can remove the query time stuff but need to discuss it ....
;; 		  (let ((start-time (current-milliseconds))
;; 			(max-query  (string->number (or (configf:lookup *configdat* "server" "server-query-threshold")
;; 							"300")))
;; 			(newres     (rmt:open-qry-close-locally cmd run-id params)))
;; 		    (let ((delta (- (current-milliseconds) start-time)))
;; 		      (if (> delta max-query)
;; 			  (begin
;; 			    (debug:print-info 0 *default-log-port* "WARNING: long query times, you may have an overloaded homehost.") ;; Starting server as query time " delta " is over the limit of " max-query)
;; 			    ;; (server:kind-run run-id)))
;; 			    ))
;; 		      ;; return the result!
;; 		      newres)
;; 		    )))
;; 	  (begin
;; 	    ;; (debug:print-error 0 *default-log-port* "Communication failed!")
;; 	    ;; (mutex-unlock! *send-receive-mutex*)
;; 	    ;; (exit)
;; 	    (rmt:open-qry-close-locally cmd run-id params)
;; 	    ))))))

(define (rmt:update-db-stats run-id rawcmd params duration)
  (mutex-lock! *db-stats-mutex*)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 *default-log-port* "WARNING: stats collection failed in update-db-stats")











|






>
>




<
<
<
<
<
<
<
<
<




>
>
>
>
>
>
>










>
>
>
>
>











|
>
|





|
>
>
>
>
>
>
|
>
>
>
>
>
|


|

|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24









25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
;;======================================================================
;; Copyright 2006-2013, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.
;;======================================================================

(use format typed-records) ;; RADT => purpose of json format??

(declare (unit rmt))
(declare (uses api))
(declare (uses tdb))
(declare (uses http-transport))
;;(declare (uses nmsg-transport))
(include "common_records.scm")

;;
;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
;;










;; generate entries for ~/.megatestrc with the following
;;
;;  grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u

(defstruct remote
  (hh-dat            (common:get-homehost)) ;; homehost record ( addr . hhflag )
  (server-url        (if *toppath* (server:check-if-running *toppath*) #f))
  (last-server-check 0)  ;; last time we checked to see if the server was alive
  (conndat           #f)
  (transport         *transport-type*)
  (server-timeout    (or (server:get-timeout) 100))) ;; default to 100 seconds

;;======================================================================
;;  S U P P O R T   F U N C T I O N S
;;======================================================================

;; if a server is either running or in the process of starting call client:setup
;; else return #f to let the calling proc know that there is no server available
;;
(define (rmt:get-connection-info run-id)
  (let ((cinfo *runremote*)) ;; (hash-table-ref/default *runremote* run-id #f)))
;; how about if rrr is a defstruct and we use a wrapper to access it (even better would be a macro)
;; look in common_records for with-mutex
;;
;; (with-mutex *rrr-mutex* rmt:dat-host *runremote*) => returns value
;; (with-mutex *rrr-mutex* rmt:
    (if cinfo
	cinfo
	(if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id)
	    (client:setup run-id)
	    #f))))

(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected

  ;; do all the prep locked under the rmt-mutex
  (mutex-lock! *rmt-mutex*)

  ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in *runremote*
  ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
  ;; 3. do the query, if on homehost use local access
  ;;
  (let* ((start-time (current-time))) ;; snapshot time so all use cases get same value
    (cond
     ;; give up if more than 15 attempts
     ((> attemptnum 15)
      (debug:print 0 *default-log-port* "ERROR: 15 tries to start/connect to server. Giving up.")
      (exit 1))
     ;; ensure we have a record for our connection for given area
     ((not *runremote*)                     
      (set! *runremote* (make-remote))
      (mutex-unlock! *rmt-mutex*)
      (rmt:send-receive cmd rid params attemptnum: attemptnum))
     ;; no server contact made and this is a write, try starting a server 
     ((and (not (remote-server-url *runremote*))
	   (not (member cmd api:read-only-queries)))
      (let ((serverconn (server:check-if-running *toppath*)))
	(if serverconn
	    (remote-server-url-set! *runremote* serverconn) ;; the string can be consumed by the client setup if needed
	    (if (not (server:start-attempted? *toppath*))
		(server:kind-run *toppath*))))
      (mutex-unlock! *rmt-mutex*)
      (rmt:send-receive cmd rid params attemptnum: attemptnum))
     ;; if not on homehost ensure we have a connection to a live server
     ((or (not (pair? (remote-hh-dat *runremote*)))  ;; have a homehost record?
	  (not (cdr (remote-hh-dat *runremote*)))    ;; have record, are we on a homehost?
	  (not (remote-conndat *runremote*)))         ;; do we not have a connection?
      (remote-hh-dat-set!  *runremote* (common:get-homehost))
      (remote-conndat-set! *runremote* (rmt:get-connection-info 0))
      (mutex-unlock! *rmt-mutex*)
      (server:kind-run *toppath*) ;; we need a sever
      (rmt:send-receive cmd rid params attemptnum: attemptnum))
     ;; all set up if get this far, dispatch the query
     ((cdr (remote-hh-dat *runremote*)) ;; we are on homehost
      (mutex-unlock! *rmt-mutex*)
      (rmt:open-qry-close-locally cmd (if rid rid 0) params)      )
     ;; reset the connection if it has been unused too long
     ;(> (- start-time (remote-last-server-check *runremote*))
     ;(remote-server-timeout *runremote*)))   ;; we have timed out for this connection
     ;; not on homehost, do server query
     (else
      (mutex-unlock! *rmt-mutex*)
      (let* ((dat      (case (remote-transport *runremote*)
			 ((http)(condition-case
				 (http-transport:client-api-send-receive run-id connection-info cmd params)
				 ((commfail)(vector #f "communications fail"))
				 ((exn)(vector #f "other fail"))))
			 (else
			  (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport *runremote*) " not supported")
			  (exit))))
	     (success  (if (vector? dat) (vector-ref dat 0) #f))
	     (res      (if (vector? dat) (vector-ref dat 1) #f)))
	(if (vector? connection-info)(http-transport:server-dat-update-last-access connection-info)) ;; refresh access time
	(if (and success res)
	    (case (remote-transport *runremote*)
	      ((http) res)
	      (else
	       (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport *runremote*) " is unknown")
	       (exit 1)))
	    (begin
	      (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)
	      (remote-conndat-set! *runremote* #f)
	      (server-url-set!     *runremote* #f)
	      (tasks:start-and-wait-for-server (tasks:open-db) 0 15)
	      (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))))))))

(define (junk-delete-me)
  (let ((expire-time (- (current-seconds) (server:get-timeout) 10))) ;; don't forget the 10 second margin
    (for-each 
      (lambda (run-id)
        (let ((connection (hash-table-ref/default *runremote* run-id #f)))
          (if (and (vector? connection)
		   (< (http-transport:server-dat-get-last-access connection) expire-time))
              (begin
                (debug:print-info 0 *default-log-port* "Discarding connection to server for run-id " run-id ", too long between accesses")
                ;; bb- disabling nanomsg
                ;; SHOULD CLOSE THE CONNECTION HERE 
		;; (case *transport-type*
		;;   ((nmsg)(nn-close (http-transport:server-dat-get-socket 
		;;  		   (hash-table-ref *runremote* run-id)))))
                (hash-table-delete! *runremote* run-id)))))
      (hash-table-keys *runremote*)))
  ;; (mutex-unlock! *db-multi-sync-mutex*)
  ;; (mutex-lock! *send-receive-mutex*)
  (let* ((run-id          (if rid rid 0))
 	 (home-host       (common:get-homehost))
 	 (connection-info (if (cdr home-host) ;; we are on the home-host
 			      #f
 			      (rmt:get-connection-info run-id))))
    (cond
     (home-host        (rmt:open-qry-close-locally cmd run-id params))
     (connection-info
      ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also)
      ;; use the server if have connection info
      (let* ((dat     (case *transport-type*
 			((http)(condition-case
 				(http-transport:client-api-send-receive run-id connection-info cmd params)
 				((commfail)(vector #f "communications fail"))
 				((exn)(vector #f "other fail"))))
 			;; ((nmsg)(condition-case
 			;;         (nmsg-transport:client-api-send-receive run-id connection-info cmd params)
 			;;         ((timeout)(vector #f "timeout talking to server"))))
 			(else  (exit))))
 	     (success (if (vector? dat) (vector-ref dat 0) #f))
 	     (res     (if (vector? dat) (vector-ref dat 1) #f)))
 	(if (vector? connection-info)(http-transport:server-dat-update-last-access connection-info))
 	(if success
 	    (begin
 	      ;; (mutex-unlock! *send-receive-mutex*)
 	      (case *transport-type* 
 		((http) res) ;; (db:string->obj res))
 		;; ((nmsg) res)
 		)) ;; (vector-ref res 1)))
 	    (begin ;; let ((new-connection-info (client:setup run-id)))
 	      (debug:print 0 *default-log-port* "WARNING: Communication failed, trying call to rmt:send-receive again.")
 	      ;; (case *transport-type*
 	      ;;   ((nmsg)(nn-close (http-transport:server-dat-get-socket connection-info))))
 	      (hash-table-delete! *runremote* run-id) ;; don't keep using the same connection
 	      ;; NOTE: killing server causes this process to block forever. No idea why. Dec 2. 
 	      ;; (if (eq? (modulo attemptnum 5) 0)
 	      ;;     (tasks:kill-server-run-id run-id tag: "api-send-receive-failed"))
 	      ;; (mutex-unlock! *send-receive-mutex*) ;; close the mutex here to allow other threads access to communications
 	      (tasks:start-and-wait-for-server (tasks:open-db) run-id 15)
 	      ;; (nmsg-transport:client-api-send-receive run-id connection-info cmd param remtries: (- remtries 1))))))
 	      
 	      ;; no longer killing the server in http-transport:client-api-send-receive
 	      ;; may kill it here but what are the criteria?
 	      ;; start with three calls then kill server
 	      ;; (if (eq? attemptnum 3)(tasks:kill-server-run-id run-id))
 	      ;; (thread-sleep! 2)
 	      (rmt:send-receive cmd run-id params attemptnum: (+ attemptnum 1))))))
     (else
      ;; no connection info? try to start a server, or access locally if no
      ;; server and the query is read-only
      ;;
      ;; Note: The tasks db was checked for a server in starting mode in the rmt:get-connection-info call
      ;;
      (if (and (< attemptnum 15)
 	       (member cmd api:write-queries))
 	  (let ((homehost  (common:get-homehost))) ;; faststart (configf:lookup *configdat* "server" "faststart")))
	    (hash-table-delete! *runremote* run-id)
	    ;; (mutex-unlock! *send-receive-mutex*)
	    (if (not (cdr homehost)) ;; we always require a server if not on homehost ;; (and faststart (equal? faststart "no"))
		(begin
		  (tasks:start-and-wait-for-server (db:delay-if-busy (tasks:open-db)) run-id 10)
		  (thread-sleep! (random 5)) ;; give some time to settle and minimize collison?
		  (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
		;; NB - probably can remove the query time stuff but need to discuss it ....
		(let ((start-time (current-milliseconds))
		      (max-query  (string->number (or (configf:lookup *configdat* "server" "server-query-threshold")
						      "300")))
		      (newres     (rmt:open-qry-close-locally cmd run-id params)))
		  (let ((delta (- (current-milliseconds) start-time)))
		    (if (> delta max-query)
			(begin
			  (debug:print-info 0 *default-log-port* "WARNING: long query times, you may have an overloaded homehost.") ;; Starting server as query time " delta " is over the limit of " max-query)
			  ;; (server:kind-run run-id)))
			  ))
		    ;; return the result!
		    newres)
		  )))
 	  (begin
 	    ;; (debug:print-error 0 *default-log-port* "Communication failed!")
 	    ;; (mutex-unlock! *send-receive-mutex*)
 	    ;; (exit)
 	    (rmt:open-qry-close-locally cmd run-id params)
 	    ))))))

(define (rmt:update-db-stats run-id rawcmd params duration)
  (mutex-lock! *db-stats-mutex*)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 *default-log-port* "WARNING: stats collection failed in update-db-stats")
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
	(vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!!
	#f)))
;; 	(db:string->obj (vector-ref dat 1))
;; 	(begin
;; 	  (debug:print-error 0 *default-log-port* "rmt:send-receive-no-auto-client-setup failed, attempting to continue. Got " dat)
;; 	  dat))))

;; Wrap json library for strings (why the ports crap in the first place?)
(define (rmt:dat->json-str dat)
  (with-output-to-string 
    (lambda ()
      (json-write dat))))

(define (rmt:json-str->dat json-str)
  (with-input-from-string json-str
    (lambda ()
      (json-read))))

;;======================================================================
;;
;; A C T U A L   A P I   C A L L S  
;;
;;======================================================================








|
|
|
|
|
|
|
|
|
|







335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
	(vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!!
	#f)))
;; 	(db:string->obj (vector-ref dat 1))
;; 	(begin
;; 	  (debug:print-error 0 *default-log-port* "rmt:send-receive-no-auto-client-setup failed, attempting to continue. Got " dat)
;; 	  dat))))

;; ;; Wrap json library for strings (why the ports crap in the first place?)
;; (define (rmt:dat->json-str dat)
;;   (with-output-to-string 
;;     (lambda ()
;;       (json-write dat))))
;; 
;; (define (rmt:json-str->dat json-str)
;;   (with-input-from-string json-str
;;     (lambda ()
;;       (json-read))))

;;======================================================================
;;
;; A C T U A L   A P I   C A L L S  
;;
;;======================================================================

Modified server.scm from [15a5983a03] to [0d4a46d4c7].

164
165
166
167
168
169
170



171
172
173
174
175
176
177



178
179
180
181
182
183
184
185
186
187
188
189
190
191
;;    (if (eq? run-id 0)
;;        (server:run run-id)
;;        (rmt:start-server run-id)))
(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.

(define (server:start-attempted? areapath)
  (let ((flagfile (conc areapath "/.starting-server")))



    (and (file-exists? flagfile)
	 (< (- (current-seconds)
	       (file-modification-time flagfile))
	    15)))) ;; exists and less than 15 seconds old
    
(define (server:read-dotserver areapath)
  (let ((dotfile (conc areapath "/.server")))



    (if (and (file-exists? dotfile)
	     (file-read-access? dotfile))
	(with-input-from-file
	    dotfile
	  (lambda ()
	    (read-line)))
	#f)))

;; write a .server file in *toppath* with hostport
;; return #t on success, #f otherwise
;;
(define (server:write-dotserver areapath hostport)
  (let ((lock-file   (conc areapath "/.server.lock"))
	(server-file (conc areapath "/.server")))







>
>
>
|
|
|
|



>
>
>
|
|
|
|
|
|
|







164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
;;    (if (eq? run-id 0)
;;        (server:run run-id)
;;        (rmt:start-server run-id)))
(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.

(define (server:start-attempted? areapath)
  (let ((flagfile (conc areapath "/.starting-server")))
    (handle-exceptions
     exn
     #f  ;; if things go wrong pretend we can't see the file
     (and (file-exists? flagfile)
	  (< (- (current-seconds)
		(file-modification-time flagfile))
	     15))))) ;; exists and less than 15 seconds old
    
(define (server:read-dotserver areapath)
  (let ((dotfile (conc areapath "/.server")))
    (handle-exceptions
     exn
     #f  ;; if things go wrong pretend we can't see the file
     (if (and (file-exists? dotfile)
	      (file-read-access? dotfile))
	 (with-input-from-file
	     dotfile
	   (lambda ()
	     (read-line)))
	 #f))))

;; write a .server file in *toppath* with hostport
;; return #t on success, #f otherwise
;;
(define (server:write-dotserver areapath hostport)
  (let ((lock-file   (conc areapath "/.server.lock"))
	(server-file (conc areapath "/.server")))