Megatest

Check-in [8df45e5eed]
Login
Overview
Comment:Partial migration of server-dat to vector
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.60-no-compile
Files: files | file ages | folders
SHA1: 8df45e5eed7cb7a83b6f35c2864b935d0d4556ba
User & Date: matt on 2014-03-11 12:15:01
Other Links: branch diff | manifest | tags
Context
2014-03-11
22:17
Fixed http-transport start-on-dead issue Closed-Leaf check-in: 21e4cbf850 user: matt tags: v1.60-no-compile
12:15
Partial migration of server-dat to vector check-in: 8df45e5eed user: matt tags: v1.60-no-compile
2014-03-10
16:18
Added better but still too slow startup of server when prev-running server is stuck check-in: acfb29f22e user: mrwellan tags: v1.60
Changes

Modified client.scm from [ef3271835b] to [db9b1cb823].

67
68
69
70
71
72
73

74
75
76
77
78

79
80
81
82
83
84
85

86
87
88
89
90
91
92
93
	    (let* ((iface     (car  host-info))
		   (port      (cadr host-info))
		   (start-res (http-transport:client-connect iface port))
		   ;; (ping-res  (server:ping-server run-id iface port))
		   (ping-res  (rmt:login-no-auto-client-setup start-res run-id)))
	      (if ping-res   ;; sucessful login?
		  (begin

		    (hash-table-set! *runremote* run-id start-res)
		    start-res)  ;; return the server info
		  (if (member remaining-tries '(3 4 6))
		      (begin    ;; login failed
			(debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" host-info)

			(hash-table-delete! *runremote* run-id)
			(open-run-close tasks:server-force-clean-run-record
			 		tasks:open-db
			 		run-id 
			 		(car  host-info)
			 		(cadr host-info)
					" client:setup (host-info=#t)")

			(thread-sleep! 5)
			(client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1)))
		      (begin
			(debug:print 25 "INFO: client:setup failed to connect, start-res=" start-res ", run-id=" run-id ", host-info=" host-info)
			(thread-sleep! 5)
			(client:setup run-id remaining-tries: (- remaining-tries 1))))))
	    ;; YUK: rename server-dat here
	    (let* ((server-dat (open-run-close tasks:get-server tasks:open-db run-id)))







>


|


>







>
|







67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
	    (let* ((iface     (car  host-info))
		   (port      (cadr host-info))
		   (start-res (http-transport:client-connect iface port))
		   ;; (ping-res  (server:ping-server run-id iface port))
		   (ping-res  (rmt:login-no-auto-client-setup start-res run-id)))
	      (if ping-res   ;; sucessful login?
		  (begin
		    (http-transport:close-connections run-id)
		    (hash-table-set! *runremote* run-id start-res)
		    start-res)  ;; return the server info
		  (if (member remaining-tries '(9 6 4 2))
		      (begin    ;; login failed
			(debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" host-info)
			(http-transport:close-connections run-id)
			(hash-table-delete! *runremote* run-id)
			(open-run-close tasks:server-force-clean-run-record
			 		tasks:open-db
			 		run-id 
			 		(car  host-info)
			 		(cadr host-info)
					" client:setup (host-info=#t)")
			(if (< remaining-tries 8)
			    (thread-sleep! 5))
			(client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1)))
		      (begin
			(debug:print 25 "INFO: client:setup failed to connect, start-res=" start-res ", run-id=" run-id ", host-info=" host-info)
			(thread-sleep! 5)
			(client:setup run-id remaining-tries: (- remaining-tries 1))))))
	    ;; YUK: rename server-dat here
	    (let* ((server-dat (open-run-close tasks:get-server tasks:open-db run-id)))

Modified http-transport.scm from [cda5622689] to [d2de8d2ff6].

91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
				   (send-response body:    (api:process-request db $) ;; the $ is the request vars proc
						  headers: '((content-type text/plain)))
				   (mutex-lock! *heartbeat-mutex*)
				   (set! *last-db-access* (current-seconds))
				   (mutex-unlock! *heartbeat-mutex*))
				  ;; This is the /ctrl path where data is handed to the server and
				  ;; responses 
				  ((equal? (uri-path (request-uri (current-request)))
					   '(/ "ctrl"))
				   (let* ((packet (db:string->obj dat))
					  (qtype  (cdb:packet-get-qtype packet)))
				     (debug:print-info 12 "server=> received packet=" packet)
				     (if (not (member qtype '(sync ping)))
					 (begin
					   (mutex-lock! *heartbeat-mutex*)
					   (set! *last-db-access* (current-seconds))
					   (mutex-unlock! *heartbeat-mutex*)))
				     ;; (mutex-lock! *db:process-queue-mutex*) ;; trying a mutex
				     ;; (set! res (open-run-close db:process-queue-item open-db packet))
				     (set! res (db:process-queue-item db packet))
				     ;; (mutex-unlock! *db:process-queue-mutex*)
				     (debug:print-info 11 "Return value from db:process-queue-item is " res)
				     (send-response body: (conc "<head>ctrl data</head>\n<body>"
								res
								"</body>")
						    headers: '((content-type text/plain)))))
				  ((equal? (uri-path (request-uri (current-request))) 
					   '(/ ""))
				   (send-response body: (http-transport:main-page)))
				  ((equal? (uri-path (request-uri (current-request))) 
					   '(/ "runs"))
				   (send-response body: (http-transport:main-page)))
				  ((equal? (uri-path (request-uri (current-request))) 







|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
				   (send-response body:    (api:process-request db $) ;; the $ is the request vars proc
						  headers: '((content-type text/plain)))
				   (mutex-lock! *heartbeat-mutex*)
				   (set! *last-db-access* (current-seconds))
				   (mutex-unlock! *heartbeat-mutex*))
				  ;; This is the /ctrl path where data is handed to the server and
				  ;; responses 
				  ;; ((equal? (uri-path (request-uri (current-request)))
				  ;;          '(/ "ctrl"))
				  ;;  (let* ((packet (db:string->obj dat))
				  ;;         (qtype  (cdb:packet-get-qtype packet)))
				  ;;    (debug:print-info 12 "server=> received packet=" packet)
				  ;;    (if (not (member qtype '(sync ping)))
				  ;;        (begin
				  ;;          (mutex-lock! *heartbeat-mutex*)
				  ;;          (set! *last-db-access* (current-seconds))
				  ;;          (mutex-unlock! *heartbeat-mutex*)))
				  ;;    ;; (mutex-lock! *db:process-queue-mutex*) ;; trying a mutex
				  ;;    ;; (set! res (open-run-close db:process-queue-item open-db packet))
				  ;;    (set! res (db:process-queue-item db packet))
				  ;;    ;; (mutex-unlock! *db:process-queue-mutex*)
				  ;;    (debug:print-info 11 "Return value from db:process-queue-item is " res)
				  ;;    (send-response body: (conc "<head>ctrl data</head>\n<body>"
				  ;;       			res
				  ;;       			"</body>")
				  ;;       	    headers: '((content-type text/plain)))))
				  ((equal? (uri-path (request-uri (current-request))) 
					   '(/ ""))
				   (send-response body: (http-transport:main-page)))
				  ((equal? (uri-path (request-uri (current-request))) 
					   '(/ "runs"))
				   (send-response body: (http-transport:main-page)))
				  ((equal? (uri-path (request-uri (current-request))) 
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
			   (exit 1))))
	 (res        #f))
    (handle-exceptions
     exn
     (if (> numretries 0)
	 (begin
	   (mutex-unlock! *http-mutex*)
	   (thread-sleep! 2)
	   (close-all-connections!)
	   (debug:print 0 "WARNING: Failed to communicate with server, trying again, numretries left: " numretries)
	   (http-transport:client-api-send-receive run-id serverdat cmd params numretries: (- numretries 1)))
	 (begin
	   (mutex-unlock! *http-mutex*)
	   #f))
     (begin







|







230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
			   (exit 1))))
	 (res        #f))
    (handle-exceptions
     exn
     (if (> numretries 0)
	 (begin
	   (mutex-unlock! *http-mutex*)
	   (thread-sleep! 1)
	   (close-all-connections!)
	   (debug:print 0 "WARNING: Failed to communicate with server, trying again, numretries left: " numretries)
	   (http-transport:client-api-send-receive run-id serverdat cmd params numretries: (- numretries 1)))
	 (begin
	   (mutex-unlock! *http-mutex*)
	   #f))
     (begin
271
272
273
274
275
276
277

278




















279
280
281
282
283
284
285
286

287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
	 (thread-start! th1)
	 (thread-start! th2)
	 (thread-join! th1)
	 (thread-terminate! th2)
	 (debug:print-info 11 "got res=" res)
	 res)))))


;;




















;; connect
;;
(define (http-transport:client-connect iface port)
  (let* ((api-url     (conc "http://" iface ":" port "/api"))
	 (uri-dat     (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/ctrl"))))
	 ;; (uri-dat     (make-request method: 'GET uri: (uri-reference (conc "http://" iface ":" port "/ctrl"))))
	 (uri-api-dat (make-request method: 'POST uri: api-url)) ;; (uri-reference (conc "http://" iface ":" port "/api"))))
	 ;; (uri-api-dat (make-request method: 'GET uri: (uri-reference (conc "http://" iface ":" port "/api"))))

	 (server-dat  (list iface port uri-dat uri-api-dat api-url)))
;;	 (login-res   (server:ping-server run-id server-dat))) ;; login-no-auto-client-setup server-dat run-id)))
    server-dat))
;;     (if (and (list? login-res)
;; 	     (car login-res))
;; 	(begin
;; 	  (hash-table-set! *runremote* run-id server-dat)
;; 	  (debug:print-info 2 "Logged in and connected to " iface ":" port)
;; 	  (hash-table-set! *runremote* run-id server-dat)
;; 	  server-dat)
;; 	(begin
;; 	  (debug:print-info 0 "ERROR: Failed to login or connect to " iface ":" port)
;; 	  #f))))

;; run http-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running server-id)
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown







>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



|
|
<
|
|
>
|
<

<
<
<
<
<
<
<
<
<
<







271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304

305
306
307
308

309










310
311
312
313
314
315
316
	 (thread-start! th1)
	 (thread-start! th2)
	 (thread-join! th1)
	 (thread-terminate! th2)
	 (debug:print-info 11 "got res=" res)
	 res)))))

;; careful closing of connections stored in *runremote*
;;
(define (http-transport:close-connections run-id)
  (let* ((server-dat (hash-table-ref/default *runremote* run-id #f)))
    (if (and (list? server-dat)
	     (>= (length server-dat) 5))
	(let ((ctrl-dat (list-ref server-dat 2))
	      (api-dat  (list-ref server-dat 3)))
	  (close-connection! ctrl-dat)
	  (close-connection! api-dat)
	  #t)
	#f)))


(define (make-http-transport:server-dat)(make-vector 5))
(define (http-transport:server-dat-get-iface         vec)    (vector-ref  vec 0))
(define (http-transport:server-dat-get-port          vec)    (vector-ref  vec 1))
(define (http-transport:server-dat-get-api-uri       vec)    (vector-ref  vec 2))
(define (http-transport:server-dat-get-api-url       vec)    (vector-ref  vec 3))
(define (http-transport:server-dat-get-api-request   vec)    (vector-ref  vec 4))

;;
;; connect
;;
(define (http-transport:client-connect iface port)
  (let* ((api-url      (conc "http://" iface ":" port "/api"))
	 (api-uri      (uri-reference (conc "http://" iface ":" port "/api")))

	 (uri-api-dat  (make-request method: 'POST uri: api-uri))
	 ;; (uri-ctrl-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/ctrl"))))
	 (uri-ctrl-dat #f) ;; not used anymore
	 (server-dat   (vector iface port uri-ctrl-dat uri-api-dat api-url)))

    server-dat))











;; run http-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running server-id)
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown

Modified tests/fullrun/config/mt_include_1.config from [e1668a96e9] to [798352a4c6].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
[setup]
# exectutable /path/to/megatest
max_concurrent_jobs 500

linktree #{getenv MT_RUN_AREA_HOME}/tmp/mt_links

[jobtools]
useshell yes
# ## launcher launches jobs, the job is managed on the target host
## by megatest, comment out launcher to run local
# workhosts localhost hermes
# launcher exec nbfake
# launcher nbfake
launcher loadrunner
# launcher echo
# launcher nbfind
# launcher nodanggood
# launcher nbload

## use "xterm -e csi -- " as a launcher to examine the launch environment.
## exit with (exit)
## get a shell with (system "bash")
# launcher xterm -e csi --


|









|
|









1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
[setup]
# exectutable /path/to/megatest
max_concurrent_jobs 20

linktree #{getenv MT_RUN_AREA_HOME}/tmp/mt_links

[jobtools]
useshell yes
# ## launcher launches jobs, the job is managed on the target host
## by megatest, comment out launcher to run local
# workhosts localhost hermes
# launcher exec nbfake
launcher nbfake
# launcher loadrunner
# launcher echo
# launcher nbfind
# launcher nodanggood
# launcher nbload

## use "xterm -e csi -- " as a launcher to examine the launch environment.
## exit with (exit)
## get a shell with (system "bash")
# launcher xterm -e csi --

Modified tests/fullrun/megatest.config from [68ba577ce2] to [385c046203].

114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# If the server can't be started on this port it will try the next port until
# it succeeds
port 8080

# This server will keep running this number of hours after last access. 
# Three minutes is 0.05 hours
# timeout 0.025
timeout 0.25

## disks are:
## name host:/path/to/area
## -or-
## name /path/to/area
[disks]
disk0 /foobarbazz







|







114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# If the server can't be started on this port it will try the next port until
# it succeeds
port 8080

# This server will keep running this number of hours after last access. 
# Three minutes is 0.05 hours
# timeout 0.025
timeout 0.025

## disks are:
## name host:/path/to/area
## -or-
## name /path/to/area
[disks]
disk0 /foobarbazz