Megatest

Check-in [2b3405f60c]
Login
Overview
Comment:Partially completed rework of server/client logic
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | re-re-factor-server
Files: files | file ages | folders
SHA1: 2b3405f60c1493e0f0bd6d3dac74d596db0af022
User & Date: matt on 2014-02-17 18:26:08
Other Links: branch diff | manifest | tags
Context
2014-02-17
19:36
Trimmed out some junk code and fixed some logic in the server start up sequencing check-in: 4a2103f62b user: matt tags: re-re-factor-server
18:26
Partially completed rework of server/client logic check-in: 2b3405f60c user: matt tags: re-re-factor-server
2014-02-16
23:42
Partial fix for run-id of zero server refusing to start when other servers are in the available state check-in: 452be75fb9 user: matt tags: re-re-factor-server
Changes

Modified client.scm from [20000f46f9] to [065a0a550e].

52
53
54
55
56
57
58




59
60

61
62
63
64
65






66


67
68

69


70

71
72






73

74






75

76
77
78
79
80
81
82
83
84
;;      *transport-type* and *runremote* from the monitor.db
;;
;; client:setup
;;
;; lookup_server, need to remove *runremote* stuff
;;
(define (client:setup run-id #!key (remaining-tries 3))




  (let ((hostinfo (and run-id (hash-table-ref/default *runremote* run-id #f))))
    (if hostinfo

	hostinfo ;; have hostinfo - just return it
	(let* ((hostinfo  (open-run-close tasks:get-server tasks:open-db run-id)))
	  (if (not hostinfo)
	      (if (> remaining-tries 0)
		  (begin






		    (server:ensure-running run-id)


		    (client:setup run-id remaining-tries: (- remaining-tries 1)))
		  (begin

		    (debug:print 0 "ERROR: Expected to be able to connect to a server by now. No server available for run-id = " run-id)


		    (exit 1)))

	      (begin
		(hash-table-set! *runremote* run-id hostinfo)






		(debug:print-info 11 "CLIENT SETUP, hostinfo=" hostinfo)

		(client:start run-id hostinfo)))))))








(define (client:start run-id server-info)
  ;; this saves the server-info in the *runremote* hash and returns it
  (http-transport:client-connect run-id 
				 (tasks:hostinfo-get-interface server-info)
				 (tasks:hostinfo-get-port server-info)))

;; client:signal-handler
(define (client:signal-handler signum)
  (handle-exceptions







>
>
>
>
|
|
>
|
<
|
|

>
>
>
>
>
>
|
>
>
|
<
>
|
>
>
|
>
|
|
>
>
>
>
>
>
|
>
|
>
>
>
>
>
>

>

<







52
53
54
55
56
57
58
59
60
61
62
63
64
65
66

67
68
69
70
71
72
73
74
75
76
77
78
79

80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105

106
107
108
109
110
111
112
;;      *transport-type* and *runremote* from the monitor.db
;;
;; client:setup
;;
;; lookup_server, need to remove *runremote* stuff
;;
(define (client:setup run-id #!key (remaining-tries 3))
  (if (<= remaining-tries 0)
      (begin
	(debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id)
	(exit 1))
      (let ((server-dat (and run-id (hash-table-ref/default *runremote* run-id #f))))
	(if server-dat
	    (let ((start-res (http-transport:client-connect run-id
							    (tasks:hostinfo-get-interface server-dat)

							    (tasks:hostinfo-get-port      server-dat))))
	      (if start-res ;; sucessful login?
		  (begin
		    (hash-table-set! *runremote* run-id server-dat)
		    server-dat)
		  (begin    ;; login failed
		    (hash-table-delete! *runremote* run-id)
		    (open-run-close tasks:server-force-clean-run-record
				    tasks:open-db
				    run-id 
				    (tasks:hostinfo-get-interface server-dat)
				    (tasks:hostinfo-get-port      server-dat))
		    (client:setup run-id remaining-tries: (- remaining-tries 1)))))

	    (let* ((server-dat (open-run-close tasks:get-server tasks:open-db run-id)))
	      (if server-dat
		  (let ((start-res (http-transport:client-connect run-id
								  (tasks:hostinfo-get-interface server-dat)
								  (tasks:hostinfo-get-port      server-dat))))
		    (if start-res
			(begin
			  (hash-table-set! *runremote* run-id server-dat)
			  server-dat)
			(begin    ;; login failed
			  (hash-table-delete! *runremote* run-id)
			  (open-run-close tasks:server-force-clean-run-record
					  tasks:open-db
					  run-id 
					  (tasks:hostinfo-get-interface server-dat)
					  (tasks:hostinfo-get-port      server-dat))
			  (server:try-running run-id)
			  (thread-sleep! 3) ;; give server a little time to start up
			  (client:setup run-id remaining-tries: (- remaining-tries 1)))))
		  (begin    ;; no server registered
		    (server:try-running run-id)
		    (thread-sleep! 3) ;; give server a little time to start up
		    (client:setup run-id remaining-tries: (- remaining-tries 1)))))))))

;; keep this as a function to ease future 
(define (client:start run-id server-info)

  (http-transport:client-connect run-id 
				 (tasks:hostinfo-get-interface server-info)
				 (tasks:hostinfo-get-port server-info)))

;; client:signal-handler
(define (client:signal-handler signum)
  (handle-exceptions

Modified docs/manual/Makefile from [c3b17d73da] to [038153bc89].


1
2
3
4
5
6
7
8


9
10
11


megatest_manual.html : megatest_manual.txt getting_started.txt writing_tests.txt reference.txt ../plan.txt
	asciidoc megatest_manual.txt
	dos2unix megatest_manual.html

server.pdf : server.dot
	dot -Tpdf server.dot > server.pdf
	



clean:
	rm -f megatest_manual.html
>








>
>



1
2
3
4
5
6
7
8
9
10
11
12
13
14
all : server.pdf megatest_manual.html client.pdf

megatest_manual.html : megatest_manual.txt getting_started.txt writing_tests.txt reference.txt ../plan.txt
	asciidoc megatest_manual.txt
	dos2unix megatest_manual.html

server.pdf : server.dot
	dot -Tpdf server.dot > server.pdf
	
client.pdf : client.dot
	dot -Tpdf client.dot > client.pdf

clean:
	rm -f megatest_manual.html

Added docs/manual/client.dot version [23d472e170].







































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
digraph G {

    // put client after server so server_start node is visible
    //
    subgraph cluster_2 {
        node [style=filled,shape=box];
	
	"client:setup start"     -> runremote_lookup_server;
	runremote_lookup_server  -> login_attempt [label="have server"];
	runremote_lookup_server  -> monitordb_lookup_server [label="no server"];

	monitordb_lookup_server  -> login_attempt [label="have server"];
	monitordb_lookup_server  -> server_start_remote [label="no server"];

	server_start_remote      -> delay_2_sec;
	delay_2_sec              -> runremote_lookup_server;

	login_attempt            -> "rmt:send-receive_start" [label="login sucessful"];
	"rmt:send-receive_start" -> "rmt:send-receive_start";

	"rmt:send-receive_start" -> runremote_lookup_server [label=exception];
	login_attempt            -> clear_runremote [label="login failed"];

	"remove_running > 5s"    -> runremote_lookup_server;

	subgraph cluster_3 {
		node [style=filled];
		clear_runremote          -> "remove_running > 5s";
	}

        label = "client:setup";
        color=green;
    }

}

Modified docs/manual/server.dot from [2cf0263449] to [5b6f6b599f].

1
2
3
4
5
6
7
8


9
10
11
12
13
14
15


16
17
18
19
20
21
22
23

24

25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
digraph G {

    // put client after server so server_start node is visible
    //
    subgraph cluster_0 {
        node [style=filled];
	
	start_client -> lookup_server;


	lookup_server -> connect [label=found];

	lookup_server -> "server_available?";
	"server_available?" -> delay [label=yes];
	"server_available?" -> client_start_server [label=no];

	client_start_server -> delay;



	connect -> login;
	login -> read_write [label=success];
	login -> "server_dead?" [label=fail];

	read_write -> timeout -> "server_dead?";
	read_write -> wrong_server -> delay;
	// read_write -> read_write;

	

	"server_dead?" -> remove_record [label="yes (too many tries)"];
	remove_record -> lookup_server;
	"server_dead?" -> delay [label=no];

	delay -> lookup_server;	

        label = "client";
        color=green;
    }

    subgraph cluster_1 {
        node [style=filled];
	
	start_server -> "server_running?";
	"server_running?" -> set_available [label="no"];
	"server_running?" -> delay_2s [label="yes"];
	delay_2s -> "still_running?";
	"still_running?" -> ping_server [label=yes];
	"still_running?" -> set_available [label=no];
	ping_server -> exit [label=alive];
	ping_server -> remove_server_record [label=dead];
	remove_server_record -> set_available;
	set_available -> avail_delay [label="delay 3s"];
	avail_delay -> "first_in_queue?";

	"first_in_queue?" -> set_running [label=yes];
	set_running -> get_next_port -> handle_requests;
	"first_in_queue?" -> "dead_entry_in_queue?" [label=no];
	"dead_entry_in_queue?" -> "server_running?" [label=no];
	"dead_entry_in_queue?" -> "remove_dead_entries" [label=yes];
	remove_dead_entries -> "server_running?";

	handle_requests -> start_shutdown [label="no traffic"];
	handle_requests -> shutdown_request;
	start_shutdown -> shutdown_delay;
	shutdown_request -> shutdown_delay;
	shutdown_delay -> exit;
	
        label = "server";
        color=brown;
    }

    client_start_server -> start_server;
    handle_requests -> read_write;
    read_write -> handle_requests;
}


<
<
|
|
|
<
>
>
|

|
|
<

<
>
>

<
|
|

|
<
|
>
|
>
|
<
|
|
|
|
<
<
|
<
<
<
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

|



|
|
|

1
2


3
4
5

6
7
8
9
10
11

12

13
14
15

16
17
18
19

20
21
22
23
24

25
26
27
28


29




30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
digraph G {



    subgraph cluster_1 {
        node [style=filled,shape=box];


	check_available_queue       -> remove_entries_over_10s_old;
	remove_entries_over_10s_old -> set_available [label="num_avail < 3"];
	remove_entries_over_10s_old -> exit [label="num_avail > 2"];

	set_available               -> delay_2s;
	delay_2s          -> check_place_in_queue;



	check_place_in_queue        -> "http:transport-launch" [label="at head"];
	check_place_in_queue        -> exit [label="not at head"];


	"client:login"              -> "server:shutdown" [label="login failed"];
	"server:shutdown"           -> exit;	

	subgraph cluster_2 {

		"http:transport-launch"       -> "http:transport-run";
		"http:transport-launch"       -> "http:transport-keep-running";

		"http:transport-keep-running" -> "tests running?";
		"tests running?"              -> "client:login" [label=yes];

		"tests running?"              -> "server:shutdown" [label=no];
		"client:login"                -> delay_5s [label="login ok"];
		delay_5s                      -> "http:transport-keep-running";
	}







	// start_server -> "server_running?";
	// "server_running?" -> set_available [label="no"];
	// "server_running?" -> delay_2s [label="yes"];
	// delay_2s -> "still_running?";
	// "still_running?" -> ping_server [label=yes];
	// "still_running?" -> set_available [label=no];
	// ping_server -> exit [label=alive];
	// ping_server -> remove_server_record [label=dead];
	// remove_server_record -> set_available;
	// set_available -> avail_delay [label="delay 3s"];
	// avail_delay -> "first_in_queue?";
	// 
	// "first_in_queue?" -> set_running [label=yes];
	// set_running -> get_next_port -> handle_requests;
	// "first_in_queue?" -> "dead_entry_in_queue?" [label=no];
	// "dead_entry_in_queue?" -> "server_running?" [label=no];
	// "dead_entry_in_queue?" -> "remove_dead_entries" [label=yes];
	// remove_dead_entries -> "server_running?";
	// 
	// handle_requests -> start_shutdown [label="no traffic\nno running tests"];
	// handle_requests -> shutdown_request;
	// start_shutdown -> shutdown_delay;
	// shutdown_request -> shutdown_delay;
	// shutdown_delay -> exit;
	
        label = "server:launch";
        color=brown;
    }

//     client_start_server -> start_server;
//     handle_requests -> read_write;
//     read_write -> handle_requests;
}

Modified http-transport.scm from [ab4a44cd0c] to [af9b9bb667].

213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291


292
293
294
295
296


297
298



299
300
301
302
303
304
305
306

307
308
309
310
311
312
313
314
315
316
317

318
319
320
321
322
323
324
325
  (set! *http-connections-next-cleanup* (+ (current-seconds) 10))
  (mutex-unlock! *http-mutex*))

(define (http-transport:inc-requests-and-prep-to-close-all-connections)
  (mutex-lock! *http-mutex*)
  (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)))

;; (system "megatest -list-servers | grep alive || megatest -server - -daemonize && sleep 4")

;; <html>
;; <head></head>
;; <body>1 Hello, world! Goodbye Dolly</body></html>
;; Send msg to serverdat and receive result
(define (http-transport:client-send-receive serverdat msg #!key (numretries 30))
  (let* (;; (url        (http-transport:make-server-url serverdat))
	 (fullurl    (if (list? serverdat)
			 (caddr serverdat)
			 (begin
			   (debug:print 0 "FATAL ERROR: http-transport:client-send-receive called with no server info")
			   (exit 1)))) ;; (conc url "/ctrl")) ;; (conc url "/?dat=" msg)))
	 (res        #f))
    (handle-exceptions
     exn
     (begin
       (print "ERROR IN http-transport:client-send-receive " ((condition-property-accessor 'exn 'message) exn))
       (if (> numretries 0)
	   (begin
	     (thread-sleep! 2)
	     (http-transport:client-send-receive serverdat msg numretries: (- numretries 1)))
	   #f))
     (begin
       (debug:print-info 11 "fullurl=" fullurl "\n")
       ;; set up the http-client here
       (max-retry-attempts 5)
       ;; consider all requests indempotent
       (retry-request? (lambda (request)
			 #t))   ;;  		 (thread-sleep! (/ (if (> numretries 100) 100 numretries) 10))
       ;; (set! numretries (- numretries 1))
       ;;  		 #t))
       ;; send the data and get the response
       ;; extract the needed info from the http data and 
       ;; process and return it.
       (let* ((send-recieve (lambda ()
			      (mutex-lock! *http-mutex*)
			      (set! res (with-input-from-request 
					 fullurl 
					 (list (cons 'dat msg)) 
					 read-string))
			      (close-all-connections!) 
			      (mutex-unlock! *http-mutex*)))
	      (time-out     (lambda ()
			      (thread-sleep! 45)
			      (if (not res)
				  (begin
				    (debug:print 0 "WARNING: communication with the server timed out.")
				    (mutex-unlock! *http-mutex*)
				    ;; Maybe the server died? Try starting it up.
				    (server:ensure-running run-id)
				    (http-transport:client-send-receive serverdat msg numretries: (- numretries 1))
				    (if (< numretries 3) ;; on last try just exit
					(begin
					  (debug:print 0 "ERROR: communication with the server timed out. Giving up.")
					  (exit 1)))))))
	      (th1 (make-thread send-recieve "with-input-from-request"))
	      (th2 (make-thread time-out     "time out")))
	 (thread-start! th1)
	 (thread-start! th2)
	 (thread-join! th1)
	 (thread-terminate! th2)
	 (debug:print-info 11 "got res=" res)
	 (let ((match (string-search (regexp "<body>(.*)<.body>") res)))
	   (debug:print-info 11 "match=" match)
	   (let ((final (cadr match)))
	     (debug:print-info 11 "final=" final)
	     final)))))))

;; Send "cmd" with json payload "params" to serverdat and receive result
;;
(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 30))


  (if (not serverdat) ;; get #f, something went wrong. try starting the server again and reconnecting
      (begin
	;; try to restart the server and then reconnect
	(server:ensure-running run-id)
	(hash-table-delete! *runremote* run-id)


	(client:setup run-id)
	(set! serverdat (hash-table-ref/default *runremote* run-id #f))))



  (let* ((fullurl    (if (list? serverdat)
			 (cadddr serverdat) ;; this is the uri for /api
			 (begin
			   (debug:print 0 "FATAL ERROR: http-transport:client-send-receive called with no server info")
			   (exit 1))))
	 (res        #f))
    (handle-exceptions
     exn

     (begin
       ;; TODO: Send this output to a log file so it isn't lost when running as daemon
       (if (> numretries 0)
	   ;; on the zeroeth retry do not print the error message - this allows the call to be used as a ping (no junk on output).
	   (begin
	     (print "ERROR IN http-transport:client-api-send-receive " ((condition-property-accessor 'exn 'message) exn))
	     ;; try to restart the server and then reconnect
	     (server:ensure-running run-id)
	     (hash-table-delete! *runremote* run-id)
	     (client:setup run-id)
	     (http-transport:client-api-send-receive run-id serverdat cmd params numretries: (- numretries 1)))

	   #f))
     (begin
       (debug:print-info 11 "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n")
       ;; set up the http-client here
       (max-retry-attempts 5)
       ;; consider all requests indempotent
       (retry-request? (lambda (request)
			 #t))   ;;  		 (thread-sleep! (/ (if (> numretries 100) 100 numretries) 10))







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<



>
>
|
|
|
<
|
>
>
|
|
>
>
>



|




>
|

|
|
|
|
|
<
|
|
|
>
|







213
214
215
216
217
218
219





































































220
221
222
223
224
225
226
227

228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251

252
253
254
255
256
257
258
259
260
261
262
263
  (set! *http-connections-next-cleanup* (+ (current-seconds) 10))
  (mutex-unlock! *http-mutex*))

(define (http-transport:inc-requests-and-prep-to-close-all-connections)
  (mutex-lock! *http-mutex*)
  (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)))






































































;; Send "cmd" with json payload "params" to serverdat and receive result
;;
(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 30))
  ;; (let loop ((sdat  serverdat)
  ;;            (tries 10))
  ;;   (if (not sdat) ;; get #f, something went wrong. try starting the server again and reconnecting
  ;;       (begin
  ;;         ;; try to restart the server and then reconnect

  ;;         ;; (hash-table-delete! *runremote* run-id) ;; this should be taken care of by client:setup
  ;;         (thread-sleep! 1)
  ;;         (if (> tries 0)
  ;;             (let ((newsdat (client:setup run-id)))
  ;;       	(set! serverdat newsdat)
  ;;       	(loop newsdat (- tries 1)))
  ;;             (debug:print 0 "ERROR: could not connect to or start a server for run-id " run-id)))))
  ;; (debug:print 0 "serverdat=" serverdat)
  (let* ((fullurl    (if (list? serverdat)
			 (cadddr serverdat) ;; this is the uri for /api
			 (begin
			   (debug:print 0 "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
			   (exit 1))))
	 (res        #f))
    (handle-exceptions
     exn
     #f
     ;; (begin
       ;; TODO: Send this output to a log file so it isn't lost when running as daemon
       ;; (if (> numretries 0)
	;;    ;; on the zeroeth retry do not print the error message - this allows the call to be used as a ping (no junk on output).
	;;    (begin
	;;      (print "ERROR IN http-transport:client-api-send-receive " ((condition-property-accessor 'exn 'message) exn))
	;;      ;; try to restart the server and then reconnect

	;;      ;; (hash-table-delete! *runremote* run-id)
	;;      ;; (client:setup run-id)
	;;      ;; (http-transport:client-api-send-receive run-id serverdat cmd params numretries: (- numretries 1)))
	;;      #f) ;; simply return #f to indicate failure. The caller will need to do the retry.
	;;    #f))
     (begin
       (debug:print-info 11 "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n")
       ;; set up the http-client here
       (max-retry-attempts 5)
       ;; consider all requests indempotent
       (retry-request? (lambda (request)
			 #t))   ;;  		 (thread-sleep! (/ (if (> numretries 100) 100 numretries) 10))
356
357
358
359
360
361
362

363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
				  ;;     (set! res dat)
				  ;;     (http-transport:dec-requests-count-and-close-all-connections))
				  ;;   (http-transport:dec-requests-count
				  ;;    (lambda ()
				  ;;      (set! res dat)))))))
	      (time-out     (lambda ()
			      (thread-sleep! 45)

			      (if (not res)
				  (begin
				    (debug:print 0 "WARNING: communication with the server timed out.")
				    (mutex-unlock! *http-mutex*)
				    (http-transport:client-api-send-receive run-id serverdat cmd params numretries: (- numretries 1))
				    (if (< numretries 3) ;; on last try just exit
					(begin
					  (debug:print 0 "ERROR: communication with the server timed out. Giving up.")
					  (exit 1)))))))
	      (th1 (make-thread send-recieve "with-input-from-request"))
	      (th2 (make-thread time-out     "time out")))
	 (thread-start! th1)
	 (thread-start! th2)
	 (thread-join! th1)
	 (thread-terminate! th2)
	 (debug:print-info 11 "got res=" res)







>
|
|
|
|
|
|
|
|
|







294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
				  ;;     (set! res dat)
				  ;;     (http-transport:dec-requests-count-and-close-all-connections))
				  ;;   (http-transport:dec-requests-count
				  ;;    (lambda ()
				  ;;      (set! res dat)))))))
	      (time-out     (lambda ()
			      (thread-sleep! 45)
			      #f))
			      ;; (if (not res)
			      ;;     (begin
			      ;;       (debug:print 0 "WARNING: communication with the server timed out.")
			      ;;       (mutex-unlock! *http-mutex*)
			      ;;       (http-transport:client-api-send-receive run-id serverdat cmd params numretries: (- numretries 1))
			      ;;       (if (< numretries 3) ;; on last try just exit
			      ;;   	(begin
			      ;;   	  (debug:print 0 "ERROR: communication with the server timed out. Giving up.")
			      ;;   	  (exit 1)))))))
	      (th1 (make-thread send-recieve "with-input-from-request"))
	      (th2 (make-thread time-out     "time out")))
	 (thread-start! th1)
	 (thread-start! th2)
	 (thread-join! th1)
	 (thread-terminate! th2)
	 (debug:print-info 11 "got res=" res)

Modified megatest.scm from [51576793bd] to [7c47c73e54].

362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
	      ;; (set! *fdb*   (filedb:open-db (conc *toppath* "/db/paths.db")))
	      ;; if not list or kill then start a client (if appropriate)
	      (if (or (args-defined? "-h" "-version" "-gen-megatest-area" "-gen-megatest-test")
		      (eq? (length (hash-table-keys args:arg-hash)) 0))
		  (debug:print-info 1 "Server connection not needed")
		  (begin
		    (if run-id 
			(begin
			  (server:ensure-running run-id)
			  (client:launch run-id))
			(begin
			  ;; without run-id we'll start a server for "0"
			  (server:ensure-running 0)
			  (client:launch 0)))))))))

;; MAY STILL NEED THIS
;;		       (set! *megatest-db* (make-dbr:dbstruct path: *toppath* local: #t))))))))))

(if (or (args:get-arg "-list-servers")
	(args:get-arg "-stop-server"))
    (let ((tl (setup-for-run)))







<
<
|
<
|
<
|







362
363
364
365
366
367
368


369

370

371
372
373
374
375
376
377
378
	      ;; (set! *fdb*   (filedb:open-db (conc *toppath* "/db/paths.db")))
	      ;; if not list or kill then start a client (if appropriate)
	      (if (or (args-defined? "-h" "-version" "-gen-megatest-area" "-gen-megatest-test")
		      (eq? (length (hash-table-keys args:arg-hash)) 0))
		  (debug:print-info 1 "Server connection not needed")
		  (begin
		    (if run-id 


			(client:launch run-id) 

			(client:launch 0)      ;; without run-id we'll start a server for "0"

			)))))))

;; MAY STILL NEED THIS
;;		       (set! *megatest-db* (make-dbr:dbstruct path: *toppath* local: #t))))))))))

(if (or (args:get-arg "-list-servers")
	(args:get-arg "-stop-server"))
    (let ((tl (setup-for-run)))

Modified rmt.scm from [0c6c337631] to [37aacb2dec].

34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55

56
57
58
59
60
61
62
63
64
;;  S U P P O R T   F U N C T I O N S
;;======================================================================

;; cmd is a symbol
;; vars is a json string encoding the parameters for the call
;;
(define (rmt:send-receive cmd run-id params)
  (let* ((connection-info (client:setup (if run-id run-id 0)))
	 (jparams         (db:obj->string params)) ;; (rmt:dat->json-str params))
	 (res (http-transport:client-api-send-receive run-id connection-info cmd jparams)))
    (if res
	(db:string->obj res) ;; (rmt:json-str->dat res)
	(begin
	  (debug:print 0 "ERROR: Bad value from http-transport:client-api-send-receive " res)
	  #f))))

(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
  (let* ((jparams         (db:obj->string params)) ;; (rmt:dat->json-str params))
	 (res (http-transport:client-api-send-receive run-id connection-info cmd jparams numretries: 0)))
    (if res
	(db:string->obj res) ;; (rmt:json-str->dat res)
	(begin

	  (debug:print 0 "ERROR: Bad value from http-transport:client-api-send-receive " res)
	  #f))))

;; Wrap json library for strings (why the ports crap in the first place?)
(define (rmt:dat->json-str dat)
  (with-output-to-string 
    (lambda ()
      (json-write dat))))








|
|



|
|
|



|


|
>
|
|







34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
;;  S U P P O R T   F U N C T I O N S
;;======================================================================

;; cmd is a symbol
;; vars is a json string encoding the parameters for the call
;;
(define (rmt:send-receive cmd run-id params)
  (let* ((connection-info (hash-table-ref/default *runremote* run-id #f))
	 (jparams         (db:obj->string params))
	 (res (http-transport:client-api-send-receive run-id connection-info cmd jparams)))
    (if res
	(db:string->obj res) ;; (rmt:json-str->dat res)
	(let ((new-connection-info (client:setup run-id)))
	  (debug:print 0 "WARNING: Communication failed, trying call to http-transport:client-api-send-receive again.")
	  (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)))))

(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
  (let* ((jparams         (db:obj->string params)) ;; (rmt:dat->json-str params))
	 (res (http-transport:client-api-send-receive run-id connection-info cmd jparams numretries: 3)))
    (if res
	(db:string->obj res) ;; (rmt:json-str->dat res)
	(let ((connection-info (client:setup run-id)))
	  ;; something went wrong, try setting up the client again and then resend
	  (debug:print 0 "WARNING: Communication failed, trying call to http-transport:client-api-send-receive again.")
	  (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)))))

;; Wrap json library for strings (why the ports crap in the first place?)
(define (rmt:dat->json-str dat)
  (with-output-to-string 
    (lambda ()
      (json-write dat))))

Modified server.scm from [e93cd93c50] to [cffc1a2257].

44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
;;

;; all routes though here end in exit ...
;;
;; start_server
;;
(define (server:launch run-id)
  (if (server:check-if-running run-id)
      ;; a server is already running
      (exit)
      (http-transport:launch run-id)))

(define (server:launch-no-exit run-id)
  (if (server:check-if-running run-id)
      #t ;; if running
      (http-transport:launch run-id)))

;;======================================================================
;; Q U E U E   M A N A G E M E N T
;;======================================================================

;; We don't want to flush the queue if it was just flushed
(define *server:last-write-flush* (current-milliseconds))







|
|
|
|

|
|
|
|







44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
;;

;; all routes though here end in exit ...
;;
;; start_server
;;
(define (server:launch run-id)
  ;; (if (server:check-if-running run-id)
  ;; a server is already running
  ;; (exit)
  (http-transport:launch run-id))

;; (define (server:launch-no-exit run-id)
;;   (if (server:check-if-running run-id)
;;       #t ;; if running
;;       (http-transport:launch run-id)))

;;======================================================================
;; Q U E U E   M A N A G E M E N T
;;======================================================================

;; We don't want to flush the queue if it was just flushed
(define *server:last-write-flush* (current-milliseconds))
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134

135
136
137
138
139
140
141

;; When using zmq this would send the message back (two step process)
;; with spiffy or rpc this simply returns the return data to be returned
;; 
(define (server:reply return-addr query-sig success/fail result)
  (db:obj->string (vector success/fail query-sig result)))

(define (server:ensure-running run-id)
  (let loop ((server  (open-run-close tasks:get-server tasks:open-db run-id))
	     (trycount 0))
    (if (not server)
	(begin
	  (if (even? trycount) ;; just do the server start every other time through this loop (every 8 seconds)
	      (let ((cmdln (conc (if (getenv "MT_MEGATEST") (getenv "MT_MEGATEST") "megatest")
				 " -server - -run-id " run-id " &> " *toppath* "/db/" run-id ".log &")))
		(debug:print 0 "INFO: Starting server (" cmdln ") as none running ...")
		(push-directory *toppath*)
		(system cmdln)
		(pop-directory)
		(thread-sleep! 3)
		;; (process-run (car (argv)) (list "-server" "-" "-daemonize" "-transport" (args:get-arg "-transport" "http")))
		)
	      (begin
		(debug:print-info 0 "Waiting for server to start")
		(thread-sleep! 4)))
	  (if (< trycount 10)
	      (loop (open-run-close tasks:get-server tasks:open-db run-id) 
		    (+ trycount 1))
	      (debug:print 0 "WARNING: Couldn't start or find a server.")))
	(debug:print 2 "INFO: Server(s) running " server))))

(define (server:check-if-running run-id)
  (let loop ((server (open-run-close tasks:get-server tasks:open-db run-id))
	     (trycount 0))

    (if server
	;; note: client:start will set *runremote*. this needs to be changed
	;;       also, client:start will login to the server, also need to change that.
	;;
	;; client:start returns #t if login was successful.
	;;
	(let ((res (client:start run-id server)))







|
<
<
<
<
<
|
|
|
|
|
|
<
<
<
<
<
<
<
<
<
<
<




>







101
102
103
104
105
106
107
108





109
110
111
112
113
114











115
116
117
118
119
120
121
122
123
124
125
126

;; When using zmq this would send the message back (two step process)
;; with spiffy or rpc this simply returns the return data to be returned
;; 
(define (server:reply return-addr query-sig success/fail result)
  (db:obj->string (vector success/fail query-sig result)))

(define (server:try-running run-id)





  (let ((cmdln (conc (if (getenv "MT_MEGATEST") (getenv "MT_MEGATEST") "megatest")
		     " -server - -run-id " run-id " &> " *toppath* "/db/" run-id ".log &")))
    (debug:print 0 "INFO: Starting server (" cmdln ") as none running ...")
    (push-directory *toppath*)
    (system cmdln)
    (pop-directory)))












(define (server:check-if-running run-id)
  (let loop ((server (open-run-close tasks:get-server tasks:open-db run-id))
	     (trycount 0))
    (thread-sleep! 2)
    (if server
	;; note: client:start will set *runremote*. this needs to be changed
	;;       also, client:start will login to the server, also need to change that.
	;;
	;; client:start returns #t if login was successful.
	;;
	(let ((res (client:start run-id server)))

Modified tasks.scm from [fe9409354b] to [a457dae400].

89
90
91
92
93
94
95
96
97
98
99
100
101
102

103
104
105
106
107
108
109
110
111
112
113
114
115
116
(define (tasks:hostinfo-get-port        vec)    (vector-ref  vec 2))
(define (tasks:hostinfo-get-pubport     vec)    (vector-ref  vec 3))
(define (tasks:hostinfo-get-transport   vec)    (vector-ref  vec 4))
(define (tasks:hostinfo-get-pid         vec)    (vector-ref  vec 5))
(define (tasks:hostinfo-get-hostname    vec)    (vector-ref  vec 6))

(define (tasks:server-lock-slot mdb run-id)
  (let loop ((res       #f)
	     (num-tries 0))
    (if (and (< num-tries 5)
	     (not res))
	(begin
	  (tasks:server-clean-out-old-records-for-run-id mdb run-id)
	  (if (< (tasks:num-in-available-state mdb run-id) 4)

	      (tasks:server-set-available mdb run-id))
	  (thread-sleep! 2) ;; Try removing this. It may not be needed.
	  (loop (tasks:server-am-i-the-server? mdb run-id)
		(+ num-tries 1)))
	res)))
	
      
	
;; register that this server may come online (first to register goes though with the process)
(define (tasks:server-set-available mdb run-id)
  (sqlite3:execute 
   mdb 
   "INSERT INTO servers (pid,hostname,port,pubport,start_time,      priority,state,mt_version,heartbeat,   interface,transport,run_id)
                   VALUES(?, ?,       ?,   ?, strftime('%s','now'), ?,       ?,    ?,-1,?,        ?,        ?);"







<
<
<
<
<
|
|
>
|
|
|
<
|
<
<







89
90
91
92
93
94
95





96
97
98
99
100
101

102


103
104
105
106
107
108
109
(define (tasks:hostinfo-get-port        vec)    (vector-ref  vec 2))
(define (tasks:hostinfo-get-pubport     vec)    (vector-ref  vec 3))
(define (tasks:hostinfo-get-transport   vec)    (vector-ref  vec 4))
(define (tasks:hostinfo-get-pid         vec)    (vector-ref  vec 5))
(define (tasks:hostinfo-get-hostname    vec)    (vector-ref  vec 6))

(define (tasks:server-lock-slot mdb run-id)





  (tasks:server-clean-out-old-records-for-run-id mdb run-id)
  (if (< (tasks:num-in-available-state mdb run-id) 4)
      (begin 
	(tasks:server-set-available mdb run-id)
	(thread-sleep! 2) ;; Try removing this. It may not be needed.
	(tasks:server-am-i-the-server? mdb run-id))

      #f))      


	
;; register that this server may come online (first to register goes though with the process)
(define (tasks:server-set-available mdb run-id)
  (sqlite3:execute 
   mdb 
   "INSERT INTO servers (pid,hostname,port,pubport,start_time,      priority,state,mt_version,heartbeat,   interface,transport,run_id)
                   VALUES(?, ?,       ?,   ?, strftime('%s','now'), ?,       ?,    ?,-1,?,        ?,        ?);"
133
134
135
136
137
138
139
140
141
142
143
144
145




146
147
148
149
150
151
152
       (set! res num-in-queue))
     mdb
     "SELECT count(id) FROM servers WHERE run_id=?;"
     run-id)
    res))

(define (tasks:server-clean-out-old-records-for-run-id mdb run-id)
  (sqlite3:execute mdb "DELETE FROM servers WHERE state in ('available','shutting-down') AND (strftime('%s','now') - start_time) > 10 AND run_id=?;" run-id)
  (if (server:check-if-running run-id)
      (sqlite3:execute mdb "DELETE FROM servers WHERE run_id=?;" run-id)))

(define (tasks:server-force-clean-running-records-for-run-id mdb run-id)
  (sqlite3:execute mdb "DELETE FROM servers WHERE state = 'running' AND run_id=?;" run-id))





(define (tasks:server-set-state! mdb server-id state)
  (sqlite3:execute mdb "UPDATE servers SET state=? WHERE id=?;" state server-id))

(define (tasks:server-delete-record! mdb server-id)
  (sqlite3:execute mdb "DELETE FROM servers WHERE id=?;" server-id))








|
<
<



>
>
>
>







126
127
128
129
130
131
132
133


134
135
136
137
138
139
140
141
142
143
144
145
146
147
       (set! res num-in-queue))
     mdb
     "SELECT count(id) FROM servers WHERE run_id=?;"
     run-id)
    res))

(define (tasks:server-clean-out-old-records-for-run-id mdb run-id)
  (sqlite3:execute mdb "DELETE FROM servers WHERE state in ('available','shutting-down') AND (strftime('%s','now') - start_time) > 10 AND run_id=?;" run-id))



(define (tasks:server-force-clean-running-records-for-run-id mdb run-id)
  (sqlite3:execute mdb "DELETE FROM servers WHERE state = 'running' AND run_id=?;" run-id))

(define (tasks:server-force-clean-run-record mdb run-id iface port)
  (sqlite3:execute mdb "DELETE FROM servers WHERE state = 'running' AND run_id=? AND interface=? AND port=?;"
		   run-id iface port))

(define (tasks:server-set-state! mdb server-id state)
  (sqlite3:execute mdb "UPDATE servers SET state=? WHERE id=?;" state server-id))

(define (tasks:server-delete-record! mdb server-id)
  (sqlite3:execute mdb "DELETE FROM servers WHERE id=?;" server-id))

Modified tests/fullrun/megatest.config from [394e6d468f] to [79fdce739b].

75
76
77
78
79
80
81




82
83
84
85
86
87
88
[validvalues]
state start end 0 1 - 2
status pass fail n/a 0 1 running - 2

# These are set before all tests, override them 
# in the testconfig [pre-launch-env-overrides] section
[env-override]




# MT_XTERM_CMD overrides the terminal command
# MT_XTERM_CMD xterm -bg lightgreen -fg black

SPECIAL_ENV_VARS overide them here - should be seen at launch and in the runs
TESTVAR [system echo $PWD]
DEADVAR [system ls]
VARWITHDOLLAR $HOME/.zshrc







>
>
>
>







75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
[validvalues]
state start end 0 1 - 2
status pass fail n/a 0 1 running - 2

# These are set before all tests, override them 
# in the testconfig [pre-launch-env-overrides] section
[env-override]
# This variable is honored by the loadrunner script. The value is in percent
# a value of 200 will stop new jobs from starting.
MAX_ALLOWED_LOAD 200

# MT_XTERM_CMD overrides the terminal command
# MT_XTERM_CMD xterm -bg lightgreen -fg black

SPECIAL_ENV_VARS overide them here - should be seen at launch and in the runs
TESTVAR [system echo $PWD]
DEADVAR [system ls]
VARWITHDOLLAR $HOME/.zshrc