Overview
Context
Changes
Modified client.scm
from [065a0a550e]
to [5cb1c0c7dc].
︙ | | |
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
|
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
|
-
+
+
-
+
-
-
+
+
+
-
-
+
+
+
-
+
+
-
+
|
;; 2. We are a run tests, list runs or other interactive process and we must figure out
;; *transport-type* and *runremote* from the monitor.db
;;
;; client:setup
;;
;; lookup_server, need to remove *runremote* stuff
;;
(define (client:setup run-id #!key (remaining-tries 3))
(define (client:setup run-id #!key (remaining-tries 10))
(debug:print 0 "INFO: client:setup remaining-tries=" remaining-tries)
(if (<= remaining-tries 0)
(begin
(debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id)
(exit 1))
(let ((server-dat (and run-id (hash-table-ref/default *runremote* run-id #f))))
(if server-dat
(let ((start-res (http-transport:client-connect run-id
(let ((start-res (http-transport:client-connect run-id ;; NB// confusion over server-dat and connection result!
(tasks:hostinfo-get-interface server-dat)
(tasks:hostinfo-get-port server-dat))))
(if start-res ;; sucessful login?
(begin
(hash-table-set! *runremote* run-id server-dat)
server-dat)
(hash-table-set! *runremote* run-id start-res)
start-res)
(begin ;; login failed
(hash-table-delete! *runremote* run-id)
(open-run-close tasks:server-force-clean-run-record
tasks:open-db
run-id
(tasks:hostinfo-get-interface server-dat)
(tasks:hostinfo-get-port server-dat))
(thread-sleep! 5)
(client:setup run-id remaining-tries: (- remaining-tries 1)))))
(let* ((server-dat (open-run-close tasks:get-server tasks:open-db run-id)))
(if server-dat
(let ((start-res (http-transport:client-connect run-id
(tasks:hostinfo-get-interface server-dat)
(tasks:hostinfo-get-port server-dat))))
(if start-res
(begin
(hash-table-set! *runremote* run-id server-dat)
server-dat)
(hash-table-set! *runremote* run-id start-res)
start-res)
(begin ;; login failed
(hash-table-delete! *runremote* run-id)
(open-run-close tasks:server-force-clean-run-record
tasks:open-db
run-id
(tasks:hostinfo-get-interface server-dat)
(tasks:hostinfo-get-port server-dat))
(thread-sleep! 2)
(server:try-running run-id)
(thread-sleep! 3) ;; give server a little time to start up
(thread-sleep! 5) ;; give server a little time to start up
(client:setup run-id remaining-tries: (- remaining-tries 1)))))
(begin ;; no server registered
(thread-sleep! 2)
(server:try-running run-id)
(thread-sleep! 3) ;; give server a little time to start up
(thread-sleep! 5) ;; give server a little time to start up
(client:setup run-id remaining-tries: (- remaining-tries 1)))))))))
;; keep this as a function to ease future
(define (client:start run-id server-info)
(http-transport:client-connect run-id
(tasks:hostinfo-get-interface server-info)
(tasks:hostinfo-get-port server-info)))
|
︙ | | |
Modified http-transport.scm
from [7e052a2b1c]
to [d9e94ba5d7].
︙ | | |
398
399
400
401
402
403
404
405
406
407
408
409
410
411
|
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
|
+
+
+
+
|
;;
;; start_server?
;;
(define (http-transport:launch run-id)
(set! *run-id* run-id)
(if (args:get-arg "-daemonize")
(daemon:ize))
(if (server:check-if-running run-id)
(begin
(debug:print 0 "INFO: Server for run-id " run-id " already running")
(exit 0)))
(let ((server-id (open-run-close tasks:server-lock-slot tasks:open-db run-id)))
(if (not server-id)
(begin
;; since we didn't get the server lock we are going to clean up and bail out
(debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue")
(open-run-close tasks:server-delete-records-for-this-pid tasks:open-db))
(let* ((th2 (make-thread (lambda ()
|
︙ | | |
Modified rmt.scm
from [deb40682ce]
to [206f8532c1].
︙ | | |
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
|
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
|
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
+
-
+
|
;;======================================================================
;; S U P P O R T F U N C T I O N S
;;======================================================================
;; cmd is a symbol
;; vars is a json string encoding the parameters for the call
;;
(define (rmt:send-receive cmd run-id params)
(let* ((connection-info (hash-table-ref/default *runremote* run-id #f))
(define (rmt:send-receive cmd rid params)
(let* ((run-id (if rid rid 0))
(connection-info (let ((cinfo (hash-table-ref/default *runremote* run-id #f)))
(if cinfo
cinfo
(let loop ((numtries 100))
(thread-sleep! 1)
(let ((res (client:setup run-id)))
(if res
res
(if (> numtries 0)
(loop (- numtries 1))
(begin
(debug:print 0 "ERROR: 100 tries and no server, giving up")
(exit 1)))))))))
(jparams (db:obj->string params))
(res (http-transport:client-api-send-receive run-id connection-info cmd jparams)))
(if res
(db:string->obj res) ;; (rmt:json-str->dat res)
(let ((new-connection-info (client:setup run-id)))
(debug:print 0 "WARNING: Communication failed, trying call to http-transport:client-api-send-receive again.")
(rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)))))
(rmt:send-receive cmd run-id params)))))
(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
(let* ((run-id (if run-id run-id 0))
(let* ((jparams (db:obj->string params)) ;; (rmt:dat->json-str params))
(jparams (db:obj->string params)) ;; (rmt:dat->json-str params))
(res (http-transport:client-api-send-receive run-id connection-info cmd jparams numretries: 3)))
(if res
(db:string->obj res) ;; (rmt:json-str->dat res)
;; this one does NOT keep trying
res)))
;; Wrap json library for strings (why the ports crap in the first place?)
|
︙ | | |
Modified server.scm
from [cffc1a2257]
to [8eb4730569].
︙ | | |
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
|
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
|
+
+
-
-
+
+
|
;; When using zmq this would send the message back (two step process)
;; with spiffy or rpc this simply returns the return data to be returned
;;
(define (server:reply return-addr query-sig success/fail result)
(db:obj->string (vector success/fail query-sig result)))
;; > file 2>&1
(define (server:try-running run-id)
(let* ((rand-name (random 100))
(let ((cmdln (conc (if (getenv "MT_MEGATEST") (getenv "MT_MEGATEST") "megatest")
" -server - -run-id " run-id " &> " *toppath* "/db/" run-id ".log &")))
(cmdln (conc (if (getenv "MT_MEGATEST") (getenv "MT_MEGATEST") "megatest")
" -server - -run-id " run-id " name=" rand-name " > " *toppath* "/db/" run-id "-" rand-name ".log 2>&1 &")))
(debug:print 0 "INFO: Starting server (" cmdln ") as none running ...")
(push-directory *toppath*)
(system cmdln)
(pop-directory)))
(define (server:check-if-running run-id)
(let loop ((server (open-run-close tasks:get-server tasks:open-db run-id))
|
︙ | | |
Modified tasks.scm
from [c13323f697]
to [19f1225d86].
︙ | | |
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
|
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
-
|
(define (tasks:hostinfo-get-pubport vec) (vector-ref vec 3))
(define (tasks:hostinfo-get-transport vec) (vector-ref vec 4))
(define (tasks:hostinfo-get-pid vec) (vector-ref vec 5))
(define (tasks:hostinfo-get-hostname vec) (vector-ref vec 6))
(define (tasks:server-lock-slot mdb run-id)
(tasks:server-clean-out-old-records-for-run-id mdb run-id)
(server:check-if-running run-id)
(if (< (tasks:num-in-available-state mdb run-id) 4)
(begin
(tasks:server-set-available mdb run-id)
(thread-sleep! 2) ;; Try removing this. It may not be needed.
(tasks:server-am-i-the-server? mdb run-id))
#f))
|
︙ | | |
Modified tests/Makefile
from [f03136dea1]
to [488ee3eee2].
︙ | | |
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
|
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
|
-
+
|
NEWTARGET = "$(OS)/$(FS)/$(VER)"
TARGET = "ubuntu/nfs/none"
all : test1 test2 test3 test4 test5 test6 test7 test8 test9
server :
cd ..;make;make install
cd fullrun;../../bin/megatest -server - -debug 22 -run-id $(RUNID)
cd fullrun;../../bin/megatest -server - -debug $(DEBUG) -run-id $(RUNID)
stopserver :
cd ..;make && make install
cd fullrun;$(MEGATEST) -stop-server 0
repl :
cd ..;make && make install
|
︙ | | |