Megatest

Check-in [9838452f20]
Login
Overview
Comment:Tweaked server pinging to use contexts. Didn't seem to help but keeping as it is probably the right thing to do
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | monitor-cleanup
Files: files | file ages | folders
SHA1: 9838452f209557736357ba41480ba9bf1baeef5b
User & Date: mrwellan on 2012-11-01 11:16:15
Other Links: branch diff | manifest | tags
Context
2012-11-01
11:18
Added md5 to list of eggs to install in installall.sh Closed-Leaf check-in: 76a262c686 user: mrwellan tags: monitor-cleanup
11:16
Tweaked server pinging to use contexts. Didn't seem to help but keeping as it is probably the right thing to do check-in: 9838452f20 user: mrwellan tags: monitor-cleanup
01:41
Removed instrumentation check-in: e7e1e90a39 user: matt tags: monitor-cleanup
Changes

Modified common.scm from [12802b14f2] to [7a887c91b9].

72
73
74
75
76
77
78









79
80
81
82
83
84
85
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94







+
+
+
+
+
+
+
+
+







(define (assoc/default key lst . default)
  (let ((res (assoc key lst)))
    (if res (cadr res)(if (null? default) #f (car default)))))

;;======================================================================
;; Misc utils
;;======================================================================

;; one-of args defined
(define (args-defined? . param)
  (let ((res #f))
    (for-each 
     (lambda (arg)
       (if (args:get-arg arg)(set! res #t)))
     param)
    res))

;; convert stuff to a number if possible
(define (any->number val)
  (cond 
   ((number? val) val)
   ((string? val) (string->number val))
   ((symbol? val) (any->number (symbol->string val)))

Modified megatest.scm from [bab637bbe2] to [100a519d71].

319
320
321
322
323
324
325
326
327
328
329
330

331
332
333




334

335
336
337
338
339
340
341
319
320
321
322
323
324
325





326

327
328
329
330
331
332

333
334
335
336
337
338
339
340







-
-
-
-
-
+
-


+
+
+
+
-
+







		       (debug:print-info 1 "Killed server by pid at " hostname ":" port)))
		 ;; (if zmq-socket (close-socket  zmq-socket))
		 (format #t fmtstr id pid hostname port start-time priority 
			 status numclients)))
	     servers)
	    (set! *didsomething* #t))))
    ;; if not list or kill then start a client (if appropriate)
    (if (or (let ((res #f))
	      (for-each
	       (lambda (key)
		 (if (args:get-arg key)(set! res #t)))
	       (list "-h" "-version" "-gen-megatest-area" "-gen-megatest-test"))
    (if (or (args-defined? "-h" "-version" "-gen-megatest-area" "-gen-megatest-test")
	      res)
	    (eq? (length (hash-table-keys args:arg-hash)) 0))
	(debug:print-info 1 "Server connection not needed")
	;; ping servers only if -runall -runtests
	(let ((ping (args-defined? "-runall" "-runtests" "-remove-runs" 
				   "-set-state-status" "-rerun" "-rollup" "-lock" "-unlock"
				   "-set-values" "-list-runs")))
	(server:client-launch)))
	  (server:client-launch do-ping: ping))))
    
;;======================================================================
;; Remove old run(s)
;;======================================================================

;; since several actions can be specified on the command line the removal
;; is done first

Modified server.scm from [6599df07df] to [932059ea25].

135
136
137
138
139
140
141
142

143
144


145

146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164

165
166

167




168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
























192
193
194
195
196
197
198
199
200
201


202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222


223
224
225
226
227
228
229

230
231
232
233
234

235

236
237
238


239
240
241



242
243
244
245


246
247
248
249
250
251
252
253
254

255
256
257
258
259

260
261
135
136
137
138
139
140
141

142
143
144
145
146

147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165

166
167

168
169
170
171
172
173
























174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201






202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222


223
224
225
226
227
228
229
230

231
232
233
234
235
236
237

238
239
240
241
242
243



244
245
246
247
248


249
250
251
252
253
254
255
256
257
258

259
260
261
262
263

264
265
266







-
+


+
+
-
+


















-
+

-
+

+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+




-
-
-
-
-
-
+
+



















-
-
+
+






-
+





+
-
+



+
+
-
-
-
+
+
+


-
-
+
+








-
+




-
+


(define (server:get-client-signature)
  (if *my-client-signature* *my-client-signature*
      (let ((sig (server:mk-signature)))
	(set! *my-client-signature* sig)
	*my-client-signature*)))

;; 
(define (server:client-connect host port)
(define (server:client-connect host port #!key (context #f))
  (debug:print 3 "client-connect " host ":" port)
  (let ((connect-ok #f)
	(zmq-socket (if context 
			(make-socket 'req context)
	(zmq-socket (make-socket 'req))
			(make-socket 'req)))
	(conurl     (server:make-server-url (list host port))))
    (if (socket? zmq-socket)
	(begin
	  (connect-socket zmq-socket conurl)
	  zmq-socket)
	#f)))
  

(define (server:client-login zmq-socket)
  (cdb:login zmq-socket *toppath* (server:get-client-signature)))

(define (server:client-logout zmq-socket)
  (let ((ok (and (socket? zmq-socket)
		 (cdb:logout zmq-socket *toppath* (server:get-client-signature)))))
    ;; (close-socket zmq-socket)
    ok))

;; Do all the connection work, start a server if not already running
(define (server:client-setup #!key (numtries 10))
(define (server:client-setup #!key (numtries 10)(do-ping #f))
  (if (not *toppath*)(setup-for-run))
  (let ((hostinfo   (open-run-close tasks:get-best-server tasks:open-db)))
  (let ((hostinfo   (open-run-close tasks:get-best-server tasks:open-db do-ping: do-ping)))
    (if hostinfo
	(let ((host    (car hostinfo))
	      (port    (cadr hostinfo))
	      (zsocket (caddr hostinfo)))
	;; (set! *runremote* zsocket))
	(let* ((host       (car hostinfo))
	       (port       (cadr hostinfo)))
	  (debug:print-info 2 "Setting up to connect to " hostinfo)
	  (handle-exceptions
	   exn
	   (begin
	     (debug:print 0 "ERROR: Failed to open a connection to the server at: " hostinfo)
	     (debug:print 0 "   EXCEPTION: " ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 0 "   perhaps jobs killed with -9? Removing server records")
	     (open-run-close tasks:server-deregister tasks:open-db host port: port)
	     #f)
	   (let* ((zmq-socket (server:client-connect host port))
		  (login-res  (server:client-login zmq-socket))
		  (connect-ok (if (null? login-res) #f (car login-res)))
		  (conurl     (server:make-server-url hostinfo)))
	     (if connect-ok
		 (begin
		   (debug:print-info 2 "Logged in and connected to " conurl)
		   (set! *runremote* zmq-socket)
		   #t)
		 (begin
		   (debug:print-info 2 "Failed to login or connect to " conurl)
		   (set! *runremote* #f)
		   #f)))))
	  (let* ((host       (car hostinfo))
		 (port       (cadr hostinfo)))
	    (debug:print-info 2 "Setting up to connect to " hostinfo)
	    (handle-exceptions
	     exn
	     (begin
	       (debug:print 0 "ERROR: Failed to open a connection to the server at: " hostinfo)
	       (debug:print 0 "   EXCEPTION: " ((condition-property-accessor 'exn 'message) exn))
	       (debug:print 0 "   perhaps jobs killed with -9? Removing server records")
	       (open-run-close tasks:server-deregister tasks:open-db host port: port)
	       #f)
	     (let* ((zmq-socket (server:client-connect host port))
		    (login-res  (server:client-login zmq-socket))
		    (connect-ok (if (null? login-res) #f (car login-res)))
		    (conurl     (server:make-server-url hostinfo)))
	       (if connect-ok
		   (begin
		     (debug:print-info 2 "Logged in and connected to " conurl)
		     (set! *runremote* zmq-socket)
		     #t)
		   (begin
		     (debug:print-info 2 "Failed to login or connect to " conurl)
		     (set! *runremote* #f)
		     #f))))))
	(if (> numtries 0)
	    (let ((exe (car (argv))))
	      (debug:print-info 1 "No server available, attempting to start one...")
	      (process-run exe (list "-server" "-" "-debug" (conc *verbosity*)))
	      ;; (system (conc  " -server - " (if (args:get-arg "-debug")
	      ;;   					   (conc "-debug " (args:get-arg "-debug"))
	      ;;   					   "")
	      ;;   	    " &"))
	      (sleep 10)
	      (server:client-setup numtries: (- numtries 1)))
	      (sleep 5)
	      (server:client-setup numtries: (- numtries 1) do-ping: do-ping))
	    (debug:print-info 1 "Too many retries, giving up")))))

(define (server:launch)
  (let* ((toppath (setup-for-run)))
    (debug:print-info 0 "Starting the standalone server")
    (let ((hostinfo (open-run-close tasks:get-best-server tasks:open-db)))
      (if hostinfo
	  (debug:print-info 1 "NOT starting new server, one is already running on " (car hostinfo) ":" (cadr hostinfo))
	  (if *toppath* 
	      (let* ((th2 (make-thread (lambda ()
					 (server:run (args:get-arg "-server")))))
		     (th3 (make-thread (lambda ()
					 (server:keep-running)))))
		(thread-start! th2)
		(thread-start! th3)
		(set! *didsomething* #t)
		(thread-join! th3))
	      (debug:print 0 "ERROR: Failed to setup for megatest"))))))

(define (server:client-launch)
  (if (server:client-setup)
(define (server:client-launch #!key (do-ping #f))
  (if (server:client-setup do-ping: do-ping)
      (debug:print-info 0 "connected as client")
      (begin
	(debug:print 0 "ERROR: Failed to connect as client")
	(exit))))

;; ping a server and return number of clients or #f (if no response)
(define (server:ping host port #!key (secs 10))
(define (server:ping host port #!key (secs 10)(return-socket #f))
  (cdb:use-non-blocking-mode
   (lambda ()
     (let* ((res #f)
	    (th1 (make-thread
		  (lambda ()
		    (let* ((zmq-context (make-context 1))
		    (let ((zmq-socket (server:client-connect host port)))
			   (zmq-socket  (server:client-connect host port context: zmq-context)))
		      (if zmq-socket
			  (if (server:client-login zmq-socket)
			      (let ((numclients (cdb:num-clients zmq-socket)))
				(if (not return-socket)
				    (begin
				(server:client-logout zmq-socket)
				(close-socket  zmq-socket)
				(set! res (list #t numclients)))
				      (server:client-logout zmq-socket)
				      (close-socket  zmq-socket)))
				(set! res (list #t numclients (if return-socket zmq-socket #f))))
			      (begin
				;; (close-socket zmq-socket)
				(set! res (list #f "CAN'T LOGIN"))))
			  (set! res (list #f "CAN'T CONNECT")))))))
				(set! res (list #f "CAN'T LOGIN" #f))))
			  (set! res (list #f "CAN'T CONNECT" #f)))))))
	    (th2 (make-thread
		  (lambda ()
		    (let loop ((count 1))
		      (debug:print-info 1 "Ping " count " server on " host " at port " port)
		      (thread-sleep! 2)
		      (if (< count (/ secs 2))
			  (loop (+ count 1))))
		    ;; (thread-terminate! th1)
		    (set! res (list #f "TIMED OUT"))))))
		    (set! res (list #f "TIMED OUT" #f))))))
       (thread-start! th2)
       (thread-start! th1)
       (handle-exceptions
	exn
	(set! res (list #f "TIMED OUT"))
	(set! res (list #f "TIMED OUT" #f))
	(thread-join! th1 secs))
       res))))

Modified tasks.scm from [b09182cfe4] to [29f8996e48].

118
119
120
121
122
123
124
125

126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142







143
144
145
146
147
148
149
118
119
120
121
122
123
124

125
126
127
128
129
130
131
132
133
134
135
136
137
138




139
140
141
142
143
144
145
146
147
148
149
150
151
152







-
+













-
-
-
-
+
+
+
+
+
+
+







     server-id)))

(define (tasks:have-clients? mdb server-id)
  (null? (tasks:get-logged-in-clients mdb server-id)))

;; ping each server in the db and return first found that responds. 
;; remove any others. will not necessarily remove all!
(define (tasks:get-best-server mdb)
(define (tasks:get-best-server mdb #!key (do-ping #f))
  (let ((res '())
	(best #f))
    (sqlite3:for-each-row
     (lambda (id hostname port)
       (set! res (cons (list hostname port) res))
       (debug:print-info 1 "Found " hostname ":" port))
     mdb
     "SELECT id,hostname,port FROM servers WHERE state='live' ORDER BY start_time DESC LIMIT 1;")
    ;; (print "res=" res)
    (if (null? res) #f
	(let loop ((hed (car res))
		   (tal (cdr res)))
	  ;; (print "hed=" hed ", tal=" tal)
	  (let* ((host (car hed))
		 (port (cadr hed))
		 (ping-res (server:ping host port)))
	    (if ping-res hed
	  (let* ((host     (car hed))
		 (port     (cadr hed))
		 (ping-res (if do-ping (server:ping host port return-socket: #f) '(#t "NO PING" #f)))
		 (alive    (car ping-res))
		 (reason   (cadr ping-res))
		 (zsocket  (caddr ping-res)))
	    (if alive (list host port zsocket)
		;; remove defunct server from table
		(begin
		  (open-run-close tasks:server-deregister tasks:open-db  host port: port)
		  (if (null? tal)
		      #f
		      (loop (car tal)(cdr tal))))))))))

Modified tests/Makefile from [b3a42dd537] to [14232cd2a2].

27
28
29
30
31
32
33
34

35
36
37
38
39
40
41
42
43

44
45
46
47

48
49
50
51
52

53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74

75
76
77
78
79
80
81
27
28
29
30
31
32
33

34
35
36
37
38
39
40
41
42

43
44
45


46
47
48
49


50
51
52
53
54
55
56
57
58
59
60
61
62
63

64
65
66
67
68
69


70
71
72
73
74
75
76
77







-
+








-
+


-
-
+



-
-
+













-






-
-
+







	rm -f simplerun/megatest.db
	rm -rf simplelinks/ simpleruns/
	mkdir -p simplelinks simpleruns
	cd simplerun;cp ../../*_records.scm .;perl -pi.bak -e 's/define-inline/define/' *_records.scm
	cd simplerun;echo '(load "../tests.scm")' | $(MEGATEST) -repl -debug $(DEBUG)

test2 : fullprep
	cd fullrun;$(MEGATEST) -runtests ez_pass,runfirst/a/% -reqtarg ubuntu/nfs/none :runname $(RUNNAME) $(SERVER) -debug $(DEBUG) $(LOGGING)
	cd fullrun;$(MEGATEST) -runtests ez_pass,runfirst/a/% -reqtarg ubuntu/nfs/none :runname $(RUNNAME) -debug $(DEBUG) $(LOGGING)
	cd fullrun;megatest -runall -target ubunut/nfs/none :runname $(RUNNAME)_01 -testpatt %/,%/ai -debug $(DEBUG)
	cd fullrun;megatest -runtests %/,%/ai -target ubunut/nfs/none :runname $(RUNAME)_02 -debug $(DEBUG)
	cd fullrun;megatest -runtests runfirst/%,%/ai -target ubunut/nfs/none :runname $(RUNNAME)_02 -debug $(DEBUG)
	cd fullrun;megatest -runtests %/,%/winter -target ubunut/nfs/none :runname $(RUNNAME)_03  -debug $(DEBUG)
	sleep 40;cd fullrun;megatest -target ubuntu/nfs/none :runname $(RUNNAME) -set-state-status COMPLETED,FORCED :state COMPLETED :status PASS -testpatt ez_p%s,runfirst/ -debug $(DEBUG) $(LOGGING)


test3 : fullprep
	cd fullrun;$(MEGATEST) -runtests runfirst -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b  $(SERVER) -debug 10
	cd fullrun;$(MEGATEST) -runtests runfirst -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -debug 10

test4 : fullprep
	cd fullrun;$(MEGATEST) $(SERVER) $(LOGGING) -debug $(DEBUG) &
	cd fullrun;sleep 5;$(MEGATEST) -debug $(DEBUG) -runall -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -m "This is a comment specific to a run" -v $(LOGGING)
	cd fullrun;$(MEGATEST) -debug $(DEBUG) -runall -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -m "This is a comment specific to a run" -v $(LOGGING)

# NOTE: Only one instance can be a server
test5 : fullprep
	cd fullrun;$(MEGATEST) $(SERVER) $(LOGGING) &
	cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_aa -debug $(DEBUG) $(LOGGING) > aa.log 2> aa.log &
	cd fullrun;sleep  0;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_aa -debug $(DEBUG) $(LOGGING) > aa.log 2> aa.log &
	cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ab -debug $(DEBUG) $(LOGGING) > ab.log 2> ab.log &
	cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ac -debug $(DEBUG) $(LOGGING) > ac.log 2> ac.log &
	cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ad -debug $(DEBUG) $(LOGGING) > ad.log 2> ad.log &	
	cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ae -debug $(DEBUG) $(LOGGING) > ae.log 2> ae.log &	
	cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_af -debug $(DEBUG) $(LOGGING) > af.log 2> af.log &	

test6: fullprep
	cd fullrun;$(MEGATEST) -runtests runfirst -testpatt %/1 -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -v
	cd fullrun;$(MEGATEST) -runtests runfirst -testpatt %blahha% -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -debug 10
	cd fullrun;$(MEGATEST) -rollup :runname newrun -target ubuntu/nfs/none -debug 10


cleanprep : ../*.scm Makefile */*.config
	# if [ -e fullrun/megatest.db ]; then sqlite3 fullrun/megatest.db "delete from metadat where var='SERVER';";fi
	mkdir -p /tmp/mt_runs /tmp/mt_links
	cd ..;make install
	rm -f */logging.db */monitor.db
	touch cleanprep

fullprep : cleanprep
	cd fullrun;$(MEGATEST) -server - -debug $(DEBUG) &
	sleep 5;cd fullrun;$(MEGATEST) -remove-runs :runname $(RUNNAME)% -target %/%/% -testpatt %/%
	cd fullrun;$(MEGATEST) -remove-runs :runname $(RUNNAME)% -target %/%/% -testpatt %/%
	cd fullrun;$(BINPATH)/dboard -rows 15 &

dashboard : cleanprep
	cd fullrun && $(BINPATH)/dashboard -rows 25 &

remove :
	cd fullrun;$(MEGATEST) -remove-runs :runname $(RUN)  -testpatt % -itempatt % :sysname % :fsname % :datapath %

Modified tests/fullrun/megatest.config from [5d94f21968] to [d9f969ffd5].

16
17
18
19
20
21
22
23
24


25
26
27
28
29
30
31
16
17
18
19
20
21
22


23
24
25
26
27
28
29
30
31







-
-
+
+







# Throttle roughly scales the db access milliseconds to seconds delay
throttle 0.2
# Max retries allows megatest to re-check that a tests status has changed
# as tests can have transient FAIL status occasionally
maxretries 20

[validvalues]
state start end 
status pass fail n/a 0 1 running
state start end 0 1 - 2
status pass fail n/a 0 1 running - 2

# These are set before all tests, override them 
# in the testconfig [pre-launch-env-overrides] section
[env-override]
SPECIAL_ENV_VARS overide them here - should be seen at launch and in the runs
TESTVAR [system realpath .]
DEADVAR [system ls]