Megatest

Check-in [60f1fc22c3]
Login
Overview
Comment:Reduced noise from messages, bumped server life to 70 hrs and other minor cleanups
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | v1.5115
Files: files | file ages | folders
SHA1: 60f1fc22c315b8c6dfeec93cf06e3b0edb109d76
User & Date: mrwellan on 2012-11-12 20:34:46
Other Links: manifest | tags
Context
2012-11-12
22:00
Cut back test5 to 4 parallel runs check-in: eb80c72f89 user: mrwellan tags: trunk
20:34
Reduced noise from messages, bumped server life to 70 hrs and other minor cleanups check-in: 60f1fc22c3 user: mrwellan tags: trunk, v1.5115
19:50
Cherrypicked the fix to building for deploy check-in: e4ac93792c user: matt tags: trunk
Changes

Modified megatest-version.scm from [f4fe051edd] to [0980567fa8].

1
2
3
4
5
6
7
;; Always use two digit decimal
;; 1.01, 1.02...1.10,1.11 ... 1.99,2.00..

(declare (unit megatest-version))

(define megatest-version 1.5114)






|

1
2
3
4
5
6
7
;; Always use two digit decimal
;; 1.01, 1.02...1.10,1.11 ... 1.99,2.00..

(declare (unit megatest-version))

(define megatest-version 1.5115)

Modified megatest.scm from [8b945e5bb2] to [f379a0e485].

183
184
185
186
187
188
189



190
191
192
193
194
195
196
		        "-gui"
			;; misc
			"-archive"
			"-repl"
			"-lock"
			"-unlock"
			"-listservers"



			;; queries
			"-test-paths" ;; get path(s) to a test, ordered by youngest first

			"-runall"    ;; run all tests
			"-remove-runs"
			"-usequeue"
			"-rebuild-db"







>
>
>







183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
		        "-gui"
			;; misc
			"-archive"
			"-repl"
			"-lock"
			"-unlock"
			"-listservers"
			;; mist queries
			"-list-disks"
			"-list-targets"
			;; queries
			"-test-paths" ;; get path(s) to a test, ordered by youngest first

			"-runall"    ;; run all tests
			"-remove-runs"
			"-usequeue"
			"-rebuild-db"
257
258
259
260
261
262
263














264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
;; Misc general calls
;;======================================================================

(if (args:get-arg "-env2file")
    (begin
      (save-environment-as-files (args:get-arg "-env2file"))
      (set! *didsomething* #t)))















;;======================================================================
;; Start the server - can be done in conjunction with -runall or -runtests (one day...)
;;   we start the server if not running else start the client thread
;;======================================================================

(if (args:get-arg "-server")
    (begin
      (debug:print 1 "Launching server...")
      (server:launch)))

(if (or (args:get-arg "-listservers")
	(args:get-arg "-killserver"))
    (let ((tl (setup-for-run)))
      (if tl 
	  (let ((servers (open-run-close tasks:get-all-servers tasks:open-db))







>
>
>
>
>
>
>
>
>
>
>
>
>
>








|







260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
;; Misc general calls
;;======================================================================

(if (args:get-arg "-env2file")
    (begin
      (save-environment-as-files (args:get-arg "-env2file"))
      (set! *didsomething* #t)))

(if (args:get-arg "-list-targets")
    (print (string-intersperse
	    (sort (map car (hash-table->alist
			    (read-config "runconfigs.config"
					 (make-hash-table) #f))) string<?) "\n")))


(if (args:get-arg "-list-disks")
    (print (string-intersperse 
	    (map cadr (hash-table-ref/default 
		       (read-config "megatest.config" #f #t) 
		       "disks" "'" 
		       ("none" ""))) "\n")))

;;======================================================================
;; Start the server - can be done in conjunction with -runall or -runtests (one day...)
;;   we start the server if not running else start the client thread
;;======================================================================

(if (args:get-arg "-server")
    (begin
      (debug:print 2 "Launching server...")
      (server:launch)))

(if (or (args:get-arg "-listservers")
	(args:get-arg "-killserver"))
    (let ((tl (setup-for-run)))
      (if tl 
	  (let ((servers (open-run-close tasks:get-all-servers tasks:open-db))

Modified runs.scm from [9e964d5db3] to [3e4e30fa7d].

863
864
865
866
867
868
869
870

871
872
873
874
875
876
877
	    (begin 
	      (debug:print 0 "Failed to setup, exiting")
	      (exit 1)))
	(if (args:get-arg "-server")
	    (open-run-close server:start db (args:get-arg "-server"))
 	    (if (not (or (args:get-arg "-runall")     ;; runall and runtests are allowed to be servers
 			 (args:get-arg "-runtests")))
		(server:client-setup)))

	(set! keys (open-run-close db:get-keys db))
	;; have enough to process -target or -reqtarg here
	(if (args:get-arg "-reqtarg")
	    (let* ((runconfigf (conc  *toppath* "/runconfigs.config")) ;; DO NOT EVALUATE ALL 
		   (runconfig  (read-config runconfigf #f #f environ-patt: #f))) 
	      (if (hash-table-ref/default runconfig (args:get-arg "-reqtarg") #f)
		  (keys:target-set-args keys (args:get-arg "-reqtarg") args:arg-hash)







|
>







863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
	    (begin 
	      (debug:print 0 "Failed to setup, exiting")
	      (exit 1)))
	(if (args:get-arg "-server")
	    (open-run-close server:start db (args:get-arg "-server"))
 	    (if (not (or (args:get-arg "-runall")     ;; runall and runtests are allowed to be servers
 			 (args:get-arg "-runtests")))
		(server:client-setup) ;; This is a duplicate startup!!!??? BUG?
		))
	(set! keys (open-run-close db:get-keys db))
	;; have enough to process -target or -reqtarg here
	(if (args:get-arg "-reqtarg")
	    (let* ((runconfigf (conc  *toppath* "/runconfigs.config")) ;; DO NOT EVALUATE ALL 
		   (runconfig  (read-config runconfigf #f #f environ-patt: #f))) 
	      (if (hash-table-ref/default runconfig (args:get-arg "-reqtarg") #f)
		  (keys:target-set-args keys (args:get-arg "-reqtarg") args:arg-hash)

Modified server.scm from [c2beddecc4] to [6fd4f98b66].

42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
      (debug:print 4 "server:self-ping - I'm alive on " iface ":" port "!")
      (mutex-lock! *heartbeat-mutex*)
      (set! *server-loop-heart-beat* (current-seconds))
      (mutex-unlock! *heartbeat-mutex*)
      (loop))))
    
(define (server:run hostn)
  (debug:print 0 "Attempting to start the server ...")
  (if (not *toppath*)
      (if (not (setup-for-run))
	  (begin
	    (debug:print 0 "ERROR: cannot find megatest.config, cannot start server, exiting")
	    (exit))))
  (let* ((zmq-socket     #f)
	 (zmq-socket-dat #f)







|







42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
      (debug:print 4 "server:self-ping - I'm alive on " iface ":" port "!")
      (mutex-lock! *heartbeat-mutex*)
      (set! *server-loop-heart-beat* (current-seconds))
      (mutex-unlock! *heartbeat-mutex*)
      (loop))))
    
(define (server:run hostn)
  (debug:print 2 "Attempting to start the server ...")
  (if (not *toppath*)
      (if (not (setup-for-run))
	  (begin
	    (debug:print 0 "ERROR: cannot find megatest.config, cannot start server, exiting")
	    (exit))))
  (let* ((zmq-socket     #f)
	 (zmq-socket-dat #f)
123
124
125
126
127
128
129



130
131
132
133
134
135
136

;; run server:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (server:keep-running)
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown



  (let loop ((count 0))
    (thread-sleep! 4) ;; no need to do this very often
    (db:write-cached-data)
    ;; (print "Server running, count is " count)
    (if (< count 1) ;; 3x3 = 9 secs aprox
	(loop (+ count 1))
	(let (;; (numrunning            (open-run-close db:get-count-tests-running #f))







>
>
>







123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139

;; run server:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (server:keep-running)
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown

;;  (let ((die-timeout (

  (let loop ((count 0))
    (thread-sleep! 4) ;; no need to do this very often
    (db:write-cached-data)
    ;; (print "Server running, count is " count)
    (if (< count 1) ;; 3x3 = 9 secs aprox
	(loop (+ count 1))
	(let (;; (numrunning            (open-run-close db:get-count-tests-running #f))
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
	      (begin
		(open-run-close tasks:server-deregister tasks:open-db (cadr server-info) port: (caddr server-info))
		(debug:print 0 "ERROR: Heartbeat failed, committing servercide")
		(exit))
	      (open-run-close tasks:server-update-heartbeat tasks:open-db (car server-info)))
	  ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access
	  (if (> (+ *last-db-access* 
		    ;; (* 48 60 60)    ;; 48 hrs
		    ;; 60              ;; one minute
		    (* 60 60)       ;; one hour
		    )
		 (current-seconds))
	      (begin
		;; (debug:print-info 2 "Server continuing, tests running: " numrunning ", seconds since last db access: " (- (current-seconds) *last-db-access*))
		(debug:print-info 2 "Server continuing, seconds since last db access: " (- (current-seconds) *last-db-access*))
		(loop 0))
	      (begin







|

|







154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
	      (begin
		(open-run-close tasks:server-deregister tasks:open-db (cadr server-info) port: (caddr server-info))
		(debug:print 0 "ERROR: Heartbeat failed, committing servercide")
		(exit))
	      (open-run-close tasks:server-update-heartbeat tasks:open-db (car server-info)))
	  ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access
	  (if (> (+ *last-db-access* 
		    (* 70 60 60)       ;; 70 hrs is enough that the server will still be available after the weekend 
		    ;; 60              ;; one minute
		    ;; (* 60 60)       ;; one hour
		    )
		 (current-seconds))
	      (begin
		;; (debug:print-info 2 "Server continuing, tests running: " numrunning ", seconds since last db access: " (- (current-seconds) *last-db-access*))
		(debug:print-info 2 "Server continuing, seconds since last db access: " (- (current-seconds) *last-db-access*))
		(loop 0))
	      (begin
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
       ;; (old-handler)
       ;; (print-call-chain)
       (if (> trynum 0)
	   (server:find-free-port-and-open iface s (+ p 1) trynum: (- trynum 1))
	   (debug:print-info 0 "Tried ports up to " p 
			     " but all were in use. Please try a different port range by starting the server with parameter \" -port N\" where N is the starting port number to use")))
     (let ((zmq-url (conc "tcp://" iface ":" p)))
       (print "Trying to start server on " zmq-url)
       (bind-socket s zmq-url)
       (set! *runremote* #f)
       (debug:print 0 "Server started on " zmq-url)
       (mutex-lock! *heartbeat-mutex*)
       (set! *server-info* (open-run-close tasks:server-register tasks:open-db (current-process-id) iface p 0 'live))
       (mutex-unlock! *heartbeat-mutex*)
       (list iface s port)))))

(define (server:mk-signature)
  (message-digest-string (md5-primitive) 







|


|







189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
       ;; (old-handler)
       ;; (print-call-chain)
       (if (> trynum 0)
	   (server:find-free-port-and-open iface s (+ p 1) trynum: (- trynum 1))
	   (debug:print-info 0 "Tried ports up to " p 
			     " but all were in use. Please try a different port range by starting the server with parameter \" -port N\" where N is the starting port number to use")))
     (let ((zmq-url (conc "tcp://" iface ":" p)))
       (debug:print 2 "Trying to start server on " zmq-url)
       (bind-socket s zmq-url)
       (set! *runremote* #f)
       (debug:print 2 "Server started on " zmq-url)
       (mutex-lock! *heartbeat-mutex*)
       (set! *server-info* (open-run-close tasks:server-register tasks:open-db (current-process-id) iface p 0 'live))
       (mutex-unlock! *heartbeat-mutex*)
       (list iface s port)))))

(define (server:mk-signature)
  (message-digest-string (md5-primitive) 
271
272
273
274
275
276
277
278
279




280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
		   #t)
		 (begin
		   (debug:print-info 2 "Failed to login or connect to " conurl)
		   (set! *runremote* #f)
		   #f)))))
	(if (> numtries 0)
	    (let ((exe (car (argv))))
	      (debug:print-info 1 "No server available, attempting to start one...")
	      (process-run exe (list "-server" "-" "-debug" (conc *verbosity*)))




	      (sleep 5) ;; give server time to start
	      ;; we are starting a server, do not try again! That can lead to 
	      ;; recursively starting many processes!!!
	      (server:client-setup numtries: 0))
	    (debug:print-info 1 "Too many attempts, giving up")))))

;; all routes though here end in exit ...
(define (server:launch)
  (if (not *toppath*)
      (if (not (setup-for-run))
	  (begin
	    (debug:print 0 "ERROR: cannot find megatest.config, exiting")
	    (exit))))
  (debug:print-info 1 "Starting the standalone server")
  (let ((hostinfo (open-run-close tasks:get-best-server tasks:open-db)))
    (if hostinfo
	(debug:print-info 1 "NOT starting new server, one is already running on " (car hostinfo) ":" (cadr hostinfo))
	(if *toppath* 
	    (let* ((th1 (make-thread (lambda ()
				       (let ((server-info #f))
					 ;; wait for the server to be online and available
					 (let loop ()
					   (debug:print-info 1 "Waiting for the server to come online before starting heartbeat")
					   (thread-sleep! 2)
					   (mutex-lock! *heartbeat-mutex*)
					   (set! server-info *server-info* )
					   (mutex-unlock! *heartbeat-mutex*)
					   (if (not server-info)(loop)))
					 (debug:print 1 "Server alive, starting self-ping")
					 (server:self-ping (cadr server-info)(caddr server-info)))) "Self ping"))
		   (th2 (make-thread (lambda ()
				       (server:run (args:get-arg "-server"))) "Server run"))
		   (th3 (make-thread (lambda ()
				       (server:keep-running)) "Keep running")))
	      (set! *client-non-blocking-mode* #t)
	      (thread-start! th1)







|

>
>
>
>













|


|





|





|







274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
		   #t)
		 (begin
		   (debug:print-info 2 "Failed to login or connect to " conurl)
		   (set! *runremote* #f)
		   #f)))))
	(if (> numtries 0)
	    (let ((exe (car (argv))))
	      (debug:print-info 2 "No server available, attempting to start one...")
	      (process-run exe (list "-server" "-" "-debug" (conc *verbosity*)))
	      ;; (process-fork (lambda ()
	      ;;   	      (server:launch)
	      ;;   	      (exit) ;; should never get here ....
	      ;;   	      ))
	      (sleep 5) ;; give server time to start
	      ;; we are starting a server, do not try again! That can lead to 
	      ;; recursively starting many processes!!!
	      (server:client-setup numtries: 0))
	    (debug:print-info 1 "Too many attempts, giving up")))))

;; all routes though here end in exit ...
(define (server:launch)
  (if (not *toppath*)
      (if (not (setup-for-run))
	  (begin
	    (debug:print 0 "ERROR: cannot find megatest.config, exiting")
	    (exit))))
  (debug:print-info 2 "Starting the standalone server")
  (let ((hostinfo (open-run-close tasks:get-best-server tasks:open-db)))
    (if hostinfo
	(debug:print-info 2 "NOT starting new server, one is already running on " (car hostinfo) ":" (cadr hostinfo))
	(if *toppath* 
	    (let* ((th1 (make-thread (lambda ()
				       (let ((server-info #f))
					 ;; wait for the server to be online and available
					 (let loop ()
					   (debug:print-info 2 "Waiting for the server to come online before starting heartbeat")
					   (thread-sleep! 2)
					   (mutex-lock! *heartbeat-mutex*)
					   (set! server-info *server-info* )
					   (mutex-unlock! *heartbeat-mutex*)
					   (if (not server-info)(loop)))
					 (debug:print 2 "Server alive, starting self-ping")
					 (server:self-ping (cadr server-info)(caddr server-info)))) "Self ping"))
		   (th2 (make-thread (lambda ()
				       (server:run (args:get-arg "-server"))) "Server run"))
		   (th3 (make-thread (lambda ()
				       (server:keep-running)) "Keep running")))
	      (set! *client-non-blocking-mode* #t)
	      (thread-start! th1)