Megatest

Check-in [04c31891a9]
Login
Overview
Comment:Added more instrumentation info on server pulse. Removed rm of monitor.db in Makefile
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 04c31891a949473d502fd0cd2dfa979427c08089
User & Date: matt on 2012-11-03 17:18:04
Other Links: manifest | tags
Context
2012-11-03
18:20
Added exception handling to server kill check-in: 6aea490eac user: matt tags: trunk, v1.5108
17:18
Added more instrumentation info on server pulse. Removed rm of monitor.db in Makefile check-in: 04c31891a9 user: matt tags: trunk
17:11
Heartbeat monitoring, on-the-fly server starting all working in simple manual testing check-in: 12d923326a user: matt tags: trunk
Changes

Modified server.scm from [442b819d32] to [ebf5897ef4].

140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
	  (mutex-lock! *heartbeat-mutex*)
	  (set! server-loop-heartbeat *server-loop-heart-beat*)
	  (set! server-info *server-info*)
	  (mutex-unlock! *heartbeat-mutex*)
	  ;; The logic here is that if the server loop gets stuck blocked in working
	  ;; we don't want to update our heartbeat
	  (set! pulse (- (current-seconds) server-loop-heartbeat))
	  (debug:print-info 1 "Heartbeat period is " pulse " on " (cadr server-info) ":" (caddr server-info))
	  (if (> pulse 11) ;; must stay less than 10 seconds 
	      (begin
		(debug:print 0 "ERROR: Heartbeat failed, committing servercide")
		(exit))
	      (open-run-close tasks:server-update-heartbeat tasks:open-db (car server-info)))
	  (if (or (> numrunning 0) ;; stay alive for two days after last access
		  (> (+ *last-db-access* 







|







140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
	  (mutex-lock! *heartbeat-mutex*)
	  (set! server-loop-heartbeat *server-loop-heart-beat*)
	  (set! server-info *server-info*)
	  (mutex-unlock! *heartbeat-mutex*)
	  ;; The logic here is that if the server loop gets stuck blocked in working
	  ;; we don't want to update our heartbeat
	  (set! pulse (- (current-seconds) server-loop-heartbeat))
	  (debug:print-info 1 "Heartbeat period is " pulse " seconds on " (cadr server-info) ":" (caddr server-info) ", last db access is " *last-db-access*)
	  (if (> pulse 11) ;; must stay less than 10 seconds 
	      (begin
		(debug:print 0 "ERROR: Heartbeat failed, committing servercide")
		(exit))
	      (open-run-close tasks:server-update-heartbeat tasks:open-db (car server-info)))
	  (if (or (> numrunning 0) ;; stay alive for two days after last access
		  (> (+ *last-db-access* 

Modified tests/Makefile from [438ac58123] to [6078b877de].

59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
	cd fullrun;$(MEGATEST) -runtests runfirst -testpatt %blahha% -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -debug 10
	cd fullrun;$(MEGATEST) -rollup :runname newrun -target ubuntu/nfs/none -debug 10


cleanprep : ../*.scm Makefile */*.config
	mkdir -p /tmp/mt_runs /tmp/mt_links
	cd ..;make install
	rm -f */logging.db */monitor.db
	touch cleanprep

fullprep : cleanprep
	cd fullrun;$(MEGATEST) -remove-runs :runname $(RUNNAME)% -target %/%/% -testpatt %/%
	cd fullrun;$(BINPATH)/dboard -rows 15 &

dashboard : cleanprep







|







59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
	cd fullrun;$(MEGATEST) -runtests runfirst -testpatt %blahha% -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -debug 10
	cd fullrun;$(MEGATEST) -rollup :runname newrun -target ubuntu/nfs/none -debug 10


cleanprep : ../*.scm Makefile */*.config
	mkdir -p /tmp/mt_runs /tmp/mt_links
	cd ..;make install
	rm -f */logging.db
	touch cleanprep

fullprep : cleanprep
	cd fullrun;$(MEGATEST) -remove-runs :runname $(RUNNAME)% -target %/%/% -testpatt %/%
	cd fullrun;$(BINPATH)/dboard -rows 15 &

dashboard : cleanprep