Megatest

Check-in [598e97c160]
Login
Overview
Comment:Fixed bad params on test status calls
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | servermode
Files: files | file ages | folders
SHA1: 598e97c1608422582b19e45d0e7f8f016c816cc1
User & Date: matt on 2012-03-11 23:01:53
Other Links: branch diff | manifest | tags
Context
2012-03-11
23:26
tweaked check-in: a680aa27b4 user: matt tags: servermode
23:01
Fixed bad params on test status calls check-in: 598e97c160 user: matt tags: servermode
22:47
Added IPADDR option to makefile check-in: 71cdd263a3 user: mrwellan tags: servermode
Changes

Modified launch.scm from [0b40b3f299] to [15622d4d14].

102
103
104
105
106
107
108
109

110
111
112
113
114
115
116
102
103
104
105
106
107
108

109
110
111
112
113
114
115
116







-
+







	  (set-run-config-vars db run-id)
	  ;; environment overrides are done *before* the remaining critical envars.
	  (alist->env-vars env-ovrd)
	  (set-megatest-env-vars db run-id)
	  (set-item-env-vars itemdat)
	  (save-environment-as-files "megatest")
	  (test-set-meta-info db run-id test-name itemdat)
	  (test-set-status! db run-id test-name "REMOTEHOSTSTART" "n/a" itemdat (args:get-arg "-m") #f)
	  (test-set-status! db test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f)
	  (if (args:get-arg "-xterm")
	      (set! fullrunscript "xterm")
	      (if (and fullrunscript (not (file-execute-access? fullrunscript)))
		  (system (conc "chmod ug+x " fullrunscript))))
	  ;; We are about to actually kick off the test
	  ;; so this is a good place to remove the records for 
	  ;; any previous runs
226
227
228
229
230
231
232
233

234
235
236
237

238
239
240

241
242
243
244
245
246
247
226
227
228
229
230
231
232

233
234
235
236

237
238
239

240
241
242
243
244
245
246
247







-
+



-
+


-
+







						       (debug:print 4 "Exit value received: " (vector-ref exit-info 2) " logpro-used: " logpro-used 
								    " this-step-status: " this-step-status " overall-status: " overall-status 
								    " next-status: " next-status " rollup-status: " rollup-status)
						       (case next-status
							 ((warn)
							  (set! rollup-status 2)
							  ;; NB// test-set-status! does rdb calls under the hood
							  (test-set-status! db run-id test-name "RUNNING" "WARN" itemdat 
							  (test-set-status! db test-id "RUNNING" "WARN" 
									    (if (eq? this-step-status 'warn) "Logpro warning found" #f)
									    #f))
							 ((pass)
							  (test-set-status! db run-id test-name "RUNNING" "PASS" itemdat #f #f))
							  (test-set-status! db test-id "RUNNING" "PASS" #f #f))
							 (else ;; 'fail
							  (set! rollup-status 1) ;; force fail
							  (test-set-status! db run-id test-name "RUNNING" "FAIL" itemdat (conc "Failed at step " stepname) #f)
							  (test-set-status! db test-id "RUNNING" "FAIL" (conc "Failed at step " stepname) #f)
							  ))))
						   (if (and (steprun-good? logpro-used (vector-ref exit-info 2))
							    (not (null? tal)))
						       (loop (car tal) (cdr tal) stepname)))
					     (debug:print 4 "WARNING: a prior step failed, stopping at " ezstep))))))))
		 (monitorjob   (lambda ()
				 (let* ((start-seconds (current-seconds))
281
282
283
284
285
286
287
288
289


290
291
292
293
294
295
296
281
282
283
284
285
286
287


288
289
290
291
292
293
294
295
296







-
-
+
+







								(begin
								  (debug:print 0 "Killing " (cadr parts) "; kill -9  " p-id)
								  (system (conc "kill -9 " p-id))))))
							(car processes))
						       (system (conc "kill -9 " pid))))
						   (begin
						     (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process")
						     (test-set-status! db run-id test-name "KILLED"  "FAIL"
								       itemdat (args:get-arg "-m") #f)
						     (test-set-status! db test-id "KILLED"  "FAIL"
								       (args:get-arg "-m") #f)
						     (sqlite3:finalize! db)
						     (exit 1))))
					     (set! kill-tries (+ 1 kill-tries))
					     (mutex-unlock! m)))
				       (sqlite3:finalize! db)
				       (thread-sleep! (+ 10 (random 10))) ;; add some jitter to the call home time to spread out the db accesses
				       (loop (calc-minutes)))))))

Modified runs.scm from [4ae48c4de9] to [c9cc30bbd7].

401
402
403
404
405
406
407
408


409
410
411
412
413
414
415

416
417
418
419
420
421
422
401
402
403
404
405
406
407

408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424







-
+
+







+







        (begin
	   (hash-table-set! *test-meta-updated* test-name #t)
           (runs:update-test_meta db test-name test-conf)))
    
    ;; (lambda (itemdat) ;;; ((ripeness "overripe") (temperature "cool") (season "summer"))
    (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/"))
	   (new-test-name (if (equal? item-path "") test-name (conc test-name "/" item-path))) ;; just need it to be unique
	   (testdat       (db:get-test-info db run-id test-name item-path)))
	   (testdat       (db:get-test-info db run-id test-name item-path))
	   (test-id       #f))
      (if (not testdat)
	  (begin
	    ;; ensure that the path exists before registering the test
	    ;; NOPE: Cannot! Don't know yet which disk area will be assigned....
	    ;; (system (conc "mkdir -p " new-test-path))
	    (rtests:register-test db run-id test-name item-path)
	    (set! testdat (db:get-test-info db run-id test-name item-path))))
      (set! test-id (db:test-get-id testdat))
      (change-directory test-path)
      (case (if force ;; (args:get-arg "-force")
		'NOT_STARTED
		(if testdat
		    (string->symbol (test:get-state testdat))
		    'failed-to-insert))
	((failed-to-insert)
470
471
472
473
474
475
476
477

478
479
480
481
482
483
484
472
473
474
475
476
477
478

479
480
481
482
483
484
485
486







-
+







	 (debug:print 1 "NOTE: " new-test-name " is already running or was explictly killed, use -force to launch it."))
	((LAUNCHED REMOTEHOSTSTART RUNNING)  
	 (if (> (- (current-seconds)(+ (db:test-get-event_time testdat)
				       (db:test-get-run_duration testdat)))
		600) ;; i.e. no update for more than 600 seconds
	     (begin
	       (debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD")
	       (test-set-status! db run-id test-name "INCOMPLETE" "STUCK/DEAD" itemdat "Test is stuck or dead" #f))
	       (test-set-status! db test-id "INCOMPLETE" "STUCK/DEAD" "Test is stuck or dead" #f))
	     (debug:print 2 "NOTE: " test-name " is already running")))
	(else       (debug:print 0 "ERROR: Failed to launch test " new-test-name ". Unrecognised state " (test:get-state testdat)))))))

;;======================================================================
;; END OF NEW STUFF
;;======================================================================

Modified tests/Makefile from [783ccc5c9e] to [7122e40641].

41
42
43
44
45
46
47



48
49
41
42
43
44
45
46
47
48
49
50
51
52







+
+
+


# dashboard: run the parent directory's `install` target, then start the
# dashboard binary ($(BINPATH)/dboard) in the background.
# `dashboard` names an action, not a file, so it is declared phony.
.PHONY: dashboard
dashboard :
	# `&&` stops here if the cd fails, so `install` never runs in this directory;
	# $(MAKE) (rather than a literal `make`) propagates -j/-n to the sub-make.
	cd .. && $(MAKE) install
	$(BINPATH)/dboard &

# remove: run the parent directory's default target, then ask $(MEGATEST) to
# remove the runs named $(RUN), matching every test, item, sysname, fsname
# and datapath (`%` patterns).
# The removal is only attempted when the rebuild succeeds; the subshell keeps
# the `cd` from affecting the directory $(MEGATEST) runs in.
.PHONY: remove
remove :
	(cd .. && $(MAKE)) && $(MEGATEST) -remove-runs :runname $(RUN)  -testpatt % -itempatt % :sysname % :fsname % :datapath %

# clean: delete the `cleanprep` file from this directory.
# `rm -f` keeps the target idempotent: a second `make clean` (or a clean on a
# fresh checkout) succeeds instead of failing on the missing file.
.PHONY: clean
clean  :
	rm -f cleanprep

# runforever: endless stress loop.
# Each iteration takes a fresh run name from `date +%F-%R:%S` and launches
# `-runall -target ubuntu/nfs/none` three times under that same name: once via
# $(MEGATEST) and twice via an absolute megatest path.
# The loop condition is the exit status of `ls`, so it keeps going for as long
# as the current directory can be listed (and prints the listing every pass).
# NOTE(review): the absolute /home/matt/... path is machine-specific -- confirm
# whether it is meant to be $(MEGATEST).
runforever :
	while (ls); do \
	  runname=$$(date +%F-%R:%S); \
	  $(MEGATEST) -runall -target ubuntu/nfs/none :runname $$runname; \
	  /home/matt/data/megatest/megatest -runall -target ubuntu/nfs/none :runname $$runname; \
	  /home/matt/data/megatest/megatest -runall -target ubuntu/nfs/none :runname $$runname; \
	done