Megatest

Check-in [598e97c160]
Login
Overview
Comment:Fixed bad params on test status calls
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | servermode
Files: files | file ages | folders
SHA1: 598e97c1608422582b19e45d0e7f8f016c816cc1
User & Date: matt on 2012-03-11 23:01:53
Other Links: branch diff | manifest | tags
Context
2012-03-11
23:26
tweaked check-in: a680aa27b4 user: matt tags: servermode
23:01
Fixed bad params on test status calls check-in: 598e97c160 user: matt tags: servermode
22:47
Added IPADDR option to makefile check-in: 71cdd263a3 user: mrwellan tags: servermode
Changes

Modified launch.scm from [0b40b3f299] to [15622d4d14].

102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
	  (set-run-config-vars db run-id)
	  ;; environment overrides are done *before* the remaining critical envars.
	  (alist->env-vars env-ovrd)
	  (set-megatest-env-vars db run-id)
	  (set-item-env-vars itemdat)
	  (save-environment-as-files "megatest")
	  (test-set-meta-info db run-id test-name itemdat)
	  (test-set-status! db run-id test-name "REMOTEHOSTSTART" "n/a" itemdat (args:get-arg "-m") #f)
	  (if (args:get-arg "-xterm")
	      (set! fullrunscript "xterm")
	      (if (and fullrunscript (not (file-execute-access? fullrunscript)))
		  (system (conc "chmod ug+x " fullrunscript))))
	  ;; We are about to actually kick off the test
	  ;; so this is a good place to remove the records for 
	  ;; any previous runs







|







102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
	  (set-run-config-vars db run-id)
	  ;; environment overrides are done *before* the remaining critical envars.
	  (alist->env-vars env-ovrd)
	  (set-megatest-env-vars db run-id)
	  (set-item-env-vars itemdat)
	  (save-environment-as-files "megatest")
	  (test-set-meta-info db run-id test-name itemdat)
	  (test-set-status! db test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f)
	  (if (args:get-arg "-xterm")
	      (set! fullrunscript "xterm")
	      (if (and fullrunscript (not (file-execute-access? fullrunscript)))
		  (system (conc "chmod ug+x " fullrunscript))))
	  ;; We are about to actually kick off the test
	  ;; so this is a good place to remove the records for 
	  ;; any previous runs
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
						       (debug:print 4 "Exit value received: " (vector-ref exit-info 2) " logpro-used: " logpro-used 
								    " this-step-status: " this-step-status " overall-status: " overall-status 
								    " next-status: " next-status " rollup-status: " rollup-status)
						       (case next-status
							 ((warn)
							  (set! rollup-status 2)
							  ;; NB// test-set-status! does rdb calls under the hood
							  (test-set-status! db run-id test-name "RUNNING" "WARN" itemdat 
									    (if (eq? this-step-status 'warn) "Logpro warning found" #f)
									    #f))
							 ((pass)
							  (test-set-status! db run-id test-name "RUNNING" "PASS" itemdat #f #f))
							 (else ;; 'fail
							  (set! rollup-status 1) ;; force fail
							  (test-set-status! db run-id test-name "RUNNING" "FAIL" itemdat (conc "Failed at step " stepname) #f)
							  ))))
						   (if (and (steprun-good? logpro-used (vector-ref exit-info 2))
							    (not (null? tal)))
						       (loop (car tal) (cdr tal) stepname)))
					     (debug:print 4 "WARNING: a prior step failed, stopping at " ezstep))))))))
		 (monitorjob   (lambda ()
				 (let* ((start-seconds (current-seconds))







|



|


|







226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
						       (debug:print 4 "Exit value received: " (vector-ref exit-info 2) " logpro-used: " logpro-used 
								    " this-step-status: " this-step-status " overall-status: " overall-status 
								    " next-status: " next-status " rollup-status: " rollup-status)
						       (case next-status
							 ((warn)
							  (set! rollup-status 2)
							  ;; NB// test-set-status! does rdb calls under the hood
							  (test-set-status! db test-id "RUNNING" "WARN" 
									    (if (eq? this-step-status 'warn) "Logpro warning found" #f)
									    #f))
							 ((pass)
							  (test-set-status! db test-id "RUNNING" "PASS" #f #f))
							 (else ;; 'fail
							  (set! rollup-status 1) ;; force fail
							  (test-set-status! db test-id "RUNNING" "FAIL" (conc "Failed at step " stepname) #f)
							  ))))
						   (if (and (steprun-good? logpro-used (vector-ref exit-info 2))
							    (not (null? tal)))
						       (loop (car tal) (cdr tal) stepname)))
					     (debug:print 4 "WARNING: a prior step failed, stopping at " ezstep))))))))
		 (monitorjob   (lambda ()
				 (let* ((start-seconds (current-seconds))
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
								(begin
								  (debug:print 0 "Killing " (cadr parts) "; kill -9  " p-id)
								  (system (conc "kill -9 " p-id))))))
							(car processes))
						       (system (conc "kill -9 " pid))))
						   (begin
						     (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process")
						     (test-set-status! db run-id test-name "KILLED"  "FAIL"
								       itemdat (args:get-arg "-m") #f)
						     (sqlite3:finalize! db)
						     (exit 1))))
					     (set! kill-tries (+ 1 kill-tries))
					     (mutex-unlock! m)))
				       (sqlite3:finalize! db)
				       (thread-sleep! (+ 10 (random 10))) ;; add some jitter to the call home time to spread out the db accesses
				       (loop (calc-minutes)))))))







|
|







281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
								(begin
								  (debug:print 0 "Killing " (cadr parts) "; kill -9  " p-id)
								  (system (conc "kill -9 " p-id))))))
							(car processes))
						       (system (conc "kill -9 " pid))))
						   (begin
						     (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process")
						     (test-set-status! db test-id "KILLED"  "FAIL"
								       (args:get-arg "-m") #f)
						     (sqlite3:finalize! db)
						     (exit 1))))
					     (set! kill-tries (+ 1 kill-tries))
					     (mutex-unlock! m)))
				       (sqlite3:finalize! db)
				       (thread-sleep! (+ 10 (random 10))) ;; add some jitter to the call home time to spread out the db accesses
				       (loop (calc-minutes)))))))

Modified runs.scm from [4ae48c4de9] to [c9cc30bbd7].

401
402
403
404
405
406
407
408

409
410
411
412
413
414
415

416
417
418
419
420
421
422
        (begin
	   (hash-table-set! *test-meta-updated* test-name #t)
           (runs:update-test_meta db test-name test-conf)))
    
    ;; (lambda (itemdat) ;;; ((ripeness "overripe") (temperature "cool") (season "summer"))
    (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/"))
	   (new-test-name (if (equal? item-path "") test-name (conc test-name "/" item-path))) ;; just need it to be unique
	   (testdat       (db:get-test-info db run-id test-name item-path)))

      (if (not testdat)
	  (begin
	    ;; ensure that the path exists before registering the test
	    ;; NOPE: Cannot! Don't know yet which disk area will be assigned....
	    ;; (system (conc "mkdir -p " new-test-path))
	    (rtests:register-test db run-id test-name item-path)
	    (set! testdat (db:get-test-info db run-id test-name item-path))))

      (change-directory test-path)
      (case (if force ;; (args:get-arg "-force")
		'NOT_STARTED
		(if testdat
		    (string->symbol (test:get-state testdat))
		    'failed-to-insert))
	((failed-to-insert)







|
>







>







401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
        (begin
	   (hash-table-set! *test-meta-updated* test-name #t)
           (runs:update-test_meta db test-name test-conf)))
    
    ;; (lambda (itemdat) ;;; ((ripeness "overripe") (temperature "cool") (season "summer"))
    (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/"))
	   (new-test-name (if (equal? item-path "") test-name (conc test-name "/" item-path))) ;; just need it to be unique
	   (testdat       (db:get-test-info db run-id test-name item-path))
	   (test-id       #f))
      (if (not testdat)
	  (begin
	    ;; ensure that the path exists before registering the test
	    ;; NOPE: Cannot! Don't know yet which disk area will be assigned....
	    ;; (system (conc "mkdir -p " new-test-path))
	    (rtests:register-test db run-id test-name item-path)
	    (set! testdat (db:get-test-info db run-id test-name item-path))))
      (set! test-id (db:test-get-id testdat))
      (change-directory test-path)
      (case (if force ;; (args:get-arg "-force")
		'NOT_STARTED
		(if testdat
		    (string->symbol (test:get-state testdat))
		    'failed-to-insert))
	((failed-to-insert)
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
	 (debug:print 1 "NOTE: " new-test-name " is already running or was explictly killed, use -force to launch it."))
	((LAUNCHED REMOTEHOSTSTART RUNNING)  
	 (if (> (- (current-seconds)(+ (db:test-get-event_time testdat)
				       (db:test-get-run_duration testdat)))
		600) ;; i.e. no update for more than 600 seconds
	     (begin
	       (debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD")
	       (test-set-status! db run-id test-name "INCOMPLETE" "STUCK/DEAD" itemdat "Test is stuck or dead" #f))
	     (debug:print 2 "NOTE: " test-name " is already running")))
	(else       (debug:print 0 "ERROR: Failed to launch test " new-test-name ". Unrecognised state " (test:get-state testdat)))))))

;;======================================================================
;; END OF NEW STUFF
;;======================================================================








|







472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
	 (debug:print 1 "NOTE: " new-test-name " is already running or was explictly killed, use -force to launch it."))
	((LAUNCHED REMOTEHOSTSTART RUNNING)  
	 (if (> (- (current-seconds)(+ (db:test-get-event_time testdat)
				       (db:test-get-run_duration testdat)))
		600) ;; i.e. no update for more than 600 seconds
	     (begin
	       (debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD")
	       (test-set-status! db test-id "INCOMPLETE" "STUCK/DEAD" "Test is stuck or dead" #f))
	     (debug:print 2 "NOTE: " test-name " is already running")))
	(else       (debug:print 0 "ERROR: Failed to launch test " new-test-name ". Unrecognised state " (test:get-state testdat)))))))

;;======================================================================
;; END OF NEW STUFF
;;======================================================================

Modified tests/Makefile from [783ccc5c9e] to [7122e40641].

41
42
43
44
45
46
47



48
49
dashboard :
	cd ../;make install
	$(BINPATH)/dboard &

remove :
	(cd ../;make);$(MEGATEST) -remove-runs :runname $(RUN)  -testpatt % -itempatt % :sysname % :fsname % :datapath %




runforever :
	while(ls); do runname=`date +%F-%R:%S`;$(MEGATEST) -runall -target ubuntu/nfs/none :runname $$runname;/home/matt/data/megatest/megatest -runall -target ubuntu/nfs/none :runname $$runname;/home/matt/data/megatest/megatest -runall -target ubuntu/nfs/none :runname $$runname;done







>
>
>


41
42
43
44
45
46
47
48
49
50
51
52
dashboard :
	cd ../;make install
	$(BINPATH)/dboard &

remove :
	(cd ../;make);$(MEGATEST) -remove-runs :runname $(RUN)  -testpatt % -itempatt % :sysname % :fsname % :datapath %

clean  : 
	rm cleanprep

runforever :
	while(ls); do runname=`date +%F-%R:%S`;$(MEGATEST) -runall -target ubuntu/nfs/none :runname $$runname;/home/matt/data/megatest/megatest -runall -target ubuntu/nfs/none :runname $$runname;/home/matt/data/megatest/megatest -runall -target ubuntu/nfs/none :runname $$runname;done