Megatest

Check-in [ff130a6cab]
Login
Overview
Comment:Fixed bad records problem in archiving that was causing a crash
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: ff130a6cabb5fdc3a59a929e5803567048e37a73
User & Date: matt on 2015-01-09 01:21:45
Other Links: manifest | tags
Context
2015-05-20
09:42
Merging to trunk check-in: 0251075b09 user: mrwellan tags: trunk
2015-01-09
01:22
Merged changes made on trunk check-in: 557510500b user: matt tags: v1.60
01:21
Fixed bad records problem in archiving that was causing a crash check-in: ff130a6cab user: matt tags: trunk
00:41
Archive restoring now works check-in: 5494747c0c user: matt tags: trunk
Changes

Modified archive.scm from [a161042522] to [dea3fe5e91].

224
225
226
227
228
229
230

231

232
233
234
235
236
237
238


239
240
241
242
243
244
245
246
247
248
249
250
251

252



253
254
255
256

257
258
	      (test-path         (conc linktree "/" test-partial-path))
	      ;; if the old path was not deleted then prev-test-physical-path will end up pointing to a real directory
	      (prev-test-physical-path (if (file-exists? test-path) (read-symbolic-link test-path #t) #f))

	      (new-test-physical-path  (conc best-disk "/" test-partial-path))
	      (archive-block-id        (db:test-get-archived test-dat))
	      (archive-block-info      (rmt:test-get-archive-block-info archive-block-id))

	      (archive-path            (vector-ref archive-block-info 2)) ;; look in db.scm for test-get-archive-block-info for the vector record info

	      (archive-internal-path   (conc (common:get-testsuite-name) "-" run-id "/latest/" test-partial-path)))
	 
	 ;; some sanity checks 
	 (if (and prev-test-physical-path
		  (file-exists? prev-test-physical-path)) ;; what to do? abort or clean up or link it in?
	     (debug:print 0 "ERROR: the old directory " prev-test-physical-path ", still exists! This should not be."))



	 ;; CREATE WORK AREA
	 ;; test-src-path == #f     ==> don't copy in data from tests directory
	 ;; itemdat       == string ==> use directly
	 (create-work-area run-id run-name keyvals test-id #f best-disk test-name item-path) ;; #!key (remtries 2))

	 ;; 1. Get the block id from the test info
	 ;; 2. Get the block data given the block id
	 ;; 3. Construct the paths etc. for the following command:
	 ;; 
	 ;; bup -d /tmp/matt/adisk1/2015_q1/fullrun_e1a40/ restore -C /tmp/seeme fullrun-30/latest/ubuntu/nfs/none/w02.1.20.54_b/

	 ;; DO BUP RESTORE
	 (let* ((new-test-dat        (rmt:get-test-info-by-id run-id test-id))

		(new-test-path       (db:test-get-rundir new-test-dat))



		;; new-test-path won't work - must use best-disk instead? Nope, new-test-path but tack on /..
		(bup-restore-params  (list "-d" archive-path "restore" "-C" (conc new-test-path "/..") archive-internal-path)))
	   (debug:print-info 0 "Restoring archived data to " new-test-physical-path "; params: " bup-restore-params)
	   (run-n-wait bup-exe params: bup-restore-params print-cmd: #f))))

     tests)))
	 







>
|
>







>
>
|
|
|
|

|
|
|
|
|

|
|
>
|
>
>
>
|
|
|
|
>
|

224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
	      (test-path         (conc linktree "/" test-partial-path))
	      ;; if the old path was not deleted then prev-test-physical-path will end up pointing to a real directory
	      (prev-test-physical-path (if (file-exists? test-path) (read-symbolic-link test-path #t) #f))

	      (new-test-physical-path  (conc best-disk "/" test-partial-path))
	      (archive-block-id        (db:test-get-archived test-dat))
	      (archive-block-info      (rmt:test-get-archive-block-info archive-block-id))
	      (archive-path            (if (vector? archive-block-info)
					   (vector-ref archive-block-info 2) ;; look in db.scm for test-get-archive-block-info for the vector record info
					   #f)) ;; no archive found?
	      (archive-internal-path   (conc (common:get-testsuite-name) "-" run-id "/latest/" test-partial-path)))
	 
	 ;; some sanity checks 
	 (if (and prev-test-physical-path
		  (file-exists? prev-test-physical-path)) ;; what to do? abort or clean up or link it in?
	     (debug:print 0 "ERROR: the old directory " prev-test-physical-path ", still exists! This should not be."))

	 (if archive-path ;; no point in proceeding if there is no actual archive
	     (begin
	       ;; CREATE WORK AREA
	       ;; test-src-path == #f     ==> don't copy in data from tests directory
	       ;; itemdat       == string ==> use directly
	       (create-work-area run-id run-name keyvals test-id #f best-disk test-name item-path) ;; #!key (remtries 2))

	       ;; 1. Get the block id from the test info
	       ;; 2. Get the block data given the block id
	       ;; 3. Construct the paths etc. for the following command:
	       ;; 
	       ;; bup -d /tmp/matt/adisk1/2015_q1/fullrun_e1a40/ restore -C /tmp/seeme fullrun-30/latest/ubuntu/nfs/none/w02.1.20.54_b/

	       ;; DO BUP RESTORE
	       (let* ((new-test-dat        (rmt:get-test-info-by-id run-id test-id))
		      (new-test-path       (if (vector? new-test-dat )
					       (db:test-get-rundir new-test-dat)
					       (begin
						 (debug:print 0 "ERROR: unable to get data for run-id=" run-id ", test-id=" test-id)
						 (exit 1))))
		      ;; new-test-path won't work - must use best-disk instead? Nope, new-test-path but tack on /..
		      (bup-restore-params  (list "-d" archive-path "restore" "-C" (conc new-test-path "/..") archive-internal-path)))
		 (debug:print-info 0 "Restoring archived data to " new-test-physical-path " from archive in " archive-path " ... " archive-internal-path)
		 (run-n-wait bup-exe params: bup-restore-params print-cmd: #f)))
	     (debug:print 0 "ERROR: No archive path in the record for run-id=" run-id " test-id=" test-id))))
     (filter vector? tests))))
	 

Modified runs.scm from [b3bcca181a] to [73d47dd3ee].

1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488


1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
		    (debug:print 1 "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname")))
		   ((print-run)
		    (debug:print 1 "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header)
		    action)
		   ((run-wait)
		    (debug:print 1 "Waiting for run " runkey ", run=" runnamepatt " to complete"))
		   ((archive)
		    (debug:print 1 "Archiving data for run: " runkey " " (db:get-value-by-header run header "runname"))
		    (set! worker-thread (make-thread (lambda ()
						       (case (string->symbol (args:get-arg "-archive"))
							 ((save save-remove keep-html)(archive:run-bup (args:get-arg "-archive") run-id run-name tests))
							 ((restore)(archive:bup-restore (args:get-arg "-archive") run-id run-name tests))
							 (else (debug:print 0 "ERROR: unrecognised sub command to -archive. Run \"megatest\" to see help"))))
						     "archive-bup-thread"))
		    (thread-start! worker-thread))
		   (else
		    (debug:print-info 0 "action not recognised " action)))
		 
		 ;; actions that operate on one test at a time can be handled below
		 ;;


		 (let ((sorted-tests     (sort tests (lambda (a b)(let ((dira ;; (rmt:sdb-qry 'getstr 
									 (db:test-get-rundir a)) ;; )  ;; (filedb:get-path *fdb* (db:test-get-rundir a)))
									(dirb ;; (rmt:sdb-qry 'getstr 
									 (db:test-get-rundir b))) ;; ) ;; ((filedb:get-path *fdb* (db:test-get-rundir b))))
								    (if (and (string? dira)(string? dirb))
									(> (string-length dira)(string-length dirb))
									#f)))))
		       (toplevel-retries (make-hash-table)) ;; try three times to loop through and remove top level tests
		       (test-retry-time  (make-hash-table))
		       (allow-run-time   10)) ;; seconds to allow for killing tests before just brutally killing 'em
		   (let loop ((test (car sorted-tests))
			      (tal  (cdr sorted-tests)))
		     (let* ((test-id       (db:test-get-id test))
			    (new-test-dat  (rmt:get-test-info-by-id run-id test-id)))







|












>
>
|
|
|
|
|
|
|







1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
		    (debug:print 1 "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname")))
		   ((print-run)
		    (debug:print 1 "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header)
		    action)
		   ((run-wait)
		    (debug:print 1 "Waiting for run " runkey ", run=" runnamepatt " to complete"))
		   ((archive)
		    (debug:print 1 "Archiving/restoring (" (args:get-arg "-archive") ") data for run: " runkey " " (db:get-value-by-header run header "runname"))
		    (set! worker-thread (make-thread (lambda ()
						       (case (string->symbol (args:get-arg "-archive"))
							 ((save save-remove keep-html)(archive:run-bup (args:get-arg "-archive") run-id run-name tests))
							 ((restore)(archive:bup-restore (args:get-arg "-archive") run-id run-name tests))
							 (else (debug:print 0 "ERROR: unrecognised sub command to -archive. Run \"megatest\" to see help"))))
						     "archive-bup-thread"))
		    (thread-start! worker-thread))
		   (else
		    (debug:print-info 0 "action not recognised " action)))
		 
		 ;; actions that operate on one test at a time can be handled below
		 ;;
		 (let ((sorted-tests     (filter 
					  vector?
					  (sort tests (lambda (a b)(let ((dira ;; (rmt:sdb-qry 'getstr 
									  (db:test-get-rundir a)) ;; )  ;; (filedb:get-path *fdb* (db:test-get-rundir a)))
									 (dirb ;; (rmt:sdb-qry 'getstr 
									  (db:test-get-rundir b))) ;; ) ;; ((filedb:get-path *fdb* (db:test-get-rundir b))))
								     (if (and (string? dira)(string? dirb))
									 (> (string-length dira)(string-length dirb))
									 #f))))))
		       (toplevel-retries (make-hash-table)) ;; try three times to loop through and remove top level tests
		       (test-retry-time  (make-hash-table))
		       (allow-run-time   10)) ;; seconds to allow for killing tests before just brutally killing 'em
		   (let loop ((test (car sorted-tests))
			      (tal  (cdr sorted-tests)))
		     (let* ((test-id       (db:test-get-id test))
			    (new-test-dat  (rmt:get-test-info-by-id run-id test-id)))