Megatest

Check-in [95dcd86380]
Login
Overview
Comment:Moved call to run-wait to a point after all other likely calls so that run-wait can be used with other switches. Added blocking of remove when a test has sub tests.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.55
Files: files | file ages | folders
SHA1: 95dcd863801cfd94c1319aaa3be81a50170ceed5
User & Date: mrwellan on 2014-05-30 09:00:31
Other Links: branch diff | manifest | tags
Context
2014-05-30
09:50
Split off pre and post commands in dashboard test control panel system calls with override from config check-in: 4c81f4b156 user: mrwellan tags: v1.55
09:00
Moved call to run-wait to a point after all other likely calls so that run-wait can be used with other switches. Added blocking of remove when a test has sub tests. check-in: 95dcd86380 user: mrwellan tags: v1.55
2014-05-29
08:39
Added override ability in remove-runs to optionally not remove the record check-in: 3168e11fa8 user: matt tags: v1.55
Changes

Modified db.scm from [f79f1b36ee] to [238ddbc58d].

65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85

(define (open-db) ;;  (conc *toppath* "/megatest.db") (car *configinfo*)))
  (if (not *toppath*)
      (if (not (setup-for-run))
	  (begin
	    (debug:print 0 "ERROR: Attempted to open db when not in megatest area. Exiting.")
	    (exit))))
  (let* ((dbpath    (conc *toppath* "/megatest.db")) ;; fname)
	 (dbexists  (file-exists? dbpath))
	 (write-access (file-write-access? dbpath))
	 (db        (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath))
	 (handler   (make-busy-timeout (if (args:get-arg "-override-timeout")
					   (string->number (args:get-arg "-override-timeout"))
					   6000)))) ;; NB// this is in milliseconds. 136000))) ;; 136000 = 2.2 minutes
    (if (and dbexists
	     (not write-access))
	(set! *db-write-access* write-access)) ;; only unset so other db's also can use this control
    (debug:print-info 11 "open-db, dbpath=" dbpath " argv=" (argv))
    (if write-access (sqlite3:set-busy-handler! db handler))
    (if (not dbexists)
	(db:initialize db))







|
|

|
|
|
|







65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85

(define (open-db) ;;  (conc *toppath* "/megatest.db") (car *configinfo*)))
  (if (not *toppath*)
      (if (not (setup-for-run))
	  (begin
	    (debug:print 0 "ERROR: Attempted to open db when not in megatest area. Exiting.")
	    (exit))))
  (let* ((dbpath       (conc *toppath* "/megatest.db")) ;; fname)
	 (dbexists     (file-exists? dbpath))
	 (write-access (file-write-access? dbpath))
	 (db           (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath))
	 (handler      (make-busy-timeout (if (args:get-arg "-override-timeout")
					      (string->number (args:get-arg "-override-timeout"))
					      6000)))) ;; NB// this is in milliseconds. 136000))) ;; 136000 = 2.2 minutes
    (if (and dbexists
	     (not write-access))
	(set! *db-write-access* write-access)) ;; only unset so other db's also can use this control
    (debug:print-info 11 "open-db, dbpath=" dbpath " argv=" (argv))
    (if write-access (sqlite3:set-busy-handler! db handler))
    (if (not dbexists)
	(db:initialize db))
1578
1579
1580
1581
1582
1583
1584











1585
1586
1587
1588
1589
1590
1591
		(debug:print-info 4 "Trying " (car pathdat) " at " (cadr pathdat))
		(sqlite3:finalize! db)
		(if (not (null? newres))
		    (car newres)
		    (if (null? tal)
			#f
			(loop (car tal)(cdr tal))))))))))












;;======================================================================
;; QUEUE UP META, TEST STATUS AND STEPS REMOTE ACCESS
;;======================================================================

;; NOTE: Can remove the regex and base64 encoding for zmq
(define (db:obj->string obj)







>
>
>
>
>
>
>
>
>
>
>







1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
		(debug:print-info 4 "Trying " (car pathdat) " at " (cadr pathdat))
		(sqlite3:finalize! db)
		(if (not (null? newres))
		    (car newres)
		    (if (null? tal)
			#f
			(loop (car tal)(cdr tal))))))))))

;; Count the item records of a test within a run.
;;
;;   db       - open sqlite3 database handle
;;   run-id   - value matched against tests.run_id
;;   testname - value matched against tests.testname
;;
;; Only rows with a non-empty item_path whose state is not 'DELETED' are
;; counted. Returns the count delivered by the query, or 0 if the query
;; yields no row.
(define (db:test-toplevel-num-items db run-id testname)
  (let ((item-count 0)
	(count-qry  "SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND state NOT IN ('DELETED');"))
    (sqlite3:for-each-row
     (lambda (n) (set! item-count n))
     db
     count-qry
     run-id
     testname)
    item-count))

;;======================================================================
;; QUEUE UP META, TEST STATUS AND STEPS REMOTE ACCESS
;;======================================================================

;; NOTE: Can remove the regex and base64 encoding for zmq
(define (db:obj->string obj)

Modified db_records.scm from [a8b0eafe9a] to [02b88af351].



1
2
3
4
5
6
7


(define (make-db:test)(make-vector 20))
(define-inline (db:test-get-id           vec) (vector-ref vec 0))
(define-inline (db:test-get-run_id       vec) (vector-ref vec 1))
(define-inline (db:test-get-testname     vec) (vector-ref vec 2))
(define-inline (db:test-get-state        vec) (vector-ref vec 3))
(define-inline (db:test-get-status       vec) (vector-ref vec 4))
(define-inline (db:test-get-event_time   vec) (vector-ref vec 5))
>
>







1
2
3
4
5
6
7
8
9
;; Test record accessors
;;
(define (make-db:test)(make-vector 20))
(define-inline (db:test-get-id           vec) (vector-ref vec 0))
(define-inline (db:test-get-run_id       vec) (vector-ref vec 1))
(define-inline (db:test-get-testname     vec) (vector-ref vec 2))
(define-inline (db:test-get-state        vec) (vector-ref vec 3))
(define-inline (db:test-get-status       vec) (vector-ref vec 4))
(define-inline (db:test-get-event_time   vec) (vector-ref vec 5))
24
25
26
27
28
29
30








31
32
33
34
35
36
37
(define-inline (db:test-set-diskfree! vec val)(vector-set! vec 8 val))
(define-inline (db:test-set-testname! vec val)(vector-set! vec 2 val))
(define-inline (db:test-set-state!    vec val)(vector-set! vec 3 val))
(define-inline (db:test-set-status!   vec val)(vector-set! vec 4 val))
(define-inline (db:test-set-run_duration! vec val)(vector-set! vec 12 val))
(define-inline (db:test-set-final_logf! vec val)(vector-set! vec 13 val))









;; get rows and header from 
(define-inline (db:get-header vec)(vector-ref vec 0))
(define-inline (db:get-rows   vec)(vector-ref vec 1))

;; make-vector-record "" db mintest id run_id testname state status event_time item_path
;;
(define (make-db:mintest)(make-vector 7))







>
>
>
>
>
>
>
>







26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
(define-inline (db:test-set-diskfree! vec val)(vector-set! vec 8 val))
(define-inline (db:test-set-testname! vec val)(vector-set! vec 2 val))
(define-inline (db:test-set-state!    vec val)(vector-set! vec 3 val))
(define-inline (db:test-set-status!   vec val)(vector-set! vec 4 val))
(define-inline (db:test-set-run_duration! vec val)(vector-set! vec 12 val))
(define-inline (db:test-set-final_logf! vec val)(vector-set! vec 13 val))

;; Test record utility functions

;; Toplevel predicate for a test record.
;;
;; Returns #t only when both hold:
;;   - the record's item path is the empty string (test is not an item)
;;   - the record's uname is "n/a" (test has never been run)
;; The uname is only inspected when the item path check succeeds.
;;
(define (db:test-get-is-toplevel vec)
  (if (equal? "" (db:test-get-item-path vec))
      (equal? "n/a" (db:test-get-uname vec))
      #f))

;; get rows and header from 
(define-inline (db:get-header vec)(vector-ref vec 0))
(define-inline (db:get-rows   vec)(vector-ref vec 1))

;; make-vector-record "" db mintest id run_id testname state status event_time item_path
;;
(define (make-db:mintest)(make-vector 7))

Modified megatest.scm from [3aa4b2f17c] to [7c3c3863dc].

1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
      (if (not (setup-for-run))
	  (begin
	    (debug:print 0 "Failed to setup, exiting") 
	    (exit 1)))
      (open-run-close db:find-and-mark-incomplete #f)
      (set! *didsomething* #t)))

;;======================================================================
;; Wait on a run to complete
;;======================================================================

(if (args:get-arg "-run-wait")
    (begin
      (if (not (setup-for-run))
	  (begin
	    (debug:print 0 "Failed to setup, exiting") 
	    (exit 1)))
      (operate-on 'run-wait)
      (set! *didsomething* #t)))

;;======================================================================
;; Update the tests meta data from the testconfig files
;;======================================================================

(if (args:get-arg "-update-meta")
    (begin
      (if (not (setup-for-run))







<
<
<
<
<
<
<
<
<
<
<
<
<







1197
1198
1199
1200
1201
1202
1203













1204
1205
1206
1207
1208
1209
1210
      (if (not (setup-for-run))
	  (begin
	    (debug:print 0 "Failed to setup, exiting") 
	    (exit 1)))
      (open-run-close db:find-and-mark-incomplete #f)
      (set! *didsomething* #t)))














;;======================================================================
;; Update the tests meta data from the testconfig files
;;======================================================================

(if (args:get-arg "-update-meta")
    (begin
      (if (not (setup-for-run))
1248
1249
1250
1251
1252
1253
1254













1255
1256
1257
1258
1259
1260
1261
	      (or (get-environment-variable "HOME") ".") "/.megatest_history"))
	    (current-input-port (make-gnu-readline-port "megatest> "))
	    (if (args:get-arg "-repl")
		(repl)
		(load (args:get-arg "-load"))))
	  (exit))
      (set! *didsomething* #t)))














;;======================================================================
;; Exit and clean up
;;======================================================================

(if *runremote* (close-all-connections!))








>
>
>
>
>
>
>
>
>
>
>
>
>







1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
	      (or (get-environment-variable "HOME") ".") "/.megatest_history"))
	    (current-input-port (make-gnu-readline-port "megatest> "))
	    (if (args:get-arg "-repl")
		(repl)
		(load (args:get-arg "-load"))))
	  (exit))
      (set! *didsomething* #t)))

;;======================================================================
;; Wait on a run to complete
;;======================================================================

;; -run-wait handler: bail out with exit code 1 if the megatest area
;; cannot be set up, otherwise dispatch the run-wait action and record
;; that a command was processed.
(when (args:get-arg "-run-wait")
  (unless (setup-for-run)
    (debug:print 0 "Failed to setup, exiting")
    (exit 1))
  (operate-on 'run-wait)
  (set! *didsomething* #t))

;;======================================================================
;; Exit and clean up
;;======================================================================

(if *runremote* (close-all-connections!))

Modified runs.scm from [9e72541023] to [331811ff8d].

1379
1380
1381
1382
1383
1384
1385
1386



1387
1388




1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
			   (let* ((item-path     (db:test-get-item-path new-test-dat))
				  (test-name     (db:test-get-testname new-test-dat))
				  (run-dir       (db:test-get-rundir new-test-dat))    ;; run dir is from the link tree
				  (real-dir      (if (file-exists? run-dir)
						     (resolve-pathname run-dir)
						     #f))
				  (test-state    (db:test-get-state new-test-dat))
				  (test-fulln    (db:test-get-fullname new-test-dat)))



			     (case action
			       ((remove-runs)




				(debug:print-info 0 "test: " test-name " itest-state: " test-state)
				(if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))
				    (begin
				      (if (not (hash-table-ref/default test-retry-time test-fulln #f))
					  (begin
					    ;; want to set to REMOVING BUT CANNOT do it here?
					    (hash-table-set! test-retry-time test-fulln (current-seconds))))
				      (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time)
					  ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first
					  ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give
					  ;; up and blow it away.
					  (begin
					    (debug:print 0 "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing")
					    (mt:test-set-state-status-by-id (db:test-get-id test) "FAILEDKILL" "n/a" #f)
					    (thread-sleep! 1))
					  (begin
					    (mt:test-set-state-status-by-id (db:test-get-id test) "KILLREQ" "n/a" #f)
					    (thread-sleep! 1)))
				      ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ...
				      (if (null? tal)
					  (loop new-test-dat tal)
					  (loop (car tal)(append tal (list new-test-dat)))))
				    (begin
				      (mt:test-set-state-status-by-id (db:test-get-id test) "REMOVING" "LOCKED" #f)
				      (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir)
				      (if (and real-dir 
					       (> (string-length real-dir) 5)
					       (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc.
					  (begin ;; let* ((realpath (resolve-pathname run-dir)))
					    (debug:print-info 1 "Recursively removing " real-dir)
					    (if (file-exists? real-dir)
						(runs:safe-delete-test-dir real-dir)
						(debug:print 0 "WARNING: test dir " real-dir " appears to not exist or is not readable")))
					  (if real-dir 
					      (debug:print 0 "WARNING: directory " real-dir " does not exist")
					      (debug:print 0 "WARNING: no real directory corrosponding to link " run-dir ", nothing done")))
				      (if (symbolic-link? run-dir)
					  (begin
					    (debug:print-info 1 "Removing symlink " run-dir)
					    (handle-exceptions
					     exn
					     (debug:print 0 "ERROR:  Failed to remove symlink " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue")
					     (delete-file run-dir)))
					  (if (directory? run-dir)
					      (if (> (directory-fold (lambda (f x)(+ 1 x)) 0 run-dir) 0)
						  (debug:print 0 "WARNING: refusing to remove " run-dir " as it is not empty")
						  (handle-exceptions
						   exn
						   (debug:print 0 "ERROR:  Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue")
						   (delete-directory run-dir)))
					      (if run-dir
						  (debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink")
						  (debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted."))
					      ))
				      ;; Only delete the records *after* removing the directory. If things fail we have a record 
				      (if (not remove-data-only)
				          (cdb:remote-run db:delete-test-records db #f (db:test-get-id test)))
				      (if (not (null? tal))
					  (loop (car tal)(cdr tal))))))
			       ((set-state-status)
				(debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status))
				(mt:test-set-state-status-by-id (db:test-get-id test) (car state-status)(cadr state-status) #f)
				(if (not (null? tal))
				    (loop (car tal)(cdr tal))))
			       ((run-wait)
				(debug:print-info 2 "still waiting, " (length tests) " tests still running")







|
>
>
>


>
>
>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
			   (let* ((item-path     (db:test-get-item-path new-test-dat))
				  (test-name     (db:test-get-testname new-test-dat))
				  (run-dir       (db:test-get-rundir new-test-dat))    ;; run dir is from the link tree
				  (real-dir      (if (file-exists? run-dir)
						     (resolve-pathname run-dir)
						     #f))
				  (test-state    (db:test-get-state new-test-dat))
				  (test-fulln    (db:test-get-fullname new-test-dat))
				  (uname         (db:test-get-uname    new-test-dat))
				  (toplevel-with-children (and (db:test-get-is-toplevel test)
							       (> (cdb:remote-run db:test-toplevel-num-items db run-id test-name) 0))))
			     (case action
			       ((remove-runs)
				;; if the test is a toplevel-with-children issue an error and do not remove
				(if toplevel-with-children
				    (debug:print 0 "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests")
				    (begin
				      (debug:print-info 0 "test: " test-name " itest-state: " test-state)
				      (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))
					  (begin
					    (if (not (hash-table-ref/default test-retry-time test-fulln #f))
						(begin
						  ;; want to set to REMOVING BUT CANNOT do it here?
						  (hash-table-set! test-retry-time test-fulln (current-seconds))))
					    (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time)
						;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first
						;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give
						;; up and blow it away.
						(begin
						  (debug:print 0 "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing")
						  (mt:test-set-state-status-by-id (db:test-get-id test) "FAILEDKILL" "n/a" #f)
						  (thread-sleep! 1))
						(begin
						  (mt:test-set-state-status-by-id (db:test-get-id test) "KILLREQ" "n/a" #f)
						  (thread-sleep! 1)))
					    ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ...
					    (if (null? tal)
						(loop new-test-dat tal)
						(loop (car tal)(append tal (list new-test-dat)))))
					  (begin
					    (mt:test-set-state-status-by-id (db:test-get-id test) "REMOVING" "LOCKED" #f)
					    (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir)
					    (if (and real-dir 
						     (> (string-length real-dir) 5)
						     (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc.
						(begin ;; let* ((realpath (resolve-pathname run-dir)))
						  (debug:print-info 1 "Recursively removing " real-dir)
						  (if (file-exists? real-dir)
						      (runs:safe-delete-test-dir real-dir)
						      (debug:print 0 "WARNING: test dir " real-dir " appears to not exist or is not readable")))
						(if real-dir 
						    (debug:print 0 "WARNING: directory " real-dir " does not exist")
						    (debug:print 0 "WARNING: no real directory corrosponding to link " run-dir ", nothing done")))
					    (if (symbolic-link? run-dir)
						(begin
						  (debug:print-info 1 "Removing symlink " run-dir)
						  (handle-exceptions
						   exn
						   (debug:print 0 "ERROR:  Failed to remove symlink " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue")
						   (delete-file run-dir)))
						(if (directory? run-dir)
						    (if (> (directory-fold (lambda (f x)(+ 1 x)) 0 run-dir) 0)
							(debug:print 0 "WARNING: refusing to remove " run-dir " as it is not empty")
							(handle-exceptions
							 exn
							 (debug:print 0 "ERROR:  Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue")
							 (delete-directory run-dir)))
						    (if run-dir
							(debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink")
							(debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted."))
						    ))
					    ;; Only delete the records *after* removing the directory. If things fail we have a record 
					    (if (not remove-data-only)
						(cdb:remote-run db:delete-test-records db #f (db:test-get-id test)))
					    (if (not (null? tal))
						(loop (car tal)(cdr tal))))))))
			       ((set-state-status)
				(debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status))
				(mt:test-set-state-status-by-id (db:test-get-id test) (car state-status)(cadr state-status) #f)
				(if (not (null? tal))
				    (loop (car tal)(cdr tal))))
			       ((run-wait)
				(debug:print-info 2 "still waiting, " (length tests) " tests still running")