Megatest

Check-in [a8e4d577bc]
Login
Overview
Comment:Partial implementation of testrundat
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | test-specific-db
Files: files | file ages | folders
SHA1: a8e4d577bc0799b6b30752cf91d476b48da2e652
User & Date: mrwellan on 2012-09-17 18:02:51
Other Links: branch diff | manifest | tags
Context
2012-09-18
02:49
Competed initial implementation of testrundat check-in: 75c8dc4713 user: matt tags: test-specific-db
2012-09-17
18:02
Partial implementation of testrundat check-in: a8e4d577bc user: mrwellan tags: test-specific-db
2012-09-14
14:18
Brought up to date with latest from trunk check-in: 191987e384 user: mrwellan tags: test-specific-db
Changes

Modified db.scm from [d32735ad01] to [22790e3566].

155
156
157
158
159
160
161
162

163
164
165
166
167
168
169
170



171

172
173
174
175
176
177
178

179
180
181

182
183
184
185

186
187
188
189
190
191
192
193
155
156
157
158
159
160
161

162
163
164
165
166
167
168
169
170
171
172
173

174
175
176
177
178
179
180

181
182
183

184
185

186

187

188
189
190
191
192
193
194







-
+








+
+
+
-
+






-
+


-
+

-

-
+
-







                              CONSTRAINT test_data_constraint UNIQUE (test_id,category,variable));")
    ;; Must do this *after* running patch db !! No more. 
    (db:set-var db "MEGATEST_VERSION" megatest-version)
    ))

;; Create the sqlite db for the individual test(s)
(define (open-test-db testpath) 
  (let* ((dbpath    (conc testpath "/.testdat.db"))
  (let* ((dbpath    (conc testpath "/testdat.db"))
	 (dbexists  (file-exists? dbpath))
	 (db        (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath))
	 (handler   (make-busy-timeout (if (args:get-arg "-override-timeout")
					   (string->number (args:get-arg "-override-timeout"))
					   36000))))
    (debug:print 4 "INFO: test dbpath=" dbpath)
    (sqlite3:set-busy-handler! db handler)
    (if (not dbexists)
	(begin
	  (sqlite3:execute db "PRAGMA synchronous = FULL;")
	  (debug:print 0 "Initialized test database " dbpath)
	(db:testdb-initialize db))
	  (db:testdb-initialize db)))
    (sqlite3:execute db "PRAGMA synchronous = 0;")
    db))

(define (db:testdb-initialize db)
  (for-each
   (lambda (sqlcmd)
     (sqlite3:exectute db sqlcmd))
     (sqlite3:execute db sqlcmd))
   (list "CREATE TABLE IF NOT EXISTS test_rundat (
              id INTEGER PRIMARY KEY,
              event_time TIMESTAMP,
              update_time TIMESTAMP,
              cpuload INTEGER DEFAULT -1,
              uname TEXT DEFAULT '',
              diskfree INTEGER DEFAULT -1,
              diskusage INTGER DEFAULT -1,
              diskusage INTGER DEFAULT -1);"
              run_duration INTEGER DEFAULT 0);"
	  "CREATE TABLE IF NOT EXISTS test_data (
              id INTEGER PRIMARY KEY,
              test_id INTEGER,
              category TEXT DEFAULT '',
              variable TEXT,
	      value REAL,
	      expected REAL,
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832











































833
834
835
836
837
838
839
784
785
786
787
788
789
790











































791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







     qrystr)
    res))

;;======================================================================
;; QUEUE UP META, TEST STATUS AND STEPS
;;======================================================================

(define (db:updater db)
  (let loop ((start-time (current-time)))
    (thread-sleep! 0.5) ;; move save time around to minimize regular collisions?
    (db:write-cached-data db)
    (loop start-time)))
    
(define (db:test-update-meta-info db test-id minutes cpuload diskfree tmpfree)
  (mutex-lock! *incoming-mutex*)
  (set! *incoming-data* (cons (vector 'meta-info
				      (current-seconds)
				      (list cpuload
					    diskfree
					    minutes
					    test-id)) ;; run-id test-name item-path minutes cpuload diskfree tmpfree) 
			      *incoming-data*))
  (mutex-unlock! *incoming-mutex*)
  (if *cache-on*
      (debug:print 6 "INFO: *cache-on* is " *cache-on* ", skipping cache write as part of test-update-meta-info")
      (db:write-cached-data db)))

(define (db:write-cached-data db)
  (let ((meta-stmt (sqlite3:prepare db "UPDATE tests SET cpuload=?,diskfree=?,run_duration=?,state='RUNNING' WHERE id=? AND state NOT IN ('COMPLETED','KILLREQ','KILLED');"))
	(step-stmt (sqlite3:prepare db "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);")) ;; strftime('%s','now')#f)
	(data (sort *incoming-data* (lambda (a b)(< (vector-ref a 1)(vector-ref b 1))))))
    (if (> (length data) 0)
	(debug:print 4 "Writing cached data " data))
    (mutex-lock! *incoming-mutex*)
    (sqlite3:with-transaction 
     db
     (lambda ()
       (for-each (lambda (entry)
		   (case (vector-ref entry 0)
		     ((meta-info)
		      (apply sqlite3:execute meta-stmt (vector-ref entry 2)))
		     ((step-status)
		      (apply sqlite3:execute step-stmt (vector-ref entry 2)))
		     (else
		      (debug:print 0 "ERROR: Queued entry not recognised " entry))))
		 data)))
    (sqlite3:finalize! meta-stmt) ;; sqlite is the bottleneck, clear the statements asap?
    (sqlite3:finalize! step-stmt)
    (set! *incoming-data* '())
    (mutex-unlock! *incoming-mutex*)))
;; (define (db:updater db)
;;   (let loop ((start-time (current-time)))
;;     (thread-sleep! 0.5) ;; move save time around to minimize regular collisions?
;;     (db:write-cached-data db)
;;     (loop start-time)))
;;     
;; (define (db:test-update-meta-info db test-id minutes cpuload diskfree tmpfree)
;;   (mutex-lock! *incoming-mutex*)
;;   (set! *incoming-data* (cons (vector 'meta-info
;; 				      (current-seconds)
;; 				      (list cpuload
;; 					    diskfree
;; 					    minutes
;; 					    test-id)) ;; run-id test-name item-path minutes cpuload diskfree tmpfree) 
;; 			      *incoming-data*))
;;   (mutex-unlock! *incoming-mutex*)
;;   (if *cache-on*
;;       (debug:print 6 "INFO: *cache-on* is " *cache-on* ", skipping cache write as part of test-update-meta-info")
;;       (db:write-cached-data db)))
;; 
;; (define (db:write-cached-data db)
;;   (let ((meta-stmt (sqlite3:prepare db "UPDATE tests SET cpuload=?,diskfree=?,run_duration=?,state='RUNNING' WHERE id=? AND state NOT IN ('COMPLETED','KILLREQ','KILLED');"))
;; 	(step-stmt (sqlite3:prepare db "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);")) ;; strftime('%s','now')#f)
;; 	(data (sort *incoming-data* (lambda (a b)(< (vector-ref a 1)(vector-ref b 1))))))
;;     (if (> (length data) 0)
;; 	(debug:print 4 "Writing cached data " data))
;;     (mutex-lock! *incoming-mutex*)
;;     (sqlite3:with-transaction 
;;      db
;;      (lambda ()
;;        (for-each (lambda (entry)
;; 		   (case (vector-ref entry 0)
;; 		     ((meta-info)
;; 		      (apply sqlite3:execute meta-stmt (vector-ref entry 2)))
;; 		     ((step-status)
;; 		      (apply sqlite3:execute step-stmt (vector-ref entry 2)))
;; 		     (else
;; 		      (debug:print 0 "ERROR: Queued entry not recognised " entry))))
;; 		 data)))
;;     (sqlite3:finalize! meta-stmt) ;; sqlite is the bottleneck, clear the statements asap?
;;     (sqlite3:finalize! step-stmt)
;;     (set! *incoming-data* '())
;;     (mutex-unlock! *incoming-mutex*)))

(define (db:roll-up-pass-fail-counts db run-id test-name item-path status)
  (if (and (not (equal? item-path ""))
	   (or (equal? status "PASS")
	       (equal? status "WARN")
	       (equal? status "FAIL")
	       (equal? status "WAIVED")

Modified launch.scm from [228baa4ee9] to [4bec83887e].

62
63
64
65
66
67
68

69
70
71
72
73
74
75
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76







+







	       (env-ovrd  (assoc/default 'env-ovrd  cmdinfo))
	       (set-vars  (assoc/default 'set-vars  cmdinfo)) ;; pre-overrides from -setvar
	       (runname   (assoc/default 'runname   cmdinfo))
	       (megatest  (assoc/default 'megatest  cmdinfo))
	       (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo))
	       (fullrunscript (if runscript (conc testpath "/" runscript) #f))
	       (db        #f)
	       (tdb       #f)
	       (rollup-status 0))
	  
	  (debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name))
	  ;; apply pre-overrides before other variables. The pre-override vars must not
	  ;; clobbers things from the official sources such as megatest.config and runconfigs.config
	  (if (string? set-vars)
	      (let ((varpairs (string-split set-vars ",")))
89
90
91
92
93
94
95


96
97
98
99
100
101
102
103
104








105
106
107
108
109
110
111

112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121

122
123
124
125
126
127
128
129
130
131
132





133
134
135
136
137
138
139







+
+









+
+
+
+
+
+
+
+






-
+










-
-
-
-
-







	  (setenv "MT_MEGATEST"  megatest)
	  (setenv "MT_TARGET"    target)
	  (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path)))
	  (change-directory top-path)
	  (if (not (setup-for-run))
	      (begin
		(debug:print 0 "Failed to setup, exiting") 
		;; (sqlite3:finalize! db)
		;; (sqlite3:finalize! tdb)
		(exit 1)))
	  (change-directory *toppath*)
	  ;; now can find our db
	  (set! db (open-db))
	  (if (not (args:get-arg "-server"))
	      (server:client-setup db))
	  ;; (set! *cache-on* #t)
	  (set-megatest-env-vars db run-id) ;; these may be needed by the launching process
	  (change-directory work-area) 

	  ;; Open up the test specific database
	  (set! tdb (open-test-db work-area))
	  (on-exit (lambda ()
		     (debug:print 0 "Finalizing both tdb and db!!!")
		     (sqlite3:finalize! tdb)
		     (sqlite3:finalize! db)))

	  (set-run-config-vars db run-id)
	  ;; environment overrides are done *before* the remaining critical envars.
	  (alist->env-vars env-ovrd)
	  (set-megatest-env-vars db run-id)
	  (set-item-env-vars itemdat)
	  (save-environment-as-files "megatest")
	  (test-set-meta-info db run-id test-name itemdat)
	  (test-set-meta-info db tdb run-id test-name itemdat)
	  (test-set-status! db test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f)
	  (if (args:get-arg "-xterm")
	      (set! fullrunscript "xterm")
	      (if (and fullrunscript (not (file-execute-access? fullrunscript)))
		  (system (conc "chmod ug+x " fullrunscript))))
	  ;; We are about to actually kick off the test
	  ;; so this is a good place to remove the records for 
	  ;; any previous runs
	  ;; (db:test-remove-steps db run-id testname itemdat)
	  
	  ;; from here on out we will open and close the db
	  ;; on every access to reduce the probablitiy of 
	  ;; contention or stuck access on nfs.
	  (sqlite3:finalize! db)

	  (let* ((m            (make-mutex))
		 (kill-job?    #f)
		 (exit-info    (vector #t #t #t))
		 (job-thread   #f)
		 (runit        (lambda ()
				 ;; (let-values
				 ;;  (((pid exit-status exit-code)
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266









267

268

269
270
271
272
273
274
275
258
259
260
261
262
263
264








265
266
267
268
269
270
271
272
273
274
275

276
277
278
279
280
281
282
283







-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+

+
-
+







							 (inexact->exact 
							  (round 
							   (- 
							    (current-seconds) 
							    start-seconds)))))
					(kill-tries 0))
				   (let loop ((minutes   (calc-minutes)))
				     (let* ((db       (open-db))
					    (cpuload  (get-cpu-load))
					    (diskfree (get-df (current-directory)))
					    (tmpfree  (get-df "/tmp")))
				       (if (not (args:get-arg "-server"))
					   (server:client-setup db))
				       (if (not cpuload)  (begin (debug:print 0 "WARNING: CPULOAD not found.")  (set! cpuload "n/a")))
				       (if (not diskfree) (begin (debug:print 0 "WARNING: DISKFREE not found.") (set! diskfree "n/a")))
				     ;; (let* (;; (db       (open-db))
					    ;; (cpuload  (get-cpu-load))
					    ;; (diskfree (get-df (current-directory)))
					    ;; (tmpfree  (get-df "/tmp")))
				     (begin
				       ;; (if (not (args:get-arg "-server"))
				       ;;	   (server:client-setup db))
				       ;; (if (not cpuload)  (begin (debug:print 0 "WARNING: CPULOAD not found.")  (set! cpuload "n/a")))
				       ;; (if (not diskfree) (begin (debug:print 0 "WARNING: DISKFREE not found.") (set! diskfree "n/a")))
				       (set! kill-job? (test-get-kill-request db run-id test-name itemdat))
				       (test-set-meta-info db tdb run-id testname itemdat minutes: minutes)
				       (rdb:test-update-meta-info db test-id minutes cpuload diskfree tmpfree)
				       ;; (rdb:test-update-meta-info db test-id minutes cpuload diskfree tmpfree)
				       (if kill-job? 
					   (begin
					     (mutex-lock! m)
					     (let* ((pid (vector-ref exit-info 0)))
					       (if (number? pid)
						   (begin
						     (debug:print 0 "WARNING: Request received to kill job (attempt # " kill-tries ")")
287
288
289
290
291
292
293

294
295
296
297

298
299
300
301
302
303
304
305
306
307
308
309



310
311
312
313
314
315
316
295
296
297
298
299
300
301
302
303
304
305

306
307
308
309
310
311
312
313
314
315



316
317
318
319
320
321
322
323
324
325







+



-
+









-
-
-
+
+
+







							(car processes))
						       (system (conc "kill -9 " pid))))
						   (begin
						     (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process")
						     (test-set-status! db test-id "KILLED"  "FAIL"
								       (args:get-arg "-m") #f)
						     (sqlite3:finalize! db)
						     (sqlite3:finalize! tdb)
						     (exit 1))))
					     (set! kill-tries (+ 1 kill-tries))
					     (mutex-unlock! m)))
				       (sqlite3:finalize! db)
				       ;; (sqlite3:finalize! db)
				       (thread-sleep! (+ 10 (random 10))) ;; add some jitter to the call home time to spread out the db accesses
				       (loop (calc-minutes)))))))
		 (th1          (make-thread monitorjob))
		 (th2          (make-thread runit)))
	    (set! job-thread th2)
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th2)
	    (mutex-lock! m)
	    (set! db (open-db))
	    (if (not (args:get-arg "-server"))
		(server:client-setup db))
	    ;; (set! db (open-db))
	    ;; (if (not (args:get-arg "-server"))
	    ;;	(server:client-setup db))
	    (let* ((item-path (item-list->path itemdat))
		   (testinfo  (rdb:get-test-info db run-id test-name item-path)))
	      (if (not (equal? (db:test-get-state testinfo) "COMPLETED"))
		  (begin
		    (debug:print 2 "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result, rollup-status is " rollup-status)
		    (test-set-status! db test-id 
				      (if kill-job? "KILLED" "COMPLETED")
339
340
341
342
343
344
345

346
347
348
349
350
351
352
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362







+







	      )
	    (mutex-unlock! m)
	    ;; (exec-results (cmd-run->list fullrunscript)) ;;  (list ">" (conc test-name "-run.log"))))
	    ;; (success      exec-results)) ;; (eq? (cadr exec-results) 0)))
	    (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " 
			 work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n")
	    (sqlite3:finalize! db)
	    (sqlite3:finalize! tdb)
	    (if (not (vector-ref exit-info 1))
		(exit 4)))))))

;; set up the very basics needed for doing anything here.
(define (setup-for-run)
  ;; would set values for KEYS in the environment here for better support of env-override but 
  ;; have chicken/egg scenario. need to read megatest.config then read it again. Going to 

Modified tests.scm from [ce4320f87b] to [4b23cdd7f6].

399
400
401
402
403
404
405









406
407
408
409
410






411
412



413
414
415


416
417
418
419









420
421
422
423
424
425
426
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414





415
416
417
418
419
420


421
422
423



424
425




426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441







+
+
+
+
+
+
+
+
+
-
-
-
-
-
+
+
+
+
+
+
-
-
+
+
+
-
-
-
+
+
-
-
-
-
+
+
+
+
+
+
+
+
+







;; teststep-set-status! used to be here

(define (test-get-kill-request db run-id test-name itemdat)
  (let* ((item-path (item-list->path itemdat))
	 (testdat   (db:get-test-info db run-id test-name item-path)))
    (equal? (test:get-state testdat) "KILLREQ")))

(define (test:tdb-get-rundat-count tdb)
  (let ((res 0))
    (sqlite3:for-each-row
     (lambda (count)
       (set! res count))
     tdb
     "SELECT count(id) FROM test_rundat;")
    res))

(define (test-set-meta-info db run-id testname itemdat)
  (let ((item-path (item-list->path itemdat))
	(cpuload  (get-cpu-load))
	(hostname (get-host-name))
	(diskfree (get-df (current-directory)))
(define (test-set-meta-info db tdb run-id testname itemdat)
  (let* ((num-records (test:tdb-get-rundat-count tdb))
	 (item-path   (item-list->path itemdat))
	 (cpuload  (get-cpu-load))
	 ;; (hostname (get-host-name))
	 (diskfree (get-df (current-directory))))
	(uname    (get-uname "-srvpio")))
    (sqlite3:execute db "UPDATE tests SET host=?,cpuload=?,diskfree=?,uname=? WHERE run_id=? AND testname=? AND item_path=?;"
    (if (eq? (modulo num-records 10) 0) ;; every ten records update central
	(begin
	  (sqlite3:execute db "UPDATE tests SET cpuload=?,diskfree=? WHERE run_id=? AND testname=? AND item_path=?;"
		  hostname
		  cpuload
		  diskfree
			   cpuload
			   diskfree
		  uname
		  run-id
		  testname
		  item-path)))
			   run-id
			   testname
			   item-path)
	  (if (eq? num-records 0)
	      (sqlite3:execute db "UPDATE tests SET uname=?,hostname=? WHERE run_id=? AND testname=? AND item_path=?;"
			       (get-uname "-srvpio") (get-host-name) run-id testname item-path))))
    (sqlite3:execute tdb "INSERT INTO test_rundat (cpuload,diskfree) VALUES (?,?);"
		     cpuload diskfree)))
	  

;;======================================================================
;; A R C H I V I N G
;;======================================================================

(define (test:archive db test-id)
  #f)