Megatest

Changes On Branch 3c58f4639bbd2b40
Login

Changes In Branch better-html-update-control Through [3c58f4639b] Excluding Merge-Ins

This is equivalent to a diff from a0d9704d2f to 3c58f4639b

2015-03-09
05:19
Merging better-html-update-control to v1.60 check-in: bd663b5d13 user: matt tags: v1.60
05:18
Attempt to improve server recovery in bad situations Closed-Leaf check-in: 4125fe9193 user: matt tags: server-robustness
2015-03-08
22:28
Use db:dbfile instead of constructing path to db check-in: 1fb33719ba user: matt tags: better-html-update-control
08:25
Add trailing slash to dbdir check-in: 3c58f4639b user: matt tags: better-html-update-control
08:22
Add option dbdir to set database path check-in: e68b3262eb user: matt tags: better-html-update-control
2015-03-04
18:42
Better method for locking on html update check-in: 150c672a53 user: mrwellan tags: better-html-update-control
08:41
Moved delay in can-run-more-tests out of the main path check-in: a0d9704d2f user: mrwellan tags: v1.60
06:55
Merged in the static-html branch check-in: 16e4ac3a73 user: mrwellan tags: v1.60

Modified common.scm from [63e9da460d] to [163b8623d2].

162
163
164
165
166
167
168























169
170
171
172
173
174
175
    exn
    (begin
      (debug:print 0 "ERROR: received bad encoded string \"" instr "\", message: " ((condition-property-accessor 'exn 'message) exn))
      (print-call-chain (current-error-port))
      #f)
    (read (open-input-string (base64:base64-decode instr))))
   (read (open-input-string (z3:decode-buffer (base64:base64-decode instr))))))
























;;======================================================================
;; S T A T E S   A N D   S T A T U S E S
;;======================================================================

(define *common:std-states*   
  '((0 "COMPLETED")







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
    exn
    (begin
      (debug:print 0 "ERROR: received bad encoded string \"" instr "\", message: " ((condition-property-accessor 'exn 'message) exn))
      (print-call-chain (current-error-port))
      #f)
    (read (open-input-string (base64:base64-decode instr))))
   (read (open-input-string (z3:decode-buffer (base64:base64-decode instr))))))

;; dot-locking egg seems not to work, using this for now
;; if lock is older than expire-time then remove it and try again
;; to get the lock
;;
(define (common:simple-file-lock fname #!key (expire-time 300))
  (if (file-exists? fname)
      (if (> (- (current-seconds)(file-modification-time fname)) expire-time)
	  (begin
	    (delete-file* fname)
	    (common:simple-file-lock fname expire-time: expire-time))
	  #f)
      (let ((key-string (conc (get-host-name) "-" (current-process-id))))
	(with-output-to-file fname
	  (lambda ()
	    (print key-string)))
	(thread-sleep! 0.25)
	(with-input-from-file fname
	  (lambda ()
	    (equal? key-string (read-line)))))))
	
(define (common:simple-file-release-lock fname)
  (delete-file* fname))

;;======================================================================
;; S T A T E S   A N D   S T A T U S E S
;;======================================================================

(define *common:std-states*   
  '((0 "COMPLETED")

Modified db.scm from [23d4e4d7d7] to [b5e5388382].

140
141
142
143
144
145
146

147


148
149
150
151
152
153
154
155
;;     (filedb:get-path db id)))

;; NB// #f => zeroth db with name=main.db
;;
(define (db:dbfile-path run-id)
  (let* (;; (toppath      (dbr:dbstruct-get-path  dbstruct))
	 (link-tree-path  (configf:lookup *configdat* "setup" "linktree"))

	 (fname           (if (eq? run-id 0) "main.db" (conc run-id ".db")))


	 (dbdir           (conc link-tree-path "/.db/")))
    (handle-exceptions
     exn
     (begin
       (debug:print 0 "ERROR: Couldn't create path to " dbdir)
       (exit 1))
     (if (not (directory? dbdir))(create-directory dbdir #t)))
    (conc dbdir fname)))







>

>
>
|







140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
;;     (filedb:get-path db id)))

;; NB// #f => zeroth db with name=main.db
;;
(define (db:dbfile-path run-id)
  (let* (;; (toppath      (dbr:dbstruct-get-path  dbstruct))
	 (link-tree-path  (configf:lookup *configdat* "setup" "linktree"))
	 (dbpath          (configf:lookup *configdat* "setup" "dbdir"))
	 (fname           (if (eq? run-id 0) "main.db" (conc run-id ".db")))
	 (dbdir           (if dbpath
			      dbpath
			      (conc link-tree-path "/.db/"))))
    (handle-exceptions
     exn
     (begin
       (debug:print 0 "ERROR: Couldn't create path to " dbdir)
       (exit 1))
     (if (not (directory? dbdir))(create-directory dbdir #t)))
    (conc dbdir fname)))

Modified launch.scm from [65077b4cd7] to [c50b0e51c3].

433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
					    new-state
					    new-status
					    (args:get-arg "-m") #f)
		    ;; need to update the top test record if PASS or FAIL and this is a subtest
		    ;; NO NEED TO CALL roll-up-pass-fail-counts HERE, THIS IS DONE IN roll-up-pass-fail-counts called by tests:test-set-status!
		    ))
	      ;; for automated creation of the rollup html file this is a good place...
	      (if (and (not (equal? item-path ""))
		       (< (random (rmt:get-count-tests-running-for-testname run-id test-name)) 5))
		  (tests:summarize-items run-id test-id test-name #f))
	      (tests:summarize-test run-id test-id)) ;; don't force - just update if no
	    (mutex-unlock! m)
	    (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " 
			 work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n")
	    (if (not (vector-ref exit-info 1))
		(exit 4)))))))








|
|
|







433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
					    new-state
					    new-status
					    (args:get-arg "-m") #f)
		    ;; need to update the top test record if PASS or FAIL and this is a subtest
		    ;; NO NEED TO CALL roll-up-pass-fail-counts HERE, THIS IS DONE IN roll-up-pass-fail-counts called by tests:test-set-status!
		    ))
	      ;; for automated creation of the rollup html file this is a good place...
	      ;; (if (and (not (equal? item-path ""))
	      ;;      (< (random (rmt:get-count-tests-running-for-testname run-id test-name)) 5))
	      (tests:summarize-items run-id test-id test-name #f)
	      (tests:summarize-test run-id test-id)) ;; don't force - just update if no
	    (mutex-unlock! m)
	    (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " 
			 work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n")
	    (if (not (vector-ref exit-info 1))
		(exit 4)))))))

Modified lock-queue.scm from [4e7622a7f2] to [1e70529cd9].

117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
	 (db        (lock-queue:db-dat-get-db dbdat))
	 (lckqry    (sqlite3:prepare db "SELECT test_id,run_lock FROM runlocks WHERE run_lock='locked';"))
	 (mklckqry  (sqlite3:prepare db "INSERT INTO runlocks (test_id,run_lock) VALUES (?,'locked');")))
    (let ((result 
	   (handle-exceptions
	    exn
	    (begin
	      (debug:print 0 "WARNING: failed to get queue lock. Will try again in a few seconds")
	      (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	      (thread-sleep! 10)
	      (if (> count 0)
		  (lock-queue:get-lock dbdat test-id count: (- count 1))
		  (begin ;; never recovered, remote the lock file and return #f, no lock obtained
		    (lock-queue:delete-lock-db dbdat)
		    #f)))
	    (sqlite3:with-transaction
	     db
	     (lambda ()
	       (sqlite3:for-each-row (lambda (tid lockstate)
				       (set! res (list tid lockstate)))
				     lckqry)
	       (if res







|


|
|
|
|
|







117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
	 (db        (lock-queue:db-dat-get-db dbdat))
	 (lckqry    (sqlite3:prepare db "SELECT test_id,run_lock FROM runlocks WHERE run_lock='locked';"))
	 (mklckqry  (sqlite3:prepare db "INSERT INTO runlocks (test_id,run_lock) VALUES (?,'locked');")))
    (let ((result 
	   (handle-exceptions
	    exn
	    (begin
	      (debug:print 0 "WARNING: failed to get queue lock. Removing lock db and returning fail") ;; Will try again in a few seconds")
	      (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	      (thread-sleep! 10)
	      ;; (if (> count 0)	
	      ;;  #f ;; (lock-queue:get-lock dbdat test-id count: (- count 1)) - give up on retries 
	      ;; (begin ;; never recovered, remote the lock file and return #f, no lock obtained
	      (lock-queue:delete-lock-db dbdat)
	      #f)
	    (sqlite3:with-transaction
	     db
	     (lambda ()
	       (sqlite3:for-each-row (lambda (tid lockstate)
				       (set! res (list tid lockstate)))
				     lckqry)
	       (if res

Modified tests.scm from [ae99c23c59] to [1b02d35c8f].

314
315
316
317
318
319
320
321
322
323

324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394







































































395
396
397
398
399
400
401
402
	  (change-directory path))
	;; (set! outputfilename (conc path "/" outputfilename)))
	(debug:print 0 "ERROR: summarize-items for run-id=" run-id ", test-name=" test-name ", no such path: " path))
    (debug:print 4 "summarize-items with logf " logf ", outputfilename " outputfilename " and force " force)
    (if (or (equal? logf "logs/final.log")
	    (equal? logf outputfilename)
	    force)
	(begin
	  (if (not (lock-queue:wait-turn outputfilename test-id))
	      (print "Not updating " outputfilename " as another test item has signed up for the job")

	      (begin
		(print "Obtained lock for " outputfilename)
		(let ((oup    (open-output-file outputfilename))
		      (counts (make-hash-table))
		      (statecounts (make-hash-table))
		      (outtxt "")
		      (tot    0)
		      (testdat (rmt:test-get-records-for-index-file run-id test-name)))
		  (with-output-to-port
		      oup
		    (lambda ()
		      (set! outtxt (conc outtxt "<html><title>Summary: " test-name 
					 "</title><body><h2>Summary for " test-name "</h2>"))
		      (for-each
		       (lambda (testrecord)
			 (let ((id             (vector-ref testrecord 0))
			       (itempath       (vector-ref testrecord 1))
			       (state          (vector-ref testrecord 2))
			       (status         (vector-ref testrecord 3))
			       (run_duration   (vector-ref testrecord 4))
			       (logf           (vector-ref testrecord 5))
			       (comment        (vector-ref testrecord 6)))
			   (hash-table-set! counts status (+ 1 (hash-table-ref/default counts status 0)))
			   (hash-table-set! statecounts state (+ 1 (hash-table-ref/default statecounts state 0)))
			   (set! outtxt (conc outtxt "<tr>"
					      ;; "<td><a href=\"" itempath "/" logf "\"> " itempath "</a></td>" 
					      "<td><a href=\"" itempath "/test-summary.html\"> " itempath "</a></td>" 
					      "<td>" state    "</td>" 
					      "<td><font color=" (common:get-color-from-status status)
					      ">"   status   "</font></td>"
					      "<td>" (if (equal? comment "")
							 "&nbsp;"
							 comment) "</td>"
							 "</tr>"))))
		       (if (list? testdat)
			   testdat
			   (begin
			     (print "ERROR: failed to get records with rmt:test-get-records-for-index-file run-id=" run-id "test-name=" test-name)
			     '())))
			   
		      (print "<table><tr><td valign=\"top\">")
		      ;; Print out stats for status
		      (set! tot 0)
		      (print "<table cellspacing=\"0\" border=\"1\"><tr><td colspan=\"2\"><h2>State stats</h2></td></tr>")
		      (for-each (lambda (state)
				  (set! tot (+ tot (hash-table-ref statecounts state)))
				  (print "<tr><td>" state "</td><td>" (hash-table-ref statecounts state) "</td></tr>"))
				(hash-table-keys statecounts))
		      (print "<tr><td>Total</td><td>" tot "</td></tr></table>")
		      (print "</td><td valign=\"top\">")
		      ;; Print out stats for state
		      (set! tot 0)
		      (print "<table cellspacing=\"0\" border=\"1\"><tr><td colspan=\"2\"><h2>Status stats</h2></td></tr>")
		      (for-each (lambda (status)
				  (set! tot (+ tot (hash-table-ref counts status)))
				  (print "<tr><td><font color=\"" (common:get-color-from-status status) "\">" status
					 "</font></td><td>" (hash-table-ref counts status) "</td></tr>"))
				(hash-table-keys counts))
		      (print "<tr><td>Total</td><td>" tot "</td></tr></table>")
		      (print "</td></td></tr></table>")
		      
		      (print "<table cellspacing=\"0\" border=\"1\">" 
			     "<tr><td>Item</td><td>State</td><td>Status</td><td>Comment</td>"
			     outtxt "</table></body></html>")
		      ;; (release-dot-lock outputfilename)
		      ))
		  (close-output-port oup)
		  (lock-queue:release-lock outputfilename test-id)
		  (change-directory orig-dir)
		  ;; NB// tests:test-set-toplog! is remote internal...
		  (tests:test-set-toplog! run-id test-name outputfilename)







































































		  )))))))

;; CHECK - WAS THIS ADDED OR REMOVED? MANUAL MERGE WITH API STUFF!!!
;;
;; get a pretty table to summarize steps
;;
;; (define (dcommon:process-steps-table steps);; db test-id #!key (work-area #f))
(define (tests:process-steps-table steps);; db test-id #!key (work-area #f))







|
|
|
>
|
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<


|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|







314
315
316
317
318
319
320
321
322
323
324
325
326





































327




















328







329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
	  (change-directory path))
	;; (set! outputfilename (conc path "/" outputfilename)))
	(debug:print 0 "ERROR: summarize-items for run-id=" run-id ", test-name=" test-name ", no such path: " path))
    (debug:print 4 "summarize-items with logf " logf ", outputfilename " outputfilename " and force " force)
    (if (or (equal? logf "logs/final.log")
	    (equal? logf outputfilename)
	    force)
	(let ((my-start-time (current-seconds))
	      (lockf         (conc outputfilename ".lock")))
	  (let loop ((have-lock  (common:simple-file-lock lockf)))
	    (if have-lock
		(begin
		  (print "Obtained lock for " outputfilename)





































		  (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename)




















		  (common:simple-file-release-lock lockf)







		  (change-directory orig-dir)
		  ;; NB// tests:test-set-toplog! is remote internal...
		  (tests:test-set-toplog! run-id test-name outputfilename))
		;; didn't get the lock, check to see if current update started later than this 
		;; update, if so we can exit without doing any work
		(if (> my-start-time (file-modification-time lockf))
		    ;; we started since current re-gen in flight, delay a little and try again
		    (begin
		      (debug:print-info 1 "Waiting to update " outputfilename ", another test currently updating it")
		      (thread-sleep! (+ 5 (random 5))) ;; delay between 5 and 10 seconds
		      (loop (common:simple-file-lock lockf))))))))))

(define (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename)
  (let ((counts (make-hash-table))
	(statecounts (make-hash-table))
	(outtxt "")
	(tot    0)
	(testdat (rmt:test-get-records-for-index-file run-id test-name)))
    (with-output-to-file outputfilename
      (lambda ()
	(set! outtxt (conc outtxt "<html><title>Summary: " test-name 
			   "</title><body><h2>Summary for " test-name "</h2>"))
	(for-each
	 (lambda (testrecord)
	   (let ((id             (vector-ref testrecord 0))
		 (itempath       (vector-ref testrecord 1))
		 (state          (vector-ref testrecord 2))
		 (status         (vector-ref testrecord 3))
		 (run_duration   (vector-ref testrecord 4))
		 (logf           (vector-ref testrecord 5))
		 (comment        (vector-ref testrecord 6)))
	     (hash-table-set! counts status (+ 1 (hash-table-ref/default counts status 0)))
	     (hash-table-set! statecounts state (+ 1 (hash-table-ref/default statecounts state 0)))
	     (set! outtxt (conc outtxt "<tr>"
				;; "<td><a href=\"" itempath "/" logf "\"> " itempath "</a></td>" 
				"<td><a href=\"" itempath "/test-summary.html\"> " itempath "</a></td>" 
				"<td>" state    "</td>" 
				"<td><font color=" (common:get-color-from-status status)
				">"   status   "</font></td>"
				"<td>" (if (equal? comment "")
					   "&nbsp;"
					   comment) "</td>"
					   "</tr>"))))
	 (if (list? testdat)
	     testdat
	     (begin
	       (print "ERROR: failed to get records with rmt:test-get-records-for-index-file run-id=" run-id "test-name=" test-name)
	       '())))
	
	(print "<table><tr><td valign=\"top\">")
	;; Print out stats for status
	(set! tot 0)
	(print "<table cellspacing=\"0\" border=\"1\"><tr><td colspan=\"2\"><h2>State stats</h2></td></tr>")
	(for-each (lambda (state)
		    (set! tot (+ tot (hash-table-ref statecounts state)))
		    (print "<tr><td>" state "</td><td>" (hash-table-ref statecounts state) "</td></tr>"))
		  (hash-table-keys statecounts))
	(print "<tr><td>Total</td><td>" tot "</td></tr></table>")
	(print "</td><td valign=\"top\">")
	;; Print out stats for state
	(set! tot 0)
	(print "<table cellspacing=\"0\" border=\"1\"><tr><td colspan=\"2\"><h2>Status stats</h2></td></tr>")
	(for-each (lambda (status)
		    (set! tot (+ tot (hash-table-ref counts status)))
		    (print "<tr><td><font color=\"" (common:get-color-from-status status) "\">" status
			   "</font></td><td>" (hash-table-ref counts status) "</td></tr>"))
		  (hash-table-keys counts))
	(print "<tr><td>Total</td><td>" tot "</td></tr></table>")
	(print "</td></td></tr></table>")
	
	(print "<table cellspacing=\"0\" border=\"1\">" 
	       "<tr><td>Item</td><td>State</td><td>Status</td><td>Comment</td>"
	       outtxt "</table></body></html>")
	;; (release-dot-lock outputfilename)
	))))

;; CHECK - WAS THIS ADDED OR REMOVED? MANUAL MERGE WITH API STUFF!!!
;;
;; get a pretty table to summarize steps
;;
;; (define (dcommon:process-steps-table steps);; db test-id #!key (work-area #f))
(define (tests:process-steps-table steps);; db test-id #!key (work-area #f))

Modified tests/fullrun/megatest.config from [76d08fa242] to [ab5b809b18].

25
26
27
28
29
30
31



32
33
34
35
36
37
38
39
40
41
42
[setup]
# Set launchwait to no to use the more agressive code that does not wait for the launch to complete before proceeding
# this may save a few milliseconds on launching tests
# launchwait no
waivercommentpatt ^WW\d+ [a-z].*
incomplete-timeout 1




# yes, anything else is no
run-wait yes



# If set to "default" the old code is used. Otherwise defaults to 200 or uses
# numeric value given.
#
runqueue 20

# Default runtimelim 1d 1h 1m 10s







>
>
>
|

<
<







25
26
27
28
29
30
31
32
33
34
35
36


37
38
39
40
41
42
43
[setup]
# Set launchwait to no to use the more agressive code that does not wait for the launch to complete before proceeding
# this may save a few milliseconds on launching tests
# launchwait no
waivercommentpatt ^WW\d+ [a-z].*
incomplete-timeout 1

# set the dbdir, default is linktree
dbdir #{getenv MT_RUN_AREA_HOME}/db/

# wait for runs to completely complete. yes, anything else is no
run-wait yes



# If set to "default" the old code is used. Otherwise defaults to 200 or uses
# numeric value given.
#
runqueue 20

# Default runtimelim 1d 1h 1m 10s