Megatest

Check-in [7180157463]
Login
Overview
Comment:Fixed locking on rollup bug by writing simple locking queue explictly for the needed behavior
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.55
Files: files | file ages | folders
SHA1: 7180157463a3ac567e7909c88fb3f895fb95f5b6
User & Date: matt on 2013-08-12 00:51:08
Other Links: branch diff | manifest | tags
Context
2013-08-12
01:18
Added missing finalize for lock-queue check-in: 8d21f434ab user: matt tags: v1.55
00:51
Fixed locking on rollup bug by writing simple locking queue explictly for the needed behavior check-in: 7180157463 user: matt tags: v1.55
2013-08-11
21:44
Fixed missing host/uname data. Streamlined data updates from running test a little check-in: 1daca50091 user: matt tags: v1.55
Changes

Modified Makefile from [00d538b903] to [bc20ab1870].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# make install CSCOPTS='-accumulate-profile -profile-name $(PWD)/profile-ww$(shell date +%V.%u)'
PREFIX=$(PWD)
CSCOPTS= 
INSTALL=install
SRCFILES = common.scm items.scm launch.scm \
           ods.scm runconfig.scm server.scm configf.scm \
           db.scm keys.scm margs.scm megatest-version.scm \
           process.scm runs.scm tasks.scm tests.scm genexample.scm \
	   fs-transport.scm http-transport.scm \
           client.scm gutils.scm synchash.scm daemon.scm mt.scm dcommon.scm \
	   tree.scm ezsteps.scm

GUISRCF  = dashboard-tests.scm dashboard-guimonitor.scm 

OFILES   = $(SRCFILES:%.scm=%.o)
GOFILES  = $(GUISRCF:%.scm=%.o)

ADTLSCR=mt_laststep mt_runstep mt_ezstep










|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# make install CSCOPTS='-accumulate-profile -profile-name $(PWD)/profile-ww$(shell date +%V.%u)'
PREFIX=$(PWD)
CSCOPTS= 
INSTALL=install
SRCFILES = common.scm items.scm launch.scm \
           ods.scm runconfig.scm server.scm configf.scm \
           db.scm keys.scm margs.scm megatest-version.scm \
           process.scm runs.scm tasks.scm tests.scm genexample.scm \
	   fs-transport.scm http-transport.scm \
           client.scm gutils.scm synchash.scm daemon.scm mt.scm dcommon.scm \
	   tree.scm ezsteps.scm lock-queue.scm

GUISRCF  = dashboard-tests.scm dashboard-guimonitor.scm 

OFILES   = $(SRCFILES:%.scm=%.o)
GOFILES  = $(GUISRCF:%.scm=%.o)

ADTLSCR=mt_laststep mt_runstep mt_ezstep

Modified ezsteps.scm from [ad9d3c507a] to [56b532d0f1].

163
164
165
166
167
168
169
170
171
172
173
					  new-status
					  (args:get-arg "-m") #f)
		  ;; need to update the top test record if PASS or FAIL and this is a subtest
		  (if (not (equal? item-path ""))
		      (cdb:roll-up-pass-fail-counts *runremote* run-id test-name item-path new-status))))
	    ;; for automated creation of the rollup html file this is a good place...
	    (if (not (equal? item-path ""))
		(tests:summarize-items #f run-id test-name #f)) ;; don't force - just update if no
	    )))
    (pop-directory)
    rollup-status))







|



163
164
165
166
167
168
169
170
171
172
173
					  new-status
					  (args:get-arg "-m") #f)
		  ;; need to update the top test record if PASS or FAIL and this is a subtest
		  (if (not (equal? item-path ""))
		      (cdb:roll-up-pass-fail-counts *runremote* run-id test-name item-path new-status))))
	    ;; for automated creation of the rollup html file this is a good place...
	    (if (not (equal? item-path ""))
		(tests:summarize-items #f run-id test-id test-name #f)) ;; don't force - just update if no
	    )))
    (pop-directory)
    rollup-status))

Modified launch.scm from [870df0aae4] to [89b6ce45bc].

1
2
3
4
5
6
7
8
9

;; Copyright 2006-2012, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.

|







1
2
3
4
5
6
7
8
9

;; Copyright 2006-2013, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.
360
361
362
363
364
365
366


367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
		    (debug:print-info 1 "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (vector-ref exit-info 1) " and rollup-status of " rollup-status)
		    (tests:test-set-status! test-id 
					    new-state
					    new-status
					    (args:get-arg "-m") #f)
		    ;; need to update the top test record if PASS or FAIL and this is a subtest
		    (if (not (equal? item-path ""))


			(cdb:roll-up-pass-fail-counts *runremote* run-id test-name item-path new-status))
		    
		    ))
	      ;; for automated creation of the rollup html file this is a good place...
	      (if (not (equal? item-path ""))
		  (tests:summarize-items #f run-id test-name #f)) ;; don't force - just update if no
	      )
	    (mutex-unlock! m)
	    ;; (exec-results (cmd-run->list fullrunscript)) ;;  (list ">" (conc test-name "-run.log"))))
	    ;; (success      exec-results)) ;; (eq? (cadr exec-results) 0)))
	    (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " 
			 work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n")
	    ;; (sqlite3:finalize! db)
	    ;; (sqlite3:finalize! tdb)
	    (if (not (vector-ref exit-info 1))
		(exit 4)))))))

;; set up the very basics needed for doing anything here.
(define (setup-for-run)
  ;; would set values for KEYS in the environment here for better support of env-override but 
  ;; have chicken/egg scenario. need to read megatest.config then read it again. Going to 







>
>
|
<
<


|
<

<
<


<
<







360
361
362
363
364
365
366
367
368
369


370
371
372

373


374
375


376
377
378
379
380
381
382
		    (debug:print-info 1 "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (vector-ref exit-info 1) " and rollup-status of " rollup-status)
		    (tests:test-set-status! test-id 
					    new-state
					    new-status
					    (args:get-arg "-m") #f)
		    ;; need to update the top test record if PASS or FAIL and this is a subtest
		    (if (not (equal? item-path ""))
			(begin
			  (thread-sleep! 0.1) ;; give other processes an opportunity to access the db as rollup is lower priority
			  (cdb:roll-up-pass-fail-counts *runremote* run-id test-name item-path new-status)))))


	      ;; for automated creation of the rollup html file this is a good place...
	      (if (not (equal? item-path ""))
		  (tests:summarize-items #f run-id test-id test-name #f))) ;; don't force - just update if no

	    (mutex-unlock! m)


	    (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " 
			 work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n")


	    (if (not (vector-ref exit-info 1))
		(exit 4)))))))

;; set up the very basics needed for doing anything here.
(define (setup-for-run)
  ;; would set values for KEYS in the environment here for better support of env-override but 
  ;; have chicken/egg scenario. need to read megatest.config then read it again. Going to 

Added lock-queue.scm version [9cc456adfe].





























































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
;; Copyright 2006-2013, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.

;;======================================================================
;; launch a task - this runs on the originating host, tests themselves
;;
;;======================================================================

(use sqlite3 srfi-18)
(import (prefix sqlite3 sqlite3:))

(declare (unit lock-queue))
(declare (uses common))

;;======================================================================
;; attempt to prevent overlapping updates of rollup files by queueing
;; update requests in an sqlite db
;;======================================================================

(define (lock-queue:open-db fname)
  (let* ((actualfname (conc fname ".lockdb"))
	 (dbexists (file-exists? actualfname))
	 (db       (sqlite3:open-database actualfname))
	 (handler  (make-busy-timeout 3600)))
    (if dbexists
	db
	(begin
	  (sqlite3:execute 
	   db
	   "CREATE TABLE IF NOT EXISTS queue (
  	      id         INTEGER PRIMARY KEY,
              test_id    INTEGER,
              start_time INTEGER,
              state      TEXT,
              CONSTRAINT queue_constraint UNIQUE (test_id));")
	  (sqlite3:execute
	   db
	   "CREATE TABLE IF NOT EXISTS runlocks (
              id         INTEGER PRIMARY KEY,
              test_id    INTEGER,
              run_lock   TEXT,
              CONSTRAINT runlock_constraint UNIQUE (run_lock));")))
    db))

(define (lock-queue:set-state db test-id newstate)
  (sqlite3:execute db "UPDATE queue SET state=? WHERE test_id=?;"
		   newstate
		   test-id))

(define (lock-queue:any-younger? db mystart test-id)
  (let ((res #f))
    (sqlite3:for-each-row
     (lambda (tid)
       ;; Actually this should not be needed as mystart cannot be simultaneously less than and test-id same as 
       (if (not (equal? tid test-id)) 
	   (set! res tid)))
     db
     "SELECT test_id FROM queue WHERE start_time > ?;" mystart)
    res))

(define (lock-queue:get-lock db test-id)
  (let ((res       #f)
	(lckqry    (sqlite3:prepare db "SELECT test_id,run_lock FROM runlocks WHERE run_lock='locked';"))
	(mklckqry  (sqlite3:prepare db "INSERT INTO runlocks (test_id,run_lock) VALUES (?,'locked');")))
    (handle-exceptions
     exn
     #f
     (sqlite3:with-transaction
      db
      (lambda ()
	(sqlite3:for-each-row (lambda (tid lockstate)
				(set! res (list tid lockstate)))
			      lckqry)
	(if res
	    (if (equal? (car res) test-id)
		#t ;; already have the lock
		#f)
	    (begin
	      (sqlite3:execute mklckqry test-id)
	      ;; if no error handled then return #t for got the lock
	      #t)))))))

(define (lock-queue:release-lock fname test-id)
  (let ((db (lock-queue:open-db fname)))
    (sqlite3:execute db "DELETE FROM runlocks WHERE test_id=?;" test-id)
    (sqlite3:finalize! db)))

;; returns #f if ok to skip the task
;; returns #t if ok to proceed with task
;; otherwise waits
;;
(define (lock-queue:wait-turn fname test-id)
  (let ((db      (lock-queue:open-db fname))
	(mystart (current-seconds)))
    (sqlite3:execute
     db
     "INSERT OR REPLACE INTO queue (test_id,start_time,state) VALUES (?,?,'waiting');"
     test-id mystart)
    (thread-sleep! 1) ;; give other tests a chance to register
    (let loop ((younger-waiting (lock-queue:any-younger? db mystart test-id)))
      (if younger-waiting
	  (begin
	    ;; no need for us to wait. mark in the lock queue db as skipping
	    (lock-queue:set-state db test-id "skipping")
	    (begin
	      ;; (sqlite3:finalize! db)
	      #f)) ;; let the calling process know that nothing needs to be done
	  (if (lock-queue:get-lock db test-id)
	      (begin
		;; (sqlite3:finalize! db)
		#t)
	      (if (> (- (current-seconds) mystart) 36000) ;; waited too long, steal the lock
		  (lock-queue:steal-lock db test-id)
		  (begin
		    (thread-sleep! 1)
		    (loop (lock-queue:any-younger? db mystart test-id)))))))))
	  
            
;; (use trace)
;; (trace lock-queue:get-lock lock-queue:release-lock lock-queue:wait-turn lock-queue:any-younger? lock-queue:set-state)

Modified megatest.scm from [7c0eaf00e3] to [7748c6c15d].

934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
	      (let ((logfname (args:get-arg "-setlog")))
		(cdb:test-set-log! *runremote* test-id logfname)))
	  (if (args:get-arg "-set-toplog")
	      ;; DO NOT run remote
	      (tests:test-set-toplog! db run-id test-name (args:get-arg "-set-toplog")))
	  (if (args:get-arg "-summarize-items")
	      ;; DO NOT run remote
	      (tests:summarize-items db run-id test-name #t)) ;; do force here
	  (if (args:get-arg "-runstep")
	      (if (null? remargs)
		  (begin
		    (debug:print 0 "ERROR: nothing specified to run!")
		    (if db (sqlite3:finalize! db))
		    (exit 6))
		  (let* ((stepname   (args:get-arg "-runstep"))







|







934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
	      (let ((logfname (args:get-arg "-setlog")))
		(cdb:test-set-log! *runremote* test-id logfname)))
	  (if (args:get-arg "-set-toplog")
	      ;; DO NOT run remote
	      (tests:test-set-toplog! db run-id test-name (args:get-arg "-set-toplog")))
	  (if (args:get-arg "-summarize-items")
	      ;; DO NOT run remote
	      (tests:summarize-items db run-id test-id test-name #t)) ;; do force here
	  (if (args:get-arg "-runstep")
	      (if (null? remargs)
		  (begin
		    (debug:print 0 "ERROR: nothing specified to run!")
		    (if db (sqlite3:finalize! db))
		    (exit 6))
		  (let* ((stepname   (args:get-arg "-runstep"))

Modified tests.scm from [5109c208ba] to [92c97fb3f3].

13
14
15
16
17
18
19

20
21
22
23
24
25
26
;; Tests
;;======================================================================

(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking tcp directory-utils)
(import (prefix sqlite3 sqlite3:))

(declare (unit tests))

(declare (uses db))
(declare (uses common))
(declare (uses items))
(declare (uses runconfig))

(include "common_records.scm")
(include "key_records.scm")







>







13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
;; Tests
;;======================================================================

(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking tcp directory-utils)
(import (prefix sqlite3 sqlite3:))

(declare (unit tests))
(declare (uses lock-queue))
(declare (uses db))
(declare (uses common))
(declare (uses items))
(declare (uses runconfig))

(include "common_records.scm")
(include "key_records.scm")
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405

406
407
408
409
410
411
412
	  (cdb:remote-run db:test-set-comment #f test-id cmt)))
    ))


(define (tests:test-set-toplog! db run-id test-name logf) 
  (cdb:client-call *runremote* 'tests:test-set-toplog #t 2 logf run-id test-name))

(define (tests:summarize-items db run-id test-name force)
  ;; if not force then only update the record if one of these is true:
  ;;   1. logf is "log/final.log
  ;;   2. logf is same as outputfilename
  (let* ((outputfilename (conc "megatest-rollup-" test-name ".html"))
	 (orig-dir       (current-directory))
	 (logf-info      (cdb:remote-run db:test-get-logfile-info #f run-id test-name))
	 (logf           (if logf-info (cadr logf-info) #f))
	 (path           (if logf-info (car  logf-info) #f)))
    ;; This query finds the path and changes the directory to it for the test
    (if (directory? path)
	(begin
	  (debug:print 4 "Found path: " path)
	  (change-directory path))
	;; (set! outputfilename (conc path "/" outputfilename)))
	(print "No such path: " path))
    (debug:print 4 "summarize-items with logf " logf ", outputfilename " outputfilename " and force " force)
    (if (or (equal? logf "logs/final.log")
	    (equal? logf outputfilename)
	    force)
	(begin
	  (if (not (obtain-dot-lock outputfilename 1 5 7)) ;; retry every second for 20 seconds, call it dead after 30 seconds and steal the lock
	      (print "Failed to obtain lock for " outputfilename)

	      (begin
		(print "Obtained lock for " outputfilename)
		(let ((oup    (open-output-file outputfilename))
		      (counts (make-hash-table))
		      (statecounts (make-hash-table))
		      (outtxt "")
		      (tot    0)







|




















|
|
>







377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
	  (cdb:remote-run db:test-set-comment #f test-id cmt)))
    ))


(define (tests:test-set-toplog! db run-id test-name logf) 
  (cdb:client-call *runremote* 'tests:test-set-toplog #t 2 logf run-id test-name))

(define (tests:summarize-items db run-id test-id test-name force)
  ;; if not force then only update the record if one of these is true:
  ;;   1. logf is "log/final.log
  ;;   2. logf is same as outputfilename
  (let* ((outputfilename (conc "megatest-rollup-" test-name ".html"))
	 (orig-dir       (current-directory))
	 (logf-info      (cdb:remote-run db:test-get-logfile-info #f run-id test-name))
	 (logf           (if logf-info (cadr logf-info) #f))
	 (path           (if logf-info (car  logf-info) #f)))
    ;; This query finds the path and changes the directory to it for the test
    (if (directory? path)
	(begin
	  (debug:print 4 "Found path: " path)
	  (change-directory path))
	;; (set! outputfilename (conc path "/" outputfilename)))
	(print "No such path: " path))
    (debug:print 4 "summarize-items with logf " logf ", outputfilename " outputfilename " and force " force)
    (if (or (equal? logf "logs/final.log")
	    (equal? logf outputfilename)
	    force)
	(begin
	  (if ;; (not (obtain-dot-lock outputfilename 1 5 7)) ;; retry every second for 20 seconds, call it dead after 30 seconds and steal the lock
              (not (lock-queue:wait-turn outputfilename test-id))
	      (print "Not updating " outputfilename " as another test item has signed up for the job")
	      (begin
		(print "Obtained lock for " outputfilename)
		(let ((oup    (open-output-file outputfilename))
		      (counts (make-hash-table))
		      (statecounts (make-hash-table))
		      (outtxt "")
		      (tot    0)
459
460
461
462
463
464
465

466
467
468
469
470
471
472
		      (print "</td></td></tr></table>")
		      
		      (print "<table cellspacing=\"0\" border=\"1\">" 
			     "<tr><td>Item</td><td>State</td><td>Status</td><td>Comment</td>"
			     outtxt "</table></body></html>")
		      (release-dot-lock outputfilename)))
		  (close-output-port oup)

		  (change-directory orig-dir)
		  ;; NB// tests:test-set-toplog! is remote internal...
		  (tests:test-set-toplog! db run-id test-name outputfilename)
		  )))))))

;;======================================================================
;; Gather data from test/task specifications







>







461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
		      (print "</td></td></tr></table>")
		      
		      (print "<table cellspacing=\"0\" border=\"1\">" 
			     "<tr><td>Item</td><td>State</td><td>Status</td><td>Comment</td>"
			     outtxt "</table></body></html>")
		      (release-dot-lock outputfilename)))
		  (close-output-port oup)
		  (lock-queue:release-lock outputfilename test-id)
		  (change-directory orig-dir)
		  ;; NB// tests:test-set-toplog! is remote internal...
		  (tests:test-set-toplog! db run-id test-name outputfilename)
		  )))))))

;;======================================================================
;; Gather data from test/task specifications