Megatest

Check-in [c12c6d9114]
Login
Overview
Comment:Found never-end bug in runs.scm where run-wait logic was at the wrong level
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.55 | v1.55241
Files: files | file ages | folders
SHA1: c12c6d9114457b2dc861d4edcff7a9ab6b8ce750
User & Date: mrwellan on 2014-07-30 11:56:17
Other Links: branch diff | manifest | tags
Context
2014-07-30
15:32
Forcing synchronous to 0 check-in: b052a1ff92 user: mrwellan tags: v1.55
11:56
Found never-end bug in runs.scm where run-wait logic was at the wrong level check-in: c12c6d9114 user: mrwellan tags: v1.55, v1.55241
2014-07-24
11:42
Backed out the treatment of INCOMPLETE as NOT_STARTED. It must be treated as COMPLETED: check-in: ed97e023ae user: mrwellan tags: v1.55
Changes

Modified dashboard.scm from [8bde9de63a] to [0b0b326b4f].

14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
(import (prefix iup iup:))

(use canvas-draw)
(import canvas-draw-iup)

(use sqlite3 srfi-1 posix regex regex-case srfi-69)
(import (prefix sqlite3 sqlite3:))
(use trace)

(declare (uses common))
(declare (uses margs))
(declare (uses keys))
(declare (uses items))
(declare (uses db))
(declare (uses configf))







<







14
15
16
17
18
19
20

21
22
23
24
25
26
27
(import (prefix iup iup:))

(use canvas-draw)
(import canvas-draw-iup)

(use sqlite3 srfi-1 posix regex regex-case srfi-69)
(import (prefix sqlite3 sqlite3:))


(declare (uses common))
(declare (uses margs))
(declare (uses keys))
(declare (uses items))
(declare (uses db))
(declare (uses configf))

Added datashare-testing/.datashare.config version [dd7d3035a4].









>
>
>
>
1
2
3
4
[datastores]
1 eng /tmp/datastore/eng


Added datashare.scm version [7b598fa3cf].















































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199

;; Copyright 2006-2013, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.

(use ssax)
(use sxml-serializer)
(use sxml-modifications)
(use regex)
(use srfi-69)
(use regex-case)
(use posix)
(use json)
(use csv)
(use srfi-18)
(use format)

(require-library iup)
(import (prefix iup iup:))
(require-library ini-file)
(import (prefix ini-file ini:))

(use canvas-draw)
(import canvas-draw-iup)

(use sqlite3 srfi-1 posix regex regex-case srfi-69)
(import (prefix sqlite3 sqlite3:))

(include "megatest-fossil-hash.scm")

;;
;; GLOBALS
;;
(define *datashare:current-tab-number* 0)
(define datashare:help (conc "Usage: datashare [action [params ...]]

Note: run datashare without parameters to start the gui.

  publish <area> <key> [group]        : Publish data to share, use group to protect (i)
  get <area> <key> [destpath]         : Get a link to data, put the link in destpath (ii)
  update <area> <key>                 : Update the link to data to the latest iteration.

(i)  Uses group ownership of files to be published for group if not specified
(ii) Uses local path or looks up script to find path in configs

Part of the Megatest tool suite. Learn more at http://www.kiatoa.com/fossils/megatest

Version: " megatest-fossil-hash)) ;; "

;;======================================================================
;; DB
;;======================================================================

(define (datashare:initialize-db db)
  (for-each
   (lambda (qry)
     (sqlite3:execute db qry))
   (list 
    "CREATE TABLE pkgs 
         (id        INTEGER PRIMARY KEY,
          area      TEXT,
          key       TEXT,
          iteration INTEGER,
          submitter TEXT,
          datetime  TEXT,
          storegrp  TEXT,
          disk_id   INTEGER,
          comment   TEXT);"
    "CREATE TABLE refs
         (id        INTEGER PRIMARY KEY,
          pkg_id    INTEGER,
          destlink  TEXT);"
    "CREATE TABLE disks
         (id         INTEGER PRIMARY KEY,
          storegrp   TEXT,
          path       TEXT);")))

;; Create the sqlite db
(define (datashare:open-db path) 
  (if (and path
	   (directory? path)
	   (file-read-access? path))
      (let* ((dbpath    (conc path "/datashare.db"))
	     (writeable (file-write-access? dbpath))
	     (dbexists  (file-exists? dbpath))
	     (handler   (make-busy-timeout 136000)))
	(handle-exceptions
	 exn
	 (begin
	   (debug:print 2 "ERROR: problem accessing db " dbpath
			((condition-property-accessor 'exn 'message) exn))
	   (exit))
	 (set! db (sqlite3:open-database dbpath)))
	(if *db-write-access* (sqlite3:set-busy-handler! db handler))
	(if (not dbexists)
	    (begin
	      (datashare:initialize-db db)))
	db)))

;;======================================================================
;; GUI
;;======================================================================

;; The main menu 
(define (datashare:main-menu)
  (iup:menu ;; a menu is a special attribute to a dialog (think Gnome putting the menu at screen top)
   (iup:menu-item "Files" (iup:menu   ;; Note that you can use either #:action or action: for options
		       (iup:menu-item "Open"  action: (lambda (obj)
							(iup:show (iup:file-dialog))
							(print "File->open " obj)))
		       (iup:menu-item "Save"  #:action (lambda (obj)(print "File->save " obj)))
		       (iup:menu-item "Exit"  #:action (lambda (obj)(exit)))))
   (iup:menu-item "Tools" (iup:menu
		       (iup:menu-item "Create new blah" #:action (lambda (obj)(print "Tools->new blah")))
		       ;; (iup:menu-item "Show dialog"     #:action (lambda (obj)
		       ;;  					   (show message-window
		       ;;  					     #:modal? #t
		       ;;  					     ;; set positon using coordinates or center, start, top, left, end, bottom, right, parent-center, current
		       ;;  					     ;; #:x 'mouse
		       ;;  					     ;; #:y 'mouse
		       ;;  )					     
		       ))))

(define (datashare:publish-view)
  (iup:vbox
   (iup:hbox 
    (iup:button "Pushme" 
		#:expand "YES"
		))))

(define (datashare:get-view)
  (iup:vbox
   (iup:hbox 
    (iup:button "Pushme"
		#:expand "YES"
		))))

(define (datashare:manage-view)
  (iup:vbox
   (iup:hbox 
    (iup:button "Pushme"
		#:expand "YES"
		))))

(define (datashare:gui)
  (iup:show
   (iup:dialog 
    #:title (conc "DataShare dashboard " (current-user-name) ":" (current-directory))
    #:menu (datashare:main-menu)
    (let* ((tabs (iup:tabs
		  #:tabchangepos-cb (lambda (obj curr prev)
				      (set! *datashare:current-tab-number* curr))
		  (datashare:publish-view)
		  (datashare:get-view)
		  (datashare:manage-view)
		  )))
	;; (set! (iup:callback tabs tabchange-cb:) (lambda (a b c)(print "SWITCHED TO TAB: " a " " b " " c)))
	(iup:attribute-set! tabs "TABTITLE0" "Publish")
	(iup:attribute-set! tabs "TABTITLE1" "Get")
	(iup:attribute-set! tabs "TABTITLE2" "Manage")
	;; (iup:attribute-set! tabs "BGCOLOR" "190 190 190")
	tabs)))
  (iup:main-loop))

;;======================================================================
;; MAIN
;;======================================================================

(define (datashare:load-config path)
  (let ((fname (conc path "/.datashare.config")))
    (ini:property-separator-patt " *  *")
    (ini:property-separator #\space)
    (if (file-exists? fname)
	(ini:read fname)
	'())))

(define (main)
  (let* ((args (argv))
	 (prog (car args))
	 (rema (cdr args))
	 (conf (datashare:load-config (pathname-directory prog))))
    (cond
     ((eq? (length rema) 1)
      (case (string->symbol (car rema))
	((help -h -help --h --help)
	 (print datashare:help))
	(else
	 (print "ERROR: Unrecognised command. Try \"datashare help\""))))
     ((null? rema)(datashare:gui))
     ((>= (length rema) 2)
      (apply process-action (car rema)(cdr rema)))
     (else (print "ERROR: Unrecognised command. Try \"datashare help\"")))))

(main)

Modified runs.scm from [0fd6716531] to [7cf12a5297].

1038
1039
1040
1041
1042
1043
1044
1045
1046






1047
1048

1049

1050
1051
1052

1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
	 ((not (null? tal))
	  (debug:print-info 4 "I'm pretty sure I shouldn't get here."))
	 ((not (null? reg)) ;; could we get here with leftovers?
	  (debug:print-info 0 "Have leftovers!")
	  (loop (car reg)(cdr reg) '() reruns))
	 (else
	  (debug:print-info 4 "Exiting loop with...\n  hed=" hed "\n  tal=" tal "\n  reruns=" reruns))
	 ))
	;; now *if* -run-wait we wait for all tests to be done






      (let loop ((num-running      (cdb:remote-run db:get-count-tests-running-for-run-id #f run-id #f))
		 (prev-num-running 0))

	(if (and (args:get-arg "-run-wait")

		 (> num-running 0))
	    (begin
	      ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes

	      (if (> (current-seconds)(+ last-time-incomplete 900))
		  (begin
		    (debug:print-info 0 "Marking stuck tests as INCOMPLETE while waiting for run " run-id ". Running as pid " (current-process-id) " on " (get-host-name))
		    (set! last-time-incomplete (current-seconds))
		    (cdb:remote-run db:find-and-mark-incomplete #f)))
	      (if (not (eq? num-running prev-num-running))
		  (debug:print-info 0 "run-wait specified, waiting on " num-running " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " (time->string (seconds->local-time (current-seconds)))))
	      (thread-sleep! 15)
	      (loop (cdb:remote-run db:get-count-tests-running-for-run-id #f run-id #f) num-running))))
      ) ;; LET* ((test-record
    ;; we get here on "drop through". All done!
    (debug:print-info 1 "All tests launched")))

(define (runs:calc-fails prereqs-not-met)
  (filter (lambda (test)
	    (and (vector? test) ;; not (string? test))
		 (equal? (db:test-get-state test) "COMPLETED")







|
|
>
>
>
>
>
>
|
|
>
|
>
|
|
|
>
|
|
|
|
|
|
|
|
|
|







1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
	 ((not (null? tal))
	  (debug:print-info 4 "I'm pretty sure I shouldn't get here."))
	 ((not (null? reg)) ;; could we get here with leftovers?
	  (debug:print-info 0 "Have leftovers!")
	  (loop (car reg)(cdr reg) '() reruns))
	 (else
	  (debug:print-info 4 "Exiting loop with...\n  hed=" hed "\n  tal=" tal "\n  reruns=" reruns))
	 )))
    ;; now *if* -run-wait we wait for all tests to be done
    
    ;; if run-wait mode then wait 15 seconds for db to stabilize
    (if (or (args:get-arg "-run-wait")
	    (equal? (configf:lookup *configdat* "setup" "run-wait") "yes"))
	(thread-sleep! 15))
    ;; Now wait for any RUNNING tests to complete (if in run-wait mode)
    (let wait-loop ((num-running      (cdb:remote-run db:get-count-tests-running-for-run-id #f run-id #f))
		    (prev-num-running 0))
      ;; (debug:print 0 "num-running=" num-running ", prev-num-running=" prev-num-running)
      (if (and (or (args:get-arg "-run-wait")
		   (equal? (configf:lookup *configdat* "setup" "run-wait") "yes"))
	       (> num-running 0))
	  (begin
	    ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
	    ;; (debug:print 0 "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0))
	    (if (> (current-seconds)(+ last-time-incomplete 900))
		(begin
		  (debug:print-info 0 "Marking stuck tests as INCOMPLETE while waiting for run " run-id ". Running as pid " (current-process-id) " on " (get-host-name))
		  (set! last-time-incomplete (current-seconds))
		  (cdb:remote-run db:find-and-mark-incomplete #f)))
	    (if (not (eq? num-running prev-num-running))
		(debug:print-info 0 "run-wait specified, waiting on " num-running " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " (time->string (seconds->local-time (current-seconds)))))
	    (thread-sleep! 15)
	    (wait-loop (cdb:remote-run db:get-count-tests-running-for-run-id #f run-id #f) num-running))))
    ;; LET* ((test-record
    ;; we get here on "drop through". All done!
    (debug:print-info 1 "All tests launched")))

(define (runs:calc-fails prereqs-not-met)
  (filter (lambda (test)
	    (and (vector? test) ;; not (string? test))
		 (equal? (db:test-get-state test) "COMPLETED")
1119
1120
1121
1122
1123
1124
1125

1126
1127
1128
1129
1130
1131
1132
	 (test-waitons (tests:testqueue-get-waitons    test-record))
	 (test-conf    (tests:testqueue-get-testconfig test-record))
	 (itemdat      (tests:testqueue-get-itemdat    test-record))
	 (test-path    (hash-table-ref all-tests-registry test-name)) ;; (conc *toppath* "/tests/" test-name)) ;; could use tests:get-testconfig here ...
	 (force        (hash-table-ref/default flags "-force" #f))
	 (rerun        (hash-table-ref/default flags "-rerun" #f))
	 (keepgoing    (hash-table-ref/default flags "-keepgoing" #f))

	 (item-path     "")
	 (db           #f)
	 (full-test-name #f))

    ;; setting itemdat to a list if it is #f
    (if (not itemdat)(set! itemdat '()))
    (set! item-path (item-list->path itemdat))







>







1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
	 (test-waitons (tests:testqueue-get-waitons    test-record))
	 (test-conf    (tests:testqueue-get-testconfig test-record))
	 (itemdat      (tests:testqueue-get-itemdat    test-record))
	 (test-path    (hash-table-ref all-tests-registry test-name)) ;; (conc *toppath* "/tests/" test-name)) ;; could use tests:get-testconfig here ...
	 (force        (hash-table-ref/default flags "-force" #f))
	 (rerun        (hash-table-ref/default flags "-rerun" #f))
	 (keepgoing    (hash-table-ref/default flags "-keepgoing" #f))
	 (incomplete-timeout (string->number (or (configf:lookup *configdat* "setup" "incomplete-timeout") "x")))
	 (item-path     "")
	 (db           #f)
	 (full-test-name #f))

    ;; setting itemdat to a list if it is #f
    (if (not itemdat)(set! itemdat '()))
    (set! item-path (item-list->path itemdat))
1265
1266
1267
1268
1269
1270
1271

1272
1273
1274
1275
1276
1277
1278
1279
			   (process-signal (current-process-id) signal/kill))))))))
	((KILLED) 
	 (debug:print 1 "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.")
	 (hash-table-set! test-registry (runs:make-full-test-name test-name test-path) 'DONOTRUN)) ;; KILLED))
	((LAUNCHED REMOTEHOSTSTART RUNNING)  
	 (if (> (- (current-seconds)(+ (db:test-get-event_time testdat)
				       (db:test-get-run_duration testdat)))

		600) ;; i.e. no update for more than 600 seconds
	     (begin
	       (debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD")
	       (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "" #f))
	     (debug:print 2 "NOTE: " test-name " is already running")))
	(else      
	 (debug:print 0 "ERROR: Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat))
	 (case (string->symbol (test:get-state testdat)) 







>
|







1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
			   (process-signal (current-process-id) signal/kill))))))))
	((KILLED) 
	 (debug:print 1 "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.")
	 (hash-table-set! test-registry (runs:make-full-test-name test-name test-path) 'DONOTRUN)) ;; KILLED))
	((LAUNCHED REMOTEHOSTSTART RUNNING)  
	 (if (> (- (current-seconds)(+ (db:test-get-event_time testdat)
				       (db:test-get-run_duration testdat)))
		(or incomplete-timeout
		    6000)) ;; i.e. no update for more than 6000 seconds
	     (begin
	       (debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD")
	       (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "" #f))
	     (debug:print 2 "NOTE: " test-name " is already running")))
	(else      
	 (debug:print 0 "ERROR: Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat))
	 (case (string->symbol (test:get-state testdat)) 

Modified tests/fullrun/megatest.config from [3b80db06cd] to [6d157bfc9e].

23
24
25
26
27
28
29





30
31
32
33
34
35
36
1 #{get misc parent}/simplerun/tests

[setup]
# Set launchwait to no to use the more agressive code that does not wait for the launch to complete before proceeding
# this may save a few milliseconds on launching tests
# launchwait no
waivercommentpatt ^WW\d+ [a-z].*






# Use http instead of direct filesystem access
# transport http
transport fs

# If set to "default" the old code is used. Otherwise defaults to 200 or uses
# numeric value given.







>
>
>
>
>







23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
1 #{get misc parent}/simplerun/tests

[setup]
# Set launchwait to no to use the more agressive code that does not wait for the launch to complete before proceeding
# this may save a few milliseconds on launching tests
# launchwait no
waivercommentpatt ^WW\d+ [a-z].*
incomplete-timeout 1

# yes, anything else is no
run-wait yes


# Use http instead of direct filesystem access
# transport http
transport fs

# If set to "default" the old code is used. Otherwise defaults to 200 or uses
# numeric value given.

Modified utils/Makefile_latest.installall from [2749919870] to [a5be37ec2b].

41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
IUPBRANCH=iup-3.10.1

# Eggs to install (straightforward ones)
EGGS=matchable readline apropos base64 regex-literals format regex-case test coops trace csv \
     dot-locking posix-utils posix-extras directory-utils hostinfo tcp-server rpc csv-xml fmt \
     json md5 awful http-client spiffy uri-common intarweb spiffy-request-vars \
     spiffy-directory-listing ssax sxml-serializer sxml-modifications sql-de-lite \
     srfi-19 refdb

#
# Derived variables
#

ifeq ($(PROXY),)
PROX:=







|







41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
IUPBRANCH=iup-3.10.1

# Eggs to install (straightforward ones)
EGGS=matchable readline apropos base64 regex-literals format regex-case test coops trace csv \
     dot-locking posix-utils posix-extras directory-utils hostinfo tcp-server rpc csv-xml fmt \
     json md5 awful http-client spiffy uri-common intarweb spiffy-request-vars \
     spiffy-directory-listing ssax sxml-serializer sxml-modifications sql-de-lite \
     srfi-19 refdb ini-file

#
# Derived variables
#

ifeq ($(PROXY),)
PROX:=