Megatest

Check-in [cf15dc19dc]
Login
Overview
Comment:Merged v1.60 into trunk
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: cf15dc19dc547077e804f75a92138c1af0ff70eb
User & Date: matt on 2014-11-02 09:26:19
Other Links: manifest | tags
Context
2014-11-30
12:44
Bring v1.60 back to trunk check-in: 2f9676f58c user: matt tags: trunk
2014-11-02
09:26
Merged v1.60 into trunk check-in: cf15dc19dc user: matt tags: trunk
09:25
Merged streamline exception handling branch into v1.60 check-in: 8fd7d261b7 user: matt tags: v1.60
2014-10-18
23:48
Merged v1.60 into trunk check-in: 56761f4e0a user: matt tags: trunk
Changes

Modified api.scm from [859483d7bd] to [2952b351e1].

24
25
26
27
28
29
30

31
32
33
34
35
36
37
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38







+







    get-count-tests-running
    get-count-tests-running-in-jobgroup
    get-previous-test-run-record
    get-matching-previous-test-run-records
    test-get-logfile-info
    test-get-records-for-index-file
    get-testinfo-state-status
    test-get-top-process-pid
    test-get-paths-matching-keynames-target-new
    get-prereqs-not-met
    get-count-tests-running-for-run-id
    get-run-info
    get-run-status
    register-run
    get-tests-for-run
71
72
73
74
75
76
77


78
79
80
81
82
83
84
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87







+
+







    ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params))
    ((test-set-status-state)           (apply db:test-set-status-state dbstruct params))
    ((get-previous-test-run-record)    (apply db:get-previous-test-run-record dbstruct params))
    ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params))
    ((test-get-logfile-info)           (apply db:test-get-logfile-info dbstruct params))
    ((test-get-records-for-index-file)  (apply db:test-get-records-for-index-file dbstruct params))
    ((get-testinfo-state-status)       (apply db:get-testinfo-state-status dbstruct params))
    ((test-set-top-process-pid)        (apply db:test-set-top-process-pid dbstruct params))
    ((test-get-top-process-pid)        (apply db:test-get-top-process-pid dbstruct params))
    ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params))
    ((get-prereqs-not-met)             (apply db:get-prereqs-not-met dbstruct params))
    ((roll-up-pass-fail-counts)        (apply db:roll-up-pass-fail-counts dbstruct params))
    ((update-fail-pass-counts)         (apply db:general-call dbstruct 'update-pass-fail-counts params))
    ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params))

    ;; RUNS

Modified client.scm from [a5253ced0b] to [dc8b2be6ad].

124
125
126
127
128
129
130

131
132
133
134
135
136
137
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138







+







;; keep this as a function to ease future 
(define (client:start run-id server-info)
  (http-transport:client-connect (tasks:hostinfo-get-interface server-info)
				 (tasks:hostinfo-get-port server-info)))

;; client:signal-handler
(define (client:signal-handler signum)
  (signal-mask! signum)
  (handle-exceptions
   exn
   (debug:print " ... exiting ...")
   (let ((th1 (make-thread (lambda ()
			     "") ;; do nothing for now (was flush out last call if applicable)
			   "eat response"))
	 (th2 (make-thread (lambda ()

Modified common.scm from [79e5c51a63] to [51076419ec].

48
49
50
51
52
53
54
55

56
57
58
59
60
61
62
63
64


65
66
67
68
69
70
71
48
49
50
51
52
53
54

55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73







-
+









+
+







(define *passnum*           0) ;; when running track calls to run-tests or similar
(define *write-frequency*   (make-hash-table)) ;; run-id => (vector (current-seconds) 0))
(define *alt-log-file* #f)  ;; used by -log
(define *common:denoise*    (make-hash-table)) ;; for low noise printing

;; DATABASE
(define *dbstruct-db*  #f)
(define *db-stats* (make-hash-table)) ;; hash of vectors < count duration-total >
(define *db-stats*            (make-hash-table)) ;; hash of vectors < count duration-total >
(define *db-stats-mutex*      (make-mutex))
(define *db-sync-mutex*       (make-mutex))
(define *db-multi-sync-mutex* (make-mutex))
(define *db-local-sync*       (make-hash-table)) ;; used to record last touch of db
(define *megatest-db*         #f)
(define *last-db-access*      (current-seconds))  ;; update when db is accessed via server
(define *db-write-access*     #t)
(define *inmemdb*             #f)
(define *task-db*             #f) ;; (vector db path-to-db)
(define *db-access-allowed*   #t) ;; flag to allow access
(define *db-access-mutex*     (make-mutex))

;; SERVER
(define *my-client-signature* #f)
(define *transport-type*    'http)
(define *rpc:listener*      #f) ;; if set up for server communication this will hold the tcp port
(define *runremote*         (make-hash-table)) ;; if set up for server communication this will hold <host port>
(define *max-cache-size*    0)
110
111
112
113
114
115
116
117

118
119

120

















121
122
123
124
125
126
127
112
113
114
115
116
117
118

119
120

121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146







-
+

-
+

+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







  (set! *test-paths*         (make-hash-table))
  (set! *test-ids*           (make-hash-table))
  (set! *test-info*          (make-hash-table))
  (set! *run-info-cache*     (make-hash-table))
  (set! *env-vars-by-run-id* (make-hash-table))
  (set! *test-id-cache*      (make-hash-table)))

;; Generic string database (normalization of sorts)
;; Generic string database
(define sdb:qry #f) ;; (make-sdb:qry)) ;;  'init #f)
;; Generic path database (normalization of sorts)
;; Generic path database
(define *fdb* #f)

;;======================================================================
;; L O C K E R S   A N D   B L O C K E R S 
;;======================================================================

;; block further accesses to databases. Call this before shutting db down
(define (common:db-block-further-queries)
  (mutex-lock! *db-access-mutex*)
  (set! *db-access-allowed* #f)
  (mutex-unlock! *db-access-mutex*))

(define (common:db-access-allowed?)
  (let ((val (begin
	       (mutex-lock! *db-access-mutex*)
	       *db-access-allowed*
	       (mutex-unlock! *db-access-mutex*))))
    val))

;;======================================================================
;; U S E F U L   S T U F F
;;======================================================================

(define (common:low-noise-print waitval . keys)
  (let* ((key      (string-intersperse (map conc keys) "-" ))
135
136
137
138
139
140
141






142

143
144
145
146
147
148
149
154
155
156
157
158
159
160
161
162
163
164
165
166

167
168
169
170
171
172
173
174







+
+
+
+
+
+
-
+








(define (common:get-megatest-exe)
  (if (getenv "MT_MEGATEST") (getenv "MT_MEGATEST") "megatest"))

(define (common:read-encoded-string instr)
  (handle-exceptions
   exn
   (handle-exceptions
    exn
    (begin
      (debug:print 0 "ERROR: received bad encoded string \"" instr "\", message: " ((condition-property-accessor 'exn 'message) exn))
      (print-call-chain)
      #f)
   (read (open-input-string (base64:base64-decode instr)))
    (read (open-input-string (base64:base64-decode instr))))
   (read (open-input-string (z3:decode-buffer (base64:base64-decode instr))))))

;;======================================================================
;; S T A T E S   A N D   S T A T U S E S
;;======================================================================

(define *common:std-states*   
181
182
183
184
185
186
187




188
189
190
191
192
193
194
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223







+
+
+
+







(define (get-with-default val default)
  (let ((val (args:get-arg val)))
    (if val val default)))

(define (assoc/default key lst . default)
  (let ((res (assoc key lst)))
    (if res (cadr res)(if (null? default) #f (car default)))))

(define (common:get-testsuite-name)
  (or (configf:lookup *configdat* "server" "testsuite" )
       (pathname-file *toppath*)))

;;======================================================================
;; Misc utils
;;======================================================================

;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5
(define (common:hms-string->seconds tstr)
525
526
527
528
529
530
531











532
533
534
535
536
537
538
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578







+
+
+
+
+
+
+
+
+
+
+







(define (seconds->year-work-week/day sec)
  (time->string
   (seconds->local-time sec) "%yww%V.%w"))

(define (seconds->year-work-week/day-time sec)
  (time->string
   (seconds->local-time sec) "%yww%V.%w %H:%M"))

(define (seconds->quarter sec)
  (case (string->number
	 (time->string 
	  (seconds->local-time sec)
	  "%m"))
    ((1 2 3) 1)
    ((4 5 6) 2)
    ((7 8 9) 3)
    ((10 11 12) 4)
    (else #f)))

;;======================================================================
;; Colors
;;======================================================================
      
(define (common:name->iup-color name)
  (case (string->symbol (string-downcase name))

Modified configf.scm from [81365b22b7] to [3684e66c72].

59
60
61
62
63
64
65
66

67
68
69
70
71
72
73
59
60
61
62
63
64
65

66
67
68
69
70
71
72
73







-
+







(define configf:key-no-val (regexp "^(\\S+)(\\s*)$"))
(define configf:comment-rx (regexp "^\\s*#.*"))
(define configf:cont-ln-rx (regexp "^(\\s+)(\\S+.*)$"))

;; read a line and process any #{ ... } constructs

(define configf:var-expand-regex (regexp "^(.*)#\\{(scheme|system|shell|getenv|get|runconfigs-get|rget)\\s+([^\\}\\{]*)\\}(.*)"))
(define (configf:process-line l ht)
(define (configf:process-line l ht allow-system)
  (let loop ((res l))
    (if (string? res)
	(let ((matchdat (string-search configf:var-expand-regex res)))
	  (if matchdat
	      (let* ((prestr  (list-ref matchdat 1))
		     (cmdtype (list-ref matchdat 2)) ;; eval, system, shell, getenv
		     (cmd     (list-ref matchdat 3))
83
84
85
86
87
88
89


90
91
92
93




94
95
96
97
98
99
100
83
84
85
86
87
88
89
90
91




92
93
94
95
96
97
98
99
100
101
102







+
+
-
-
-
-
+
+
+
+







					(sect  (car parts))
					(var   (cadr parts)))
				   (conc "(lambda (ht)(config-lookup ht \"" sect "\" \"" var "\"))")))
				((runconfigs-get) (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))"))
				((rget)           (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))"))
				(else "(lambda (ht)(print \"ERROR\") \"ERROR\")"))))
		;; (print "fullcmd=" fullcmd)
		(if (or allow-system
			(not (member cmdtype '("system" "shell"))))
		(with-input-from-string fullcmd
		  (lambda ()
		    (set! result ((eval (read)) ht))))
		(loop (conc prestr result poststr)))
		    (with-input-from-string fullcmd
		      (lambda ()
			(set! result ((eval (read)) ht))))
		    (set! result (conc "#{(" cmdtype ") "  cmd "}")))		(loop (conc prestr result poststr)))
	      res))
	res)))

;; Run a shell command and return the output as a string
(define (shell cmd)
  (let* ((output (cmd-run->list cmd))
	 (res    (car output))
115
116
117
118
119
120
121


122

123
124
125
126
127
128
129
130
131
132
133
134
135
136
137








138
139
140
141
142
143
144
117
118
119
120
121
122
123
124
125

126
127
128
129
130
131
132
133
134
135
136
137




138
139
140
141
142
143
144
145
146
147
148
149
150
151
152







+
+
-
+











-
-
-
-
+
+
+
+
+
+
+
+







(define (runconfigs-get config var)
  (let ((targ (or (args:get-arg "-reqtarg")(args:get-arg "-target")(getenv "MT_TARGET"))))
    (if targ
	(or (configf:lookup config targ var)
	    (configf:lookup config "default" var))
	(configf:lookup config "default" var))))

;; this was inline but I'm pretty sure that is a hold over from when it was *very* simple ...
;;
(define-inline (configf:read-line p ht allow-processing)
(define (configf:read-line p ht allow-processing)
  (let loop ((inl (read-line p)))
    (let ((cont-line (and (string? inl)
			  (not (string-null? inl))
			  (equal? "\\" (string-take-right inl 1)))))
      (if cont-line ;; last character is \ 
	  (let ((nextl (read-line p)))
	    (if (not (eof-object? nextl))
		(loop (string-append (if cont-line 
					 (string-take inl (- (string-length inl) 1))
					 inl)
				     nextl))))
	  (if (and allow-processing 
		   (not (eq? allow-processing 'return-string)))
	      (configf:process-line inl ht)
	      inl)))))
	  (case allow-processing ;; if (and allow-processing 
	    ;;	   (not (eq? allow-processing 'return-string)))
	    ((#t #f)
	     (configf:process-line inl ht allow-processing))
	    ((return-string)
	     inl)
	    (else
	     (configf:process-line inl ht allow-processing)))))))

;; read a config file, returns hash table of alists

;; read a config file, returns hash table of alists
;; adds to ht if given (must be #f otherwise)
;; envion-patt is a regex spec that identifies sections that will be eval'd
;; in the environment on the fly

Modified dashboard-tests.scm from [5aa3eae6d0] to [22f7e05798].

126
127
128
129
130
131
132
133

134
135
136
137
138
139
140
141
126
127
128
129
130
131
132

133

134
135
136
137
138
139
140







-
+
-







	   (append (map (lambda (val)
			  (iup:label val ; #:expand "HORIZONTAL"
				     ))
			(list "Author: "
			      "Owner: "
			      "Reviewed: "
			      "Tags: "
			      "Description: "
			      "Description: "))
			      ))
		   (list (iup:label "" #:expand "VERTICAL"))))
    (apply iup:vbox  ; #:expand "YES"
	   (list 
	    (store-meta "author"
			 (iup:label (db:testmeta-get-author testmeta) #:expand "HORIZONTAL")
			 (lambda (testmeta)(db:testmeta-get-author testmeta)))
	    (store-meta "owner"
195
196
197
198
199
200
201
202


203
204
205
206
207
208
209
194
195
196
197
198
199
200

201
202
203
204
205
206
207
208
209







-
+
+







			  (iup:label val ; #:expand "HORIZONTAL"
				     ))
			(list "Hostname: "
			      "Uname -a: "
			      "Disk free: "
			      "CPU Load: "
			      "Run duration: "
			      "Logfile: "))
			      "Logfile: "
			      "Top process id: "))
		   (iup:label "" #:expand "VERTICAL")))
    (apply iup:vbox ; #:expand "YES"
	   (list
	    ;; NOTE: Yes, the host can change!
	    (store-label "HostName"
			 (iup:label ;; (sdb:qry 'getstr 
			  (db:test-get-host testdat) ;; )
218
219
220
221
222
223
224
225

226
227





228
229
230
231
232
233
234
218
219
220
221
222
223
224

225
226

227
228
229
230
231
232
233
234
235
236
237
238







-
+

-
+
+
+
+
+







			 (lambda (testdat)(conc (db:test-get-diskfree testdat))))
	    (store-label "CPULoad"
			 (iup:label (conc (db:test-get-cpuload testdat)) #:expand "HORIZONTAL")
			 (lambda (testdat)(conc (db:test-get-cpuload testdat))))
	    (store-label "RunDuration"
			 (iup:label (conc (seconds->hr-min-sec (db:test-get-run_duration testdat))) #:expand "HORIZONTAL")
			 (lambda (testdat)(conc (seconds->hr-min-sec (db:test-get-run_duration testdat)))))
	    (store-label "CPULoad"
	    (store-label "LogFile"
			 (iup:label (conc (db:test-get-final_logf testdat)) #:expand "HORIZONTAL")
			 (lambda (testdat)(conc (db:test-get-final_logf testdat)))))))))
			 (lambda (testdat)(conc (db:test-get-final_logf testdat))))
	    (store-label "ProcessId"
			 (iup:label (conc (db:test-get-process_id testdat)) #:expand "HORIZONTAL")
			 (lambda (testdat)(conc (db:test-get-process_id testdat))))
	    )))))

;; use a global for setting the buttons colors
;;                           state status teststeps
(define *state-status* (vector #f #f #f))
(define (update-state-status-buttons testdat)
  (let* ((state  (db:test-get-state  testdat))
	 (status (db:test-get-status testdat))
555
556
557
558
559
560
561
562

563
564
565
566
567
568
569
559
560
561
562
563
564
565

566
567
568
569
570
571
572
573







-
+







							    (> (current-milliseconds)(+ last-update 250))) ;; every half seconds if db touched
						       (> (current-milliseconds)(+ last-update 10000))     ;; force update even 10 seconds
						       request-update))
				    (newtestdat (if need-update 
						    ;; NOTE: BUG HIDER, try to eliminate this exception handler
						    (handle-exceptions
						     exn 
						     (debug:print-info 0 "test db access issue: " ((condition-property-accessor 'exn 'message) exn))
						     (debug:print-info 0 "test db access issue in examine test for run-id " run-id ", test-id " test-id ": " ((condition-property-accessor 'exn 'message) exn))
						     (db:get-test-info-by-id dbstruct run-id test-id )))))
			       ;; (debug:print-info 0 "need-update= " need-update " curr-mod-time = " curr-mod-time)
			       (cond
				((and need-update newtestdat)
				 (set! testdat newtestdat)
				 (set! teststeps    (dashboard-tests:get-compressed-steps dbstruct run-id test-id))
				 (set! logfile      (conc (db:test-get-rundir testdat) "/" (db:test-get-final_logf testdat)))

Modified dashboard.scm from [f6eb70a199] to [588f9fa302].

974
975
976
977
978
979
980
981

982
983
984
985
986
987
988
974
975
976
977
978
979
980

981
982
983
984
985
986
987
988







-
+








;;======================================================================
;; S U M M A R Y 
;;======================================================================
;;
;; General info about the run(s) and megatest area
(define (dashboard:summary db)
  (let ((rawconfig        (read-config (conc *toppath* "/megatest.config") #f 'return-string)))
  (let ((rawconfig        (read-config (conc *toppath* "/megatest.config") #f #f))) ;; changed to #f since I want #{} to be expanded by [system ...] to NOT be expanded. WAS: 'return-string)))
    (iup:vbox
     (iup:split
      #:value 500
      (iup:frame 
       #:title "General Info"
       (iup:vbox
	(iup:hbox
1453
1454
1455
1456
1457
1458
1459





1460
1461
1462



1463
1464
1465
1466
1467
1468
1469
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464



1465
1466
1467
1468
1469
1470
1471
1472
1473
1474







+
+
+
+
+
-
-
-
+
+
+







(define *last-monitor-update-time* 0)

;; Force creation of the db in case it isn't already there.
(let ((db (tasks:open-db)))
  (sqlite3:finalize! db))

(define (dashboard:get-youngest-run-db-mod-time)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 "WARNING: error in accessing databases in get-youngest-run-db-mod-time: " ((condition-property-accessor 'exn 'message) exn))
     (current-seconds)) ;; something went wrong - just print an error and return current-seconds
  (apply max (map (lambda (filen)
		    (file-modification-time filen))
		  (glob (conc *dbdir* "/*.db")))))
   (apply max (map (lambda (filen)
		     (file-modification-time filen))
		   (glob (conc *dbdir* "/*.db"))))))

(define (dashboard:run-update x)
  (let* ((modtime         (dashboard:get-youngest-run-db-mod-time)) ;; (file-modification-time *db-file-path*))
	 (monitor-modtime (if (file-exists? *monitor-db-path*)
			      (file-modification-time *monitor-db-path*)
			      -1))
	 (run-update-time (current-seconds))

Modified db.scm from [3d37442279] to [d5f00a0358].

10
11
12
13
14
15
16
17

18
19
20
21
22
23
24
10
11
12
13
14
15
16

17
18
19
20
21
22
23
24







-
+







;;======================================================================

;;======================================================================
;; Database access
;;======================================================================

(require-extension (srfi 18) extras tcp)
(use sqlite3 srfi-1 posix regex regex-case srfi-69 csv-xml s11n md5 message-digest base64 format dot-locking)
(use sqlite3 srfi-1 posix regex regex-case srfi-69 csv-xml s11n md5 message-digest base64 format dot-locking z3)
(import (prefix sqlite3 sqlite3:))
(import (prefix base64 base64:))

(declare (unit db))
(declare (uses common))
(declare (uses keys))
(declare (uses ods))
276
277
278
279
280
281
282


283
284
285
286
287
288
289


290

291

292
293
294












295


296
297
298
299
300
301

302
303
304
305
306
307
308
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293

294
295
296



297
298
299
300
301
302
303
304
305
306
307
308

309
310
311
312
313
314

315
316
317
318
319
320
321
322
323







+
+







+
+
-
+

+
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
-
+
+




-

+







	      num-synced)
	    0))))

;; close all opened run-id dbs
(define (db:close-all dbstruct)
  ;; finalize main.db
  (db:sync-touched dbstruct 0 force-sync: #t)
  ;;(common:db-block-further-queries)
  ;; (mutex-lock! *db-sync-mutex*) ;; with this perhaps it isn't necessary to use the block-further-queries mechanism?
  (sqlite3:finalize! (db:get-db dbstruct #f))
  (let* ((local (dbr:dbstruct-get-local dbstruct))
	 (rundb (dbr:dbstruct-get-rundb dbstruct)))
    (if local
	(for-each
	 (lambda (db)
	   (if (sqlite3:database? db)
	       (begin
		 (sqlite3:interrupt! db)
	       (sqlite3:finalize! db)))
		 (sqlite3:finalize! db #t))))
	 (hash-table-values (dbr:dbstruct-get-locdbs dbstruct))))
    (thread-sleep! 3)
    (if rundb
	(if (sqlite3:database? rundb)
	    (sqlite3:finalize! rundb)
    (if (and rundb
	     (sqlite3:database? rundb))
	(handle-exceptions
	 exn
	 (begin 
	   (debug:print 0 "WARNING: database files may not have been closed correctly. Consider running -cleanup-db")
	   (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	   (debug:print 0 " db: " rundb)
	   (print-call-chain)
	   #f)
	 (sqlite3:interrupt! rundb)
	 (sqlite3:finalize! rundb #t))))
	    (debug:print 2 "WARNING: attempting to close databases but got " rundb " instead of a database")))))
  ;; (mutex-unlock! *db-sync-mutex*)
  )

(define (db:open-inmem-db)
  (let* ((db      (sqlite3:open-database ":memory:"))
	 (handler (make-busy-timeout 3600)))
    (db:initialize-run-id-db db)
    (sqlite3:set-busy-handler! db handler)
    (db:initialize-run-id-db db)
    db))

;; just tests, test_steps and test_data tables
(define db:sync-tests-only
  (list
   ;; (list "strs"
   ;;       '("id"             #f)
845
846
847
848
849
850
851
852

853
854
855
856
857
858
859
860
861
862
863
864
865
866

867
868
869
870
871
872
873
874







-
+







;;    a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown'
;;    b. If test dir gone, delete the test record
;; 2. Look at run records
;;    a. If have tests that are not deleted, set state='unknown'
;;    b. ....
;;
(define (db:clean-up db)
  (debug:print 0 "WARNING: db clean up not ported to v1.60, cleanup action will be on megatest.db")
  (debug:print 0 "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db")
  (let* (;; (db         (db:get-db dbstruct #f))
	 (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests)+(SELECT count(id) FROM runs);"))
	(statements
	 (map (lambda (stmt)
		(sqlite3:prepare db stmt))
	      (list
	       ;; delete all tests that belong to runs that are 'deleted'
1161
1162
1163
1164
1165
1166
1167
1168

1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182

1183
1184
1185
1186
1187
1188
1189
1190







-
+








(define (db:get-all-run-ids dbstruct)
  (let ((run-ids '()))
    (sqlite3:for-each-row
     (lambda (run-id)
       (set! run-ids (cons run-id run-ids)))
     (db:get-db dbstruct #f)
     "SELECT id FROM runs WHERE state != 'deleted';")
     "SELECT id FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;")
    (reverse run-ids)))

;; get some basic run stats
;;
;; ( (runname (( state  count ) ... ))
;;   (   ...  
(define (db:get-run-stats dbstruct)
1539
1540
1541
1542
1543
1544
1545




1546
1547
1548
1549
1550
1551

1552
1553
1554

1555
1556
1557
1558
1559
1560
1561
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569

1570



1571
1572
1573
1574
1575
1576
1577
1578







+
+
+
+





-
+
-
-
-
+







       (sqlite3:execute (db:get-db dbstruct run-id) "DELETE FROM tests WHERE state='DELETED' AND event_time<?;" targtime))
     run-ids)))

;; set tests with state currstate and status currstatus to newstate and newstatus
;; use currstate = #f and or currstatus = #f to apply to any state or status respectively
;; WARNING: SQL injection risk. NB// See new but not yet used "faster" version below
;;
		;;  AND NOT (item_path='' AND testname in (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));")))
		;;(debug:print 0 "QRY: " qry)
		;; (db:delay-if-busy)

(define (db:set-tests-state-status dbstruct run-id testnames currstate currstatus newstate newstatus)
  (for-each (lambda (testname)
	      (let ((qry (conc "UPDATE tests SET state=?,status=? WHERE "
			       (if currstate  (conc "state='" currstate "' AND ") "")
			       (if currstatus (conc "status='" currstatus "' AND ") "")
			       " run_id=? AND testname=? AND NOT (item_path='' AND testname in (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));")))
			       " run_id=? AND testname LIKE ?;")))
		;;(debug:print 0 "QRY: " qry)
		;; (db:delay-if-busy)
		(sqlite3:execute (db:get-db dbstruct run-id) qry run-id newstate newstatus testname testname)))
		(sqlite3:execute (db:get-db dbstruct run-id) qry newstate newstatus run-id testname)))
	    testnames))

;; speed up for common cases with a little logic
;; NB// Ultimately this will be deprecated in deference to mt:test-set-state-status-by-id
;;
(define (db:test-set-state-status-by-id dbstruct run-id test-id newstate newstatus newcomment)
  ;; (db:delay-if-busy)
1660
1661
1662
1663
1664
1665
1666
1667











1668
1669
1670


1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686

1687
1688
1689
1690
1691
1692
1693
1694
1695
1696



1697
1698
1699
1700
1701
1702
1703
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696


1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722



1723
1724
1725
1726
1727
1728
1729
1730
1731
1732








+
+
+
+
+
+
+
+
+
+
+

-
-
+
+
















+







-
-
-
+
+
+







(define (db:get-test-id dbstruct run-id testname item-path)
  (let* ((db (db:get-db dbstruct run-id)))
    (db:first-result-default
     (db:get-db dbstruct run-id)
     "SELECT id FROM tests WHERE testname=? AND item_path=?;"
     #f ;; the default
     testname item-path)))

;; overload the unused attemptnum field for the process id of the runscript or 
;; ezsteps step script in progress
;;
(define (db:test-set-top-process-pid dbstruct run-id test-id pid)
  (sqlite3:execute (db:get-db dbstruct run-id) "UPDATE tests SET attemptnum=? WHERE id=?;"
		   pid test-id))

(define (db:test-get-top-process-pid dbstruct run-id test-id)
  (sqlite3:first-result (db:get-db dbstruct run-id) "SELECT attemptnum FROM tests WHERE id=?;"
			test-id))

(define db:test-record-fields '("id"           "run_id"        "testname"  "state"      "status"      "event_time"
				"host"         "cpuload"       "diskfree"  "uname"      "rundir"   "item_path"
                                "run_duration" "final_logf" "comment"   "shortdir"))
				"host"         "cpuload"       "diskfree"  "uname"      "rundir"      "item_path"
                                "run_duration" "final_logf"    "comment"   "shortdir"   "attemptnum"))

;; fields *must* be a non-empty list
;;
(define (db:field->number fieldname fields)
  (if (null? fields)
      #f
      (let loop ((hed  (car fields))
		 (tal  (cdr fields))
		 (indx 0))
	(if (equal? fieldname hed)
	    indx
	    (if (null? tal)
		#f
		(loop (car tal)(cdr tal)(+ indx 1)))))))

(define db:test-record-qry-selector (string-intersperse db:test-record-fields ","))


;; NOTE: Use db:test-get* to access records
;; NOTE: This needs rundir decoding? Decide, decode here or where used? For the moment decode where used.
(define (db:get-all-tests-info-by-run-id dbstruct run-id)
  (let ((db (db:get-db dbstruct run-id))
	(res '()))
    (sqlite3:for-each-row
     (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir)
       ;;                 0    1       2      3      4        5       6      7        8     9     10      11          12          13       14
       (set! res (cons (vector id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir)
     (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir attemptnum)
       ;;                 0    1       2      3      4        5       6      7        8     9     10      11          12          13       14     15
       (set! res (cons (vector id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir attemptnum)
		       res)))
     (db:get-db dbstruct run-id)
     (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE state != 'DELETED' AND run_id=?;")
     run-id)
    res))

(define (db:replace-test-records dbstruct run-id testrecs)
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769




1770
1771
1772
1773
1774
1775
1776
1788
1789
1790
1791
1792
1793
1794




1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805







-
-
-
-
+
+
+
+







	 (db:prep-megatest.db-adj-test-ids mtdb run-id testrecs)))
     run-ids)))

;; Get test data using test_id
(define (db:get-test-info-by-id dbstruct run-id test-id)
  (let ((db (db:get-db dbstruct run-id))
	(res #f))
    (sqlite3:for-each-row
     (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id)
	   ;;                 0    1       2      3      4        5       6      7        8     9     10      11          12          13       14
       (set! res (vector id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id)))
    (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test
     (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum)
	   ;;             0    1       2      3      4        5       6      7        8     9     10      11          12          13           14         15          16
       (set! res (vector id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum)))
     (db:get-db dbstruct run-id)
     (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id=?;")
	 test-id)
    res))

;; Use db:test-get* to access
;; Get test data using test_ids. NB// Only works within a single run!!
1978
1979
1980
1981
1982
1983
1984
1985




1986
1987
1988
1989
1990
1991
1992
1993
1994
1995

1996
1997
1998



1999
2000
2001
2002
2003
2004
2005
2007
2008
2009
2010
2011
2012
2013

2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028



2029
2030
2031
2032
2033
2034
2035
2036
2037
2038







-
+
+
+
+










+
-
-
-
+
+
+







;; NOTE: Can remove the regex and base64 encoding for zmq
(define (db:obj->string obj)
  (case *transport-type*
    ;; ((fs) obj)
    ((http fs)
     (string-substitute
      (regexp "=") "_"
      (base64:base64-encode (with-output-to-string (lambda ()(serialize obj))))
      (base64:base64-encode 
       (z3:encode-buffer
	(with-output-to-string
	  (lambda ()(serialize obj)))))
      #t))
    ((zmq)(with-output-to-string (lambda ()(serialize obj))))
    (else obj)))

(define (db:string->obj msg)
  (case *transport-type*
    ;; ((fs) msg)
    ((http fs)
     (if (string? msg)
	 (with-input-from-string 
	     (z3:decode-buffer
	     (base64:base64-decode
	      (string-substitute 
	       (regexp "_") "=" msg #t))
	      (base64:base64-decode
	       (string-substitute 
		(regexp "_") "=" msg #t)))
	   (lambda ()(deserialize)))
	 (vector #f #f #f))) ;; crude reply for when things go awry
    ((zmq)(with-input-from-string msg (lambda ()(deserialize))))
    (else msg)))

(define (db:test-set-status-state dbstruct run-id test-id status state msg)
  (let ((db  (db:get-db dbstruct run-id)))
2407
2408
2409
2410
2411
2412
2413
2414

2415
2416
2417
2418
2419
2420
2421
2440
2441
2442
2443
2444
2445
2446

2447
2448
2449
2450
2451
2452
2453
2454







-
+







  (handle-exceptions
   exn
   (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
     ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn)
     (if (eq? err-status 'done)
	 default
	 (begin
	   (debug:print 0 "ERROR:  query " stmt " failed " ((condition-property-accessor 'exn 'message) exn))
	   (debug:print 0 "ERROR:  query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn))
	   (print-call-chain)
	   default)))
   (apply sqlite3:first-result db stmt params)))

;;======================================================================
;; Extract ods file from the db
;;======================================================================

Modified db_records.scm from [828e5a591d] to [8738c33604].

74
75
76
77
78
79
80

81
82


83
84
85
86
87
88
89
74
75
76
77
78
79
80
81


82
83
84
85
86
87
88
89
90







+
-
-
+
+







(define-inline (db:test-get-uname        vec) (vector-ref vec 9))
;; (define-inline (db:test-get-rundir       vec) (sdb:qry 'getstr (vector-ref vec 10)))
(define-inline (db:test-get-rundir       vec) (vector-ref vec 10))
(define-inline (db:test-get-item-path    vec) (vector-ref vec 11))
(define-inline (db:test-get-run_duration vec) (vector-ref vec 12))
(define-inline (db:test-get-final_logf   vec) (vector-ref vec 13))
(define-inline (db:test-get-comment      vec) (vector-ref vec 14))
(define-inline (db:test-get-process_id   vec) (vector-ref vec 16))
(define-inline (db:test-get-pass_count   vec) (vector-ref vec 15))
(define-inline (db:test-get-fail_count   vec) (vector-ref vec 16))
;; (define-inline (db:test-get-pass_count   vec) (vector-ref vec 15))
;; (define-inline (db:test-get-fail_count   vec) (vector-ref vec 16))
(define-inline (db:test-get-fullname     vec)
  (conc (db:test-get-testname vec) "/" (db:test-get-item-path vec)))

(define-inline (db:test-get-first_err    vec) (printable (vector-ref vec 15)))
(define-inline (db:test-get-first_warn   vec) (printable (vector-ref vec 16)))

(define-inline (db:test-set-cpuload!  vec val)(vector-set! vec 7 val))

Modified docs/manual/megatest_manual.html from [20d78e9a28] to [191f1255c5].

1
2
3
4
5
6

7
8
9
10
11
12
13
1
2
3
4
5

6
7
8
9
10
11
12
13





-
+







<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.6.7" />
<meta name="generator" content="AsciiDoc 8.6.9" />
<title>The Megatest Users Manual</title>
<style type="text/css">
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */

/* Default font. */
body {
  font-family: Georgia,serif;
83
84
85
86
87
88
89
90




91
92
93



94
95
96
97
98
99
100
83
84
85
86
87
88
89

90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106







-
+
+
+
+



+
+
+








ul, ol, li > p {
  margin-top: 0;
}
ul > li     { color: #aaa; }
ul > li > * { color: black; }

pre {
.monospaced, code, pre {
  font-family: "Courier New", Courier, monospace;
  font-size: inherit;
  color: navy;
  padding: 0;
  margin: 0;
}
pre {
  white-space: pre-wrap;
}

#author {
  color: #527bbd;
  font-weight: bold;
  font-size: 1.1em;
}
#email {
215
216
217
218
219
220
221
222

223
224
225
226
227
228
229
221
222
223
224
225
226
227

228
229
230
231
232
233
234
235







-
+








div.exampleblock > div.content {
  border-left: 3px solid #dddddd;
  padding-left: 0.5em;
}

div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; }
span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; }

dl {
  margin-top: 0.8em;
  margin-bottom: 0.8em;
}
dt {
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
417
418
419
420
421
422
423






424
425
426
427
428
429
430







-
-
-
-
-
-









/*
 * xhtml11 specific
 *
 * */

tt {
  font-family: "Courier New", Courier, monospace;
  font-size: inherit;
  color: navy;
}

div.tableblock {
  margin-top: 1.0em;
  margin-bottom: 1.5em;
}
div.tableblock > table {
  border: 3px solid #527bbd;
}
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
450
451
452
453
454
455
456






457
458
459
460
461
462
463







-
-
-
-
-
-









/*
 * html5 specific
 *
 * */

.monospaced {
  font-family: "Courier New", Courier, monospace;
  font-size: inherit;
  color: navy;
}

table.tableblock {
  margin-top: 1.0em;
  margin-bottom: 1.5em;
}
thead, p.tableblock.header {
  font-weight: bold;
  color: #527bbd;
535
536
537
538
539
540
541


542
543
544
545
546
547
548
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544







+
+







body.manpage div.sectionbody {
  margin-left: 3em;
}

@media print {
  body.manpage div#toc { display: none; }
}


</style>
<script type="text/javascript">
/*<![CDATA[*/
var asciidoc = {  // Namespace.

/////////////////////////////////////////////////////////////////////
// Table Of Contents generator
735
736
737
738
739
740
741
742

743
744
745
746
747
748
749
731
732
733
734
735
736
737

738
739
740
741
742
743
744
745







-
+







/*]]>*/
</script>
</head>
<body class="book">
<div id="header">
<h1>The Megatest Users Manual</h1>
<span id="author">Matt Welland</span><br />
<span id="email"><tt>&lt;<a href="mailto:matt@kiatoa.com">matt@kiatoa.com</a>&gt;</tt></span><br />
<span id="email"><code>&lt;<a href="mailto:matt@kiatoa.com">matt@kiatoa.com</a>&gt;</code></span><br />
<span id="revnumber">version 1.0,</span>
<span id="revdate">April 2012</span>
</div>
<div id="content">
<div class="sect1">
<h2 id="_preface">Preface</h2>
<div class="sectionbody">
957
958
959
960
961
962
963
964

965
966

967
968

969
970
971
972
973
974
975
976


977
978
979

980
981
982
983
984
985


986

987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013


1014
1015
1016
1017

1018
1019
1020
1021
1022
1023

1024


1025

1026
1027
1028
1029
1030
1031
1032
1033














1034
1035









1036
1037
1038

1039
1040
1041
1042
1043
1044

1045






1046
1047
1048

1049



1050
1051
1052

1053
1054

1055



1056
1057
1058
1059
1060

1061
1062
1063
1064
1065

1066
1067
1068
1069
1070
1071
1072

1073
1074
1075
1076

1077
1078
1079

1080



1081
1082
1083

1084






1085
1086
1087

1088



1089
1090
1091

1092
1093
1094

1095



1096
1097
1098

1099



1100
1101
1102
1103
1104
1105

1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119

1120




1121
1122
1123
1124

1125


































1126
1127
1128
1129
1130
1131
1132
1133





1134


1135
1136
1137
1138
1139
1140
1141
1142
1143
1144






1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160





1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180





1181
1182
1183
1184



1185
1186
1187
1188
1189
1190
1191
1192

1193
1194
1195
1196
953
954
955
956
957
958
959

960
961

962


963




964
965


966
967
968


969




970
971
972
973

974
975









976
977
978
979
980
981
982
983
984
985
986
987

988
989


990
991
992
993
994

995
996
997
998
999
1000

1001
1002
1003
1004

1005








1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032

1033
1034
1035
1036
1037
1038

1039
1040
1041
1042
1043
1044
1045
1046
1047
1048

1049
1050
1051
1052
1053
1054
1055

1056
1057

1058
1059
1060
1061
1062
1063
1064
1065
1066

1067
1068
1069
1070
1071

1072
1073
1074
1075
1076
1077
1078

1079
1080
1081
1082

1083
1084
1085

1086
1087
1088
1089
1090
1091
1092

1093
1094
1095
1096
1097
1098
1099
1100
1101
1102

1103
1104
1105
1106
1107
1108
1109

1110
1111
1112

1113
1114
1115
1116
1117
1118
1119

1120
1121
1122
1123
1124
1125
1126
1127
1128
1129

1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143

1144
1145
1146
1147
1148
1149
1150
1151
1152

1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267

1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279

1280
1281
1282
1283
1284







-
+

-
+
-
-
+
-
-
-
-


-
-
+
+

-
-
+
-
-
-
-


+
+
-
+

-
-
-
-
-
-
-
-
-












-


-
-
+
+



-
+





-
+

+
+
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+


+
+
+
+
+
+
+
+
+


-
+





-
+

+
+
+
+
+
+


-
+

+
+
+


-
+

-
+

+
+
+




-
+




-
+






-
+



-
+


-
+

+
+
+


-
+

+
+
+
+
+
+


-
+

+
+
+


-
+


-
+

+
+
+


-
+

+
+
+





-
+













-
+

+
+
+
+



-
+

+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+








+
+
+
+
+

+
+










+
+
+
+
+
+
















+
+
+
+
+




















+
+
+
+
+

-


+
+
+







-
+




<h2 id="_tricks">Tricks</h2>
<div class="sectionbody">
<div class="paragraph"><p>This section is a compendium of a various useful tricks for debugging,
configuring and generally getting the most out of Megatest.</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_debugging_tricks">Debugging Tricks</h2>
<h2 id="_limiting_your_running_jobs">Limiting your running jobs</h2>
<div class="sectionbody">
<div class="sect2">
<div class="paragraph"><p>The following example will limit a test in the jobgroup "group1" to no more than 10 tests simultaneously.</p></div>
<h3 id="_examining_the_environment">Examining The Environment</h3>
<div class="sect3">
<div class="paragraph"><p>In your testconfig:</p></div>
<h4 id="_during_config_file_processing">During Config File Processing</h4>
</div>
<div class="sect3">
<h4 id="_organising_your_tests_and_tasks">Organising Your Tests and Tasks</h4>
<div class="listingblock">
<div class="content">
<pre><tt>[tests-paths]
1 #{get misc parent}/simplerun/tests</tt></pre>
<pre><code>[test_meta]
jobgroup group1</code></pre>
</div></div>
<div class="listingblock">
<div class="content">
<div class="paragraph"><p>In your megatest.config:</p></div>
<pre><tt>[setup]</tt></pre>
</div></div>
<div class="paragraph"><p>The runscript method is a brute force way to run scripts where the
user is responsible for setting STATE and STATUS</p></div>
<div class="listingblock">
<div class="content">
<pre><code>[jobgroups]
group1 10
<pre><tt>runscript main.csh</tt></pre>
custdes 4</code></pre>
</div></div>
</div>
</div>
</div>
</div>
<div class="sect1">
<h2 id="_tricks">Tricks</h2>
<div class="sectionbody">
<div class="paragraph"><p>This section is a compendium of a various useful tricks for debugging,
configuring and generally getting the most out of Megatest.</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_debugging_tricks">Debugging Tricks</h2>
<div class="sectionbody">
<div class="sect2">
<h3 id="_examining_the_environment">Examining The Environment</h3>
<div class="sect3">
<h4 id="_during_config_file_processing">During Config File Processing</h4>
</div>
<div class="sect3">
<h4 id="_organising_your_tests_and_tasks">Organising Your Tests and Tasks</h4>
<div class="paragraph"><p>/nfs/ch/disks/ch_unienv_disk005/qa_mrwellan/interim/src/megatest/tests/fdktestqa/testqa</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>[tests-paths]
1 #{get misc parent}/simplerun/tests</tt></pre>
<pre><code>[tests-paths]
1 #{get misc parent}/simplerun/tests</code></pre>
</div></div>
<div class="listingblock">
<div class="content">
<pre><tt>[setup]</tt></pre>
<pre><code>[setup]</code></pre>
</div></div>
<div class="paragraph"><p>The runscript method is a brute force way to run scripts where the
user is responsible for setting STATE and STATUS</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>runscript main.csh</tt></pre>
<pre><code>runscript main.csh</code></pre>
</div></div>
</div>
</div>
<div class="paragraph"><p>ww30.2
<div class="sect2">
cellname/LVS/cellname.LAYOUT_ERRORS</p></div>
<div class="paragraph"><p>Error: text open</p></div>
<div class="paragraph"><p>ww31.3
cellname/LVS/cellname.LAYOUT_ERRORS</p></div>
<div class="paragraph"><p>Error: text open
Reference</p></div>
<div class="exampleblock">
<div class="content">
<h3 id="_debugging_server_problems">Debugging Server Problems</h3>
<div class="listingblock">
<div class="content">
<pre><code>sudo lsof -i
sudo netstat -lptu
sudo netstat -tulpn</code></pre>
</div></div>
</div>
</div>
</div>
<h1 id="_reference">Reference</h1>
<div class="sect1">
<h2 id="_the_first_chapter_of_the_second_part_2">The First Chapter of the Second Part</h2>
<div class="sectionbody">
<div class="paragraph"><p>Chapters grouped into book parts are at level 1 and can contain
sub-sections.</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_the_testconfig_file">The testconfig File</h2>
<div class="sectionbody">
<div class="sect2">
<h3 id="_setup_section">Setup section</h3>
<div class="sect3">
<h4 id="_header">Header</h4>
<div class="listingblock">
<div class="content">
<pre><tt>[setup]</tt></pre>
<pre><code>[setup]</code></pre>
</div></div>
<div class="paragraph"><p>The runscript method is a brute force way to run scripts where the
user is responsible for setting STATE and STATUS</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>runscript main.csh</tt></pre>
<pre><code>runscript main.csh</code></pre>
</div></div>
</div>
</div>
<div class="sect2">
<h3 id="_requirements_section">Requirements section</h3>
<div class="sect3">
<h4 id="_header_2">Header</h4>
<div class="listingblock">
<div class="content">
<pre><tt>[requirements]</tt></pre>
<pre><code>[requirements]</code></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_wait_on_other_tests">Wait on Other Tests</h4>
<div class="listingblock">
<div class="content">
<pre><tt># A normal waiton waits for the prior tests to be COMPLETED
<pre><code># A normal waiton waits for the prior tests to be COMPLETED
# and PASS, CHECK or WAIVED
waiton test1 test2</tt></pre>
waiton test1 test2</code></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_mode">Mode</h4>
<div class="paragraph"><p>The default (i.e. if mode is not specified) is normal. All pre-dependent tests
must be COMPLETED and PASS, CHECK or WAIVED before the test will start</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>mode   normal</tt></pre>
<pre><code>mode   normal</code></pre>
</div></div>
<div class="paragraph"><p>The toplevel mode requires only that the prior tests are COMPLETED.</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>mode toplevel</tt></pre>
<pre><code>mode toplevel</code></pre>
</div></div>
<div class="paragraph"><p>A item based waiton will start items in a test when the
same-named item is COMPLETED and PASS, CHECK or WAIVED
in the prior test</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>mode itemmatch</tt></pre>
<pre><code>mode itemmatch</code></pre>
</div></div>
<div class="listingblock">
<div class="content">
<pre><tt># With a toplevel test you may wish to generate your list
<pre><code># With a toplevel test you may wish to generate your list
# of tests to run dynamically
#
# waiton #{shell get-valid-tests-to-run.sh}</tt></pre>
# waiton #{shell get-valid-tests-to-run.sh}</code></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_run_time_limit">Run time limit</h4>
<div class="listingblock">
<div class="content">
<pre><tt>runtimelim 1h 2m 3s  # this will automatically kill the test if it runs for more than 1h 2m and 3s</tt></pre>
<pre><code>runtimelim 1h 2m 3s  # this will automatically kill the test if it runs for more than 1h 2m and 3s</code></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_skip">Skip</h4>
</div>
<div class="sect3">
<h4 id="_header_3">Header</h4>
<div class="listingblock">
<div class="content">
<pre><tt>[skip]</tt></pre>
<pre><code>[skip]</code></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_skip_on_still_running_tests">Skip on Still-running Tests</h4>
<div class="listingblock">
<div class="content">
<pre><tt># NB// If the prevrunning line exists with *any* value the test will
<pre><code># NB// If the prevrunning line exists with *any* value the test will
# automatically SKIP if the same-named test is currently RUNNING

prevrunning x</tt></pre>
prevrunning x</code></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_skip_if_a_file_exists">Skip if a File Exists</h4>
<div class="listingblock">
<div class="content">
<pre><tt>fileexists /path/to/a/file # skip if /path/to/a/file exists</tt></pre>
<pre><code>fileexists /path/to/a/file # skip if /path/to/a/file exists</code></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_controlled_waiver_propagation">Controlled waiver propagation</h4>
<div class="paragraph"><p>If test is FAIL and previous test in run with same MT_TARGET is WAIVED then apply the following rules from the testconfig:
If a waiver check is specified in the testconfig apply the check and if it passes then set this FAIL to WAIVED</p></div>
<div class="paragraph"><p>Waiver check has two parts, 1) a list of waiver, rulename, filepatterns and 2) the rulename script spec (note that "diff" and "logpro" are predefined)</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>###### EXAMPLE FROM testconfig #########
<pre><code>###### EXAMPLE FROM testconfig #########
# matching file(s) will be diff'd with previous run and logpro applied
# if PASS or WARN result from logpro then WAIVER state is set
#
[waivers]
# logpro_file    rulename      input_glob
waiver_1         logpro        lookittmp.log

[waiver_rules]

# This builtin rule is the default if there is no &lt;waivername&gt;.logpro file
# diff   diff %file1% %file2%

# This builtin rule is applied if a &lt;waivername&gt;.logpro file exists
# logpro diff %file1% %file2% | logpro %waivername%.logpro %waivername%.html</tt></pre>
# logpro diff %file1% %file2% | logpro %waivername%.logpro %waivername%.html</code></pre>
</div></div>
</div>
</div>
<div class="sect2">
<h3 id="_ezsteps">Ezsteps</h3>
<div class="paragraph"><p>To transfer the environment to the next step you can do the following:</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>$MT_MEGATEST -env2file .ezsteps/${stepname}</tt></pre>
<pre><code>$MT_MEGATEST -env2file .ezsteps/${stepname}</code></pre>
</div></div>
</div>
<div class="sect2">
<h3 id="_triggers">Triggers</h3>
<div class="paragraph"><p>In your testconfig triggers can be specified</p></div>
<div class="listingblock">
<div class="content">
<pre><code>[triggers]

# Call script running.sh when test goes to state=RUNNING, status=PASS
RUNNING/PASS running.sh

# Call script running.sh any time state goes to RUNNING
RUNNING/ running.sh

# Call script onpass.sh any time status goes to PASS
PASS/ onpass.sh</code></pre>
</div></div>
<div class="paragraph"><p>Scripts called will have; test-id test-rundir trigger, added to the commandline.</p></div>
<div class="paragraph"><p>HINT</p></div>
<div class="paragraph"><p>To start an xterm (useful for debugging), use a command line like the following:</p></div>
<div class="listingblock">
<div class="content">
<pre><code>[triggers]
COMPLETED/ xterm -e bash -s --</code></pre>
</div></div>
<div class="admonitionblock">
<table><tr>
<td class="icon">
<div class="title">Note</div>
</td>
<td class="content">There is a trailing space after the --</td>
</tr></table>
</div>
</div>
<div class="sect2">
<h3 id="_megatest_internals">Megatest Internals</h3>
<div class="imageblock graphviz">
<div class="content">
<img src="server.png" alt="server.png" />
</div>
</div>
</div>
</div>
</div>
<div class="sect1">
<h2 id="_example_appendix">Appendix A: Example Appendix</h2>
<div class="sectionbody">
<div class="paragraph"><p>One or more optional appendixes go here at section level zero.</p></div>
<div class="sect2">
<h3 id="_appendix_sub_section">Appendix Sub-section</h3>
<div class="admonitionblock">
<table><tr>
<td class="icon">
<div class="title">Note</div>
</td>
<td class="content">Preface and appendix subsections start out of sequence at level
2 (level 1 is skipped).  This only applies to multi-part book
documents.</td>
</tr></table>
</div>
</div>
</div>
</div>
<div class="sect1">
<h2 id="_example_bibliography">Example Bibliography</h2>
<div class="sectionbody">
<div class="paragraph"><p>The bibliography list is a style of AsciiDoc bulleted list.</p></div>
<div class="ulist bibliography"><ul>
<li>
<p>
<a id="taoup"></a>[taoup] Eric Steven Raymond. <em>The Art of Unix
  Programming</em>. Addison-Wesley. ISBN 0-13-142901-9.
</p>
</li>
<li>
<p>
<a id="walsh-muellner"></a>[walsh-muellner] Norman Walsh &amp; Leonard Muellner.
  <em>DocBook - The Definitive Guide</em>. O&#8217;Reilly &amp; Associates. 1999.
  ISBN 1-56592-580-7.
</p>
</li>
</ul></div>
</div>
</div>
<div class="sect1">
<h2 id="_example_glossary">Example Glossary</h2>
<div class="sectionbody">
<div class="paragraph"><p>Glossaries are optional. Glossaries entries are an example of a style
of AsciiDoc labeled lists.</p></div>
<div class="dlist glossary"><dl>
<dt>
A glossary term
</dt>
<dd>
<p>
  The corresponding (indented) definition.
</p>
</dd>
<dt>
A second glossary term
</dt>
<dd>
<p>
  The corresponding (indented) definition.
</p>
</dd>
</dl></div>
</div>
</div>
<div class="sect1">
<h2 id="_example_colophon">Example Colophon</h2>
<div class="sectionbody">
<div class="paragraph"><p>Text at the end of a book describing facts about its production.</p></div>
</div></div>
</div>
</div>
<div class="sect1">
<h2 id="_example_index">Example Index</h2>
<div class="sectionbody">
</div>
</div>
</div>
<div id="footnotes"><hr /></div>
<div id="footer">
<div id="footer-text">
Version 1.0<br />
Last updated 2014-02-18 07:24:48 MST
Last updated 2014-10-08 23:02:21 MST
</div>
</div>
</body>
</html>

Modified docs/manual/reference.txt from [b45aa0231c] to [eff8aa5426].

146
147
148
149
150
151
152































153
154
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+



To transfer the environment to the next step you can do the following:

----------------------------
$MT_MEGATEST -env2file .ezsteps/${stepname}	  
----------------------------

Triggers
~~~~~~~~

In your testconfig triggers can be specified 

-----------------
[triggers]

# Call script running.sh when test goes to state=RUNNING, status=PASS
RUNNING/PASS running.sh

# Call script running.sh any time state goes to RUNNING
RUNNING/ running.sh

# Call script onpass.sh any time status goes to PASS
PASS/ onpass.sh
-----------------

Scripts called will have; test-id test-rundir trigger, added to the commandline.

HINT

To start an xterm (useful for debugging), use a command line like the following:

-----------------
[triggers]
COMPLETED/ xterm -e bash -s -- 
-----------------

NOTE: There is a trailing space after the --

:numbered!:

Modified docs/manual/server.png from [ae7d7ee58e] to [a508d3edd1].

cannot compute difference between binary files

Modified http-transport.scm from [9b05b6d402] to [f3dd18aa3b].

246
247
248
249
250
251
252



253

254
255
256
257

258
259
260
261
262
263
264
265
266
267
268
269
270
271
272






273
274
275
276
277
278






279
280
281
282
283
284
285
246
247
248
249
250
251
252
253
254
255

256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282






283
284
285
286
287
288
289
290
291
292
293
294
295







+
+
+
-
+




+















+
+
+
+
+
+
-
-
-
-
-
-
+
+
+
+
+
+







	 (res        #f))
    (handle-exceptions
     exn
     (if (> numretries 0)
	 (begin
	   (mutex-unlock! *http-mutex*)
	   (thread-sleep! 1)
	   (handle-exceptions
	    exn
	    (debug:print 0 "WARNING: closing connections failed. Server at " fullurl " almost certainly dead")
	   (close-all-connections!)
	    (close-all-connections!))
	   (debug:print 0 "WARNING: Failed to communicate with server, trying again, numretries left: " numretries)
	   (http-transport:client-api-send-receive run-id serverdat cmd params numretries: (- numretries 1)))
	 (begin
	   (mutex-unlock! *http-mutex*)
	   (tasks:kill-server-run-id run-id)
	   #f))
     (begin
       (debug:print-info 11 "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n")
       ;; set up the http-client here
       (max-retry-attempts 1)
       ;; consider all requests indempotent
       (retry-request? (lambda (request)
			 #f))
       ;; send the data and get the response
       ;; extract the needed info from the http data and 
       ;; process and return it.
       (let* ((send-recieve (lambda ()
			      (mutex-lock! *http-mutex*)
			      ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines)
			      ;;					       ((exn http client-error) e (print e)))
			      (set! res (handle-exceptions
					 exn
					 (begin
					   (debug:print 0 "ERROR: failure in with-input-from-request. Giving up.")
					   (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
					   #f)
			      (set! res (with-input-from-request ;; was dat
					 fullurl 
					 (list (cons 'key "thekey")
					       (cons 'cmd cmd)
					       (cons 'params params))
					 read-string))
					 (with-input-from-request ;; was dat
					  fullurl 
					  (list (cons 'key "thekey")
						(cons 'cmd cmd)
						(cons 'params params))
					  read-string)))
			      ;; Shouldn't this be a call to the managed call-all-connections stuff above?
			      (close-all-connections!)
			      (mutex-unlock! *http-mutex*)
			      ))
	      (time-out     (lambda ()
			      (thread-sleep! 45)
			      #f))
305
306
307
308
309
310
311

312
313
314
315
316
317
318
319
320
321



322
323
324
325
326
327
328
329

330
331
332
333
334
335
336
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342

343
344
345
346
347
348
349
350







+










+
+
+







-
+








(define (make-http-transport:server-dat)(make-vector 5))
(define (http-transport:server-dat-get-iface         vec)    (vector-ref  vec 0))
(define (http-transport:server-dat-get-port          vec)    (vector-ref  vec 1))
(define (http-transport:server-dat-get-api-uri       vec)    (vector-ref  vec 2))
(define (http-transport:server-dat-get-api-url       vec)    (vector-ref  vec 3))
(define (http-transport:server-dat-get-api-req       vec)    (vector-ref  vec 4))
(define (http-transport:server-dat-get-last-access   vec)    (vector-ref  vec 5))

(define (http-transport:server-dat-make-url vec)
  (if (and (http-transport:server-dat-get-iface vec)
	   (http-transport:server-dat-get-port  vec))
      (conc "http://" 
	    (http-transport:server-dat-get-iface vec)
	    ":"
	    (http-transport:server-dat-get-port  vec))
      #f))

(define (http-transport:server-dat-update-last-access vec)
  (vector-set! vec 5 (current-seconds)))

;;
;; connect
;;
(define (http-transport:client-connect iface port)
  (let* ((api-url      (conc "http://" iface ":" port "/api"))
	 (api-uri      (uri-reference (conc "http://" iface ":" port "/api")))
	 (api-req      (make-request method: 'POST uri: api-uri))
	 (server-dat   (vector iface port api-uri api-url api-req)))
	 (server-dat   (vector iface port api-uri api-url api-req (current-seconds))))
    server-dat))

;; run http-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running server-id run-id)
  ;; if none running or if > 20 seconds since 
522
523
524
525
526
527
528

529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550







+







	  (thread-start! th2)
	  (thread-start! th3)
	  (set! *didsomething* #t)
	  (thread-join! th2)
	  (exit)))))

(define (http-transport:server-signal-handler signum)
  (signal-mask! signum)
  (handle-exceptions
   exn
   (debug:print " ... exiting ...")
   (let ((th1 (make-thread (lambda ()
			     (thread-sleep! 1))
			   "eat response"))
	 (th2 (make-thread (lambda ()

Modified launch.scm from [acb41eb596] to [1d44ba3939].

89
90
91
92
93
94
95


96
97
98
99
100
101
102
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104







+
+







	                                  (if (and (file-exists? fulln)
                                                   (file-execute-access? fulln))
                                              fulln
                                              runscript))))) ;; assume it is on the path
	       (rollup-status 0))
	  (change-directory top-path)

	  ;; (set-signal-handler! signal/int (lambda ()
					    
	  ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,
	  ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
	  ;;
	  (let ((test-info (rmt:get-testinfo-state-status run-id test-id)))
	    (if (not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
		(tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
		(debug:print 0 "ERROR: test state is " (db:test-get-state test-info) ", cannot proceed")))
205
206
207
208
209
210
211

212
213
214
215
216
217
218
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221







+







				 (tests:test-force-state-status! run-id test-id "RUNNING" "n/a")
				 (rmt:roll-up-pass-fail-counts run-id test-name item-path "RUNNING")
				 (thread-sleep! 0.3) ;; NFS slowness has caused grief here

				 ;; if there is a runscript do it first
				 (if fullrunscript
				     (let ((pid (process-run fullrunscript)))
				       (rmt:test-set-top-process-pid run-id test-id pid)
				       (let loop ((i 0))
					 (let-values
					  (((pid-val exit-status exit-code) (process-wait pid #t)))
					  (mutex-lock! m)
					  (vector-set! exit-info 0 pid)
					  (vector-set! exit-info 1 exit-status)
					  (vector-set! exit-info 2 exit-code)
260
261
262
263
264
265
266

267
268
269
270
271
272
273
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277







+







						   ;; call the command using mt_ezstep
						   (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "-") " " stepcmd))

						   (debug:print 4 "script: " script)
						   (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f)
						   ;; now launch
						   (let ((pid (process-run script)))
						     (rmt:test-set-top-process-pid run-id test-id pid)
						     (let processloop ((i 0))
						       (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
								   (mutex-lock! m)
								   (vector-set! exit-info 0 pid)
								   (vector-set! exit-info 1 exit-status)
								   (vector-set! exit-info 2 exit-code)
								   (mutex-unlock! m)
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360









361
362
363
364




365
366
367
368
369
370
371
372
373


374
375

376
377
378
379





380
381

382
383

384
385
386
387



388
389
390
391
392
393
394
395
396
397
398
399

400
401
402
403
404
405
406
343
344
345
346
347
348
349


350


351
352
353
354
355
356




357
358
359
360
361
362
363
364
365




366
367
368
369









370
371


372




373
374
375
376
377
378

379


380

381


382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404







-
-

-
-






-
-
-
-
+
+
+
+
+
+
+
+
+
-
-
-
-
+
+
+
+
-
-
-
-
-
-
-
-
-
+
+
-
-
+
-
-
-
-
+
+
+
+
+

-
+
-
-
+
-

-
-
+
+
+












+







							   (and runtlim (let* ((run-seconds   (- (current-seconds) start-seconds))
									       (time-exceeded (> run-seconds runtlim)))
									  (if time-exceeded
									      (begin
										(debug:print-info 0 "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" run-seconds " seconds, limit=" runtlim)
										#t)
									      #f)))))
				       ;; open-run-close not needed for test-set-meta-info
				       ;; (tests:set-partial-meta-info #f test-id run-id minutes work-area)
				       (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f)
				       ;; (tests:set-partial-meta-info #f test-id run-id minutes work-area 10)
				       ;; (tests:set-partial-meta-info test-id run-id minutes work-area)
				       (if kill-job? 
					   (begin
					     (mutex-lock! m)
					     ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
					     ;;       section and the runit section? Or add a loop that tries three times with a 1/4 second
					     ;;       between tries?
					     (let* ((pid (vector-ref exit-info 0)))
					       (if (number? pid)
						   (handle-exceptions
						    exn
					     (let* ((pid1 (vector-ref exit-info 0))
						    (pid2 (rmt:test-get-top-process-pid run-id test-id))
						    (pids (delete-duplicates (filter number? (list pid1 pid2)))))
					       (if (not (null? pids))
						   (begin
						     (for-each
						      (lambda (pid)
							(handle-exceptions
							 exn
						    (debug:print-info 0 "Unable to kill process with pid " pid ", possibly already killed.")
						    ;;(process-signal pid signal/kill))
						    (begin
						      (debug:print 0 "WARNING: Request received to kill job (attempt # " kill-tries ")")
							 (begin
							   (debug:print-info 0 "Unable to kill process with pid " pid ", possibly already killed.")
							   (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)))
							 (debug:print 0 "WARNING: Request received to kill job " pid) ;;  " (attempt # " kill-tries ")")
						      (let ((processes (cmd-run->list (conc "pgrep -l -P " pid))))
							(for-each 
							 (lambda (p)
							   (let* ((parts  (string-split p))
								  (p-id   (if (> (length parts) 0)
									      (string->number (car parts))
									      #f)))
							     (if p-id
								 (begin
							 (if (process:alive? pid)
							     (begin
								   (debug:print 0 "Killing " (cadr parts) "; kill -9  " p-id)
								   ;; (process-signal pid signal/kill))))) ;; 
							       (process-signal pid signal/int)
								   (system (conc "kill -9 " p-id))))))
							 (car processes)))
						      (system (conc "kill -9 -" pid))
						      (tests:test-set-status! test-id "KILLED"  "KILLED" (args:get-arg "-m") #f)))
							       (thread-sleep! 5)
							       (if (process:process-alive? pid)
								   (process-signal pid signal/kill))))))
						      pids)
						     (tests:test-set-status! run-id test-id "KILLED"  "KILLED" (args:get-arg "-m") #f))
						   (begin
						     (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process")
						     (debug:print 0 "ERROR: Nothing to kill, pid1=" pid1 ", pid2=" pid2)
						     (tests:test-set-status! test-id "KILLED"  "KILLED" (args:get-arg "-m") #f)
						     (tests:test-set-status! run-id test-id "KILLED"  "FAIL" (args:get-arg "-m") #f)
						     (tests:test-set-status! run-id test-id "KILLED"  "FAILED TO KILL" (args:get-arg "-m") #f)
						     (exit 1) ;; IS THIS NECESSARY OR WISE???
						     )))
					     (set! kill-tries (+ 1 kill-tries))
					     (mutex-unlock! m)))
					     (mutex-unlock! m)
					     ;; no point in sticking around. Exit now.
					     (exit)))
				       (if keep-going
					   (begin
					     (thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses
					     (if keep-going
						 (loop (calc-minutes)))))))
				   (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f)))) ;; NOTE: Checking twice for keep-going is intentional
		 (th1          (make-thread monitorjob "monitor job"))
		 (th2          (make-thread runit "run job")))
	    (set! job-thread th2)
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th2)
	    (debug:print-info 0 "Megatest exectute of test " test-name ", item path " item-path " complete. Notifying the db ...")
	    (set! keep-going #f)
	    (thread-join! th1)
	    (thread-sleep! 1)       ;; give thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec.
	    (mutex-lock! m)
	    (let* ((item-path (item-list->path itemdat))
		   ;; only state and status needed - use lazy routine
		   (testinfo  (rmt:get-testinfo-state-status run-id test-id)))
457
458
459
460
461
462
463

464
465
466
467
468
469
470
471
472

473


474
475
476
477
478
479
480
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472

473
474
475
476
477
478
479
480
481







+









+
-
+
+







	  (if linktree
	      (if (not (file-exists? linktree))
		  (begin
		    (handle-exceptions
		     exn
		     (begin
		       (debug:print 0 "ERROR: Something went wrong when trying to create linktree dir at " linktree)
		       (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
		       (exit 1))
		     (create-directory linktree #t))))
	      (begin
		(debug:print 0 "ERROR: linktree not defined in [setup] section of megatest.config")
		(exit 1)))
	  (if linktree
	      (let ((dbdir (conc linktree "/.db")))
		(handle-exceptions
		 exn
		 (begin
		 (debug:print 0 "ERROR: failed to create the " dbdir " area for your database files")
		   (debug:print 0 "ERROR: failed to create the " dbdir " area for your database files")
		   (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)))
		 (if (not (directory-exists? dbdir))(create-directory dbdir)))
		(setenv "MT_LINKTREE" linktree))
	      (begin
		(debug:print 0 "ERROR: linktree is required in your megatest.config [setup] section")
		(exit 1)))
	  (if (and *toppath*
		   (directory-exists? *toppath*))

Modified lock-queue.scm from [0c7d16446b] to [31ed29958c].

34
35
36
37
38
39
40



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55















56
57
58
59
60
61
62
63
64

65
66
67
68
69
70
71
72
73
74
75
76
77
78
79

80
81
82
83
84
85
86
34
35
36
37
38
39
40
41
42
43















44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91







+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+









+















+







	  (handle-exceptions
	   exn
	   (begin
	     (thread-sleep! 10)
	     (if (> count 0)
		 (lock-queue:open-db fname count: (- count 1))
		 db))
	   (sqlite3:with-transaction
	    db
	    (lambda ()
	   (sqlite3:execute 
	    db
	    "CREATE TABLE IF NOT EXISTS queue (
  	      id         INTEGER PRIMARY KEY,
              test_id    INTEGER,
              start_time INTEGER,
              state      TEXT,
              CONSTRAINT queue_constraint UNIQUE (test_id));")
	   (sqlite3:execute
	    db
	    "CREATE TABLE IF NOT EXISTS runlocks (
              id         INTEGER PRIMARY KEY,
              test_id    INTEGER,
              run_lock   TEXT,
              CONSTRAINT runlock_constraint UNIQUE (run_lock));"))))
	      (sqlite3:execute 
	       db
	       "CREATE TABLE IF NOT EXISTS queue (
     	         id         INTEGER PRIMARY KEY,
                 test_id    INTEGER,
                 start_time INTEGER,
                 state      TEXT,
                 CONSTRAINT queue_constraint UNIQUE (test_id));")
	      (sqlite3:execute
	       db
	       "CREATE TABLE IF NOT EXISTS runlocks (
                 id         INTEGER PRIMARY KEY,
                 test_id    INTEGER,
                 run_lock   TEXT,
                 CONSTRAINT runlock_constraint UNIQUE (run_lock));"))))))
    (sqlite3:set-busy-handler! db handler)
    db))

(define (lock-queue:set-state db test-id newstate #!key (remtries 10))
  (handle-exceptions
   exn
   (if (> remtries 0)
       (begin
	 (debug:print 0 "WARNING: exception on lock-queue:set-state. Trying again in 30 seconds.")
	 (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	 (thread-sleep! 30)
	 (lock-queue:set-state db test-id newstate remtries: (- remtries 1)))
       (begin
	 (debug:print 0 "ERROR:  Failed to set lock state for test with id " test-id ", error: " ((condition-property-accessor 'exn 'message) exn) ", giving up.")
	 #f))
   (sqlite3:execute db "UPDATE queue SET state=? WHERE test_id=?;"
		    newstate
		    test-id)))

(define (lock-queue:any-younger? db mystart test-id #!key (remtries 10))
  (handle-exceptions
   exn
   (if (> remtries 0)
       (begin
	 (debug:print 0 "WARNING: exception on lock-queue:any-younger. Trying again in 30 seconds.")
	 (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	 (thread-sleep! 30)
	 (lock-queue:any-younger? db mystart test-id remtries: (- remtries 1)))
       (begin
	 (debug:print 0 "ERROR:  Failed to find younger locks for test with id " test-id ", error: " ((condition-property-accessor 'exn 'message) exn) ", giving up.")
	 #f))
   (let ((res #f))
     (sqlite3:for-each-row
96
97
98
99
100
101
102


103
104
105
106
107
108
109
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116







+
+







  (let ((res       #f)
	(lckqry    (sqlite3:prepare db "SELECT test_id,run_lock FROM runlocks WHERE run_lock='locked';"))
	(mklckqry  (sqlite3:prepare db "INSERT INTO runlocks (test_id,run_lock) VALUES (?,'locked');")))
    (let ((result 
	   (handle-exceptions
	    exn
	    (begin
	      (debug:print 0 "WARNING: failed to get queue lock. Will try again in a few seconds")
	      (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	      (thread-sleep! 10)
	      (if (> count 0)
		  (lock-queue:get-lock db test-id count: (- count 1)))
	      #f)
	    (sqlite3:with-transaction
	     db
	     (lambda ()
123
124
125
126
127
128
129


130
131
132
133
134
135
136
137
138
139
140


141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157


158
159
160
161
162
163
164
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177







+
+











+
+

















+
+







      result)))

(define (lock-queue:release-lock fname test-id #!key (count 10))
  (let ((db (lock-queue:open-db fname)))
    (handle-exceptions
     exn
     (begin
       (debug:print 0 "WARNING: Failed to release queue lock. Will try again in few seconds")
       (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
       (thread-sleep! 10)
       (if (> count 0)
	   (lock-queue:release-lock fname test-id count: (- count 1))
	   #f))
     (sqlite3:execute db "DELETE FROM runlocks WHERE test_id=?;" test-id)
     (sqlite3:finalize! db))))

(define (lock-queue:steal-lock db test-id #!key (count 10))
  (handle-exceptions
   exn
   (begin
     (debug:print 0 "WARNING: Failed to steal queue lock. Will try again in few seconds")
     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
     (thread-sleep! 10)
     (if (> count 0)
	 (lock-queue:steal-lock db test-id count: (- count 1))
	 #f))
   (sqlite3:execute db "DELETE FROM runlocks WHERE run_lock='locked';"))
  (lock-queue:get-lock db test-it))

;; returns #f if ok to skip the task
;; returns #t if ok to proceed with task
;; otherwise waits
;;
(define (lock-queue:wait-turn fname test-id #!key (count 10))
  (let ((db      (lock-queue:open-db fname))
	(mystart (current-seconds)))
    (handle-exceptions
     exn
     (begin
       (debug:print 0 "WARNING: Failed to find out if it is ok to skip the wait queue. Will try again in few seconds")
       (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
       (thread-sleep! 10)
       (if (> count 0)
	   (lock-queue:wait-turn fname test-id count: (- count 1))
	   #f))
     (sqlite3:execute
      db
      "INSERT OR REPLACE INTO queue (test_id,start_time,state) VALUES (?,?,'waiting');"

Modified megatest-version.scm from [b5b5f125e2] to [99deda71f1].

1
2
3
4
5
6

7
1
2
3
4
5

6
7





-
+

;; Always use two or four digit decimal
;; 1.01, 1.02...1.10,1.11,1,1101 ... 1.99,2.00..

(declare (unit megatest-version))

(define megatest-version 1.6003)
(define megatest-version 1.6005)

Modified megatest.scm from [edf456cc83] to [93df8109f6].

1
2
3
4
5
6
7
8
9
10
11
12
13



14
15
16
17
18
19
20
1
2
3
4
5
6
7
8
9
10
11
12

13
14
15
16
17
18
19
20
21
22












-
+
+
+







;; Copyright 2006-2012, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.

;; (include "common.scm")
;; (include "megatest-version.scm")

(use sqlite3 srfi-1 posix regex regex-case srfi-69 base64 format readline apropos json http-client directory-utils z3) ;; (srfi 18) extras)
(use sqlite3 srfi-1 posix regex regex-case srfi-69 base64 format readline apropos json 
     http-client directory-utils z3 srfi-18) ;;  extras)

(import (prefix sqlite3 sqlite3:))
(import (prefix base64 base64:))

;; (use zmq)

(declare (uses common))
(declare (uses megatest-version))
277
278
279
280
281
282
283
























































284
285
286
287
288
289
290
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







			"-logging"
			"-v" ;; verbose 2, more than normal (normal is 1)
			"-q" ;; quiet 0, errors/warnings only
		       )
		 args:arg-hash
		 0))

;; The watchdog is to keep an eye on things like db sync etc.
;;
(define *watchdog*
  (make-thread 
   (lambda ()
     (let loop ()
       (thread-sleep! 5) ;; five second resolution is only a minor burden and should be tolerable 

       ;; sync for filesystem local db writes
       ;;
       (let ((start-time (current-seconds)))
	 (mutex-lock! *db-multi-sync-mutex*)
	 (for-each 
	  (lambda (run-id)
	    (let ((last-write (hash-table-ref/default *db-local-sync* run-id 0)))
	      (if ;; (and 
	       (> (- start-time last-write) 5) ;; every five seconds
	       ;;      (common:db-access-allowed?))
	       (begin
		 (db:multi-db-sync (list run-id) 'new2old)
		 (if (common:low-noise-print 30 "sync new to old")
		     (debug:print-info 0 "Sync of newdb to olddb for run-id " run-id " completed in " (- (current-seconds) start-time) " seconds"))
		 (hash-table-delete! *db-local-sync* run-id)))))
	  (hash-table-keys *db-local-sync*))
	 (mutex-unlock! *db-multi-sync-mutex*))
       
       ;; keep going unless time to exit
       ;;
       (if (not *time-to-exit*)
	   (loop))))
   "Watchdog thread"))

(thread-start! *watchdog*)

(define (std-exit-procedure)
  (rmt:print-db-stats)
  (let ((run-ids (hash-table-keys *db-local-sync*)))
    (if (not (null? run-ids))
	(db:multi-db-sync run-ids 'new2old)))
  (if *dbstruct-db* (db:close-all *dbstruct-db*))
  (if *megatest-db* (begin
		      (sqlite3:interrupt! *megatest-db*)
		      (sqlite3:finalize! *megatest-db* #t)))
  (if *task-db*     (let ((db (vector-ref *task-db* 0)))
		      (sqlite3:interrupt! db)
		      (sqlite3:finalize! db #t))))

(define (std-signal-handler signum)
  (signal-mask! signum)
  (debug:print 0 "ERROR: Received signal " signum " exiting promptly")
  (std-exit-procedure)
  (exit))

(set-signal-handler! signal/int std-signal-handler)
(set-signal-handler! signal/term std-signal-handler)

(if (args:get-arg "-log")
    (let ((oup (open-output-file (args:get-arg "-log"))))
      (debug:print-info 0 "Sending log output to " (args:get-arg "-log"))
      (current-error-port oup)
      (current-output-port oup)))

(if (or (args:get-arg "-h")
345
346
347
348
349
350
351
352

353
354
355
356
357
358
359
360
361
362
363
364
365
366
403
404
405
406
407
408
409

410







411
412
413
414
415
416
417







-
+
-
-
-
-
-
-
-








(if (args:get-arg "-itempatt")
    (let ((newval (conc (args:get-arg "-testpatt") "/" (args:get-arg "-itempatt"))))
      (debug:print 0 "WARNING: -itempatt has been deprecated, please use -testpatt testpatt/itempatt method, new testpatt is "newval)
      (hash-table-set! args:arg-hash "-testpatt" newval)
      (hash-table-delete! args:arg-hash "-itempatt")))

(on-exit (lambda ()
;; (on-exit std-exit-procedure)
	   (rmt:print-db-stats)
	   (let ((run-ids (hash-table-keys *db-local-sync*)))
	     (if (not (null? run-ids))
		 (db:multi-db-sync run-ids 'new2old)))
	   (if *dbstruct-db* (db:close-all *dbstruct-db*))
	   (if *megatest-db* (sqlite3:finalize! *megatest-db*))
	   (if *task-db*     (sqlite3:finalize! (vector-ref *task-db* 0)))))

;;======================================================================
;; Misc general calls
;;======================================================================

(if (args:get-arg "-env2file")
    (begin
1202
1203
1204
1205
1206
1207
1208
1209










1210
1211
1212
1213
1214
1215
1216
1253
1254
1255
1256
1257
1258
1259

1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276







-
+
+
+
+
+
+
+
+
+
+







(if (args:get-arg "-cleanup-db")
    (begin
      (if (not (launch:setup-for-run))
	  (begin
	    (debug:print 0 "Failed to setup, exiting") 
	    (exit 1)))
      ;; keep this one local
      (open-run-close db:clean-up #f)
      ;; (open-run-close db:clean-up #f)
      (db:multi-db-sync 
       #f ;; do all run-ids
       'new2old
       'killservers
       'dejunk
       'adj-testids
       'old2new
       'new2old
       )
      (set! *didsomething* #t)))

(if (args:get-arg "-mark-incompletes")
    (begin
      (if (not (launch:setup-for-run))
	  (begin
	    (debug:print 0 "Failed to setup, exiting") 
1327
1328
1329
1330
1331
1332
1333



1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406







+
+
+










;;======================================================================

(if *runremote* (close-all-connections!))

(if (not *didsomething*)
    (debug:print 0 help))

(set! *time-to-exit* #t)
(thread-join! *watchdog*)

(if (not (eq? *globalexitstatus* 0))
    (if (or (args:get-arg "-runtests")(args:get-arg "-runall"))
        (begin
           (debug:print 0 "NOTE: Subprocesses with non-zero exit code detected: " *globalexitstatus*)
           (exit 0))
        (case *globalexitstatus*
         ((0)(exit 0))
         ((1)(exit 1))
         ((2)(exit 2))
         (else (exit 3)))))

Modified mt.scm from [77d6104e75] to [fdb95af183].

163
164
165
166
167
168
169






170
171
172
173
174
175
176
177
178
179
180











181
182
183
184
185
186
187
163
164
165
166
167
168
169
170
171
172
173
174
175











176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193







+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+








;;======================================================================
;;  S T A T E   A N D   S T A T U S   F O R   T E S T S 
;;======================================================================

;; speed up for common cases with a little logic
(define (mt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
  (if (not (and run-id test-id))
      (begin
	(debug:print 0 "ERROR: bad data handed to mt:test-set-state-status-by-id, run-id=" run-id ", test-id=" test-id ", newstate=" newstate)
	(print-call-chain)
	#f)
      (begin
  (cond
   ((and newstate newstatus newcomment)
    (rmt:general-call 'state-status-msg run-id newstate newstatus newcomment test-id))
   ((and newstate newstatus)
    (rmt:general-call 'state-status run-id newstate newstatus test-id))
   (else
    (if newstate   (rmt:general-call 'set-test-state   run-id newstate   test-id))
    (if newstatus  (rmt:general-call 'set-test-status  run-id newstatus  test-id))
    (if newcomment (rmt:general-call 'set-test-comment run-id newcomment test-id))))
   (mt:process-triggers run-id test-id newstate newstatus)
   #t)
	(cond
	 ((and newstate newstatus newcomment)
	  (rmt:general-call 'state-status-msg run-id newstate newstatus newcomment test-id))
	 ((and newstate newstatus)
	  (rmt:general-call 'state-status run-id newstate newstatus test-id))
	 (else
	  (if newstate   (rmt:general-call 'set-test-state   run-id newstate   test-id))
	  (if newstatus  (rmt:general-call 'set-test-status  run-id newstatus  test-id))
	  (if newcomment (rmt:general-call 'set-test-comment run-id newcomment test-id))))
	(mt:process-triggers run-id test-id newstate newstatus)
	#t)))

(define (mt:test-set-state-status-by-testname run-id test-name item-path new-state new-status new-comment)
  (let ((test-id (cdb:remote-run db:get-test-id-cached #f run-id test-name item-path)))
    (mt:test-set-state-status-by-id test-id new-state new-status new-comment)))

(define (mt:lazy-read-test-config test-name)
  (let ((tconf (hash-table-ref/default *testconfigs* test-name #f)))

Modified portlogger.scm from [3222aa8dd9] to [614321ce45].

29
30
31
32
33
34
35
36

37
38
39
40
41
42
43
29
30
31
32
33
34
35

36
37
38
39
40
41
42
43







-
+







	 (handler  (make-busy-timeout 136000))
	 (canwrite (file-write-access? fname)))
    (sqlite3:set-busy-handler! db handler)
    (sqlite3:execute db "PRAGMA synchronous = 0;")
    (if (not exists)
	(sqlite3:execute 
	 db
	 "CREATE TABLE ports (
	 "CREATE TABLE IF NOT EXISTS ports (
            port INTEGER PRIMARY KEY,
            state TEXT DEFAULT 'not-used',
            fail_count INTEGER DEFAULT 0,
            update_time TIMESTAMP DEFAULT (strftime('%s','now')) );"))
    db))

(define (portlogger:open-run-close proc . params)
86
87
88
89
90
91
92









93
94
95
96
97
98






99
100
101
102
103
104
105
106
107
108








109

110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127


128
129
130
131
132
133
134
135
136
137


















138
139
140
141
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101






102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125

126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143

144
145
146









147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168







+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
+
+
+
+
+
+










+
+
+
+
+
+
+
+
-
+

















-
+
+

-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+




		    res)))))
    (sqlite3:finalize! qry1)
    (sqlite3:finalize! qry2)
    (sqlite3:finalize! qry3)
    res))

(define (portlogger:get-prev-used-port db)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db")
     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
     (debug:print 0 "exn=" (condition->list exn))
     (print-call-chain)
     (debug:print 0 "Continuing anyway.")
     #f)
  (sqlite3:fold-row
   (lambda (var curr)
     (or curr var curr))
   #f
   db
   "SELECT (port) FROM ports WHERE state='released' LIMIT 1;"))
   (sqlite3:fold-row
    (lambda (var curr)
      (or curr var curr))
    #f
    db
    "SELECT (port) FROM ports WHERE state='released' LIMIT 1;")))

(define (portlogger:find-port db)
  (let* ((lowport (let ((val (configf:lookup *configdat* "server" "lowport")))
		    (if (and val 
			     (string->number val))
			(string->number val)
			32768)))
	 (portnum (or (portlogger:get-prev-used-port db)
		      (+ lowport ;; top of registered ports is 49152 but lets use ports in the registered range
			 (random (- 64000 lowport))))))
    (handle-exceptions
     exn
     (begin
       (debug:print 0 "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db")
       (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
       (debug:print 0 "exn=" (condition->list exn))
       (print-call-chain)
       (debug:print 0 "Continuing anyway."))
    (portlogger:take-port db portnum)
     (portlogger:take-port db portnum))
    portnum))

;; set port to "released", "failed" etc.
;; 
(define (portlogger:set-port db portnum value)
  (sqlite3:execute db "UPDATE ports SET state=?,update_time=strftime('%s','now') WHERE port=?;" value portnum))

;; set port to failed (attempted to take but got error)
;;
(define (portlogger:set-failed db portnum)
  (sqlite3:execute db "UPDATE ports SET state='failed',fail_count=fail_count+1,update_time=strftime('%s','now') WHERE port=?;" portnum))

;;======================================================================
;; MAIN
;;======================================================================

(define (portlogger:main . args)
  (let* ((db      (portlogger:open-db (conc "/tmp/." (current-user-name) "-portlogger.db")))
  (let* ((dbfname (conc "/tmp/." (current-user-name) "-portlogger.db"))
	 (db      (portlogger:open-db dbfname))
	 (numargs (length args))
	 (result  (cond
		   ((> numargs 1) ;; most commands
		    (case (string->symbol (car args)) ;; commands with two or more params
		      ((take)(portlogger:take-port db (string->number (cadr args))))
		      ((set) (portlogger:set-port db 
						  (string->number (cadr args))
						  (caddr args))
		       (caddr args))
		      ((failed)(portlogger:set-failed db (string->number (cadr args))) 'failed))))))
	 (result  
	  (handle-exceptions
	   exn
	   (begin
	     (debug:print 0 "EXCEPTION: portlogger database at " dbfname " probably overloaded or unreadable. Try removing it.")
	     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	     (print "exn=" (condition->list exn))
	     (debug:print 0 " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
	     (print-call-chain))
	   (cond
	    ((> numargs 1) ;; most commands
	     (case (string->symbol (car args)) ;; commands with two or more params
	       ((take)(portlogger:take-port db (string->number (cadr args))))
	       ((set) (portlogger:set-port db 
					   (string->number (cadr args))
					   (caddr args))
		(caddr args))
	       ((failed)(portlogger:set-failed db (string->number (cadr args))) 'failed)))))))
    (sqlite3:finalize! db)
    result))
     
;; (print (apply portlogger:main (cdr (argv))))

Modified process.scm from [88799f98f8] to [781c177a90].

49
50
51
52
53
54
55


56
57
58
59
60
61
62
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64







+
+








(define (cmd-run-proc-each-line cmd proc . params)
  ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params)
  (handle-exceptions
   exn
   (begin
     (print "ERROR:  Failed to run command: " cmd " " (string-intersperse params " "))
     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
     (print "exn=" (condition->list exn))
     #f)
   (let-values (((fh fho pid) (if (null? params)
				  (process cmd)
				  (process cmd params))))
       (let loop ((curr (read-line fh))
		(result  '()))
       (if (not (eof-object? curr))
122
123
124
125
126
127
128
129










124
125
126
127
128
129
130

131
132
133
134
135
136
137
138
139
140







-
+
+
+
+
+
+
+
+
+
+
     (let loop ((inl (read-line))
		(res '()))
       (if (eof-object? inl)
	   (reverse res)
	   (let ((pid (string->number inl)))
	     (if proc (proc pid))
	     (loop (read-line) (cons pid res))))))))
       

(define (process:alive? pid)
  (handle-exceptions
   exn
   ;; possibly pid is a process not a child, look in /proc to see if it is running still
   (file-exists? (conc "/proc/" pid))
   (let-values (((rpid exit-type exit-signal)(process-wait pid #t)))
       (and (number? rpid)
	    (equal? rpid pid)))))
	 

Modified rmt.scm from [7baaea28d7] to [ef41e52830].

55
56
57
58
59
60
61













62
63
64
65
66
67
68
69
70
71
72
73
74
75
76

77
78
79
80
81
82

83
84
85
86
87
88
89
90
91
92
93







94
95
96
97
98
99
100
101








102
103
104
105
106
107
108
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95

96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114








115
116
117
118
119
120
121
122
123
124
125
126
127
128
129







+
+
+
+
+
+
+
+
+
+
+
+
+















+





-
+











+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+







	       #t)
	     #f))))

;; cmd is a symbol
;; vars is a json string encoding the parameters for the call
;;
(define (rmt:send-receive cmd rid params)
  ;; clean out old connections
  (mutex-lock! *db-multi-sync-mutex*)
  (let ((expire-time (- (current-seconds) 60)))
    (for-each 
     (lambda (run-id)
       (let ((connection (hash-table-ref/default *runremote* run-id #f)))
	 (if ;; (and connection 
		  (< (http-transport:server-dat-get-last-access connection) expire-time) ; )
	     (begin
	       (debug:print-info 0 "Discarding connection to server for run-id " run-id ", too long between accesses")
	       (hash-table-delete! *runremote* run-id)))))
     (hash-table-keys *runremote*)))
  (mutex-unlock! *db-multi-sync-mutex*)
  (let* ((run-id          (if rid rid 0))
	 (connection-info (let ((cinfo (hash-table-ref/default *runremote* run-id #f)))
			    (if cinfo
				cinfo
				;; NB// can cache the answer for server running for 10 seconds ...
				;;  ;; (and (not (rmt:write-frequency-over-limit? cmd run-id))
				(if (tasks:server-running-or-starting? (tasks:get-db) run-id)
				    (let ((res (client:setup run-id)))
				      (if res 
					  (hash-table-ref/default *runremote* run-id #f) ;; client:setup filled this in (hopefully)
					  #f))
				    #f))))
	 (jparams         (db:obj->string params)))
    (if connection-info
	(let ((res             (http-transport:client-api-send-receive run-id connection-info cmd jparams)))
	  (http-transport:server-dat-update-last-access connection-info)
	  (if res
	      (db:string->obj res)
	      (let ((new-connection-info (client:setup run-id)))
		(debug:print 0 "WARNING: Communication failed, trying call to http-transport:client-api-send-receive again.")
		(rmt:send-receive cmd run-id params))))
	(let ((max-avg-qry (string->number (or (configf:lookup *configdat* "server" "server-query-threshold") "800"))))
	(let ((max-avg-qry (string->number (or (configf:lookup *configdat* "server" "server-query-threshold") "-1"))))
	  (debug:print-info 4 "no server and read-only query, bypassing normal channel")
	  ;; (if (rmt:write-frequency-over-limit? cmd run-id)(server:kind-run run-id))
	  (let ((curr-max (rmt:get-max-query-average)))
	    (if (> (cdr curr-max) max-avg-qry)
		(begin
		  (debug:print-info 3 "Max average query, " (inexact->exact (round (cdr curr-max))) "ms (" (car curr-max) ") exceeds " max-avg-qry ", try starting server ...")
		  (server:kind-run run-id))))
	  (rmt:open-qry-close-locally cmd run-id params)))))

(define (rmt:update-db-stats rawcmd params duration)
  (mutex-lock! *db-stats-mutex*)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 "WARNING: stats collection failed in update-db-stats")
     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
     (print "exn=" (condition->list exn))
     #f) ;; if this fails we don't care, it is just stats
  (let* ((cmd      (if (eq? rawcmd 'general-call) (car params) rawcmd))
	 (stat-vec (hash-table-ref/default *db-stats* cmd #f)))
    (if (not stat-vec)
	(let ((newvec (vector 0 0)))
	  (hash-table-set! *db-stats* cmd newvec)
	  (set! stat-vec newvec)))
    (vector-set! stat-vec 0 (+ (vector-ref stat-vec 0) 1))
    (vector-set! stat-vec 1 (+ (vector-ref stat-vec 1) duration)))
   (let* ((cmd      (if (eq? rawcmd 'general-call) (car params) rawcmd))
	  (stat-vec (hash-table-ref/default *db-stats* cmd #f)))
     (if (not stat-vec)
	 (let ((newvec (vector 0 0)))
	   (hash-table-set! *db-stats* cmd newvec)
	   (set! stat-vec newvec)))
     (vector-set! stat-vec 0 (+ (vector-ref stat-vec 0) 1))
     (vector-set! stat-vec 1 (+ (vector-ref stat-vec 1) duration))))
  (mutex-unlock! *db-stats-mutex*))


(define (rmt:print-db-stats)
  (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
    (debug:print 18 "DB Stats\n========")
    (debug:print 18 (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))
133
134
135
136
137
138
139
140
141

142

143

144
145
146
147
148
149
150
151
152
153
154
155
156

157
158
159
160
161

162
163
164
165
166
167
168
154
155
156
157
158
159
160


161
162
163

164
165
166
167
168
169
170
171
172
173
174
175
176

177





178
179
180
181
182
183
184
185







-
-
+

+
-
+












-
+
-
-
-
-
-
+







			       (cons newmax-cmd currmax)
			       (cons 'none 0))
			   (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
    (mutex-unlock! *db-stats-mutex*)
    res))
	  
(define (rmt:open-qry-close-locally cmd run-id params)
  (let* ((dbdir (conc    (configf:lookup *configdat* "setup" "linktree") "/.db"))
	 (dbstruct-local (if *dbstruct-db*
  (let* ((dbstruct-local (if *dbstruct-db*
			     *dbstruct-db*
			     (let* ((dbdir (conc    (configf:lookup *configdat* "setup" "linktree") "/.db"))
			     (let ((db (make-dbr:dbstruct path:  dbdir local: #t)))
				    (db (make-dbr:dbstruct path:  dbdir local: #t)))
			       (set! *dbstruct-db* db)
			       db)))
	 (db-file-path   (db:dbfile-path 0)))
    ;; (read-only      (not (file-read-access? db-file-path)))
    (let* ((start         (current-milliseconds))
	   (res           (api:execute-requests dbstruct-local (symbol->string cmd) params))
	   (duration      (- (current-milliseconds) start)))
      (rmt:update-db-stats cmd params duration)
      ;; mark this run as dirty if this was a write
      (if (not (member cmd api:read-only-queries))
	  (let ((start-time (current-seconds)))
	    (mutex-lock! *db-multi-sync-mutex*)
	    (let ((last-sync (hash-table-ref/default *db-local-sync* run-id 0)))
	    (if (not (hash-table-ref/default *db-local-sync* run-id #f))
	      (if (> (- start-time last-sync) 5) ;; every five seconds
		  (begin
		    (db:multi-db-sync (list run-id) 'new2old)
		    (debug:print-info 0 "Sync of newdb to olddb for run-id " run-id " completed in " (- (current-seconds) start-time) " seconds")
		    (hash-table-set! *db-local-sync* run-id start-time))))
		(hash-table-set! *db-local-sync* run-id start-time)) ;; the oldest "write"
	    (mutex-unlock! *db-multi-sync-mutex*)))
      res)))

(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
  (let* ((run-id   (if run-id run-id 0))
	 (jparams         (db:obj->string params)) ;; (rmt:dat->json-str params))
	 (res (http-transport:client-api-send-receive run-id connection-info cmd jparams)))
245
246
247
248
249
250
251
252

253
254
255
256
257
258
259
262
263
264
265
266
267
268

269
270
271
272
273
274
275
276







-
+







(define (rmt:get-test-id run-id testname item-path)
  (rmt:send-receive 'get-test-id run-id (list run-id testname item-path)))

(define (rmt:get-test-info-by-id run-id test-id)
  (if (and (number? run-id)(number? test-id))
      (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id))
      (begin
	(debug:print 0 "ERROR: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
	(debug:print 0 "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
	(print-call-chain)
	#f)))

(define (rmt:test-get-rundir-from-test-id run-id test-id)
  (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id)))

(define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))
314
315
316
317
318
319
320






321
322
323
324
325
326
327
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350







+
+
+
+
+
+








(define (rmt:get-testinfo-state-status run-id test-id)
  (rmt:send-receive 'get-testinfo-state-status run-id (list run-id test-id)))

(define (rmt:test-set-log! run-id test-id logf)
  (if (string? logf)(rmt:general-call 'test-set-log run-id logf test-id)))

(define (rmt:test-set-top-process-pid run-id test-id pid)
  (rmt:send-receive 'test-set-top-process-pid run-id (list run-id test-id pid)))

(define (rmt:test-get-top-process-pid run-id test-id)
  (rmt:send-receive 'test-get-top-process-pid run-id (list run-id test-id)))

(define (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)
  (rmt:send-receive 'get-run-ids-matching-target #f (list keynames target res runname testpatt statepatt statuspatt)))

;; NOTE: This will open and access ALL run databases. 
;;
(define (rmt:test-get-paths-matching-keynames-target-new keynames target res testpatt statepatt statuspatt runname)
  (let ((run-ids (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)))

Modified runs.scm from [0a93b5efd0] to [31523ae98e].

215
216
217
218
219
220
221

222
223

224
225

226
227
228
229
230
231
232
215
216
217
218
219
220
221
222
223
224
225
226

227
228
229
230
231
232
233
234







+


+

-
+







	 (test-names         #f)  ;; (tests:filter-test-names all-test-names test-patts))
	 (required-tests     #f)  ;;(lset-intersection equal? (string-split test-patts ",") test-names))) ;; test-names)) ;; Added test-names as initial for required-tests but that failed to work
	 (task-key           (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id)))
	 (tasks-db           (tasks:open-db)))

    (set-signal-handler! signal/int
			 (lambda (signum)
			   (signal-mask! signum)
			   (let ((tdb (tasks:open-db)))
			     (tasks:set-state-given-param-key tdb task-key "killed")
			     ;; (sqlite3:interrupt! tdb) ;; seems silly?
			     (sqlite3:finalize! tdb))
			   (print "Killed by sigint. Exiting")
			   (print "Killed by signal " signum ". Exiting")
			   (exit)))

    ;; register this run in monitor.db
    (tasks:add tasks-db "run-tests" user target runname test-patts task-key) ;; params)
    (tasks:set-state-given-param-key tasks-db task-key "running")
    (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process
    (if (file-exists? runconfigf)
247
248
249
250
251
252
253



254
255
256
257





258
259
260


261
262
263
264
265
266
267
268
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268


269
270

271
272
273
274
275
276
277







+
+
+




+
+
+
+
+

-
-
+
+
-







    (debug:print-info 0 "all tests:  " (string-intersperse (sort all-test-names string<) " "))
    (debug:print-info 0 "test names: " (string-intersperse (sort test-names string<) " "))

    ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if
    ;; -keepgoing is specified
    (if (eq? *passnum* 0)
	(begin
	  ;; Is this still necessary? I think not. Unreachable tests are marked as such and 
	  ;; should not cause problems here.
	  ;;
	  ;; have to delete test records where NOT_STARTED since they can cause -keepgoing to 
	  ;; get stuck due to becoming inaccessible from a failed test. I.e. if test B depends 
	  ;; on test A but test B reached the point on being registered as NOT_STARTED and test
	  ;; A failed for some reason then on re-run using -keepgoing the run can never complete.
	  ;;
	  ;; (rmt:general-call 'delete-tests-in-state run-id "NOT_STARTED")
	  
	  ;; Now convert FAIL and anything in allow-auto-rerun to NOT_STARTED
	  ;;
	  (for-each (lambda (state)
		      (rmt:general-call 'delete-tests-in-state run-id state))
		    (cons "NOT_STARTED" (string-split (or (configf:lookup *configdat* "setup" "allow-auto-rerun") ""))))
		      (rmt:set-tests-state-status run-id test-names state #f "NOT_STARTED" state))
		    (string-split (or (configf:lookup *configdat* "setup" "allow-auto-rerun") "")))))
	  (rmt:set-tests-state-status run-id test-names #f "FAIL" "NOT_STARTED" "FAIL")))

    ;; Ensure all tests are registered in the test_meta table
    (runs:update-all-test_meta #f)

    ;; now add non-directly referenced dependencies (i.e. waiton)
    ;;======================================================================
    ;; refactoring this block into tests:get-full-data
354
355
356
357
358
359
360
361
362

















363
364
365
366
367
368
369
363
364
365
366
367
368
369


370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393







-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+








    (if (not (null? required-tests))
	(debug:print-info 1 "Adding " required-tests " to the run queue"))
    ;; NOTE: these are all parent tests, items are not expanded yet.
    (debug:print-info 4 "test-records=" (hash-table->alist test-records))
    (let ((reglen (configf:lookup *configdat* "setup" "runqueue")))
      (if (> (length (hash-table-keys test-records)) 0)
	  (begin
	    (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry)
	  (let* ((keep-going #t)
		 (th1        (make-thread (lambda ()
					    (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry))
					  "runs:run-tests-queue"))
		 (th2        (make-thread (lambda ()				    
					    ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...
					    (let ((run-ids (rmt:get-all-run-ids)))
					      (for-each (lambda (run-id)
							  (if keep-going
							      (rmt:find-and-mark-incomplete run-id #f))) ;; ovr-deadtime)))
							run-ids)))
					  "runs: mark-incompletes")))
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th1)
	    (set! keep-going #f)
	    (thread-join! th2)
	    ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD
	    (if (> run-count 0)
		(begin
		  (if (not (hash-table-ref/default flags "-preclean" #f))
		      (hash-table-set! flags "-preclean" #t))
		  (if (not (hash-table-ref/default flags "-rerun" #f))
		      (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS"))
473
474
475
476
477
478
479
480

481
482
483
484
485
486
487
497
498
499
500
501
502
503

504
505
506
507
508
509
510
511







-
+







	(setenv "MT_RUNNAME"   runname)
	(runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
	(let ((items-list (items:get-items-from-config tconfig)))
	  (if (list? items-list)
	      (begin
		(if (null? items-list)
		    (let ((test-id (rmt:get-test-id run-id test-name "")))
		      (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "Failed to run due to failed prerequisites")))
		      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "Failed to run due to failed prerequisites"))))
		(tests:testqueue-set-items! test-record items-list)
		(list hed tal reg reruns))
	      (begin
		(debug:print 0 "ERROR: The proc from reading the items table did not yield a list - please report this")
		(exit 1))))))

     ((and (null? fails)
511
512
513
514
515
516
517
518

519
520
521
522
523
524
525
535
536
537
538
539
540
541

542
543
544
545
546
547
548
549







-
+







	(if (and give-up
		 (not (and (null? tal)(null? reg))))
	    (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records))
		  (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records)))
	      (debug:print 1 "WARNING: test " hed " has discarded prerequisites, removing it from the queue")

	      (let ((test-id (rmt:get-test-id run-id hed "")))
		(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites"))
		(if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites")))
	      
	      (if (and (null? trimmed-tal)
		       (null? trimmed-reg))
		  #f
		  (list (runs:queue-next-hed trimmed-tal trimmed-reg reglen regfull)
			(runs:queue-next-tal trimmed-tal trimmed-reg reglen regfull)
			(runs:queue-next-reg trimmed-tal trimmed-reg reglen regfull)
535
536
537
538
539
540
541
542

543
544
545
546
547
548
549
550
551
552
553
554
555

556
557
558



559
560
561
562
563
564
565
559
560
561
562
563
564
565

566
567
568
569
570
571
572
573
574
575
576
577
578
579
580



581
582
583
584
585
586
587
588
589
590







-
+













+
-
-
-
+
+
+







	    (debug:print-info 1 "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0))
	    ;; num-retries code was here
	    ;; we use this opportunity to move contents of reg to tal
	    (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met?
	  (begin
	    (debug:print-info 1 "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue")
	    (let ((test-id (rmt:get-test-id run-id hed "")))
	      (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while."))
	      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while.")))
	    (list (runs:queue-next-hed tal reg reglen regfull)
		  (runs:queue-next-tal tal reg reglen regfull)
		  (runs:queue-next-reg tal reg reglen regfull)
		  reruns))))

     ((and 
       (or (not (null? fails))
	   (not (null? prereq-fails)))
       (member 'normal testmode))
      (debug:print-info 1 "test "  hed " (mode=" testmode ") has failed prerequisite(s); "
			(string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ")
			", removing it from to-do list")
      (let ((test-id (rmt:get-test-id run-id hed "")))
	(if test-id
	(if (not (null? prereq-fails))
	    (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites")
	    (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL"      "Failed to run due to failed prerequisites")))
	    (if (not (null? prereq-fails))
		(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites")
		(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL"      "Failed to run due to failed prerequisites"))))
      (if (or (not (null? reg))(not (null? tal)))
	  (begin
	    (hash-table-set! test-registry hed 'CANNOTRUN)
	    (list (runs:queue-next-hed tal reg reglen regfull)
		  (runs:queue-next-tal tal reg reglen regfull)
		  (runs:queue-next-reg tal reg reglen regfull)
		  (cons hed reruns)))
642
643
644
645
646
647
648
649

650
651
652
653
654
655
656
667
668
669
670
671
672
673

674
675
676
677
678
679
680
681







-
+







		reruns)
	  #f))
     
     ;; Register tests 
     ;;
     ((not (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f))
      (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" )
      (if (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs
      (if #t ;; always do firm registration now in v1.60 and greater ;; (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs
	  (begin
	    (rmt:general-call 'register-test run-id run-id test-name item-path)
	    (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done))
	  (let ((th (make-thread (lambda ()
				   (mutex-lock! registry-mutex)
				   (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'start)
				   (mutex-unlock! registry-mutex)
746
747
748
749
750
751
752
753

754
755
756
757
758
759
760
771
772
773
774
775
776
777

778
779
780
781
782
783
784
785







-
+







	  ;; the waiton is FAIL so no point in trying to run hed ever again
	  (if (or (not (null? reg))(not (null? tal)))
	      (if (vector? hed)
		  (begin
		    (debug:print 1 "WARNING: Dropping test " test-name "/" item-path
				 " from the launch list as it has prerequistes that are FAIL")
		    (let ((test-id (rmt:get-test-id run-id hed "")))
		      (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))
		      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))
		    (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?)
		    ;; (thread-sleep! *global-delta*)
		    ;; This next is for the items
		    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f)
		    (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'removed)
		    (list (runs:queue-next-hed tal reg reglen regfull)
			  (runs:queue-next-tal tal reg reglen regfull)
885
886
887
888
889
890
891


892
893
894
895


896
897
898
899
900
901
902
910
911
912
913
914
915
916
917
918
919
920
921

922
923
924
925
926
927
928
929
930







+
+



-
+
+







	       (tal         (cdr sorted-test-names))
	       (reg         '()) ;; registered, put these at the head of tal 
	       (reruns      '()))

      (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns))

      ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
      ;; moving this to a parallel thread and just run it once.
      ;;
      (if (> (current-seconds)(+ last-time-incomplete 900))
          (begin
            (set! last-time-incomplete (current-seconds))
            (rmt:find-and-mark-incomplete-all-runs)))
            ;; (rmt:find-and-mark-incomplete-all-runs)
	    ))

      ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns)
      (let* ((test-record (hash-table-ref test-records hed))
	     (test-name   (tests:testqueue-get-testname test-record))
	     (tconfig     (tests:testqueue-get-testconfig test-record))
	     (jobgroup    (config-lookup tconfig "test_meta" "jobgroup"))
	     (testmode    (let ((m (config-lookup tconfig "requirements" "mode")))

Modified server.scm from [033734a741] to [faceda817c].

81
82
83
84
85
86
87

88
89
90
91
92
93

94
95
96
97
98
99
100
101
102
103
104
105
106
107


108
109
110
111
112
113
114
81
82
83
84
85
86
87
88
89
90
91
92
93

94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117







+





-
+














+
+







;; if the run-id is zero and the target-host is set 
;; try running on that host
;;
(define  (server:run run-id)
  (let* ((curr-host   (get-host-name))
	 (curr-ip     (server:get-best-guess-address curr-host))
	 (target-host (configf:lookup *configdat* "server" "homehost" ))
	 (testsuite   (common:get-testsuite-name))
	 (logfile     (conc *toppath* "/logs/" run-id ".log"))
	 (cmdln (conc (common:get-megatest-exe)
		      " -server " (or target-host "-") " -run-id " run-id (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
									      (conc " -daemonize -log " logfile)
									      "")
		      " -debug 4 "))) ;; (conc " >> " logfile " 2>&1 &")))))
		      " -debug 4 testsuite:" testsuite))) ;; (conc " >> " logfile " 2>&1 &")))))
    (debug:print 0 "INFO: Starting server (" cmdln ") as none running ...")
    (push-directory *toppath*)
    (if (not (directory-exists? "logs"))(create-directory "logs"))
    ;; host.domain.tld match host?
    (if (and target-host 
	     ;; look at target host, is it host.domain.tld or ip address and does it 
	     ;; match current ip or hostname
	     (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
	     (not (equal? curr-ip target-host)))
	(begin
	  (debug:print-info 0 "Starting server on " target-host ", logfile is " logfile)
	  (setenv "TARGETHOST" target-host)))
    (setenv "TARGETHOST_LOGF" logfile)
    (system (conc "nbfake " cmdln))
    (unsetenv "TARGETHOST_LOGF")
    (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
    ;; (system cmdln)
    (pop-directory)))

;; kind start up of servers, wait 40 seconds before allowing another server for a given
;; run-id to be launched
(define (server:kind-run run-id)
  (let ((last-run-time (hash-table-ref/default *server-kind-run* run-id #f)))

Modified tasks.scm from [903bab69fd] to [e808e7265f].

22
23
24
25
26
27
28



29
30
31
32
33
34
35
36
37
38
39
40












41
42
43
44
45
46
47
22
23
24
25
26
27
28
29
30
31












32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50







+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+







;; Tasks db
;;======================================================================

;; wait up to aprox n seconds for a journal to go away
;;
(define (tasks:wait-on-journal path n #!key (remove #f))
  (let ((fullpath (conc path "-journal")))
    (handle-exceptions
     exn
     #t ;; if stuff goes wrong just allow it to move on
    (let loop ((journal-exists (file-exists? fullpath))
	       (count          n)) ;; wait ten times ...
      (if journal-exists
	  (if (> count 0)
	      (begin
		(thread-sleep! 1)
		(loop (file-exists? fullpath)
		      (- count 1)))
	      (begin
		(if remove (system (conc "rm -rf " path)))
		#f))
	  #t))))
     (let loop ((journal-exists (file-exists? fullpath))
		(count          n)) ;; wait ten times ...
       (if journal-exists
	   (if (> count 0)
	       (begin
		 (thread-sleep! 1)
		 (loop (file-exists? fullpath)
		       (- count 1)))
	       (begin
		 (if remove (system (conc "rm -rf " path)))
		 #f))
	   #t)))))

(define (tasks:get-task-db-path)
  (if *task-db*
      (vector-ref *task-db* 1)
      (let* ((linktree     (configf:lookup *configdat* "setup" "linktree"))
	     (dbpath       (conc linktree "/.db/monitor.db")))
	dbpath)))
319
320
321
322
323
324
325

326



327













328
329
330
331
332
333
334
322
323
324
325
326
327
328
329

330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353







+
-
+
+
+

+
+
+
+
+
+
+
+
+
+
+
+
+







    res))

;; no elegance here ...
;;
(define (tasks:kill-server hostname pid)
  (debug:print-info 0 "Attempting to kill server process " pid " on host " hostname)
  (setenv "TARGETHOST" hostname)
  (setenv "TARGETHOST_LOGF" "server-kills.log")
  (system (conc "nbfake kill " pid)))
  (system (conc "nbfake kill " pid))
  (unsetenv "TARGETHOST_LOGF")
  (unsetenv "TARGETHOST"))
 
;; look up a server by run-id and send it a kill, also delete the record for that server
;;
(define (tasks:kill-server-run-id run-id)
  (let* ((tdb  (tasks:open-db))
	 (sdat (tasks:get-server mdb run-id)))
    (if sdat
	(let ((hostname (vector-ref sdat 6))
	      (pid      (vector-ref sdat 5)))
	  (debug:print-info 0 "Killing server for run-id " run-id " on host " hostname " with pid " pid)
	  (tasks:kill-server hostname pid)
	  (tasks:server-delete-record mdb server-id tag) )
	(debug:print-info 0 "No server found for run-id " run-id ", nothing to kill"))))
    
;;   (if status ;; #t means alive
;;       (begin
;; 	(if (equal? hostname (get-host-name))
;; 	    (handle-exceptions
;; 	     exn
;; 	     (debug:print-info 0 "server may or may not be dead, check for megatest -server running as pid " pid "\n"
;; 			       "  EXCEPTION: " ((condition-property-accessor 'exn 'message) exn))
626
627
628
629
630
631
632
633
634
635
636
637
638








639
640
641
642
643
644
645
646
647
648
649

















650
651
652
653
654
655
656
645
646
647
648
649
650
651






652
653
654
655
656
657
658
659











660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683







-
-
-
-
-
-
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







	(hostpid-rx (regexp "\\s+(\\w+)\\s+(\\d+)$"))) ;; host pid is at end of param string
    (if (null? records)
	(debug:print 0 "No run launching processes found for " target " / " run-name)
	(debug:print 0 "Found " (length records) " run(s) to kill."))
    (for-each 
     (lambda (record)
       (let* ((param-key (list-ref record 8))
	      (match-dat (string-search hostpid-rx param-key))
	      (hostname  (cadr match-dat))
	      (pid       (caddr match-dat)))
	 (debug:print 0 "Sending SIGINT to process " pid " on host " hostname)
	 (if (equal? (get-host-name) hostname)
	     (begin
	      (match-dat (string-search hostpid-rx param-key)))
	 (if match-dat
	     (let ((hostname  (cadr match-dat))
		   (pid       (string->number (caddr match-dat))))
	       (debug:print 0 "Sending SIGINT to process " pid " on host " hostname)
	       (if (equal? (get-host-name) hostname)
		   (if (process:alive? pid)
		       (begin
	       (process-signal (string->number pid) signal/int)
	       (thread-sleep! 5)
	       (handle-exceptions
		exn
		#t
		(process-signal (string->number pid) signal/kill)))
	     ;;  (call-with-environment-variables
	     (let ((old-targethost (getenv "TARGETHOST")))
	       (set-environment-variable "TARGETHOST" hostname)
	       (system (conc "nbfake " kill " " pid))
	       (if old-targethost (set-environment-variable "TARGETHOST" old-targethost))))))
			 (handle-exceptions
			  exn
			  (begin
			    (debug:print 0 "Kill of process " pid " on host " hostname " failed.")
			    (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
			    #t)
			  (process-signal pid signal/int)
			  (thread-sleep! 5)
			  (if (process:alive? pid)
			      (process-signal pid signal/kill)))))
		   ;;  (call-with-environment-variables
		   (let ((old-targethost (getenv "TARGETHOST")))
		     (setenv "TARGETHOST" hostname)
		     (system (conc "nbfake kill " pid))
		     (if old-targethost (setenv "TARGETHOST" old-targethost))
		     (unsetenv "TARGETHOST"))))
	     (debug:print 0 "ERROR: no record or improper record for " target "/" run-name " in tasks_queue in monitor.db"))))
     records)))


;;======================================================================
;; The routines to process tasks
;;======================================================================

Modified tdb.scm from [de69c98c94] to [4b5015105f].

68
69
70
71
72
73
74
75


76
77
78
79
80
81
82
68
69
70
71
72
73
74

75
76
77
78
79
80
81
82
83







-
+
+







	      (tdb:testdb-initialize db)))
	;; (sqlite3:execute db "PRAGMA synchronous = 0;")
	(debug:print-info 11 "open-test-db END (sucessful)" work-area)
	;; now let's test that everything is correct
	(handle-exceptions
	 exn
	 (begin
	   (debug:print 0 "ERROR: problem accessing test db " work-area ", you probably should clean and re-run this test"
	   (debug:print 0 "ERROR: problem accessing test db " work-area ", you probably should clean and re-run this test or remove the file " 
			dbpath ".\n  "
			((condition-property-accessor 'exn 'message) exn))
	   #f)
	 ;; Is there a cheaper single line operation that will check for existance of a table
	 ;; and raise an exception ?
	 (sqlite3:execute db "SELECT id FROM test_data LIMIT 1;"))
	db)
      (let ((baddb (sqlite3:open-database ":memory:")))

Modified tests/Makefile from [25eaa21107] to [60c8f28493].

172
173
174
175
176
177
178
179

180
181
182
183
184
185
186
172
173
174
175
176
177
178

179
180
181
182
183
184
185
186







-
+







	cd fullrun;$(MEGATEST) -remove-runs :runname $(RUN)  -testpatt % -itempatt % :sysname % :fsname % :datapath %

clean  : 
	rm cleanprep

kill :
	killall -v mtest main.sh dboard || true
	rm -rf *run/db/* */megatest.db */logging.db */monitor.db fullrun/tmp/mt_*/* fullrun/tmp/mt_*/.db* fullrun/logs/*.log || true
	rm -rf *run/db/* */megatest.db */logging.db */monitor.db fullrun/tmp/mt_*/* fullrun/tmp/mt_*/.db* fullrun/logs/*.log fullrun/*.log || true
	killall -v mtest dboard || true

hardkill : kill
	sleep 2;killall -v mtest main.sh dboard -9

listservers :
	cd fullrun;$(MEGATEST) -list-servers

Modified tests/fullrun/runconfigs.config from [5fc85197af] to [ed560fa611].

15
16
17
18
19
20
21

22
23
24
25
26
27
28
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29







+








[default/ubuntu/nfs]
WACKYVAR2 #{runconfigs-get CURRENT}

[ubuntu/nfs/none]
WACKYVAR2 #{runconfigs-get CURRENT}
SOMEVAR2  This should show up in SOMEVAR4 if the target is ubuntu/nfs/none
VARWITHDOLLARSIGNS The$USER/signs/should/be/replaced/with/variable

[default]
SOMEVAR3 #{rget SOMEVAR}
SOMEVAR4 #{rget SOMEVAR2}
SOMEVAR5 #{runconfigs-get SOMEVAR2}

[this/a/test]

Modified tests/fullrun/tests/exit_0/testconfig from [475b97c77b] to [5010ef5eb6].

1
2
3
4
5
6
7
8
9
10





1
2
3
4
5
6
7
8
9
10
11
12
13
14
15










+
+
+
+
+
[setup]
runscript main.sh

[test_meta]
author matt
owner  bob
description This test checks that a multi-lineitem test with mix of pass and non-fail rolls up a PASS

tags first,single
reviewed 09/10/2011, by Matt

[triggers]
NOT_STARTED/ xterm -e bash -s -- 
RUNNING/ xterm -e bash -s -- 

Added tests/fullrun/tests/test_mt_vars/eval_vars.sh version [786761600e].








1
2
3
4
5
6
7
+
+
+
+
+
+
+
#!/bin/bash

if env | grep VARWITHDOLLARSIGNS | grep USER;then
    exit 1 # fails!
else
    exit 0 # good!
fi

Modified tests/fullrun/tests/test_mt_vars/testconfig from [a0c61adcaf] to [0d7c3216f9].

16
17
18
19
20
21
22



23
24
25
26
27
28
29
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32







+
+
+







empty_var empty_var.sh

# VACKYVAR should be set to a path
vackyvar vackyvar.sh

# test-path and test-file
test-path test-path-file.sh

# verify that vars with $ signs get expanded
varwithdollar eval_vars.sh

[requirements]
waiton runfirst
priority 0

[items]
NUMNUM [system cat $MT_RUN_AREA_HOME/tmp/$USER/$sysname/$fsname/$datapath/$MT_RUNNAME/$PREDICTABLE]

Modified utils/nbfake from [99a526d022] to [9de79bbac2].

1

2
3
4
5
6






















7

8

9
10
11


12






13

14







15

16
























17

18

19

20
1
2





3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26

27



28
29
30
31
32
33
34
35
36

37
38
39
40
41
42
43
44
45

46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71

72
73
74

75
76

+
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

+
-
+
-
-
-
+
+

+
+
+
+
+
+
-
+

+
+
+
+
+
+
+
-
+

+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+

+
-
+

#!/bin/bash
###############################################################################

# Can't always trust $PWD
CURRWD=`pwd`
if [[ $TARGETHOST_LOGF == "" ]]; then
    TARGETHOST_LOGF=NBFAKE-`date +%GWW%V.%u_%T`
#
# nbfake - capture command output in a logfile
#
# nbfake behavior can be changed by setting the following env vars:
#   NBFAKE_HOST       SSH to $NBFAKE_HOST and run command
#   NBFAKE_LOG        Logfile for nbfake output
#
###############################################################################

if [[ -z "$@" ]]; then
  cat <<__EOF

nbfake usage:

nbfake <command to run>

nbfake behavior can be changed by setting the following env vars:
   NBFAKE_HOST       SSH to \$NBFAKE_HOST and run command
   NBFAKE_LOG        Logfile for nbfake output

__EOF
  exit
fi

echo "#======================================================================"
#==============================================================================
echo "# NBFAKE Running command:"
echo "#     \"$*\""
echo "#======================================================================"
# Setup
#==============================================================================

# Can't always trust $PWD
CURRWD=$(pwd)

# Make sure nbfake host and logfile are set. Fall back to old-style variable names

if [[ -z "$NBFAKE_HOST" && -n "$TARGETHOST" ]]; then 
if [[ $TARGETHOST == ""  ]]; then
  MY_NBFAKE_HOST=$TARGETHOST     
  unset TARGETHOST
else
  MY_NBFAKE_HOST=$NBFAKE_HOST
  unset NBFAKE_HOST
fi


if [[ -z "$NBFAKE_LOG" && -n "$TARGETHOST_LOGF" ]]; then 
  TARGETHOST_LOGF_TEMP=$TARGETHOST_LOGF
  MY_NBFAKE_LOG=$TARGETHOST_LOGF
  unset TARGETHOST_LOGF
else
  MY_NBFAKE_LOG=$NBFAKE_LOG
  unset NBFAKE_LOG
fi

# Set default nbfake log

if [[ -z "$MY_NBFAKE_LOG" ]]; then
  MY_NBFAKE_LOG=NBFAKE-$(date +%GWW%V.%u_%T)
fi

#==============================================================================
# Run and log
#==============================================================================

cat <<__EOF >&2
#======================================================================
# NBFAKE logging command to: $MY_NBFAKE_LOG
#     $*
#======================================================================
__EOF

if [[ -z "$MY_NBFAKE_HOST" ]]; then
  # Run locally
  sh -c "cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* > $TARGETHOST_LOGF_TEMP 2>&1 &"
  sh -c "cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* >> $MY_NBFAKE_LOG 2>&1 &"
else
  # run remotely
  ssh -n -f $TARGETHOST "sh -c \"cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* > $TARGETHOST_LOGF 2>&1 &\""
  ssh -n -f $MY_NBFAKE_HOST "sh -c \"cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* >> $MY_NBFAKE_LOG 2>&1 &\""
fi