Megatest

Changes On Branch 2d5e0edb49030012
Login

Changes In Branch streamline-exception-handling Through [2d5e0edb49] Excluding Merge-Ins

This is equivalent to a diff from 9926990b7a to 2d5e0edb49

2014-11-02
09:25
Merged streamline exception handling branch into v1.60 check-in: 8fd7d261b7 user: matt tags: v1.60
2014-11-01
08:41
More cleanup in exception handling check-in: dba7069ea8 user: matt tags: streamline-exception-handling
07:01
Streamline exception handling check-in: 2d5e0edb49 user: matt tags: streamline-exception-handling
2014-10-29
17:29
Fixed nbfake variable handling - was causing log files disappearing check-in: 9926990b7a user: mrwellan tags: v1.60, v1.6005_ww44.5
16:03
Merged nbfake fix check-in: f050e1d721 user: mrwellan tags: v1.60, v1.6005_ww44.3

Modified client.scm from [a5253ced0b] to [dc8b2be6ad].

124
125
126
127
128
129
130

131
132
133
134
135
136
137
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138







+







;; keep this as a function to ease future 
(define (client:start run-id server-info)
  (http-transport:client-connect (tasks:hostinfo-get-interface server-info)
				 (tasks:hostinfo-get-port server-info)))

;; client:signal-handler
(define (client:signal-handler signum)
  (signal-mask! signum)
  (handle-exceptions
   exn
   (debug:print " ... exiting ...")
   (let ((th1 (make-thread (lambda ()
			     "") ;; do nothing for now (was flush out last call if applicable)
			   "eat response"))
	 (th2 (make-thread (lambda ()

Modified common.scm from [a2276ff658] to [1306ff927d].

154
155
156
157
158
159
160






161

162
163
164
165
166
167
168
154
155
156
157
158
159
160
161
162
163
164
165
166

167
168
169
170
171
172
173
174







+
+
+
+
+
+
-
+








(define (common:get-megatest-exe)
  (if (getenv "MT_MEGATEST") (getenv "MT_MEGATEST") "megatest"))

(define (common:read-encoded-string instr)
  (handle-exceptions
   exn
   (handle-exceptions
    exn
    (begin
      (debug:print 0 "ERROR: received bad encoded string \"" instr "\", message: " ((condition-property-accessor 'exn 'message) exn))
      (print-call-chain)
      #f)
   (read (open-input-string (base64:base64-decode instr)))
    (read (open-input-string (base64:base64-decode instr))))
   (read (open-input-string (z3:decode-buffer (base64:base64-decode instr))))))

;;======================================================================
;; S T A T E S   A N D   S T A T U S E S
;;======================================================================

(define *common:std-states*   

Modified db.scm from [1e343e9345] to [e68ca6ea25].

294
295
296
297
298
299
300

301




302
303
304
305
306
307
308
294
295
296
297
298
299
300
301

302
303
304
305
306
307
308
309
310
311
312







+
-
+
+
+
+







		 (sqlite3:finalize! db #t))))
	 (hash-table-values (dbr:dbstruct-get-locdbs dbstruct))))
    (thread-sleep! 3)
    (if (and rundb
	     (sqlite3:database? rundb))
	(handle-exceptions
	 exn
	 (begin 
	 #t ;; (debug:print 0 "WARNING: database files may not have been closed correctly. Consider running -cleanup-db")
	   (debug:print 0 "WARNING: database files may not have been closed correctly. Consider running -cleanup-db")
	   (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	   (print-call-chain)
	   #f)
	 (sqlite3:interrupt! rundb)
	 (sqlite3:finalize! rundb #t))))
  ;; (mutex-unlock! *db-sync-mutex*)
  )

(define (db:open-inmem-db)
  (let* ((db      (sqlite3:open-database ":memory:"))
2435
2436
2437
2438
2439
2440
2441
2442

2443
2444
2445
2446
2447
2448
2449
2439
2440
2441
2442
2443
2444
2445

2446
2447
2448
2449
2450
2451
2452
2453







-
+







  (handle-exceptions
   exn
   (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
     ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn)
     (if (eq? err-status 'done)
	 default
	 (begin
	   (debug:print 0 "ERROR:  query " stmt " failed " ((condition-property-accessor 'exn 'message) exn))
	   (debug:print 0 "ERROR:  query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn))
	   (print-call-chain)
	   default)))
   (apply sqlite3:first-result db stmt params)))

;;======================================================================
;; Extract ods file from the db
;;======================================================================

Modified docs/manual/megatest_manual.html from [ec93b0a8a8] to [191f1255c5].

1
2
3
4
5
6

7
8
9
10
11
12
13
1
2
3
4
5

6
7
8
9
10
11
12
13





-
+







<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.6.7" />
<meta name="generator" content="AsciiDoc 8.6.9" />
<title>The Megatest Users Manual</title>
<style type="text/css">
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */

/* Default font. */
body {
  font-family: Georgia,serif;
83
84
85
86
87
88
89
90




91
92
93



94
95
96
97
98
99
100
83
84
85
86
87
88
89

90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106







-
+
+
+
+



+
+
+








ul, ol, li > p {
  margin-top: 0;
}
ul > li     { color: #aaa; }
ul > li > * { color: black; }

pre {
.monospaced, code, pre {
  font-family: "Courier New", Courier, monospace;
  font-size: inherit;
  color: navy;
  padding: 0;
  margin: 0;
}
pre {
  white-space: pre-wrap;
}

#author {
  color: #527bbd;
  font-weight: bold;
  font-size: 1.1em;
}
#email {
215
216
217
218
219
220
221
222

223
224
225
226
227
228
229
221
222
223
224
225
226
227

228
229
230
231
232
233
234
235







-
+








div.exampleblock > div.content {
  border-left: 3px solid #dddddd;
  padding-left: 0.5em;
}

div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; }
span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; }

dl {
  margin-top: 0.8em;
  margin-bottom: 0.8em;
}
dt {
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
417
418
419
420
421
422
423






424
425
426
427
428
429
430







-
-
-
-
-
-









/*
 * xhtml11 specific
 *
 * */

tt {
  font-family: "Courier New", Courier, monospace;
  font-size: inherit;
  color: navy;
}

div.tableblock {
  margin-top: 1.0em;
  margin-bottom: 1.5em;
}
div.tableblock > table {
  border: 3px solid #527bbd;
}
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
450
451
452
453
454
455
456






457
458
459
460
461
462
463







-
-
-
-
-
-









/*
 * html5 specific
 *
 * */

.monospaced {
  font-family: "Courier New", Courier, monospace;
  font-size: inherit;
  color: navy;
}

table.tableblock {
  margin-top: 1.0em;
  margin-bottom: 1.5em;
}
thead, p.tableblock.header {
  font-weight: bold;
  color: #527bbd;
535
536
537
538
539
540
541


542
543
544
545
546
547
548
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544







+
+







body.manpage div.sectionbody {
  margin-left: 3em;
}

@media print {
  body.manpage div#toc { display: none; }
}


</style>
<script type="text/javascript">
/*<![CDATA[*/
var asciidoc = {  // Namespace.

/////////////////////////////////////////////////////////////////////
// Table Of Contents generator
735
736
737
738
739
740
741
742

743
744
745
746
747
748
749
731
732
733
734
735
736
737

738
739
740
741
742
743
744
745







-
+







/*]]>*/
</script>
</head>
<body class="book">
<div id="header">
<h1>The Megatest Users Manual</h1>
<span id="author">Matt Welland</span><br />
<span id="email"><tt>&lt;<a href="mailto:matt@kiatoa.com">matt@kiatoa.com</a>&gt;</tt></span><br />
<span id="email"><code>&lt;<a href="mailto:matt@kiatoa.com">matt@kiatoa.com</a>&gt;</code></span><br />
<span id="revnumber">version 1.0,</span>
<span id="revdate">April 2012</span>
</div>
<div id="content">
<div class="sect1">
<h2 id="_preface">Preface</h2>
<div class="sectionbody">
963
964
965
966
967
968
969
970
971


972
973
974
975
976

977
978

979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995


996
997
998
999

1000
1001
1002
1003
1004
1005

1006
1007
1008
1009
1010
1011
1012
1013

1014
1015

1016
1017
1018
1019
1020
1021
1022
959
960
961
962
963
964
965


966
967
968
969
970
971

972
973

974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989


990
991
992
993
994

995
996
997
998
999
1000

1001
1002
1003
1004
1005
1006
1007
1008

1009
1010

1011
1012
1013
1014
1015
1016
1017
1018







-
-
+
+




-
+

-
+















-
-
+
+



-
+





-
+







-
+

-
+







<div class="sect1">
<h2 id="_limiting_your_running_jobs">Limiting your running jobs</h2>
<div class="sectionbody">
<div class="paragraph"><p>The following example will limit a test in the jobgroup "group1" to no more than 10 tests simultaneously.</p></div>
<div class="paragraph"><p>In your testconfig:</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>[test_meta]
jobgroup group1</tt></pre>
<pre><code>[test_meta]
jobgroup group1</code></pre>
</div></div>
<div class="paragraph"><p>In your megatest.config:</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>[jobgroups]
<pre><code>[jobgroups]
group1 10
custdes 4</tt></pre>
custdes 4</code></pre>
</div></div>
</div>
</div>
<div class="sect1">
<h2 id="_debugging_tricks">Debugging Tricks</h2>
<div class="sectionbody">
<div class="sect2">
<h3 id="_examining_the_environment">Examining The Environment</h3>
<div class="sect3">
<h4 id="_during_config_file_processing">During Config File Processing</h4>
</div>
<div class="sect3">
<h4 id="_organising_your_tests_and_tasks">Organising Your Tests and Tasks</h4>
<div class="listingblock">
<div class="content">
<pre><tt>[tests-paths]
1 #{get misc parent}/simplerun/tests</tt></pre>
<pre><code>[tests-paths]
1 #{get misc parent}/simplerun/tests</code></pre>
</div></div>
<div class="listingblock">
<div class="content">
<pre><tt>[setup]</tt></pre>
<pre><code>[setup]</code></pre>
</div></div>
<div class="paragraph"><p>The runscript method is a brute force way to run scripts where the
user is responsible for setting STATE and STATUS</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>runscript main.csh</tt></pre>
<pre><code>runscript main.csh</code></pre>
</div></div>
</div>
</div>
<div class="sect2">
<h3 id="_debugging_server_problems">Debugging Server Problems</h3>
<div class="listingblock">
<div class="content">
<pre><tt>sudo lsof -i
<pre><code>sudo lsof -i
sudo netstat -lptu
sudo netstat -tulpn</tt></pre>
sudo netstat -tulpn</code></pre>
</div></div>
</div>
</div>
</div>
<h1 id="_reference">Reference</h1>
<div class="sect1">
<h2 id="_the_first_chapter_of_the_second_part_2">The First Chapter of the Second Part</h2>
1030
1031
1032
1033
1034
1035
1036
1037

1038
1039
1040
1041
1042
1043

1044
1045
1046
1047
1048
1049
1050
1051
1052
1053

1054
1055
1056
1057
1058
1059
1060

1061
1062

1063
1064
1065
1066
1067
1068
1069
1070
1071

1072
1073
1074
1075
1076

1077
1078
1079
1080
1081
1082
1083

1084
1085
1086
1087

1088
1089
1090

1091
1092
1093
1094
1095
1096
1097

1098
1099
1100
1101
1102
1103
1104
1105
1106
1107

1108
1109
1110
1111
1112
1113
1114

1115
1116
1117

1118
1119
1120
1121
1122
1123
1124

1125
1126
1127
1128
1129
1130
1131
1132
1133
1134

1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148

1149
1150
1151
1152
1153
1154
1155
1156
1157

1158
1159
1160
1161
1162
1163
1164
1165

1166
1167
1168
1169
1170
1171
1172
1173
1174

1175
















1176
1177
1178
1179
1180
1181
1182
1026
1027
1028
1029
1030
1031
1032

1033
1034
1035
1036
1037
1038

1039
1040
1041
1042
1043
1044
1045
1046
1047
1048

1049
1050
1051
1052
1053
1054
1055

1056
1057

1058
1059
1060
1061
1062
1063
1064
1065
1066

1067
1068
1069
1070
1071

1072
1073
1074
1075
1076
1077
1078

1079
1080
1081
1082

1083
1084
1085

1086
1087
1088
1089
1090
1091
1092

1093
1094
1095
1096
1097
1098
1099
1100
1101
1102

1103
1104
1105
1106
1107
1108
1109

1110
1111
1112

1113
1114
1115
1116
1117
1118
1119

1120
1121
1122
1123
1124
1125
1126
1127
1128
1129

1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143

1144
1145
1146
1147
1148
1149
1150
1151
1152

1153
1154
1155
1156
1157
1158
1159
1160

1161
1162
1163
1164
1165
1166
1167
1168
1169

1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194







-
+





-
+









-
+






-
+

-
+








-
+




-
+






-
+



-
+


-
+






-
+









-
+






-
+


-
+






-
+









-
+













-
+








-
+







-
+








-
+

+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







<div class="sectionbody">
<div class="sect2">
<h3 id="_setup_section">Setup section</h3>
<div class="sect3">
<h4 id="_header">Header</h4>
<div class="listingblock">
<div class="content">
<pre><tt>[setup]</tt></pre>
<pre><code>[setup]</code></pre>
</div></div>
<div class="paragraph"><p>The runscript method is a brute force way to run scripts where the
user is responsible for setting STATE and STATUS</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>runscript main.csh</tt></pre>
<pre><code>runscript main.csh</code></pre>
</div></div>
</div>
</div>
<div class="sect2">
<h3 id="_requirements_section">Requirements section</h3>
<div class="sect3">
<h4 id="_header_2">Header</h4>
<div class="listingblock">
<div class="content">
<pre><tt>[requirements]</tt></pre>
<pre><code>[requirements]</code></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_wait_on_other_tests">Wait on Other Tests</h4>
<div class="listingblock">
<div class="content">
<pre><tt># A normal waiton waits for the prior tests to be COMPLETED
<pre><code># A normal waiton waits for the prior tests to be COMPLETED
# and PASS, CHECK or WAIVED
waiton test1 test2</tt></pre>
waiton test1 test2</code></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_mode">Mode</h4>
<div class="paragraph"><p>The default (i.e. if mode is not specified) is normal. All pre-dependent tests
must be COMPLETED and PASS, CHECK or WAIVED before the test will start</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>mode   normal</tt></pre>
<pre><code>mode   normal</code></pre>
</div></div>
<div class="paragraph"><p>The toplevel mode requires only that the prior tests are COMPLETED.</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>mode toplevel</tt></pre>
<pre><code>mode toplevel</code></pre>
</div></div>
<div class="paragraph"><p>A item based waiton will start items in a test when the
same-named item is COMPLETED and PASS, CHECK or WAIVED
in the prior test</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>mode itemmatch</tt></pre>
<pre><code>mode itemmatch</code></pre>
</div></div>
<div class="listingblock">
<div class="content">
<pre><tt># With a toplevel test you may wish to generate your list
<pre><code># With a toplevel test you may wish to generate your list
# of tests to run dynamically
#
# waiton #{shell get-valid-tests-to-run.sh}</tt></pre>
# waiton #{shell get-valid-tests-to-run.sh}</code></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_run_time_limit">Run time limit</h4>
<div class="listingblock">
<div class="content">
<pre><tt>runtimelim 1h 2m 3s  # this will automatically kill the test if it runs for more than 1h 2m and 3s</tt></pre>
<pre><code>runtimelim 1h 2m 3s  # this will automatically kill the test if it runs for more than 1h 2m and 3s</code></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_skip">Skip</h4>
</div>
<div class="sect3">
<h4 id="_header_3">Header</h4>
<div class="listingblock">
<div class="content">
<pre><tt>[skip]</tt></pre>
<pre><code>[skip]</code></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_skip_on_still_running_tests">Skip on Still-running Tests</h4>
<div class="listingblock">
<div class="content">
<pre><tt># NB// If the prevrunning line exists with *any* value the test will
<pre><code># NB// If the prevrunning line exists with *any* value the test will
# automatically SKIP if the same-named test is currently RUNNING

prevrunning x</tt></pre>
prevrunning x</code></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_skip_if_a_file_exists">Skip if a File Exists</h4>
<div class="listingblock">
<div class="content">
<pre><tt>fileexists /path/to/a/file # skip if /path/to/a/file exists</tt></pre>
<pre><code>fileexists /path/to/a/file # skip if /path/to/a/file exists</code></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_controlled_waiver_propagation">Controlled waiver propagation</h4>
<div class="paragraph"><p>If test is FAIL and previous test in run with same MT_TARGET is WAIVED then apply the following rules from the testconfig:
If a waiver check is specified in the testconfig apply the check and if it passes then set this FAIL to WAIVED</p></div>
<div class="paragraph"><p>Waiver check has two parts, 1) a list of waiver, rulename, filepatterns and 2) the rulename script spec (note that "diff" and "logpro" are predefined)</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>###### EXAMPLE FROM testconfig #########
<pre><code>###### EXAMPLE FROM testconfig #########
# matching file(s) will be diff'd with previous run and logpro applied
# if PASS or WARN result from logpro then WAIVER state is set
#
[waivers]
# logpro_file    rulename      input_glob
waiver_1         logpro        lookittmp.log

[waiver_rules]

# This builtin rule is the default if there is no &lt;waivername&gt;.logpro file
# diff   diff %file1% %file2%

# This builtin rule is applied if a &lt;waivername&gt;.logpro file exists
# logpro diff %file1% %file2% | logpro %waivername%.logpro %waivername%.html</tt></pre>
# logpro diff %file1% %file2% | logpro %waivername%.logpro %waivername%.html</code></pre>
</div></div>
</div>
</div>
<div class="sect2">
<h3 id="_ezsteps">Ezsteps</h3>
<div class="paragraph"><p>To transfer the environment to the next step you can do the following:</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>$MT_MEGATEST -env2file .ezsteps/${stepname}</tt></pre>
<pre><code>$MT_MEGATEST -env2file .ezsteps/${stepname}</code></pre>
</div></div>
</div>
<div class="sect2">
<h3 id="_triggers">Triggers</h3>
<div class="paragraph"><p>In your testconfig triggers can be specified</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>[triggers]
<pre><code>[triggers]

# Call script running.sh when test goes to state=RUNNING, status=PASS
RUNNING/PASS running.sh

# Call script running.sh any time state goes to RUNNING
RUNNING/ running.sh

# Call script onpass.sh any time status goes to PASS
PASS/ onpass.sh</tt></pre>
PASS/ onpass.sh</code></pre>
</div></div>
<div class="paragraph"><p>Scripts called will have; test-id test-rundir trigger, added to the commandline.</p></div>
<div class="paragraph"><p>HINT</p></div>
<div class="paragraph"><p>To start an xterm (useful for debugging), use a command line like the following:</p></div>
<div class="listingblock">
<div class="content">
<pre><code>[triggers]
COMPLETED/ xterm -e bash -s --</code></pre>
</div></div>
<div class="admonitionblock">
<table><tr>
<td class="icon">
<div class="title">Note</div>
</td>
<td class="content">There is a trailing space after the --</td>
</tr></table>
</div>
</div>
<div class="sect2">
<h3 id="_megatest_internals">Megatest Internals</h3>
<div class="imageblock graphviz">
<div class="content">
<img src="server.png" alt="server.png" />
</div>
1261
1262
1263
1264
1265
1266
1267
1268

1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279

1280
1281
1282
1283
1284







-
+




</div>
</div>
</div>
<div id="footnotes"><hr /></div>
<div id="footer">
<div id="footer-text">
Version 1.0<br />
Last updated 2014-09-12 13:35:39 MST
Last updated 2014-10-08 23:02:21 MST
</div>
</div>
</body>
</html>

Modified docs/manual/server.png from [da39a3ee5e] to [a508d3edd1].

cannot compute difference between binary files

Modified http-transport.scm from [cf3cf50511] to [f3dd18aa3b].

254
255
256
257
258
259
260

261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279

280
281
282
283
284
285
286
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288







+



















+







	    exn
	    (debug:print 0 "WARNING: closing connections failed. Server at " fullurl " almost certainly dead")
	    (close-all-connections!))
	   (debug:print 0 "WARNING: Failed to communicate with server, trying again, numretries left: " numretries)
	   (http-transport:client-api-send-receive run-id serverdat cmd params numretries: (- numretries 1)))
	 (begin
	   (mutex-unlock! *http-mutex*)
	   (tasks:kill-server-run-id run-id)
	   #f))
     (begin
       (debug:print-info 11 "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n")
       ;; set up the http-client here
       (max-retry-attempts 1)
       ;; consider all requests indempotent
       (retry-request? (lambda (request)
			 #f))
       ;; send the data and get the response
       ;; extract the needed info from the http data and 
       ;; process and return it.
       (let* ((send-recieve (lambda ()
			      (mutex-lock! *http-mutex*)
			      ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines)
			      ;;					       ((exn http client-error) e (print e)))
			      (set! res (handle-exceptions
					 exn
					 (begin
					   (debug:print 0 "ERROR: failure in with-input-from-request. Giving up.")
					   (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
					   #f)
					 (with-input-from-request ;; was dat
					  fullurl 
					  (list (cons 'key "thekey")
						(cons 'cmd cmd)
						(cons 'params params))
					  read-string)))
534
535
536
537
538
539
540

541
542
543
544
545
546
547
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550







+







	  (thread-start! th2)
	  (thread-start! th3)
	  (set! *didsomething* #t)
	  (thread-join! th2)
	  (exit)))))

(define (http-transport:server-signal-handler signum)
  (signal-mask! signum)
  (handle-exceptions
   exn
   (debug:print " ... exiting ...")
   (let ((th1 (make-thread (lambda ()
			     (thread-sleep! 1))
			   "eat response"))
	 (th2 (make-thread (lambda ()

Modified launch.scm from [09c74e18be] to [1d44ba3939].

359
360
361
362
363
364
365

366


367


368
369
370




371
372
373
374
375
376
377
359
360
361
362
363
364
365
366

367
368
369
370
371



372
373
374
375
376
377
378
379
380
381
382







+
-
+
+

+
+
-
-
-
+
+
+
+







						    (pids (delete-duplicates (filter number? (list pid1 pid2)))))
					       (if (not (null? pids))
						   (begin
						     (for-each
						      (lambda (pid)
							(handle-exceptions
							 exn
							 (begin
							 (debug:print-info 0 "Unable to kill process with pid " pid ", possibly already killed.")
							   (debug:print-info 0 "Unable to kill process with pid " pid ", possibly already killed.")
							   (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)))
							 (debug:print 0 "WARNING: Request received to kill job " pid) ;;  " (attempt # " kill-tries ")")
							 (if (process:alive? pid)
							     (begin
							 (process-signal pid signal/int)
							 (thread-sleep! 5)
							 (process-signal pid signal/kill)))
							       (process-signal pid signal/int)
							       (thread-sleep! 5)
							       (if (process:process-alive? pid)
								   (process-signal pid signal/kill))))))
						      pids)
						     (tests:test-set-status! run-id test-id "KILLED"  "KILLED" (args:get-arg "-m") #f))
						   (begin
						     (debug:print 0 "ERROR: Nothing to kill, pid1=" pid1 ", pid2=" pid2)
						     (tests:test-set-status! run-id test-id "KILLED"  "FAILED TO KILL" (args:get-arg "-m") #f)
						     )))
					     (mutex-unlock! m)
450
451
452
453
454
455
456

457
458
459
460
461
462
463
464
465

466


467
468
469
470
471
472
473
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472

473
474
475
476
477
478
479
480
481







+









+
-
+
+







	  (if linktree
	      (if (not (file-exists? linktree))
		  (begin
		    (handle-exceptions
		     exn
		     (begin
		       (debug:print 0 "ERROR: Something went wrong when trying to create linktree dir at " linktree)
		       (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
		       (exit 1))
		     (create-directory linktree #t))))
	      (begin
		(debug:print 0 "ERROR: linktree not defined in [setup] section of megatest.config")
		(exit 1)))
	  (if linktree
	      (let ((dbdir (conc linktree "/.db")))
		(handle-exceptions
		 exn
		 (begin
		 (debug:print 0 "ERROR: failed to create the " dbdir " area for your database files")
		   (debug:print 0 "ERROR: failed to create the " dbdir " area for your database files")
		   (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)))
		 (if (not (directory-exists? dbdir))(create-directory dbdir)))
		(setenv "MT_LINKTREE" linktree))
	      (begin
		(debug:print 0 "ERROR: linktree is required in your megatest.config [setup] section")
		(exit 1)))
	  (if (and *toppath*
		   (directory-exists? *toppath*))

Modified lock-queue.scm from [0c7d16446b] to [31ed29958c].

34
35
36
37
38
39
40



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55















56
57
58
59
60
61
62
63
64

65
66
67
68
69
70
71
72
73
74
75
76
77
78
79

80
81
82
83
84
85
86
34
35
36
37
38
39
40
41
42
43















44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91







+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+









+















+







	  (handle-exceptions
	   exn
	   (begin
	     (thread-sleep! 10)
	     (if (> count 0)
		 (lock-queue:open-db fname count: (- count 1))
		 db))
	   (sqlite3:with-transaction
	    db
	    (lambda ()
	   (sqlite3:execute 
	    db
	    "CREATE TABLE IF NOT EXISTS queue (
  	      id         INTEGER PRIMARY KEY,
              test_id    INTEGER,
              start_time INTEGER,
              state      TEXT,
              CONSTRAINT queue_constraint UNIQUE (test_id));")
	   (sqlite3:execute
	    db
	    "CREATE TABLE IF NOT EXISTS runlocks (
              id         INTEGER PRIMARY KEY,
              test_id    INTEGER,
              run_lock   TEXT,
              CONSTRAINT runlock_constraint UNIQUE (run_lock));"))))
	      (sqlite3:execute 
	       db
	       "CREATE TABLE IF NOT EXISTS queue (
     	         id         INTEGER PRIMARY KEY,
                 test_id    INTEGER,
                 start_time INTEGER,
                 state      TEXT,
                 CONSTRAINT queue_constraint UNIQUE (test_id));")
	      (sqlite3:execute
	       db
	       "CREATE TABLE IF NOT EXISTS runlocks (
                 id         INTEGER PRIMARY KEY,
                 test_id    INTEGER,
                 run_lock   TEXT,
                 CONSTRAINT runlock_constraint UNIQUE (run_lock));"))))))
    (sqlite3:set-busy-handler! db handler)
    db))

(define (lock-queue:set-state db test-id newstate #!key (remtries 10))
  (handle-exceptions
   exn
   (if (> remtries 0)
       (begin
	 (debug:print 0 "WARNING: exception on lock-queue:set-state. Trying again in 30 seconds.")
	 (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	 (thread-sleep! 30)
	 (lock-queue:set-state db test-id newstate remtries: (- remtries 1)))
       (begin
	 (debug:print 0 "ERROR:  Failed to set lock state for test with id " test-id ", error: " ((condition-property-accessor 'exn 'message) exn) ", giving up.")
	 #f))
   (sqlite3:execute db "UPDATE queue SET state=? WHERE test_id=?;"
		    newstate
		    test-id)))

(define (lock-queue:any-younger? db mystart test-id #!key (remtries 10))
  (handle-exceptions
   exn
   (if (> remtries 0)
       (begin
	 (debug:print 0 "WARNING: exception on lock-queue:any-younger. Trying again in 30 seconds.")
	 (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	 (thread-sleep! 30)
	 (lock-queue:any-younger? db mystart test-id remtries: (- remtries 1)))
       (begin
	 (debug:print 0 "ERROR:  Failed to find younger locks for test with id " test-id ", error: " ((condition-property-accessor 'exn 'message) exn) ", giving up.")
	 #f))
   (let ((res #f))
     (sqlite3:for-each-row
96
97
98
99
100
101
102


103
104
105
106
107
108
109
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116







+
+







  (let ((res       #f)
	(lckqry    (sqlite3:prepare db "SELECT test_id,run_lock FROM runlocks WHERE run_lock='locked';"))
	(mklckqry  (sqlite3:prepare db "INSERT INTO runlocks (test_id,run_lock) VALUES (?,'locked');")))
    (let ((result 
	   (handle-exceptions
	    exn
	    (begin
	      (debug:print 0 "WARNING: failed to get queue lock. Will try again in a few seconds")
	      (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	      (thread-sleep! 10)
	      (if (> count 0)
		  (lock-queue:get-lock db test-id count: (- count 1)))
	      #f)
	    (sqlite3:with-transaction
	     db
	     (lambda ()
123
124
125
126
127
128
129


130
131
132
133
134
135
136
137
138
139
140


141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157


158
159
160
161
162
163
164
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177







+
+











+
+

















+
+







      result)))

(define (lock-queue:release-lock fname test-id #!key (count 10))
  (let ((db (lock-queue:open-db fname)))
    (handle-exceptions
     exn
     (begin
       (debug:print 0 "WARNING: Failed to release queue lock. Will try again in few seconds")
       (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
       (thread-sleep! 10)
       (if (> count 0)
	   (lock-queue:release-lock fname test-id count: (- count 1))
	   #f))
     (sqlite3:execute db "DELETE FROM runlocks WHERE test_id=?;" test-id)
     (sqlite3:finalize! db))))

(define (lock-queue:steal-lock db test-id #!key (count 10))
  (handle-exceptions
   exn
   (begin
     (debug:print 0 "WARNING: Failed to steal queue lock. Will try again in few seconds")
     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
     (thread-sleep! 10)
     (if (> count 0)
	 (lock-queue:steal-lock db test-id count: (- count 1))
	 #f))
   (sqlite3:execute db "DELETE FROM runlocks WHERE run_lock='locked';"))
  (lock-queue:get-lock db test-it))

;; returns #f if ok to skip the task
;; returns #t if ok to proceed with task
;; otherwise waits
;;
(define (lock-queue:wait-turn fname test-id #!key (count 10))
  (let ((db      (lock-queue:open-db fname))
	(mystart (current-seconds)))
    (handle-exceptions
     exn
     (begin
       (debug:print 0 "WARNING: Failed to find out if it is ok to skip the wait queue. Will try again in few seconds")
       (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
       (thread-sleep! 10)
       (if (> count 0)
	   (lock-queue:wait-turn fname test-id count: (- count 1))
	   #f))
     (sqlite3:execute
      db
      "INSERT OR REPLACE INTO queue (test_id,start_time,state) VALUES (?,?,'waiting');"

Modified process.scm from [88799f98f8] to [f9c6ebc5de].

123
124
125
126
127
128
129










123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139







+
+
+
+
+
+
+
+
+
+
		(res '()))
       (if (eof-object? inl)
	   (reverse res)
	   (let ((pid (string->number inl)))
	     (if proc (proc pid))
	     (loop (read-line) (cons pid res))))))))
       

(define (process:alive? pid)
  (handle-exceptions
   exn
   ;; possibly pid is a process not a child, look in /proc to see if it is running still
   (file-exists? (conc "/proc/" pid))
   (let-values (((rpid exit-type exit-signal)(process-wait pid #t)))
       (and (number? rpid)
	    (equal? rpid pid)))))
	 

Modified tasks.scm from [481c79cde8] to [9445a5df8c].

322
323
324
325
326
327
328

329



330













331
332
333
334
335
336
337
322
323
324
325
326
327
328
329

330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353







+
-
+
+
+

+
+
+
+
+
+
+
+
+
+
+
+
+







    res))

;; no elegance here ...
;;
(define (tasks:kill-server hostname pid)
  (debug:print-info 0 "Attempting to kill server process " pid " on host " hostname)
  (setenv "TARGETHOST" hostname)
  (setenv "TARGETHOST_LOGF" "server-kills.log")
  (system (conc "nbfake kill " pid)))
  (system (conc "nbfake kill " pid))
  (unsetenv "TARGETHOST_LOGF")
  (unsetenv "TARGETHOST"))
 
;; look up a server by run-id and send it a kill, also delete the record for that server
;;
(define (tasks:kill-server-run-id run-id)
  (let* ((tdb  (tasks:open-db))
	 (sdat (tasks:get-server mdb run-id)))
    (if sdat
	(let ((hostname (vector-ref sdat 6))
	      (pid      (vector-ref sdat 5)))
	  (debug:print-info 0 "Killing server for run-id " run-id " on host " hostname " with pid " pid)
	  (tasks:kill-server hostname pid)
	  (tasks:server-delete-record mdb server-id tag) )
	(debug:print-info 0 "No server found for run-id " run-id ", nothing to kill"))))
    
;;   (if status ;; #t means alive
;;       (begin
;; 	(if (equal? hostname (get-host-name))
;; 	    (handle-exceptions
;; 	     exn
;; 	     (debug:print-info 0 "server may or may not be dead, check for megatest -server running as pid " pid "\n"
;; 			       "  EXCEPTION: " ((condition-property-accessor 'exn 'message) exn))