Megatest

Check-in [8fd7d261b7]
Login
Overview
Comment:Merged streamline exception handling branch into v1.60
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.60
Files: files | file ages | folders
SHA1: 8fd7d261b754dd44ff490851e1bffdf421c3771c
User & Date: matt on 2014-11-02 09:25:32
Other Links: branch diff | manifest | tags
Context
2014-11-02
11:09
Speed up exit by moving delays around in the watchdog check-in: 0ca08ed334 user: mrwellan tags: v1.60
09:26
Merged v1.60 into trunk check-in: cf15dc19dc user: matt tags: trunk
09:25
Merged streamline exception handling branch into v1.60 check-in: 8fd7d261b7 user: matt tags: v1.60
09:24
stopped calls to set state and status where the test-id was not known (test not actually registered yet). Added safety net to some calls to print call chain if called with bad data, moved couple mutexes around to prevent contention Closed-Leaf check-in: 39d1713c91 user: matt tags: streamline-exception-handling
2014-10-29
17:29
Fixed nbfake variable handling - was causing log files disappearing check-in: 9926990b7a user: mrwellan tags: v1.60, v1.6005_ww44.5
Changes

Modified client.scm from [a5253ced0b] to [dc8b2be6ad].

124
125
126
127
128
129
130

131
132
133
134
135
136
137
;; keep this as a function to ease future 
(define (client:start run-id server-info)
  (http-transport:client-connect (tasks:hostinfo-get-interface server-info)
				 (tasks:hostinfo-get-port server-info)))

;; client:signal-handler
(define (client:signal-handler signum)

  (handle-exceptions
   exn
   (debug:print " ... exiting ...")
   (let ((th1 (make-thread (lambda ()
			     "") ;; do nothing for now (was flush out last call if applicable)
			   "eat response"))
	 (th2 (make-thread (lambda ()







>







124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
;; keep this as a function to ease future 
(define (client:start run-id server-info)
  (http-transport:client-connect (tasks:hostinfo-get-interface server-info)
				 (tasks:hostinfo-get-port server-info)))

;; client:signal-handler
(define (client:signal-handler signum)
  (signal-mask! signum)
  (handle-exceptions
   exn
   (debug:print " ... exiting ...")
   (let ((th1 (make-thread (lambda ()
			     "") ;; do nothing for now (was flush out last call if applicable)
			   "eat response"))
	 (th2 (make-thread (lambda ()

Modified common.scm from [a2276ff658] to [51076419ec].

154
155
156
157
158
159
160






161
162
163
164
165
166
167
168

(define (common:get-megatest-exe)
  (if (getenv "MT_MEGATEST") (getenv "MT_MEGATEST") "megatest"))

(define (common:read-encoded-string instr)
  (handle-exceptions
   exn






   (read (open-input-string (base64:base64-decode instr)))
   (read (open-input-string (z3:decode-buffer (base64:base64-decode instr))))))

;;======================================================================
;; S T A T E S   A N D   S T A T U S E S
;;======================================================================

(define *common:std-states*   







>
>
>
>
>
>
|







154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174

(define (common:get-megatest-exe)
  (if (getenv "MT_MEGATEST") (getenv "MT_MEGATEST") "megatest"))

(define (common:read-encoded-string instr)
  (handle-exceptions
   exn
   (handle-exceptions
    exn
    (begin
      (debug:print 0 "ERROR: received bad encoded string \"" instr "\", message: " ((condition-property-accessor 'exn 'message) exn))
      (print-call-chain)
      #f)
    (read (open-input-string (base64:base64-decode instr))))
   (read (open-input-string (z3:decode-buffer (base64:base64-decode instr))))))

;;======================================================================
;; S T A T E S   A N D   S T A T U S E S
;;======================================================================

(define *common:std-states*   
548
549
550
551
552
553
554











555
556
557
558
559
560
561
(define (seconds->year-work-week/day sec)
  (time->string
   (seconds->local-time sec) "%yww%V.%w"))

(define (seconds->year-work-week/day-time sec)
  (time->string
   (seconds->local-time sec) "%yww%V.%w %H:%M"))












;;======================================================================
;; Colors
;;======================================================================
      
(define (common:name->iup-color name)
  (case (string->symbol (string-downcase name))







>
>
>
>
>
>
>
>
>
>
>







554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
(define (seconds->year-work-week/day sec)
  (time->string
   (seconds->local-time sec) "%yww%V.%w"))

(define (seconds->year-work-week/day-time sec)
  (time->string
   (seconds->local-time sec) "%yww%V.%w %H:%M"))

(define (seconds->quarter sec)
  (case (string->number
	 (time->string 
	  (seconds->local-time sec)
	  "%m"))
    ((1 2 3) 1)
    ((4 5 6) 2)
    ((7 8 9) 3)
    ((10 11 12) 4)
    (else #f)))

;;======================================================================
;; Colors
;;======================================================================
      
(define (common:name->iup-color name)
  (case (string->symbol (string-downcase name))

Modified dashboard-tests.scm from [701f564f5d] to [22f7e05798].

559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
							    (> (current-milliseconds)(+ last-update 250))) ;; every half seconds if db touched
						       (> (current-milliseconds)(+ last-update 10000))     ;; force update even 10 seconds
						       request-update))
				    (newtestdat (if need-update 
						    ;; NOTE: BUG HIDER, try to eliminate this exception handler
						    (handle-exceptions
						     exn 
						     (debug:print-info 0 "test db access issue: " ((condition-property-accessor 'exn 'message) exn))
						     (db:get-test-info-by-id dbstruct run-id test-id )))))
			       ;; (debug:print-info 0 "need-update= " need-update " curr-mod-time = " curr-mod-time)
			       (cond
				((and need-update newtestdat)
				 (set! testdat newtestdat)
				 (set! teststeps    (dashboard-tests:get-compressed-steps dbstruct run-id test-id))
				 (set! logfile      (conc (db:test-get-rundir testdat) "/" (db:test-get-final_logf testdat)))







|







559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
							    (> (current-milliseconds)(+ last-update 250))) ;; every half seconds if db touched
						       (> (current-milliseconds)(+ last-update 10000))     ;; force update even 10 seconds
						       request-update))
				    (newtestdat (if need-update 
						    ;; NOTE: BUG HIDER, try to eliminate this exception handler
						    (handle-exceptions
						     exn 
						     (debug:print-info 0 "test db access issue in examine test for run-id " run-id ", test-id " test-id ": " ((condition-property-accessor 'exn 'message) exn))
						     (db:get-test-info-by-id dbstruct run-id test-id )))))
			       ;; (debug:print-info 0 "need-update= " need-update " curr-mod-time = " curr-mod-time)
			       (cond
				((and need-update newtestdat)
				 (set! testdat newtestdat)
				 (set! teststeps    (dashboard-tests:get-compressed-steps dbstruct run-id test-id))
				 (set! logfile      (conc (db:test-get-rundir testdat) "/" (db:test-get-final_logf testdat)))

Modified dashboard.scm from [7792f161eb] to [588f9fa302].

1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
(let ((db (tasks:open-db)))
  (sqlite3:finalize! db))

(define (dashboard:get-youngest-run-db-mod-time)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 "WARNING: error in accessing databases: " ((condition-property-accessor 'exn 'message) exn))
     (current-seconds)) ;; something went wrong - just print an error and return current-seconds
   (apply max (map (lambda (filen)
		     (file-modification-time filen))
		   (glob (conc *dbdir* "/*.db"))))))

(define (dashboard:run-update x)
  (let* ((modtime         (dashboard:get-youngest-run-db-mod-time)) ;; (file-modification-time *db-file-path*))







|







1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
(let ((db (tasks:open-db)))
  (sqlite3:finalize! db))

(define (dashboard:get-youngest-run-db-mod-time)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 "WARNING: error in accessing databases in get-youngest-run-db-mod-time: " ((condition-property-accessor 'exn 'message) exn))
     (current-seconds)) ;; something went wrong - just print an error and return current-seconds
   (apply max (map (lambda (filen)
		     (file-modification-time filen))
		   (glob (conc *dbdir* "/*.db"))))))

(define (dashboard:run-update x)
  (let* ((modtime         (dashboard:get-youngest-run-db-mod-time)) ;; (file-modification-time *db-file-path*))

Modified db.scm from [1e343e9345] to [d5f00a0358].

294
295
296
297
298
299
300

301




302
303
304
305
306
307
308
		 (sqlite3:finalize! db #t))))
	 (hash-table-values (dbr:dbstruct-get-locdbs dbstruct))))
    (thread-sleep! 3)
    (if (and rundb
	     (sqlite3:database? rundb))
	(handle-exceptions
	 exn

	 #t ;; (debug:print 0 "WARNING: database files may not have been closed correctly. Consider running -cleanup-db")




	 (sqlite3:interrupt! rundb)
	 (sqlite3:finalize! rundb #t))))
  ;; (mutex-unlock! *db-sync-mutex*)
  )

(define (db:open-inmem-db)
  (let* ((db      (sqlite3:open-database ":memory:"))







>
|
>
>
>
>







294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
		 (sqlite3:finalize! db #t))))
	 (hash-table-values (dbr:dbstruct-get-locdbs dbstruct))))
    (thread-sleep! 3)
    (if (and rundb
	     (sqlite3:database? rundb))
	(handle-exceptions
	 exn
	 (begin 
	   (debug:print 0 "WARNING: database files may not have been closed correctly. Consider running -cleanup-db")
	   (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	   (debug:print 0 " db: " rundb)
	   (print-call-chain)
	   #f)
	 (sqlite3:interrupt! rundb)
	 (sqlite3:finalize! rundb #t))))
  ;; (mutex-unlock! *db-sync-mutex*)
  )

(define (db:open-inmem-db)
  (let* ((db      (sqlite3:open-database ":memory:"))
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
  (handle-exceptions
   exn
   (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
     ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn)
     (if (eq? err-status 'done)
	 default
	 (begin
	   (debug:print 0 "ERROR:  query " stmt " failed " ((condition-property-accessor 'exn 'message) exn))
	   (print-call-chain)
	   default)))
   (apply sqlite3:first-result db stmt params)))

;;======================================================================
;; Extract ods file from the db
;;======================================================================







|







2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
  (handle-exceptions
   exn
   (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
     ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn)
     (if (eq? err-status 'done)
	 default
	 (begin
	   (debug:print 0 "ERROR:  query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn))
	   (print-call-chain)
	   default)))
   (apply sqlite3:first-result db stmt params)))

;;======================================================================
;; Extract ods file from the db
;;======================================================================

Modified docs/manual/megatest_manual.html from [ec93b0a8a8] to [191f1255c5].

1
2
3
4
5
6
7
8
9
10
11
12
13
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.6.7" />
<title>The Megatest Users Manual</title>
<style type="text/css">
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */

/* Default font. */
body {
  font-family: Georgia,serif;





|







1
2
3
4
5
6
7
8
9
10
11
12
13
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.6.9" />
<title>The Megatest Users Manual</title>
<style type="text/css">
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */

/* Default font. */
body {
  font-family: Georgia,serif;
83
84
85
86
87
88
89
90



91
92
93



94
95
96
97
98
99
100

ul, ol, li > p {
  margin-top: 0;
}
ul > li     { color: #aaa; }
ul > li > * { color: black; }

pre {



  padding: 0;
  margin: 0;
}




#author {
  color: #527bbd;
  font-weight: bold;
  font-size: 1.1em;
}
#email {







|
>
>
>



>
>
>







83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106

ul, ol, li > p {
  margin-top: 0;
}
ul > li     { color: #aaa; }
ul > li > * { color: black; }

.monospaced, code, pre {
  font-family: "Courier New", Courier, monospace;
  font-size: inherit;
  color: navy;
  padding: 0;
  margin: 0;
}
pre {
  white-space: pre-wrap;
}

#author {
  color: #527bbd;
  font-weight: bold;
  font-size: 1.1em;
}
#email {
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229

div.exampleblock > div.content {
  border-left: 3px solid #dddddd;
  padding-left: 0.5em;
}

div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; }
a.image:visited { color: white; }

dl {
  margin-top: 0.8em;
  margin-bottom: 0.8em;
}
dt {







|







221
222
223
224
225
226
227
228
229
230
231
232
233
234
235

div.exampleblock > div.content {
  border-left: 3px solid #dddddd;
  padding-left: 0.5em;
}

div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; }

dl {
  margin-top: 0.8em;
  margin-bottom: 0.8em;
}
dt {
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430


/*
 * xhtml11 specific
 *
 * */

tt {
  font-family: "Courier New", Courier, monospace;
  font-size: inherit;
  color: navy;
}

div.tableblock {
  margin-top: 1.0em;
  margin-bottom: 1.5em;
}
div.tableblock > table {
  border: 3px solid #527bbd;
}







<
<
<
<
<
<







417
418
419
420
421
422
423






424
425
426
427
428
429
430


/*
 * xhtml11 specific
 *
 * */







div.tableblock {
  margin-top: 1.0em;
  margin-bottom: 1.5em;
}
div.tableblock > table {
  border: 3px solid #527bbd;
}
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469


/*
 * html5 specific
 *
 * */

.monospaced {
  font-family: "Courier New", Courier, monospace;
  font-size: inherit;
  color: navy;
}

table.tableblock {
  margin-top: 1.0em;
  margin-bottom: 1.5em;
}
thead, p.tableblock.header {
  font-weight: bold;
  color: #527bbd;







<
<
<
<
<
<







450
451
452
453
454
455
456






457
458
459
460
461
462
463


/*
 * html5 specific
 *
 * */







table.tableblock {
  margin-top: 1.0em;
  margin-bottom: 1.5em;
}
thead, p.tableblock.header {
  font-weight: bold;
  color: #527bbd;
535
536
537
538
539
540
541


542
543
544
545
546
547
548
body.manpage div.sectionbody {
  margin-left: 3em;
}

@media print {
  body.manpage div#toc { display: none; }
}


</style>
<script type="text/javascript">
/*<![CDATA[*/
var asciidoc = {  // Namespace.

/////////////////////////////////////////////////////////////////////
// Table Of Contents generator







>
>







529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
body.manpage div.sectionbody {
  margin-left: 3em;
}

@media print {
  body.manpage div#toc { display: none; }
}


</style>
<script type="text/javascript">
/*<![CDATA[*/
var asciidoc = {  // Namespace.

/////////////////////////////////////////////////////////////////////
// Table Of Contents generator
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
/*]]>*/
</script>
</head>
<body class="book">
<div id="header">
<h1>The Megatest Users Manual</h1>
<span id="author">Matt Welland</span><br />
<span id="email"><tt>&lt;<a href="mailto:matt@kiatoa.com">matt@kiatoa.com</a>&gt;</tt></span><br />
<span id="revnumber">version 1.0,</span>
<span id="revdate">April 2012</span>
</div>
<div id="content">
<div class="sect1">
<h2 id="_preface">Preface</h2>
<div class="sectionbody">







|







731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
/*]]>*/
</script>
</head>
<body class="book">
<div id="header">
<h1>The Megatest Users Manual</h1>
<span id="author">Matt Welland</span><br />
<span id="email"><code>&lt;<a href="mailto:matt@kiatoa.com">matt@kiatoa.com</a>&gt;</code></span><br />
<span id="revnumber">version 1.0,</span>
<span id="revdate">April 2012</span>
</div>
<div id="content">
<div class="sect1">
<h2 id="_preface">Preface</h2>
<div class="sectionbody">
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
<div class="sect1">
<h2 id="_limiting_your_running_jobs">Limiting your running jobs</h2>
<div class="sectionbody">
<div class="paragraph"><p>The following example will limit a test in the jobgroup "group1" to no more than 10 tests simultaneously.</p></div>
<div class="paragraph"><p>In your testconfig:</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>[test_meta]
jobgroup group1</tt></pre>
</div></div>
<div class="paragraph"><p>In your megatest.config:</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>[jobgroups]
group1 10
custdes 4</tt></pre>
</div></div>
</div>
</div>
<div class="sect1">
<h2 id="_debugging_tricks">Debugging Tricks</h2>
<div class="sectionbody">
<div class="sect2">
<h3 id="_examining_the_environment">Examining The Environment</h3>
<div class="sect3">
<h4 id="_during_config_file_processing">During Config File Processing</h4>
</div>
<div class="sect3">
<h4 id="_organising_your_tests_and_tasks">Organising Your Tests and Tasks</h4>
<div class="listingblock">
<div class="content">
<pre><tt>[tests-paths]
1 #{get misc parent}/simplerun/tests</tt></pre>
</div></div>
<div class="listingblock">
<div class="content">
<pre><tt>[setup]</tt></pre>
</div></div>
<div class="paragraph"><p>The runscript method is a brute force way to run scripts where the
user is responsible for setting STATE and STATUS</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>runscript main.csh</tt></pre>
</div></div>
</div>
</div>
<div class="sect2">
<h3 id="_debugging_server_problems">Debugging Server Problems</h3>
<div class="listingblock">
<div class="content">
<pre><tt>sudo lsof -i
sudo netstat -lptu
sudo netstat -tulpn</tt></pre>
</div></div>
</div>
</div>
</div>
<h1 id="_reference">Reference</h1>
<div class="sect1">
<h2 id="_the_first_chapter_of_the_second_part_2">The First Chapter of the Second Part</h2>







|
|




|

|















|
|



|





|







|

|







959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
<div class="sect1">
<h2 id="_limiting_your_running_jobs">Limiting your running jobs</h2>
<div class="sectionbody">
<div class="paragraph"><p>The following example will limit a test in the jobgroup "group1" to no more than 10 tests simultaneously.</p></div>
<div class="paragraph"><p>In your testconfig:</p></div>
<div class="listingblock">
<div class="content">
<pre><code>[test_meta]
jobgroup group1</code></pre>
</div></div>
<div class="paragraph"><p>In your megatest.config:</p></div>
<div class="listingblock">
<div class="content">
<pre><code>[jobgroups]
group1 10
custdes 4</code></pre>
</div></div>
</div>
</div>
<div class="sect1">
<h2 id="_debugging_tricks">Debugging Tricks</h2>
<div class="sectionbody">
<div class="sect2">
<h3 id="_examining_the_environment">Examining The Environment</h3>
<div class="sect3">
<h4 id="_during_config_file_processing">During Config File Processing</h4>
</div>
<div class="sect3">
<h4 id="_organising_your_tests_and_tasks">Organising Your Tests and Tasks</h4>
<div class="listingblock">
<div class="content">
<pre><code>[tests-paths]
1 #{get misc parent}/simplerun/tests</code></pre>
</div></div>
<div class="listingblock">
<div class="content">
<pre><code>[setup]</code></pre>
</div></div>
<div class="paragraph"><p>The runscript method is a brute force way to run scripts where the
user is responsible for setting STATE and STATUS</p></div>
<div class="listingblock">
<div class="content">
<pre><code>runscript main.csh</code></pre>
</div></div>
</div>
</div>
<div class="sect2">
<h3 id="_debugging_server_problems">Debugging Server Problems</h3>
<div class="listingblock">
<div class="content">
<pre><code>sudo lsof -i
sudo netstat -lptu
sudo netstat -tulpn</code></pre>
</div></div>
</div>
</div>
</div>
<h1 id="_reference">Reference</h1>
<div class="sect1">
<h2 id="_the_first_chapter_of_the_second_part_2">The First Chapter of the Second Part</h2>
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
















1176
1177
1178
1179
1180
1181
1182
<div class="sectionbody">
<div class="sect2">
<h3 id="_setup_section">Setup section</h3>
<div class="sect3">
<h4 id="_header">Header</h4>
<div class="listingblock">
<div class="content">
<pre><tt>[setup]</tt></pre>
</div></div>
<div class="paragraph"><p>The runscript method is a brute force way to run scripts where the
user is responsible for setting STATE and STATUS</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>runscript main.csh</tt></pre>
</div></div>
</div>
</div>
<div class="sect2">
<h3 id="_requirements_section">Requirements section</h3>
<div class="sect3">
<h4 id="_header_2">Header</h4>
<div class="listingblock">
<div class="content">
<pre><tt>[requirements]</tt></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_wait_on_other_tests">Wait on Other Tests</h4>
<div class="listingblock">
<div class="content">
<pre><tt># A normal waiton waits for the prior tests to be COMPLETED
# and PASS, CHECK or WAIVED
waiton test1 test2</tt></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_mode">Mode</h4>
<div class="paragraph"><p>The default (i.e. if mode is not specified) is normal. All pre-dependent tests
must be COMPLETED and PASS, CHECK or WAIVED before the test will start</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>mode   normal</tt></pre>
</div></div>
<div class="paragraph"><p>The toplevel mode requires only that the prior tests are COMPLETED.</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>mode toplevel</tt></pre>
</div></div>
<div class="paragraph"><p>A item based waiton will start items in a test when the
same-named item is COMPLETED and PASS, CHECK or WAIVED
in the prior test</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>mode itemmatch</tt></pre>
</div></div>
<div class="listingblock">
<div class="content">
<pre><tt># With a toplevel test you may wish to generate your list
# of tests to run dynamically
#
# waiton #{shell get-valid-tests-to-run.sh}</tt></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_run_time_limit">Run time limit</h4>
<div class="listingblock">
<div class="content">
<pre><tt>runtimelim 1h 2m 3s  # this will automatically kill the test if it runs for more than 1h 2m and 3s</tt></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_skip">Skip</h4>
</div>
<div class="sect3">
<h4 id="_header_3">Header</h4>
<div class="listingblock">
<div class="content">
<pre><tt>[skip]</tt></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_skip_on_still_running_tests">Skip on Still-running Tests</h4>
<div class="listingblock">
<div class="content">
<pre><tt># NB// If the prevrunning line exists with *any* value the test will
# automatically SKIP if the same-named test is currently RUNNING

prevrunning x</tt></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_skip_if_a_file_exists">Skip if a File Exists</h4>
<div class="listingblock">
<div class="content">
<pre><tt>fileexists /path/to/a/file # skip if /path/to/a/file exists</tt></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_controlled_waiver_propagation">Controlled waiver propagation</h4>
<div class="paragraph"><p>If test is FAIL and previous test in run with same MT_TARGET is WAIVED then apply the following rules from the testconfig:
If a waiver check is specified in the testconfig apply the check and if it passes then set this FAIL to WAIVED</p></div>
<div class="paragraph"><p>Waiver check has two parts, 1) a list of waiver, rulename, filepatterns and 2) the rulename script spec (note that "diff" and "logpro" are predefined)</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>###### EXAMPLE FROM testconfig #########
# matching file(s) will be diff'd with previous run and logpro applied
# if PASS or WARN result from logpro then WAIVER state is set
#
[waivers]
# logpro_file    rulename      input_glob
waiver_1         logpro        lookittmp.log

[waiver_rules]

# This builtin rule is the default if there is no &lt;waivername&gt;.logpro file
# diff   diff %file1% %file2%

# This builtin rule is applied if a &lt;waivername&gt;.logpro file exists
# logpro diff %file1% %file2% | logpro %waivername%.logpro %waivername%.html</tt></pre>
</div></div>
</div>
</div>
<div class="sect2">
<h3 id="_ezsteps">Ezsteps</h3>
<div class="paragraph"><p>To transfer the environment to the next step you can do the following:</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>$MT_MEGATEST -env2file .ezsteps/${stepname}</tt></pre>
</div></div>
</div>
<div class="sect2">
<h3 id="_triggers">Triggers</h3>
<div class="paragraph"><p>In your testconfig triggers can be specified</p></div>
<div class="listingblock">
<div class="content">
<pre><tt>[triggers]

# Call script running.sh when test goes to state=RUNNING, status=PASS
RUNNING/PASS running.sh

# Call script running.sh any time state goes to RUNNING
RUNNING/ running.sh

# Call script onpass.sh any time status goes to PASS
PASS/ onpass.sh</tt></pre>
</div></div>
















</div>
<div class="sect2">
<h3 id="_megatest_internals">Megatest Internals</h3>
<div class="imageblock graphviz">
<div class="content">
<img src="server.png" alt="server.png" />
</div>







|





|









|






|

|








|




|






|



|


|






|









|






|


|






|









|













|








|







|








|

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
<div class="sectionbody">
<div class="sect2">
<h3 id="_setup_section">Setup section</h3>
<div class="sect3">
<h4 id="_header">Header</h4>
<div class="listingblock">
<div class="content">
<pre><code>[setup]</code></pre>
</div></div>
<div class="paragraph"><p>The runscript method is a brute force way to run scripts where the
user is responsible for setting STATE and STATUS</p></div>
<div class="listingblock">
<div class="content">
<pre><code>runscript main.csh</code></pre>
</div></div>
</div>
</div>
<div class="sect2">
<h3 id="_requirements_section">Requirements section</h3>
<div class="sect3">
<h4 id="_header_2">Header</h4>
<div class="listingblock">
<div class="content">
<pre><code>[requirements]</code></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_wait_on_other_tests">Wait on Other Tests</h4>
<div class="listingblock">
<div class="content">
<pre><code># A normal waiton waits for the prior tests to be COMPLETED
# and PASS, CHECK or WAIVED
waiton test1 test2</code></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_mode">Mode</h4>
<div class="paragraph"><p>The default (i.e. if mode is not specified) is normal. All pre-dependent tests
must be COMPLETED and PASS, CHECK or WAIVED before the test will start</p></div>
<div class="listingblock">
<div class="content">
<pre><code>mode   normal</code></pre>
</div></div>
<div class="paragraph"><p>The toplevel mode requires only that the prior tests are COMPLETED.</p></div>
<div class="listingblock">
<div class="content">
<pre><code>mode toplevel</code></pre>
</div></div>
<div class="paragraph"><p>A item based waiton will start items in a test when the
same-named item is COMPLETED and PASS, CHECK or WAIVED
in the prior test</p></div>
<div class="listingblock">
<div class="content">
<pre><code>mode itemmatch</code></pre>
</div></div>
<div class="listingblock">
<div class="content">
<pre><code># With a toplevel test you may wish to generate your list
# of tests to run dynamically
#
# waiton #{shell get-valid-tests-to-run.sh}</code></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_run_time_limit">Run time limit</h4>
<div class="listingblock">
<div class="content">
<pre><code>runtimelim 1h 2m 3s  # this will automatically kill the test if it runs for more than 1h 2m and 3s</code></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_skip">Skip</h4>
</div>
<div class="sect3">
<h4 id="_header_3">Header</h4>
<div class="listingblock">
<div class="content">
<pre><code>[skip]</code></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_skip_on_still_running_tests">Skip on Still-running Tests</h4>
<div class="listingblock">
<div class="content">
<pre><code># NB// If the prevrunning line exists with *any* value the test will
# automatically SKIP if the same-named test is currently RUNNING

prevrunning x</code></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_skip_if_a_file_exists">Skip if a File Exists</h4>
<div class="listingblock">
<div class="content">
<pre><code>fileexists /path/to/a/file # skip if /path/to/a/file exists</code></pre>
</div></div>
</div>
<div class="sect3">
<h4 id="_controlled_waiver_propagation">Controlled waiver propagation</h4>
<div class="paragraph"><p>If test is FAIL and previous test in run with same MT_TARGET is WAIVED then apply the following rules from the testconfig:
If a waiver check is specified in the testconfig apply the check and if it passes then set this FAIL to WAIVED</p></div>
<div class="paragraph"><p>Waiver check has two parts, 1) a list of waiver, rulename, filepatterns and 2) the rulename script spec (note that "diff" and "logpro" are predefined)</p></div>
<div class="listingblock">
<div class="content">
<pre><code>###### EXAMPLE FROM testconfig #########
# matching file(s) will be diff'd with previous run and logpro applied
# if PASS or WARN result from logpro then WAIVER state is set
#
[waivers]
# logpro_file    rulename      input_glob
waiver_1         logpro        lookittmp.log

[waiver_rules]

# This builtin rule is the default if there is no &lt;waivername&gt;.logpro file
# diff   diff %file1% %file2%

# This builtin rule is applied if a &lt;waivername&gt;.logpro file exists
# logpro diff %file1% %file2% | logpro %waivername%.logpro %waivername%.html</code></pre>
</div></div>
</div>
</div>
<div class="sect2">
<h3 id="_ezsteps">Ezsteps</h3>
<div class="paragraph"><p>To transfer the environment to the next step you can do the following:</p></div>
<div class="listingblock">
<div class="content">
<pre><code>$MT_MEGATEST -env2file .ezsteps/${stepname}</code></pre>
</div></div>
</div>
<div class="sect2">
<h3 id="_triggers">Triggers</h3>
<div class="paragraph"><p>In your testconfig triggers can be specified</p></div>
<div class="listingblock">
<div class="content">
<pre><code>[triggers]

# Call script running.sh when test goes to state=RUNNING, status=PASS
RUNNING/PASS running.sh

# Call script running.sh any time state goes to RUNNING
RUNNING/ running.sh

# Call script onpass.sh any time status goes to PASS
PASS/ onpass.sh</code></pre>
</div></div>
<div class="paragraph"><p>Scripts called will have; test-id test-rundir trigger, added to the commandline.</p></div>
<div class="paragraph"><p>HINT</p></div>
<div class="paragraph"><p>To start an xterm (useful for debugging), use a command line like the following:</p></div>
<div class="listingblock">
<div class="content">
<pre><code>[triggers]
COMPLETED/ xterm -e bash -s --</code></pre>
</div></div>
<div class="admonitionblock">
<table><tr>
<td class="icon">
<div class="title">Note</div>
</td>
<td class="content">There is a trailing space after the --</td>
</tr></table>
</div>
</div>
<div class="sect2">
<h3 id="_megatest_internals">Megatest Internals</h3>
<div class="imageblock graphviz">
<div class="content">
<img src="server.png" alt="server.png" />
</div>
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
</div>
</div>
</div>
<div id="footnotes"><hr /></div>
<div id="footer">
<div id="footer-text">
Version 1.0<br />
Last updated 2014-09-12 13:35:39 MST
</div>
</div>
</body>
</html>







|




1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
</div>
</div>
</div>
<div id="footnotes"><hr /></div>
<div id="footer">
<div id="footer-text">
Version 1.0<br />
Last updated 2014-10-08 23:02:21 MST
</div>
</div>
</body>
</html>

Modified docs/manual/server.png from [da39a3ee5e] to [a508d3edd1].

cannot compute difference between binary files

Modified http-transport.scm from [cf3cf50511] to [f3dd18aa3b].

254
255
256
257
258
259
260

261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279

280
281
282
283
284
285
286
	    exn
	    (debug:print 0 "WARNING: closing connections failed. Server at " fullurl " almost certainly dead")
	    (close-all-connections!))
	   (debug:print 0 "WARNING: Failed to communicate with server, trying again, numretries left: " numretries)
	   (http-transport:client-api-send-receive run-id serverdat cmd params numretries: (- numretries 1)))
	 (begin
	   (mutex-unlock! *http-mutex*)

	   #f))
     (begin
       (debug:print-info 11 "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n")
       ;; set up the http-client here
       (max-retry-attempts 1)
       ;; consider all requests indempotent
       (retry-request? (lambda (request)
			 #f))
       ;; send the data and get the response
       ;; extract the needed info from the http data and 
       ;; process and return it.
       (let* ((send-recieve (lambda ()
			      (mutex-lock! *http-mutex*)
			      ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines)
			      ;;					       ((exn http client-error) e (print e)))
			      (set! res (handle-exceptions
					 exn
					 (begin
					   (debug:print 0 "ERROR: failure in with-input-from-request. Giving up.")

					   #f)
					 (with-input-from-request ;; was dat
					  fullurl 
					  (list (cons 'key "thekey")
						(cons 'cmd cmd)
						(cons 'params params))
					  read-string)))







>



















>







254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
	    exn
	    (debug:print 0 "WARNING: closing connections failed. Server at " fullurl " almost certainly dead")
	    (close-all-connections!))
	   (debug:print 0 "WARNING: Failed to communicate with server, trying again, numretries left: " numretries)
	   (http-transport:client-api-send-receive run-id serverdat cmd params numretries: (- numretries 1)))
	 (begin
	   (mutex-unlock! *http-mutex*)
	   (tasks:kill-server-run-id run-id)
	   #f))
     (begin
       (debug:print-info 11 "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n")
       ;; set up the http-client here
       (max-retry-attempts 1)
       ;; consider all requests indempotent
       (retry-request? (lambda (request)
			 #f))
       ;; send the data and get the response
       ;; extract the needed info from the http data and 
       ;; process and return it.
       (let* ((send-recieve (lambda ()
			      (mutex-lock! *http-mutex*)
			      ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines)
			      ;;					       ((exn http client-error) e (print e)))
			      (set! res (handle-exceptions
					 exn
					 (begin
					   (debug:print 0 "ERROR: failure in with-input-from-request. Giving up.")
					   (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
					   #f)
					 (with-input-from-request ;; was dat
					  fullurl 
					  (list (cons 'key "thekey")
						(cons 'cmd cmd)
						(cons 'params params))
					  read-string)))
534
535
536
537
538
539
540

541
542
543
544
545
546
547
	  (thread-start! th2)
	  (thread-start! th3)
	  (set! *didsomething* #t)
	  (thread-join! th2)
	  (exit)))))

(define (http-transport:server-signal-handler signum)

  (handle-exceptions
   exn
   (debug:print " ... exiting ...")
   (let ((th1 (make-thread (lambda ()
			     (thread-sleep! 1))
			   "eat response"))
	 (th2 (make-thread (lambda ()







>







536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
	  (thread-start! th2)
	  (thread-start! th3)
	  (set! *didsomething* #t)
	  (thread-join! th2)
	  (exit)))))

(define (http-transport:server-signal-handler signum)
  (signal-mask! signum)
  (handle-exceptions
   exn
   (debug:print " ... exiting ...")
   (let ((th1 (make-thread (lambda ()
			     (thread-sleep! 1))
			   "eat response"))
	 (th2 (make-thread (lambda ()

Modified launch.scm from [09c74e18be] to [1d44ba3939].

359
360
361
362
363
364
365

366

367


368
369

370
371
372
373
374
375
376
377
						    (pids (delete-duplicates (filter number? (list pid1 pid2)))))
					       (if (not (null? pids))
						   (begin
						     (for-each
						      (lambda (pid)
							(handle-exceptions
							 exn

							 (debug:print-info 0 "Unable to kill process with pid " pid ", possibly already killed.")

							 (debug:print 0 "WARNING: Request received to kill job " pid) ;;  " (attempt # " kill-tries ")")


							 (process-signal pid signal/int)
							 (thread-sleep! 5)

							 (process-signal pid signal/kill)))
						      pids)
						     (tests:test-set-status! run-id test-id "KILLED"  "KILLED" (args:get-arg "-m") #f))
						   (begin
						     (debug:print 0 "ERROR: Nothing to kill, pid1=" pid1 ", pid2=" pid2)
						     (tests:test-set-status! run-id test-id "KILLED"  "FAILED TO KILL" (args:get-arg "-m") #f)
						     )))
					     (mutex-unlock! m)







>
|
>

>
>
|
|
>
|







359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
						    (pids (delete-duplicates (filter number? (list pid1 pid2)))))
					       (if (not (null? pids))
						   (begin
						     (for-each
						      (lambda (pid)
							(handle-exceptions
							 exn
							 (begin
							   (debug:print-info 0 "Unable to kill process with pid " pid ", possibly already killed.")
							   (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)))
							 (debug:print 0 "WARNING: Request received to kill job " pid) ;;  " (attempt # " kill-tries ")")
							 (if (process:alive? pid)
							     (begin
							       (process-signal pid signal/int)
							       (thread-sleep! 5)
							       (if (process:process-alive? pid)
								   (process-signal pid signal/kill))))))
						      pids)
						     (tests:test-set-status! run-id test-id "KILLED"  "KILLED" (args:get-arg "-m") #f))
						   (begin
						     (debug:print 0 "ERROR: Nothing to kill, pid1=" pid1 ", pid2=" pid2)
						     (tests:test-set-status! run-id test-id "KILLED"  "FAILED TO KILL" (args:get-arg "-m") #f)
						     )))
					     (mutex-unlock! m)
450
451
452
453
454
455
456

457
458
459
460
461
462
463
464
465

466

467
468
469
470
471
472
473
	  (if linktree
	      (if (not (file-exists? linktree))
		  (begin
		    (handle-exceptions
		     exn
		     (begin
		       (debug:print 0 "ERROR: Something went wrong when trying to create linktree dir at " linktree)

		       (exit 1))
		     (create-directory linktree #t))))
	      (begin
		(debug:print 0 "ERROR: linktree not defined in [setup] section of megatest.config")
		(exit 1)))
	  (if linktree
	      (let ((dbdir (conc linktree "/.db")))
		(handle-exceptions
		 exn

		 (debug:print 0 "ERROR: failed to create the " dbdir " area for your database files")

		 (if (not (directory-exists? dbdir))(create-directory dbdir)))
		(setenv "MT_LINKTREE" linktree))
	      (begin
		(debug:print 0 "ERROR: linktree is required in your megatest.config [setup] section")
		(exit 1)))
	  (if (and *toppath*
		   (directory-exists? *toppath*))







>









>
|
>







455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
	  (if linktree
	      (if (not (file-exists? linktree))
		  (begin
		    (handle-exceptions
		     exn
		     (begin
		       (debug:print 0 "ERROR: Something went wrong when trying to create linktree dir at " linktree)
		       (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
		       (exit 1))
		     (create-directory linktree #t))))
	      (begin
		(debug:print 0 "ERROR: linktree not defined in [setup] section of megatest.config")
		(exit 1)))
	  (if linktree
	      (let ((dbdir (conc linktree "/.db")))
		(handle-exceptions
		 exn
		 (begin
		   (debug:print 0 "ERROR: failed to create the " dbdir " area for your database files")
		   (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)))
		 (if (not (directory-exists? dbdir))(create-directory dbdir)))
		(setenv "MT_LINKTREE" linktree))
	      (begin
		(debug:print 0 "ERROR: linktree is required in your megatest.config [setup] section")
		(exit 1)))
	  (if (and *toppath*
		   (directory-exists? *toppath*))

Modified lock-queue.scm from [0c7d16446b] to [31ed29958c].

34
35
36
37
38
39
40



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64

65
66
67
68
69
70
71
72
73
74
75
76
77
78
79

80
81
82
83
84
85
86
	  (handle-exceptions
	   exn
	   (begin
	     (thread-sleep! 10)
	     (if (> count 0)
		 (lock-queue:open-db fname count: (- count 1))
		 db))



	   (sqlite3:execute 
	    db
	    "CREATE TABLE IF NOT EXISTS queue (
  	      id         INTEGER PRIMARY KEY,
              test_id    INTEGER,
              start_time INTEGER,
              state      TEXT,
              CONSTRAINT queue_constraint UNIQUE (test_id));")
	   (sqlite3:execute
	    db
	    "CREATE TABLE IF NOT EXISTS runlocks (
              id         INTEGER PRIMARY KEY,
              test_id    INTEGER,
              run_lock   TEXT,
              CONSTRAINT runlock_constraint UNIQUE (run_lock));"))))
    (sqlite3:set-busy-handler! db handler)
    db))

(define (lock-queue:set-state db test-id newstate #!key (remtries 10))
  (handle-exceptions
   exn
   (if (> remtries 0)
       (begin
	 (debug:print 0 "WARNING: exception on lock-queue:set-state. Trying again in 30 seconds.")

	 (thread-sleep! 30)
	 (lock-queue:set-state db test-id newstate remtries: (- remtries 1)))
       (begin
	 (debug:print 0 "ERROR:  Failed to set lock state for test with id " test-id ", error: " ((condition-property-accessor 'exn 'message) exn) ", giving up.")
	 #f))
   (sqlite3:execute db "UPDATE queue SET state=? WHERE test_id=?;"
		    newstate
		    test-id)))

(define (lock-queue:any-younger? db mystart test-id #!key (remtries 10))
  (handle-exceptions
   exn
   (if (> remtries 0)
       (begin
	 (debug:print 0 "WARNING: exception on lock-queue:any-younger. Trying again in 30 seconds.")

	 (thread-sleep! 30)
	 (lock-queue:any-younger? db mystart test-id remtries: (- remtries 1)))
       (begin
	 (debug:print 0 "ERROR:  Failed to find younger locks for test with id " test-id ", error: " ((condition-property-accessor 'exn 'message) exn) ", giving up.")
	 #f))
   (let ((res #f))
     (sqlite3:for-each-row







>
>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|









>















>







34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
	  (handle-exceptions
	   exn
	   (begin
	     (thread-sleep! 10)
	     (if (> count 0)
		 (lock-queue:open-db fname count: (- count 1))
		 db))
	   (sqlite3:with-transaction
	    db
	    (lambda ()
	      (sqlite3:execute 
	       db
	       "CREATE TABLE IF NOT EXISTS queue (
     	         id         INTEGER PRIMARY KEY,
                 test_id    INTEGER,
                 start_time INTEGER,
                 state      TEXT,
                 CONSTRAINT queue_constraint UNIQUE (test_id));")
	      (sqlite3:execute
	       db
	       "CREATE TABLE IF NOT EXISTS runlocks (
                 id         INTEGER PRIMARY KEY,
                 test_id    INTEGER,
                 run_lock   TEXT,
                 CONSTRAINT runlock_constraint UNIQUE (run_lock));"))))))
    (sqlite3:set-busy-handler! db handler)
    db))

(define (lock-queue:set-state db test-id newstate #!key (remtries 10))
  (handle-exceptions
   exn
   (if (> remtries 0)
       (begin
	 (debug:print 0 "WARNING: exception on lock-queue:set-state. Trying again in 30 seconds.")
	 (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	 (thread-sleep! 30)
	 (lock-queue:set-state db test-id newstate remtries: (- remtries 1)))
       (begin
	 (debug:print 0 "ERROR:  Failed to set lock state for test with id " test-id ", error: " ((condition-property-accessor 'exn 'message) exn) ", giving up.")
	 #f))
   (sqlite3:execute db "UPDATE queue SET state=? WHERE test_id=?;"
		    newstate
		    test-id)))

(define (lock-queue:any-younger? db mystart test-id #!key (remtries 10))
  (handle-exceptions
   exn
   (if (> remtries 0)
       (begin
	 (debug:print 0 "WARNING: exception on lock-queue:any-younger. Trying again in 30 seconds.")
	 (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	 (thread-sleep! 30)
	 (lock-queue:any-younger? db mystart test-id remtries: (- remtries 1)))
       (begin
	 (debug:print 0 "ERROR:  Failed to find younger locks for test with id " test-id ", error: " ((condition-property-accessor 'exn 'message) exn) ", giving up.")
	 #f))
   (let ((res #f))
     (sqlite3:for-each-row
96
97
98
99
100
101
102


103
104
105
106
107
108
109
  (let ((res       #f)
	(lckqry    (sqlite3:prepare db "SELECT test_id,run_lock FROM runlocks WHERE run_lock='locked';"))
	(mklckqry  (sqlite3:prepare db "INSERT INTO runlocks (test_id,run_lock) VALUES (?,'locked');")))
    (let ((result 
	   (handle-exceptions
	    exn
	    (begin


	      (thread-sleep! 10)
	      (if (> count 0)
		  (lock-queue:get-lock db test-id count: (- count 1)))
	      #f)
	    (sqlite3:with-transaction
	     db
	     (lambda ()







>
>







101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
  (let ((res       #f)
	(lckqry    (sqlite3:prepare db "SELECT test_id,run_lock FROM runlocks WHERE run_lock='locked';"))
	(mklckqry  (sqlite3:prepare db "INSERT INTO runlocks (test_id,run_lock) VALUES (?,'locked');")))
    (let ((result 
	   (handle-exceptions
	    exn
	    (begin
	      (debug:print 0 "WARNING: failed to get queue lock. Will try again in a few seconds")
	      (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	      (thread-sleep! 10)
	      (if (> count 0)
		  (lock-queue:get-lock db test-id count: (- count 1)))
	      #f)
	    (sqlite3:with-transaction
	     db
	     (lambda ()
123
124
125
126
127
128
129


130
131
132
133
134
135
136
137
138
139
140


141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157


158
159
160
161
162
163
164
      result)))

(define (lock-queue:release-lock fname test-id #!key (count 10))
  (let ((db (lock-queue:open-db fname)))
    (handle-exceptions
     exn
     (begin


       (thread-sleep! 10)
       (if (> count 0)
	   (lock-queue:release-lock fname test-id count: (- count 1))
	   #f))
     (sqlite3:execute db "DELETE FROM runlocks WHERE test_id=?;" test-id)
     (sqlite3:finalize! db))))

(define (lock-queue:steal-lock db test-id #!key (count 10))
  (handle-exceptions
   exn
   (begin


     (thread-sleep! 10)
     (if (> count 0)
	 (lock-queue:steal-lock db test-id count: (- count 1))
	 #f))
   (sqlite3:execute db "DELETE FROM runlocks WHERE run_lock='locked';"))
  (lock-queue:get-lock db test-it))

;; returns #f if ok to skip the task
;; returns #t if ok to proceed with task
;; otherwise waits
;;
(define (lock-queue:wait-turn fname test-id #!key (count 10))
  (let ((db      (lock-queue:open-db fname))
	(mystart (current-seconds)))
    (handle-exceptions
     exn
     (begin


       (thread-sleep! 10)
       (if (> count 0)
	   (lock-queue:wait-turn fname test-id count: (- count 1))
	   #f))
     (sqlite3:execute
      db
      "INSERT OR REPLACE INTO queue (test_id,start_time,state) VALUES (?,?,'waiting');"







>
>











>
>

















>
>







130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
      result)))

(define (lock-queue:release-lock fname test-id #!key (count 10))
  (let ((db (lock-queue:open-db fname)))
    (handle-exceptions
     exn
     (begin
       (debug:print 0 "WARNING: Failed to release queue lock. Will try again in few seconds")
       (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
       (thread-sleep! 10)
       (if (> count 0)
	   (lock-queue:release-lock fname test-id count: (- count 1))
	   #f))
     (sqlite3:execute db "DELETE FROM runlocks WHERE test_id=?;" test-id)
     (sqlite3:finalize! db))))

(define (lock-queue:steal-lock db test-id #!key (count 10))
  (handle-exceptions
   exn
   (begin
     (debug:print 0 "WARNING: Failed to steal queue lock. Will try again in few seconds")
     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
     (thread-sleep! 10)
     (if (> count 0)
	 (lock-queue:steal-lock db test-id count: (- count 1))
	 #f))
   (sqlite3:execute db "DELETE FROM runlocks WHERE run_lock='locked';"))
  (lock-queue:get-lock db test-it))

;; returns #f if ok to skip the task
;; returns #t if ok to proceed with task
;; otherwise waits
;;
(define (lock-queue:wait-turn fname test-id #!key (count 10))
  (let ((db      (lock-queue:open-db fname))
	(mystart (current-seconds)))
    (handle-exceptions
     exn
     (begin
       (debug:print 0 "WARNING: Failed to find out if it is ok to skip the wait queue. Will try again in few seconds")
       (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
       (thread-sleep! 10)
       (if (> count 0)
	   (lock-queue:wait-turn fname test-id count: (- count 1))
	   #f))
     (sqlite3:execute
      db
      "INSERT OR REPLACE INTO queue (test_id,start_time,state) VALUES (?,?,'waiting');"

Modified megatest.scm from [3a1b995ec4] to [93df8109f6].

1
2
3
4
5
6
7
8
9
10
11
12
13


14
15
16
17
18
19
20
;; Copyright 2006-2012, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.

;; (include "common.scm")
;; (include "megatest-version.scm")

(use sqlite3 srfi-1 posix regex regex-case srfi-69 base64 format readline apropos json http-client directory-utils z3) ;; (srfi 18) extras)


(import (prefix sqlite3 sqlite3:))
(import (prefix base64 base64:))

;; (use zmq)

(declare (uses common))
(declare (uses megatest-version))












|
>
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
;; Copyright 2006-2012, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.

;; (include "common.scm")
;; (include "megatest-version.scm")

(use sqlite3 srfi-1 posix regex regex-case srfi-69 base64 format readline apropos json 
     http-client directory-utils z3 srfi-18) ;;  extras)

(import (prefix sqlite3 sqlite3:))
(import (prefix base64 base64:))

;; (use zmq)

(declare (uses common))
(declare (uses megatest-version))
276
277
278
279
280
281
282


































283
284
285
286
287
288
289

			"-logging"
			"-v" ;; verbose 2, more than normal (normal is 1)
			"-q" ;; quiet 0, errors/warnings only
		       )
		 args:arg-hash
		 0))



































(define (std-exit-procedure)
  (rmt:print-db-stats)
  (let ((run-ids (hash-table-keys *db-local-sync*)))
    (if (not (null? run-ids))
	(db:multi-db-sync run-ids 'new2old)))
  (if *dbstruct-db* (db:close-all *dbstruct-db*))







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325

			"-logging"
			"-v" ;; verbose 2, more than normal (normal is 1)
			"-q" ;; quiet 0, errors/warnings only
		       )
		 args:arg-hash
		 0))

;; The watchdog is to keep an eye on things like db sync etc.
;;
(define *watchdog*
  (make-thread 
   (lambda ()
     (let loop ()
       (thread-sleep! 5) ;; five second resolution is only a minor burden and should be tolerable 

       ;; sync for filesystem local db writes
       ;;
       (let ((start-time (current-seconds)))
	 (mutex-lock! *db-multi-sync-mutex*)
	 (for-each 
	  (lambda (run-id)
	    (let ((last-write (hash-table-ref/default *db-local-sync* run-id 0)))
	      (if ;; (and 
	       (> (- start-time last-write) 5) ;; every five seconds
	       ;;      (common:db-access-allowed?))
	       (begin
		 (db:multi-db-sync (list run-id) 'new2old)
		 (if (common:low-noise-print 30 "sync new to old")
		     (debug:print-info 0 "Sync of newdb to olddb for run-id " run-id " completed in " (- (current-seconds) start-time) " seconds"))
		 (hash-table-delete! *db-local-sync* run-id)))))
	  (hash-table-keys *db-local-sync*))
	 (mutex-unlock! *db-multi-sync-mutex*))
       
       ;; keep going unless time to exit
       ;;
       (if (not *time-to-exit*)
	   (loop))))
   "Watchdog thread"))

(thread-start! *watchdog*)

(define (std-exit-procedure)
  (rmt:print-db-stats)
  (let ((run-ids (hash-table-keys *db-local-sync*)))
    (if (not (null? run-ids))
	(db:multi-db-sync run-ids 'new2old)))
  (if *dbstruct-db* (db:close-all *dbstruct-db*))
1351
1352
1353
1354
1355
1356
1357



1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
;;======================================================================

(if *runremote* (close-all-connections!))

(if (not *didsomething*)
    (debug:print 0 help))




(if (not (eq? *globalexitstatus* 0))
    (if (or (args:get-arg "-runtests")(args:get-arg "-runall"))
        (begin
           (debug:print 0 "NOTE: Subprocesses with non-zero exit code detected: " *globalexitstatus*)
           (exit 0))
        (case *globalexitstatus*
         ((0)(exit 0))
         ((1)(exit 1))
         ((2)(exit 2))
         (else (exit 3)))))







>
>
>










1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
;;======================================================================

(if *runremote* (close-all-connections!))

(if (not *didsomething*)
    (debug:print 0 help))

(set! *time-to-exit* #t)
(thread-join! *watchdog*)

(if (not (eq? *globalexitstatus* 0))
    (if (or (args:get-arg "-runtests")(args:get-arg "-runall"))
        (begin
           (debug:print 0 "NOTE: Subprocesses with non-zero exit code detected: " *globalexitstatus*)
           (exit 0))
        (case *globalexitstatus*
         ((0)(exit 0))
         ((1)(exit 1))
         ((2)(exit 2))
         (else (exit 3)))))

Modified mt.scm from [77d6104e75] to [fdb95af183].

163
164
165
166
167
168
169






170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187

;;======================================================================
;;  S T A T E   A N D   S T A T U S   F O R   T E S T S 
;;======================================================================

;; speed up for common cases with a little logic
(define (mt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)






  (cond
   ((and newstate newstatus newcomment)
    (rmt:general-call 'state-status-msg run-id newstate newstatus newcomment test-id))
   ((and newstate newstatus)
    (rmt:general-call 'state-status run-id newstate newstatus test-id))
   (else
    (if newstate   (rmt:general-call 'set-test-state   run-id newstate   test-id))
    (if newstatus  (rmt:general-call 'set-test-status  run-id newstatus  test-id))
    (if newcomment (rmt:general-call 'set-test-comment run-id newcomment test-id))))
   (mt:process-triggers run-id test-id newstate newstatus)
   #t)

(define (mt:test-set-state-status-by-testname run-id test-name item-path new-state new-status new-comment)
  (let ((test-id (cdb:remote-run db:get-test-id-cached #f run-id test-name item-path)))
    (mt:test-set-state-status-by-id test-id new-state new-status new-comment)))

(define (mt:lazy-read-test-config test-name)
  (let ((tconf (hash-table-ref/default *testconfigs* test-name #f)))







>
>
>
>
>
>
|
|
|
|
|
|
|
|
|
|
|







163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193

;;======================================================================
;;  S T A T E   A N D   S T A T U S   F O R   T E S T S 
;;======================================================================

;; speed up for common cases with a little logic
(define (mt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
  (if (not (and run-id test-id))
      (begin
	(debug:print 0 "ERROR: bad data handed to mt:test-set-state-status-by-id, run-id=" run-id ", test-id=" test-id ", newstate=" newstate)
	(print-call-chain)
	#f)
      (begin
	(cond
	 ((and newstate newstatus newcomment)
	  (rmt:general-call 'state-status-msg run-id newstate newstatus newcomment test-id))
	 ((and newstate newstatus)
	  (rmt:general-call 'state-status run-id newstate newstatus test-id))
	 (else
	  (if newstate   (rmt:general-call 'set-test-state   run-id newstate   test-id))
	  (if newstatus  (rmt:general-call 'set-test-status  run-id newstatus  test-id))
	  (if newcomment (rmt:general-call 'set-test-comment run-id newcomment test-id))))
	(mt:process-triggers run-id test-id newstate newstatus)
	#t)))

(define (mt:test-set-state-status-by-testname run-id test-name item-path new-state new-status new-comment)
  (let ((test-id (cdb:remote-run db:get-test-id-cached #f run-id test-name item-path)))
    (mt:test-set-state-status-by-id test-id new-state new-status new-comment)))

(define (mt:lazy-read-test-config test-name)
  (let ((tconf (hash-table-ref/default *testconfigs* test-name #f)))

Modified portlogger.scm from [3222aa8dd9] to [614321ce45].

29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
	 (handler  (make-busy-timeout 136000))
	 (canwrite (file-write-access? fname)))
    (sqlite3:set-busy-handler! db handler)
    (sqlite3:execute db "PRAGMA synchronous = 0;")
    (if (not exists)
	(sqlite3:execute 
	 db
	 "CREATE TABLE ports (
            port INTEGER PRIMARY KEY,
            state TEXT DEFAULT 'not-used',
            fail_count INTEGER DEFAULT 0,
            update_time TIMESTAMP DEFAULT (strftime('%s','now')) );"))
    db))

(define (portlogger:open-run-close proc . params)







|







29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
	 (handler  (make-busy-timeout 136000))
	 (canwrite (file-write-access? fname)))
    (sqlite3:set-busy-handler! db handler)
    (sqlite3:execute db "PRAGMA synchronous = 0;")
    (if (not exists)
	(sqlite3:execute 
	 db
	 "CREATE TABLE IF NOT EXISTS ports (
            port INTEGER PRIMARY KEY,
            state TEXT DEFAULT 'not-used',
            fail_count INTEGER DEFAULT 0,
            update_time TIMESTAMP DEFAULT (strftime('%s','now')) );"))
    db))

(define (portlogger:open-run-close proc . params)
86
87
88
89
90
91
92









93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108








109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127

128
129









130
131
132
133
134
135
136
137
138
139
140
141
		    res)))))
    (sqlite3:finalize! qry1)
    (sqlite3:finalize! qry2)
    (sqlite3:finalize! qry3)
    res))

(define (portlogger:get-prev-used-port db)









  (sqlite3:fold-row
   (lambda (var curr)
     (or curr var curr))
   #f
   db
   "SELECT (port) FROM ports WHERE state='released' LIMIT 1;"))

(define (portlogger:find-port db)
  (let* ((lowport (let ((val (configf:lookup *configdat* "server" "lowport")))
		    (if (and val 
			     (string->number val))
			(string->number val)
			32768)))
	 (portnum (or (portlogger:get-prev-used-port db)
		      (+ lowport ;; top of registered ports is 49152 but lets use ports in the registered range
			 (random (- 64000 lowport))))))








    (portlogger:take-port db portnum)
    portnum))

;; set port to "released", "failed" etc.
;; 
(define (portlogger:set-port db portnum value)
  (sqlite3:execute db "UPDATE ports SET state=?,update_time=strftime('%s','now') WHERE port=?;" value portnum))

;; set port to failed (attempted to take but got error)
;;
(define (portlogger:set-failed db portnum)
  (sqlite3:execute db "UPDATE ports SET state='failed',fail_count=fail_count+1,update_time=strftime('%s','now') WHERE port=?;" portnum))

;;======================================================================
;; MAIN
;;======================================================================

(define (portlogger:main . args)
  (let* ((db      (portlogger:open-db (conc "/tmp/." (current-user-name) "-portlogger.db")))

	 (numargs (length args))
	 (result  (cond









		   ((> numargs 1) ;; most commands
		    (case (string->symbol (car args)) ;; commands with two or more params
		      ((take)(portlogger:take-port db (string->number (cadr args))))
		      ((set) (portlogger:set-port db 
						  (string->number (cadr args))
						  (caddr args))
		       (caddr args))
		      ((failed)(portlogger:set-failed db (string->number (cadr args))) 'failed))))))
    (sqlite3:finalize! db)
    result))
     
;; (print (apply portlogger:main (cdr (argv))))







>
>
>
>
>
>
>
>
>
|
|
|
|
|
|










>
>
>
>
>
>
>
>
|

















|
>

|
>
>
>
>
>
>
>
>
>
|
|
|
|
|
|
|
|




86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
		    res)))))
    (sqlite3:finalize! qry1)
    (sqlite3:finalize! qry2)
    (sqlite3:finalize! qry3)
    res))

(define (portlogger:get-prev-used-port db)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db")
     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
     (debug:print 0 "exn=" (condition->list exn))
     (print-call-chain)
     (debug:print 0 "Continuing anyway.")
     #f)
   (sqlite3:fold-row
    (lambda (var curr)
      (or curr var curr))
    #f
    db
    "SELECT (port) FROM ports WHERE state='released' LIMIT 1;")))

(define (portlogger:find-port db)
  (let* ((lowport (let ((val (configf:lookup *configdat* "server" "lowport")))
		    (if (and val 
			     (string->number val))
			(string->number val)
			32768)))
	 (portnum (or (portlogger:get-prev-used-port db)
		      (+ lowport ;; top of registered ports is 49152 but lets use ports in the registered range
			 (random (- 64000 lowport))))))
    (handle-exceptions
     exn
     (begin
       (debug:print 0 "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db")
       (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
       (debug:print 0 "exn=" (condition->list exn))
       (print-call-chain)
       (debug:print 0 "Continuing anyway."))
     (portlogger:take-port db portnum))
    portnum))

;; set port to "released", "failed" etc.
;; 
(define (portlogger:set-port db portnum value)
  (sqlite3:execute db "UPDATE ports SET state=?,update_time=strftime('%s','now') WHERE port=?;" value portnum))

;; set port to failed (attempted to take but got error)
;;
(define (portlogger:set-failed db portnum)
  (sqlite3:execute db "UPDATE ports SET state='failed',fail_count=fail_count+1,update_time=strftime('%s','now') WHERE port=?;" portnum))

;;======================================================================
;; MAIN
;;======================================================================

(define (portlogger:main . args)
  (let* ((dbfname (conc "/tmp/." (current-user-name) "-portlogger.db"))
	 (db      (portlogger:open-db dbfname))
	 (numargs (length args))
	 (result  
	  (handle-exceptions
	   exn
	   (begin
	     (debug:print 0 "EXCEPTION: portlogger database at " dbfname " probably overloaded or unreadable. Try removing it.")
	     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	     (print "exn=" (condition->list exn))
	     (debug:print 0 " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
	     (print-call-chain))
	   (cond
	    ((> numargs 1) ;; most commands
	     (case (string->symbol (car args)) ;; commands with two or more params
	       ((take)(portlogger:take-port db (string->number (cadr args))))
	       ((set) (portlogger:set-port db 
					   (string->number (cadr args))
					   (caddr args))
		(caddr args))
	       ((failed)(portlogger:set-failed db (string->number (cadr args))) 'failed)))))))
    (sqlite3:finalize! db)
    result))
     
;; (print (apply portlogger:main (cdr (argv))))

Modified process.scm from [88799f98f8] to [781c177a90].

49
50
51
52
53
54
55


56
57
58
59
60
61
62

(define (cmd-run-proc-each-line cmd proc . params)
  ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params)
  (handle-exceptions
   exn
   (begin
     (print "ERROR:  Failed to run command: " cmd " " (string-intersperse params " "))


     #f)
   (let-values (((fh fho pid) (if (null? params)
				  (process cmd)
				  (process cmd params))))
       (let loop ((curr (read-line fh))
		(result  '()))
       (if (not (eof-object? curr))







>
>







49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64

(define (cmd-run-proc-each-line cmd proc . params)
  ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params)
  (handle-exceptions
   exn
   (begin
     (print "ERROR:  Failed to run command: " cmd " " (string-intersperse params " "))
     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
     (print "exn=" (condition->list exn))
     #f)
   (let-values (((fh fho pid) (if (null? params)
				  (process cmd)
				  (process cmd params))))
       (let loop ((curr (read-line fh))
		(result  '()))
       (if (not (eof-object? curr))
122
123
124
125
126
127
128
129









     (let loop ((inl (read-line))
		(res '()))
       (if (eof-object? inl)
	   (reverse res)
	   (let ((pid (string->number inl)))
	     (if proc (proc pid))
	     (loop (read-line) (cons pid res))))))))
       
















|
>
>
>
>
>
>
>
>
>
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
     (let loop ((inl (read-line))
		(res '()))
       (if (eof-object? inl)
	   (reverse res)
	   (let ((pid (string->number inl)))
	     (if proc (proc pid))
	     (loop (read-line) (cons pid res))))))))

(define (process:alive? pid)
  (handle-exceptions
   exn
   ;; possibly pid is a process not a child, look in /proc to see if it is running still
   (file-exists? (conc "/proc/" pid))
   (let-values (((rpid exit-type exit-signal)(process-wait pid #t)))
       (and (number? rpid)
	    (equal? rpid pid)))))
	 

Modified rmt.scm from [a8f8013949] to [ef41e52830].

56
57
58
59
60
61
62

63
64
65
66

67
68
69
70
71

72
73
74
75
76
77
78
	     #f))))

;; cmd is a symbol
;; vars is a json string encoding the parameters for the call
;;
(define (rmt:send-receive cmd rid params)
  ;; clean out old connections

  (let ((expire-time (- (current-seconds) 60)))
    (for-each 
     (lambda (run-id)
       (let ((connection (hash-table-ref *runremote* run-id)))

	 (if (< (http-transport:server-dat-get-last-access connection) expire-time)
	     (begin
	       (debug:print-info 0 "Discarding connection to server for run-id " run-id ", too long between accesses")
	       (hash-table-delete! *runremote* run-id)))))
     (hash-table-keys *runremote*)))

  (let* ((run-id          (if rid rid 0))
	 (connection-info (let ((cinfo (hash-table-ref/default *runremote* run-id #f)))
			    (if cinfo
				cinfo
				;; NB// can cache the answer for server running for 10 seconds ...
				;;  ;; (and (not (rmt:write-frequency-over-limit? cmd run-id))
				(if (tasks:server-running-or-starting? (tasks:get-db) run-id)







>



|
>
|




>







56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
	     #f))))

;; cmd is a symbol
;; vars is a json string encoding the parameters for the call
;;
(define (rmt:send-receive cmd rid params)
  ;; clean out old connections
  (mutex-lock! *db-multi-sync-mutex*)
  (let ((expire-time (- (current-seconds) 60)))
    (for-each 
     (lambda (run-id)
       (let ((connection (hash-table-ref/default *runremote* run-id #f)))
	 (if ;; (and connection 
		  (< (http-transport:server-dat-get-last-access connection) expire-time) ; )
	     (begin
	       (debug:print-info 0 "Discarding connection to server for run-id " run-id ", too long between accesses")
	       (hash-table-delete! *runremote* run-id)))))
     (hash-table-keys *runremote*)))
  (mutex-unlock! *db-multi-sync-mutex*)
  (let* ((run-id          (if rid rid 0))
	 (connection-info (let ((cinfo (hash-table-ref/default *runremote* run-id #f)))
			    (if cinfo
				cinfo
				;; NB// can cache the answer for server running for 10 seconds ...
				;;  ;; (and (not (rmt:write-frequency-over-limit? cmd run-id))
				(if (tasks:server-running-or-starting? (tasks:get-db) run-id)
102
103
104
105
106
107
108


109
110
111
112
113
114
115

(define (rmt:update-db-stats rawcmd params duration)
  (mutex-lock! *db-stats-mutex*)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 "WARNING: stats collection failed in update-db-stats")


     #f) ;; if this fails we don't care, it is just stats
   (let* ((cmd      (if (eq? rawcmd 'general-call) (car params) rawcmd))
	  (stat-vec (hash-table-ref/default *db-stats* cmd #f)))
     (if (not stat-vec)
	 (let ((newvec (vector 0 0)))
	   (hash-table-set! *db-stats* cmd newvec)
	   (set! stat-vec newvec)))







>
>







105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120

(define (rmt:update-db-stats rawcmd params duration)
  (mutex-lock! *db-stats-mutex*)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 "WARNING: stats collection failed in update-db-stats")
     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
     (print "exn=" (condition->list exn))
     #f) ;; if this fails we don't care, it is just stats
   (let* ((cmd      (if (eq? rawcmd 'general-call) (car params) rawcmd))
	  (stat-vec (hash-table-ref/default *db-stats* cmd #f)))
     (if (not stat-vec)
	 (let ((newvec (vector 0 0)))
	   (hash-table-set! *db-stats* cmd newvec)
	   (set! stat-vec newvec)))
149
150
151
152
153
154
155
156
157
158

159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
			       (cons newmax-cmd currmax)
			       (cons 'none 0))
			   (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
    (mutex-unlock! *db-stats-mutex*)
    res))
	  
(define (rmt:open-qry-close-locally cmd run-id params)
  (let* ((dbdir (conc    (configf:lookup *configdat* "setup" "linktree") "/.db"))
	 (dbstruct-local (if *dbstruct-db*
			     *dbstruct-db*

			     (let ((db (make-dbr:dbstruct path:  dbdir local: #t)))
			       (set! *dbstruct-db* db)
			       db)))
	 (db-file-path   (db:dbfile-path 0)))
    ;; (read-only      (not (file-read-access? db-file-path)))
    (let* ((start         (current-milliseconds))
	   (res           (api:execute-requests dbstruct-local (symbol->string cmd) params))
	   (duration      (- (current-milliseconds) start)))
      (rmt:update-db-stats cmd params duration)
      ;; mark this run as dirty if this was a write
      (if (not (member cmd api:read-only-queries))
	  (let ((start-time (current-seconds)))
	    (mutex-lock! *db-multi-sync-mutex*)
	    (let ((last-sync (hash-table-ref/default *db-local-sync* run-id 0)))
	      (if ;; (and 
		   (> (- start-time last-sync) 5) ;; every five seconds
		  ;;      (common:db-access-allowed?))
		  (begin
		    ;; MOVE THIS TO A THREAD?
		    (db:multi-db-sync (list run-id) 'new2old)
		    (if (common:low-noise-print 30 "sync new to old")
			(debug:print-info 0 "Sync of newdb to olddb for run-id " run-id " completed in " (- (current-seconds) start-time) " seconds"))
		    (hash-table-set! *db-local-sync* run-id start-time))))
	    (mutex-unlock! *db-multi-sync-mutex*)))
      res)))

(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
  (let* ((run-id   (if run-id run-id 0))
	 (jparams         (db:obj->string params)) ;; (rmt:dat->json-str params))
	 (res (http-transport:client-api-send-receive run-id connection-info cmd jparams)))







<
|

>
|












|
<
<
<
<
<
<
<
<
|







154
155
156
157
158
159
160

161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177








178
179
180
181
182
183
184
185
			       (cons newmax-cmd currmax)
			       (cons 'none 0))
			   (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
    (mutex-unlock! *db-stats-mutex*)
    res))
	  
(define (rmt:open-qry-close-locally cmd run-id params)

  (let* ((dbstruct-local (if *dbstruct-db*
			     *dbstruct-db*
			     (let* ((dbdir (conc    (configf:lookup *configdat* "setup" "linktree") "/.db"))
				    (db (make-dbr:dbstruct path:  dbdir local: #t)))
			       (set! *dbstruct-db* db)
			       db)))
	 (db-file-path   (db:dbfile-path 0)))
    ;; (read-only      (not (file-read-access? db-file-path)))
    (let* ((start         (current-milliseconds))
	   (res           (api:execute-requests dbstruct-local (symbol->string cmd) params))
	   (duration      (- (current-milliseconds) start)))
      (rmt:update-db-stats cmd params duration)
      ;; mark this run as dirty if this was a write
      (if (not (member cmd api:read-only-queries))
	  (let ((start-time (current-seconds)))
	    (mutex-lock! *db-multi-sync-mutex*)
	    (if (not (hash-table-ref/default *db-local-sync* run-id #f))








		(hash-table-set! *db-local-sync* run-id start-time)) ;; the oldest "write"
	    (mutex-unlock! *db-multi-sync-mutex*)))
      res)))

(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
  (let* ((run-id   (if run-id run-id 0))
	 (jparams         (db:obj->string params)) ;; (rmt:dat->json-str params))
	 (res (http-transport:client-api-send-receive run-id connection-info cmd jparams)))
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
(define (rmt:get-test-id run-id testname item-path)
  (rmt:send-receive 'get-test-id run-id (list run-id testname item-path)))

(define (rmt:get-test-info-by-id run-id test-id)
  (if (and (number? run-id)(number? test-id))
      (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id))
      (begin
	(debug:print 0 "ERROR: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
	(print-call-chain)
	#f)))

(define (rmt:test-get-rundir-from-test-id run-id test-id)
  (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id)))

(define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))







|







262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
(define (rmt:get-test-id run-id testname item-path)
  (rmt:send-receive 'get-test-id run-id (list run-id testname item-path)))

(define (rmt:get-test-info-by-id run-id test-id)
  (if (and (number? run-id)(number? test-id))
      (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id))
      (begin
	(debug:print 0 "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
	(print-call-chain)
	#f)))

(define (rmt:test-get-rundir-from-test-id run-id test-id)
  (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id)))

(define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))

Modified runs.scm from [e52279daef] to [31523ae98e].

497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
	(setenv "MT_RUNNAME"   runname)
	(runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
	(let ((items-list (items:get-items-from-config tconfig)))
	  (if (list? items-list)
	      (begin
		(if (null? items-list)
		    (let ((test-id (rmt:get-test-id run-id test-name "")))
		      (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "Failed to run due to failed prerequisites")))
		(tests:testqueue-set-items! test-record items-list)
		(list hed tal reg reruns))
	      (begin
		(debug:print 0 "ERROR: The proc from reading the items table did not yield a list - please report this")
		(exit 1))))))

     ((and (null? fails)







|







497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
	(setenv "MT_RUNNAME"   runname)
	(runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
	(let ((items-list (items:get-items-from-config tconfig)))
	  (if (list? items-list)
	      (begin
		(if (null? items-list)
		    (let ((test-id (rmt:get-test-id run-id test-name "")))
		      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "Failed to run due to failed prerequisites"))))
		(tests:testqueue-set-items! test-record items-list)
		(list hed tal reg reruns))
	      (begin
		(debug:print 0 "ERROR: The proc from reading the items table did not yield a list - please report this")
		(exit 1))))))

     ((and (null? fails)
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
	(if (and give-up
		 (not (and (null? tal)(null? reg))))
	    (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records))
		  (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records)))
	      (debug:print 1 "WARNING: test " hed " has discarded prerequisites, removing it from the queue")

	      (let ((test-id (rmt:get-test-id run-id hed "")))
		(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites"))
	      
	      (if (and (null? trimmed-tal)
		       (null? trimmed-reg))
		  #f
		  (list (runs:queue-next-hed trimmed-tal trimmed-reg reglen regfull)
			(runs:queue-next-tal trimmed-tal trimmed-reg reglen regfull)
			(runs:queue-next-reg trimmed-tal trimmed-reg reglen regfull)







|







535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
	(if (and give-up
		 (not (and (null? tal)(null? reg))))
	    (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records))
		  (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records)))
	      (debug:print 1 "WARNING: test " hed " has discarded prerequisites, removing it from the queue")

	      (let ((test-id (rmt:get-test-id run-id hed "")))
		(if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites")))
	      
	      (if (and (null? trimmed-tal)
		       (null? trimmed-reg))
		  #f
		  (list (runs:queue-next-hed trimmed-tal trimmed-reg reglen regfull)
			(runs:queue-next-tal trimmed-tal trimmed-reg reglen regfull)
			(runs:queue-next-reg trimmed-tal trimmed-reg reglen regfull)
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579

580
581
582
583
584
585
586
587
588
589
	    (debug:print-info 1 "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0))
	    ;; num-retries code was here
	    ;; we use this opportunity to move contents of reg to tal
	    (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met?
	  (begin
	    (debug:print-info 1 "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue")
	    (let ((test-id (rmt:get-test-id run-id hed "")))
	      (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while."))
	    (list (runs:queue-next-hed tal reg reglen regfull)
		  (runs:queue-next-tal tal reg reglen regfull)
		  (runs:queue-next-reg tal reg reglen regfull)
		  reruns))))

     ((and 
       (or (not (null? fails))
	   (not (null? prereq-fails)))
       (member 'normal testmode))
      (debug:print-info 1 "test "  hed " (mode=" testmode ") has failed prerequisite(s); "
			(string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ")
			", removing it from to-do list")
      (let ((test-id (rmt:get-test-id run-id hed "")))

	(if (not (null? prereq-fails))
	    (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites")
	    (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL"      "Failed to run due to failed prerequisites")))
      (if (or (not (null? reg))(not (null? tal)))
	  (begin
	    (hash-table-set! test-registry hed 'CANNOTRUN)
	    (list (runs:queue-next-hed tal reg reglen regfull)
		  (runs:queue-next-tal tal reg reglen regfull)
		  (runs:queue-next-reg tal reg reglen regfull)
		  (cons hed reruns)))







|













>
|
|
|







559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
	    (debug:print-info 1 "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0))
	    ;; num-retries code was here
	    ;; we use this opportunity to move contents of reg to tal
	    (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met?
	  (begin
	    (debug:print-info 1 "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue")
	    (let ((test-id (rmt:get-test-id run-id hed "")))
	      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while.")))
	    (list (runs:queue-next-hed tal reg reglen regfull)
		  (runs:queue-next-tal tal reg reglen regfull)
		  (runs:queue-next-reg tal reg reglen regfull)
		  reruns))))

     ((and 
       (or (not (null? fails))
	   (not (null? prereq-fails)))
       (member 'normal testmode))
      (debug:print-info 1 "test "  hed " (mode=" testmode ") has failed prerequisite(s); "
			(string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ")
			", removing it from to-do list")
      (let ((test-id (rmt:get-test-id run-id hed "")))
	(if test-id
	    (if (not (null? prereq-fails))
		(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites")
		(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL"      "Failed to run due to failed prerequisites"))))
      (if (or (not (null? reg))(not (null? tal)))
	  (begin
	    (hash-table-set! test-registry hed 'CANNOTRUN)
	    (list (runs:queue-next-hed tal reg reglen regfull)
		  (runs:queue-next-tal tal reg reglen regfull)
		  (runs:queue-next-reg tal reg reglen regfull)
		  (cons hed reruns)))
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
	  ;; the waiton is FAIL so no point in trying to run hed ever again
	  (if (or (not (null? reg))(not (null? tal)))
	      (if (vector? hed)
		  (begin
		    (debug:print 1 "WARNING: Dropping test " test-name "/" item-path
				 " from the launch list as it has prerequistes that are FAIL")
		    (let ((test-id (rmt:get-test-id run-id hed "")))
		      (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))
		    (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?)
		    ;; (thread-sleep! *global-delta*)
		    ;; This next is for the items
		    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f)
		    (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'removed)
		    (list (runs:queue-next-hed tal reg reglen regfull)
			  (runs:queue-next-tal tal reg reglen regfull)







|







771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
	  ;; the waiton is FAIL so no point in trying to run hed ever again
	  (if (or (not (null? reg))(not (null? tal)))
	      (if (vector? hed)
		  (begin
		    (debug:print 1 "WARNING: Dropping test " test-name "/" item-path
				 " from the launch list as it has prerequistes that are FAIL")
		    (let ((test-id (rmt:get-test-id run-id hed "")))
		      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))
		    (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?)
		    ;; (thread-sleep! *global-delta*)
		    ;; This next is for the items
		    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f)
		    (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'removed)
		    (list (runs:queue-next-hed tal reg reglen regfull)
			  (runs:queue-next-tal tal reg reglen regfull)

Modified tasks.scm from [481c79cde8] to [e808e7265f].

322
323
324
325
326
327
328

329


330













331
332
333
334
335
336
337
    res))

;; no elegance here ...
;;
(define (tasks:kill-server hostname pid)
  (debug:print-info 0 "Attempting to kill server process " pid " on host " hostname)
  (setenv "TARGETHOST" hostname)

  (system (conc "nbfake kill " pid)))


 













;;   (if status ;; #t means alive
;;       (begin
;; 	(if (equal? hostname (get-host-name))
;; 	    (handle-exceptions
;; 	     exn
;; 	     (debug:print-info 0 "server may or may not be dead, check for megatest -server running as pid " pid "\n"
;; 			       "  EXCEPTION: " ((condition-property-accessor 'exn 'message) exn))







>
|
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>







322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
    res))

;; no elegance here ...
;;
(define (tasks:kill-server hostname pid)
  (debug:print-info 0 "Attempting to kill server process " pid " on host " hostname)
  (setenv "TARGETHOST" hostname)
  (setenv "TARGETHOST_LOGF" "server-kills.log")
  (system (conc "nbfake kill " pid))
  (unsetenv "TARGETHOST_LOGF")
  (unsetenv "TARGETHOST"))
 
;; look up a server by run-id and send it a kill, also delete the record for that server
;;
(define (tasks:kill-server-run-id run-id)
  (let* ((tdb  (tasks:open-db))
	 (sdat (tasks:get-server mdb run-id)))
    (if sdat
	(let ((hostname (vector-ref sdat 6))
	      (pid      (vector-ref sdat 5)))
	  (debug:print-info 0 "Killing server for run-id " run-id " on host " hostname " with pid " pid)
	  (tasks:kill-server hostname pid)
	  (tasks:server-delete-record mdb server-id tag) )
	(debug:print-info 0 "No server found for run-id " run-id ", nothing to kill"))))
    
;;   (if status ;; #t means alive
;;       (begin
;; 	(if (equal? hostname (get-host-name))
;; 	    (handle-exceptions
;; 	     exn
;; 	     (debug:print-info 0 "server may or may not be dead, check for megatest -server running as pid " pid "\n"
;; 			       "  EXCEPTION: " ((condition-property-accessor 'exn 'message) exn))
632
633
634
635
636
637
638
639
640
641

642
643
644
645
646
647
648
649
650

651
652
653
654
655
656

657
658
659
660
661
662
663
	(debug:print 0 "Found " (length records) " run(s) to kill."))
    (for-each 
     (lambda (record)
       (let* ((param-key (list-ref record 8))
	      (match-dat (string-search hostpid-rx param-key)))
	 (if match-dat
	     (let ((hostname  (cadr match-dat))
		   (pid       (caddr match-dat)))
	       (debug:print 0 "Sending SIGINT to process " pid " on host " hostname)
	       (if (equal? (get-host-name) hostname)

		   (begin
		     (handle-exceptions
		      exn
		      (begin
			(debug:print 0 "Kill of process " pid " on host " hostname " failed.")
			(debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
			#t)
		      (process-signal (string->number pid) signal/int)
		      (thread-sleep! 5)

		      (process-signal (string->number pid) signal/kill)))
		   ;;  (call-with-environment-variables
		   (let ((old-targethost (getenv "TARGETHOST")))
		     (setenv "TARGETHOST" hostname)
		     (system (conc "nbfake kill " pid))
		     (if old-targethost (setenv "TARGETHOST" old-targethost)))))

	     (debug:print 0 "ERROR: no record or improper record for " target "/" run-name " in tasks_queue in monitor.db"))))
     records)))


;;======================================================================
;; The routines to process tasks
;;======================================================================







|


>
|
|
|
|
|
|
|
|
|
>
|




|
>







648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
	(debug:print 0 "Found " (length records) " run(s) to kill."))
    (for-each 
     (lambda (record)
       (let* ((param-key (list-ref record 8))
	      (match-dat (string-search hostpid-rx param-key)))
	 (if match-dat
	     (let ((hostname  (cadr match-dat))
		   (pid       (string->number (caddr match-dat))))
	       (debug:print 0 "Sending SIGINT to process " pid " on host " hostname)
	       (if (equal? (get-host-name) hostname)
		   (if (process:alive? pid)
		       (begin
			 (handle-exceptions
			  exn
			  (begin
			    (debug:print 0 "Kill of process " pid " on host " hostname " failed.")
			    (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
			    #t)
			  (process-signal pid signal/int)
			  (thread-sleep! 5)
			  (if (process:alive? pid)
			      (process-signal pid signal/kill)))))
		   ;;  (call-with-environment-variables
		   (let ((old-targethost (getenv "TARGETHOST")))
		     (setenv "TARGETHOST" hostname)
		     (system (conc "nbfake kill " pid))
		     (if old-targethost (setenv "TARGETHOST" old-targethost))
		     (unsetenv "TARGETHOST"))))
	     (debug:print 0 "ERROR: no record or improper record for " target "/" run-name " in tasks_queue in monitor.db"))))
     records)))


;;======================================================================
;; The routines to process tasks
;;======================================================================

Modified tdb.scm from [de69c98c94] to [4b5015105f].

68
69
70
71
72
73
74
75

76
77
78
79
80
81
82
	      (tdb:testdb-initialize db)))
	;; (sqlite3:execute db "PRAGMA synchronous = 0;")
	(debug:print-info 11 "open-test-db END (sucessful)" work-area)
	;; now let's test that everything is correct
	(handle-exceptions
	 exn
	 (begin
	   (debug:print 0 "ERROR: problem accessing test db " work-area ", you probably should clean and re-run this test"

			((condition-property-accessor 'exn 'message) exn))
	   #f)
	 ;; Is there a cheaper single line operation that will check for existance of a table
	 ;; and raise an exception ?
	 (sqlite3:execute db "SELECT id FROM test_data LIMIT 1;"))
	db)
      (let ((baddb (sqlite3:open-database ":memory:")))







|
>







68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
	      (tdb:testdb-initialize db)))
	;; (sqlite3:execute db "PRAGMA synchronous = 0;")
	(debug:print-info 11 "open-test-db END (sucessful)" work-area)
	;; now let's test that everything is correct
	(handle-exceptions
	 exn
	 (begin
	   (debug:print 0 "ERROR: problem accessing test db " work-area ", you probably should clean and re-run this test or remove the file " 
			dbpath ".\n  "
			((condition-property-accessor 'exn 'message) exn))
	   #f)
	 ;; Is there a cheaper single line operation that will check for existance of a table
	 ;; and raise an exception ?
	 (sqlite3:execute db "SELECT id FROM test_data LIMIT 1;"))
	db)
      (let ((baddb (sqlite3:open-database ":memory:")))

Modified tests/Makefile from [25eaa21107] to [60c8f28493].

172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
	cd fullrun;$(MEGATEST) -remove-runs :runname $(RUN)  -testpatt % -itempatt % :sysname % :fsname % :datapath %

clean  : 
	rm cleanprep

kill :
	killall -v mtest main.sh dboard || true
	rm -rf *run/db/* */megatest.db */logging.db */monitor.db fullrun/tmp/mt_*/* fullrun/tmp/mt_*/.db* fullrun/logs/*.log || true
	killall -v mtest dboard || true

hardkill : kill
	sleep 2;killall -v mtest main.sh dboard -9

listservers :
	cd fullrun;$(MEGATEST) -list-servers







|







172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
	cd fullrun;$(MEGATEST) -remove-runs :runname $(RUN)  -testpatt % -itempatt % :sysname % :fsname % :datapath %

clean  : 
	rm cleanprep

kill :
	killall -v mtest main.sh dboard || true
	rm -rf *run/db/* */megatest.db */logging.db */monitor.db fullrun/tmp/mt_*/* fullrun/tmp/mt_*/.db* fullrun/logs/*.log fullrun/*.log || true
	killall -v mtest dboard || true

hardkill : kill
	sleep 2;killall -v mtest main.sh dboard -9

listservers :
	cd fullrun;$(MEGATEST) -list-servers