Megatest

Check-in [0a7ab47eca]
Login
Overview
Comment:Change some exn= debug messages to be proper debug:print calls and moved to level 5
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.64
Files: files | file ages | folders
SHA1: 0a7ab47eca77add9ad5fa6f2df6d31c71195e337
User & Date: mrwellan on 2017-05-11 11:15:06
Other Links: branch diff | manifest | tags
Context
2017-05-11
18:24
do not migrate db on patch version change (eg 16408 to 16411) check-in: 78e116111d user: bjbarcla tags: v1.64
11:15
Change some exn= debug messages to be proper debug:print calls and moved to level 5 check-in: 0a7ab47eca user: mrwellan tags: v1.64
2017-05-10
11:25
fixed deleted rollup issue check-in: d1aca6ab74 user: bjbarcla tags: v1.64, v1.6412
Changes

Modified db.scm from [b7bdf7cceb] to [6743497f27].

611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
(define (db:sync-tables tbls last-update fromdb todb . slave-dbs)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable in db:sync-tables.")
     (print-call-chain (current-error-port))
     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
     (print "exn=" (condition->list exn))
     (debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
     (debug:print 0 *default-log-port* " src db:  " (db:dbdat-get-path fromdb))
     (for-each (lambda (dbdat)
		 (let ((dbpath (db:dbdat-get-path dbdat)))
		   (debug:print 0 *default-log-port* " dbpath:  " dbpath)
		   (if (not (db:repair-db dbdat))
		       (begin







|







611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
(define (db:sync-tables tbls last-update fromdb todb . slave-dbs)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable in db:sync-tables.")
     (print-call-chain (current-error-port))
     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
     (debug:print 5 *default-log-port* "exn=" (condition->list exn))
     (debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
     (debug:print 0 *default-log-port* " src db:  " (db:dbdat-get-path fromdb))
     (for-each (lambda (dbdat)
		 (let ((dbpath (db:dbdat-get-path dbdat)))
		   (debug:print 0 *default-log-port* " dbpath:  " dbpath)
		   (if (not (db:repair-db dbdat))
		       (begin
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
	 (err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
     (case err-status
       ((busy)
	(thread-sleep! sleep-time))
       (else
	(debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.")
	(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	(print "exn=" (condition->list exn))
	(debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
	(print-call-chain (current-error-port))
	(thread-sleep! sleep-time)
	(debug:print-info 0 *default-log-port* "trying db call one more time....this may never recover, if necessary kill process " (current-process-id) " on host " (get-host-name) " to clean up")))
     (apply open-run-close-exception-handling proc idb params))
   (apply open-run-close-no-exception-handling proc idb params)))








|







1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
	 (err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
     (case err-status
       ((busy)
	(thread-sleep! sleep-time))
       (else
	(debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.")
	(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	(debug:print 5 *default-log-port* "exn=" (condition->list exn))
	(debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
	(print-call-chain (current-error-port))
	(thread-sleep! sleep-time)
	(debug:print-info 0 *default-log-port* "trying db call one more time....this may never recover, if necessary kill process " (current-process-id) " on host " (get-host-name) " to clean up")))
     (apply open-run-close-exception-handling proc idb params))
   (apply open-run-close-no-exception-handling proc idb params)))

Modified docs/manual/megatest_manual.html from [76096cc0e9] to [7412bce352].

799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
integrating and or running a complex suite of tests for release
qualification.</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_megatest_design_philosophy">Megatest Design Philosophy</h2>
<div class="sectionbody">
<div class="paragraph"><p>Megatest is intended to provide the minimum needed resources to make
writing a suite of tests and tasks for implementing continuous build
for software, design engineering or process control (via owlfs for
example) without being specialized for any specific problem
space. Megatest in of itself does not know what constitutes a PASS or
FAIL of a test or task. In most cases megatest is best used in
conjunction with logpro or a similar tool to parse, analyze and decide
on the test outcome.</p></div>
<div class="ulist"><ul>
<li>
<p>
Self-checking -Repeatable strive for directed or self-checking test
   as opposed to delta based tests
</p>
</li>







|
|
|
|
|
|
|
<







799
800
801
802
803
804
805
806
807
808
809
810
811
812

813
814
815
816
817
818
819
integrating and or running a complex suite of tests for release
qualification.</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_megatest_design_philosophy">Megatest Design Philosophy</h2>
<div class="sectionbody">
<div class="paragraph"><p>Megatest is a distributed system intended to provide the minimum needed
resources to make writing a suite of tests and tasks for implementing
continuous build for software, design engineering or process control (via
owlfs for example) without being specialized for any specific problem
space. Megatest in of itself does not know what constitutes a PASS or FAIL
of a test or task. In most cases megatest is best used in conjunction with
logpro or a similar tool to parse, analyze and decide on the test outcome.</p></div>

<div class="ulist"><ul>
<li>
<p>
Self-checking -Repeatable strive for directed or self-checking test
   as opposed to delta based tests
</p>
</li>
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972

973
974
975
976
977
978
979
<h3 id="_architecture_refactor">Architecture Refactor</h3>
<div class="sect3">
<h4 id="_goals">Goals</h4>
<div class="olist arabic"><ol class="arabic">
<li>
<p>
Reduce load on the file system. Sqlite3 files on network filesystem can be
  a burden.
</p>
</li>
<li>
<p>
Reduce number of servers and frequency of start/stop. This is mostly an
  issue of clutter but also a reduction in "moving parts".
</p>
</li>
<li>
<p>
Coalesce activities to a single home host where possible. Give the user
  feedback that they have started the dashboard on a host other than the
  home host.
</p>
</li>
<li>
<p>
Reduce number of processes involved in managing running tests.
</p>
</li>
</ol></div>
</div>
<div class="sect3">
<h4 id="_changes_needed">Changes Needed</h4>
<div class="olist arabic"><ol class="arabic">
<li>
<p>
ACID compliant db will be on /tmp and synced to megatest.db with a five
  second max delay.
</p>
</li>
<li>
<p>
Read/writes to db for processes on homehost will go direct to /tmp
  megatest.db file.
</p>
</li>
<li>
<p>
Read/wites fron non-homehost processes will go through one server. Bulk
  reads (e.g. for dashboard or list-runs) will be cached on the current host
  in /tmp and synced from the home megatest.db in the testsuite area.
</p>
</li>
<li>
<p>
Db syncs rely on the target db file timestame minus some margin.
</p>
</li>
<li>
<p>
Since bulk reads do not use the server we can switch to simple RPC for the
  network transport.
</p>
</li>
<li>
<p>
Test running manager process extended to manage multiple running tests.
</p>
</li>
</ol></div>
</div>
</div>
<div class="sect2">
<h3 id="_current_items">Current Items</h3>
<div class="sect3">
<h4 id="_ww05_migrate_to_inmem_db">ww05 - migrate to inmem-db</h4>
<div class="olist arabic"><ol class="arabic">
<li>
<p>
Switch to inmem db with fast sync to on disk db&#8217;s [DONE]
</p>
</li>
<li>
<p>
Server polls tasks table for next action
</p>
<div class="olist loweralpha"><ol class="loweralpha">
<li>
<p>
Task table used for tracking runner process [DONE]
</p>
</li>
<li>
<p>
Task table used for jobs to run
</p>
</li>
<li>
<p>
Task table used for queueing runner actions (remove runs, cleanRunExecute, etc)

</p>
</li>
</ol></div>
</li>
</ol></div>
<div class="paragraph"><p>shifting, note that the preceding blank line is needed.</p></div>
</div>







|





|






|















|





|






|




|





|

















|









|




|




|
>







873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
<h3 id="_architecture_refactor">Architecture Refactor</h3>
<div class="sect3">
<h4 id="_goals">Goals</h4>
<div class="olist arabic"><ol class="arabic">
<li>
<p>
Reduce load on the file system. Sqlite3 files on network filesystem can be
  a burden. <span class="green">[DONE]</span>
</p>
</li>
<li>
<p>
Reduce number of servers and frequency of start/stop. This is mostly an
  issue of clutter but also a reduction in "moving parts". <span class="green">[DONE]</span>
</p>
</li>
<li>
<p>
Coalesce activities to a single home host where possible. Give the user
  feedback that they have started the dashboard on a host other than the
  home host. <span class="green">[DONE]</span>
</p>
</li>
<li>
<p>
Reduce number of processes involved in managing running tests.
</p>
</li>
</ol></div>
</div>
<div class="sect3">
<h4 id="_changes_needed">Changes Needed</h4>
<div class="olist arabic"><ol class="arabic">
<li>
<p>
ACID compliant db will be on /tmp and synced to megatest.db with a five
  second max delay. <span class="green">[DONE]</span>
</p>
</li>
<li>
<p>
Read/writes to db for processes on homehost will go direct to /tmp
  megatest.db file. <span class="green">[DONE]</span>
</p>
</li>
<li>
<p>
Read/wites fron non-homehost processes will go through one server. Bulk
  reads (e.g. for dashboard or list-runs) will be cached on the current host
  in /tmp and synced from the home megatest.db in the testsuite area. <span class="green">[DONE]</span>
</p>
</li>
<li>
<p>
Db syncs rely on the target db file timestame minus some margin. <span class="green">[DONE]</span>
</p>
</li>
<li>
<p>
Since bulk reads do not use the server we can switch to simple RPC for the
  network transport. <span class="green">[DONE]</span>
</p>
</li>
<li>
<p>
Test running manager process extended to manage multiple running tests.
</p>
</li>
</ol></div>
</div>
</div>
<div class="sect2">
<h3 id="_current_items">Current Items</h3>
<div class="sect3">
<h4 id="_ww05_migrate_to_inmem_db">ww05 - migrate to inmem-db</h4>
<div class="olist arabic"><ol class="arabic">
<li>
<p>
Switch to inmem db with fast sync to on disk db&#8217;s <span class="green">[DONE]</span>
</p>
</li>
<li>
<p>
Server polls tasks table for next action
</p>
<div class="olist loweralpha"><ol class="loweralpha">
<li>
<p>
Task table used for tracking runner process <span class="red">[Replaced by mtutil]</span>
</p>
</li>
<li>
<p>
Task table used for jobs to run <span class="red">[Replaced by mtutil]</span>
</p>
</li>
<li>
<p>
Task table used for queueing runner actions (remove runs,
   cleanRunExecute, etc)  <span class="red">[Replaced by mtutil</span>]
</p>
</li>
</ol></div>
</li>
</ol></div>
<div class="paragraph"><p>shifting, note that the preceding blank line is needed.</p></div>
</div>
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
</div>
</div>
</div>
<div id="footnotes"><hr></div>
<div id="footer">
<div id="footer-text">
Version 1.0<br>
Last updated 2017-04-12 13:34:13 PDT
</div>
</div>
</body>
</html>







|




2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
</div>
</div>
</div>
<div id="footnotes"><hr></div>
<div id="footer">
<div id="footer-text">
Version 1.0<br>
Last updated 2017-05-10 08:25:49 PDT
</div>
</div>
</body>
</html>

Modified docs/manual/megatest_manual.txt from [c82fa9e963] to [6773fd5157].

22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
tool, flexible enough to meet the needs of any team doing continuous
integrating and or running a complex suite of tests for release
qualification.

Megatest Design Philosophy
--------------------------

Megatest is intended to provide the minimum needed resources to make
writing a suite of tests and tasks for implementing continuous build
for software, design engineering or process control (via owlfs for
example) without being specialized for any specific problem
space. Megatest in of itself does not know what constitutes a PASS or
FAIL of a test or task. In most cases megatest is best used in
conjunction with logpro or a similar tool to parse, analyze and decide
on the test outcome. 

 * Self-checking -Repeatable strive for directed or self-checking test
   as opposed to delta based tests

 * Traceable - environment variables, host OS and other possibly influential
   variables are captured and kept recorded.








|
|
|
|
|
|
|
<







22
23
24
25
26
27
28
29
30
31
32
33
34
35

36
37
38
39
40
41
42
tool, flexible enough to meet the needs of any team doing continuous
integrating and or running a complex suite of tests for release
qualification.

Megatest Design Philosophy
--------------------------

Megatest is a distributed system intended to provide the minimum needed
resources to make writing a suite of tests and tasks for implementing
continuous build for software, design engineering or process control (via
owlfs for example) without being specialized for any specific problem
space. Megatest in of itself does not know what constitutes a PASS or FAIL
of a test or task. In most cases megatest is best used in conjunction with
logpro or a similar tool to parse, analyze and decide on the test outcome.


 * Self-checking -Repeatable strive for directed or self-checking test
   as opposed to delta based tests

 * Traceable - environment variables, host OS and other possibly influential
   variables are captured and kept recorded.

Modified docs/plan.txt from [92bba79ce7] to [0b286fe847].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46

47
48
49
50
51
52
53
Road Map
--------

Note 1: This road-map is still evolving and subject to change without notice.

Architecture Refactor
~~~~~~~~~~~~~~~~~~~~~

Goals
^^^^^

. Reduce load on the file system. Sqlite3 files on network filesystem can be
  a burden.
. Reduce number of servers and frequency of start/stop. This is mostly an
  issue of clutter but also a reduction in "moving parts".
. Coalesce activities to a single home host where possible. Give the user
  feedback that they have started the dashboard on a host other than the
  home host.
. Reduce number of processes involved in managing running tests.

Changes Needed
^^^^^^^^^^^^^^

. ACID compliant db will be on /tmp and synced to megatest.db with a five
  second max delay.
. Read/writes to db for processes on homehost will go direct to /tmp
  megatest.db file.
. Read/wites fron non-homehost processes will go through one server. Bulk
  reads (e.g. for dashboard or list-runs) will be cached on the current host
  in /tmp and synced from the home megatest.db in the testsuite area.
. Db syncs rely on the target db file timestame minus some margin.
. Since bulk reads do not use the server we can switch to simple RPC for the
  network transport.
. Test running manager process extended to manage multiple running tests.

Current Items
~~~~~~~~~~~~~

ww05 - migrate to inmem-db
^^^^^^^^^^^^^^^^^^^^^^^^^^

. Switch to inmem db with fast sync to on disk db's [DONE]
. Server polls tasks table for next action
.. Task table used for tracking runner process [DONE]
.. Task table used for jobs to run
.. Task table used for queueing runner actions (remove runs, cleanRunExecute, etc)



// ww32
// ~~~~
// 
// . Rerun step and or subsequent steps from gui
// . Refresh test area files from gui












|

|


|






|

|


|
|

|








|

|
|
|
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
Road Map
--------

Note 1: This road-map is still evolving and subject to change without notice.

Architecture Refactor
~~~~~~~~~~~~~~~~~~~~~

Goals
^^^^^

. Reduce load on the file system. Sqlite3 files on network filesystem can be
  a burden. [green]#[DONE]#
. Reduce number of servers and frequency of start/stop. This is mostly an
  issue of clutter but also a reduction in "moving parts". [green]#[DONE]#
. Coalesce activities to a single home host where possible. Give the user
  feedback that they have started the dashboard on a host other than the
  home host. [green]#[DONE]#
. Reduce number of processes involved in managing running tests.

Changes Needed
^^^^^^^^^^^^^^

. ACID compliant db will be on /tmp and synced to megatest.db with a five
  second max delay. [green]#[DONE]#
. Read/writes to db for processes on homehost will go direct to /tmp
  megatest.db file. [green]#[DONE]#
. Read/wites fron non-homehost processes will go through one server. Bulk
  reads (e.g. for dashboard or list-runs) will be cached on the current host
  in /tmp and synced from the home megatest.db in the testsuite area. [green]#[DONE]#
. Db syncs rely on the target db file timestame minus some margin. [green]#[DONE]#
. Since bulk reads do not use the server we can switch to simple RPC for the
  network transport. [green]#[DONE]#
. Test running manager process extended to manage multiple running tests.

Current Items
~~~~~~~~~~~~~

ww05 - migrate to inmem-db
^^^^^^^^^^^^^^^^^^^^^^^^^^

. Switch to inmem db with fast sync to on disk db's [green]#[DONE]#
. Server polls tasks table for next action
.. Task table used for tracking runner process [red]#[Replaced by mtutil]#
.. Task table used for jobs to run [red]#[Replaced by mtutil]#
.. Task table used for queueing runner actions (remove runs,
   cleanRunExecute, etc)  [red]#[Replaced by mtutil#]


// ww32
// ~~~~
// 
// . Rerun step and or subsequent steps from gui
// . Refresh test area files from gui

Modified http-transport.scm from [559219b56a] to [bb11c9d077].

120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
	exn
	(begin
	  (print-error-message exn)
	  (if (< portnum 64000)
	      (begin 
		(debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...")
		(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
		(debug:print 0 *default-log-port* "exn=" (condition->list exn))
		(portlogger:open-run-close portlogger:set-failed portnum)
		(debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port")
		(thread-sleep! 0.1)
		
		;; get_next_port goes here
		(http-transport:try-start-server ipaddrstr
						 (portlogger:open-run-close portlogger:find-port)))







|







120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
	exn
	(begin
	  (print-error-message exn)
	  (if (< portnum 64000)
	      (begin 
		(debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...")
		(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
		(debug:print 5 *default-log-port* "exn=" (condition->list exn))
		(portlogger:open-run-close portlogger:set-failed portnum)
		(debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port")
		(thread-sleep! 0.1)
		
		;; get_next_port goes here
		(http-transport:try-start-server ipaddrstr
						 (portlogger:open-run-close portlogger:find-port)))

Modified megatest.scm from [eb075a7248] to [413a102ae1].

1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
		       
		     (for-each 
		      (lambda (test)
		      	(common:debug-handle-exceptions #f
			 exn
			 (begin
			   (debug:print-error 0 *default-log-port* "Bad data in test record? " test)
			   (print "exn=" (condition->list exn))
			   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
			   (print-call-chain (current-error-port)))
			 (let* ((test-id      (if (member "id"           tests-spec)(get-value-by-fieldname test test-field-index "id"          ) #f)) ;; (db:test-get-id         test))
				(testname     (if (member "testname"     tests-spec)(get-value-by-fieldname test test-field-index "testname"    ) #f)) ;; (db:test-get-testname   test))
				(itempath     (if (member "item_path"    tests-spec)(get-value-by-fieldname test test-field-index "item_path"   ) #f)) ;; (db:test-get-item-path  test))
				(comment      (if (member "comment"      tests-spec)(get-value-by-fieldname test test-field-index "comment"     ) #f)) ;; (db:test-get-comment    test))
				(tstate       (if (member "state"        tests-spec)(get-value-by-fieldname test test-field-index "state"       ) #f)) ;; (db:test-get-state      test))







|







1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
		       
		     (for-each 
		      (lambda (test)
		      	(common:debug-handle-exceptions #f
			 exn
			 (begin
			   (debug:print-error 0 *default-log-port* "Bad data in test record? " test)
			   (debug:print-error 5 *default-log-port* "exn=" (condition->list exn))
			   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
			   (print-call-chain (current-error-port)))
			 (let* ((test-id      (if (member "id"           tests-spec)(get-value-by-fieldname test test-field-index "id"          ) #f)) ;; (db:test-get-id         test))
				(testname     (if (member "testname"     tests-spec)(get-value-by-fieldname test test-field-index "testname"    ) #f)) ;; (db:test-get-testname   test))
				(itempath     (if (member "item_path"    tests-spec)(get-value-by-fieldname test test-field-index "item_path"   ) #f)) ;; (db:test-get-item-path  test))
				(comment      (if (member "comment"      tests-spec)(get-value-by-fieldname test test-field-index "comment"     ) #f)) ;; (db:test-get-comment    test))
				(tstate       (if (member "state"        tests-spec)(get-value-by-fieldname test test-field-index "state"       ) #f)) ;; (db:test-get-state      test))

Modified portlogger.scm from [b8f7cf5181] to [7553f0634d].

54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
	 (avail  (tasks:wait-on-journal fname 10))) ;; wait up to about 10 seconds for the journal to go away
    (handle-exceptions
     exn
     (begin
       ;; (release-dot-lock fname)
       (debug:print-error 0 *default-log-port* "portlogger:open-run-close failed. " proc " " params)
       (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
       (debug:print 0 *default-log-port* "exn=" (condition->list exn))
       (if (file-exists? fname)(delete-file fname)) ;; brutally get rid of it
       (print-call-chain (current-error-port)))
     (let* (;; (lock   (obtain-dot-lock fname 2 9 10))
	    (db     (portlogger:open-db fname))
	    (res    (apply proc db params)))
       (sqlite3:finalize! db)
       ;; (release-dot-lock fname)







|







54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
	 (avail  (tasks:wait-on-journal fname 10))) ;; wait up to about 10 seconds for the journal to go away
    (handle-exceptions
     exn
     (begin
       ;; (release-dot-lock fname)
       (debug:print-error 0 *default-log-port* "portlogger:open-run-close failed. " proc " " params)
       (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
       (debug:print 5 *default-log-port* "exn=" (condition->list exn))
       (if (file-exists? fname)(delete-file fname)) ;; brutally get rid of it
       (print-call-chain (current-error-port)))
     (let* (;; (lock   (obtain-dot-lock fname 2 9 10))
	    (db     (portlogger:open-db fname))
	    (res    (apply proc db params)))
       (sqlite3:finalize! db)
       ;; (release-dot-lock fname)
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115

(define (portlogger:get-prev-used-port db)
  (handle-exceptions
      exn
      (begin
	(debug:print 0 *default-log-port* "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db")
	(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	(debug:print 0 *default-log-port* "exn=" (condition->list exn))
	(print-call-chain (current-error-port))
	(debug:print 0 *default-log-port* "Continuing anyway.")
	#f)
    (sqlite3:fold-row
     (lambda (var curr)
       (or curr var curr))
     #f







|







101
102
103
104
105
106
107
108
109
110
111
112
113
114
115

(define (portlogger:get-prev-used-port db)
  (handle-exceptions
      exn
      (begin
	(debug:print 0 *default-log-port* "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db")
	(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	(debug:print 5 *default-log-port* "exn=" (condition->list exn))
	(print-call-chain (current-error-port))
	(debug:print 0 *default-log-port* "Continuing anyway.")
	#f)
    (sqlite3:fold-row
     (lambda (var curr)
       (or curr var curr))
     #f
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
		      (+ lowport ;; top of registered ports is 49152 but lets use ports in the registered range
			 (random (- 64000 lowport))))))
    (handle-exceptions
     exn
     (begin
       (debug:print 0 *default-log-port* "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db")
       (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
       (debug:print 0 *default-log-port* "exn=" (condition->list exn))
       (print-call-chain (current-error-port))
       (debug:print 0 *default-log-port* "Continuing anyway."))
     (portlogger:take-port db portnum))
    portnum))

;; set port to "released", "failed" etc.
;; 







|







126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
		      (+ lowport ;; top of registered ports is 49152 but lets use ports in the registered range
			 (random (- 64000 lowport))))))
    (handle-exceptions
     exn
     (begin
       (debug:print 0 *default-log-port* "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db")
       (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
       (debug:print 5 *default-log-port* "exn=" (condition->list exn))
       (print-call-chain (current-error-port))
       (debug:print 0 *default-log-port* "Continuing anyway."))
     (portlogger:take-port db portnum))
    portnum))

;; set port to "released", "failed" etc.
;; 
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
	 (numargs (length args))
	 (result  
	  (handle-exceptions
	   exn
	   (begin
	     (debug:print 0 *default-log-port* "EXCEPTION: portlogger database at " dbfname " probably overloaded or unreadable. Try removing it.")
	     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	     (print "exn=" (condition->list exn))
	     (debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
	     (print-call-chain (current-error-port))
	     #f)
	   (case (string->symbol (car args)) ;; commands with two or more params
	     ((take)(portlogger:take-port db (string->number (cadr args))))
	     ((find)(portlogger:find-port db))
	     ((set) (let ((port  (cadr  args))







|







156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
	 (numargs (length args))
	 (result  
	  (handle-exceptions
	   exn
	   (begin
	     (debug:print 0 *default-log-port* "EXCEPTION: portlogger database at " dbfname " probably overloaded or unreadable. Try removing it.")
	     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 5 *default-log-port* "exn=" (condition->list exn))
	     (debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
	     (print-call-chain (current-error-port))
	     #f)
	   (case (string->symbol (car args)) ;; commands with two or more params
	     ((take)(portlogger:take-port db (string->number (cadr args))))
	     ((find)(portlogger:find-port db))
	     ((set) (let ((port  (cadr  args))

Modified process.scm from [1851bdf789] to [78317beb2c].

51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
(define (process:cmd-run-proc-each-line cmd proc . params)
  ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params)
  (handle-exceptions
   exn
   (begin
     (print "ERROR:  Failed to run command: " cmd " " (string-intersperse params " "))
     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
     (print "exn=" (condition->list exn))
     #f)
   (let-values (((fh fho pid) (if (null? params)
				  (process cmd)
				  (process cmd params))))
       (let loop ((curr (read-line fh))
		(result  '()))
       (if (not (eof-object? curr))







|







51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
(define (process:cmd-run-proc-each-line cmd proc . params)
  ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params)
  (handle-exceptions
   exn
   (begin
     (print "ERROR:  Failed to run command: " cmd " " (string-intersperse params " "))
     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
     (debug:print 5 *default-log-port* "exn=" (condition->list exn))
     #f)
   (let-values (((fh fho pid) (if (null? params)
				  (process cmd)
				  (process cmd params))))
       (let loop ((curr (read-line fh))
		(result  '()))
       (if (not (eof-object? curr))

Modified tasks.scm from [6c3eb33bfb] to [d9bc9473e2].

34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
      (debug:print-error 0 *default-log-port* "Called tasks:wait-on-journal with path=" path " (not a string)")
      (let ((fullpath (conc path "-journal")))
	(handle-exceptions
	 exn
	 (begin
	   (print-call-chain (current-error-port))
	   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	   (debug:print 0 *default-log-port* " exn=" (condition->list exn))
	   (debug:print 0 *default-log-port* "tasks:wait-on-journal failed. Continuing on, you can ignore this call-chain")
	   #t) ;; if stuff goes wrong just allow it to move on
	 (let loop ((journal-exists (file-exists? fullpath))
		    (count          n)) ;; wait ten times ...
	   (if journal-exists
	       (begin
		 (if (and waiting-msg







|







34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
      (debug:print-error 0 *default-log-port* "Called tasks:wait-on-journal with path=" path " (not a string)")
      (let ((fullpath (conc path "-journal")))
	(handle-exceptions
	 exn
	 (begin
	   (print-call-chain (current-error-port))
	   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	   (debug:print 5 *default-log-port* " exn=" (condition->list exn))
	   (debug:print 0 *default-log-port* "tasks:wait-on-journal failed. Continuing on, you can ignore this call-chain")
	   #t) ;; if stuff goes wrong just allow it to move on
	 (let loop ((journal-exists (file-exists? fullpath))
		    (count          n)) ;; wait ten times ...
	   (if journal-exists
	       (begin
		 (if (and waiting-msg
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
      *task-db*
      (handle-exceptions
       exn
       (if (> numretries 0)
	   (begin
	     (print-call-chain (current-error-port))
	     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 0 *default-log-port* " exn=" (condition->list exn))
	     (thread-sleep! 1)
	     (tasks:open-db numretries (- numretries 1)))
	   (begin
	     (print-call-chain (current-error-port))
	     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 0 *default-log-port* " exn=" (condition->list exn))))
       (let* ((dbpath        (db:dbfile-path )) ;; (tasks:get-task-db-path))
	      (dbfile       (conc dbpath "/monitor.db"))
	      (avail        (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away
	      (exists       (file-exists? dbpath))
	      (write-access (file-write-access? dbpath))
	      (mdb          (cond ;; what the hek is *toppath* doing here?
			     ((and (string? *toppath*)(file-write-access? *toppath*))







|





|







85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
      *task-db*
      (handle-exceptions
       exn
       (if (> numretries 0)
	   (begin
	     (print-call-chain (current-error-port))
	     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 5 *default-log-port* " exn=" (condition->list exn))
	     (thread-sleep! 1)
	     (tasks:open-db numretries (- numretries 1)))
	   (begin
	     (print-call-chain (current-error-port))
	     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 5 *default-log-port* " exn=" (condition->list exn))))
       (let* ((dbpath        (db:dbfile-path )) ;; (tasks:get-task-db-path))
	      (dbfile       (conc dbpath "/monitor.db"))
	      (avail        (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away
	      (exists       (file-exists? dbpath))
	      (write-access (file-write-access? dbpath))
	      (mdb          (cond ;; what the hek is *toppath* doing here?
			     ((and (string? *toppath*)(file-write-access? *toppath*))

Modified tests.scm from [30002a4340] to [9a02d23357].

1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
	   (set! remtries (- remtries 1))
	   (thread-sleep! 10)
	   (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1)))
	 (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
	   (debug:print-error 0 *default-log-port* "tried for over a minute to update meta info and failed. Giving up")
	   (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.")
	   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	   (print "exn=" (condition->list exn))
	   (debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
	   (print-call-chain (current-error-port))))
     (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
  )))
	 
;;======================================================================
;; A R C H I V I N G







|







1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
	   (set! remtries (- remtries 1))
	   (thread-sleep! 10)
	   (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1)))
	 (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
	   (debug:print-error 0 *default-log-port* "tried for over a minute to update meta info and failed. Giving up")
	   (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.")
	   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	   (debug:print 5 *default-log-port* "exn=" (condition->list exn))
	   (debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
	   (print-call-chain (current-error-port))))
     (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
  )))
	 
;;======================================================================
;; A R C H I V I N G