Megatest

Check-in [028f0d8c40]
Login
Overview
Comment:refactoring in flight. does not compile.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.65-wip
Files: files | file ages | folders
SHA1: 028f0d8c40e871fdf6ce62f08a8e5805d513c6a9
User & Date: mrwellan on 2019-10-16 17:59:56
Other Links: branch diff | manifest | tags
Context
2019-10-18
08:00
wip check-in: b7b562b7b3 user: matt tags: v1.65-wip
2019-10-16
17:59
refactoring in flight. does not compile. check-in: 028f0d8c40 user: mrwellan tags: v1.65-wip
2019-10-15
05:10
Partial (but broken) usage of dbmod.scm check-in: 77149209be user: matt tags: v1.65-wip
Changes

Modified archive.scm from [618f9a591e] to [d8898d0993].

130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
;; 4. save
;;
(define (archive:run-bup archive-command run-id run-name tests rp-mutex bup-mutex)
  ;; move the getting of archive space down into the below block so that a single run can 
  ;; allocate as needed should a disk fill up
  ;;
  (let* ((blockid-cache  (make-hash-table))
	 (tsname         (common:get-testsuite-name))
	 (min-space      (string->number (or (configf:lookup *configdat* "archive" "minspace") "1000")))
	 (arch-groups    (make-hash-table)) ;; archive groups, each corrosponds to a bup area
	 (disk-groups    (make-hash-table)) ;; 
	 (test-groups    (make-hash-table)) ;; these two (disk and test groups) could be combined nicely
	 (test-dirs      (make-hash-table))
	 (bup-exe        (or (configf:lookup *configdat* "archive" "bup") "bup"))
	 (compress       (or (configf:lookup *configdat* "archive" "compress") "9"))







|







130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
;; 4. save
;;
(define (archive:run-bup archive-command run-id run-name tests rp-mutex bup-mutex)
  ;; move the getting of archive space down into the below block so that a single run can 
  ;; allocate as needed should a disk fill up
  ;;
  (let* ((blockid-cache  (make-hash-table))
	 (tsname         (common:get-area-name))
	 (min-space      (string->number (or (configf:lookup *configdat* "archive" "minspace") "1000")))
	 (arch-groups    (make-hash-table)) ;; archive groups, each corrosponds to a bup area
	 (disk-groups    (make-hash-table)) ;; 
	 (test-groups    (make-hash-table)) ;; these two (disk and test groups) could be combined nicely
	 (test-dirs      (make-hash-table))
	 (bup-exe        (or (configf:lookup *configdat* "archive" "bup") "bup"))
	 (compress       (or (configf:lookup *configdat* "archive" "compress") "9"))
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
	       (create-directory archive-dir #t))
	   (case archiver
	     ((bup) ;; Archive using bup
	      (let* ((bup-init-params  (list "-d" archive-dir "init"))
		     (bup-index-params (append (list "-d" archive-dir "index") test-paths))
		     (bup-save-params  (append (list "-d" archive-dir "save" ;; (conc "--strip-path=" linktree)
						     (conc "-" compress) ;; or (conc "--compress=" compress)
						     "-n" (conc (common:get-testsuite-name) "-" run-id)
						     (conc "--strip-path=" test-base) ;; if we push to the directory do we need this?
						     )
					       test-paths)))
		(if (not (common:file-exists? (conc archive-dir "/HEAD")))
		    (begin
		      ;; replace this with jobrunner stuff enventually
		      (debug:print-info 0 *default-log-port* "Init bup in " archive-dir)







|







253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
	       (create-directory archive-dir #t))
	   (case archiver
	     ((bup) ;; Archive using bup
	      (let* ((bup-init-params  (list "-d" archive-dir "init"))
		     (bup-index-params (append (list "-d" archive-dir "index") test-paths))
		     (bup-save-params  (append (list "-d" archive-dir "save" ;; (conc "--strip-path=" linktree)
						     (conc "-" compress) ;; or (conc "--compress=" compress)
						     "-n" (conc (common:get-area-name) "-" run-id)
						     (conc "--strip-path=" test-base) ;; if we push to the directory do we need this?
						     )
					       test-paths)))
		(if (not (common:file-exists? (conc archive-dir "/HEAD")))
		    (begin
		      ;; replace this with jobrunner stuff enventually
		      (debug:print-info 0 *default-log-port* "Init bup in " archive-dir)
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
	      (mutex-unlock! rp-mutex)
	      (new-test-physical-path  (conc best-disk "/" test-partial-path))
	      (archive-block-id        (db:test-get-archived test-dat))
	      (archive-block-info      (rmt:test-get-archive-block-info archive-block-id))
	      (archive-path            (if (vector? archive-block-info)
					   (vector-ref archive-block-info 2) ;; look in db.scm for test-get-archive-block-info for the vector record info
					   #f)) ;; no archive found?
	      (archive-internal-path   (conc (common:get-testsuite-name) "-" run-id "/latest/" test-partial-path)))
	 
	 ;; some sanity checks, move an existing path out of the way - iif it is not a toplevel with children
	 ;;
	 (if (and (not toplevel/children)  ;; special handling needed for toplevel with children
		  prev-test-physical-path
		  (common:file-exists? prev-test-physical-path)) ;; what to do? abort or clean up or link it in?
	     (let* ((base (pathname-directory prev-test-physical-path))







|







341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
	      (mutex-unlock! rp-mutex)
	      (new-test-physical-path  (conc best-disk "/" test-partial-path))
	      (archive-block-id        (db:test-get-archived test-dat))
	      (archive-block-info      (rmt:test-get-archive-block-info archive-block-id))
	      (archive-path            (if (vector? archive-block-info)
					   (vector-ref archive-block-info 2) ;; look in db.scm for test-get-archive-block-info for the vector record info
					   #f)) ;; no archive found?
	      (archive-internal-path   (conc (common:get-area-name) "-" run-id "/latest/" test-partial-path)))
	 
	 ;; some sanity checks, move an existing path out of the way - iif it is not a toplevel with children
	 ;;
	 (if (and (not toplevel/children)  ;; special handling needed for toplevel with children
		  prev-test-physical-path
		  (common:file-exists? prev-test-physical-path)) ;; what to do? abort or clean up or link it in?
	     (let* ((base (pathname-directory prev-test-physical-path))

Modified client.scm from [e9fa0c3f2e] to [7492f7ae02].

52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
    ((http) (http:client-connect iface port))
    ((zmq)  (zmq:client-connect  iface port))
    (else   (rpc:client-connect  iface port))))

(define (client:setup areapath #!key (remaining-tries 100) (failed-connects 0))
  (case (server:get-transport)
    ((rpc) (rpc-transport:client-setup remaining-tries: remaining-tries failed-connects: failed-connects)) ;;(client:setup-rpc run-id))
    ((http)(client:setup-http *runremote* areapath remaining-tries: remaining-tries failed-connects: failed-connects))
    (else  (rpc-transport:client-setup remaining-tries: remaining-tries failed-connects: failed-connects)))) ;; (client:setup-rpc run-id))))

(set-fn 'client:setup client:setup)


;; Do all the connection work, look up the transport type and set up the
;; connection if required.







|







52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
    ((http) (http:client-connect iface port))
    ((zmq)  (zmq:client-connect  iface port))
    (else   (rpc:client-connect  iface port))))

(define (client:setup areapath #!key (remaining-tries 100) (failed-connects 0))
  (case (server:get-transport)
    ((rpc) (rpc-transport:client-setup remaining-tries: remaining-tries failed-connects: failed-connects)) ;;(client:setup-rpc run-id))
    ((http)(client:setup-http *alldat* areapath remaining-tries: remaining-tries failed-connects: failed-connects))
    (else  (rpc-transport:client-setup remaining-tries: remaining-tries failed-connects: failed-connects)))) ;; (client:setup-rpc run-id))))

(set-fn 'client:setup client:setup)


;; Do all the connection work, look up the transport type and set up the
;; connection if required.
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
		  (let* ((start-res (case *transport-type*
				      ((http)(http-transport:client-connect host port))))
			 (ping-res  (case *transport-type* 
				      ((http)(rmt:login-no-auto-client-setup start-res)))))
		    (if (and start-res
			     ping-res)
			(begin
			  (remote-conndat-set! runremote start-res)
			  (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
			  start-res)
			(begin    ;; login failed but have a server record, clean out the record and try again
			  (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid.  Fixes part of Randy;s ticket 1405717332
			  (case *transport-type* 
			    ((http)(http-transport:close-connections)))
			  (remote-conndat-set! runremote #f)  ;; (hash-table-delete! runremote run-id)
			  (thread-sleep! 1)
			  (client:setup-http runremote areapath remaining-tries: (- remaining-tries 1))
			  )))
		  (begin    ;; no server registered
		    ;; (server:kind-run areapath)
		    (server:start-and-wait areapath)
		    (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries)
		    (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries))))  ;; give server a little time to start up, randomize a little to avoid start storms.
		    (client:setup-http runremote areapath remaining-tries: (- remaining-tries 1)))))))))








|






|










96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
		  (let* ((start-res (case *transport-type*
				      ((http)(http-transport:client-connect host port))))
			 (ping-res  (case *transport-type* 
				      ((http)(rmt:login-no-auto-client-setup start-res)))))
		    (if (and start-res
			     ping-res)
			(begin
			  (alldat-conndat-set! runremote start-res)
			  (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
			  start-res)
			(begin    ;; login failed but have a server record, clean out the record and try again
			  (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid.  Fixes part of Randy;s ticket 1405717332
			  (case *transport-type* 
			    ((http)(http-transport:close-connections)))
			  (alldat-conndat-set! runremote #f)  ;; (hash-table-delete! runremote run-id)
			  (thread-sleep! 1)
			  (client:setup-http runremote areapath remaining-tries: (- remaining-tries 1))
			  )))
		  (begin    ;; no server registered
		    ;; (server:kind-run areapath)
		    (server:start-and-wait areapath)
		    (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries)
		    (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries))))  ;; give server a little time to start up, randomize a little to avoid start storms.
		    (client:setup-http runremote areapath remaining-tries: (- remaining-tries 1)))))))))

Modified common.scm from [13e2877614] to [e293d1548a].

345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
  (not (equal? (common:get-last-run-version)
               (common:version-signature))))

(define (common:api-changed?)
  (not (equal? (substring (->string megatest-version) 0 4)
               (substring (conc (common:get-last-run-version)) 0 4))))


(define (common:get-sync-lock-filepath)
  (let* ((tmp-area     (common:get-db-tmp-area))
         (lockfile     (conc tmp-area "/megatest.db.sync-lock")))
    lockfile))
    
;; Move me elsewhere ...
;; RADT => Why do we meed the version check here, this is called only if version misma
;;
(define (common:cleanup-db dbstruct #!key (full #f))







<
|
|







345
346
347
348
349
350
351

352
353
354
355
356
357
358
359
360
  (not (equal? (common:get-last-run-version)
               (common:version-signature))))

(define (common:api-changed?)
  (not (equal? (substring (->string megatest-version) 0 4)
               (substring (conc (common:get-last-run-version)) 0 4))))


(define (common:get-sync-lock-filepath alldat)
  (let* ((tmp-area     (common:get-db-tmp-area alldat))
         (lockfile     (conc tmp-area "/megatest.db.sync-lock")))
    lockfile))
    
;; Move me elsewhere ...
;; RADT => Why do we meed the version check here, this is called only if version misma
;;
(define (common:cleanup-db dbstruct #!key (full #f))
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
  (let ((val (args:get-arg val)))
    (if val val default)))

(define (assoc/default key lst . default)
  (let ((res (assoc key lst)))
    (if res (cadr res)(if (null? default) #f (car default)))))

(define (common:get-testsuite-name)
  (or (configf:lookup *configdat* "setup" "area-name") ;; megatest is a flexible tool, testsuite is too limiting a description.
      (configf:lookup *configdat* "setup" "testsuite" )
      (getenv "MT_TESTSUITE_NAME")
      (if (string? *toppath* )
          (pathname-file *toppath*)
          #f))) ;; (pathname-file (current-directory)))))

(define common:get-area-name common:get-testsuite-name)

(define (common:get-db-tmp-area . junk)
  (if *db-cache-path*
      *db-cache-path*
      (if *toppath* ;; common:get-create-writeable-dir
	  (handle-exceptions
	      exn
	      (begin
		(debug:print-error 0 *default-log-port* "Couldn't create path to " dbdir)
		(exit 1))
	    (let ((dbpath (common:get-create-writeable-dir
			   (list (conc "/tmp/" (current-user-name)
				       "/megatest_localdb/"
				       (common:get-testsuite-name) "/"
				       (string-translate *toppath* "/" ".")))))) ;;  #t))))
	      (set! *db-cache-path* dbpath)
	      dbpath))
	  #f)))

(define (common:get-area-path-signature)
  (message-digest-string (md5-primitive) *toppath*))

(define (common:get-signature str)
  (message-digest-string (md5-primitive) str))








<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







842
843
844
845
846
847
848



























849
850
851
852
853
854
855
  (let ((val (args:get-arg val)))
    (if val val default)))

(define (assoc/default key lst . default)
  (let ((res (assoc key lst)))
    (if res (cadr res)(if (null? default) #f (car default)))))





























(define (common:get-area-path-signature)
  (message-digest-string (md5-primitive) *toppath*))

(define (common:get-signature str)
  (message-digest-string (md5-primitive) str))

2040
2041
2042
2043
2044
2045
2046
2047
2048

2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
	  dbspace
	  required
	  dirpath)))

;; check space in dbdir and in megatest dir
;; returns: ok/not dbspace required-space
;;
(define (common:check-db-dir-space)
  (let* ((required (string->number 

		    (or (configf:lookup *configdat* "setup" "dbdir-space-required")
			"100000")))
	 (dbdir    (common:get-db-tmp-area)) ;; (db:get-dbdir))
	 (tdbspace (common:check-space-in-dir dbdir required))
	 (mdbspace (common:check-space-in-dir *toppath* required)))
    (sort (list tdbspace mdbspace) (lambda (a b)
				     (< (cadr a)(cadr b))))))
    
;; check available space in dbdir, exit if insufficient
;;
(define (common:check-db-dir-and-exit-if-insufficient)
  (let* ((spacedat (car (common:check-db-dir-space))) ;; look only at worst for now







|

>
|

|

|







2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
	  dbspace
	  required
	  dirpath)))

;; check space in dbdir and in megatest dir
;; returns: ok/not dbspace required-space
;;
(define (common:check-db-dir-space alldat)
  (let* ((required (string->number 
		    (or (and (alldat-mtconfig alldat)
			     (configf:lookup (alldat-mtconfig alldat) "setup" "dbdir-space-required"))
			"100000")))
	 (dbdir    (common:get-db-tmp-area alldat)) ;; (db:get-dbdir))
	 (tdbspace (common:check-space-in-dir dbdir required))
	 (mdbspace (common:check-space-in-dir (alldat-areapath alldat) required)))
    (sort (list tdbspace mdbspace) (lambda (a b)
				     (< (cadr a)(cadr b))))))
    
;; check available space in dbdir, exit if insufficient
;;
(define (common:check-db-dir-and-exit-if-insufficient)
  (let* ((spacedat (car (common:check-db-dir-space))) ;; look only at worst for now

Modified common_records.scm from [31fe22390a] to [9e73d286c0].

38
39
40
41
42
43
44
45

46
47
48
49
50
51
52
53
	#f)))

(define (set-fn fn-name fn)
  (hash-table-set! *functions* fn-name fn))

(include "altdb.scm")



(defstruct remote
  (hh-dat            #f) ;; (exec-fn 'common:get-homehost)) ;; homehost record ( addr . hhflag )
  (server-url        #f) ;; (if *toppath* (exec-fn 'server:check-if-running *toppath*))) ;; (server:check-if-running *toppath*) #f))
  (last-server-check 0)  ;; last time we checked to see if the server was alive
  (conndat           #f)
  (transport         *transport-type*)
  (server-timeout    #f) ;; (exec-fn 'server:expiration-timeout))
  (force-server      #f)







|
>
|







38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
	#f)))

(define (set-fn fn-name fn)
  (hash-table-set! *functions* fn-name fn))

(include "altdb.scm")

;; remote connection information - moved to alldat
;;
#;(defstruct remote
  (hh-dat            #f) ;; (exec-fn 'common:get-homehost)) ;; homehost record ( addr . hhflag )
  (server-url        #f) ;; (if *toppath* (exec-fn 'server:check-if-running *toppath*))) ;; (server:check-if-running *toppath*) #f))
  (last-server-check 0)  ;; last time we checked to see if the server was alive
  (conndat           #f)
  (transport         *transport-type*)
  (server-timeout    #f) ;; (exec-fn 'server:expiration-timeout))
  (force-server      #f)
196
197
198
199
200
201
202

203




204
205
206
















207
208
209
210
211
212
213
214

;;======================================================================
;; ALLDATA
;;======================================================================
;;
;; attempt to consolidate a bunch of global information into one struct to toss around
(defstruct alldat

  (toppath           #f)




  (read-only-queries api:read-only-queries)
  (write-queries     api:write-queries))

















(define *alldata* (make-alldat))

;; Some of these routines use:
;;
;;     http://www.cs.toronto.edu/~gfb/scheme/simple-macros.html
;;
;; Syntax for defining macros in a simple style similar to function definiton,
;;  when there is a single pattern for the argument list and there are no keywords.







>
|
>
>
>
>

|

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|







197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236

;;======================================================================
;; ALLDATA
;;======================================================================
;;
;; attempt to consolidate a bunch of global information into one struct to toss around
(defstruct alldat
  (areapath          #f) ;; i.e. toppath
  (mtconfig          #f)
  (log-port          #f)
  (areadat           #f) ;; i.e. runremote
  (rmt-mutex         (make-mutex))
  (db-sync-mutex     (make-mutex))
  (read-only-queries api:read-only-queries)
  (write-queries     api:write-queries)

  ;; database related
  (tmppath           #f) ;; tmp path for dbs

  ;; runremote fields
  (hh-dat            #f) ;; (exec-fn 'common:get-homehost)) ;; homehost record ( addr . hhflag )
  (server-url        #f) ;; (if *toppath* (exec-fn 'server:check-if-running *toppath*))) ;; (server:check-if-running *toppath*) #f))
  (last-server-check 0)  ;; last time we checked to see if the server was alive
  (conndat           #f)
  (transport         *transport-type*)
  (server-timeout    #f) ;; (exec-fn 'server:expiration-timeout))
  (force-server      #f)
  (ro-mode           #f)  
  (ro-mode-checked   #f) ;; flag that indicates we have checked for ro-mode
  (ulex:conn         #f) ;; ulex db conn is not exactly a db connector, more like a network connector 
  )

(define *alldat* (make-alldat))

;; Some of these routines use:
;;
;;     http://www.cs.toronto.edu/~gfb/scheme/simple-macros.html
;;
;; Syntax for defining macros in a simple style similar to function definiton,
;;  when there is a single pattern for the argument list and there are no keywords.

Modified commonmod.scm from [b77b64f35a] to [0bf160ddb9].

20
21
22
23
24
25
26
27


28
29
30
31
32
































33
34
35
36
37
38
39

(declare (unit commonmod))

(module commonmod
	*
	
(import scheme chicken data-structures extras)
(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18)



(define (db:dbdat-get-path dbdat)
  (if (pair? dbdat)
      (cdr dbdat)
      #f))

































;; (define (debug:print . params) #f)
;; (define (debug:print-info . params) #f)
;; 
;; (define (set-functions dbgp dbgpinfo)
;;   (set! debug:print dbgp)
;;   (set! debug:print-info dbgpinfo))







|
>
>





>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73

(declare (unit commonmod))

(module commonmod
	*
	
(import scheme chicken data-structures extras)
(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 ports srfi-1 files format)

(include "common_records.scm")

(define (db:dbdat-get-path dbdat)
  (if (pair? dbdat)
      (cdr dbdat)
      #f))

(define (common:get-area-name alldat)
  (let* ((configdat (alldat-mtconfig alldat))
	 (areapath   (alldat-areapath alldat)))
    (or (configf:lookup configdat "setup" "area-name") ;; megatest is a flexible tool, testsuite is too limiting a description.
	(configf:lookup configdat "setup" "testsuite" )
	(get-environment-variable "MT_TESTSUITE_NAME")
	(if (string? areapath )
	    (pathname-file areapath)
	    #f)))) ;; (pathname-file (current-directory)))))

;; (define common:get-area-name common:get-area-name)

(define (common:get-db-tmp-area alldat)
  (let* ((dbdir #f))
    (if (alldat-tmppath alldat)
	(alldat-tmppath alldat)
	(if (alldat-areapath alldat) ;; common:get-create-writeable-dir
	    (handle-exceptions
	     exn
	     (begin
	       (debug:print-error 0 *default-log-port* "Couldn't create path to " dbdir)
	       (exit 1))
	     (let ((dbpath (common:get-create-writeable-dir
			    (list (conc "/tmp/" (current-user-name)
					"/megatest_localdb/"
					(common:get-area-name alldat) "/"
					(string-translate (alldat-areapath alldat) "/" ".")))))) ;;  #t))))
	       (set! dbdir dbpath) 
	       (alldat-tmppath alldat dbpath)
	       dbpath))
	    #f))))

;; (define (debug:print . params) #f)
;; (define (debug:print-info . params) #f)
;; 
;; (define (set-functions dbgp dbgpinfo)
;;   (set! debug:print dbgp)
;;   (set! debug:print-info dbgpinfo))

Modified dashboard-tests.scm from [2fbc8e905f] to [62cf6f432c].

450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
    dlog))


;;======================================================================
;;
;;======================================================================
(define (dashboard-tests:examine-test run-id test-id) ;; run-id run-key origtest)
  (let* ((db-path       (db:dbfile-path)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/db/" run-id ".db"))
	 (dbstruct      #f) ;; NOT ACTUALLY USED (db:setup)) ;; (make-dbr:dbstruct path:  (db:dbfile-path #f) ;; (configf:lookup *configdat* "setup" "linktree") 
			    ;;		   local: #t))
	 (testdat        (rmt:get-test-info-by-id run-id test-id)) ;; (db:get-test-info-by-id dbstruct run-id test-id))
	 (db-mod-time   0) ;; (file-modification-time db-path))
	 (last-update   0) ;; (current-seconds))
	 (request-update #t))
    (if (not testdat)
	(begin







|
|







450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
    dlog))


;;======================================================================
;;
;;======================================================================
(define (dashboard-tests:examine-test run-id test-id) ;; run-id run-key origtest)
  (let* ((db-path       (common:get-db-tmp-area *alldat*)) 
	 (dbstruct      #f) ;; NOT ACTUALLY USED (db:setup))
			    ;;		   local: #t))
	 (testdat        (rmt:get-test-info-by-id run-id test-id)) ;; (db:get-test-info-by-id dbstruct run-id test-id))
	 (db-mod-time   0) ;; (file-modification-time db-path))
	 (last-update   0) ;; (current-seconds))
	 (request-update #t))
    (if (not testdat)
	(begin

Modified dashboard.scm from [df9da433e4] to [bd65627b1f].

377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
(define (dboard:tabdat-make-data)
  (let ((dat (make-dboard:tabdat)))
    (dboard:setup-tabdat dat)
    (dboard:setup-num-rows dat)
    dat))

(define (dboard:setup-tabdat tabdat)
  (dboard:tabdat-dbdir-set! tabdat (db:dbfile-path)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/.db"))
  (dboard:tabdat-dbfpath-set! tabdat (db:dbfile-path))
  (dboard:tabdat-monitor-db-path-set! tabdat (conc (dboard:tabdat-dbdir tabdat) "/monitor.db"))

  ;; HACK ALERT: this is a hack, please fix.
  (dboard:tabdat-ro-set! tabdat (not (file-read-access? (dboard:tabdat-dbfpath tabdat))))
  
  (dboard:tabdat-keys-set! tabdat (rmt:get-keys))
  (dboard:tabdat-dbkeys-set! tabdat (append (dboard:tabdat-keys tabdat) (list "runname")))







|
|







377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
(define (dboard:tabdat-make-data)
  (let ((dat (make-dboard:tabdat)))
    (dboard:setup-tabdat dat)
    (dboard:setup-num-rows dat)
    dat))

(define (dboard:setup-tabdat tabdat)
  (dboard:tabdat-dbdir-set! tabdat (common:get-db-tmp-area *alldat*)) 
  (dboard:tabdat-dbfpath-set! tabdat (common:get-db-tmp-area *alldat*))
  (dboard:tabdat-monitor-db-path-set! tabdat (conc (dboard:tabdat-dbdir tabdat) "/monitor.db"))

  ;; HACK ALERT: this is a hack, please fix.
  (dboard:tabdat-ro-set! tabdat (not (file-read-access? (dboard:tabdat-dbfpath tabdat))))
  
  (dboard:tabdat-keys-set! tabdat (rmt:get-keys))
  (dboard:tabdat-dbkeys-set! tabdat (append (dboard:tabdat-keys tabdat) (list "runname")))
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
                        ;;(dboard:tabdat-filters-changed tabdat))
                        0
                        (dboard:rundat-last-update run-dat)))
	 (last-db-time (if do-not-use-db-file-timestamps
			   0
			   (dboard:rundat-last-db-time run-dat)))
	 (db-path      (or (dboard:rundat-db-path run-dat)
			   (let* ((db-dir (common:get-db-tmp-area))
				  (db-pth (conc db-dir "/megatest.db")))
			     (dboard:rundat-db-path-set! run-dat db-pth)
			     db-pth)))
	 (db-mod-time  (common:lazy-sqlite-db-modification-time db-path))
	 (db-modified  (>= db-mod-time last-db-time))
	 (multi-get    (> (dboard:rundat-run-data-offset run-dat) 0))  ;; multi-get in progress
	 (tmptests     (if (or do-not-use-db-file-timestamps







|







602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
                        ;;(dboard:tabdat-filters-changed tabdat))
                        0
                        (dboard:rundat-last-update run-dat)))
	 (last-db-time (if do-not-use-db-file-timestamps
			   0
			   (dboard:rundat-last-db-time run-dat)))
	 (db-path      (or (dboard:rundat-db-path run-dat)
			   (let* ((db-dir (common:get-db-tmp-area *alldat*))
				  (db-pth (conc db-dir "/megatest.db")))
			     (dboard:rundat-db-path-set! run-dat db-pth)
			     db-pth)))
	 (db-mod-time  (common:lazy-sqlite-db-modification-time db-path))
	 (db-modified  (>= db-mod-time last-db-time))
	 (multi-get    (> (dboard:rundat-run-data-offset run-dat) 0))  ;; multi-get in progress
	 (tmptests     (if (or do-not-use-db-file-timestamps

Modified db.scm from [3e7b7fa94e] to [2dfbcce9d8].

110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
;;    if db already open - return inmem
;;    if db not open, open inmem, rundb and sync then return inmem
;;    inuse gets set automatically for rundb's
;;
(define (db:get-db dbstruct) ;;  run-id) 
  (if (stack? (dbr:dbstruct-dbstack dbstruct))
      (if (stack-empty? (dbr:dbstruct-dbstack dbstruct))
          (let ((newdb (db:open-megatest-db path: (db:dbfile-path))))
            ;; (stack-push! (dbr:dbstruct-dbstack dbstruct) newdb)
            newdb)
          (stack-pop! (dbr:dbstruct-dbstack dbstruct)))
      (db:open-db dbstruct)))

;; mod-read:
;;     'mod   modified data







|







110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
;;    if db already open - return inmem
;;    if db not open, open inmem, rundb and sync then return inmem
;;    inuse gets set automatically for rundb's
;;
(define (db:get-db dbstruct) ;;  run-id) 
  (if (stack? (dbr:dbstruct-dbstack dbstruct))
      (if (stack-empty? (dbr:dbstruct-dbstack dbstruct))
          (let ((newdb (db:open-megatest-db path: (common:get-db-tmp-area *alldat*))))
            ;; (stack-push! (dbr:dbstruct-dbstack dbstruct) newdb)
            newdb)
          (stack-pop! (dbr:dbstruct-dbstack dbstruct)))
      (db:open-db dbstruct)))

;; mod-read:
;;     'mod   modified data
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
;;     (filedb:get-path db id)))

;; NB// #f => return dbdir only
;;      (was planned to be;  zeroth db with name=main.db)
;; 
;; If run-id is #f return to create and retrieve the path where the db will live.
;;
(define db:dbfile-path common:get-db-tmp-area)

(define (db:set-sync db)
  (let ((syncprag (configf:lookup *configdat* "setup" "sychronous")))
    (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 0) ";")))) 

;; open an sql database inside a file lock
;; returns: db existed-prior-to-opening







|







191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
;;     (filedb:get-path db id)))

;; NB// #f => return dbdir only
;;      (was planned to be;  zeroth db with name=main.db)
;; 
;; If run-id is #f return to create and retrieve the path where the db will live.
;;
;; (define db:dbfile-path common:get-db-tmp-area)

(define (db:set-sync db)
  (let ((syncprag (configf:lookup *configdat* "setup" "sychronous")))
    (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 0) ";")))) 

;; open an sql database inside a file lock
;; returns: db existed-prior-to-opening
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
;; This routine creates the db if not already present. It is only called if the db is not already opened
;;
(define (db:open-db dbstruct #!key (areapath #f)(do-sync #t)) ;; TODO: actually use areapath
  (let ((tmpdb-stack (dbr:dbstruct-dbstack dbstruct))) ;; RA => Returns the first reference in dbstruct
    (if (stack? tmpdb-stack)
	(db:get-db tmpdb-stack) ;; get previously opened db (will create new db handle if all in the stack are already used
        (let* ((max-stale-tmp (configf:lookup-number *configdat* "server" "filling-db-max-stale-seconds" default: 10))
               (dbpath       (db:dbfile-path ))      ;; path to tmp db area
               (dbexists     (common:file-exists? dbpath))
	       (tmpdbfname   (conc dbpath "/megatest.db"))
	       (dbfexists    (common:file-exists? tmpdbfname))  ;; (conc dbpath "/megatest.db")))
               (mtdbexists   (common:file-exists? (conc *toppath* "/megatest.db")))
							 
               (mtdbmodtime  (if mtdbexists (common:lazy-sqlite-db-modification-time (conc *toppath* "/megatest.db"))   #f))
	        		 (tmpdbmodtime (if dbfexists  (common:lazy-sqlite-db-modification-time tmpdbfname) #f)) 







|







278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
;; This routine creates the db if not already present. It is only called if the db is not already opened
;;
(define (db:open-db dbstruct #!key (areapath #f)(do-sync #t)) ;; TODO: actually use areapath
  (let ((tmpdb-stack (dbr:dbstruct-dbstack dbstruct))) ;; RA => Returns the first reference in dbstruct
    (if (stack? tmpdb-stack)
	(db:get-db tmpdb-stack) ;; get previously opened db (will create new db handle if all in the stack are already used
        (let* ((max-stale-tmp (configf:lookup-number *configdat* "server" "filling-db-max-stale-seconds" default: 10))
               (dbpath       (common:get-db-tmp-area *alldat* ))      ;; path to tmp db area
               (dbexists     (common:file-exists? dbpath))
	       (tmpdbfname   (conc dbpath "/megatest.db"))
	       (dbfexists    (common:file-exists? tmpdbfname))  ;; (conc dbpath "/megatest.db")))
               (mtdbexists   (common:file-exists? (conc *toppath* "/megatest.db")))
							 
               (mtdbmodtime  (if mtdbexists (common:lazy-sqlite-db-modification-time (conc *toppath* "/megatest.db"))   #f))
	        		 (tmpdbmodtime (if dbfexists  (common:lazy-sqlite-db-modification-time tmpdbfname) #f)) 
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
		(sqlite3:execute db "DELETE FROM metadat WHERE var=?;" var))))

;;======================================================================
;; no-sync.db - small bits of data to be shared between servers
;;======================================================================

(define (db:open-no-sync-db)
  (let* ((dbpath (db:dbfile-path))
	 (dbname (conc dbpath "/no-sync.db"))
	 (db-exists (common:file-exists? dbname))
	 (db     (sqlite3:open-database dbname)))
    (sqlite3:set-busy-handler! db (make-busy-timeout 136000))
    (if (not db-exists)
	(begin
	  (sqlite3:execute db "PRAGMA synchronous = 0;")







|







1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
		(sqlite3:execute db "DELETE FROM metadat WHERE var=?;" var))))

;;======================================================================
;; no-sync.db - small bits of data to be shared between servers
;;======================================================================

(define (db:open-no-sync-db)
  (let* ((dbpath (common:get-db-tmp-area *alldat*))
	 (dbname (conc dbpath "/no-sync.db"))
	 (db-exists (common:file-exists? dbname))
	 (db     (sqlite3:open-database dbname)))
    (sqlite3:set-busy-handler! db (make-busy-timeout 136000))
    (if (not db-exists)
	(begin
	  (sqlite3:execute db "PRAGMA synchronous = 0;")
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
		   )))
    (debug:print-info 11 *default-log-port* "db:get-runs END qrystr: " qrystr " target: " target " offset: " offset " limit: " count)
    res))

;; TODO: Switch this to use max(update_time) from each run db? Then if using a server there is no disk traffic (using inmem db)
;;
(define (db:get-changed-run-ids since-time)
  (let* ((dbdir      (db:dbfile-path)) ;; (configf:lookup *configdat* "setup" "dbdir"))
	 (alldbs     (glob (conc dbdir "/[0-9]*.db")))
	 (changed    (filter (lambda (dbfile)
			       (> (file-modification-time dbfile) since-time))
			     alldbs)))
    (delete-duplicates
     (map (lambda (dbfile)
	    (let* ((res (string-match ".*\\/(\\d)*\\.db" dbfile)))







|







2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
		   )))
    (debug:print-info 11 *default-log-port* "db:get-runs END qrystr: " qrystr " target: " target " offset: " offset " limit: " count)
    res))

;; TODO: Switch this to use max(update_time) from each run db? Then if using a server there is no disk traffic (using inmem db)
;;
(define (db:get-changed-run-ids since-time)
  (let* ((dbdir      (common:get-db-tmp-area *alldat*)) ;; (configf:lookup *configdat* "setup" "dbdir"))
	 (alldbs     (glob (conc dbdir "/[0-9]*.db")))
	 (changed    (filter (lambda (dbfile)
			       (> (file-modification-time dbfile) since-time))
			     alldbs)))
    (delete-duplicates
     (map (lambda (dbfile)
	    (let* ((res (string-match ".*\\/(\\d)*\\.db" dbfile)))
4745
4746
4747
4748
4749
4750
4751
4752
4753
    ;; brutal clean up
    (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat)
    (system "rm -rf tempdir")))

;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%")

;; tiresome setup for rmtmod (and other mods) goes here
(set-fn 'db:dbfile-path common:get-db-tmp-area)
(set-fn 'db:setup       db:setup)







|

4745
4746
4747
4748
4749
4750
4751
4752
4753
    ;; brutal clean up
    (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat)
    (system "rm -rf tempdir")))

;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%")

;; tiresome setup for rmtmod (and other mods) goes here
;; (set-fn 'db:dbfile-path common:get-db-tmp-area)
(set-fn 'db:setup       db:setup)

Modified http-transport.scm from [1dfb616f68] to [a666907cd8].

66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
	 (ipaddrstr       (let ((ipstr (if (string=? "-" hostn)
					   ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
					   (server:get-best-guess-address hostname)
					   #f)))
			    (if ipstr ipstr hostn))) ;; hostname))) 
	 (start-port      (portlogger:open-run-close portlogger:find-port))
	 (link-tree-path  (common:get-linktree))
	 (tmp-area        (common:get-db-tmp-area))
	 (start-file      (conc tmp-area "/.server-start")))
    (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port)
    ;; set some parameters for the server
    (root-path     (if link-tree-path 
		       link-tree-path
		       (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP!
    (handle-directory spiffy-directory-listing)







|







66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
	 (ipaddrstr       (let ((ipstr (if (string=? "-" hostn)
					   ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
					   (server:get-best-guess-address hostname)
					   #f)))
			    (if ipstr ipstr hostn))) ;; hostname))) 
	 (start-port      (portlogger:open-run-close portlogger:find-port))
	 (link-tree-path  (common:get-linktree))
	 (tmp-area        (common:get-db-tmp-area *alldat*))
	 (start-file      (conc tmp-area "/.server-start")))
    (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port)
    ;; set some parameters for the server
    (root-path     (if link-tree-path 
		       link-tree-path
		       (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP!
    (handle-directory spiffy-directory-listing)
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
			 (http-transport:server-dat-get-api-req serverdat)
			 (begin
			   (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
			   (exit 1))))
	 (res        (vector #f "uninitialized"))
	 (success    #t)
	 (sparams    (db:obj->string params transport: 'http))
	 (runremote  (or area-dat *runremote*)))
       (debug:print-info 11 *default-log-port* "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n")
       ;; set up the http-client here
       (max-retry-attempts 1)
       ;; consider all requests indempotent
       (retry-request? (lambda (request)
			 #f))
       ;; send the data and get the response







|







238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
			 (http-transport:server-dat-get-api-req serverdat)
			 (begin
			   (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
			   (exit 1))))
	 (res        (vector #f "uninitialized"))
	 (success    #t)
	 (sparams    (db:obj->string params transport: 'http))
	 (areadat  (or area-dat *areadat*)))
       (debug:print-info 11 *default-log-port* "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n")
       ;; set up the http-client here
       (max-retry-attempts 1)
       ;; consider all requests indempotent
       (retry-request? (lambda (request)
			 #f))
       ;; send the data and get the response
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
                                                (if (debug:debug-mode 1)
                                                    (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...")
                                                    (begin
                                                      (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
                                                      (debug:print 0 *default-log-port* " message: " msg)
                                                      (debug:print 0 *default-log-port* " cmd: " cmd " params: " params)
                                                      (debug:print 0 *default-log-port* " call-chain: " call-chain)))
                                                (if runremote
						    (remote-conndat-set! runremote #f))
						;; Killing associated server to allow clean retry.")
						;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
						(mutex-unlock! *http-mutex*)
					     ;;; (signal (make-composite-condition
					     ;;;          (make-property-condition 'commfail 'message "failed to connect to server")))
					     ;;; "communications failed"
						(db:obj->string #f))







|
|







267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
                                                (if (debug:debug-mode 1)
                                                    (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...")
                                                    (begin
                                                      (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
                                                      (debug:print 0 *default-log-port* " message: " msg)
                                                      (debug:print 0 *default-log-port* " cmd: " cmd " params: " params)
                                                      (debug:print 0 *default-log-port* " call-chain: " call-chain)))
                                                (if areadat
						    (areadat-conndat-set! areadat #f))
						;; Killing associated server to allow clean retry.")
						;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
						(mutex-unlock! *http-mutex*)
					     ;;; (signal (make-composite-condition
					     ;;;          (make-property-condition 'commfail 'message "failed to connect to server")))
					     ;;; "communications failed"
						(db:obj->string #f))
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
                       (signal (vector-ref res 0)))
                     res))
	     (signal (make-composite-condition
		      (make-property-condition 
		       'timeout
		       'message "nmsg-transport:client-api-send-receive-raw timed out talking to server")))))))

;; careful closing of connections stored in *runremote*
;;
(define (http-transport:close-connections #!key (area-dat #f))
  (let* ((runremote  (or area-dat *runremote*))
	 (server-dat (if runremote
                         (remote-conndat runremote)
                         #f))) ;; (hash-table-ref/default *runremote* run-id #f)))
    (if (vector? server-dat)
	(let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
	  (handle-exceptions
	    exn
	    (begin
	      (print-call-chain *default-log-port*)
	      (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn)))







|

|
|
|
|
|







314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
                       (signal (vector-ref res 0)))
                     res))
	     (signal (make-composite-condition
		      (make-property-condition 
		       'timeout
		       'message "nmsg-transport:client-api-send-receive-raw timed out talking to server")))))))

;; careful closing of connections stored in *alldat*
;;
(define (http-transport:close-connections #!key (all-dat #f))
  (let* ((alldat  (or all-dat *alldat*))
	 (server-dat (if alldat
                         (alldat-conndat alldat)
                         #f))) ;; (hash-table-ref/default *areadat* run-id #f)))
    (if (vector? server-dat)
	(let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
	  (handle-exceptions
	    exn
	    (begin
	      (print-call-chain *default-log-port*)
	      (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn)))
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running) 
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")
  (let* ((tmp-area          (common:get-db-tmp-area))
	 (started-file      (conc tmp-area "/.server-started"))
	 (server-start-time (current-seconds))
	 (server-info (let loop ((start-time (current-seconds))
				 (changed    #t)
				 (last-sdat  "not this"))
                        (let ((sdat #f))
			  (thread-sleep! 0.01)







|







353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running) 
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")
  (let* ((tmp-area          (common:get-db-tmp-area *alldat*))
	 (started-file      (conc tmp-area "/.server-started"))
	 (server-start-time (current-seconds))
	 (server-info (let loop ((start-time (current-seconds))
				 (changed    #t)
				 (last-sdat  "not this"))
                        (let ((sdat #f))
			  (thread-sleep! 0.01)
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527

;; all routes though here end in exit ...
;;
;; start_server? 
;;
(define (http-transport:launch)
  ;; check that a server start is in progress, pause or exit if so
  (let* ((tmp-area            (common:get-db-tmp-area))
	 (server-start        (conc tmp-area "/.server-start"))
	 (server-started      (conc tmp-area "/.server-started"))
	 (start-time          (common:lazy-modification-time server-start))
	 (started-time        (common:lazy-modification-time server-started))
	 (server-starting     (< start-time started-time)) ;; if start-time is less than started-time then a server is still starting
	 (start-time-old      (> (- (current-seconds) start-time) 5))
         (cleanup-proc        (lambda (msg)







|







513
514
515
516
517
518
519
520
521
522
523
524
525
526
527

;; all routes though here end in exit ...
;;
;; start_server? 
;;
(define (http-transport:launch)
  ;; check that a server start is in progress, pause or exit if so
  (let* ((tmp-area            (common:get-db-tmp-area *alldat*))
	 (server-start        (conc tmp-area "/.server-start"))
	 (server-started      (conc tmp-area "/.server-started"))
	 (start-time          (common:lazy-modification-time server-start))
	 (started-time        (common:lazy-modification-time server-started))
	 (server-starting     (< start-time started-time)) ;; if start-time is less than started-time then a server is still starting
	 (start-time-old      (> (- (current-seconds) start-time) 5))
         (cleanup-proc        (lambda (msg)

Modified launch.scm from [ddbd5933a3] to [423931cfbc].

733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
	      (list  "MT_TEST_NAME" test-name)
	      (list  "MT_ITEM_INFO" (conc itemdat))
	      (list  "MT_ITEMPATH"  item-path)
	      (list  "MT_RUNNAME"   runname)
	      (list  "MT_MEGATEST"  megatest)
	      (list  "MT_TARGET"    target)
	      (list  "MT_LINKTREE"  (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))
	      (list  "MT_TESTSUITENAME" (common:get-testsuite-name))))
          ;;(bb-check-path msg: "launch:execute post block 3")

	  (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path)))
          ;;(bb-check-path msg: "launch:execute post block 4")
	  ;; (change-directory top-path)
	  ;; Can setup as client for server mode now
	  ;; (client:setup)







|







733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
	      (list  "MT_TEST_NAME" test-name)
	      (list  "MT_ITEM_INFO" (conc itemdat))
	      (list  "MT_ITEMPATH"  item-path)
	      (list  "MT_RUNNAME"   runname)
	      (list  "MT_MEGATEST"  megatest)
	      (list  "MT_TARGET"    target)
	      (list  "MT_LINKTREE"  (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))
	      (list  "MT_TESTSUITENAME" (common:get-area-name))))
          ;;(bb-check-path msg: "launch:execute post block 3")

	  (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path)))
          ;;(bb-check-path msg: "launch:execute post block 4")
	  ;; (change-directory top-path)
	  ;; Can setup as client for server mode now
	  ;; (client:setup)
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
	      (begin
		(debug:print-error 0 *default-log-port* "linktree not defined in [setup] section of megatest.config")
		)))
	(if (and *toppath*
		 (directory-exists? *toppath*))
	    (begin
	      (setenv "MT_RUN_AREA_HOME" *toppath*)
	      (setenv "MT_TESTSUITENAME" (common:get-testsuite-name)))
	    (begin
	      (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")
	      (set! *toppath* #f) ;; force it to be false so we return #f
	      #f))
	
        ;; one more attempt to cache the configs for future reading
        (let* ((cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))







|







1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
	      (begin
		(debug:print-error 0 *default-log-port* "linktree not defined in [setup] section of megatest.config")
		)))
	(if (and *toppath*
		 (directory-exists? *toppath*))
	    (begin
	      (setenv "MT_RUN_AREA_HOME" *toppath*)
	      (setenv "MT_TESTSUITENAME" (common:get-area-name)))
	    (begin
	      (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")
	      (set! *toppath* #f) ;; force it to be false so we return #f
	      #f))
	
        ;; one more attempt to cache the configs for future reading
        (let* ((cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
			      (conc (if dir (conc dir "/") "")
				    (case (string->symbol exe)
				      ((dboard)    "../megatest")
				      ((mtest)     "../megatest")
				      ((dashboard) "megatest")
				      (else exe)))))
	   (launcher        (common:get-launcher *configdat* test-name item-path)) ;; (config-lookup *configdat* "jobtools"     "launcher"))
	   (test-sig        (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path
	   (work-area       #f)
	   (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all
	   (diskpath   #f)
	   (cmdparms   #f)
	   (fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
	   (mt-bindir-path #f)
	   (testinfo   (rmt:get-test-info-by-id run-id test-id))







|







1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
			      (conc (if dir (conc dir "/") "")
				    (case (string->symbol exe)
				      ((dboard)    "../megatest")
				      ((mtest)     "../megatest")
				      ((dashboard) "megatest")
				      (else exe)))))
	   (launcher        (common:get-launcher *configdat* test-name item-path)) ;; (config-lookup *configdat* "jobtools"     "launcher"))
	   (test-sig        (conc (common:get-area-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path
	   (work-area       #f)
	   (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all
	   (diskpath   #f)
	   (cmdparms   #f)
	   (fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
	   (mt-bindir-path #f)
	   (testinfo   (rmt:get-test-info-by-id run-id test-id))
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
					(list 'homehost  (let* ((hhdat (common:get-homehost)))
							   (if hhdat
							       (car hhdat)
							       #f)))
					(list 'serverurl (if *runremote*
							     (remote-server-url *runremote*)
							     #f)) ;;
					(list 'areaname  (common:get-testsuite-name))
					(list 'toppath   *toppath*)
					(list 'work-area work-area)
					(list 'test-name test-name) 
					(list 'runscript runscript) 
					(list 'run-id    run-id   )
					(list 'test-id   test-id  )
					;; (list 'item-path item-path )







|







1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
					(list 'homehost  (let* ((hhdat (common:get-homehost)))
							   (if hhdat
							       (car hhdat)
							       #f)))
					(list 'serverurl (if *runremote*
							     (remote-server-url *runremote*)
							     #f)) ;;
					(list 'areaname  (common:get-area-name))
					(list 'toppath   *toppath*)
					(list 'work-area work-area)
					(list 'test-name test-name) 
					(list 'runscript runscript) 
					(list 'run-id    run-id   )
					(list 'test-id   test-id  )
					;; (list 'item-path item-path )

Modified rmt.scm from [93351dcc43] to [e750b3ab6f].

52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75

76

77




78
79
80
81
82
83
84

85
86
87
88
89
90
91
92

;;======================================================================
;;  S U P P O R T   F U N C T I O N S
;;======================================================================

(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id

(define *runremote* (make-remote))

;; this entry point can decide based on cmd whether to dispatch to old api calls via remote or via ulex
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f))
  (let* ((areapath      *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas
	 (runremote     (or area-dat
			    *runremote*)))
    ;; ensure we have a record for our connection for given area
    (if (not (remote-hh-dat runremote))
	(begin
	  (remote-server-timeout-set! runremote (server:expiration-timeout))
	  (remote-hh-dat-set!         runremote (common:get-homehost))
	  )) ;; new runremote will come from this on next iteration

    ;; ensure we have a homehost record, do this here instead of in -orig
    (if (or (not (remote-hh-dat runremote))

	    (not (pair? (remote-hh-dat runremote))))  ;; not on homehost

	(remote-hh-dat-set! runremote (common:get-homehost)))





    (if (member cmd '(blah))
	(begin
	  (mutex-lock! *send-receive-mutex*)
	  (let ((ulex:conn (remote-ulex:conn runremote)))
	    (if (not ulex:conn)(remote-ulex:conn-set! runremote (rmtmod:setup-ulex areapath)))
	    (rmtmod:send-receive-ulex ulex:conn cmd rid params attemptnum area-dat)))

	(rmt:send-receive-orig *default-log-port* runremote *rmt-mutex* areapath *db-multi-sync-mutex* cmd rid params *alldata* attemptnum: attemptnum area-dat: area-dat))))

;; bunch of small functions factored out of send-receive to make debug easier
;;

;; (define (rmt:update-db-stats run-id rawcmd params duration)
;;   (mutex-lock! *db-stats-mutex*)
;;   (handle-exceptions







|





|
|

|

|
|
|

|
|
>
|
>
|
>
>
>
>




|
|

>
|







52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99

;;======================================================================
;;  S U P P O R T   F U N C T I O N S
;;======================================================================

(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id

;; (define *runremote* (make-remote))

;; this entry point can decide based on cmd whether to dispatch to old api calls via remote or via ulex
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f))
  (let* ((areapath      *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas
	 (alldat     (or area-dat
			    *alldat*)))
    ;; ensure we have a record for our connection for given area
    (if (not (alldat-hh-dat alldat))
	(begin
	  (alldat-server-timeout-set! alldat (server:expiration-timeout))
	  (alldat-hh-dat-set!         alldat (common:get-homehost))
	  )) ;; new alldat will come from this on next iteration

    ;; ensure we have a homehost record and mtconfig, do this here instead of in -orig
    (if (or (not (alldat-mtconfig *alldat*))
	    (not (alldat-hh-dat alldat))
	    (not (pair? (alldat-hh-dat alldat))))  ;; not on homehost
	(begin
	  (alldat-hh-dat-set! alldat (common:get-homehost))
	  (alldat-mtconfig-set! *alldat* *configdat*)
	  (alldat-areapath-set! *alldat* *toppath*)
	  (alldat-areadat-set!  *alldat* alldat) ;; TODO: converge usage of alldat and area-dat
	  ))

    (if (member cmd '(blah))
	(begin
	  (mutex-lock! *send-receive-mutex*)
	  (let ((ulex:conn (alldat-ulex:conn alldat)))
	    (if (not ulex:conn)(alldat-ulex:conn-set! alldat (rmtmod:setup-ulex areapath)))
	    (rmtmod:send-receive-ulex ulex:conn cmd rid params attemptnum area-dat)))
	(rmt:send-receive-orig *default-log-port* alldat *rmt-mutex* areapath *db-multi-sync-mutex*
			       cmd rid params *alldat* attemptnum: attemptnum area-dat: area-dat))))

;; bunch of small functions factored out of send-receive to make debug easier
;;

;; (define (rmt:update-db-stats run-id rawcmd params duration)
;;   (mutex-lock! *db-stats-mutex*)
;;   (handle-exceptions

Modified rmtmod.scm from [b74f8e8186] to [9698947349].

16
17
18
19
20
21
22

23
24
25
26
27
28
29


30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================

(declare (unit rmtmod))
(declare (uses commonmod))


(module rmtmod
	*
	
(import scheme chicken data-structures extras)
(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable)
(import commonmod)


(use (prefix ulex ulex:))

(include "common_records.scm")

;; Hack to make these functions visible to the refactored code, goal is to eliminate these over time.
;; (define (rmt:send-receive . params) #f)
;; (define (http-transport:close-connections . params) #f)
;; ;; from remote defstruct in common.scm
;; ;; (define (api:execute-requests . params) #f)
;; (define (http-transport:client-api-send-receive . params) #f)
;; (define (client:setup . params) #f)
;; (define (server:kind-run . params) #f)
;; (define (server:start-and-wait . params) #f)
;; (define (server:check-if-running . params) #f)
;; (define (server:ping . params) #f)
;; (define (common:force-server? . params) #f)
;; 'send-receive rmt:send-receive ...
#;(define (set-functions . alldata)
  (match
   alldata
   ((a b c d e f g h i j) ;; e f g h i j k l)
    (set! http-transport:client-api-send-receive a)
    (set! http-transport:close-connections       b)
    ;; (set! api:execute-requests                   c)
    ;; d
    (set! client:setup                           e)
    (set! server:kind-run                        f)
    (set! server:start-and-wait                  g)
    (set! server:check-if-running                h)
    (set! server:ping                            i)
    (set! common:force-server?                   j)
    )))

(define (rmt:open-qry-close-locally log-port multi-sync-mutex cmd run-id params alldat #!key (remretries 5))
  (let* ((ro-queries     (alldat-read-only-queries alldat))
	 (qry-is-write   (not (member cmd ro-queries)))
	 (db-file-path   (db:dbfile-path)) ;;  0))
	 (dbstruct-local (exec-fn 'db:setup #t))  ;; make-dbr:dbstruct path:  dbdir local: #t)))
	 (read-only      (not (file-write-access? db-file-path)))
	 (start          (current-milliseconds))
	 (resdat         (if (not (and read-only qry-is-write))
			     (let ((v (exec-fn 'api:execute-requests dbstruct-local (vector (symbol->string cmd) params))))
			       (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
				exn               ;;  This is an attempt to detect that situation and recover gracefully







>







>
>




<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<



|







16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36





























37
38
39
40
41
42
43
44
45
46
47
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================

(declare (unit rmtmod))
(declare (uses commonmod))
(declare (uses dbmod))

(module rmtmod
	*
	
(import scheme chicken data-structures extras)
(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable)
(import commonmod)
(import dbmod)

(use (prefix ulex ulex:))

(include "common_records.scm")






























(define (rmt:open-qry-close-locally log-port multi-sync-mutex cmd run-id params alldat #!key (remretries 5))
  (let* ((ro-queries     (alldat-read-only-queries alldat))
	 (qry-is-write   (not (member cmd ro-queries)))
	 (db-file-path   (common:get-db-tmp-area alldat)) ;;  0))
	 (dbstruct-local (exec-fn 'db:setup #t))  ;; make-dbr:dbstruct path:  dbdir local: #t)))
	 (read-only      (not (file-write-access? db-file-path)))
	 (start          (current-milliseconds))
	 (resdat         (if (not (and read-only qry-is-write))
			     (let ((v (exec-fn 'api:execute-requests dbstruct-local (vector (symbol->string cmd) params))))
			       (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
				exn               ;;  This is an attempt to detect that situation and recover gracefully
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
		(mutex-lock! multi-sync-mutex)
		(set! *db-last-access* start-time)  ;; THIS IS PROBABLY USELESS? (we are on a client)
                (mutex-unlock! multi-sync-mutex)))))
    res))



(define (rmtmod:calc-ro-mode runremote toppath)
  (if (and runremote
	   (remote-ro-mode-checked runremote))
      (remote-ro-mode runremote)
      (let* ((dbfile  (conc toppath "/megatest.db"))
	     (ro-mode (not (file-write-access? dbfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
	(if runremote
	    (begin
	      (remote-ro-mode-set! runremote ro-mode)
	      (remote-ro-mode-checked-set! runremote #t)
	      ro-mode)
	    ro-mode))))

(define (extras-readonly-mode rmt-mutex log-port cmd params)
  ;;(mutex-unlock! rmt-mutex)
  (debug:print-info 12 log-port "rmt:send-receive, case 3")
  (debug:print 0 log-port "WARNING: write transaction requested on a readonly area.  cmd="cmd" params="params)
  #f)

(define (extras-transport-failed log-port rmt-mutex attemptnum runremote areapath cmd rid params alldat)
  (debug:print 0 log-port "WARNING: communication failed. Trying again, try num: " attemptnum)
  ;;(mutex-lock! rmt-mutex)
  (remote-conndat-set!    runremote #f)
  (exec-fn 'http-transport:close-connections area-dat: runremote)
  (remote-server-url-set! runremote #f)
  ;;(mutex-unlock! rmt-mutex)
  (debug:print-info 12 log-port "rmt:send-receive, case  9.1")
  (rmt:send-receive-orig log-port runremote rmt-mutex areapath cmd rid params alldat attemptnum: (+ attemptnum 1)))
  
(define (extras-transport-succeded log-port rmt-mutex attemptnum runremote areapath res params rid cmd alldat)
  (if (and (vector? res)
	   (eq? (vector-length res) 2)
	   (eq? (vector-ref res 1) 'overloaded)) ;; since we are
						 ;; looking at the
						 ;; data to carry the
						 ;; error we'll use a
						 ;; fairly obtuse







|
|
|
|

|
|

|
|









|


|
|
|


|

|







76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
		(mutex-lock! multi-sync-mutex)
		(set! *db-last-access* start-time)  ;; THIS IS PROBABLY USELESS? (we are on a client)
                (mutex-unlock! multi-sync-mutex)))))
    res))



(define (rmtmod:calc-ro-mode areadat toppath)
  (if (and areadat
	   (alldat-ro-mode-checked areadat))
      (alldat-ro-mode areadat)
      (let* ((dbfile  (conc toppath "/megatest.db"))
	     (ro-mode (not (file-write-access? dbfile)))) ;; TODO: use dbstruct or areadat to figure this out in future
	(if areadat
	    (begin
	      (alldat-ro-mode-set! areadat ro-mode)
	      (alldat-ro-mode-checked-set! areadat #t)
	      ro-mode)
	    ro-mode))))

(define (extras-readonly-mode rmt-mutex log-port cmd params)
  ;;(mutex-unlock! rmt-mutex)
  (debug:print-info 12 log-port "rmt:send-receive, case 3")
  (debug:print 0 log-port "WARNING: write transaction requested on a readonly area.  cmd="cmd" params="params)
  #f)

(define (extras-transport-failed log-port rmt-mutex attemptnum areadat areapath cmd rid params alldat)
  (debug:print 0 log-port "WARNING: communication failed. Trying again, try num: " attemptnum)
  ;;(mutex-lock! rmt-mutex)
  (alldat-conndat-set!    areadat #f)
  (exec-fn 'http-transport:close-connections area-dat: areadat)
  (alldat-server-url-set! areadat #f)
  ;;(mutex-unlock! rmt-mutex)
  (debug:print-info 12 log-port "rmt:send-receive, case  9.1")
  (rmt:send-receive-orig log-port areadat rmt-mutex areapath cmd rid params alldat attemptnum: (+ attemptnum 1)))
  
(define (extras-transport-succeded log-port rmt-mutex attemptnum areadat areapath res params rid cmd alldat)
  (if (and (vector? res)
	   (eq? (vector-length res) 2)
	   (eq? (vector-ref res 1) 'overloaded)) ;; since we are
						 ;; looking at the
						 ;; data to carry the
						 ;; error we'll use a
						 ;; fairly obtuse
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
						 ;; server is
						 ;; overloaded and we
						 ;; want to ease off
						 ;; the queries
      (let ((wait-delay (+ attemptnum (* attemptnum 10))))
	(debug:print 0 log-port "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
	;;(mutex-lock! rmt-mutex)
	(exec-fn 'http-transport:close-connections area-dat: runremote)
	;; (set! *runremote* #f) ;; force starting over
	(remote-server-url-set! runremote #f) ;; I am hoping this will force a redo on server connection. NOT TESTED
	;;(mutex-unlock! rmt-mutex)
	(thread-sleep! wait-delay)
	(rmt:send-receive-orig log-port runremote rmt-mutex areapath cmd rid params alldat attemptnum: (+ attemptnum 1)))
      res)) ;; All good, return res

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
;;  add multi-sync-mutex 
;;
(define (rmt:send-receive-orig log-port runremote rmt-mutex toppath multi-sync-mutex cmd rid params alldat #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected

  #;(common:telemetry-log (conc "rmt:"(->string cmd))
                        payload: `((rid . ,rid)
                                   (params . ,params)))
                          
  
  ;; do all the prep locked under the rmt-mutex
  ;;(mutex-lock! rmt-mutex)
  
  ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote
  ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
  ;; 3. do the query, if on homehost use local access
  ;;
  (let* ((start-time    (current-seconds)) ;; snapshot time so all use cases get same value

	 (readonly-mode (rmtmod:calc-ro-mode runremote toppath)))

    ;; (assert (not (pair? (remote-hh-dat runremote))))
    
    ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
    (cond
     ;; give up if more than 15 attempts
     ((> attemptnum 15)
      (debug:print 0 log-port "ERROR: 15 tries to start/connect to server. Giving up.")
      (exit 1))







|
|
|


|






|









|





|

|







127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
						 ;; server is
						 ;; overloaded and we
						 ;; want to ease off
						 ;; the queries
      (let ((wait-delay (+ attemptnum (* attemptnum 10))))
	(debug:print 0 log-port "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
	;;(mutex-lock! rmt-mutex)
	(exec-fn 'http-transport:close-connections area-dat: areadat)
	;; (set! *areadat* #f) ;; force starting over
	(alldat-server-url-set! areadat #f) ;; I am hoping this will force a redo on server connection. NOT TESTED
	;;(mutex-unlock! rmt-mutex)
	(thread-sleep! wait-delay)
	(rmt:send-receive-orig log-port areadat rmt-mutex areapath cmd rid params alldat attemptnum: (+ attemptnum 1)))
      res)) ;; All good, return res

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
;;  add multi-sync-mutex 
;;
(define (rmt:send-receive-orig log-port areadat rmt-mutex toppath multi-sync-mutex cmd rid params alldat #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected

  #;(common:telemetry-log (conc "rmt:"(->string cmd))
                        payload: `((rid . ,rid)
                                   (params . ,params)))
                          
  
  ;; do all the prep locked under the rmt-mutex
  ;;(mutex-lock! rmt-mutex)
  
  ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in areadat
  ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
  ;; 3. do the query, if on homehost use local access
  ;;
  (let* ((start-time    (current-seconds)) ;; snapshot time so all use cases get same value

	 (readonly-mode (rmtmod:calc-ro-mode areadat toppath)))

    ;; (assert (not (pair? (alldat-hh-dat areadat))))
    
    ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
    (cond
     ;; give up if more than 15 attempts
     ((> attemptnum 15)
      (debug:print 0 log-port "ERROR: 15 tries to start/connect to server. Giving up.")
      (exit 1))
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
     (readonly-mode (extras-readonly-mode rmt-mutex log-port cmd params))

     ;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
     ;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
     ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
     ;;
     ;; reset the connection if it has been unused too long
     ((and runremote
           (remote-conndat runremote)
	   (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
	      (+ (http-transport:server-dat-get-last-access (remote-conndat runremote))
		 (remote-server-timeout runremote))))
      (debug:print-info 0 log-port "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.")
      (exec-fn 'http-transport:close-connections area-dat: runremote)
      (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
      ;; (mutex-unlock! rmt-mutex)
      (rmt:send-receive-orig log-port runremote rmt-mutex toppath multi-sync-mutex cmd rid params alldat attemptnum: attemptnum))
     

     ;; on homehost and this is a read
     ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
	   (pair? (remote-hh-dat runremote))
	   (cdr (remote-hh-dat runremote))       ;; on homehost
           (member cmd api:read-only-queries))   ;; this is a read
      ;; (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case  5")
      (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params alldat))

     ;; on homehost and this is a write, we already have a server, but server has died
     ((and (cdr (remote-hh-dat runremote))           ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url runremote)             ;; have a server
           (not (exec-fn 'server:ping (remote-server-url runremote))))  ;; server has died. NOTE: this is not a cheap call! Need better approach.
      ;; (set! *runremote* (make-remote)) ;; WARNING - broken this.
      (remote-force-server-set! runremote (exec-fn 'common:force-server?))
      ;; (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case  6")
      (rmt:send-receive-orig log-port runremote rmt-mutex toppath  multi-sync-mutex cmd rid params alldat attemptnum: attemptnum))

     ;; on homehost and this is a write, we already have a server
     ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
	   (cdr (remote-hh-dat runremote))           ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url runremote))            ;; have a server
      ;;(mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case  4.1")
      (rmt:open-qry-close-locally  log-port multi-sync-mutex cmd 0 params alldat))

     ;;  on homehost, no server contact made and this is a write, passively start a server 
     ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
	   (cdr (remote-hh-dat runremote))           ;; have homehost
           (not (remote-server-url runremote))       ;; no connection yet
	   (not (member cmd api:read-only-queries))) ;; not a read-only query
      (debug:print-info 12 log-port "rmt:send-receive, case  8")
      (let ((server-url  (exec-fn 'server:check-if-running toppath))) ;; (server:read-dotserver->url toppath))) ;; (server:check-if-running toppath))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
	(if server-url
	    (remote-server-url-set! runremote server-url) ;; the string can be consumed by the client setup if needed
	    (if (exec-fn 'common:force-server?)
		(exec-fn 'server:start-and-wait toppath)
		(exec-fn 'server:kind-run toppath))))
      (remote-force-server-set! runremote (exec-fn 'common:force-server?))
      ;; (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case  8.1")
      (rmt:open-qry-close-locally  log-port multi-sync-mutex cmd 0 params alldat))

     ((or (and (remote-force-server runremote)              ;; we are forcing a server and don't yet have a connection to one
	       (not (remote-conndat runremote)))
	  (and (not (cdr (remote-hh-dat runremote)))        ;; not on a homehost 
	       (not (remote-conndat runremote))))           ;; and no connection
      (debug:print-info 12 log-port "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote))
      ;;(mutex-unlock! rmt-mutex)
      (if (not (exec-fn 'server:check-if-running toppath)) ;; who knows, maybe one has started up?
	  (exec-fn 'server:start-and-wait toppath))
      (remote-conndat-set! runremote (rmt:get-connection-info runremote toppath)) ;; calls client:setup which calls client:setup-http
      (rmt:send-receive-orig log-port runremote rmt-mutex toppath multi-sync-mutex cmd rid params alldat attemptnum: attemptnum)) ;; TODO: add back-off timeout as

     ;; all set up if get this far, dispatch the query
     ((and (not (remote-force-server runremote))
	   (cdr (remote-hh-dat runremote))) ;; we are on homehost
      ;;(mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case 10")
      (rmt:open-qry-close-locally  log-port multi-sync-mutex cmd (if rid rid 0) params alldat))

     ;; not on homehost, do server query
     (else (extras-case-11 log-port rmt-mutex runremote toppath cmd params attemptnum rid alldat)))))

(define (extras-case-11 log-port rmt-mutex runremote areapath cmd params attemptnum rid alldat)
  ;; (mutex-unlock! rmt-mutex)
  (debug:print-info 12 log-port "rmt:send-receive, case  9")
  ;; (mutex-lock! rmt-mutex)
  (let* ((conninfo (remote-conndat runremote))
	 (dat      (case (remote-transport runremote)
		     ((http) (condition-case ;; handling here has
					     ;; caused a lot of
					     ;; problems. However it
					     ;; is needed to deal with
					     ;; attemtped
					     ;; communication to
					     ;; servers that have gone
					     ;; away
			      (exec-fn 'http-transport:client-api-send-receive 0 conninfo cmd params)
			      ((commfail)(vector #f "communications fail"))
			      ((exn)(vector #f "other fail" (print-call-chain)))))
		     (else
		      (debug:print 0 log-port "ERROR: transport " (remote-transport runremote) " not supported")
		      (exit))))
	 (success  (if (vector? dat) (vector-ref dat 0) #f))
	 (res      (if (vector? dat) (vector-ref dat 1) #f)))
    (if (and (vector? conninfo) (< 5 (vector-length conninfo)))
	(http-transport:server-dat-update-last-access conninfo) ;; refresh access time
	(begin
	  (debug:print 0 log-port "INFO: Should not get here! conninfo=" conninfo)
	  (set! conninfo #f)
	  (remote-conndat-set! runremote #f) ;; NOTE: *runremote* is global copy of runremote. Purpose: factor out global.
	  (exec-fn 'http-transport:close-connections  area-dat: runremote)))
    (debug:print-info 13 log-port "rmt:send-receive, case  9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
    ;; (mutex-unlock! rmt-mutex)
    (if success ;; success only tells us that the transport was
	;; successful, have to examine the data to see if
	;; there was a detected issue at the other end
	(extras-transport-succeded log-port rmt-mutex attemptnum runremote areapath res params rid cmd alldat)
	(extras-transport-failed log-port rmt-mutex attemptnum runremote areapath cmd rid params alldat)
	)))

;; if a server is either running or in the process of starting call client:setup
;; else return #f to let the calling proc know that there is no server available
;;
(define (rmt:get-connection-info runremote areapath #!key (area-dat #f)) ;; TODO: push areapath down.
  (let* (;; (runremote (or area-dat runremote))
	 (cinfo     (if (remote? runremote)
			(remote-conndat runremote)
			#f)))
	  (if cinfo
	      cinfo
	      (if (exec-fn 'server:check-if-running areapath)
		  (exec-fn 'client:setup runremote areapath)
		  #f))))



;;======================================================================
;; ulex and steps stuff
;;======================================================================







|
|

|
|
|
|
|

|



|
|
|






|

|
|
|
|


|


|
|

|





|
|
|




|



|




|
|
|
|
|



|
|


|
|





|

|



|
|












|








|
|
|




|
|





|
|
|
|




|







182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
     (readonly-mode (extras-readonly-mode rmt-mutex log-port cmd params))

     ;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
     ;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
     ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
     ;;
     ;; reset the connection if it has been unused too long
     ((and areadat
           (alldat-conndat areadat)
	   (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
	      (+ (http-transport:server-dat-get-last-access (alldat-conndat areadat))
		 (alldat-server-timeout areadat))))
      (debug:print-info 0 log-port "Connection to " (alldat-server-url areadat) " expired due to no accesses, forcing new connection.")
      (exec-fn 'http-transport:close-connections area-dat: areadat)
      (alldat-conndat-set! areadat #f) ;; invalidate the connection, thus forcing a new connection.
      ;; (mutex-unlock! rmt-mutex)
      (rmt:send-receive-orig log-port areadat rmt-mutex toppath multi-sync-mutex cmd rid params alldat attemptnum: attemptnum))
     

     ;; on homehost and this is a read
     ((and (not (alldat-force-server areadat)) ;; honor forced use of server, i.e. server NOT required
	   (pair? (alldat-hh-dat areadat))
	   (cdr (alldat-hh-dat areadat))       ;; on homehost
           (member cmd api:read-only-queries))   ;; this is a read
      ;; (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case  5")
      (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params alldat))

     ;; on homehost and this is a write, we already have a server, but server has died
     ((and (cdr (alldat-hh-dat areadat))           ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (alldat-server-url areadat)             ;; have a server
           (not (exec-fn 'server:ping (alldat-server-url areadat))))  ;; server has died. NOTE: this is not a cheap call! Need better approach.
      ;; (set! *areadat* (make-remote)) ;; WARNING - broken this.
      (alldat-force-server-set! areadat (exec-fn 'common:force-server?))
      ;; (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case  6")
      (rmt:send-receive-orig log-port areadat rmt-mutex toppath  multi-sync-mutex cmd rid params alldat attemptnum: attemptnum))

     ;; on homehost and this is a write, we already have a server
     ((and (not (alldat-force-server areadat))     ;; honor forced use of server, i.e. server NOT required
	   (cdr (alldat-hh-dat areadat))           ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (alldat-server-url areadat))            ;; have a server
      ;;(mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case  4.1")
      (rmt:open-qry-close-locally  log-port multi-sync-mutex cmd 0 params alldat))

     ;;  on homehost, no server contact made and this is a write, passively start a server 
     ((and (not (alldat-force-server areadat))     ;; honor forced use of server, i.e. server NOT required
	   (cdr (alldat-hh-dat areadat))           ;; have homehost
           (not (alldat-server-url areadat))       ;; no connection yet
	   (not (member cmd api:read-only-queries))) ;; not a read-only query
      (debug:print-info 12 log-port "rmt:send-receive, case  8")
      (let ((server-url  (exec-fn 'server:check-if-running toppath))) ;; (server:read-dotserver->url toppath))) ;; (server:check-if-running toppath))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
	(if server-url
	    (alldat-server-url-set! areadat server-url) ;; the string can be consumed by the client setup if needed
	    (if (exec-fn 'common:force-server?)
		(exec-fn 'server:start-and-wait toppath)
		(exec-fn 'server:kind-run toppath))))
      (alldat-force-server-set! areadat (exec-fn 'common:force-server?))
      ;; (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case  8.1")
      (rmt:open-qry-close-locally  log-port multi-sync-mutex cmd 0 params alldat))

     ((or (and (alldat-force-server areadat)              ;; we are forcing a server and don't yet have a connection to one
	       (not (alldat-conndat areadat)))
	  (and (not (cdr (alldat-hh-dat areadat)))        ;; not on a homehost 
	       (not (alldat-conndat areadat))))           ;; and no connection
      (debug:print-info 12 log-port "rmt:send-receive, case 9, hh-dat: " (alldat-hh-dat areadat) " conndat: " (alldat-conndat areadat))
      ;;(mutex-unlock! rmt-mutex)
      (if (not (exec-fn 'server:check-if-running toppath)) ;; who knows, maybe one has started up?
	  (exec-fn 'server:start-and-wait toppath))
      (alldat-conndat-set! areadat (rmt:get-connection-info areadat toppath)) ;; calls client:setup which calls client:setup-http
      (rmt:send-receive-orig log-port areadat rmt-mutex toppath multi-sync-mutex cmd rid params alldat attemptnum: attemptnum)) ;; TODO: add back-off timeout as

     ;; all set up if get this far, dispatch the query
     ((and (not (alldat-force-server areadat))
	   (cdr (alldat-hh-dat areadat))) ;; we are on homehost
      ;;(mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case 10")
      (rmt:open-qry-close-locally  log-port multi-sync-mutex cmd (if rid rid 0) params alldat))

     ;; not on homehost, do server query
     (else (extras-case-11 log-port rmt-mutex areadat toppath cmd params attemptnum rid alldat)))))

(define (extras-case-11 log-port rmt-mutex areadat areapath cmd params attemptnum rid alldat)
  ;; (mutex-unlock! rmt-mutex)
  (debug:print-info 12 log-port "rmt:send-receive, case  9")
  ;; (mutex-lock! rmt-mutex)
  (let* ((conninfo (alldat-conndat areadat))
	 (dat      (case (alldat-transport areadat)
		     ((http) (condition-case ;; handling here has
					     ;; caused a lot of
					     ;; problems. However it
					     ;; is needed to deal with
					     ;; attemtped
					     ;; communication to
					     ;; servers that have gone
					     ;; away
			      (exec-fn 'http-transport:client-api-send-receive 0 conninfo cmd params)
			      ((commfail)(vector #f "communications fail"))
			      ((exn)(vector #f "other fail" (print-call-chain)))))
		     (else
		      (debug:print 0 log-port "ERROR: transport " (alldat-transport areadat) " not supported")
		      (exit))))
	 (success  (if (vector? dat) (vector-ref dat 0) #f))
	 (res      (if (vector? dat) (vector-ref dat 1) #f)))
    (if (and (vector? conninfo) (< 5 (vector-length conninfo)))
	(http-transport:server-dat-update-last-access conninfo) ;; refresh access time
	(begin
	  (debug:print 0 log-port "INFO: Should not get here! conninfo=" conninfo)
	  (set! conninfo #f)
	  (alldat-conndat-set! areadat #f) ;; NOTE: *areadat* is global copy of areadat. Purpose: factor out global.
	  (exec-fn 'http-transport:close-connections  area-dat: areadat)))
    (debug:print-info 13 log-port "rmt:send-receive, case  9. conninfo=" conninfo " dat=" dat " areadat = " areadat)
    ;; (mutex-unlock! rmt-mutex)
    (if success ;; success only tells us that the transport was
	;; successful, have to examine the data to see if
	;; there was a detected issue at the other end
	(extras-transport-succeded log-port rmt-mutex attemptnum areadat areapath res params rid cmd alldat)
	(extras-transport-failed log-port rmt-mutex attemptnum areadat areapath cmd rid params alldat)
	)))

;; if a server is either running or in the process of starting call client:setup
;; else return #f to let the calling proc know that there is no server available
;;
(define (rmt:get-connection-info areadat areapath #!key (area-dat #f)) ;; TODO: push areapath down.
  (let* (;; (areadat (or area-dat areadat))
	 (cinfo     (if (alldat? areadat)
			(alldat-conndat areadat)
			#f)))
	  (if cinfo
	      cinfo
	      (if (exec-fn 'server:check-if-running areapath)
		  (exec-fn 'client:setup areadat areapath)
		  #f))))



;;======================================================================
;; ulex and steps stuff
;;======================================================================

Modified server.scm from [78810e8804] to [944061f452].

122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
  (let* ((curr-host   (get-host-name))
         ;; (attempt-in-progress (server:start-attempted? areapath))
         ;; (dot-server-url (server:check-if-running areapath))
	 (curr-ip     (server:get-best-guess-address curr-host))
	 (curr-pid    (current-process-id))
	 (homehost    (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
	 (target-host (car homehost))
	 (testsuite   (common:get-testsuite-name))
	 (logfile     (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
	 (cmdln (conc (common:get-megatest-exe)
		      " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
							   " -daemonize "
							   "")
		      ;; " -log " logfile
		      " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &")))))







|







122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
  (let* ((curr-host   (get-host-name))
         ;; (attempt-in-progress (server:start-attempted? areapath))
         ;; (dot-server-url (server:check-if-running areapath))
	 (curr-ip     (server:get-best-guess-address curr-host))
	 (curr-pid    (current-process-id))
	 (homehost    (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
	 (target-host (car homehost))
	 (testsuite   (common:get-area-name))
	 (logfile     (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
	 (cmdln (conc (common:get-megatest-exe)
		      " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
							   " -daemonize "
							   "")
		      ;; " -log " logfile
		      " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &")))))
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516

;; moving this here as it needs access to db and cannot be in common.
;;

(define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f))
  (let* ((sqlite-exe   (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh
         (sync-log     (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log")))
	 (tmp-area     (common:get-db-tmp-area))
	 (tmp-db       (conc tmp-area "/megatest.db"))
	 (staging-file (conc *toppath* "/.megatest.db"))
	 (mtdbfile     (conc *toppath* "/megatest.db"))
	 (lockfile     (common:get-sync-lock-filepath))
         (sync-cmd-core     (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log))
         (sync-cmd     (if fork-to-background 
                           (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake \""sync-cmd-core" && /bin/mv -f " staging-file " " mtdbfile" \"")







|







502
503
504
505
506
507
508
509
510
511
512
513
514
515
516

;; moving this here as it needs access to db and cannot be in common.
;;

(define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f))
  (let* ((sqlite-exe   (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh
         (sync-log     (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log")))
	 (tmp-area     (common:get-db-tmp-area *alldat*))
	 (tmp-db       (conc tmp-area "/megatest.db"))
	 (staging-file (conc *toppath* "/.megatest.db"))
	 (mtdbfile     (conc *toppath* "/megatest.db"))
	 (lockfile     (common:get-sync-lock-filepath))
         (sync-cmd-core     (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log))
         (sync-cmd     (if fork-to-background 
                           (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake \""sync-cmd-core" && /bin/mv -f " staging-file " " mtdbfile" \"")
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
    (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
    (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
    (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)  );;  " this-wd-num="this-wd-num)
    (if (and legacy-sync (not *time-to-exit*))
	(let* (;;(dbstruct (db:setup))
	       (mtdb       (dbr:dbstruct-mtdb dbstruct))
	       (mtpath     (db:dbdat-get-path mtdb))
	       (tmp-area   (common:get-db-tmp-area))
	       (start-file (conc tmp-area "/.start-sync"))
	       (end-file   (conc tmp-area "/.end-sync")))
	  (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
	  (let loop ()
	    ;; sync for filesystem local db writes
	    ;;
	    (mutex-lock! *db-multi-sync-mutex*)







|







626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
    (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
    (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
    (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)  );;  " this-wd-num="this-wd-num)
    (if (and legacy-sync (not *time-to-exit*))
	(let* (;;(dbstruct (db:setup))
	       (mtdb       (dbr:dbstruct-mtdb dbstruct))
	       (mtpath     (db:dbdat-get-path mtdb))
	       (tmp-area   (common:get-db-tmp-area *alldat*))
	       (start-file (conc tmp-area "/.start-sync"))
	       (end-file   (conc tmp-area "/.end-sync")))
	  (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
	  (let loop ()
	    ;; sync for filesystem local db writes
	    ;;
	    (mutex-lock! *db-multi-sync-mutex*)

Modified tasks.scm from [b5c98d9ead] to [11677645f3].

101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
	     (debug:print 5 *default-log-port* " exn=" (condition->list exn))
	     (thread-sleep! 1)
	     (tasks:open-db numretries (- numretries 1)))
	   (begin
	     (print-call-chain (current-error-port))
	     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 5 *default-log-port* " exn=" (condition->list exn))))
       (let* ((dbpath        (db:dbfile-path )) ;; (tasks:get-task-db-path))
	      (dbfile       (conc dbpath "/monitor.db"))
	      (avail        (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away
	      (exists       (common:file-exists? dbpath))
	      (write-access (file-write-access? dbpath))
	      (mdb          (cond ;; what the hek is *toppath* doing here?
			     ((and (string? *toppath*)(file-write-access? *toppath*))
			      (sqlite3:open-database dbfile))







|







101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
	     (debug:print 5 *default-log-port* " exn=" (condition->list exn))
	     (thread-sleep! 1)
	     (tasks:open-db numretries (- numretries 1)))
	   (begin
	     (print-call-chain (current-error-port))
	     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 5 *default-log-port* " exn=" (condition->list exn))))
       (let* ((dbpath        (common:get-db-tmp-area *alldat*)) ;; (tasks:get-task-db-path))
	      (dbfile       (conc dbpath "/monitor.db"))
	      (avail        (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away
	      (exists       (common:file-exists? dbpath))
	      (write-access (file-write-access? dbpath))
	      (mdb          (cond ;; what the hek is *toppath* doing here?
			     ((and (string? *toppath*)(file-write-access? *toppath*))
			      (sqlite3:open-database dbfile))
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
    res))

;; 
(define (tasks:start-monitor db mdb)
  (if (> (tasks:get-num-alive-monitors mdb) 2) ;; have two running, no need for more
      (debug:print-info 1 *default-log-port* "Not starting monitor, already have more than two running")
      (let* ((megatestdb     (conc *toppath* "/megatest.db"))
	     (monitordbf     (conc (db:dbfile-path #f) "/monitor.db"))
	     (last-db-update 0)) ;; (file-modification-time megatestdb)))
	(task:register-monitor mdb)
	(let loop ((count      0)
		   (next-touch 0)) ;; next-touch is the time where we need to update last_update
	  ;; if the db has been modified we'd best look at the task queue
	  (let ((modtime (file-modification-time megatestdbpath )))
	    (if (> modtime last-db-update)







|







281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
    res))

;; 
(define (tasks:start-monitor db mdb)
  (if (> (tasks:get-num-alive-monitors mdb) 2) ;; have two running, no need for more
      (debug:print-info 1 *default-log-port* "Not starting monitor, already have more than two running")
      (let* ((megatestdb     (conc *toppath* "/megatest.db"))
	     (monitordbf     (conc (common:get-db-tmp-area *alldat*) "/monitor.db"))
	     (last-db-update 0)) ;; (file-modification-time megatestdb)))
	(task:register-monitor mdb)
	(let loop ((count      0)
		   (next-touch 0)) ;; next-touch is the time where we need to update last_update
	  ;; if the db has been modified we'd best look at the task queue
	  (let ((modtime (file-modification-time megatestdbpath )))
	    (if (> modtime last-db-update)

Modified tcmt.scm from [679021e6ef] to [1f13d07e60].

289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
	 (tsname  #f)
	 (flowid  (conc target "/" runname))
	 (tdelay  (string->number (or (args:get-arg "-delay") "15"))))
    (if (and target runname)
	(begin
	  (launch:setup)
	  (set! keys (rmt:get-keys))))
    (set! tsname  (common:get-testsuite-name))
    (print "TCMT: for testsuite=" tsname " found runname=" runname ", target=" target ", keys=" keys " and successfully ran launch:setup. Using " flowid " as the flowId.")
    (let loop ()
      ;;;;;; (handle-exceptions
      ;;;;;;  exn
      ;;;;;;  ;; (print "Process done.")
      ;;;;;;  (begin (print-call-chain) (print "Error message: " ((condition-property-accessor 'exn 'message) exn)))
       (let-values (((pidres exittype exitstatus)







|







289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
	 (tsname  #f)
	 (flowid  (conc target "/" runname))
	 (tdelay  (string->number (or (args:get-arg "-delay") "15"))))
    (if (and target runname)
	(begin
	  (launch:setup)
	  (set! keys (rmt:get-keys))))
    (set! tsname  (common:get-area-name))
    (print "TCMT: for testsuite=" tsname " found runname=" runname ", target=" target ", keys=" keys " and successfully ran launch:setup. Using " flowid " as the flowId.")
    (let loop ()
      ;;;;;; (handle-exceptions
      ;;;;;;  exn
      ;;;;;;  ;; (print "Process done.")
      ;;;;;;  (begin (print-call-chain) (print "Error message: " ((condition-property-accessor 'exn 'message) exn)))
       (let-values (((pidres exittype exitstatus)

Modified tests.scm from [3fce4840b2] to [ffe98b0868].

853
854
855
856
857
858
859
860
861
862
863
864
865
866
867

;; (tests:create-html-tree "test-index.html")
;;
(define (tests:create-html-tree outf)
   (let* ((lockfile  (conc outf ".lock"))
	 			 (runs-to-process '())
         (linktree  (common:get-linktree))
         (area-name (common:get-testsuite-name))
	  		 (keys      (rmt:get-keys))
	  		 (numkeys   (length keys))
         (run-patt (or (args:get-arg "-run-patt")
                        (args:get-arg "-runname")
                        "%"))
         (target (or  (args:get-arg "-target-patt") 
											(args:get-arg "-target")







|







853
854
855
856
857
858
859
860
861
862
863
864
865
866
867

;; (tests:create-html-tree "test-index.html")
;;
(define (tests:create-html-tree outf)
   (let* ((lockfile  (conc outf ".lock"))
	 			 (runs-to-process '())
         (linktree  (common:get-linktree))
         (area-name (common:get-area-name))
	  		 (keys      (rmt:get-keys))
	  		 (numkeys   (length keys))
         (run-patt (or (args:get-arg "-run-patt")
                        (args:get-arg "-runname")
                        "%"))
         (target (or  (args:get-arg "-target-patt") 
											(args:get-arg "-target")
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960


(define (tests:dynamic-dboard page)
;(define (tests:create-html-tree o)
 (let* (
;(page "1")
          (linktree  (common:get-linktree))
         (area-name (common:get-testsuite-name))
	       (keys      (rmt:get-keys))
	       (numkeys   (length keys))
         (targtweaked (make-list numkeys "%"))
         (target-patt (string-join targtweaked "/"))
         (total-runs  (rmt:get-num-runs "%"))
         (pg-size 10)
         (pg (if (equal? page #f)







|







946
947
948
949
950
951
952
953
954
955
956
957
958
959
960


(define (tests:dynamic-dboard page)
;(define (tests:create-html-tree o)
 (let* (
;(page "1")
          (linktree  (common:get-linktree))
         (area-name (common:get-area-name))
	       (keys      (rmt:get-keys))
	       (numkeys   (length keys))
         (targtweaked (make-list numkeys "%"))
         (target-patt (string-join targtweaked "/"))
         (total-runs  (rmt:get-num-runs "%"))
         (pg-size 10)
         (pg (if (equal? page #f)
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
         (html-body (tests:dashboard-body pg pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links #t "%" target-patt))) ;; update tis function
        html-body))

(define (tests:create-html-summary outf)
 (let* ((lockfile  (conc outf ".lock"))
        (linktree  (common:get-linktree))
				(keys      (rmt:get-keys))
        (area-name (common:get-testsuite-name))
        (run-patt (or (args:get-arg "-run-patt")
                        (args:get-arg "-runname")
                        "%"))
        (target (or (args:get-arg "-target-patt")
                        (args:get-arg "-target")
                        "%"))
         (targlist (string-split target "/"))







|







977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
         (html-body (tests:dashboard-body pg pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links #t "%" target-patt))) ;; update tis function
        html-body))

(define (tests:create-html-summary outf)
 (let* ((lockfile  (conc outf ".lock"))
        (linktree  (common:get-linktree))
				(keys      (rmt:get-keys))
        (area-name (common:get-area-name))
        (run-patt (or (args:get-arg "-run-patt")
                        (args:get-arg "-runname")
                        "%"))
        (target (or (args:get-arg "-target-patt")
                        (args:get-arg "-target")
                        "%"))
         (targlist (string-split target "/"))
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186

(define (tests:create-html-tree-old outf)
   (let* ((lockfile  (conc outf ".lock"))
	 (runs-to-process '()))
    (if (common:simple-file-lock lockfile)
	(let* ((linktree  (common:get-linktree))
	       (oup       (open-output-file (or outf (conc linktree "/runs-index.html"))))
	       (area-name (common:get-testsuite-name))
	       (keys      (rmt:get-keys))
	       (numkeys   (length keys))
	       (runsdat   (rmt:get-runs "%" #f #f (map (lambda (x)(list x "%")) keys)))
	       (header    (vector-ref runsdat 0))
	       (runs      (vector-ref runsdat 1))
	       (runtreedat (map (lambda (x)
				  (tests:run-record->test-path x numkeys))







|







1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186

(define (tests:create-html-tree-old outf)
   (let* ((lockfile  (conc outf ".lock"))
	 (runs-to-process '()))
    (if (common:simple-file-lock lockfile)
	(let* ((linktree  (common:get-linktree))
	       (oup       (open-output-file (or outf (conc linktree "/runs-index.html"))))
	       (area-name (common:get-area-name))
	       (keys      (rmt:get-keys))
	       (numkeys   (length keys))
	       (runsdat   (rmt:get-runs "%" #f #f (map (lambda (x)(list x "%")) keys)))
	       (header    (vector-ref runsdat 0))
	       (runs      (vector-ref runsdat 1))
	       (runtreedat (map (lambda (x)
				  (tests:run-record->test-path x numkeys))

Modified tests/unittests/all-rmt.scm from [2224634ff2] to [b2b47460cc].

66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
;; let's start up a server the mechanical way
(system "nbfake megatest -server -")
(thread-sleep! 2)
;; (test #f #t (string? (server:start-and-wait *toppath*)))

(test "setup for run" #t (begin (launch:setup)
 				(string? (getenv "MT_RUN_AREA_HOME"))))
(test #f #t (client:setup-http *runremote* toppath))
(test #f #t (vector? (client:setup toppath)))

(test #f #t (vector? (rmt:get-connection-info toppath))) ;; TODO: push areapath down.
(test #f #t (string? (server:check-if-running ".")))
;; DEF (test #f #f (rmt:send-receive-no-auto-client-setup *runremote* 'get-keys #f '()))
;; DEF (rmt:kill-server run-id)
;; DEF (rmt:start-server run-id)







|







66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
;; let's start up a server the mechanical way
(system "nbfake megatest -server -")
(thread-sleep! 2)
;; (test #f #t (string? (server:start-and-wait *toppath*)))

(test "setup for run" #t (begin (launch:setup)
 				(string? (getenv "MT_RUN_AREA_HOME"))))
(test #f #t (client:setup-http *alldat* toppath))
(test #f #t (vector? (client:setup toppath)))

(test #f #t (vector? (rmt:get-connection-info toppath))) ;; TODO: push areapath down.
(test #f #t (string? (server:check-if-running ".")))
;; DEF (test #f #f (rmt:send-receive-no-auto-client-setup *runremote* 'get-keys #f '()))
;; DEF (rmt:kill-server run-id)
;; DEF (rmt:start-server run-id)