Megatest

Check-in [47a4f89869]
Login
Overview
Comment:Merged tcp-inmem
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.80
Files: files | file ages | folders
SHA1: 47a4f89869413e6b39c25bb573a29811104f4789
User & Date: mrwellan on 2023-02-22 11:12:14
Other Links: branch diff | manifest | tags
Context
2023-02-22
11:42
bumped allowed number of running servers check-in: 5fca83553e user: mrwellan tags: v1.80
11:12
Merged tcp-inmem check-in: 47a4f89869 user: mrwellan tags: v1.80
11:05
Oops. Was not creating .megatest directory. Leaf check-in: 0386246065 user: mrwellan tags: v1.80-tcp-inmem
2023-02-10
21:16
Merge v1.80-cleanup as it passed QA check-in: 34c5263e66 user: matt tags: v1.80
Changes

Modified Makefile from [2a2c13125c] to [7b082d9b05].

26
27
28
29
30
31
32
33

34
35









36
37
38
39
40
41
42
           process.scm runs.scm tasks.scm tests.scm genexample.scm	\
           http-transport.scm tdb.scm client.scm mt.scm	\
           ezsteps.scm lock-queue.scm rmt.scm api.scm		\
           subrun.scm portlogger.scm archive.scm env.scm		\
           diff-report.scm cgisetup/models/pgdb.scm

# module source files
MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm


all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut tcmt










# dbmod.import.o is just a hack here
mofiles/dbfile.o     : mofiles/debugprint.o mofiles/commonmod.o dbmod.import.o
db.o : dbmod.import.o
mofiles/debugprint.o : mofiles/mtargs.o

# ftail.scm rmtmod.scm commonmod.scm removed







|
>


>
>
>
>
>
>
>
>
>







26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
           process.scm runs.scm tasks.scm tests.scm genexample.scm	\
           http-transport.scm tdb.scm client.scm mt.scm	\
           ezsteps.scm lock-queue.scm rmt.scm api.scm		\
           subrun.scm portlogger.scm archive.scm env.scm		\
           diff-report.scm cgisetup/models/pgdb.scm

# module source files
MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm \
            tcp-transportmod.scm

all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut tcmt

transport-mode.scm : transport-mode.scm.template
	cp transport-mode.scm.template transport-mode.scm

dashboard-transport-mode.scm : transport-mode.scm.template
	cp transport-mode.scm.template dashboard-transport-mode.scm

megatest.scm : transport-mode.scm
dashboard.scm : dashboard-transport-mode.scm

# dbmod.import.o is just a hack here
mofiles/dbfile.o     : mofiles/debugprint.o mofiles/commonmod.o dbmod.import.o
db.o : dbmod.import.o
mofiles/debugprint.o : mofiles/mtargs.o

# ftail.scm rmtmod.scm commonmod.scm removed

Modified TODO from [da5eae4898] to [fa3d981ca6].

14
15
16
17
18
19
20



21
22
23
24
25
26
27
# 
#     You should have received a copy of the GNU General Public License
#     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

TODO
====




WW15
. fill newview matrix with data, filter pipeline gui elements
. improve [script], especially indent handling

WW16
. split db into megatest.db (runs etc.) db/<something>.db
. release basic newview implementation







>
>
>







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# 
#     You should have received a copy of the GNU General Public License
#     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

TODO
====

23WW07
. Remove use of *dbstruct-dbs*

WW15
. fill newview matrix with data, filter pipeline gui elements
. improve [script], especially indent handling

WW16
. split db into megatest.db (runs etc.) db/<something>.db
. release basic newview implementation

Modified api.scm from [f3cc459c57] to [aacd193276].

16
17
18
19
20
21
22
23
24
25
26
27
28
29
30

31
32
33






34
35
36
37
38
39
40
;;     GNU General Public License for more details.
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
;;
;;======================================================================

(use srfi-69 posix)

(declare (unit api))
(declare (uses rmt))
(declare (uses db))
(declare (uses dbmod))
(declare (uses dbfile))
(declare (uses tasks))


(import dbmod)
(import dbfile)







;; allow these queries through without starting a server
;;
(define api:read-only-queries
  '(get-key-val-pairs
    get-var
    get-keys







<
<






>



>
>
>
>
>
>







16
17
18
19
20
21
22


23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
;;     GNU General Public License for more details.
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
;;
;;======================================================================



(declare (unit api))
(declare (uses rmt))
(declare (uses db))
(declare (uses dbmod))
(declare (uses dbfile))
(declare (uses tasks))
(declare (uses tcp-transportmod))

(import dbmod)
(import dbfile)
(import tcp-transportmod)

(use srfi-69
     posix
     matchable
     s11n)

;; allow these queries through without starting a server
;;
(define api:read-only-queries
  '(get-key-val-pairs
    get-var
    get-keys
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227











































228
229

230
231
232
233
234
235
236

    ;; TASKS
    tasks-add
    tasks-set-state-given-param-key
    ))

(define *db-write-mutexes* (make-hash-table))

;; These are called by the server on recipt of /api calls
;;    - keep it simple, only return the actual result of the call, i.e. no meta info here
;;
;;    - returns #( flag result )
;;
(define (api:execute-requests dbstruct dat)
  (db:open-no-sync-db) ;; sets *no-sync-db*
;;   (handle-exceptions
;;    exn
;;    (let ((call-chain (get-call-chain)))
;;      (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat ", exn=" exn)
;;      (print-call-chain (current-error-port))
;;      (debug:print 0 *default-log-port* " message: "  ((condition-property-accessor 'exn 'message) exn))       
  ;;      (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens
  (if (> *api-process-request-count* 200)
      (begin
	(if (common:low-noise-print 30 "too many threads")
	    (debug:print 0 *default-log-port* "WARNING: "*api-process-request-count*" threads, potential overload, adding 0.5 sec delay."))
	(thread-sleep! 0.5) ;; take a nap
	))
   (cond
    ((not (vector? dat))                    ;; it is an error to not receive a vector
     (vector #f (vector #f "remote must be called with a vector")))
    #;((> *api-process-request-count* 200) ;; 20)
     (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.")
     (set! *server-overloaded* #t)
     (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor!
    (else  
     (let* ((cmd-in            (vector-ref dat 0))
            (cmd               (if (symbol? cmd-in)
				   cmd-in
				   (string->symbol cmd-in)))
            (params            (vector-ref dat 1))
	    (run-id            (if (null? params)
				   0
				   (car params)))
	    (write-mutex       (if (hash-table-exists? *db-write-mutexes* run-id)
				   (hash-table-ref *db-write-mutexes* run-id)
				   (let* ((newmutex (make-mutex)))
				     (hash-table-set! *db-write-mutexes* run-id newmutex)
				     newmutex)))
            (start-t           (current-milliseconds))
            (readonly-mode     (dbr:dbstruct-read-only dbstruct))
            (readonly-command  (member cmd api:read-only-queries))
            (writecmd-in-readonly-mode (and readonly-mode (not readonly-command))))
       (if (not readonly-command)
	   (mutex-lock! write-mutex))
       (let* ((tmppath    (dbr:dbstruct-tmppath  dbstruct))
	      (clean-run-id (cond
			     ((number? run-id)   run-id)
			     ((equal? run-id #f) "main")
			     (else               "other")))
	      (crumbfile  (dbfile:wait-for-qif tmppath clean-run-id (cons cmd params)))
	      (res    
               (if writecmd-in-readonly-mode
                   (conc "attempt to run write command "cmd" on a read-only database")
		   (api:dispatch-request dbstruct cmd run-id params))))
	 (delete-file* crumbfile)
	 (if (not readonly-command)
	     (mutex-unlock! write-mutex))
	 
	 ;; save all stats
	 (let ((delta-t (- (current-milliseconds)
			   start-t))
	       (modified-cmd (if (eq? cmd 'general-call)
				 (string->symbol (conc "general-call-" (car params)))
				 cmd)))
	   (hash-table-set! *db-api-call-time* modified-cmd
			    (cons delta-t (hash-table-ref/default *db-api-call-time* modified-cmd '()))))
	 (if writecmd-in-readonly-mode
             (begin
               #;(common:telemetry-log (conc "api-out:"(->string cmd))
               payload: `((params . ,params)
               (ok-res . #t)))
	       (vector #f res))
             (begin
               #;(common:telemetry-log (conc "api-out:"(->string cmd))
               payload: `((params . ,params)
               (ok-res . #f)))
               (vector #t res))))))))












































(define (api:dispatch-request dbstruct cmd run-id params)

  (case cmd
    ;;===============================================
    ;; READ/WRITE QUERIES
    ;;===============================================

    ((get-keys-write)                        (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl
    







|







|
|
|
|
|
|







|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


>







145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285

    ;; TASKS
    tasks-add
    tasks-set-state-given-param-key
    ))

(define *db-write-mutexes* (make-hash-table))
(define *server-signature* #f)
;; These are called by the server on recipt of /api calls
;;    - keep it simple, only return the actual result of the call, i.e. no meta info here
;;
;;    - returns #( flag result )
;;
(define (api:execute-requests dbstruct dat)
  (db:open-no-sync-db) ;; sets *no-sync-db*
  ;;   (handle-exceptions
  ;;    exn
  ;;    (let ((call-chain (get-call-chain)))
  ;;      (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat ", exn=" exn)
  ;;      (print-call-chain (current-error-port))
  ;;      (debug:print 0 *default-log-port* " message: "  ((condition-property-accessor 'exn 'message) exn))       
  ;;      (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens
  (if (> *api-process-request-count* 200)
      (begin
	(if (common:low-noise-print 30 "too many threads")
	    (debug:print 0 *default-log-port* "WARNING: "*api-process-request-count*" threads, potential overload, adding 0.5 sec delay."))
	(thread-sleep! 0.5) ;; take a nap
	))
  (cond
   ((not (vector? dat))                    ;; it is an error to not receive a vector
    (vector #f (vector #f "remote must be called with a vector")))
   #;((> *api-process-request-count* 200) ;; 20)
   (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.")
   (set! *server-overloaded* #t)
   (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor!
   (else  
    (let* ((cmd-in            (vector-ref dat 0))
           (cmd               (if (symbol? cmd-in)
				  cmd-in
				  (string->symbol cmd-in)))
           (params            (vector-ref dat 1))
	   (run-id            (if (null? params)
				  0
				  (car params)))
	   (write-mutex       (if (hash-table-exists? *db-write-mutexes* run-id)
				  (hash-table-ref *db-write-mutexes* run-id)
				  (let* ((newmutex (make-mutex)))
				    (hash-table-set! *db-write-mutexes* run-id newmutex)
				    newmutex)))
           (start-t           (current-milliseconds))
           (readonly-mode     (dbr:dbstruct-read-only dbstruct))
           (readonly-command  (member cmd api:read-only-queries))
           (writecmd-in-readonly-mode (and readonly-mode (not readonly-command))))
      (if (not readonly-command)
	  (mutex-lock! write-mutex))
      (let* ((tmppath    (dbr:dbstruct-tmppath  dbstruct))
	     (clean-run-id (cond
			    ((number? run-id)   run-id)
			    ((equal? run-id #f) "main")
			    (else               "other")))
	     (crumbfile  (dbfile:wait-for-qif tmppath clean-run-id (cons cmd params)))
	     (res    
              (if writecmd-in-readonly-mode
                  (conc "attempt to run write command "cmd" on a read-only database")
		  (api:dispatch-request dbstruct cmd run-id params))))
	(delete-file* crumbfile)
	(if (not readonly-command)
	    (mutex-unlock! write-mutex))
	
	;; save all stats
	(let ((delta-t (- (current-milliseconds)
			  start-t))
	      (modified-cmd (if (eq? cmd 'general-call)
				(string->symbol (conc "general-call-" (car params)))
				cmd)))
	  (hash-table-set! *db-api-call-time* modified-cmd
			   (cons delta-t (hash-table-ref/default *db-api-call-time* modified-cmd '()))))
	(if writecmd-in-readonly-mode
            (begin
              #;(common:telemetry-log (conc "api-out:"(->string cmd))
              payload: `((params . ,params)
              (ok-res . #t)))
	      (vector #f res))
            (begin
              #;(common:telemetry-log (conc "api-out:"(->string cmd))
              payload: `((params . ,params)
              (ok-res . #f)))
              (vector #t res))))))))

;; indat is (cmd run-id params meta)
;;
;; WARNING: Do not print anything in the lambda of this function as it
;;          reads/writes to current in/out port
;;
(define (api:tcp-dispatch-request-make-handler dbstruct) ;; cmd run-id params)
  (assert *toppath* "FATAL: api:tcp-dispatch-request-make-handler called but *toppath* not set.")
  (if (not *server-signature*)
      (set! *server-signature* (tt:mk-signature *toppath*)))
  (lambda ()
    (let* ((indat      (deserialize))
	   (newcount   (+ *api-process-request-count* 1))
	   (delay-wait (if (> newcount 10)
			   (- newcount 10)
			   0)))
      (set! *api-process-request-count* newcount)
      (set! *db-last-access* (current-seconds))
      (match indat
	((cmd run-id params meta)
	 (let* ((status  (cond
			  ((> newcount 30) 'busy)
			  ((> newcount 15) 'loaded)
			  (else 'ok)))
		(errmsg  (case status
			   ((busy)   (conc "Server overloaded, "newcount" threads in flight"))
			   ((loaded) (conc "Server loaded, "newcount" threads in flight"))
			   (else     #f)))
		(result  (case status
			   ((busy)  (- newcount 29))
			   ((loaded) #f)
			   (else
			    (case cmd
			      ((ping) *server-signature*)
			      (else
			       (api:dispatch-request dbstruct cmd run-id params))))))
		(meta   `((wait . ,delay-wait)))
		(payload (list status errmsg result meta)))
	   (set! *api-process-request-count* (- *api-process-request-count* 1))
	   (serialize payload)))
	(else
	 (assert #f "FATAL: failed to deserialize indat "indat))))))
       

(define (api:dispatch-request dbstruct cmd run-id params)
  (db:open-no-sync-db)
  (case cmd
    ;;===============================================
    ;; READ/WRITE QUERIES
    ;;===============================================

    ((get-keys-write)                        (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl
    

Modified client.scm from [6913337164] to [732bd78865].

102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
	    (match server-dat
	      ((host port start-time server-id pid)
	       (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
	       (if (not runremote)
                   (begin
		     ;; Here we are creating a runremote where there was none or it was clobbered with #f
		     ;;
		     (set! runremote (make-remote))
                     (let* ((server-info (server:check-if-running areapath)))
		       (remote-server-info-set! runremote server-info)
                       (if server-info
                           (begin
                             (remote-server-url-set! runremote (server:record->url server-info))
                             (remote-server-id-set! runremote (server:record->id server-info)))))))
	       ;; at this point we have a runremote







|







102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
	    (match server-dat
	      ((host port start-time server-id pid)
	       (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
	       (if (not runremote)
                   (begin
		     ;; Here we are creating a runremote where there was none or it was clobbered with #f
		     ;;
		     (set! runremote (make-and-init-remote))
                     (let* ((server-info (server:check-if-running areapath)))
		       (remote-server-info-set! runremote server-info)
                       (if server-info
                           (begin
                             (remote-server-url-set! runremote (server:record->url server-info))
                             (remote-server-id-set! runremote (server:record->id server-info)))))))
	       ;; at this point we have a runremote

Modified common.scm from [fd6e725fd9] to [a37f58bd5f].

247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
(define *common:this-exe-name*     (pathname-strip-directory *common:this-exe-fullpath*))

(define (common:get-sync-lock-filepath)
  (let* ((tmp-area     (common:get-db-tmp-area))
         (lockfile     (conc tmp-area "/megatest.db.sync-lock")))
    lockfile))

;;======================================================================
;; when called from a wrapper I need sometimes to find the calling
;; wrapper, this is for dashboard to find the correct megatest.
;;
(define (common:find-local-megatest #!optional (progname "megatest"))
  (let ((res (filter file-exists?
		     (map (lambda (updir)
			    (let* ((lm  (car (argv)))
				   (dir (pathname-directory lm))
				   (exe (pathname-strip-directory lm)))
			      (conc (if dir (conc dir "/") "")
				    (case (string->symbol exe)
				      ((dboard)    (conc updir progname))
				      ((mtest)     (conc updir progname))
				      ((dashboard) progname)
				      (else exe)))))
			  '("../../" "../")))))
    (if (null? res)
	(begin
	  (debug:print 0 *default-log-port* "Failed to find this executable! Using what can be found on the path")
	  progname)
	(car res))))

(define *common:logpro-exit-code->status-sym-alist*
  '( ( 0 . pass )
     ( 1 . fail )
     ( 2 . warn )
     ( 3 . check )
     ( 4 . waived )
     ( 5 . abort )







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







247
248
249
250
251
252
253























254
255
256
257
258
259
260
(define *common:this-exe-name*     (pathname-strip-directory *common:this-exe-fullpath*))

(define (common:get-sync-lock-filepath)
  (let* ((tmp-area     (common:get-db-tmp-area))
         (lockfile     (conc tmp-area "/megatest.db.sync-lock")))
    lockfile))
























(define *common:logpro-exit-code->status-sym-alist*
  '( ( 0 . pass )
     ( 1 . fail )
     ( 2 . warn )
     ( 3 . check )
     ( 4 . waived )
     ( 5 . abort )
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
		      (create-directory dbarea)))
		dbpath))
	  #f)))

(define (common:get-area-path-signature)
  (message-digest-string (md5-primitive) *toppath*))

(define (common:get-signature str)
  (message-digest-string (md5-primitive) str))

;;======================================================================
;; E X I T   H A N D L I N G
;;======================================================================

(define (common:run-sync?)
  (and *toppath*               ;; gate if called before *toppath* is set
       (common:on-homehost?)
       (args:get-arg "-server")))

(define (common:human-time)
  (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S"))


(define (std-signal-handler signum)
  ;; (signal-mask! signum)
  (set! *time-to-exit* #t) 
  ;;(debug:print-info 13 *default-log-port* "got signal "signum)
  (debug:print-error 0 *default-log-port* "Received signal " signum " aaa exiting promptly")
  ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway







<
<
<









<
<
<







973
974
975
976
977
978
979



980
981
982
983
984
985
986
987
988



989
990
991
992
993
994
995
		      (create-directory dbarea)))
		dbpath))
	  #f)))

(define (common:get-area-path-signature)
  (message-digest-string (md5-primitive) *toppath*))




;;======================================================================
;; E X I T   H A N D L I N G
;;======================================================================

(define (common:run-sync?)
  (and *toppath*               ;; gate if called before *toppath* is set
       (common:on-homehost?)
       (args:get-arg "-server")))





(define (std-signal-handler signum)
  ;; (signal-mask! signum)
  (set! *time-to-exit* #t) 
  ;;(debug:print-info 13 *default-log-port* "got signal "signum)
  (debug:print-error 0 *default-log-port* "Received signal " signum " aaa exiting promptly")
  ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway
2041
2042
2043
2044
2045
2046
2047


2048
2049
2050
2051
2052


2053
2054
2055
2056
2057
2058
2059
2060
  (let loop ((start-time (current-seconds))) ;; we saw some instances of this being called before *toppath* was set. This might be an early setup race. This delay should help but it is impossible to test...
    (if (not *toppath*)
	(begin
	  (debug:print 0 *default-log-port* "ERROR: common:wait-for-homehost-load called before *toppath* set.")
	  (thread-sleep! 30)
	  (if (< (- (current-seconds) start-time) 300)
	      (loop start-time)))))


  (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
                     #f
                     (server:choose-server *toppath* 'homehost)))
         (hh     (if hh-dat (car hh-dat) #f)))
    (common:wait-for-normalized-load maxnormload msg hh)))



(define (common:get-num-cpus remote-host)
  (let* ((actual-host (or remote-host (get-host-name))))
    ;; hosts had better not be changing the number of cpus too often!
    (or (hash-table-ref/default *numcpus-cache* actual-host #f)
	(let* ((numcpus (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (random 3600)))
			    (let* ((proc   (lambda ()
					     (let loop ((numcpu 0)







>
>
|
|
|
|
|
>
>
|







2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
  (let loop ((start-time (current-seconds))) ;; we saw some instances of this being called before *toppath* was set. This might be an early setup race. This delay should help but it is impossible to test...
    (if (not *toppath*)
	(begin
	  (debug:print 0 *default-log-port* "ERROR: common:wait-for-homehost-load called before *toppath* set.")
	  (thread-sleep! 30)
	  (if (< (- (current-seconds) start-time) 300)
	      (loop start-time)))))
  (case (rmt:transport-mode)
    ((http)
     (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
			#f
			(server:choose-server *toppath* 'homehost)))
            (hh     (if hh-dat (car hh-dat) #f)))
       (common:wait-for-normalized-load maxnormload msg hh)))
    (else
     (common:wait-for-normalized-load maxnormload msg (get-host-name)))))
    
(define (common:get-num-cpus remote-host)
  (let* ((actual-host (or remote-host (get-host-name))))
    ;; hosts had better not be changing the number of cpus too often!
    (or (hash-table-ref/default *numcpus-cache* actual-host #f)
	(let* ((numcpus (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (random 3600)))
			    (let* ((proc   (lambda ()
					     (let loop ((numcpu 0)
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
                       (conc "viewscreen " cmd))))
    (debug:print-info 02 *default-log-port* "Running command: " fullcmd)
    (cond
     (with-vars     (common:without-vars  fullcmd))
     (with-orig-env (common:with-orig-env fullcmd))
     (else          (common:without-vars  fullcmd "MT_.*")))))
		  
;;======================================================================
;; T I M E   A N D   D A T E
;;======================================================================

;;======================================================================
;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5
(define (common:hms-string->seconds tstr)
  (let ((parts     (string-split-fields "\\w+" tstr))
	(time-secs 0)
	;; s=seconds, m=minutes, h=hours, d=days, M=months, y=years, w=weeks
	(trx       (regexp "(\\d+)([smhdMyw])")))
    (for-each (lambda (part)
		(let ((match  (string-match trx part)))
		  (if match
		      (let ((val (string->number (cadr match)))
			    (unt (caddr match)))
			(if val 
			    (set! time-secs (+ time-secs (* val
							    (case (string->symbol unt)
							      ((s) 1)
							      ((m) 60) ;; minutes
							      ((h) 3600)
							      ((d) 86400)
							      ((w) 604800)
							      ((M) 2628000) ;; aproximately one month
							      ((y) 31536000)
							      (else #f))))))))))
	      parts)
    time-secs))
		       
(define (seconds->hr-min-sec secs)
  (let* ((hrs (quotient secs 3600))
	 (min (quotient (- secs (* hrs 3600)) 60))
	 (sec (- secs (* hrs 3600)(* min 60))))
    (conc (if (> hrs 0)(conc hrs "hr ") "")
	  (if (> min 0)(conc min "m ")  "")
	  sec "s")))

(define (seconds->time-string sec)
  (time->string 
   (seconds->local-time sec) "%H:%M:%S"))

(define (seconds->work-week/day-time sec)
  (time->string
   (seconds->local-time sec) "ww%V.%u %H:%M"))

(define (seconds->work-week/day sec)
  (time->string
   (seconds->local-time sec) "ww%V.%u"))

(define (seconds->year-work-week/day sec)
  (time->string
   (seconds->local-time sec) "%yww%V.%w"))

(define (seconds->year-work-week/day-time sec)
  (time->string
   (seconds->local-time sec) "%Yww%V.%w %H:%M"))

(define (seconds->year-week/day-time sec)
  (time->string
   (seconds->local-time sec) "%Yw%V.%w %H:%M"))

(define (seconds->quarter sec)
  (case (string->number
	 (time->string 
	  (seconds->local-time sec)
	  "%m"))
    ((1 2 3) 1)
    ((4 5 6) 2)
    ((7 8 9) 3)
    ((10 11 12) 4)
    (else #f)))

;;======================================================================
;; basic ISO8601 format (e.g. "2017-02-28 06:02:54") date time => Unix epoch
;;
(define (common:date-time->seconds datetime)
  (local-time->seconds (string->time datetime "%Y-%m-%d %H:%M:%S")))

;;======================================================================
;; given span of seconds tstart to tend
;; find start time to mark and mark delta
;;
(define (common:find-start-mark-and-mark-delta tstart tend)
  (let* ((deltat   (- (max tend (+ tend 10)) tstart)) ;; can't handle runs of less than 4 seconds. Pad it to 10 seconds ...
	 (result   #f)
	 (min      60)
	 (hr       (* 60 60))
	 (day      (* 24 hr))
	 (yr       (* 365 day)) ;; year
	 (mo       (/ yr 12))
	 (wk       (* day 7)))
    (for-each
     (lambda (max-blks)
       (for-each
	(lambda (span) ;; 5 2 1
	  (if (not result)
	      (for-each 
	       (lambda (timeunit timesym) ;; year month day hr min sec
		 (if (not result)
		     (let* ((time-blk (* span timeunit))
			    (num-blks (quotient deltat time-blk)))
		       (if (and (> num-blks 4)(< num-blks max-blks))
			   (let ((first (* (quotient tstart time-blk) time-blk)))
			     (set! result (list span timeunit time-blk first timesym))
			     )))))
	       (list yr mo wk day hr min 1)
	       '(     y  mo w  d   h  m   s))))
	(list 8 6 5 2 1)))
     '(5 10 15 20 30 40 50 500))
    (if values
	(apply values result)
	(values 0 day 1 0 'd))))

;;======================================================================
;; given x y lim return the cron expansion
;;
(define (common:expand-cron-slash x y lim)
  (let loop ((curr x)
	     (res  `()))
    (if (< curr lim)
	(loop (+ curr y) (cons curr res))
	(reverse res))))

;;======================================================================
;; expand a complex cron string to a list of cron strings
;;
;;  x/y   => x, x+y, x+2y, x+3y while x+Ny<max_for_field
;;  a,b,c => a, b ,c
;;
;;   NOTE: with flatten a lot of the crud below can be factored down.
;;
(define (common:cron-expand cron-str)
  (if (list? cron-str)
      (flatten
       (fold (lambda (x res)
	       (if (list? x)
		   (let ((newres (map common:cron-expand x)))
		     (append x newres))
		   (cons x res)))
	     '()
	     cron-str)) ;; (map common:cron-expand cron-str))
      (let ((cron-items (string-split cron-str))
	    (slash-rx   (regexp "(\\d+)/(\\d+)"))
	    (comma-rx   (regexp ".*,.*"))
	    (max-vals   '((min        . 60)
			  (hour       . 24)
			  (dayofmonth . 28) ;;; BUG!!!! This will be a bug for some combinations
			  (month      . 12)
			  (dayofweek  . 7))))
	(if (< (length cron-items) 5) ;; bad spec
	    cron-str ;; `(,cron-str)              ;; just return the string, something downstream will fix it
	    (let loop ((hed  (car cron-items))
		       (tal  (cdr cron-items))
		       (type 'min)
		       (type-tal '(hour dayofmonth month dayofweek))
		       (res  '()))
	      (regex-case
		  hed
		(slash-rx ( _ base incr ) (let* ((basen          (string->number base))
						 (incrn          (string->number incr))
						 (expanded-vals  (common:expand-cron-slash basen incrn (alist-ref type max-vals)))
						 (new-list-crons (fold (lambda (x myres)
									 (cons (conc (if (null? res)
											 ""
											 (conc (string-intersperse res " ") " "))
										     x " " (string-intersperse tal " "))
									       myres))
								       '() expanded-vals)))
					    ;; (print "new-list-crons: " new-list-crons)
					    ;; (fold (lambda (x res)
					    ;; 	    (if (list? x)
					    ;; 		(let ((newres (map common:cron-expand x)))
					    ;; 		  (append x newres))
					    ;; 		(cons x res)))
					    ;; 	  '()
					    (flatten (map common:cron-expand new-list-crons))))
		;;					    (map common:cron-expand (map common:cron-expand new-list-crons))))
		(else (if (null? tal)
			  cron-str
			  (loop (car tal)(cdr tal)(car type-tal)(cdr type-tal)(append res (list hed)))))))))))

;;======================================================================
;; given a cron string and the last time event was processed return #t to run or #f to not run
;;
;;  min    hour   dayofmonth month  dayofweek
;; 0-59    0-23   1-31       1-12   0-6          ### NOTE: dayofweek does not include 7
;;
;;  #t => yes, run the job
;;  #f => no, do not run the job
;;
(define (common:cron-event cron-str now-seconds-in last-done) ;; ref-seconds = #f is NOW.
  (let* ((cron-items     (map string->number (string-split cron-str)))
	 (now-seconds    (or now-seconds-in (current-seconds)))
	 (now-time       (seconds->local-time now-seconds))
	 (last-done-time (seconds->local-time last-done))
	 (all-times      (make-hash-table)))
    ;; (print "cron-items: " cron-items "(length cron-items): " (length cron-items))
    (if (not (eq? (length cron-items) 5)) ;; don't even try to figure out junk strings
	#f
	(match-let (((     cmin chour cdayofmonth cmonth    cdayofweek)
		     cron-items)
		    ;; 0     1    2        3         4    5      6
		    ((nsec nmin nhour ndayofmonth nmonth nyr ndayofweek n7 n8 n9)
		     (vector->list now-time))
		    ((lsec lmin lhour ldayofmonth lmonth lyr ldayofweek l7 l8 l9)
		     (vector->list last-done-time)))
	  ;; create all possible time slots
	  ;; remove invalid slots due to (for example) day of week
	  ;; get the start and end entries for the ref-seconds (current) time
	  ;; if last-done > ref-seconds => this is an ERROR!
	  ;; does the last-done time fall in the legit region?
	  ;;    yes => #f  do not run again this command
	  ;;    no  => #t  ok to run the command
	  (for-each ;; month
	   (lambda (month)
	     (for-each ;; dayofmonth
	      (lambda (dom)
		(for-each
		 (lambda (hr) ;; hour
		   (for-each
		    (lambda (minute) ;; minute
		      (let ((copy-now (apply vector (vector->list now-time))))
			(vector-set! copy-now 0 0) ;; force seconds to zero
			(vector-set! copy-now 1 minute)
			(vector-set! copy-now 2 hr)
			(vector-set! copy-now 3 dom)  ;; dom is already corrected for zero referenced
			(vector-set! copy-now 4 month)
			(let* ((copy-now-secs (local-time->seconds copy-now))
			       (new-copy      (seconds->local-time copy-now-secs))) ;; remake the time vector
			  (if (or (not cdayofweek)
				  (equal? (vector-ref new-copy 6)
					  cdayofweek)) ;; if the day is specified and a match OR if the day is NOT specified
			      (if (or (not cdayofmonth)
				      (equal? (vector-ref new-copy 3)
					      (+ 1 cdayofmonth))) ;; if the month is specified and a match OR if the month is NOT specified
				  (hash-table-set! all-times copy-now-secs new-copy))))))
		    (if cmin
			`(,cmin)  ;; if given cmin, have to use it
			(list (- nmin 1) nmin (+ nmin 1))))) ;; minute
		 (if chour
		     `(,chour)
		     (list (- nhour 1) nhour (+ nhour 1))))) ;; hour
	      (if cdayofmonth
		  `(,cdayofmonth)
		  (list (- ndayofmonth 1) ndayofmonth (+ ndayofmonth 1)))))
	   (if cmonth
	       `(,cmonth)
	       (list (- nmonth 1) nmonth (+ nmonth 1))))
	  (let ((before #f)
		(is-in  #f))
	    (for-each
	     (lambda (moment)
	       (if (and before
			(<= before now-seconds)
			(>= moment now-seconds))
		   (begin
		     ;; (print)
		     ;; (print "Before: " (time->string (seconds->local-time before)))
		     ;; (print "Now:    " (time->string (seconds->local-time now-seconds)))
		     ;; (print "After:  " (time->string (seconds->local-time moment)))
		     ;; (print "Last:   " (time->string (seconds->local-time last-done)))
		     (if (<  last-done before)
			 (set! is-in before))
		     ))
	       (set! before moment))
	     (sort (hash-table-keys all-times) <))
	    is-in)))))

(define (common:extended-cron  cron-str now-seconds-in last-done)
  (let ((expanded-cron (common:cron-expand cron-str)))
    (if (string? expanded-cron)
	(common:cron-event expanded-cron now-seconds-in last-done)
	(let loop ((hed (car expanded-cron))
		   (tal (cdr expanded-cron)))
	  (if (common:cron-event hed now-seconds-in last-done)
	      #t
	      (if (null? tal)
		  #f
		  (loop (car tal)(cdr tal))))))))

;;======================================================================
;; C O L O R S
;;======================================================================
      
(define (common:name->iup-color name)
  (case (string->symbol (string-downcase name))
    ((red)    "223 33 49")







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







2626
2627
2628
2629
2630
2631
2632

























































































































































































































































































2633
2634
2635
2636
2637
2638
2639
                       (conc "viewscreen " cmd))))
    (debug:print-info 02 *default-log-port* "Running command: " fullcmd)
    (cond
     (with-vars     (common:without-vars  fullcmd))
     (with-orig-env (common:with-orig-env fullcmd))
     (else          (common:without-vars  fullcmd "MT_.*")))))
		  

























































































































































































































































































;;======================================================================
;; C O L O R S
;;======================================================================
      
(define (common:name->iup-color name)
  (case (string->symbol (string-downcase name))
    ((red)    "223 33 49")

Modified commonmod.scm from [2570fcf4eb] to [56d21c8b94].

15
16
17
18
19
20
21

22
23
24
25
26
27
28


29


30


31



32






33
34
35
36
37
38
39
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================

(declare (unit commonmod))


(use srfi-69)

(module commonmod
	*

(import scheme chicken data-structures extras files)


(import (prefix sqlite3 sqlite3:)


	posix typed-records srfi-18 srfi-69


	md5 message-digest



	regex srfi-1)







;;======================================================================
;; CONTENTS
;;
;;  config file utils
;;  misc conversion, data manipulation functions
;;  testsuite and area utilites







>






|
>
>
|
>
>
|
>
>
|
>
>
>
|
>
>
>
>
>
>







15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================

(declare (unit commonmod))
;; (declare (uses debugprint))

(use srfi-69)

(module commonmod
	*

(import scheme
	chicken

	(prefix sqlite3 sqlite3:)
	data-structures
	extras
	files
	matchable
	md5
	message-digest
	posix
	regex
	regex-case
	srfi-1
	srfi-18
	srfi-69
	typed-records

	;; debugprint
	)

;;======================================================================
;; CONTENTS
;;
;;  config file utils
;;  misc conversion, data manipulation functions
;;  testsuite and area utilites
159
160
161
162
163
164
165






























166
167
168
169
170
171
172
			     (if convert (lazy-convert inval) inval))))
		   (else f))))
	     (filter (lambda (x)
		       (not (string-match "^\\s*" x)))
		     val-list))
	'())))































;;======================================================================
;; testsuite and area utilites
;;======================================================================

(define (get-testsuite-name toppath configdat)
  (or (lookup configdat "setup" "area-name")
      (lookup configdat "setup" "testsuite")







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
			     (if convert (lazy-convert inval) inval))))
		   (else f))))
	     (filter (lambda (x)
		       (not (string-match "^\\s*" x)))
		     val-list))
	'())))

(define (get-cpu-load)
  (let* ((load-info (with-input-from-file "/proc/loadavg" read-lines)))
    (map string->number (string-split load-info))))

(define *current-host-cores* #f)

(define (get-current-host-cores)
  (or *current-host-cores*
      (let ((cpu-info (with-input-from-file "/proc/cpuinfo" read-lines)))
	(let loop ((lines cpu-info))
	  (if (null? lines)
	      1 ;; gotta be at least one!
	      (let* ((inl (car lines))
		     (tail (cdr lines))
		     (parts (string-split inl)))
		(match parts
		  (("cpu" "cores" ":" num) (string->number num))
		  (else (loop tail)))))))))

(define (number-of-processes-running processname)
  (with-input-from-pipe
   (conc "ps -def | egrep \""processname"\" |wc -l")
   (lambda ()
     (string->number (read-line)))))

;; get the normalized (i.e. load / numcpus) for *this* host
;;
(define (get-normalized-cpu-load)
  (/ (get-cpu-load)(get-current-host-cores)))

;;======================================================================
;; testsuite and area utilites
;;======================================================================

(define (get-testsuite-name toppath configdat)
  (or (lookup configdat "setup" "area-name")
      (lookup configdat "setup" "testsuite")
206
207
208
209
210
211
212


































































































































































































































































































213
214
215
216
217


218



219
















220
221







222






223


224
225
;;
(define (get-cfg-areas cfgdat)
  (let ((adat (get-section cfgdat "areas")))
    (map (lambda (entry)
	   `(,(car entry) . 
	     ,(val->alist (cadr entry))))
	 adat)))



































































































































































































































































































;;======================================================================
;; misc stuff
;;======================================================================



;; (define (debug:print . params) #f)



;; (define (debug:print-info . params) #f)
















;; 
;; (define (set-functions dbgp dbgpinfo)







;;   (set! debug:print dbgp)






;;   (set! debug:print-info dbgpinfo))



)







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>





>
>
|
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
>
>
>
>
>
>
>
|
>
>
>
>
>
>
|
>
>


252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
;;
(define (get-cfg-areas cfgdat)
  (let ((adat (get-section cfgdat "areas")))
    (map (lambda (entry)
	   `(,(car entry) . 
	     ,(val->alist (cadr entry))))
	 adat)))

;;======================================================================
;; time utils
;;======================================================================

(define (common:human-time)
  (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S"))

;;======================================================================
;; T I M E   A N D   D A T E
;;======================================================================

;;======================================================================
;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5
(define (common:hms-string->seconds tstr)
  (let ((parts     (string-split-fields "\\w+" tstr))
	(time-secs 0)
	;; s=seconds, m=minutes, h=hours, d=days, M=months, y=years, w=weeks
	(trx       (regexp "(\\d+)([smhdMyw])")))
    (for-each (lambda (part)
		(let ((match  (string-match trx part)))
		  (if match
		      (let ((val (string->number (cadr match)))
			    (unt (caddr match)))
			(if val 
			    (set! time-secs (+ time-secs (* val
							    (case (string->symbol unt)
							      ((s) 1)
							      ((m) 60) ;; minutes
							      ((h) 3600)
							      ((d) 86400)
							      ((w) 604800)
							      ((M) 2628000) ;; aproximately one month
							      ((y) 31536000)
							      (else #f))))))))))
	      parts)
    time-secs))
		       
(define (seconds->hr-min-sec secs)
  (let* ((hrs (quotient secs 3600))
	 (min (quotient (- secs (* hrs 3600)) 60))
	 (sec (- secs (* hrs 3600)(* min 60))))
    (conc (if (> hrs 0)(conc hrs "hr ") "")
	  (if (> min 0)(conc min "m ")  "")
	  sec "s")))

(define (seconds->time-string sec)
  (time->string 
   (seconds->local-time sec) "%H:%M:%S"))

(define (seconds->work-week/day-time sec)
  (time->string
   (seconds->local-time sec) "ww%V.%u %H:%M"))

(define (seconds->work-week/day sec)
  (time->string
   (seconds->local-time sec) "ww%V.%u"))

(define (seconds->year-work-week/day sec)
  (time->string
   (seconds->local-time sec) "%yww%V.%w"))

(define (seconds->year-work-week/day-time sec)
  (time->string
   (seconds->local-time sec) "%Yww%V.%w %H:%M"))

(define (seconds->year-week/day-time sec)
  (time->string
   (seconds->local-time sec) "%Yw%V.%w %H:%M"))

(define (seconds->quarter sec)
  (case (string->number
	 (time->string 
	  (seconds->local-time sec)
	  "%m"))
    ((1 2 3) 1)
    ((4 5 6) 2)
    ((7 8 9) 3)
    ((10 11 12) 4)
    (else #f)))

;;======================================================================
;; basic ISO8601 format (e.g. "2017-02-28 06:02:54") date time => Unix epoch
;;
(define (common:date-time->seconds datetime)
  (local-time->seconds (string->time datetime "%Y-%m-%d %H:%M:%S")))

;;======================================================================
;; given span of seconds tstart to tend
;; find start time to mark and mark delta
;;
(define (common:find-start-mark-and-mark-delta tstart tend)
  (let* ((deltat   (- (max tend (+ tend 10)) tstart)) ;; can't handle runs of less than 4 seconds. Pad it to 10 seconds ...
	 (result   #f)
	 (min      60)
	 (hr       (* 60 60))
	 (day      (* 24 hr))
	 (yr       (* 365 day)) ;; year
	 (mo       (/ yr 12))
	 (wk       (* day 7)))
    (for-each
     (lambda (max-blks)
       (for-each
	(lambda (span) ;; 5 2 1
	  (if (not result)
	      (for-each 
	       (lambda (timeunit timesym) ;; year month day hr min sec
		 (if (not result)
		     (let* ((time-blk (* span timeunit))
			    (num-blks (quotient deltat time-blk)))
		       (if (and (> num-blks 4)(< num-blks max-blks))
			   (let ((first (* (quotient tstart time-blk) time-blk)))
			     (set! result (list span timeunit time-blk first timesym))
			     )))))
	       (list yr mo wk day hr min 1)
	       '(     y  mo w  d   h  m   s))))
	(list 8 6 5 2 1)))
     '(5 10 15 20 30 40 50 500))
    (if values
	(apply values result)
	(values 0 day 1 0 'd))))

;;======================================================================
;; given x y lim return the cron expansion
;;
(define (common:expand-cron-slash x y lim)
  (let loop ((curr x)
	     (res  `()))
    (if (< curr lim)
	(loop (+ curr y) (cons curr res))
	(reverse res))))

;;======================================================================
;; expand a complex cron string to a list of cron strings
;;
;;  x/y   => x, x+y, x+2y, x+3y while x+Ny<max_for_field
;;  a,b,c => a, b ,c
;;
;;   NOTE: with flatten a lot of the crud below can be factored down.
;;
(define (common:cron-expand cron-str)
  (if (list? cron-str)
      (flatten
       (fold (lambda (x res)
	       (if (list? x)
		   (let ((newres (map common:cron-expand x)))
		     (append x newres))
		   (cons x res)))
	     '()
	     cron-str)) ;; (map common:cron-expand cron-str))
      (let ((cron-items (string-split cron-str))
	    (slash-rx   (regexp "(\\d+)/(\\d+)"))
	    (comma-rx   (regexp ".*,.*"))
	    (max-vals   '((min        . 60)
			  (hour       . 24)
			  (dayofmonth . 28) ;;; BUG!!!! This will be a bug for some combinations
			  (month      . 12)
			  (dayofweek  . 7))))
	(if (< (length cron-items) 5) ;; bad spec
	    cron-str ;; `(,cron-str)              ;; just return the string, something downstream will fix it
	    (let loop ((hed  (car cron-items))
		       (tal  (cdr cron-items))
		       (type 'min)
		       (type-tal '(hour dayofmonth month dayofweek))
		       (res  '()))
	      (regex-case
		  hed
		(slash-rx ( _ base incr ) (let* ((basen          (string->number base))
						 (incrn          (string->number incr))
						 (expanded-vals  (common:expand-cron-slash basen incrn (alist-ref type max-vals)))
						 (new-list-crons (fold (lambda (x myres)
									 (cons (conc (if (null? res)
											 ""
											 (conc (string-intersperse res " ") " "))
										     x " " (string-intersperse tal " "))
									       myres))
								       '() expanded-vals)))
					    ;; (print "new-list-crons: " new-list-crons)
					    ;; (fold (lambda (x res)
					    ;; 	    (if (list? x)
					    ;; 		(let ((newres (map common:cron-expand x)))
					    ;; 		  (append x newres))
					    ;; 		(cons x res)))
					    ;; 	  '()
					    (flatten (map common:cron-expand new-list-crons))))
		;;					    (map common:cron-expand (map common:cron-expand new-list-crons))))
		(else (if (null? tal)
			  cron-str
			  (loop (car tal)(cdr tal)(car type-tal)(cdr type-tal)(append res (list hed)))))))))))

;;======================================================================
;; given a cron string and the last time event was processed return #t to run or #f to not run
;;
;;  min    hour   dayofmonth month  dayofweek
;; 0-59    0-23   1-31       1-12   0-6          ### NOTE: dayofweek does not include 7
;;
;;  #t => yes, run the job
;;  #f => no, do not run the job
;;
(define (common:cron-event cron-str now-seconds-in last-done) ;; ref-seconds = #f is NOW.
  (let* ((cron-items     (map string->number (string-split cron-str)))
	 (now-seconds    (or now-seconds-in (current-seconds)))
	 (now-time       (seconds->local-time now-seconds))
	 (last-done-time (seconds->local-time last-done))
	 (all-times      (make-hash-table)))
    ;; (print "cron-items: " cron-items "(length cron-items): " (length cron-items))
    (if (not (eq? (length cron-items) 5)) ;; don't even try to figure out junk strings
	#f
	(match-let (((     cmin chour cdayofmonth cmonth    cdayofweek)
		     cron-items)
		    ;; 0     1    2        3         4    5      6
		    ((nsec nmin nhour ndayofmonth nmonth nyr ndayofweek n7 n8 n9)
		     (vector->list now-time))
		    ((lsec lmin lhour ldayofmonth lmonth lyr ldayofweek l7 l8 l9)
		     (vector->list last-done-time)))
	  ;; create all possible time slots
	  ;; remove invalid slots due to (for example) day of week
	  ;; get the start and end entries for the ref-seconds (current) time
	  ;; if last-done > ref-seconds => this is an ERROR!
	  ;; does the last-done time fall in the legit region?
	  ;;    yes => #f  do not run again this command
	  ;;    no  => #t  ok to run the command
	  (for-each ;; month
	   (lambda (month)
	     (for-each ;; dayofmonth
	      (lambda (dom)
		(for-each
		 (lambda (hr) ;; hour
		   (for-each
		    (lambda (minute) ;; minute
		      (let ((copy-now (apply vector (vector->list now-time))))
			(vector-set! copy-now 0 0) ;; force seconds to zero
			(vector-set! copy-now 1 minute)
			(vector-set! copy-now 2 hr)
			(vector-set! copy-now 3 dom)  ;; dom is already corrected for zero referenced
			(vector-set! copy-now 4 month)
			(let* ((copy-now-secs (local-time->seconds copy-now))
			       (new-copy      (seconds->local-time copy-now-secs))) ;; remake the time vector
			  (if (or (not cdayofweek)
				  (equal? (vector-ref new-copy 6)
					  cdayofweek)) ;; if the day is specified and a match OR if the day is NOT specified
			      (if (or (not cdayofmonth)
				      (equal? (vector-ref new-copy 3)
					      (+ 1 cdayofmonth))) ;; if the month is specified and a match OR if the month is NOT specified
				  (hash-table-set! all-times copy-now-secs new-copy))))))
		    (if cmin
			`(,cmin)  ;; if given cmin, have to use it
			(list (- nmin 1) nmin (+ nmin 1))))) ;; minute
		 (if chour
		     `(,chour)
		     (list (- nhour 1) nhour (+ nhour 1))))) ;; hour
	      (if cdayofmonth
		  `(,cdayofmonth)
		  (list (- ndayofmonth 1) ndayofmonth (+ ndayofmonth 1)))))
	   (if cmonth
	       `(,cmonth)
	       (list (- nmonth 1) nmonth (+ nmonth 1))))
	  (let ((before #f)
		(is-in  #f))
	    (for-each
	     (lambda (moment)
	       (if (and before
			(<= before now-seconds)
			(>= moment now-seconds))
		   (begin
		     ;; (print)
		     ;; (print "Before: " (time->string (seconds->local-time before)))
		     ;; (print "Now:    " (time->string (seconds->local-time now-seconds)))
		     ;; (print "After:  " (time->string (seconds->local-time moment)))
		     ;; (print "Last:   " (time->string (seconds->local-time last-done)))
		     (if (<  last-done before)
			 (set! is-in before))
		     ))
	       (set! before moment))
	     (sort (hash-table-keys all-times) <))
	    is-in)))))

(define (common:extended-cron  cron-str now-seconds-in last-done)
  (let ((expanded-cron (common:cron-expand cron-str)))
    (if (string? expanded-cron)
	(common:cron-event expanded-cron now-seconds-in last-done)
	(let loop ((hed (car expanded-cron))
		   (tal (cdr expanded-cron)))
	  (if (common:cron-event hed now-seconds-in last-done)
	      #t
	      (if (null? tal)
		  #f
		  (loop (car tal)(cdr tal))))))))



;;======================================================================
;; misc stuff
;;======================================================================

(define (common:get-signature str)
  (message-digest-string (md5-primitive) str))

;;======================================================================
;; hash of hashs
;;======================================================================

(define (db:hoh-set! dat key1 key2 val)
  (let* ((subhash (hash-table-ref/default dat key1 #f)))
    (if subhash
	(hash-table-set! subhash key2 val)
	(begin
	  (hash-table-set! dat key1 (make-hash-table))
	  (db:hoh-set! dat key1 key2 val)))))

(define (db:hoh-get dat key1 key2)
  (let* ((subhash (hash-table-ref/default dat key1 #f)))
    (and subhash
	 (hash-table-ref/default subhash key2 #f))))

;;======================================================================
;; when called from a wrapper I need sometimes to find the calling
;; wrapper, this is for dashboard to find the correct megatest.
;;
(define (common:find-local-megatest #!optional (progname "megatest"))
  (let ((res (filter file-exists?
		     (map (lambda (updir)
			    (let* ((lm  (car (argv)))
				   (dir (pathname-directory lm))
				   (exe (pathname-strip-directory lm)))
			      (conc (if dir (conc dir "/") "")
				    (case (string->symbol exe)
				      ((dboard)    (conc updir progname))
				      ((mtest)     (conc updir progname))
				      ((dashboard) progname)
				      (else exe)))))
			  '("../../" "../")))))
    (if (null? res)
	(begin
	  ;; (debug:print 0 *default-log-port* "Failed to find this executable! Using what can be found on the path")
	  progname)
	(car res))))

)

Modified dashboard-context-menu.scm from [83fc3e6c83] to [9827592e3b].

30
31
32
33
34
35
36

37
38
39
40
41
42
43
44
45
46
47


48
49
50
51
52
53
54
(use canvas-draw)

(use srfi-1 posix regex regex-case srfi-69)
(use (prefix sqlite3 sqlite3:))

(declare (unit dashboard-context-menu))
(declare (uses common))

(declare (uses db))
(declare (uses gutils))
(declare (uses rmt))
(declare (uses ezsteps))
;; (declare (uses sdb))
;; (declare (uses filedb))
(declare (uses subrun))

(include "common_records.scm")
(include "db_records.scm")
(include "run_records.scm")



(define (dboard:launch-testpanel run-id test-id)
  (let* ((dboardexe (common:find-local-megatest "dashboard"))
         (cmd (conc dboardexe
                    " -test " run-id "," test-id
                    " &")))
    (system cmd)))







>











>
>







30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
(use canvas-draw)

(use srfi-1 posix regex regex-case srfi-69)
(use (prefix sqlite3 sqlite3:))

(declare (unit dashboard-context-menu))
(declare (uses common))
(declare (uses commonmod))
(declare (uses db))
(declare (uses gutils))
(declare (uses rmt))
(declare (uses ezsteps))
;; (declare (uses sdb))
;; (declare (uses filedb))
(declare (uses subrun))

(include "common_records.scm")
(include "db_records.scm")
(include "run_records.scm")

(import commonmod)

(define (dboard:launch-testpanel run-id test-id)
  (let* ((dboardexe (common:find-local-megatest "dashboard"))
         (cmd (conc dboardexe
                    " -test " run-id "," test-id
                    " &")))
    (system cmd)))

Modified dashboard-tests.scm from [b934cba7e8] to [124658aa85].

29
30
31
32
33
34
35

36
37
38
39
40
41
42


43
44
45
46
47
48
49
(use canvas-draw)

(use srfi-1 posix regex regex-case srfi-69)
(use (prefix sqlite3 sqlite3:))

(declare (unit dashboard-tests))
(declare (uses common))

(declare (uses db))
(declare (uses gutils))
(declare (uses rmt))
(declare (uses ezsteps))
;; (declare (uses sdb))
;; (declare (uses filedb))
(declare (uses subrun))



(include "common_records.scm")
(include "db_records.scm")
(include "run_records.scm")

;;======================================================================
;; C O M M O N







>







>
>







29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
(use canvas-draw)

(use srfi-1 posix regex regex-case srfi-69)
(use (prefix sqlite3 sqlite3:))

(declare (unit dashboard-tests))
(declare (uses common))
(declare (uses commonmod))
(declare (uses db))
(declare (uses gutils))
(declare (uses rmt))
(declare (uses ezsteps))
;; (declare (uses sdb))
;; (declare (uses filedb))
(declare (uses subrun))

(import commonmod)

(include "common_records.scm")
(include "db_records.scm")
(include "run_records.scm")

;;======================================================================
;; C O M M O N
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472


;;======================================================================
;;
;;======================================================================
(define (dashboard-tests:examine-test run-id test-id) ;; run-id run-key origtest)
  (let* ((db-path       (db:dbfile-path)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/db/" run-id ".db"))
	 (dbstruct      #f) ;; NOT ACTUALLY USED (db:setup)) ;; (make-dbr:dbstruct path:  (db:dbfile-path #f) ;; (configf:lookup *configdat* "setup" "linktree") 
			    ;;		   local: #t))
	 (testdat        (rmt:get-test-info-by-id run-id test-id)) ;; (db:get-test-info-by-id dbstruct run-id test-id))
	 (db-mod-time   0) ;; (file-modification-time db-path))
	 (last-update   0) ;; (current-seconds))
	 (request-update #t))
    (if (not testdat)
	(begin
	  (debug:print 2 *default-log-port* "ERROR: No test data found for test " test-id ", exiting")







|
<







460
461
462
463
464
465
466
467

468
469
470
471
472
473
474


;;======================================================================
;;
;;======================================================================
(define (dashboard-tests:examine-test run-id test-id) ;; run-id run-key origtest)
  (let* ((db-path       (db:dbfile-path)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/db/" run-id ".db"))
	 (dbstruct      #f) ;; NOT USED

	 (testdat        (rmt:get-test-info-by-id run-id test-id)) ;; (db:get-test-info-by-id dbstruct run-id test-id))
	 (db-mod-time   0) ;; (file-modification-time db-path))
	 (last-update   0) ;; (current-seconds))
	 (request-update #t))
    (if (not testdat)
	(begin
	  (debug:print 2 *default-log-port* "ERROR: No test data found for test " test-id ", exiting")

Modified dashboard.scm from [4ad343f07e] to [1463b2c62c].

42
43
44
45
46
47
48


49


50
51
52
53
54
55
56
57
58




59
60
61
62
63
64
65
(declare (uses dashboard-tests))
(declare (uses tree))
(declare (uses dcommon))
(declare (uses dashboard-context-menu))
(declare (uses vg))
(declare (uses subrun))
(declare (uses mt))


(declare (uses dbfile))        



(include "common_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "task_records.scm")
(include "megatest-version.scm")
(include "megatest-fossil-hash.scm")
(include "vg_records.scm")





(dbfile:db-init-proc db:initialize-main-db)

(define help (conc 
	      "Megatest Dashboard, documentation at http://www.kiatoa.com/fossils/megatest version " megatest-version 
              " license GPL, Copyright (C) Matt Welland 2012-2017

Usage: dashboard [options]







>
>
|
>
>









>
>
>
>







42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
(declare (uses dashboard-tests))
(declare (uses tree))
(declare (uses dcommon))
(declare (uses dashboard-context-menu))
(declare (uses vg))
(declare (uses subrun))
(declare (uses mt))
(declare (uses dbmod))
;; (declare (uses dbmemmod))
(declare (uses dbfile))

(import dbmod dbfile)

(include "common_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "task_records.scm")
(include "megatest-version.scm")
(include "megatest-fossil-hash.scm")
(include "vg_records.scm")

;; set some parameters here - these need to be put in something that can be loaded from other
;; executables such as dashboard and mtutil
;;
(include "dashboard-transport-mode.scm")
(dbfile:db-init-proc db:initialize-main-db)

(define help (conc 
	      "Megatest Dashboard, documentation at http://www.kiatoa.com/fossils/megatest version " megatest-version 
              " license GPL, Copyright (C) Matt Welland 2012-2017

Usage: dashboard [options]

Modified db.scm from [9c05d0ba74] to [b785a065ec].

20
21
22
23
24
25
26










27
28
29
30
31
32
33

;;======================================================================
;; Database access
;;======================================================================

;; dbstruct vector containing all the relevant dbs like main.db, megatest.db, run.db etc











(use (srfi 18)
     extras
     tcp
     stack
     (prefix sqlite3 sqlite3:)
     srfi-1
     posix







>
>
>
>
>
>
>
>
>
>







20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43

;;======================================================================
;; Database access
;;======================================================================

;; dbstruct vector containing all the relevant dbs like main.db, megatest.db, run.db etc

(declare (unit db))
(declare (uses common))
(declare (uses debugprint))
(declare (uses dbmod))
(declare (uses dbfile))
(declare (uses keys))
(declare (uses ods))
(declare (uses client))
(declare (uses mt))

(use (srfi 18)
     extras
     tcp
     stack
     (prefix sqlite3 sqlite3:)
     srfi-1
     posix
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66

67
68
69
70
71
72
73
74
75
76
77
78







79
80
81
82
83
84
85
     format
     dot-locking
     z3
     typed-records
     matchable
     files)

(declare (unit db))
(declare (uses common))
(declare (uses dbmod))
;; (declare (uses debugprint))
(declare (uses dbfile))
(declare (uses keys))
(declare (uses ods))
(declare (uses client))
(declare (uses mt))

(include "common_records.scm")
(include "db_records.scm")
(include "key_records.scm")
(include "run_records.scm")

(define *number-of-writes* 0)
(define *number-non-write-queries* 0)


(import dbmod)
(import dbfile)

;; record for keeping state,status and count for doing roll-ups in
;; iterated tests
;;
(defstruct dbr:counts
  (state #f)
  (status #f)
  (count  0)) 









;;======================================================================
;; hash of hashs
;;======================================================================


(define (db:hoh-set! dat key1 key2 val)
  (let* ((subhash (hash-table-ref/default dat key1 #f)))







<
<
<
<
<
<
<
<
<
<








>












>
>
>
>
>
>
>







52
53
54
55
56
57
58










59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
     format
     dot-locking
     z3
     typed-records
     matchable
     files)











(include "common_records.scm")
(include "db_records.scm")
(include "key_records.scm")
(include "run_records.scm")

(define *number-of-writes* 0)
(define *number-non-write-queries* 0)

(import debugprint)
(import dbmod)
(import dbfile)

;; record for keeping state,status and count for doing roll-ups in
;; iterated tests
;;
(defstruct dbr:counts
  (state #f)
  (status #f)
  (count  0)) 


(define (db:with-db dbstruct run-id r/w proc . params)
  (case (rmt:transport-mode)
    ((http)(dbfile:with-db dbstruct run-id r/w proc params))
    ((tcp) (dbmod:with-db dbstruct run-id r/w proc params))
    ((nfs) (dbmod:with-db dbstruct run-id r/w proc params))
    (else (assert #f "FATAL: db:with-db called with non-existant transport mode"))))

;;======================================================================
;; hash of hashs
;;======================================================================


(define (db:hoh-set! dat key1 key2 val)
  (let* ((subhash (hash-table-ref/default dat key1 #f)))
122
123
124
125
126
127
128









































































129
130
131
132
133
134
135

(define (db:setup do-sync)
  (assert *toppath* "FATAL: db:setup called before launch:setup has been run.")
  (let* ((tmpdir (common:get-db-tmp-area)))
    (if (not *dbstruct-dbs*)
	(dbfile:setup do-sync *toppath* tmpdir)
	*dbstruct-dbs*)))










































































;; looks up subdb and returns it, if not found then set up
;; and then return it.
;;
#;(define (db:get-db dbstruct run-id)
  (let* ((res (dbfile:get-subdb dbstruct run-id)))
    (if res







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216

(define (db:setup do-sync)
  (assert *toppath* "FATAL: db:setup called before launch:setup has been run.")
  (let* ((tmpdir (common:get-db-tmp-area)))
    (if (not *dbstruct-dbs*)
	(dbfile:setup do-sync *toppath* tmpdir)
	*dbstruct-dbs*)))

;; moved from dbfile
;;
;; ADD run-id SUPPORT
;;
(define (db:create-all-triggers dbstruct)
  (db:with-db
   dbstruct #f #f
   (lambda (dbdat db)
     (db:create-triggers db))))

(define (db:create-triggers db)
    (for-each (lambda (key)
              (sqlite3:execute db (cadr key)))
          db:trigger-list))

(define (db:drop-all-triggers dbstruct)
  (db:with-db
   dbstruct #f #f
   (lambda (dbdat db)
     (db:drop-triggers db))))

(define (db:have-incompletes? dbstruct run-id ovr-deadtime)
  (let* ((incompleted '())
	 (oldlaunched '())
	 (toplevels   '())
	 ;; (deadtime-str (configf:lookup *configdat* "setup" "deadtime"))
	 (deadtime  (or ovr-deadtime 72000))) ;; twenty hours
    (db:with-db
     dbstruct run-id #f
     (lambda (dbdat db)
       
       ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
       ;;
       ;; HOWEVER: this code in run:test seems to work fine
       ;;              (> (- (current-seconds)(+ (db:test-get-event_time testdat)
       ;;                     (db:test-get-run_duration testdat)))
       ;;                    600) 
       ;; (db:delay-if-busy dbdat)
       (sqlite3:for-each-row 
        (lambda (test-id run-dir uname testname item-path)
          (if (and (equal? uname "n/a")
                   (equal? item-path "")) ;; this is a toplevel test
              ;; what to do with toplevel? call rollup?
              (begin
                (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels)))
                ;; (print-info "Found old toplevel test in RUNNING state, test-id=" test-id))
              (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))
        (db:get-cache-stmth dbdat db
        "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');")
        run-id deadtime)

       ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
       ;;
       ;; (db:delay-if-busy dbdat)
       (sqlite3:for-each-row
        (lambda (test-id run-dir uname testname item-path)
          (if (and (equal? uname "n/a")
                   (equal? item-path "")) ;; this is a toplevel test
              ;; what to do with toplevel? call rollup?
              (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
              (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))
        (db:get-cache-stmth dbdat db
        "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');")
        run-id)
       
       ;; (print-info "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.")
       (if (and (null? incompleted)
                (null? oldlaunched)
                (null? toplevels))
           #f
           #t)))))


;; looks up subdb and returns it, if not found then set up
;; and then return it.
;;
#;(define (db:get-db dbstruct run-id)
  (let* ((res (dbfile:get-subdb dbstruct run-id)))
    (if res
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
	 (src-area (if old2new *toppath* tmp-area))
	 (dest-area (if old2new tmp-area *toppath*))
	 (dbfiles        (if old2new (glob (conc *toppath* "/.megatest/*.db")) (glob (conc tmp-area "/.megatest/*.db"))))
	 (keys (db:get-keys dbstruct))
	 (sync-durations (make-hash-table)))

    ;; kill servers
    (if (and killservers servers)(db:kill-servers))
    
    (if (not dbfiles)
	(debug:print-error 0 *default-log-port* "no dbfiles found in " (conc *toppath* "/.megatest"))
	(for-each
	 (lambda (srcfile)
	   (debug:print-info 3 *default-log-port* "file: " srcfile)
	   (let* ((fname    (conc (pathname-file srcfile) ".db"))







|







592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
	 (src-area (if old2new *toppath* tmp-area))
	 (dest-area (if old2new tmp-area *toppath*))
	 (dbfiles        (if old2new (glob (conc *toppath* "/.megatest/*.db")) (glob (conc tmp-area "/.megatest/*.db"))))
	 (keys (db:get-keys dbstruct))
	 (sync-durations (make-hash-table)))

    ;; kill servers
    (if killservers (db:kill-servers))
    
    (if (not dbfiles)
	(debug:print-error 0 *default-log-port* "no dbfiles found in " (conc *toppath* "/.megatest"))
	(for-each
	 (lambda (srcfile)
	   (debug:print-info 3 *default-log-port* "file: " srcfile)
	   (let* ((fname    (conc (pathname-file srcfile) ".db"))
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
;; Sync all changed db's
;;
(define (db:tmp->megatest.db-sync dbstruct run-id last-update)
  (let* ((subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct)))
	 (res    '()))
    (for-each
     (lambda (subdb)
       (let* ((dbname (db:run-id->dbname run-id))
	      (mtdb   (dbr:subdb-mtdb subdb))
	      (tmpdb  (db:get-subdb dbstruct run-id))
	      (refndb (dbr:subdb-refndb subdb))
	      (newres (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) last-update tmpdb refndb mtdb)))
	 ;; (stack-push! (dbr:subdb-dbstack subdb) tmpdb)
	 ;; BUG: verify this is really needed
	 (dbfile:add-dbdat dbstruct run-id tmpdb)
	 (set! res (cons newres res))))







<
|







662
663
664
665
666
667
668

669
670
671
672
673
674
675
676
;; Sync all changed db's
;;
(define (db:tmp->megatest.db-sync dbstruct run-id last-update)
  (let* ((subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct)))
	 (res    '()))
    (for-each
     (lambda (subdb)

       (let* ((mtdb   (dbr:subdb-mtdb subdb))
	      (tmpdb  (db:get-subdb dbstruct run-id))
	      (refndb (dbr:subdb-refndb subdb))
	      (newres (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) last-update tmpdb refndb mtdb)))
	 ;; (stack-push! (dbr:subdb-dbstack subdb) tmpdb)
	 ;; BUG: verify this is really needed
	 (dbfile:add-dbdat dbstruct run-id tmpdb)
	 (set! res (cons newres res))))
1390
1391
1392
1393
1394
1395
1396







1397
1398
1399
1400
1401
1402
1403
1404
1405

1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
	      (lambda (dbdat db)
		(sqlite3:execute  (db:get-cache-stmth dbdat db "DELETE FROM metadat WHERE var=?;") var))))

;;======================================================================
;; no-sync.db - small bits of data to be shared between servers
;;======================================================================








(define (db:no-sync-db db-in)
  (if db-in
      db-in
      (if *no-sync-db*
	  *no-sync-db*
	  (begin
	    (mutex-lock! *db-access-mutex*)
	    (let ((dbpath (common:get-db-tmp-area))
		  (db     (dbfile:open-no-sync-db dbpath)))

	      (set! *no-sync-db* db)
	      (mutex-unlock! *db-access-mutex*)
	      db)))))

(define (with-no-sync-db proc)
  (let* ((db  (db:no-sync-db *no-sync-db*)))
    (proc db)))

(define (db:open-no-sync-db)
  (dbfile:open-no-sync-db (db:dbfile-path)))

(define (db:no-sync-close-db db stmt-cache)
  (db:safely-close-sqlite3-db db stmt-cache))


;; use a global for some primitive caching, it is just silly to
;; re-read the db over and over again for the keys since they never







>
>
>
>
>
>
>
|
|
|
|
|
|
|
|
|
>
|
|
|


|



|







1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
	      (lambda (dbdat db)
		(sqlite3:execute  (db:get-cache-stmth dbdat db "DELETE FROM metadat WHERE var=?;") var))))

;;======================================================================
;; no-sync.db - small bits of data to be shared between servers
;;======================================================================

(define (db:get-dbsync-path)
  (case (rmt:transport-mode)
    ((http)(common:get-db-tmp-area))
    ((tcp) (conc *toppath*"/.megatest"))
    ((nfs) (conc *toppath*"/.megatest"))
    (else "/tmp/dunno-this-gonna-exist")))

 ;; (define (db:no-sync-db db-in)
 ;;   (if db-in
 ;;       db-in
 ;;       (if *no-sync-db*
 ;; 	  *no-sync-db*
 ;; 	  (begin
 ;; 	    (mutex-lock! *db-access-mutex*)
 ;; 	    (let ((dbpath (db:get-dbsync-path))
 ;; 		  (db     (dbfile:open-no-sync-db dbpath)))
 ;; 	      (assert (sqlite3:database? db) "FATAL: db:no-sync-db failed to open a database")
 ;; 	      (set! *no-sync-db* db)
 ;; 	      (mutex-unlock! *db-access-mutex*)
 ;; 	      db)))))

(define (with-no-sync-db proc)
  (let* ((db  (db:open-no-sync-db)))
    (proc db)))

(define (db:open-no-sync-db)
  (dbfile:open-no-sync-db (db:get-dbsync-path)))

(define (db:no-sync-close-db db stmt-cache)
  (db:safely-close-sqlite3-db db stmt-cache))


;; use a global for some primitive caching, it is just silly to
;; re-read the db over and over again for the keys since they never
1951
1952
1953
1954
1955
1956
1957

1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
			(if offset (conc " OFFSET " offset) "")
			";"))
    (debug:print-info 4 *default-log-port* "runs:get-runs-by-patt qry=" qry-str " " runnamepatt)
    ;(print "runs:get-runs-by-patt qry=" qry-str " " runnamepatt)

    (vector header 
            (reverse

             (db:with-db dbstruct #f #f ;; reads db, does not write to it.
                         (lambda (dbdat db)
                           (sqlite3:fold-row
                            (lambda (res . r)
                              (cons (list->vector r) res))
                            '()
                            db
                            qry-str
                            runnamepatt)))))))

;; use (get-value-by-header (db:get-header runinfo)(db:get-rows runinfo))
;; NOTE: Does NOT return a list of rows (or one row) for the first slot of the vector
;;       this is inconsistent with get-runs but it makes some sense.
;;
(define (db:get-run-info dbstruct run-id)
  ;;(if (hash-table-ref/default *run-info-cache* run-id #f)







>
|
|
|
|
|
|
|
|
|







2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
			(if offset (conc " OFFSET " offset) "")
			";"))
    (debug:print-info 4 *default-log-port* "runs:get-runs-by-patt qry=" qry-str " " runnamepatt)
    ;(print "runs:get-runs-by-patt qry=" qry-str " " runnamepatt)

    (vector header 
            (reverse
             (db:with-db
	      dbstruct #f #f ;; reads db, does not write to it.
              (lambda (dbdat db)
                (sqlite3:fold-row
                 (lambda (res . r)
                   (cons (list->vector r) res))
                 '()
                 db
                 qry-str
                 runnamepatt)))))))

;; use (get-value-by-header (db:get-header runinfo)(db:get-rows runinfo))
;; NOTE: Does NOT return a list of rows (or one row) for the first slot of the vector
;;       this is inconsistent with get-runs but it makes some sense.
;;
(define (db:get-run-info dbstruct run-id)
  ;;(if (hash-table-ref/default *run-info-cache* run-id #f)
4664
4665
4666
4667
4668
4669
4670



4671
4672
4673
4674
4675
4676
4677
				  (let ((db (cdr *task-db*)))
				    (if (sqlite3:database? db)
					(begin
					  (sqlite3:interrupt! db)
					  (sqlite3:finalize! db #t)
					  ;; (vector-set! *task-db* 0 #f)
					  (set! *task-db* #f)))))



			      (if (and (not (args:get-arg "-server"))
				       *runremote*)
				  (begin
				    (debug:print-info 0 *default-log-port* "Closing all client connections...")
				    (http-transport:close-connections *runremote*)
				    #;(http-client#close-all-connections!)))
                              ;; (if (and *runremote*







>
>
>







4753
4754
4755
4756
4757
4758
4759
4760
4761
4762
4763
4764
4765
4766
4767
4768
4769
				  (let ((db (cdr *task-db*)))
				    (if (sqlite3:database? db)
					(begin
					  (sqlite3:interrupt! db)
					  (sqlite3:finalize! db #t)
					  ;; (vector-set! *task-db* 0 #f)
					  (set! *task-db* #f)))))
			      (if (and *no-sync-db*
				       (sqlite3:database? *no-sync-db*))
				  (sqlite3:finalize! *no-sync-db* #t))
			      (if (and (not (args:get-arg "-server"))
				       *runremote*)
				  (begin
				    (debug:print-info 0 *default-log-port* "Closing all client connections...")
				    (http-transport:close-connections *runremote*)
				    #;(http-client#close-all-connections!)))
                              ;; (if (and *runremote*

Modified dbfile.scm from [e35890c342] to [5dd6613875].

39
40
41
42
43
44
45
46

47
48
49
50
51
52
53
54
55
56
57
58
59
60









61
62
63
64
65
66
67
	ports

	commonmod
	;; debugprint
	)

(define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic
(define num-run-dbs (make-parameter 10))     ;; number of db's in .megatest


;;======================================================================
;;  R E C O R D S
;;======================================================================

;; a single Megatest area with it's multiple dbs is
;; managed in a dbstruct
;;
(defstruct dbr:dbstruct
  (areapath  #f)
  (homehost  #f)
  (tmppath   #f)
  (read-only #f)
  (subdbs (make-hash-table))









  )

;; NOTE: Need one dbr:subdb per main.db, 1.db ...
;;
(defstruct dbr:subdb
  (dbname      #f) ;; .megatest/1.db
  (mtdbfile    #f) ;; mtrah/.megatest/1.db







|
>














>
>
>
>
>
>
>
>
>







39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
	ports

	commonmod
	;; debugprint
	)

(define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic
(define num-run-dbs    (make-parameter 10))     ;; number of db's in .megatest
(define dbfile:testsuite-name (make-parameter #f))

;;======================================================================
;;  R E C O R D S
;;======================================================================

;; a single Megatest area with it's multiple dbs is
;; managed in a dbstruct
;;
(defstruct dbr:dbstruct
  (areapath  #f)
  (homehost  #f)
  (tmppath   #f)
  (read-only #f)
  (subdbs (make-hash-table))
  ;;
  ;; for the inmem approach (see dbmod.scm)
  ;; this is one db per server
  (inmem     #f)  ;; handle for the in memory copy
  (dbfile    #f)  ;; path to the db file on disk
  (ondiskdb  #f)  ;; handle for the on-disk file
  (dbdat     #f)  ;; create a dbdat for the downstream calls such as db:with-db
  (last-update 0)
  (sync-proc #f)
  )

;; NOTE: Need one dbr:subdb per main.db, 1.db ...
;;
(defstruct dbr:subdb
  (dbname      #f) ;; .megatest/1.db
  (mtdbfile    #f) ;; mtrah/.megatest/1.db
92
93
94
95
96
97
98

99
100
101
102
103
104
105
(define *db-sync-in-progress* #f)
(define *db-with-db-mutex*    (make-mutex))
(define *max-api-process-requests* 0)
(define *api-process-request-count* 0)
(define *db-write-access*     #t)
(define *db-last-sync*        0)                 ;; last time the sync to megatest.db happened
(define *db-multi-sync-mutex* (make-mutex))      ;; protect access to *db-sync-in-progress*, *db-last-sync*


(define (db:generic-error-printout exn . message)
  (print-call-chain (current-error-port))
  (apply dbfile:print-err message)
  (dbfile:print-err
    ", error: "     ((condition-property-accessor 'exn 'message)   exn)
    ", arguments: " ((condition-property-accessor 'exn 'arguments) exn)







>







102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
(define *db-sync-in-progress* #f)
(define *db-with-db-mutex*    (make-mutex))
(define *max-api-process-requests* 0)
(define *api-process-request-count* 0)
(define *db-write-access*     #t)
(define *db-last-sync*        0)                 ;; last time the sync to megatest.db happened
(define *db-multi-sync-mutex* (make-mutex))      ;; protect access to *db-sync-in-progress*, *db-last-sync*
(define *db-last-access*      (current-seconds))

(define (db:generic-error-printout exn . message)
  (print-call-chain (current-error-port))
  (apply dbfile:print-err message)
  (dbfile:print-err
    ", error: "     ((condition-property-accessor 'exn 'message)   exn)
    ", arguments: " ((condition-property-accessor 'exn 'arguments) exn)
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
	   subdbs)
           #t
          )
          #f
  )
)

;; ;; set up a single db (e.g. main.db, 1.db ... etc.)
;; ;;
;; (define (db:setup-db dbstruct areapath run-id)
;;   (let* ((dbname   (db:run-id->dbname run-id))
;; 	 (dbstruct (hash-table-ref/default dbstructs dbname #f)))
;;     (if dbstruct
;; 	dbstruct
;; 	(let* ((dbstruct-new (make-dbr:dbstruct)))
;; 	  (db:open-db dbstruct-new run-id areapath: areapath do-sync: #t)
;; 	  (hash-table-set! dbstructs dbname dbstruct-new)
;; 	  dbstruct-new))))
    
;; ; Returns the dbdat for a particular dbfile inside the area
;; ;;
;; (define (dbr:dbstruct-get-dbdat dbstruct dbfile)
;;   (hash-table-ref/default (dbr:dbstruct-dbdats dbstruct) dbfile #f))
;; 
;; (define (dbr:dbstruct-dbdat-put! dbstruct dbfile db)
;;   (hash-table-set! (dbr:dbstruct-dbdats dbstruct) dbfile db))
;; 
;; (define (db:run-id->first-num run-id)
;;   (let* ((s (number->string run-id))
;; 	 (l (string-length s)))
;;     (substring s (- l 1) l)))

;; 1234 => 4/1234.db
;;   #f => 0/main.db
;;   (abandoned the idea of num/db)
;; 
(define (dbfile:run-id->path apath run-id)
  (conc apath"/"(dbfile:run-id->dbname run-id)))

(define (db:dbname->path apath dbname)
  (conc apath"/"dbname))

(define (dbfile:run-id->dbnum run-id)







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







168
169
170
171
172
173
174





























175
176
177
178
179
180
181
	   subdbs)
           #t
          )
          #f
  )
)






























(define (dbfile:run-id->path apath run-id)
  (conc apath"/"(dbfile:run-id->dbname run-id)))

(define (db:dbname->path apath dbname)
  (conc apath"/"dbname))

(define (dbfile:run-id->dbnum run-id)
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
;;
(define (dbfile:setup do-sync areapath tmppath)
  (cond
   (*dbstruct-dbs*
    (dbfile:print-err "WARNING: dbfile:setup called when *dbstruct-dbs* is already initialized")
    *dbstruct-dbs*) ;; TODO: when multiple areas are supported, this optimization will be a hazard
   (else
    (let* ((dbstruct (make-dbr:dbstruct)))
      (set! *dbstruct-dbs* dbstruct)
      (dbr:dbstruct-areapath-set! dbstruct areapath)
      (dbr:dbstruct-tmppath-set!  dbstruct tmppath)
      dbstruct))))

(define (dbfile:get-subdb dbstruct run-id)
  (let* ((dbfname (dbfile:run-id->dbname run-id)))
    (hash-table-ref/default (dbr:dbstruct-subdbs dbstruct) dbfname #f)))

(define (dbfile:set-subdb dbstruct run-id subdb)







|

<
<







195
196
197
198
199
200
201
202
203


204
205
206
207
208
209
210
;;
(define (dbfile:setup do-sync areapath tmppath)
  (cond
   (*dbstruct-dbs*
    (dbfile:print-err "WARNING: dbfile:setup called when *dbstruct-dbs* is already initialized")
    *dbstruct-dbs*) ;; TODO: when multiple areas are supported, this optimization will be a hazard
   (else
    (let* ((dbstruct (make-dbr:dbstruct areapath: areapath tmppath: tmppath)))
      (set! *dbstruct-dbs* dbstruct)


      dbstruct))))

(define (dbfile:get-subdb dbstruct run-id)
  (let* ((dbfname (dbfile:run-id->dbname run-id)))
    (hash-table-ref/default (dbr:dbstruct-subdbs dbstruct) dbfname #f)))

(define (dbfile:set-subdb dbstruct run-id subdb)
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433


434





435
436
437
438
439
440
441
442
443
  (let* ((backupfname (conc fname"-"(current-process-id)".bak"))
	 (cmd (conc "cp "fname" "backupfname";mv "fname" "(conc fname ".delme;")
		    "cp "backupfname" "fname)))
    (dbfile:print-err "WARNING: attempting recovery of file "fname" by running commands:\n"
		      "  "cmd)
    (system cmd)))


(define (dbfile:open-no-sync-db dbpath)
  (if *no-sync-db*
      *no-sync-db*
      (begin
	(if (not (file-exists? dbpath))
	    (create-directory dbpath #t))
	(let* ((dbname    (conc dbpath "/no-sync.db"))
	       (db-exists (file-exists? dbname))
	       (init-proc (lambda (db)
			    (if (not db-exists)
				(begin
				  (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));"))
				)))


	       (db        (dbfile:cautious-open-database dbname init-proc 0 "WAL"))) ;; (sqlite3:open-database dbname)))





	  ;; (sqlite3:execute db "PRAGMA synchronous = 0;")
	  ;; (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) ;; done in cautious-open-database
	  (set! *no-sync-db* db)
	  db))))

(define (db:no-sync-set db var val)
  (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val))

(define (db:no-sync-del! db var)







<













>
>
|
>
>
>
>
>
|
|







393
394
395
396
397
398
399

400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
  (let* ((backupfname (conc fname"-"(current-process-id)".bak"))
	 (cmd (conc "cp "fname" "backupfname";mv "fname" "(conc fname ".delme;")
		    "cp "backupfname" "fname)))
    (dbfile:print-err "WARNING: attempting recovery of file "fname" by running commands:\n"
		      "  "cmd)
    (system cmd)))


(define (dbfile:open-no-sync-db dbpath)
  (if *no-sync-db*
      *no-sync-db*
      (begin
	(if (not (file-exists? dbpath))
	    (create-directory dbpath #t))
	(let* ((dbname    (conc dbpath "/no-sync.db"))
	       (db-exists (file-exists? dbname))
	       (init-proc (lambda (db)
			    (if (not db-exists)
				(begin
				  (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));"))
				)))
	       (on-tmp      (equal? (car (string-split dbpath "/")) "tmp"))
	       (db        (if on-tmp
			      (dbfile:cautious-open-database dbname init-proc 0 "WAL")
			      (dbfile:cautious-open-database dbname init-proc 0 #f)
			      ;; (sqlite3:open-database dbname)
			      )))
	  (if on-tmp	      ;; done in cautious-open-database
	      (begin
		(sqlite3:execute db "PRAGMA synchronous = 0;")
		(sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))))
	  (set! *no-sync-db* db)
	  db))))

(define (db:no-sync-set db var val)
  (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val))

(define (db:no-sync-del! db var)
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
         (tmpdbfile (dbr:subdb-tmpdbfile subdb))
	 (mtdb      (dbr:subdb-mtdbdat subdb))
         (tmpdb     (db:open-db dbstruct run-id dbinit)) ;; sqlite3-db tmpdbfile #f))
	 (start-t   (current-seconds)))
    (mutex-lock! *db-multi-sync-mutex*)
    (let ((update_info (cons "last_update" (if force-sync 0 *db-last-sync*) )))
      (mutex-unlock! *db-multi-sync-mutex*)
      (db:sync-tables (db:sync-all-tables-list dbstruct keys) update_info tmpdb mtdb))
    (mutex-lock! *db-multi-sync-mutex*)
    (set! *db-last-sync* start-t)
    (set! *db-last-access* start-t)
    (mutex-unlock! *db-multi-sync-mutex*)
    (dbfile:add-dbdat dbstruct run-id tmpdb)
  #t))








|







554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
         (tmpdbfile (dbr:subdb-tmpdbfile subdb))
	 (mtdb      (dbr:subdb-mtdbdat subdb))
         (tmpdb     (db:open-db dbstruct run-id dbinit)) ;; sqlite3-db tmpdbfile #f))
	 (start-t   (current-seconds)))
    (mutex-lock! *db-multi-sync-mutex*)
    (let ((update_info (cons "last_update" (if force-sync 0 *db-last-sync*) )))
      (mutex-unlock! *db-multi-sync-mutex*)
      (db:sync-tables (db:sync-all-tables-list keys) update_info tmpdb mtdb))
    (mutex-lock! *db-multi-sync-mutex*)
    (set! *db-last-sync* start-t)
    (set! *db-last-access* start-t)
    (mutex-unlock! *db-multi-sync-mutex*)
    (dbfile:add-dbdat dbstruct run-id tmpdb)
  #t))

631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
	 '("comment"        #f)
	 '("status"         #f)
	 '("type"           #f)
         '("last_update"    #f))))

;; needs db to get keys, this is for syncing all tables
;;
(define (db:sync-main-list dbstruct keys)
  (let ((keys  keys)) ;; (db:get-keys dbstruct)))
    (list
     (list "keys"
	   '("id"        #f)
	   '("fieldname" #f)
	   '("fieldtype" #f))
     (list "metadat" '("var" #f) '("val" #f))
     (append (list "runs" 







|
|







617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
	 '("comment"        #f)
	 '("status"         #f)
	 '("type"           #f)
         '("last_update"    #f))))

;; needs db to get keys, this is for syncing all tables
;;
(define (db:sync-main-list keys)
  (let ((keys  keys))
    (list
     (list "keys"
	   '("id"        #f)
	   '("fieldname" #f)
	   '("fieldtype" #f))
     (list "metadat" '("var" #f) '("val" #f))
     (append (list "runs" 
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
           '("testpatt"      #f)
           '("keylock"       #f)
           '("params"        #f)
           '("creation_time" #f)
           '("execution_time" #f))
     )))

(define (db:sync-all-tables-list dbstruct keys)
  (append (db:sync-main-list dbstruct keys)
	  db:sync-tests-only))

;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) )
;; db's are dbdat's
;;
;; if last-update specified ("field-name" . time-in-seconds)
;;    then sync only records where field-name >= time-in-seconds







|
|







673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
           '("testpatt"      #f)
           '("keylock"       #f)
           '("params"        #f)
           '("creation_time" #f)
           '("execution_time" #f))
     )))

(define (db:sync-all-tables-list keys)
  (append (db:sync-main-list keys)
	  db:sync-tests-only))

;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) )
;; db's are dbdat's
;;
;; if last-update specified ("field-name" . time-in-seconds)
;;    then sync only records where field-name >= time-in-seconds
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
                               END;" )
	   (list "update_test_data_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data
                             FOR EACH ROW
                               BEGIN 
                                 UPDATE test_data SET last_update=(strftime('%s','now'))
                                   WHERE id=old.id;
                               END;" )))
;;
;; ADD run-id SUPPORT
;;
(define (db:create-all-triggers dbstruct)
  (db:with-db
   dbstruct #f #f
   (lambda (dbdat db)
     (db:create-triggers db))))

(define (db:create-triggers db)
    (for-each (lambda (key)
              (sqlite3:execute db (cadr key)))
          db:trigger-list))

(define (db:drop-all-triggers dbstruct)
  (db:with-db
   dbstruct #f #f
   (lambda (dbdat db)
     (db:drop-triggers db))))

(define (db:is-trigger-dropped db tbl-name)
  (let* ((trigger-name (if (equal? tbl-name "test_steps")
			   "update_teststeps_trigger" 
                           (conc "update_" tbl-name "_trigger")))
	 (res          #f))
    (sqlite3:for-each-row
     (lambda (name)







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







929
930
931
932
933
934
935




















936
937
938
939
940
941
942
                               END;" )
	   (list "update_test_data_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data
                             FOR EACH ROW
                               BEGIN 
                                 UPDATE test_data SET last_update=(strftime('%s','now'))
                                   WHERE id=old.id;
                               END;" )))




















(define (db:is-trigger-dropped db tbl-name)
  (let* ((trigger-name (if (equal? tbl-name "test_steps")
			   "update_teststeps_trigger" 
                           (conc "update_" tbl-name "_trigger")))
	 (res          #f))
    (sqlite3:for-each-row
     (lambda (name)
1010
1011
1012
1013
1014
1015
1016




1017
1018
1019
1020
1021
1022
1023
;;======================================================================

;; call with dbinit=db:initialize-main-db
;;
(define (db:open-db dbstruct run-id dbinit)
  ;; (mutex-lock! *db-open-mutex*)
  (let* ((dbdat (dbfile:open-db dbstruct run-id dbinit)))




    (set! *db-write-access* (not (dbr:dbdat-read-only dbdat)))
    ;; (mutex-unlock! *db-open-mutex*)
    dbdat))

(define dbfile:db-init-proc (make-parameter #f))

;; in xmaxima this gives a curve close to what I want:







>
>
>
>







976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
;;======================================================================

;; call with dbinit=db:initialize-main-db
;;
(define (db:open-db dbstruct run-id dbinit)
  ;; (mutex-lock! *db-open-mutex*)
  (let* ((dbdat (dbfile:open-db dbstruct run-id dbinit)))
              #;(case (rmt:transport-mode)
		  ((http) (dbfile:open-db dbstruct run-id dbinit))
		  ((tcp)  (dbmod:open-db  dbstruct run-id dbinit))
		  (else (assert #f "FATAL: rmt:transport-node not correct value"(rmt:transport-mode))))
    (set! *db-write-access* (not (dbr:dbdat-read-only dbdat)))
    ;; (mutex-unlock! *db-open-mutex*)
    dbdat))

(define dbfile:db-init-proc (make-parameter #f))

;; in xmaxima this gives a curve close to what I want:
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
    crumbn))

(define no-condition-db-with-db (make-parameter #t))

;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;")
;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no
;;
(define (db:with-db dbstruct run-id r/w proc . params)
  (assert dbstruct "FATAL: db:with-db called with dbstruct "#f)
  (assert (dbr:dbstruct? dbstruct) "FATAL: dbstruct is "dbstruct)
  (let* ((use-mutex (> *api-process-request-count* 25)) ;; risk of db corruption
	 (have-struct (dbr:dbstruct? dbstruct))
         (dbdat     (if have-struct                ;; this stuff just allows us to call with a db handle directly
			(db:open-db dbstruct run-id (dbfile:db-init-proc)) ;; (dbfile:get-subdb dbstruct run-id)
			#f))







|







1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
    crumbn))

(define no-condition-db-with-db (make-parameter #t))

;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;")
;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no
;;
(define (dbfile:with-db dbstruct run-id r/w proc params)
  (assert dbstruct "FATAL: db:with-db called with dbstruct "#f)
  (assert (dbr:dbstruct? dbstruct) "FATAL: dbstruct is "dbstruct)
  (let* ((use-mutex (> *api-process-request-count* 25)) ;; risk of db corruption
	 (have-struct (dbr:dbstruct? dbstruct))
         (dbdat     (if have-struct                ;; this stuff just allows us to call with a db handle directly
			(db:open-db dbstruct run-id (dbfile:db-init-proc)) ;; (dbfile:get-subdb dbstruct run-id)
			#f))
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
	 (stmth       (hash-table-ref/default stmt-cache stmt #f)))
    (or stmth
	(let* ((newstmth (sqlite3:prepare db stmt)))
	  ;; (db:hoh-set! stmt-cache db stmt newstmth)
	  (hash-table-set! stmt-cache stmt newstmth)
	  newstmth))))

(define (db:have-incompletes? dbstruct run-id ovr-deadtime)
  (let* ((incompleted '())
	 (oldlaunched '())
	 (toplevels   '())
	 ;; (deadtime-str (configf:lookup *configdat* "setup" "deadtime"))
	 (deadtime  (or ovr-deadtime 72000))) ;; twenty hours
    (db:with-db
     dbstruct run-id #f
     (lambda (dbdat db)
       
       ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
       ;;
       ;; HOWEVER: this code in run:test seems to work fine
       ;;              (> (- (current-seconds)(+ (db:test-get-event_time testdat)
       ;;                     (db:test-get-run_duration testdat)))
       ;;                    600) 
       ;; (db:delay-if-busy dbdat)
       (sqlite3:for-each-row 
        (lambda (test-id run-dir uname testname item-path)
          (if (and (equal? uname "n/a")
                   (equal? item-path "")) ;; this is a toplevel test
              ;; what to do with toplevel? call rollup?
              (begin
                (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels)))
                ;; (print-info "Found old toplevel test in RUNNING state, test-id=" test-id))
              (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))
        (db:get-cache-stmth dbdat db
        "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');")
        run-id deadtime)

       ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
       ;;
       ;; (db:delay-if-busy dbdat)
       (sqlite3:for-each-row
        (lambda (test-id run-dir uname testname item-path)
          (if (and (equal? uname "n/a")
                   (equal? item-path "")) ;; this is a toplevel test
              ;; what to do with toplevel? call rollup?
              (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
              (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))
        (db:get-cache-stmth dbdat db
        "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');")
        run-id)
       
       ;; (print-info "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.")
       (if (and (null? incompleted)
                (null? oldlaunched)
                (null? toplevels))
           #f
           #t)))))


)







<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<

<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
1236
1237
1238
1239
1240
1241
1242









1243



















1244













1245









	 (stmth       (hash-table-ref/default stmt-cache stmt #f)))
    (or stmth
	(let* ((newstmth (sqlite3:prepare db stmt)))
	  ;; (db:hoh-set! stmt-cache db stmt newstmth)
	  (hash-table-set! stmt-cache stmt newstmth)
	  newstmth))))












































)









Modified dbmod.scm from [043beb90c3] to [2dfee57e39].

15
16
17
18
19
20
21



22
23
24
25
26




27

28


29




























































30
31






32
33













34





35




















36




37
38
39
40
















41
















































42
43



44


45
46






47
48
49



50



51





























52





53









54




















55
56
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================

(declare (unit dbmod))




(module dbmod
	*
	
(import scheme chicken data-structures extras)




(import (prefix sqlite3 sqlite3:)

	posix typed-records srfi-18


	srfi-69)





























































(define (db:run-id->dbname run-id)






  (cond
   ((number? run-id)(conc run-id ".db"))













   ((not run-id)    "main.db")





   (else            run-id)))


























;;======================================================================
;; hash of hashs
;;======================================================================


































































(define (db:hoh-set! dat key1 key2 val)



  (let* ((subhash (hash-table-ref/default dat key1 #f)))


    (if subhash
	(hash-table-set! subhash key2 val)






	(begin
	  (hash-table-set! dat key1 (make-hash-table))
	  (db:hoh-set! dat key1 key2 val)))))







(define (db:hoh-get dat key1 key2)





























  (let* ((subhash (hash-table-ref/default dat key1 #f)))





    (and subhash









	 (hash-table-ref/default subhash key2 #f))))





















)







>
>
>




|
>
>
>
>
|
>
|
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

|
>
>
>
>
>
>
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>
>
>
>


|

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
<
>
>
>
|
>
>
|
|
>
>
>
>
>
>
|
|
|
>
>
>
|
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224

225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================

(declare (unit dbmod))
(declare (uses dbfile))
(declare (uses commonmod))
(declare (uses debugprint))

(module dbmod
	*
	
(import scheme
	chicken
	data-structures
	extras

	(prefix sqlite3 sqlite3:)
	posix
	typed-records
	srfi-1
	srfi-18
	srfi-69

	commonmod
	dbfile
	debugprint
	)

;; NOTE: This returns only the name "1.db", "main.db", not the path
;;
(define (dbmod:run-id->dbfname run-id)
  (conc (dbfile:run-id->dbnum run-id)".db"))

(define (dbmod:get-dbdir dbstruct)
  (let* ((areapath (dbr:dbstruct-areapath dbstruct))
	 (dbdir    (conc areapath"/.megatest")))
    (if (and (file-write-access? areapath)
	     (not (file-exists? dbdir)))
	(create-directory dbdir))
    dbdir))

(define (dbmod:run-id->full-dbfname dbstruct run-id)
  (conc (dbmod:get-dbdir dbstruct run-id)"/"(dbmod:run-id->dbfname run-id)))

;;======================================================================
;; Read-only inmem cached direct from disk method
;;======================================================================

(define *dbmod:nfs-db-handles* (make-hash-table)) ;; dbfname -> dbstruct

(define (dbmod:nfs-get-dbstruct run-id keys init-proc areapath)
  (assert areapath "FATAL: dbmod:nfs-get-dbstruct called without areapath set.")
  (let* ((dbfname  (dbmod:run-id->dbfname run-id))
	 (dbstruct (hash-table-ref/default *dbmod:nfs-db-handles* dbfname #f)))
    (if dbstruct
	(let* ((last-update (dbr:dbstruct-last-update dbstruct))
	       (curr-secs   (current-seconds)))
	  (if (> (- curr-secs last-update) 2)
	      (begin
		((dbr:dbstruct-sync-proc dbstruct) last-update)
		(dbr:dbstruct-last-update-set! dbstruct curr-secs)))
	  dbstruct)
	(let* ((newdbstruct (dbmod:open-dbmoddb areapath run-id dbfname init-proc keys syncdir: 'fromdisk)))
	  (hash-table-set! *dbmod:nfs-db-handles* dbfname newdbstruct)
	  newdbstruct))))

;;======================================================================
;; The inmem one-db file per server method goes in here
;;======================================================================

(define (dbmod:with-db dbstruct run-id r/w proc params)
  (let* ((dbdat  (dbmod:open-db dbstruct run-id (dbfile:db-init-proc)))
	 (dbh    (dbr:dbdat-dbh dbdat))
	 (dbfile (dbr:dbdat-dbfile dbdat)))
    (apply proc dbdat dbh params)))

(define (dbmod:open-inmem-db initproc)
  (let* ((db      (sqlite3:open-database ":memory:"))
	 (handler (sqlite3:make-busy-timeout 3600)))
    (sqlite3:set-busy-handler! db handler)
    (initproc db)
    db))

(define (dbmod:open-db dbstruct run-id dbinit)
  (or (dbr:dbstruct-dbdat dbstruct)
      (let* ((dbdat (make-dbr:dbdat
		     dbfile: (dbr:dbstruct-dbfile dbstruct)
		     dbh:    (dbr:dbstruct-inmem  dbstruct)
		     )))
	(dbr:dbstruct-dbdat-set! dbstruct dbdat)
	dbdat)))

;; Open the inmem db and the on-disk db
;; populate the inmem db with data
;;
;; Updates fields in dbstruct
;; Returns dbstruct
;;
;; * This routine creates the db if not found
;; * Probably can get rid of the dbstruct-in
;; 
(define (dbmod:open-dbmoddb areapath run-id dbfname-in init-proc keys
			    #!key (dbstruct-in #f)
			    (syncdir 'todisk))
  (let* ((dbstruct     (or dbstruct-in (make-dbr:dbstruct areapath: areapath)))
	 (dbfname      (or dbfname-in (dbmod:run-id->dbfname run-id)))
	 (dbpath       (dbmod:get-dbdir dbstruct))             ;; directory where all the .db files are kept
	 (dbfullname   (conc dbpath"/"dbfname)) ;; (dbmod:run-id->full-dbfname dbstruct run-id))
	 (dbexists     (file-exists? dbfullname))
	 (inmem        (dbmod:open-inmem-db init-proc))
	 (write-access (file-write-access? dbpath))
	 (db           (dbfile:with-simple-file-lock
			(conc dbfullname".lock")
			(lambda ()
			  (let* ((db      (sqlite3:open-database dbfullname))
				 (handler (sqlite3:make-busy-timeout 136000)))
			    (sqlite3:set-busy-handler! db handler)
			    (if write-access
				(init-proc db))
			    db))))
	 (tables       (db:sync-all-tables-list keys)))
    (dbr:dbstruct-inmem-set!    dbstruct inmem)
    (dbr:dbstruct-ondiskdb-set! dbstruct db)
    (dbr:dbstruct-dbfile-set!   dbstruct dbfullname)
    (dbr:dbstruct-sync-proc-set! dbstruct
				 (lambda (last-update)
				   (if (eq? syncdir 'todisk) ;; sync to disk normally, sync from in dashboard
				       (dbmod:sync-tables tables last-update inmem db)
				       (dbmod:sync-tables tables last-update db inmem))))
    (dbmod:sync-tables tables #f db inmem)
    (dbr:dbstruct-last-update-set! dbstruct (current-seconds)) ;; should this be offset back in time by one second?
    dbstruct))

(define (dbmod:close-db dbstruct)
  ;; do final sync to disk file
  ;; (do-sync ...)
  (sqlite3:finalize! (dbr:dbstruct-ondiskdb dbstruct)))

;;======================================================================
;; Sync db
;;======================================================================

(define (dbmod:calc-use-last-update has-last-update fields last-update)
  (cond
   ((and has-last-update
	 (member "last_update" fields))
    #t) ;; if given a number, just use it for all fields
   ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table
   ((and (pair? last-update)
	 (member (car last-update)    ;; last-update field name
		 (map car fields)))
    #t)
   ((and last-update (not (pair? last-update)) (not (number? last-update)))
    (debug:print 0 *default-log-port* "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update);; found in fields
    #f)
   (else
    #f)))

;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) )
;; dbs are sqlite3 db handles
;;
;; if last-update specified ("field-name" . time-in-seconds)
;;    then sync only records where field-name >= time-in-seconds
;;    IFF field-name exists
;;
;; Use (db:sync-all-tables-list keys) to get the tbls input
;;
(define (dbmod:sync-tables tbls last-update fromdb todb)
  (let ((stmts       (make-hash-table)) ;; table-field => stmt
	(all-stmts   '())              ;; ( ( stmt1 value1 ) ( stml2 value2 ))
	(numrecs     (make-hash-table))
	(start-time  (current-milliseconds))
	(tot-count   0))
    (for-each ;; table
     (lambda (tabledat)
       (let* ((tablename        (car tabledat))
	      (fields           (cdr tabledat))
	      (has-last-update  (member "last_update" fields))
	      (use-last-update  (dbmod:calc-use-last-update has-last-update fields last-update))
	      (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for
				     (if (number? last-update)
					 last-update
					 (cdr last-update))
				     #f))
	      (last-update-field (if use-last-update
				     (if (number? last-update)
					 "last_update"
					 (car last-update))
				     #f))
	      (num-fields (length fields))
	      (field->num (make-hash-table))
	      (num->field (apply vector (map car fields))) ;; BBHERE
	      (full-sel   (conc "SELECT " (string-intersperse (map car fields) ",") 
				" FROM " tablename (if use-last-update ;; apply last-update criteria
						       (conc " WHERE " last-update-field " >= " last-update-value)
						       "")
				";"))
	      (full-ins   (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) "
				" VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );"))
	      (fromdat    '())
	      (fromdats   '())
	      (totrecords 0)
	      (batch-len  100) ;; (string->number (or (configf:lookup *configdat* "sync" "batchsize") "100")))
	      (todat      (make-hash-table))
	      (count      0)
              (field-names (map car fields)))
	 

	 ;; set up the field->num table
	 (for-each
	  (lambda (field)
	    (hash-table-set! field->num field count)
	    (set! count (+ count 1)))
	  fields)
	 
	 ;; read the source table
         ;; store a list of all rows in the table in fromdat, up to batch-len.
         ;; Then add fromdat to the fromdats list, clear fromdat and repeat.
	 (sqlite3:for-each-row
	  (lambda (a . b)
	       (set! fromdat (cons (apply vector a b) fromdat))
	       (if (> (length fromdat) batch-len)
		   (begin
		     (set! fromdats (cons fromdat fromdats))
		     (set! fromdat  '())
		     (set! totrecords (+ totrecords 1)))))
	  fromdb
	  full-sel)
	 
         ;; Count less than batch-len as a record
         (if (> (length fromdat) 0)
             (set! totrecords (+ totrecords 1)))
	 
	 ;; tack on remaining records in fromdat
	 (if (not (null? fromdat))
	     (set! fromdats (cons fromdat fromdats)))
	 
	 (sqlite3:for-each-row
	  (lambda (a . b)
	    (hash-table-set! todat a (apply vector a b)))
	  todb
	  full-sel)
	 
	 ;; first pass implementation, just insert all changed rows
	 (let* ((db                 todb)
                (drp-trigger        (if (member "last_update" field-names)
					(db:drop-trigger db tablename) 
					#f))
		(has-last-update    (member "last_update" field-names))
                (is-trigger-dropped (if has-last-update
                                        (db:is-trigger-dropped db tablename)
					#f)) 
		(stmth  (sqlite3:prepare db full-ins))
                (changed-rows 0))
	   (for-each
	    (lambda (fromdat-lst)
	      (sqlite3:with-transaction
	       db
	       (lambda ()
		 (for-each ;; 
		  (lambda (fromrow)
		    (let* ((a    (vector-ref fromrow 0))
			   (curr (hash-table-ref/default todat a #f))
			   (same #t))
		      (let loop ((i 0))
			(if (or (not curr)
				(not (equal? (vector-ref fromrow i)(vector-ref curr i))))
			    (set! same #f))
			(if (and same
				 (< i (- num-fields 1)))
			    (loop (+ i 1))))
		      (if (not same)
			  (begin
			    (apply sqlite3:execute stmth (vector->list fromrow))
			    (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0)))
                            (set! changed-rows (+ changed-rows 1))))))
		  fromdat-lst))))
	    fromdats)
	   
	   (sqlite3:finalize! stmth)
           (if (member "last_update" field-names)
               (db:create-trigger db tablename)))))
     tbls)
    (let* ((runtime      (- (current-milliseconds) start-time))
	   (should-print (or ;; (debug:debug-mode 12)
			  (common:low-noise-print 120 "db sync")
			  (> runtime 500)))) ;; low and high sync times treated as separate.
      (for-each 
       (lambda (dat)
	 (let ((tblname (car dat))
	       (count   (cdr dat)))
	   (set! tot-count (+ tot-count count)))) 
       (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b))))))
    tot-count))

;;======================================================================
;; Moved from dbfile
;;======================================================================


)

Modified dcommon.scm from [2cc987e965] to [4b1e45fd47].

706
707
708
709
710
711
712

713

714
715
716
717
718
719
720
				     #:numcol 7
				     #:numcol-visible 7
				     #:numlin-visible 5
				     ))
	 (colnames       (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId"))
	 (updater        (lambda ()
			   (if (dashboard:monitor-changed? commondat tabdat)

			       (let ((servers  (server:choose-server *toppath* 'all-valid))) ;; (server:get-list *toppath* limit: 10)))

				 (iup:attribute-set! servers-matrix "NUMLIN" (length servers))
				 ;; (set! colnum 0)
				 ;; (for-each (lambda (colname)
				 ;;    	 ;; (print "colnum: " colnum " colname: " colname)
				 ;;    	 (iup:attribute-set! servers-matrix (conc "0:" colnum) colname)
				 ;;    	 (set! colnum (+ 1 colnum)))
				 ;;           colnames)







>
|
>







706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
				     #:numcol 7
				     #:numcol-visible 7
				     #:numlin-visible 5
				     ))
	 (colnames       (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId"))
	 (updater        (lambda ()
			   (if (dashboard:monitor-changed? commondat tabdat)
			       (let ((servers  (case (rmt:transport-mode)
						 ((http)(server:choose-server *toppath* 'all-valid)) ;; (server:get-list *toppath* limit: 10)))
						 (else '()))))
				 (iup:attribute-set! servers-matrix "NUMLIN" (length servers))
				 ;; (set! colnum 0)
				 ;; (for-each (lambda (colname)
				 ;;    	 ;; (print "colnum: " colnum " colname: " colname)
				 ;;    	 (iup:attribute-set! servers-matrix (conc "0:" colnum) colname)
				 ;;    	 (set! colnum (+ 1 colnum)))
				 ;;           colnames)

Modified launch.scm from [a705b2e0b2] to [c865f0bf0e].

31
32
33
34
35
36
37

38
39
40
41
42
43
44

45
46
47
48
49
50
51
(declare (unit launch))
(declare (uses subrun))
(declare (uses common))
(declare (uses commonmod))
(declare (uses configf))
(declare (uses db))
(declare (uses ezsteps))


(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "megatest-fossil-hash.scm")

(import commonmod)


;;======================================================================
;; ezsteps
;;======================================================================

;; ezsteps were going to be coded as
;; stepname[,predstep1,predstep2 ...] [{VAR1=first,second,third}] command to execute







>






|
>







31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
(declare (unit launch))
(declare (uses subrun))
(declare (uses common))
(declare (uses commonmod))
(declare (uses configf))
(declare (uses db))
(declare (uses ezsteps))
(declare (uses dbfile))

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "megatest-fossil-hash.scm")

(import commonmod
	dbfile)

;;======================================================================
;; ezsteps
;;======================================================================

;; ezsteps were going to be coded as
;; stepname[,predstep1,predstep2 ...] [{VAR1=first,second,third}] command to execute
1141
1142
1143
1144
1145
1146
1147
1148



1149
1150
1151
1152
1153
1154
1155
	    (begin
	      (setenv "MT_RUN_AREA_HOME" *toppath*)
	      (setenv "MT_TESTSUITENAME" (common:get-testsuite-name)))
	    (begin
	      (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")
	      (set! *toppath* #f) ;; force it to be false so we return #f
	      #f))
	



        ;; one more attempt to cache the configs for future reading
        (let* ((cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
               (mtcachef     (car cachefiles))
               (rccachef     (cdr cachefiles)))

          ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "...somepath.../.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342
          ;; TODO - consider 1) using simple-lock to bracket cache write







|
>
>
>







1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
	    (begin
	      (setenv "MT_RUN_AREA_HOME" *toppath*)
	      (setenv "MT_TESTSUITENAME" (common:get-testsuite-name)))
	    (begin
	      (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")
	      (set! *toppath* #f) ;; force it to be false so we return #f
	      #f))

	;; needed by various transport and db modules
	(dbfile:testsuite-name (common:get-testsuite-name)) ;; (get-testsuite-name *toppath* *configdat*))

        ;; one more attempt to cache the configs for future reading
        (let* ((cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
               (mtcachef     (car cachefiles))
               (rccachef     (cdr cachefiles)))

          ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "...somepath.../.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342
          ;; TODO - consider 1) using simple-lock to bracket cache write
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
			   (write (list (list 'testpath  test-path)
					;; (list 'transport (conc *transport-type*))
					;; (list 'serverinf *server-info*)
					#;(list 'homehost  (let* ((hhdat (server:get-homehost)))
							   (if hhdat
							       (car hhdat)
							       #f)))
					(list 'serverurl (if *runremote*
							     (remote-server-url *runremote*)
							     #f)) ;;
					(list 'areaname  (common:get-testsuite-name))
					(list 'toppath   *toppath*)
					(list 'work-area work-area)
					(list 'test-name test-name) 
					(list 'runscript runscript) 







|







1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
			   (write (list (list 'testpath  test-path)
					;; (list 'transport (conc *transport-type*))
					;; (list 'serverinf *server-info*)
					#;(list 'homehost  (let* ((hhdat (server:get-homehost)))
							   (if hhdat
							       (car hhdat)
							       #f)))
					#;(list 'serverurl (if *runremote* ;; would like to add this back ... WORK NEEDED
							     (remote-server-url *runremote*)
							     #f)) ;;
					(list 'areaname  (common:get-testsuite-name))
					(list 'toppath   *toppath*)
					(list 'work-area work-area)
					(list 'test-name test-name) 
					(list 'runscript runscript) 

Modified margs.scm from [cc1616820d] to [7a5ab19394].

21
22
23
24
25
26
27










28
29
30
31
32
33
34

(define args:arg-hash (make-hash-table))

(define (args:get-arg arg . default)
  (if (null? default)
      (hash-table-ref/default args:arg-hash arg #f)
      (hash-table-ref/default args:arg-hash arg (car default))))











(define (args:any? . args)
  (not (null? (filter (lambda (x) x)
		      (map args:get-arg args)))))

(define (args:get-arg-from ht arg . default)
  (if (null? default)







>
>
>
>
>
>
>
>
>
>







21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44

(define args:arg-hash (make-hash-table))

(define (args:get-arg arg . default)
  (if (null? default)
      (hash-table-ref/default args:arg-hash arg #f)
      (hash-table-ref/default args:arg-hash arg (car default))))

;; get an arg as a number
(define (args:get-arg-number arg . default)
  (let* ((val-str (args:get-arg arg))
	 (val     (if val-str (string->number val-str) #f)))
    (if val
	val
	(if (null? default)
	    #f
	    default))))

(define (args:any? . args)
  (not (null? (filter (lambda (x) x)
		      (map args:get-arg args)))))

(define (args:get-arg-from ht arg . default)
  (if (null? default)

Modified megatest.scm from [79d9696058] to [f617118e28].

21
22
23
24
25
26
27








28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60


61
62


63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84




85
86
87
88
89
90
91

;; fake out readline usage of toplevel-command
(define (toplevel-command . a) #f)

(declare (uses common))
;; (declare (uses megatest-version))
(declare (uses margs))








(declare (uses runs))
(declare (uses launch))
(declare (uses server))
(declare (uses client))
(declare (uses tests))
(declare (uses genexample))
;; (declare (uses daemon))

(declare (uses db))
;; (declare (uses dcommon))

(declare (uses tdb))
(declare (uses mt))
(declare (uses api))
(declare (uses tasks)) ;; only used for debugging.
(declare (uses env))
(declare (uses diff-report))
(declare (uses db))
(declare (uses dbmod))
(declare (uses dbmod.import))
(declare (uses commonmod))
(declare (uses commonmod.import))
(declare (uses dbfile))
(declare (uses dbfile.import))
;; (declare (uses debugprint))
;; (declare (uses debugprint.import))
;; (declare (uses mtargs))
;; (declare (uses mtargs.import))

;; (declare (uses ftail))
;; (import ftail)

(import dbmod


	commonmod
	dbfile)



(define *db* #f) ;; this is only for the repl, do not use in general!!!!

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "megatest-fossil-hash.scm")

(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (prefix base64 base64:)
     readline apropos json http-client directory-utils typed-records
     http-client srfi-18 extras format)

;; Added for csv stuff - will be removed
;;
(use sparse-vectors)

(require-library mutils)

(define *usage-log-file* #f)    ;; put path to file for logging usage in this var in the ~/.megatestrc file
(define *usage-use-seconds* #t) ;; for Epoc seconds in usage logging change this to #t in ~/.megatestrc file





(dbfile:db-init-proc db:initialize-main-db)

;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file
;;
(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc")))
  (if (common:file-exists? debugcontrolf)
      (load debugcontrolf)))







>
>
>
>
>
>
>
>


















|
|
|
|
|
|


<
<




|
>
>

|
>
>











|










>
>
>
>







21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61


62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105

;; fake out readline usage of toplevel-command
(define (toplevel-command . a) #f)

(declare (uses common))
;; (declare (uses megatest-version))
(declare (uses margs))
(declare (uses mtargs))
(declare (uses mtargs.import))
(declare (uses commonmod))
(declare (uses commonmod.import))
(declare (uses mtargs))
(declare (uses mtargs.import))
(declare (uses debugprint))
(declare (uses debugprint.import))
(declare (uses runs))
(declare (uses launch))
(declare (uses server))
(declare (uses client))
(declare (uses tests))
(declare (uses genexample))
;; (declare (uses daemon))

(declare (uses db))
;; (declare (uses dcommon))

(declare (uses tdb))
(declare (uses mt))
(declare (uses api))
(declare (uses tasks)) ;; only used for debugging.
(declare (uses env))
(declare (uses diff-report))
(declare (uses db))
(declare (uses dbfile))
(declare (uses dbfile.import))
(declare (uses dbmod))
(declare (uses dbmod.import))
(declare (uses tcp-transportmod))
(declare (uses tcp-transportmod.import))
;; (declare (uses debugprint))
;; (declare (uses debugprint.import))



;; (declare (uses ftail))
;; (import ftail)

(import mtargs
        debugprint
	dbmod
	commonmod
	dbfile
	tcp-transportmod
        )

(define *db* #f) ;; this is only for the repl, do not use in general!!!!

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "megatest-fossil-hash.scm")

(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (prefix base64 base64:)
     readline apropos json http-client directory-utils typed-records
     http-client srfi-18 extras format tcp-server tcp)

;; Added for csv stuff - will be removed
;;
(use sparse-vectors)

(require-library mutils)

(define *usage-log-file* #f)    ;; put path to file for logging usage in this var in the ~/.megatestrc file
(define *usage-use-seconds* #t) ;; for Epoc seconds in usage logging change this to #t in ~/.megatestrc file

;; set some parameters here - these need to be put in something that can be loaded from other
;; executables such as dashboard and mtutil
;;
(include "transport-mode.scm")
(dbfile:db-init-proc db:initialize-main-db)

;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file
;;
(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc")))
  (if (common:file-exists? debugcontrolf)
      (load debugcontrolf)))
371
372
373
374
375
376
377

378
379
380
381
382
383
384
			"-override-timeout"
			"-test-files"  ;; -test-paths is for listing all
			"-load"        ;; load and exectute a scheme file
			"-section"
			"-var"
			"-dumpmode"
			"-run-id"

			"-ping"
			"-refdb2dat"
			"-o"
			"-log"
                        "-sync-log"
			"-since"
			"-fields"







>







385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
			"-override-timeout"
			"-test-files"  ;; -test-paths is for listing all
			"-load"        ;; load and exectute a scheme file
			"-section"
			"-var"
			"-dumpmode"
			"-run-id"
			"-db"
			"-ping"
			"-refdb2dat"
			"-o"
			"-log"
                        "-sync-log"
			"-since"
			"-fields"
583
584
585
586
587
588
589
590
591
592
593

594
595
596
597
598
599
600
601
602
;; this segment will run launch:setup only if -log is not set. This is fairly safe as servers are not
;; manually started and thus should never be started in a non-megatest area. Thus no need to handle situation
;; where (launch:setup) returns #f?
;;
(if (or (args:get-arg "-log")(args:get-arg "-server")) ;; redirect the log always when a server
    (handle-exceptions
	exn
	(begin
	  (print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
	  )
      (let* ((tl   (or (args:get-arg "-log")(launch:setup)))   ;; run launch:setup if -server, ensure we do NOT run launch:setup if -log specified

	     (logf (or (args:get-arg "-log") ;; use -log unless we are a server, then craft a logfile name
		       (conc tl "/logs/server-" (current-process-id) "-" (get-host-name) ".log")))
	     (oup  (open-logfile logf)))
	(if (not (args:get-arg "-log"))
	    (hash-table-set! args:arg-hash "-log" logf)) ;; fake out future queries of -log
	(debug:print-info 0 *default-log-port* "Sending log output to " logf)
	(set! *default-log-port* oup))))

(if (or (args:get-arg "-h")







|
|
<

>

|







598
599
600
601
602
603
604
605
606

607
608
609
610
611
612
613
614
615
616
617
;; this segment will run launch:setup only if -log is not set. This is fairly safe as servers are not
;; manually started and thus should never be started in a non-megatest area. Thus no need to handle situation
;; where (launch:setup) returns #f?
;;
(if (or (args:get-arg "-log")(args:get-arg "-server")) ;; redirect the log always when a server
    (handle-exceptions
	exn
      (begin
	(print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))

      (let* ((tl   (or (args:get-arg "-log")(launch:setup)))   ;; run launch:setup if -server, ensure we do NOT run launch:setup if -log specified
	     (dbname (args:get-arg "-db"))   ;; for the server logfile name
	     (logf (or (args:get-arg "-log") ;; use -log unless we are a server, then craft a logfile name
		       (conc tl "/logs/server-"(or dbname "unk")"-"(current-process-id) "-" (get-host-name) ".log")))
	     (oup  (open-logfile logf)))
	(if (not (args:get-arg "-log"))
	    (hash-table-set! args:arg-hash "-log" logf)) ;; fake out future queries of -log
	(debug:print-info 0 *default-log-port* "Sending log output to " logf)
	(set! *default-log-port* oup))))

(if (or (args:get-arg "-h")
919
920
921
922
923
924
925


926
927


928








929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944






945
946
947
948
949
950
951
952
953

954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
;; Start the server - can be done in conjunction with -runall or -runtests (one day...)
;;   we start the server if not running else start the client thread
;;======================================================================

;; Server? Start up here.
;;
(if (args:get-arg "-server")


    (let ((tl        (launch:setup)))
      ;; (server:launch 0 'http)


      (http-transport:launch)








      (set! *didsomething* #t)))

;; The adjutant is a bit different, it does NOT run (launch:setup) as it is not necessarily tied to
;; a specific Megatest area. Detail are being hashed out and this may change.
;;
(if (args:get-arg "-adjutant")
    (begin
      (adjutant-run)
      (set! *didsomething* #t)))

(if (or (args:get-arg "-list-servers")
        (args:get-arg "-kill-servers"))
    (let ((tl (launch:setup)))
      (if tl ;; all roads from here exit
	  (let* ((servers (server:choose-server *toppath* 'all-valid)) ;; (server:get-list *toppath*))
		 (fmtstr  "~33a~22a~20a~20a~8a\n"))






	    (format #t fmtstr "ID" "host:port" "age (hms)" "Last mod" "State")
	    (format #t fmtstr "==" "=========" "=========" "========" "=====")
	    (for-each ;;  ( mod-time host port start-time pid )
	     (lambda (server)
	       (let* ((mtm (any->number (car server)))
		      (mod (if mtm (- (current-seconds) mtm) "unk"))
		      (age (- (current-seconds)(or (any->number (list-ref server 3)) (current-seconds))))
		      (url (conc (cadr server) ":" (caddr server)))
		      (pid (list-ref server 4))

		      (alv (if (number? mod)(< mod 10) #f)))
		 (format #t
			 fmtstr
			 pid
			 url
			 (seconds->hr-min-sec age)
			 (seconds->hr-min-sec mod)
			 (if alv "alive" "dead"))
		 (if (and alv
			  (args:get-arg "-kill-servers"))
		     (begin
		       (debug:print-info 0 *default-log-port* "Attempting to kill server with pid " pid)
		       (server:kill server)))))
	     (sort servers (lambda (a b)
			     (let ((ma (or (any->number (car a)) 9e9))
				   (mb (or (any->number (car b)) 9e9)))
			       (> ma mb)))))
	    ;; (debug:print-info 1 *default-log-port* "Done with listservers")
	    (set! *didsomething* #t)
	    (exit))
	  (exit))))
      ;; must do, would have to add checks to many/all calls below

;;======================================================================
;; Weird special calls that need to run *after* the server has started?







>
>
|
<
>
>
|
>
>
>
>
>
>
>
>
















>
>
>
>
>
>
|
|
|

|

|
<

>
|
















<







934
935
936
937
938
939
940
941
942
943

944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983

984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002

1003
1004
1005
1006
1007
1008
1009
;; Start the server - can be done in conjunction with -runall or -runtests (one day...)
;;   we start the server if not running else start the client thread
;;======================================================================

;; Server? Start up here.
;;
(if (args:get-arg "-server")
    (let* (;; (run-id     (args:get-arg "-run-id"))
	   (dbfname    (args:get-arg "-db"))
	   (tl         (launch:setup))

	   (keys       (keys:config-get-fields *configdat*)))
      (case (rmt:transport-mode)
	((http)(http-transport:launch))
	((tcp)
	 (debug:print 0 *default-log-port* "INFO: Running using tcp method.")
	 (if dbfname
	     (tt:start-server tl #f dbfname api:tcp-dispatch-request-make-handler keys)
	     (begin
	       (debug:print 0 *default-log-port* "ERROR: transport mode is tcp - -db is required.")
	       (exit 1))))
	(else (debug:print 0 *default-log-port* "ERROR: rmt:transport-mode value not recognised "(rmt:transport-mode))))
      (set! *didsomething* #t)))

;; The adjutant is a bit different, it does NOT run (launch:setup) as it is not necessarily tied to
;; a specific Megatest area. Detail are being hashed out and this may change.
;;
(if (args:get-arg "-adjutant")
    (begin
      (adjutant-run)
      (set! *didsomething* #t)))

(if (or (args:get-arg "-list-servers")
        (args:get-arg "-kill-servers"))
    (let ((tl (launch:setup)))
      (if tl ;; all roads from here exit
	  (let* ((servers (server:choose-server *toppath* 'all-valid)) ;; (server:get-list *toppath*))
		 (fmtstr  "~33a~22a~20a~20a~8a\n"))
            (if (not servers)
              (begin
                (debug:print-info 1 *default-log-port* "No servers found")
                (exit)
              )
            )
       	    (format #t fmtstr "PID" "host:port" "age (hms)" "Last mod" "State")
	    (format #t fmtstr "===" "=========" "=========" "========" "=====")
	    (for-each ;;  (ip-addr port? mod-time host port start-time pid )
	     (lambda (server)
	       (let* ((mtm (any->number (caddr server)))
		      (mod (if mtm (- (current-seconds) mtm) "unk"))
		      (age (- (current-seconds)(or (any->number mtm) (current-seconds))))

		      (pid (list-ref server 4))
		      (url (conc (car server) ":" (cadr server)))
		      (alv (if (number? mod)(< mod 360) #f)))
		 (format #t
			 fmtstr
			 pid
			 url
			 (seconds->hr-min-sec age)
			 (seconds->hr-min-sec mod)
			 (if alv "alive" "dead"))
		 (if (and alv
			  (args:get-arg "-kill-servers"))
		     (begin
		       (debug:print-info 0 *default-log-port* "Attempting to kill server with pid " pid)
		       (server:kill server)))))
	     (sort servers (lambda (a b)
			     (let ((ma (or (any->number (car a)) 9e9))
				   (mb (or (any->number (car b)) 9e9)))
			       (> ma mb)))))

	    (set! *didsomething* #t)
	    (exit))
	  (exit))))
      ;; must do, would have to add checks to many/all calls below

;;======================================================================
;; Weird special calls that need to run *after* the server has started?
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
;; NOTE: list-runs and list-db-targets operate on local db!!!
;;
;; IDEA: megatest list -runname blah% ...
;;
(if (or (args:get-arg "-list-runs")
	(args:get-arg "-list-db-targets"))
    (if (launch:setup)
	(let* (;; (dbstruct    (make-dbr:dbstruct path: *toppath* local: (args:get-arg "-local")))
	       (runpatt     (args:get-arg "-list-runs"))
               (access-mode (db:get-access-mode))
	       (testpatt    (common:args-get-testpatt #f))
	       ;; (if (args:get-arg "-testpatt") 
	       ;;  	        (args:get-arg "-testpatt") 
	       ;;  	        "%"))
	       (keys        (rmt:get-keys)) ;; (db:get-keys dbstruct))
	       ;; (runsdat  (db:get-runs dbstruct runpatt #f #f '()))







<
|







1399
1400
1401
1402
1403
1404
1405

1406
1407
1408
1409
1410
1411
1412
1413
;; NOTE: list-runs and list-db-targets operate on local db!!!
;;
;; IDEA: megatest list -runname blah% ...
;;
(if (or (args:get-arg "-list-runs")
	(args:get-arg "-list-db-targets"))
    (if (launch:setup)

	(let* ((runpatt     (args:get-arg "-list-runs"))
               (access-mode (db:get-access-mode))
	       (testpatt    (common:args-get-testpatt #f))
	       ;; (if (args:get-arg "-testpatt") 
	       ;;  	        (args:get-arg "-testpatt") 
	       ;;  	        "%"))
	       (keys        (rmt:get-keys)) ;; (db:get-keys dbstruct))
	       ;; (runsdat  (db:get-runs dbstruct runpatt #f #f '()))
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
;;======================================================================

(if (args:get-arg "-extract-ods")
    (general-run-call
     "-extract-ods"
     "Make ods spreadsheet"
     (lambda (target runname keys keyvals)
       (let ((dbstruct   (make-dbr:dbstruct path: *toppath* local: #t))
	     (outputfile (args:get-arg "-extract-ods"))
	     (runspatt   (or (args:get-arg "-runname")(args:get-arg ":runname")))
	     (pathmod    (args:get-arg "-pathmod")))
	     ;; (keyvalalist (keys->alist keys "%")))
	 (debug:print 2 *default-log-port* "Extract ods, outputfile: " outputfile " runspatt: " runspatt " keyvals: " keyvals)
	 (db:extract-ods-file dbstruct outputfile keyvals (if runspatt runspatt "%") pathmod)
	 (db:close-all dbstruct)







|







2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
;;======================================================================

(if (args:get-arg "-extract-ods")
    (general-run-call
     "-extract-ods"
     "Make ods spreadsheet"
     (lambda (target runname keys keyvals)
       (let ((dbstruct   (make-dbr:dbstruct areapath: *toppath* local: #t))
	     (outputfile (args:get-arg "-extract-ods"))
	     (runspatt   (or (args:get-arg "-runname")(args:get-arg ":runname")))
	     (pathmod    (args:get-arg "-pathmod")))
	     ;; (keyvalalist (keys->alist keys "%")))
	 (debug:print 2 *default-log-port* "Extract ods, outputfile: " outputfile " runspatt: " runspatt " keyvals: " keyvals)
	 (db:extract-ods-file dbstruct outputfile keyvals (if runspatt runspatt "%") pathmod)
	 (db:close-all dbstruct)
2306
2307
2308
2309
2310
2311
2312

2313
2314
2315
2316

2317
2318
2319
2320
2321
2322
2323

(if (args:get-arg "-cleanup-db")
    (begin
      (if (not (launch:setup))
	  (begin
	    (debug:print 0 *default-log-port* "Failed to setup, exiting") 
	    (exit 1)))

      (if (not (server:choose-server *toppath* 'home?))
	  (begin
	    (debug:print 0 *default-log-port* "Servers are not running on this host or no servers alive. Cannot run cleanup-db")
	    (exit 1)))

      (let ((dbstructs (db:setup #f)))
        (common:cleanup-db dbstructs))
      (set! *didsomething* #t)))

(if (args:get-arg "-mark-incompletes")
    (begin
      (if (not (launch:setup))







>
|
|
|
|
>







2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355

(if (args:get-arg "-cleanup-db")
    (begin
      (if (not (launch:setup))
	  (begin
	    (debug:print 0 *default-log-port* "Failed to setup, exiting") 
	    (exit 1)))

;;      (if (not (server:choose-server *toppath* 'home?))
;;	  (begin
;;	    (debug:print 0 *default-log-port* "Servers are not running on this host or no servers alive. Cannot run cleanup-db")
;;	    (exit 1)))

      (let ((dbstructs (db:setup #f)))
        (common:cleanup-db dbstructs))
      (set! *didsomething* #t)))

(if (args:get-arg "-mark-incompletes")
    (begin
      (if (not (launch:setup))
2368
2369
2370
2371
2372
2373
2374


2375
2376
2377
2378
2379
2380
2381
2382
    (exit 0)))

(if (or (getenv "MT_RUNSCRIPT")
	(args:get-arg "-repl")
	(args:get-arg "-load"))
    (let* ((toppath (launch:setup))
	   (dbstructs (if (and toppath


                               (server:choose-server toppath 'home?))
                          (db:setup #t)
                          #f))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f)))
      (if *toppath*
	  (cond
	   ((getenv "MT_RUNSCRIPT")
	    ;; How to run megatest scripts
	    ;;







>
>
|







2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
    (exit 0)))

(if (or (getenv "MT_RUNSCRIPT")
	(args:get-arg "-repl")
	(args:get-arg "-load"))
    (let* ((toppath (launch:setup))
	   (dbstructs (if (and toppath
			       ;; NOTE: server:choose-server is starting a server
			       ;;   either add equivalent for tcp mode or ????
                               #;(server:choose-server toppath 'home?))
                          (db:setup #t)
                          #f))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f)))
      (if *toppath*
	  (cond
	   ((getenv "MT_RUNSCRIPT")
	    ;; How to run megatest scripts
	    ;;

Modified mtargs/mtargs.scm from [c1be4192c1] to [49b76da6ef].

16
17
18
19
20
21
22

23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46










47
48
49
50
51
52
53
;;     along with mtargs.  If not, see <http://www.gnu.org/licenses/>.


(module mtargs
    (
     arg-hash
     get-arg

     get-arg-from
     get-args
     usage
     print-args
     any-defined?
     )

(import scheme) ;; gives us cond-expand in chicken-4

(cond-expand
 (chicken-5
  (import scheme (chicken base) (chicken port) (chicken file) (chicken process-context))
  (import srfi-69 srfi-1))
 (chicken-4
  (import chicken posix srfi-69 srfi-1))
 (else))

(define usage (make-parameter print))
(define arg-hash (make-hash-table))

(define (get-arg arg . default)
  (if (null? default)
      (hash-table-ref/default arg-hash arg #f)
      (hash-table-ref/default arg-hash arg (car default))))











(define (any-defined? . args)
  (not (null? (filter (lambda (x) x)
		      (map get-arg args)))))

;; (define any any-defined?)








>





|


















>
>
>
>
>
>
>
>
>
>







16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
;;     along with mtargs.  If not, see <http://www.gnu.org/licenses/>.


(module mtargs
    (
     arg-hash
     get-arg
     get-arg-number
     get-arg-from
     get-args
     usage
     print-args
     any-defined?
     ) 

(import scheme) ;; gives us cond-expand in chicken-4

(cond-expand
 (chicken-5
  (import scheme (chicken base) (chicken port) (chicken file) (chicken process-context))
  (import srfi-69 srfi-1))
 (chicken-4
  (import chicken posix srfi-69 srfi-1))
 (else))

(define usage (make-parameter print))
(define arg-hash (make-hash-table))

(define (get-arg arg . default)
  (if (null? default)
      (hash-table-ref/default arg-hash arg #f)
      (hash-table-ref/default arg-hash arg (car default))))

;; get an arg as a number
(define (get-arg-number arg . default)
  (let* ((val-str (get-arg arg))
	 (val     (if val-str (string->number val-str) #f)))
    (if val
	val
	(if (null? default)
	    #f
	    default))))

(define (any-defined? . args)
  (not (null? (filter (lambda (x) x)
		      (map get-arg args)))))

;; (define any any-defined?)

Modified rmt.scm from [a2b373e5f1] to [73327553e5].

19
20
21
22
23
24
25

26



27
28
29

30
31











32
33
34
35
36
37
38
;;======================================================================

(use format typed-records) ;; RADT => purpose of json format??

(declare (unit rmt))
(declare (uses api))
(declare (uses http-transport))

(declare (uses dbfile))



(include "common_records.scm")
;; (declare (uses rmtmod))


(import dbfile) ;; rmtmod)












;;
;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
;;

;; generate entries for ~/.megatestrc with the following
;;
;;  grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u







>

>
>
>



>


>
>
>
>
>
>
>
>
>
>
>







19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
;;======================================================================

(use format typed-records) ;; RADT => purpose of json format??

(declare (unit rmt))
(declare (uses api))
(declare (uses http-transport))
(declare (uses commonmod))
(declare (uses dbfile))
;; (declare (uses dbmemmod))
(declare (uses dbmod))
(declare (uses tcp-transportmod))
(include "common_records.scm")
;; (declare (uses rmtmod))

;; used by http-transport
(import dbfile) ;; rmtmod)

(import commonmod
;; 	dbmemmod
	dbfile
	dbmod
	tcp-transportmod)

;; http - use the old http + in /tmp db
;; tcp  - use tcp transport with inmem db
;; nfs  - use direct to disk access (read-only)
;;
(define rmt:transport-mode (make-parameter 'http))
;;
;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
;;

;; generate entries for ~/.megatestrc with the following
;;
;;  grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u
59
60
61
62
63
64
65





66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85



86




87
88
89
90
91
92
93
94
95
96
97

98
99
100
101
102
103
104
105
106
107
108


109





110




111







112




113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
  (let* ((hh-dat (remote-hh-dat runremote)))
    (if (pair? hh-dat)
	(cdr hh-dat)
	(begin
	  (debug:print-info 0 *default-log-port* "hh-dat="hh-dat)
	  #f))))







;;======================================================================

(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected

  #;(common:telemetry-log (conc "rmt:"(->string cmd))
                        payload: `((rid . ,rid)
                                   (params . ,params)))

  (if (> attemptnum 2)
      (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum))
    
  (cond
   ((> attemptnum 2) (thread-sleep! 0.05))
   ((> attemptnum 10) (thread-sleep! 0.5))
   ((> attemptnum 20) (thread-sleep! 1)))



  (if (and (> attemptnum 5) (= 0 (modulo attemptnum 15)))  




    (begin (server:run *toppath*) (thread-sleep! 3))) 
  
  
  ;;DOT digraph megatest_state_status {
  ;;DOT   ranksep=0;
  ;;DOT   // rankdir=LR;
  ;;DOT   node [shape="box"];
  ;;DOT "rmt:send-receive" -> MUTEXLOCK;
  ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; }
  ;; do all the prep locked under the rmt-mutex
  (mutex-lock! *rmt-mutex*)

  
  ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote
  ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
  ;; 3. do the query, if on homehost use local access
  ;;
  (let* ((start-time    (current-seconds)) ;; snapshot time so all use cases get same value
         (areapath      *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas
	 (runremote     (or area-dat
			    *runremote*))
         (attemptnum    (+ 1 attemptnum))
	 (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*)))








    ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity




    ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"];







    ;; DOT INIT_RUNREMOTE -> MUTEXLOCK;




    ;; ensure we have a record for our connection for given area
    (if (not runremote)                   ;; can remove this one. should never get here.         
	(begin
	  (set! *runremote* (make-remote))
          (let* ((server-info (remote-server-info *runremote*))) 
            (if server-info
		(begin
			(remote-server-url-set! *runremote* (server:record->url server-info))
			(remote-server-id-set! *runremote* (server:record->id server-info)))))  
	  (set! runremote   *runremote*))) ;; new runremote will come from this on next iteration

    (http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode)))

(define (http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode)
  ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity
  ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"];
  ;; DOT SET_HOMEHOST -> MUTEXLOCK;
  ;; ensure we have a homehost record
  (if (or (not (pair? (remote-hh-dat runremote)))  ;; not on homehost
	  (not (cdr (remote-hh-dat runremote))))   ;; not on homehost
      (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
      (let ((hh-data (server:choose-server areapath 'homehost)))
	(remote-hh-dat-set! runremote (or hh-data (cons #f #f)))))
  
  ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
  (cond
   #;((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds
   (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.")
   (set! *runremote* #f)
   ;; BUG: close-connections should go here?
   (mutex-unlock! *rmt-mutex*)
   (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat))
   
   ;;DOT EXIT;
   ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" }
   ;; give up if more than 150 attempts
   ((> attemptnum 150)
    (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.")
    (exit 1))

   ;;DOT CASE2 [label="local\nreadonly\nquery"];
   ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2}
   ;;DOT CASE2 -> "rmt:open-qry-close-locally";
   ;; readonly mode, read request-  handle it - case 2
   ((and readonly-mode
         (member cmd api:read-only-queries)) 
    (mutex-unlock! *rmt-mutex*)
    (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2")
    (rmt:open-qry-close-locally cmd 0 params)
    )

   ;;DOT CASE3 [label="write in\nread-only mode"];
   ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3}
   ;;DOT CASE3 -> "#f";
   ;; readonly mode, write request.  Do nothing, return #f
   (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params))

   ;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
   ;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
   ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
   ;;
   ;;DOT CASE4 [label="reset\nconnection"];
   ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4}
   ;;DOT CASE4 -> "rmt:send-receive";
   ;; reset the connection if it has been unused too long
   ((and runremote
         (remote-api-url runremote)
	 (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
	    (+ (remote-last-access runremote)
	       (remote-server-timeout runremote))))
    (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses in " (remote-server-timeout runremote) " seconds, forcing new connection.")
    (http-transport:close-connections runremote)
    ;; moving this setting of runremote conndat to #f to inside the http-transport:close-connections
    ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
    (mutex-unlock! *rmt-mutex*)
    (rmt:send-receive cmd rid params attemptnum: attemptnum))
   
   ;;DOT CASE5 [label="local\nread"];
   ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5};
   ;;DOT CASE5 -> "rmt:open-qry-close-locally";

   ;; on homehost and this is a read
   ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
	 (rmt:on-homehost? runremote)
         (member cmd api:read-only-queries))   ;; this is a read
    (mutex-unlock! *rmt-mutex*)
    (debug:print-info 12 *default-log-port* "rmt:send-receive, case  5")
    (rmt:open-qry-close-locally cmd 0 params))

   ;;DOT CASE6 [label="init\nremote"];
   ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6};
   ;;DOT CASE6 -> "rmt:send-receive";
   ;; on homehost and this is a write, we already have a server, but server has died

   ;; reinstate this keep-alive section but inject a time condition into the (add ...
   ;;
   ;; ((and (cdr (remote-hh-dat runremote))           ;; on homehost
   ;;       (not (member cmd api:read-only-queries))  ;; this is a write
   ;;       (remote-server-url runremote)             ;; have a server
   ;;       (not (server:ping (remote-server-url runremote) (remote-server-id runremote))))  ;; server has died. NOTE: this is not a cheap call! Need better approach.
   ;;  (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6")
   ;;  (http-transport:close-connections area-dat: runremote) ;; make sure to clean up
   ;;  (set! *runremote* (make-remote))
   ;;  (let* ((server-info (remote-server-info *runremote*))) 
   ;;        (if server-info
   ;; 		(begin
   ;; 		  (remote-server-url-set! *runremote* (server:record->url server-info))
   ;;              (remote-server-id-set! *runremote* (server:record->id server-info)))))
   ;;  (remote-force-server-set! runremote (common:force-server?))
   ;;  (mutex-unlock! *rmt-mutex*)
   ;;  (debug:print-info 12 *default-log-port* "rmt:send-receive, case  6")
   ;;  (rmt:send-receive cmd rid params attemptnum: attemptnum))

   ;;DOT CASE7 [label="homehost\nwrite"];
   ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7};
   ;;DOT CASE7 -> "rmt:open-qry-close-locally";
   ;; on homehost and this is a write, we already have a server
   ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
	 (cdr (remote-hh-dat runremote))           ;; on homehost
         (not (member cmd api:read-only-queries))  ;; this is a write
         (remote-server-url runremote))            ;; have a server (needed to sync written data back)
    (mutex-unlock! *rmt-mutex*)
    (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4.1")
    (rmt:open-qry-close-locally cmd 0 params))

   ;;DOT CASE8 [label="force\nserver"];
   ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8};
   ;;DOT CASE8 -> "rmt:open-qry-close-locally";
   ;;  on homehost, no server contact made and this is a write, passively start a server 
   ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
	 (cdr (remote-hh-dat runremote))           ;; have homehost
         (not (remote-server-url runremote))       ;; no connection yet
	 (not (member cmd api:read-only-queries))) ;; not a read-only query
    (debug:print-info 12 *default-log-port* "rmt:send-receive, case  8")
    (let ((server-info  (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call







>
>
>
>
>








|
<
<
<
<


|




>
>
>

>
>
>
>
|
|
|
<
<
<
<
<
<
<
|
>










|
>
>

>
>
>
>
>
|
>
>
>
>
|
>
>
>
>
>
>
>
|
>
>
>
>
|
|
|
|
|
|
|
|
|
|

<
<
<
<
<
<







<

<
<
<
<
<
<
<
<
<





<
<
<








<
<
<







<
<
<













<
<
<
<








<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<









<
<
<







75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95




96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113







114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163






164
165
166
167
168
169
170

171









172
173
174
175
176



177
178
179
180
181
182
183
184



185
186
187
188
189
190
191



192
193
194
195
196
197
198
199
200
201
202
203
204




205
206
207
208
209
210
211
212



























213
214
215
216
217
218
219
220
221



222
223
224
225
226
227
228
  (let* ((hh-dat (remote-hh-dat runremote)))
    (if (pair? hh-dat)
	(cdr hh-dat)
	(begin
	  (debug:print-info 0 *default-log-port* "hh-dat="hh-dat)
	  #f))))

(define (make-and-init-remote areapath)
   (case (rmt:transport-mode)
     ((http)(make-remote))
     ((tcp) (tt:make-remote areapath))
     (else #f)))

;;======================================================================

(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected
  (assert *toppath* "FATAL: rmt:send-receive called with *toppath* not set.")




  (if (> attemptnum 2)
      (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum))
  
  (cond
   ((> attemptnum 2) (thread-sleep! 0.05))
   ((> attemptnum 10) (thread-sleep! 0.5))
   ((> attemptnum 20) (thread-sleep! 1)))

  ;; I'm turning this off, it may make sense to move it
  ;; into http-transport-handler
  (if (and (> attemptnum 5) (= 0 (modulo attemptnum 15)))  
      (begin
	(debug:print 0 *default-log-port* "ERROR: can't connect to server, trying to start a server.")
	(case (rmt:transport-mode)
	  ((http)
	   (server:run *toppath*)
	   (thread-sleep! 3))
	  (else







	   (thread-sleep! 1) ;; for tcp the server is started by routines in tcp-transportmod. For nfs there is no server
	   ))))
  
  ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote
  ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
  ;; 3. do the query, if on homehost use local access
  ;;
  (let* ((start-time    (current-seconds)) ;; snapshot time so all use cases get same value
         (areapath      *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas
	 (runremote     (or area-dat
			    *runremote*))
         (attemptnum    (+ 1 attemptnum))
	 (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*))
	 (testsuite     (common:get-testsuite-name))
	 (mtexe         (common:find-local-megatest)))

    (case (rmt:transport-mode)
      ((http)(http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode))
      ((tcp) (tcp-transport-handler  runremote cmd rid params attemptnum area-dat areapath readonly-mode testsuite mtexe))
      ((nfs) (nfs:transport-handler  runremote cmd rid params attemptnum area-dat areapath readonly-mode testsuite mtexe))
      )))

(define (nfs:transport-handler  runremote cmd run-id params attemptnum area-dat areapath readonly-mode testsuite mtexe)
  (let* ((keys     (common:get-fields *configdat*))
	 (dbstruct (dbmod:nfs-get-dbstruct run-id keys (dbfile:db-init-proc) areapath)))
    (api:dispatch-request dbstruct cmd run-id params)))
	 
(define (tcp-transport-handler runremote cmd run-id params attemptnum area-dat areapath readonly-mode testsuite mtexe)
  (if (not runremote)
      (let* ((newremote  (make-and-init-remote areapath)))
	(set! *runremote* newremote)
	(set! runremote newremote)))
  (let* ((dbfname (conc (dbfile:run-id->dbnum run-id)".db"))) ;;(dbfile:run-id->path areapath run-id)))
    (tt:handler runremote cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe)))
	
(define (http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode)
    ;; do all the prep locked under the rmt-mutex
  (mutex-lock! *rmt-mutex*)
  
  ;; ensure we have a record for our connection for given area
  (if (not runremote)                   ;; can remove this one. should never get here.         
      (begin
	(set! *runremote* (make-and-init-remote areapath))
        (let* ((server-info (remote-server-info *runremote*))) 
          (if server-info
	      (begin
		(remote-server-url-set! *runremote* (server:record->url server-info))
		(remote-server-id-set! *runremote* (server:record->id server-info)))))  
	(set! runremote   *runremote*))) ;; new runremote will come from this on next iteration







  ;; ensure we have a homehost record
  (if (or (not (pair? (remote-hh-dat runremote)))  ;; not on homehost
	  (not (cdr (remote-hh-dat runremote))))   ;; not on homehost
      (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
      (let ((hh-data (server:choose-server areapath 'homehost)))
	(remote-hh-dat-set! runremote (or hh-data (cons #f #f)))))
  

  (cond









   ;; give up if more than 150 attempts
   ((> attemptnum 150)
    (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.")
    (exit 1))




   ;; readonly mode, read request-  handle it - case 2
   ((and readonly-mode
         (member cmd api:read-only-queries)) 
    (mutex-unlock! *rmt-mutex*)
    (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2")
    (rmt:open-qry-close-locally cmd 0 params)
    )




   ;; readonly mode, write request.  Do nothing, return #f
   (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params))

   ;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
   ;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
   ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
   ;;



   ;; reset the connection if it has been unused too long
   ((and runremote
         (remote-api-url runremote)
	 (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
	    (+ (remote-last-access runremote)
	       (remote-server-timeout runremote))))
    (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses in " (remote-server-timeout runremote) " seconds, forcing new connection.")
    (http-transport:close-connections runremote)
    ;; moving this setting of runremote conndat to #f to inside the http-transport:close-connections
    ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
    (mutex-unlock! *rmt-mutex*)
    (rmt:send-receive cmd rid params attemptnum: attemptnum))
   




   ;; on homehost and this is a read
   ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
	 (rmt:on-homehost? runremote)
         (member cmd api:read-only-queries))   ;; this is a read
    (mutex-unlock! *rmt-mutex*)
    (debug:print-info 12 *default-log-port* "rmt:send-receive, case  5")
    (rmt:open-qry-close-locally cmd 0 params))




























   ;; on homehost and this is a write, we already have a server
   ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
	 (cdr (remote-hh-dat runremote))           ;; on homehost
         (not (member cmd api:read-only-queries))  ;; this is a write
         (remote-server-url runremote))            ;; have a server (needed to sync written data back)
    (mutex-unlock! *rmt-mutex*)
    (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4.1")
    (rmt:open-qry-close-locally cmd 0 params))




   ;;  on homehost, no server contact made and this is a write, passively start a server 
   ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
	 (cdr (remote-hh-dat runremote))           ;; have homehost
         (not (remote-server-url runremote))       ;; no connection yet
	 (not (member cmd api:read-only-queries))) ;; not a read-only query
    (debug:print-info 12 *default-log-port* "rmt:send-receive, case  8")
    (let ((server-info  (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298

   ;;DOT CASE11 [label="send_receive"];
   ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11};
   ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"];
   ;;DOT CASE11 -> "RESULT" [label="call succeeded"];
   ;; not on homehost, do server query
   (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid))))
;;DOT }

;; bunch of small functions factored out of send-receive to make debug easier
;;

(define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)
  ;; (mutex-unlock! *rmt-mutex*)
  (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9")







<







265
266
267
268
269
270
271

272
273
274
275
276
277
278

   ;;DOT CASE11 [label="send_receive"];
   ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11};
   ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"];
   ;;DOT CASE11 -> "RESULT" [label="call succeeded"];
   ;; not on homehost, do server query
   (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid))))


;; bunch of small functions factored out of send-receive to make debug easier
;;

(define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)
  ;; (mutex-unlock! *rmt-mutex*)
  (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9")
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
			     (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
    (mutex-unlock! *db-stats-mutex*)
    res))

(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
  (let* ((qry-is-write    (not (member cmd api:read-only-queries)))
	 (db-file-path    (db:dbfile-path)) ;;  0))
	 (dbstructs-local (db:setup #t))  ;; make-dbr:dbstruct path:  dbdir local: #t)))
	 (read-only       (not (file-write-access? db-file-path)))
	 (start           (current-milliseconds))
	 (resdat          (if (not (and read-only qry-is-write))
			      (let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params))))
			;;	(handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
			;;	 exn               ;;  This is an attempt to detect that situation and recover gracefully
			;;	 (begin







|







351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
			     (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
    (mutex-unlock! *db-stats-mutex*)
    res))

(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
  (let* ((qry-is-write    (not (member cmd api:read-only-queries)))
	 (db-file-path    (db:dbfile-path)) ;;  0))
	 (dbstructs-local (db:setup #t))
	 (read-only       (not (file-write-access? db-file-path)))
	 (start           (current-milliseconds))
	 (resdat          (if (not (and read-only qry-is-write))
			      (let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params))))
			;;	(handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
			;;	 exn               ;;  This is an attempt to detect that situation and recover gracefully
			;;	 (begin
1019
1020
1021
1022
1023
1024
1025
1026
1027


1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038












1039
1040
1041
1042
1043
1044
1045
(define (rmt:test-set-archive-block-id run-id test-id archive-block-id)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id)))

(define (rmt:test-get-archive-block-info archive-block-id)
  (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id)))


(define (rmtmod:calc-ro-mode runremote *toppath*)


  (if (and runremote
	   (remote-ro-mode-checked runremote))
      (remote-ro-mode runremote)
      (let* ((mtcfgfile  (conc *toppath* "/megatest.config"))
	     (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
	(if runremote
	    (begin
	      (remote-ro-mode-set! runremote ro-mode)
	      (remote-ro-mode-checked-set! runremote #t)
	      ro-mode)
	    ro-mode))))













(define (extras-readonly-mode rmt-mutex log-port cmd params)
  (mutex-unlock! rmt-mutex)
  (debug:print-info 12 log-port "rmt:send-receive, case 3")
  (debug:print 0 log-port "WARNING: write transaction requested on a readonly area.  cmd="cmd" params="params)
  #f)








<

>
>
|
|
|
|
|
|
|
|
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>







999
1000
1001
1002
1003
1004
1005

1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
(define (rmt:test-set-archive-block-id run-id test-id archive-block-id)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id)))

(define (rmt:test-get-archive-block-info archive-block-id)
  (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id)))


(define (rmtmod:calc-ro-mode runremote *toppath*)
  (case (rmt:transport-mode)
    ((http)
     (if (and runremote
	      (remote-ro-mode-checked runremote))
	 (remote-ro-mode runremote)
	 (let* ((mtcfgfile  (conc *toppath* "/megatest.config"))
		(ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
	   (if runremote
	       (begin
		 (remote-ro-mode-set! runremote ro-mode)
		 (remote-ro-mode-checked-set! runremote #t)
		 ro-mode)
	       ro-mode))))
    ((tcp)
     (if (and runremote
	      (tt-ro-mode-checked runremote))
	 (tt-ro-mode runremote)
	 (let* ((mtcfgfile  (conc *toppath* "/megatest.config"))
		(ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
	   (if runremote
	       (begin
		 (tt-ro-mode-set! runremote ro-mode)
		 (tt-ro-mode-checked-set! runremote #t)
		 ro-mode)
	       ro-mode))))))

(define (extras-readonly-mode rmt-mutex log-port cmd params)
  (mutex-unlock! rmt-mutex)
  (debug:print-info 12 log-port "rmt:send-receive, case 3")
  (debug:print 0 log-port "WARNING: write transaction requested on a readonly area.  cmd="cmd" params="params)
  #f)

Modified runs.scm from [e0e242ce63] to [9ed9863b9e].

20
21
22
23
24
25
26

27
28
29
30
31
32
33
34
35
36
37
38
39
40
41


42
43
44
45
46
47
48
(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (srfi 18) 
     posix-extras directory-utils pathname-expand typed-records format  sxml-serializer
     sxml-modifications matchable)

(declare (unit runs))
(declare (uses db))
(declare (uses common))

(declare (uses items))
(declare (uses runconfig))
(declare (uses tests))
(declare (uses server))
(declare (uses mt))
(declare (uses archive))
;; (declare (uses filedb))

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "test_records.scm")

;; (include "debugger.scm")



;; use this struct to facilitate refactoring
;;

(defstruct runs:dat
  reglen regfull
  runname max-concurrent-jobs run-id







>















>
>







20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (srfi 18) 
     posix-extras directory-utils pathname-expand typed-records format  sxml-serializer
     sxml-modifications matchable)

(declare (unit runs))
(declare (uses db))
(declare (uses common))
(declare (uses commonmod))
(declare (uses items))
(declare (uses runconfig))
(declare (uses tests))
(declare (uses server))
(declare (uses mt))
(declare (uses archive))
;; (declare (uses filedb))

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "test_records.scm")

;; (include "debugger.scm")

(import commonmod)

;; use this struct to facilitate refactoring
;;

(defstruct runs:dat
  reglen regfull
  runname max-concurrent-jobs run-id

Modified server.scm from [1c00c07593] to [1ebaa53b59].

99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126

(define (server:get-server-id)
  (if *server-id* *server-id*
      (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic
        (set! *server-id* sig)
        *server-id*)))

;; When using zmq this would send the message back (two step process)
;; with spiffy or rpc this simply returns the return data to be returned
;; 
(define (server:reply return-addr query-sig success/fail result)
  (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
  ;; (send-message pubsock target send-more: #t)
  ;; (send-message pubsock 
  (case (server:get-transport)
    ((rpc)  (db:obj->string (vector success/fail query-sig result)))
    ((http) (db:obj->string (vector success/fail query-sig result)))
    ((fs)   result)
    (else 
     (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*)
     result)))

;; Given an area path,  start a server process    ### NOTE ### > file 2>&1 
;; if the target-host is set 
;; try running on that host
;;   incidental: rotate logs in logs/ dir.
;;
(define  (server:run areapath) ;; areapath is *toppath* for a given testsuite area







|
|
|
|
|
|
|
|
|
|
|
|
|
|







99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126

(define (server:get-server-id)
  (if *server-id* *server-id*
      (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic
        (set! *server-id* sig)
        *server-id*)))

;; ;; When using zmq this would send the message back (two step process)
;; ;; with spiffy or rpc this simply returns the return data to be returned
;; ;; 
;; (define (server:reply return-addr query-sig success/fail result)
;;   (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
;;   ;; (send-message pubsock target send-more: #t)
;;   ;; (send-message pubsock 
;;   (case (server:get-transport)
;;     ((rpc)  (db:obj->string (vector success/fail query-sig result)))
;;     ((http) (db:obj->string (vector success/fail query-sig result)))
;;     ((fs)   result)
;;     (else 
;;      (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*)
;;      result)))

;; Given an area path,  start a server process    ### NOTE ### > file 2>&1 
;; if the target-host is set 
;; try running on that host
;;   incidental: rotate logs in logs/ dir.
;;
(define  (server:run areapath) ;; areapath is *toppath* for a given testsuite area
467
468
469
470
471
472
473

474
475
476
477
478
479
480
  ;; find oldest alive
  ;;   1. sort by age ascending and ping until good
  ;; find alive rand from youngest
  ;;   1. sort by age descending
  ;;   2. take five
  ;;   3. check alive, discard if not and repeat
  ;; first we clean up old server files

  (server:clean-up-old areapath)
  (let* ((since-last (- (current-seconds) server-last-start))
        (server-start-delay 10))     
    (if ( < (- (current-seconds) server-last-start) 10 )
      (begin
        (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
        (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds")







>







467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
  ;; find oldest alive
  ;;   1. sort by age ascending and ping until good
  ;; find alive rand from youngest
  ;;   1. sort by age descending
  ;;   2. take five
  ;;   3. check alive, discard if not and repeat
  ;; first we clean up old server files
  (assert (eq? (rmt:transport-mode) 'http) "FATAL: server:run called with rmt:transport-mode="(rmt:transport-mode))
  (server:clean-up-old areapath)
  (let* ((since-last (- (current-seconds) server-last-start))
        (server-start-delay 10))     
    (if ( < (- (current-seconds) server-last-start) 10 )
      (begin
        (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
        (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds")
566
567
568
569
570
571
572

573

574
575
576
577
578
579
580
		 (debug:print 0 *default-log-port* "WARNING: failed to delete old server info file "sfile)
		 (delete-file sfile))))))
     sfiles)))

;; would like to eventually get rid of this
;;
(define (common:on-homehost?)

  (server:choose-server *toppath* 'home?))


;; kind start up of server, wait before allowing another server for a given
;; area to be launched
;;
(define (server:kind-run areapath)
  ;; look for $MT_RUN_AREA_HOME/logs/server-start-last
  ;; and wait for it to be at least <server idletime> seconds old







>
|
>







567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
		 (debug:print 0 *default-log-port* "WARNING: failed to delete old server info file "sfile)
		 (delete-file sfile))))))
     sfiles)))

;; would like to eventually get rid of this
;;
(define (common:on-homehost?)
  (if (eq? (rmt:transport-mode) 'http)
      (server:choose-server *toppath* 'home?)
      #t)) ;; there is no homehost for tcp and nfs is always on home so #t should work

;; kind start up of server, wait before allowing another server for a given
;; area to be launched
;;
(define (server:kind-run areapath)
  ;; look for $MT_RUN_AREA_HOME/logs/server-start-last
  ;; and wait for it to be at least <server idletime> seconds old
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688

(define (server:kill servr)
  (handle-exceptions
    exn
    (begin 
      (debug:print-info 0 *default-log-port*  "Unable to get host and/or port from " servr ", exn=" exn)     
    #f)
  (match-let (((mod-time hostname port start-time server-id pid)
	       servr))
    (tasks:kill-server hostname pid))))

;; called in megatest.scm, host-port is string hostname:port
;;
;; NOTE: This is NOT called directly from clients as not all transports support a client running
;;       in the same process as the server.
;;
(define (server:ping host:port server-id #!key (do-exit #f))
  (let* ((host-port (cond
		     ((string? host:port)
		      (let ((slst (string-split   host:port ":")))
			(if (eq? (length slst) 2)
			    (list (car slst)(string->number (cadr slst)))
			    #f)))
		     (else
		      #f))))
    (cond
     ((and (list? host-port)
	   (eq? (length host-port) 2))
      (let* ((myrunremote (make-remote))
	     (iface       (car host-port))
	     (port        (cadr host-port))
	     (server-dat  (client:connect iface port server-id myrunremote))
	     (login-res   (rmt:login-no-auto-client-setup myrunremote)))
	(http-transport:close-connections myrunremote)
	(if (and (list? login-res)
		 (car login-res))







|




















|







656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691

(define (server:kill servr)
  (handle-exceptions
    exn
    (begin 
      (debug:print-info 0 *default-log-port*  "Unable to get host and/or port from " servr ", exn=" exn)     
    #f)
  (match-let (((hostname port start-time server-id pid)
	       servr))
    (tasks:kill-server hostname pid))))

;; called in megatest.scm, host-port is string hostname:port
;;
;; NOTE: This is NOT called directly from clients as not all transports support a client running
;;       in the same process as the server.
;;
(define (server:ping host:port server-id #!key (do-exit #f))
  (let* ((host-port (cond
		     ((string? host:port)
		      (let ((slst (string-split   host:port ":")))
			(if (eq? (length slst) 2)
			    (list (car slst)(string->number (cadr slst)))
			    #f)))
		     (else
		      #f))))
    (cond
     ((and (list? host-port)
	   (eq? (length host-port) 2))
      (let* ((myrunremote (make-and-init-remote *toppath*))
	     (iface       (car host-port))
	     (port        (cadr host-port))
	     (server-dat  (client:connect iface port server-id myrunremote))
	     (login-res   (rmt:login-no-auto-client-setup myrunremote)))
	(http-transport:close-connections myrunremote)
	(if (and (list? login-res)
		 (car login-res))

Added tcp-transportmod.scm version [9959f6dce6].



























































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
;;======================================================================
;; Copyright 2017, Matthew Welland.
;; 
;; This file is part of Megatest.
;; 
;;     Megatest is free software: you can redistribute it and/or modify
;;     it under the terms of the GNU General Public License as published by
;;     the Free Software Foundation, either version 3 of the License, or
;;     (at your option) any later version.
;; 
;;     Megatest is distributed in the hope that it will be useful,
;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;     GNU General Public License for more details.
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================

(declare (unit tcp-transportmod))
(declare (uses debugprint))
(declare (uses commonmod))
(declare (uses dbfile))
(declare (uses dbmod))

(use address-info)

(module tcp-transportmod
	*
	
  (import scheme
	  (prefix sqlite3 sqlite3:)
	  chicken
	  data-structures

	  address-info
	  directory-utils
	  extras
	  files
	  hostinfo
	  matchable
	  md5
	  message-digest
	  ports
	  posix
	  regex
	  regex-case
	  s11n
	  srfi-1
	  srfi-18
	  srfi-4
	  srfi-69
	  stack
	  typed-records
	  tcp-server
	  tcp
	  
	  debugprint
	  commonmod
	  dbfile
	  dbmod
	)

;;======================================================================
;; client
;;======================================================================

;; (define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic

;; Used ONLY for client
;;
(defstruct tt-conn
  host
  port
  host-port
  dbfname
  server-id
  server-start
  pid
)

;; Used for BOTH clients and servers
(defstruct tt
  ;; client related
  (conns (make-hash-table)) ;; dbfname -> conn

  ;; server related
  (areapath     #f)
  (host         #f)
  (port         #f)
  (conn         #f)
  (cleanup-proc #f)
  (handler      #f) ;; receives data and responds
  (socket       #f)
  (thread       #f)
  (host-port    #f)
  (cmd-thread   #f)
  (ro-mode      #f)
  (ro-mode-checked #f)
  (last-access  (current-seconds))
  (servinf-file #f)
  (last-serv-start 0)
  )

(define (tt:make-remote areapath)
  (make-tt areapath: areapath))

;; 1 ... or #f
(define (tt:valid-run-id run-id)
  (or (number? run-id)
      (not run-id)))

;; do all the busy work of finding and setting up conn for
;; connecting to a server
;; 
(define (tt:client-connect-to-server ttdat dbfname run-id testsuite)
  (assert (tt:valid-run-id run-id) "FATAL: invalid run-id "run-id)
  (let* ((conn (hash-table-ref/default (tt-conns ttdat) dbfname #f))
	 (server-start-proc (lambda ()
			      (tt:server-process-run
			       (tt-areapath ttdat)
			       testsuite ;; (dbfile:testsuite-name)
			       (common:find-local-megatest)
			       run-id))))
    (if conn
	conn ;; we are already connected to the server
	(let* ((sdat (tt:get-current-server-info ttdat dbfname)))
	  (match sdat
	    ((host port start-time server-id pid dbfname2 servinffile)
	     (assert (equal? dbfname dbfname2) "FATAL: read server info from wrong file.")
	     (let* ((host-port (conc host":"port))
		    (conn (make-tt-conn
			   host: host
			   port: port
			   host-port: host-port
			   dbfname: dbfname
			   servinf-file: servinffile
			   server-id: server-id
			   server-start: start-time
			   pid: pid)))
	       (hash-table-set! (tt-conns ttdat) dbfname conn)
	       ;; verify we can talk to this server
	       (if (tt:ping host port server-id)
		   conn
		   (let* ((curr-secs (current-seconds)))
		     ;; rm the (last server) would go here
		     (if (> (- curr-secs (tt-last-serv-start ttdat)) 10)
			 (begin
			   (tt-last-serv-start-set! ttdat curr-secs)
			   (server-start-proc))) ;; don't try and start server unless 30 sec has gone by since last attempt
		     (thread-sleep! 1)
		     (tt:client-connect-to-server ttdat dbfname run-id testsuite)))))
	    (else
	     (if (> (- (current-seconds) (tt-last-serv-start ttdat)) 5) ;; really do not want to swamp the machine with servers
		 (begin
		   (debug:print-info 0 *default-log-port* "No server found. Starting one for run-id "run-id" in dbfile "dbfname)
		   (server-start-proc)
		   (tt-last-serv-start-set! ttdat (current-seconds))))
	     (thread-sleep! 1)
	     (tt:client-connect-to-server ttdat dbfname run-id testsuite)))))))
    
(define (tt:ping host port server-id)
  (let*  ((res (tt:send-receive-direct host port `(ping #f #f #f)))) ;; please send me your server-id
    ;;
    ;; need two threads, one a 5 second timer
    ;;
    (match res
      ((status errmsg result meta)
       (if (equal? result server-id)
	   (begin
	     (debug:print 0 *default-log-port* "Ping to "host":"port" successful.")
	     #t) ;; then we are good
	   (begin
	     (debug:print 0 *default-log-port* "WARNING: server-id does not match, expected: "server-id", got: "result)
	     #f)))
      (else
       ;; (debug:print 0 *default-log-port* "res not in form (status errmsg result meta), got: "res)
       #f))))

;; client side handler
;;
;;(tt:handler #<tt> get-keys #f () 2 #f "/home/matt/data/megatest/ext-tests" #f "main.db" "ext-tests" "/home/matt/data/megatest/bin/.22.04/../megatest")
;;
(define (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe)
  ;; NOTE: areapath is passed in and in tt struct. We'll use passed in value for now.
  (let* ((conn (tt:client-connect-to-server ttdat dbfname run-id testsuite))) ;; (hash-table-ref/default (tt-conns ttdat) dbfname #f)))
    (if conn
	;; have connection, call the server
	(let* ((res (tt:send-receive ttdat conn cmd run-id params)))
	  ;; res is (status errmsg result meta)
	  (match res
	    ((status errmsg result meta)
	     (if (list? meta)
		 (let* ((delay-wait (alist-ref 'delay-wait meta)))
		   (if (and (number? delay-wait)
			    (> delay-wait 0))
		       (begin
			 (debug:print 0 *default-log-port* "Server is loaded, delaying "delay-wait" seconds")
			 (thread-sleep! delay-wait)))))
	     (case status
	       ((busy) ;; result will be how long the server wants you to delay
		(debug:print 0 *default-log-port* "WARNING: server is overloaded, will try again in "result" seconds.")
		(thread-sleep! (if (number? result) result 2))
		(tt:handler  ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
	       ((loaded)
		(debug:print 0 *default-log-port* "WARNING: server is loaded, will try again in a 1/4 second.")
		(thread-sleep! 0.25)
		(tt:handler  ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
	       (else
		result)))
	    (else
	     (if (not res)
		 (let* ((host    (tt-conn-host conn))
			(port    (tt-conn-port conn))
			;; (dbfname (tt-conn-port conn)) ;; 192.168.0.127:4242-726924:4.db
			(pid     (tt-conn-pid  conn))
			(servinf (conc areapath"/.servinfo/"host":"port"-"pid":"dbfname)))
		   (hash-table-set! (tt-conns ttdat) dbfname #f)
		   (if (file-exists? servinf)
		       (begin
			 (debug:print 0 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", attempting to remove servinfo file.")
			 (delete-file* servinf))
		       (debug:print 0 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", but do not see servinf file "servinf))
		   (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
		 (assert #f "FATAL: tt:handler received bad data "res)))))
	(begin
	  (thread-sleep! 1) ;; give it a rest and try again
	  (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe)))))

	;; no conn yet, find and or start and find a server
;; 	(let* ((server (tt:find-server ttdat dbfname)))
;; 	  (if server
;; 	      (let* ((conn (tt:client-connect-to-server server)))
;; 		(hash-table-set! (tt-conns ttdat) dbfname conn)
;; 		(tt:handler  ttdat cmd run-id params attemptnum area-dat areapath readonly-mode
;; 			     dbfname testsuite mtexe))
;; 	      ;; no server, try to start a server process
;; 	      (begin
;; 		(tt:server-process-run areapath testsuite mtexe run-id) ;;  #!key (profile-mode "")) 
;; 		(thread-sleep! 1)
;; 		(tt:handler  ttdat cmd run-id params attemptnum area-dat areapath
;; 			     readonly-mode dbfname testsuite mtexe)))))))

(define (tt:bid-for-servership run-id)
  #f)

;; gets server info and appends path to server file
;; sorts by age, oldest first
;;
;; returns list of (host port startseconds server-id servinfofile)
;;
(define (tt:get-server-info-sorted ttdat dbfname)
  (let* ((areapath (tt-areapath ttdat))
	 (sfiles   (tt:find-server areapath dbfname))
	 (sdats    (filter car (map tt:server-get-info sfiles))) ;; first element is #f if the file disappeared while being read
	 (sorted   (sort sdats (lambda (a b)
				 (< (list-ref a 2)(list-ref b 2))))))
    sorted))
    
(define (tt:get-current-server-info ttdat dbfname)
  (assert (tt-areapath ttdat) "FATAL: areapath not set in ttdat.")
  ;;
  ;; TODO - replace most of below with tt;get-server-info-sorted
  ;;
  (let* ((areapath (tt-areapath ttdat))
	 (sfiles   (tt:find-server areapath dbfname))
	 (sdats    (filter car (map tt:server-get-info sfiles))) ;; first element is #f if the file disappeared while being read
	 (sorted   (sort sdats (lambda (a b)
				 (< (list-ref a 2)(list-ref b 2))))))
    (if (null? sorted)
	#f  ;; we'll want to wait until extra servers have exited
	(car sorted))))

(define (tt:send-receive ttdat conn cmd run-id params)
  (let* ((host-port (tt-conn-host-port conn)) ;; (conc (tt-conn-host conn)":"(tt-conn-port conn)))
	 (host      (tt-conn-host conn))
	 (port      (tt-conn-port conn))
	 (dat       (list cmd run-id params #f))) ;; no meta data yet
    (tt:send-receive-direct host port dat)))

(define (tt:send-receive-direct host port dat)
  (assert (number? port) "FATAL: tt:send-receive-direct called with port not a number "port)
  (handle-exceptions
      exn
    #f ;; Add condition-case or better handling here
    (let-values (((inp oup)(tcp-connect host port)))
      (let ((res (if (and inp oup)
		     (begin
		       (serialize dat oup)
		       (close-output-port oup)
		       (deserialize inp))
		     )))
	(close-input-port inp)
	res))))



;;======================================================================
;; server
;;======================================================================

(define (tt:sync-dbs ttdat)
  #f)

;; start the listener and start responding to requests
;;
;; NOTE: organise by dbfname, not run-id so we don't need
;;       to pull in more modules
;;
;; This is the routine called in megatest.scm to start a server
;;
;; Server viability is checked in keep-running. Blindly start and run here.
;;
(define (tt:start-server areapath run-id dbfname-in handler keys)
  (assert areapath "FATAL: areapath not provided for tt:start-server")
  ;; is there already a server for this dbfile? Then exit.
  (let* ((ttdat   (make-tt areapath: areapath))
	 (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id))))
    ;; (servers (tt:find-server areapath dbfname))) ;; should use tt:get-current-server-info instead
    ;; (if (null? servers)
    (let* ((dbstruct   (dbmod:open-dbmoddb areapath run-id dbfname (dbfile:db-init-proc) keys)))
      (tt-handler-set! ttdat (handler dbstruct))
      (let* ((tcp-thread (make-thread
			  (lambda ()
			    (tt:start-tcp-server ttdat)) ;; start the tcp-server which applies handler to incoming data
			  "tcp-server-thread"))
	     (run-thread (make-thread
			  (lambda ()
			    (tt:keep-running ttdat dbfname dbstruct)))))
	(thread-start! tcp-thread)
	(thread-start! run-thread)
	(thread-join! run-thread) ;; run thread will exit on timeout or other conditions
	(exit)))
    ;;(begin
    ;; (debug:print 0 *default-log-port* "INFO: found server(s) already running for db "dbfname", "(string-intersperse servers ",")" Exiting.")
    ;; (exit)))))
    ))

(define (tt:keep-running ttdat dbfname dbstruct)
  ;; verfiy conn for ready
  ;; listener socket has been started by this stage
  ;; wait for a port before creating the registration file
  ;;
  (let* ((cleanup (lambda ()
		    (if (tt-cleanup-proc ttdat)
			((tt-cleanup-proc ttdat))))))
    (let loop ((count 0))
      (if (> count 240)
	  (begin
	    (debug:print 0 *default-log-port* "FATAL: Could not start a tcp server, giving up.")
	    (exit 1))
	  (if (not (tt-port ttdat)) ;; no connection yet
	      (begin
		(thread-sleep! 0.25)
		(loop (+ count 1))))))
    
    ;; load or reload the data into inmem db before
    ;; ((dbr:dbstruct-sync-proc dbstruct) (dbr:dbstruct-last-update dbstruct))
    ;; (dbr:dbstruct-last-update-set! dbstruct (- (current-seconds) 1))
    (tt:create-server-registration-file ttdat dbfname)
    ;; now start watching the last-access, if it hasn't been touched
    ;; in over ten seconds we exit
    (thread-sleep! 0.05) ;; any real need for delay here?
    (let loop ()
      (let* ((servers (tt:get-server-info-sorted ttdat dbfname))
	     (ok      (cond
		       ((null? servers) #f) ;; not ok
		       ((equal? (list-ref (car servers) 6) ;; compare the servinfofile
				(tt-servinf-file ttdat))
			(debug:print-info 0 *default-log-port* "Keep running, I'm the top server.")
			#t)
		       (else
			(debug:print-info 0 *default-log-port* "I'm not the lead server: "servers)
			(let* ((leadsrv (car servers)))
			  (match leadsrv
			    ((host port startseconds server-id pid dbfname servinfofile)
			     (if (tt:ping host port server-id)
				 #f ;; not the server, but all good, want to exit
				 (if (and (file-exists? servinfofile)
					  (> (- (current-seconds)(file-modification-time servinfofile)) 5))
				     (begin
				       ;; can't ping and file has been on disk 5 seconds, go ahead and try to remove it
				       (debug:print-info 0 *default-log-port* "Removing apparently dead server info file: "servinfofile)
				       (delete-file* servinfofile)
				       #t) ;; not the server but the server is not reachable
				     #t)))
			    (else ;; should never get here
			     (debug:print 0 *default-log-port* "BAD SERVER RECORD: "leadsrv)
			     (assert #f "Bad server record "leadsrv))))))))
	(if ok
	    ;; (if (> *api-process-request-count* 0) ;; have requests in flight
	    ;;	(tt-last-access-set! ttdat (current-seconds)))
	    (tt-last-access-set! ttdat *db-last-access*) ;; bit silly, just use db-last-access
	    (begin
	      (cleanup)
	      (exit)))

	(let* ((last-update (dbr:dbstruct-last-update dbstruct))
	       (curr-secs   (current-seconds)))
	  (if (> (- curr-secs last-update) 3) ;; every 3-4 seconds update the db? maybe this should be refresh the inmem?
	      (begin
		((dbr:dbstruct-sync-proc dbstruct) last-update)
		(dbr:dbstruct-last-update-set! dbstruct curr-secs))))
	  
	(if (< (- (current-seconds) (tt-last-access ttdat)) 60)
	    (begin
	      (thread-sleep! 5)
	      (loop)))))
    (cleanup)
    (debug:print 0 *default-log-port* "INFO: Server timed out, exiting.")))

  
;; ;; given an already set up uconn start the cmd-loop
;; ;;
;; (define (tt:cmd-loop ttdat)
;;   (let* ((serv-listener (-socket uconn))
;; 	 (listener      (lambda ()
;; 			  (let loop ((state 'start))
;; 			    (let-values (((inp oup)(tcp-accept serv-listener)))
;; 			      ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP
;; 			      (let* ((rdat  (deserialize inp)) ;; '(my-host-port qrykey cmd params)
;; 				     (resp  (ulex-handler uconn rdat)))
;; 				(serialize resp oup)
;; 				(close-input-port inp)
;; 				(close-output-port oup)
;; 				;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP
;; 				)
;; 			      (loop state))))))
;;     ;; start N of them
;;     (let loop ((thnum   0)
;; 	       (threads '()))
;;       (if (< thnum 100)
;; 	  (let* ((th (make-thread listener (conc "listener" thnum))))
;; 	    (thread-start! th)
;; 	    (loop (+ thnum 1)
;; 		  (cons th threads)))
;; 	  (map thread-join! threads)))))
;; 
;; 
;; 
;; (define (wait-and-close uconn)
;;   (thread-join! (udat-cmd-thread uconn))
;;   (tcp-close (udat-socket uconn)))
;; 
;; 

(define (tt:shutdown-server ttdat)
  (let* ((cleanproc (tt-cleanup-proc ttdat)))
    (if cleanproc (cleanproc))
    (tcp-close (tt-socket ttdat)) ;; close up ports here
    ))

;; (define (wait-and-close uconn)
;;   (thread-join! (tt-cmd-thread uconn))
;;   (tcp-close (tt-socket uconn)))

;; return servid
;; side-effects:
;;   ttdat-cleanup-proc is populated with function to remove the serverinfo file
(define (tt:create-server-registration-file ttdat dbfname)
  (let* ((areapath (tt-areapath ttdat))
	 (servdir  (tt:get-servinfo-dir areapath))
	 (host     (tt-host ttdat))
	 (port     (tt-port ttdat))
	 (servinf (conc servdir"/"host":"port"-"(current-process-id)":"dbfname))
	 (serv-id (tt:mk-signature areapath))
	 (clean-proc (lambda ()
		       (delete-file* servinf))))
    (assert (and host port) "FATAL: tt:create-server-registration-file called with no conn, dbfname="dbfname)
    (tt-cleanup-proc-set! ttdat clean-proc)
    (tt-servinf-file-set! ttdat servinf)
    (with-output-to-file servinf
      (lambda ()
	(print "SERVER STARTED: "host":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)" dbfname: "dbfname)))
      serv-id))

;; find valid server
;; get servers listed, last part of name must match :<dbfname>
;; if more than one, wait one second and look again
;; future: ping oldest, if alive remove other :<dbfname> files
;;
(define (tt:find-server areapath dbfname)
  (let* ((servdir  (tt:get-servinfo-dir areapath))
	 (sfiles   (glob (conc servdir"/*:"dbfname))))
    sfiles))

;; given a path to a server info file return: host port startseconds server-id pid dbfname logf
;; example of what it's looking for in the log file:
;;     SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 
;;
(define (tt:server-get-info logf)
  (let ((server-rx    (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+) dbfname: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id
        (dbprep-rx    (regexp "^SERVER: dbprep"))
        (dbprep-found 0)
	(bad-dat      (list #f #f #f #f #f #f logf)))
     (let ((fdat     (handle-exceptions
			 exn
		       (begin
			 ;; WARNING: this is potentially dangerous to blanket ignore the errors
			 (debug:print-info 0 *default-log-port* "Unable to get server info from "logf", exn=" exn)
			 '()) ;; no idea what went wrong, call it a bad server, return empty list
		       (with-input-from-file logf read-lines))))
       (if (null? fdat) ;; bad data, return bad-dat
	   bad-dat
	   (let loop ((inl  (car fdat))
		      (tail (cdr fdat))
		      (lnum 0))
	     (let ((mlst (string-match server-rx inl))
		   (dbprep (string-match dbprep-rx inl)))
	       (if dbprep (set! dbprep-found 1))
	       (if (not mlst)
		   (if (> lnum 500) ;; give up if more than 500 lines of server log read
		       bad-dat
		       (if (null? tail)
			   bad-dat
			   (loop (car tail)(cdr tail)(+ lnum 1))))
		   (match mlst ;; have a not null list
		     ((_ host port start server-id pid dbfname)
		      (list host
			    (string->number port)
			    (string->number start)
			    server-id
			    (string->number pid)
			    dbfname
			    logf))
		     (else
		      (debug:print 0 *default-log-port* "ERROR: did not recognise SERVER line info "mlst)
		      bad-dat)))))))))

;; Given an area path,  start a server process    ### NOTE ### > file 2>&1 
;; if the target-host is set 
;; try running on that host
;;   incidental: rotate logs in logs/ dir.
;;
(define  (tt:server-process-run areapath testsuite mtexe run-id #!key (profile-mode "")) ;; areapath is *toppath* for a given testsuite area
  (assert areapath  "FATAL: tt:server-process-run called without areapath defined.")
  (assert testsuite "FATAL: tt:server-process-run called without testsuite defined.")
  (assert mtexe     "FATAL: tt:server-process-run called without mtexe defined.")
  (let* ((load (get-normalized-cpu-load))
	 (nrun (number-of-processes-running "mtest.*server")))
    (cond
     ((> load 2.0)
      (debug:print 0 *default-log-port* "Normalized load "load" is over the limit of 2.0. Not starting a server.")
      (thread-sleep! 1))
     ((> nrun 40)
      (debug:print 0 *default-log-port* nrun" servers running on this host, not starting another.")
      (thread-sleep! 1))
     (else
      (if (not (file-exists? (conc areapath"/logs")))
	      (create-directory (conc areapath"/logs") #t))
	  (let* ((logfile   (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
		 (cmdln     (conc
			     mtexe
			     " -server - ";; (or target-host "-")
			     " -m testsuite:" testsuite
			     ;; " -run-id " (or run-id "main") ;; NO, we do NOT want to have run id as part of this
			     " -db "  (dbmod:run-id->dbfname run-id)
			     " " profile-mode
			     ))) ;; (conc " >> " logfile " 2>&1 &")))))
	    ;; we want the remote server to start in *toppath* so push there
	    ;; (push-directory areapath) ;; use cd in the command line instead
	    (debug:print 0 *default-log-port* "INFO: Trying to start server in tcp mode (" cmdln ") at "(common:human-time)"...")
	    ;; (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time))
	    (setenv "NBFAKE_QUIET" "yes") ;; BUG: change to with-environment-variable ...
	    (setenv "NBFAKE_LOG" logfile)
	    (system (conc "cd "areapath" ; nbfake " cmdln))
	    (unsetenv "NBFAKE_QUIET")
	    (unsetenv "NBFAKE_LOG")
	    ;;(pop-directory)
	    )))))

;;======================================================================
;; tcp connection stuff
;;======================================================================

;; find a port and start tcp-server. This only starts the tcp portion of
;; the server, look at (tt:start-server ...) above for the entry point
;; for the entire server system
;;
(define (tt:start-tcp-server ttdat)
  (setup-listener ttdat)
  (let* ((socket   (tt-socket  ttdat))
	 (handler  (tt-handler ttdat)))
    ((make-tcp-server socket handler)
     #f ;; yes, send error messages to std-err
     )))

;; create a tcp listener and return a populated udat struct with
;; my port, address, hostname, pid etc.
;; return #f if fail to find a port to allocate.
;;
;;  if udata-in is #f create the record
;;  if there is already a serv-listener return the udata
;;
(define (setup-listener uconn #!optional (port 4242))
  (assert (tt? uconn) "FATAL: setup-listener called with wrong struct "uconn)
  (handle-exceptions
   exn
   (if (< port 65535)
       (setup-listener uconn (+ port 1))
       #f)
   (connect-listener uconn port)))

(define (connect-listener uconn port)
  ;; (tcp-listener-socket LISTENER)(socket-name so)
  ;; sockaddr-address, sockaddr-port, sockaddr->string
  (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]])
	 (addr  (tt:get-best-guess-address (get-host-name)))) ;; (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname)))
    (tt-port-set!      uconn port)
    (tt-host-set!      uconn addr)
    (tt-host-port-set! uconn (conc addr":"port))
    (tt-socket-set!    uconn tlsn)
    uconn))

;;======================================================================
;; utils
;;======================================================================

;; Generate a unique signature for this server
(define (tt:mk-signature areapath)
  (message-digest-string (md5-primitive) 
			 (with-output-to-string
			   (lambda ()
			     (write (list areapath
                                          (current-process-id)
					  (argv)))))))


(define (tt:get-best-guess-address hostname)
  (let ((res #f))
    (for-each 
     (lambda (adr)
       (if (not (eq? (u8vector-ref adr 0) 127))
	   (set! res adr)))
     ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME
     (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
    (string-intersperse 
     (map number->string
	  (u8vector->list
	   (if res res (hostname->ip hostname)))) ".")))

(define (tt:get-servinfo-dir areapath)
  (let* ((spath (conc areapath"/.servinfo")))
    (if (not (file-exists? spath))
	(create-directory spath #t))
    spath))

;;======================================================================
;; network utilities
;;======================================================================

;; NOTE: Look at address-info egg as alternative to some of this

(define (rate-ip ipaddr)
  (regex-case ipaddr
    ( "^127\\..*" _ 0 )
    ( "^(10\\.0|192\\.168)\\..*" _ 1 )
    ( else 2 ) ))

;; Change this to bias for addresses with a reasonable broadcast value?
;;
(define (ip-pref-less? a b)
  (> (rate-ip a) (rate-ip b)))

(define (get-my-best-address)
  (let ((all-my-addresses (get-all-ips)))
    (cond
     ((null? all-my-addresses)
      (get-host-name))                                          ;; no interfaces?
     ((eq? (length all-my-addresses) 1)
      (car all-my-addresses))                      ;; only one to choose from, just go with it
     (else
      (car (sort all-my-addresses ip-pref-less?))))))

(define (get-all-ips-sorted)
  (sort (get-all-ips) ip-pref-less?))

(define (get-all-ips)
  (map address-info-host
       (filter (lambda (x)
		 (equal? (address-info-type x) "tcp"))
	       (address-infos (get-host-name)))))

)

Added transport-mode.scm.template version [0281a19fe7].







>
>
>
1
2
3
;; 'http or 'tcp
(rmt:transport-mode 'tcp)
;; (rmt:transport-mode 'http)

Modified utils/nbfake from [3dc733e001] to [3e514ddfd2].

37
38
39
40
41
42
43

44
45
46
47
48
49
50
nbfake <command to run>

nbfake behavior can be changed by setting the following env vars:
   NBFAKE_HOST       SSH to \$NBFAKE_HOST and run command
   NBFAKE_LOG        Logfile for nbfake output
   NB_WASH_GROUPS    comma-separated list of groups to wash into
   NB_WASH_ENABLED   must be set in order to enable wash groups


__EOF
  exit
fi

#==============================================================================
# Setup







>







37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
nbfake <command to run>

nbfake behavior can be changed by setting the following env vars:
   NBFAKE_HOST       SSH to \$NBFAKE_HOST and run command
   NBFAKE_LOG        Logfile for nbfake output
   NB_WASH_GROUPS    comma-separated list of groups to wash into
   NB_WASH_ENABLED   must be set in order to enable wash groups
   NBFAKE_QUIET      set to suppress informational output

__EOF
  exit
fi

#==============================================================================
# Setup
85
86
87
88
89
90
91

92
93
94
95
96
97

98
99
100
101
102
103
104
105
   WASHCMD="wash -q -n $grouplist -X"
fi

#==============================================================================
# Run and log
#==============================================================================


cat <<__EOF >&2
#======================================================================
# NBFAKE logging command to: $MY_NBFAKE_LOG
#     $WASHCMD $*
#======================================================================
__EOF


if [[ -z "$MY_NBFAKE_HOST" ]]; then
  # Run locally
  sh -c "cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=\"$PATH\"; nohup $WASHCMD $* >> $MY_NBFAKE_LOG 2>&1 &"
else
  # run remotely
  ssh -X -n -f $MY_NBFAKE_HOST "sh -c \"cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=\"$PATH\"; nohup $WASHCMD $* >> $MY_NBFAKE_LOG 2>&1 &\""
fi







>






>








86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
   WASHCMD="wash -q -n $grouplist -X"
fi

#==============================================================================
# Run and log
#==============================================================================

if [[ -z "$NBFAKE_QUIET" ]];then
cat <<__EOF >&2
#======================================================================
# NBFAKE logging command to: $MY_NBFAKE_LOG
#     $WASHCMD $*
#======================================================================
__EOF
fi

if [[ -z "$MY_NBFAKE_HOST" ]]; then
  # Run locally
  sh -c "cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=\"$PATH\"; nohup $WASHCMD $* >> $MY_NBFAKE_LOG 2>&1 &"
else
  # run remotely
  ssh -X -n -f $MY_NBFAKE_HOST "sh -c \"cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=\"$PATH\"; nohup $WASHCMD $* >> $MY_NBFAKE_LOG 2>&1 &\""
fi