Megatest

Check-in [900e9ce98b]
Login
Overview
Comment:consolidated tmp dir name functions to common:make-tmpdir-name. Adjusted server start delays and debug messages
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.80
Files: files | file ages | folders
SHA1: 900e9ce98b701c6e44a48e71fc1d9f534d310847
User & Date: mmgraham on 2023-10-19 14:58:49
Other Links: branch diff | manifest | tags
Context
2023-10-19
15:39
covered case where megatest.sh does not exist check-in: 4f1a1fc90c user: mmgraham tags: v1.80
14:58
consolidated tmp dir name functions to common:make-tmpdir-name. Adjusted server start delays and debug messages check-in: 900e9ce98b user: mmgraham tags: v1.80
2023-10-17
19:50
Made the /tmp db location consistent with previous versions, made -kill-servers remove no-sync.db, adjusted debug messages check-in: ccef2ac967 user: mmgraham tags: v1.80
Changes

Modified common.scm from [b817fc7f9a] to [c00500b3f7].

19
20
21
22
23
24
25

26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43







44
45
46
47
48
49
50
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58







+


















+
+
+
+
+
+
+







;;======================================================================

(declare (unit common))
(declare (uses commonmod))
(declare (uses rmtmod))
(declare (uses debugprint))
(declare (uses mtargs))
        

(use srfi-1 data-structures posix regex-case (prefix base64 base64:)
     format dot-locking csv-xml z3 udp ;; sql-de-lite
     hostinfo md5 message-digest typed-records directory-utils stack
     matchable regex posix (srfi 18) extras ;; tcp 
     (prefix nanomsg nmsg:)
     (prefix sqlite3 sqlite3:)
     pkts (prefix dbi dbi:)
     )
(use posix-extras pathname-expand files)


(import commonmod
	debugprint
	rmtmod
	(prefix mtargs args:))

(include "common_records.scm")

(define (common:make-tmpdir-name areapath tmpadj)
  (let* ((area (pathname-file areapath))
         (dname (conc "/tmp/"(current-user-name)"/megatest_localdb/" area "/" (string-translate areapath "/" ".") tmpadj "/.mtdb")))
    (unless (directory-exists? dname)
      (create-directory dname #t))
    dname))

(define (remove-files filespec)
  (let ((files (glob filespec)))
    (for-each delete-file files)))

(define (stop-the-train)
  (thread-start! (make-thread (lambda ()
245
246
247
248
249
250
251
252

253
254
255
256
257
258
259
253
254
255
256
257
258
259

260
261
262
263
264
265
266
267







-
+







;;======================================================================

(define *common:this-exe-fullpath* (common:get-this-exe-fullpath))
(define *common:this-exe-dir*      (pathname-directory *common:this-exe-fullpath*))
(define *common:this-exe-name*     (pathname-strip-directory *common:this-exe-fullpath*))

(define (common:get-sync-lock-filepath)
  (let* ((tmp-area     (dbfile:make-tmpdir-name *toppath* ""))
  (let* ((tmp-area     (common:make-tmpdir-name *toppath* ""))
         (lockfile     (conc tmp-area "/megatest.db.lock")))
    lockfile))

(define *common:logpro-exit-code->status-sym-alist*
  '( ( 0 . pass )
     ( 1 . fail )
     ( 2 . warn )
1531
1532
1533
1534
1535
1536
1537
1538

1539
1540
1541
1542
1543
1544
1545
1539
1540
1541
1542
1543
1544
1545

1546
1547
1548
1549
1550
1551
1552
1553







-
+







;;======================================================================
;; lazy-safe get file mod time. on any error (file not existing etc.) return 0
;;
(define (common:lazy-modification-time fpath)
  (handle-exceptions
      exn
    (begin
      (debug:print 0 *default-log-port* "Failed to get modifcation time for " fpath ", treating it as zero. exn=" exn)
      (debug:print 2 *default-log-port* "Failed to get modification time for " fpath ", treating it as zero. exn=" exn)
      0)
    (if (file-exists? fpath)
	(file-modification-time fpath)
	0)))

;;======================================================================
;; find timestamp of newest file associated with a sqlite db file
2278
2279
2280
2281
2282
2283
2284
2285

2286
2287
2288
2289
2290
2291
2292
2286
2287
2288
2289
2290
2291
2292

2293
2294
2295
2296
2297
2298
2299
2300







-
+







;; returns: ok/not dbspace required-space
;;
(define (common:check-db-dir-space)
  (let* ((required (string->number 
                    ;; default is 1GB (or actually a billion bytes) This is the number of 1 kB blocks.
		    (or (configf:lookup *configdat* "setup" "dbdir-space-required")
			"1000000")))
	 (dbdir    (dbfile:make-tmpdir-name *toppath* "")) ;; (db:get-dbdir))
	 (dbdir    (common:make-tmpdir-name *toppath* "")) ;; (db:get-dbdir))
	 (tdbspace (common:check-space-in-dir dbdir required))
	 (mdbspace (common:check-space-in-dir *toppath* required)))
    (sort (list tdbspace mdbspace) (lambda (a b)
				     (< (cadr a)(cadr b))))))

;;======================================================================
;; check available space in dbdir, exit if insufficient

Modified dashboard-tests.scm from [d3d14d0eb8] to [63a55f86f7].

461
462
463
464
465
466
467
468

469
470
471
472
473
474
475
461
462
463
464
465
466
467

468
469
470
471
472
473
474
475







-
+







    dlog))


;;======================================================================
;;
;;======================================================================
(define (dashboard-tests:examine-test run-id test-id) ;; run-id run-key origtest)
  (let* ((db-path       (db:dbfile-path)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/db/" run-id ".db"))
  (let* ((db-path       (common:make-tmpdir-name *toppath* "")) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/db/" run-id ".db"))
	 (dbstruct      #f) ;; NOT USED
	 (testdat        (rmt:get-test-info-by-id run-id test-id)) ;; (db:get-test-info-by-id dbstruct run-id test-id))
	 (db-mod-time   0) ;; (file-modification-time db-path))
	 (last-update   0) ;; (current-seconds))
	 (request-update #t))
    (if (not testdat)
	(begin

Modified dashboard.scm from [b1dc6c475c] to [92015a98e3].

400
401
402
403
404
405
406
407
408


409
410
411
412
413
414
415
400
401
402
403
404
405
406


407
408
409
410
411
412
413
414
415







-
-
+
+







(define (dboard:tabdat-make-data)
  (let ((dat (make-dboard:tabdat)))
    (dboard:setup-tabdat dat)
    (dboard:setup-num-rows dat)
    dat))

(define (dboard:setup-tabdat tabdat)
  (dboard:tabdat-dbdir-set! tabdat (dbfile:make-tmpdir-name *toppath* "")) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/.db"))
  (dboard:tabdat-dbfpath-set! tabdat (dbfile:make-tmpdir-name *toppath* ""))
  (dboard:tabdat-dbdir-set! tabdat (common:make-tmpdir-name *toppath* "")) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/.db"))
  (dboard:tabdat-dbfpath-set! tabdat (common:make-tmpdir-name *toppath* ""))
  (dboard:tabdat-monitor-db-path-set! tabdat (conc (dboard:tabdat-dbdir tabdat) "/monitor.db"))


  ;; HACK ALERT: this is a hack, please fix.
  (dboard:tabdat-ro-set! tabdat (not (file-read-access? (dboard:tabdat-dbfpath tabdat))))
  (dboard:tabdat-keys-set! tabdat (rmt:get-keys))
  (dboard:tabdat-dbkeys-set! tabdat (append (dboard:tabdat-keys tabdat) (list "runname")))
926
927
928
929
930
931
932
933

934
935
936
937
938

939
940
941
942
943
944
945
926
927
928
929
930
931
932

933
934
935
936
937

938
939
940
941
942
943
944
945







-
+




-
+







	      (if (null? all-test-ids)
		  (hash-table-delete! (dboard:tabdat-allruns-by-id tabdat) run-id)
		  (hash-table-set!    (dboard:tabdat-allruns-by-id tabdat) run-id run-struct))
	      (if (or (null? tal)
		      (> elapsed-time 2)) ;; stop loading data after 5 seconds, on the next call more data *should* be loaded since get-tests-for-run uses last update
		  (begin
		    (when (> elapsed-time 2)   
                      (debug:print 0 *default-log-port* "NOTE: updates are taking a long time, " elapsed-time "s elapsed.")
                      (debug:print 2 *default-log-port* "NOTE: updates are taking a long time, " elapsed-time "s elapsed.")
                      (let* ((old-val (iup:attribute *tim* "TIME"))
                             (new-val (number->string (inexact->exact (floor (* 2  (string->number old-val)))))))
                        (if (< (string->number new-val) 5000)
                            (begin
			      (debug:print 0 *default-log-port* "NOTE: increasing poll interval from "old-val" to "new-val)
			      (debug:print 2 *default-log-port* "NOTE: increasing poll interval from "old-val" to "new-val)
			      (iup:attribute-set! *tim* "TIME" new-val)))))
		    (dboard:tabdat-allruns-set! tabdat new-res)
		    maxtests)
		  (if (> (dboard:rundat-run-data-offset run-struct) 0)
		      (loop run tal new-res newmaxtests) ;; not done getting data for this run
		      (loop (car tal)(cdr tal) new-res newmaxtests)))))))
    (dboard:tabdat-filters-changed-set! tabdat #f)

Modified db.scm from [6d82fac5fa] to [b1837f1312].

131
132
133
134
135
136
137
138

139
140
141
142
143
144
145
131
132
133
134
135
136
137

138
139
140
141
142
143
144
145







-
+







	   (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
	   (print-call-chain (current-error-port))
	   default)))
   (apply sqlite3:first-result db stmt params)))

(define (db:setup do-sync)
  (assert *toppath* "FATAL: db:setup called before launch:setup has been run.")
  (let* ((tmpdir (dbfile:make-tmpdir-name *toppath* "")))
  (let* ((tmpdir (common:make-tmpdir-name *toppath* "")))
    (if (not *dbstruct-dbs*)
	(dbfile:setup do-sync *toppath* tmpdir)
	*dbstruct-dbs*)))

;; moved from dbfile
;;
;; ADD run-id SUPPORT
458
459
460
461
462
463
464
465

466
467
468
469
470
471
472
458
459
460
461
462
463
464

465
466
467
468
469
470
471
472







-
+







    (max (get-mtime fname)
	 (get-mtime wal-file)
	 (get-mtime shm-file))))
	 
;; (define (db:all-db-sync dbstruct)
;;   (let* ((dbdat (db:open-db dbstruct #f db:initialize-main-db))
;; 	 (data-synced       0) ;; count of changed records
;;     (tmp-area       (dbfile:make-tmpdir-name *toppath*))
;;     (tmp-area       (common:make-tmpdir-name *toppath*))
;;     (dbfiles        (glob (conc tmp-area"/.mtdb/*.db")))
;;     (sync-durations (make-hash-table))
;;     (no-sync-db        (db:open-no-sync-db)))
;;     (for-each
;;      (lambda (file) ;; tmp db file
;;        (debug:print-info 3 *default-log-port* "file: " file)
;;        (let* ((fname       (conc (pathname-file file) ".db")) ;; fname is tmp db file
547
548
549
550
551
552
553
554

555
556
557
558
559
560
561
547
548
549
550
551
552
553

554
555
556
557
558
559
560
561







-
+







;;  'closeall     - close all opened dbs
;;  'schema       - attempt to apply schema changes
;;  run-ids: '(1 2 3 ...) or #f (for all)
;;
(define (db:multi-db-sync dbstruct . options)
  (let* (;; (dbdat       (db:open-db dbstruct #f dbfile:db-init-proc))
	 (data-synced 0) ;; count of changed records
	 (tmp-area       (dbfile:make-tmpdir-name *toppath* ""))
	 (tmp-area       (common:make-tmpdir-name *toppath* ""))
	 (old2new (member 'old2new options))
	 (dejunk (member 'dejunk options))
	 (killservers (member 'killservers options))
	 (src-area (if old2new *toppath* tmp-area))
	 (dest-area (if old2new tmp-area *toppath*))
	 (dbfiles        (if old2new (glob (conc *toppath* "/.mtdb/*.db"))
			     (glob (conc tmp-area "/.mtdb/*.db"))))
1245
1246
1247
1248
1249
1250
1251
1252

1253
1254
1255
1256
1257
1258
1259
1245
1246
1247
1248
1249
1250
1251

1252
1253
1254
1255
1256
1257
1258
1259







-
+








;;======================================================================
;; no-sync.db - small bits of data to be shared between servers
;;======================================================================

(define (db:get-dbsync-path)
  (case (rmt:transport-mode)
    ((http)(dbfile:make-tmpdir-name *toppath* ""))
    ((http)(common:make-tmpdir-name *toppath* ""))
    ((tcp) (conc *toppath*"/.mtdb"))
    ((nfs) (conc *toppath*"/.mtdb"))
    (else "/tmp/dunno-this-gonna-exist")))

;; This is needed for api.scm
(define (db:open-no-sync-db)
   (dbfile:open-no-sync-db (db:get-dbsync-path)))
1571
1572
1573
1574
1575
1576
1577
1578

1579
1580
1581
1582
1583
1584
1585
1571
1572
1573
1574
1575
1576
1577

1578
1579
1580
1581
1582
1583
1584
1585







-
+







    res))

;; TODO: Switch this to use max(update_time) from each run db? Then if using a server there is no disk traffic (using cachedb db) ???
;;
;; NOTE: This DOESN'T (necessarily) get the real run ids, but the number of the <number>.db!!

(define (db:get-changed-run-ids since-time)
  (let* ((dbdir      (dbfile:make-tmpdir-name *toppath* "")) ;; (configf:lookup *configdat* "setup" "dbdir"))
  (let* ((dbdir      (common:make-tmpdir-name *toppath* "")) ;; (configf:lookup *configdat* "setup" "dbdir"))
	 (alldbs     (glob (conc *toppath* "/.mtdb/[0-9]*.db*")))
	 (changed    (filter (lambda (dbfile)
			       (> (file-modification-time dbfile) since-time))
			     alldbs)))
    (delete-duplicates
     (map (lambda (dbfile)
	    (let* ((res (string-match ".*\\/(\\d+)\\.db*" dbfile)))
4305
4306
4307
4308
4309
4310
4311
4312

4313
4314
4315
4316
4317
4318
4319
4305
4306
4307
4308
4309
4310
4311

4312
4313
4314
4315
4316
4317
4318
4319







-
+







          (debug:print 0 *default-log-port* "could not get lock for " from-db " from no-sync-db")
	  #f
        ))))

;; sync for filesystem local db writes
;;
(define (db:run-lock-and-sync no-sync-db)
  (let* ((tmp-area       (dbfile:make-tmpdir-name *toppath* ""))
  (let* ((tmp-area       (common:make-tmpdir-name *toppath* ""))
	 (dbfiles        (glob (conc tmp-area"/.mtdb/*.db")))
	 (sync-durations (make-hash-table)))
    ;; (debug:print-info 0 *default-log-port* "lock-and-sync, dbfiles: "dbfiles)
    (for-each
     (lambda (file)
       (let* ((fname (conc (pathname-file file) ".db"))
	      (fulln (conc *toppath*"/.mtdb/"fname))
4361
4362
4363
4364
4365
4366
4367
4368

4369
4370
4371
4372
4373
4374
4375
4361
4362
4363
4364
4365
4366
4367

4368
4369
4370
4371
4372
4373
4374
4375







-
+







  (thread-sleep! 0.05) ;; delay for startup
  (let ((legacy-sync        (common:run-sync?))
	(sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300))
 	(debug-mode         (debug:debug-mode 1))
 	(last-time          (current-seconds))     ;; last time through the sync loop
 	(no-sync-db         (db:open-no-sync-db))
 	(sync-duration      0)  ;; run time of the sync in milliseconds
	(tmp-area           (dbfile:make-tmpdir-name *toppath* "")))
	(tmp-area           (common:make-tmpdir-name *toppath* "")))
    ;; Sync moved to http-transport keep-running loop
    (debug:print-info 2 *default-log-port* "Periodic copy-based sync thread started. syncer is copy-sync, tmp-area is " tmp-area)
    (debug:print-info 3 *default-log-port* "watchdog starting. syncer is copy-sync pid="(current-process-id));;  " this-wd-num="this-wd-num)
    
    (if (and legacy-sync (not *time-to-exit*))
 	(begin
 	  (debug:print-info 0 *default-log-port* "Server running, periodic copy-based sync started.")
4469
4470
4471
4472
4473
4474
4475
4476

4477
4478
4479
4480
4481
4482
4483
4469
4470
4471
4472
4473
4474
4475

4476
4477
4478
4479
4480
4481
4482
4483







-
+







 		      ;;
 		     
 		       (for-each
 			(lambda (subdb)
 			  (let* (;;(dbstruct (db:setup))
 				 (mtdb       (dbr:subdb-mtdb subdb))
 				 (mtpath     (db:dbdat-get-path mtdb))
 				 (tmp-area   (dbfile:make-tmpdir-name *toppath* ""))
 				 (tmp-area   (common:make-tmpdir-name *toppath* ""))
 				 (res        (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
 			    (set! sync-duration (- (current-milliseconds) sync-start))
 			    (if (> res 0) ;; some records were transferred, keep the db alive
 				(begin
 				  (mutex-lock! *heartbeat-mutex*)
 				  (set! *db-last-access* (current-seconds))
 				  (mutex-unlock! *heartbeat-mutex*)

Modified dbmod.scm from [1d31a00395] to [9f0ce614a3].

196
197
198
199
200
201
202
203

204
205
206
207
208
209
210
196
197
198
199
200
201
202

203
204
205
206
207
208
209
210







-
+







			    (tmpadj  "")       ;; add to tmp path
			    (syncdir 'todisk)) ;; todisk is used when caching in /tmp and writing data back to MTRAH
  (let* ((dbstruct     (or dbstruct-in (make-dbr:dbstruct areapath: areapath)))
	 (dbfname      (or dbfname-in (dbmod:run-id->dbfname run-id)))
	 (dbpath       (dbmod:get-dbdir dbstruct))             ;; directory where all the .db files are kept
	 (dbfullname   (conc dbpath"/"dbfname)) ;; (dbmod:run-id->full-dbfname dbstruct run-id))
	 (dbexists     (file-exists? dbfullname))
	 (tmpdir       (dbfile:make-tmpdir-name areapath tmpadj))
	 (tmpdir       (common:make-tmpdir-name areapath tmpadj))
	 (tmpdb        (let* ((fname (conc tmpdir"/"dbfname)))
			 fname))
	 (cachedb        (dbmod:open-cachedb-db init-proc
					    ;; (if (eq? (dbfile:cache-method) 'cachedb)
					    ;; 	#f
					    tmpdb
					    ;; )

Modified megatest.scm from [db5cb1955c] to [af8974dd23].

1091
1092
1093
1094
1095
1096
1097

1098


1099
1100
1101
1102
1103
1104
1105
1091
1092
1093
1094
1095
1096
1097
1098

1099
1100
1101
1102
1103
1104
1105
1106
1107







+
-
+
+







              sfiles
            )
          )
       )
       dbfiles
     )
     ;; remove this db, because otherwise metadata contains records for old servers, and this causes a problem with db:no-sync-get-lock-with-id.
     (if (file-exists? (conc *toppath* "/.mtdb/no-sync.db"))
     (delete-file (conc *toppath* "/.mtdb/no-sync.db"))
       (delete-file (conc *toppath* "/.mtdb/no-sync.db"))
     )
     (set! *didsomething* #t)
     (exit)  
  )
)

;;======================================================================
;; Weird special calls that need to run *after* the server has started?
2132
2133
2134
2135
2136
2137
2138
2139

2140
2141

2142
2143
2144
2145
2146
2147
2148
2134
2135
2136
2137
2138
2139
2140

2141
2142

2143
2144
2145
2146
2147
2148
2149
2150







-
+

-
+







             (begin 
             (debug:print-info 1 *default-log-port* "Missing required argument -source <archive path>")
             (exit 1)))
         (if (common:file-exists? (conc  *toppath* "/megatest.db"))
             (begin  
               (debug:print-info 1 *default-log-port* "File " (conc  *toppath* "/megatest.db") " already exists. Please remove it before trying to replicate db")
               (exit 1)))
         (if (and (dbfile:make-tmpdir-name *toppath* "") (> (length (directory   (dbfile:make-tmpdir-name *toppath* "") #f)) 0))
         (if (and (common:make-tmpdir-name *toppath* "") (> (length (directory   (common:make-tmpdir-name *toppath* "") #f)) 0))
           (begin
           (debug:print-info 1 *default-log-port* (dbfile:make-tmpdir-name *toppath* "") " not empty. Please remove it before trying to replicate db")
           (debug:print-info 1 *default-log-port* (common:make-tmpdir-name *toppath* "") " not empty. Please remove it before trying to replicate db")
           (exit 1)))    
          ;; check if timestamp 
          (let* ((source (args:get-arg "-source"))
                (src     (if (not (equal? (substring source 0 1) "/"))
                             (conc (current-directory) "/" source)
                             source))
                (ts (if (args:get-arg "-time-stamp")   (args:get-arg "-time-stamp") "latest")))

Modified rmt.scm from [64f3d622e8] to [564930aec3].

165
166
167
168
169
170
171
172

173
174
175
176
177
178
179
165
166
167
168
169
170
171

172
173
174
175
176
177
178
179







-
+







				 (cons 'none 0))
			     (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
    (mutex-unlock! *db-stats-mutex*)
    res))

(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
  (let* ((qry-is-write    (not (member cmd api:read-only-queries)))
	 (db-file-path    (db:dbfile-path)) ;;  0))
	 (db-file-path    (common:make-tmpdir-name *toppath* "")) ;;  0))
	 (dbstructs-local (db:setup #t))
	 (read-only       (not (file-write-access? db-file-path)))
	 (start           (current-milliseconds))
	 (resdat          (if (not (and read-only qry-is-write))
			      (let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params))))
			;;	(handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
			;;	 exn               ;;  This is an attempt to detect that situation and recover gracefully

Modified tasks.scm from [4adbc308eb] to [93c938d59a].

82
83
84
85
86
87
88
89

90
91
92
93
94
95
96
82
83
84
85
86
87
88

89
90
91
92
93
94
95
96







-
+







	     (debug:print 5 *default-log-port* " exn=" (condition->list exn))
	     (thread-sleep! 1)
	     (tasks:open-db numretries (- numretries 1)))
	   (begin
	     (print-call-chain (current-error-port))
	     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 5 *default-log-port* " exn=" (condition->list exn))))
       (let* ((dbpath        (db:dbfile-path )) ;; (tasks:get-task-db-path))
       (let* ((dbpath        (common:make-tmpdir-name *toppath* "")) ;; (tasks:get-task-db-path))
	      (dbfile       (conc dbpath "/monitor.db"))
	      (avail        (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away
	      (exists       (common:file-exists? dbpath))
	      (write-access (file-write-access? dbpath))
	      (mdb          (cond ;; what the hek is *toppath* doing here?
			     ((and (string? *toppath*)(file-write-access? *toppath*))
			      (sqlite3:open-database dbfile))

Modified tcp-transportmod.scm from [1391b0d841] to [04adce729b].

169
170
171
172
173
174
175
176

177
178
179
180
181
182
183
184

185
186
187
188
189

190
191

192
193



194
195

196
197
198
199
200
201
202
169
170
171
172
173
174
175

176
177
178
179
180
181
182
183

184
185
186
187
188

189
190

191
192

193
194
195
196

197
198
199
200
201
202
203
204







-
+







-
+




-
+

-
+

-
+
+
+

-
+







		 (case ping-res
		   ((running)
                    (debug:print-info 2 *default-log-port* "Setting conn = " conn " in hash table")
		    (hash-table-set! (tt-conns ttdat) dbfname conn) ;;; is this ok to save before validating that the connection is good?
		    conn)
		   ((starting)
		    (thread-sleep! 0.5)
                    (debug:print-info 2 *default-log-port* "server was in starting state, retrying connect")
                    (debug:print-info 0 *default-log-port* "server for " dbfname " is in starting state, retrying connect")
		    (tt:client-connect-to-server ttdat dbfname run-id testsuite))
		   (else
		    (let* ((curr-secs (current-seconds)))
		      ;; rm the (last server) would go here
		      (if (> (- curr-secs (tt-last-serv-start ttdat)) 10)
			  (begin
			    (tt-last-serv-start-set! ttdat curr-secs)
			    (server-start-proc))) ;; start server if 30 sec since last attempt
			    (server-start-proc))) ;; start server if 10 sec since last attempt
		      (thread-sleep! 1)
                      (debug:print-info 2 *default-log-port* "server ping result was neither running nor starting. Retrying connect")
		      (tt:client-connect-to-server ttdat dbfname run-id testsuite)))))))
	    (else ;; no good server found, if haven't started server in > 5 secs, start another
	     (if (> (- (current-seconds) (tt-last-serv-start ttdat)) 5) ;; BUG - grow this number really do not want to swamp the machine with servers
	     (if (> (- (current-seconds) (tt-last-serv-start ttdat)) 3) ;; BUG - grow this number really do not want to swamp the machine with servers
		 (begin
		   (debug:print-info 0 *default-log-port* "No server found. Starting one for run-id "run-id" in dbfile "dbfname)
		   (debug:print-info 0 *default-log-port* "Starting server for "dbfname)
		   (server-start-proc)
		   (tt-last-serv-start-set! ttdat (current-seconds))))
		   (tt-last-serv-start-set! ttdat (current-seconds))
                   (thread-sleep! 3)
                   ))
	     (thread-sleep! 1)
             (debug:print-info 2 *default-log-port* "no good server found, try connect again")
             (debug:print-info 0 *default-log-port* "Connect to server for " dbfname)
	     (tt:client-connect-to-server ttdat dbfname run-id testsuite)))))))

(define (tt:timed-ping host port server-id)
  (let* ((start-time (current-milliseconds))
	 (result     (tt:ping host port server-id)))
    (cons result (- (current-milliseconds) start-time))))
    
793
794
795
796
797
798
799
800

801
802
803
804
805
806
807
795
796
797
798
799
800
801

802
803
804
805
806
807
808
809







-
+







			     " -server - ";; (or target-host "-")
			     " -m testsuite:"testsuite
			     " -db "dbfname ;; (dbmod:run-id->dbfname run-id)
			     " " profile-mode
			     (conc " >> " logfile " 2>&1 &"))))
	    ;; we want the remote server to start in *toppath* so push there
	    ;; (push-directory areapath) ;; use cd in the command line instead
	    (debug:print 0 *default-log-port* "INFO: Trying to start server in tcp mode (" cmdln ") at "(common:human-time)" for "areapath)
	    (debug:print 2 *default-log-port* "INFO: Trying to start server in tcp mode (" cmdln ") at "(common:human-time)" for "areapath)
	    ;; (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time))

	    (system cmdln)
	    ;; ;; use below to go back to nbfake - nbfake does cause trouble ...
	    ;; (setenv "NBFAKE_QUIET" "yes") ;; BUG: change to with-environment-variable ...
	    ;; (setenv "NBFAKE_LOG" logfile)
	    ;; (system (conc "cd "areapath" ; nbfake " cmdln))