Megatest

Check-in [900e9ce98b]
Login
Overview
Comment:consolidated tmp dir name functions to common:make-tmpdir-name. Adjusted server start delays and debug messages
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.80
Files: files | file ages | folders
SHA1: 900e9ce98b701c6e44a48e71fc1d9f534d310847
User & Date: mmgraham on 2023-10-19 14:58:49
Other Links: branch diff | manifest | tags
Context
2023-10-19
15:39
covered case where megatest.sh does not exist check-in: 4f1a1fc90c user: mmgraham tags: v1.80
14:58
consolidated tmp dir name functions to common:make-tmpdir-name. Adjusted server start delays and debug messages check-in: 900e9ce98b user: mmgraham tags: v1.80
2023-10-17
19:50
Made the /tmp db location consistent with previous versions, made -kill-servers remove no-sync.db, adjusted debug messages check-in: ccef2ac967 user: mmgraham tags: v1.80
Changes

Modified common.scm from [b817fc7f9a] to [c00500b3f7].

19
20
21
22
23
24
25

26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43







44
45
46
47
48
49
50
;;======================================================================

(declare (unit common))
(declare (uses commonmod))
(declare (uses rmtmod))
(declare (uses debugprint))
(declare (uses mtargs))


(use srfi-1 data-structures posix regex-case (prefix base64 base64:)
     format dot-locking csv-xml z3 udp ;; sql-de-lite
     hostinfo md5 message-digest typed-records directory-utils stack
     matchable regex posix (srfi 18) extras ;; tcp 
     (prefix nanomsg nmsg:)
     (prefix sqlite3 sqlite3:)
     pkts (prefix dbi dbi:)
     )
(use posix-extras pathname-expand files)


(import commonmod
	debugprint
	rmtmod
	(prefix mtargs args:))

(include "common_records.scm")








(define (remove-files filespec)
  (let ((files (glob filespec)))
    (for-each delete-file files)))

(define (stop-the-train)
  (thread-start! (make-thread (lambda ()







>


















>
>
>
>
>
>
>







19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
;;======================================================================

(declare (unit common))
(declare (uses commonmod))
(declare (uses rmtmod))
(declare (uses debugprint))
(declare (uses mtargs))
        

(use srfi-1 data-structures posix regex-case (prefix base64 base64:)
     format dot-locking csv-xml z3 udp ;; sql-de-lite
     hostinfo md5 message-digest typed-records directory-utils stack
     matchable regex posix (srfi 18) extras ;; tcp 
     (prefix nanomsg nmsg:)
     (prefix sqlite3 sqlite3:)
     pkts (prefix dbi dbi:)
     )
(use posix-extras pathname-expand files)


(import commonmod
	debugprint
	rmtmod
	(prefix mtargs args:))

(include "common_records.scm")

(define (common:make-tmpdir-name areapath tmpadj)
  (let* ((area (pathname-file areapath))
         (dname (conc "/tmp/"(current-user-name)"/megatest_localdb/" area "/" (string-translate areapath "/" ".") tmpadj "/.mtdb")))
    (unless (directory-exists? dname)
      (create-directory dname #t))
    dname))

(define (remove-files filespec)
  (let ((files (glob filespec)))
    (for-each delete-file files)))

(define (stop-the-train)
  (thread-start! (make-thread (lambda ()
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
;;======================================================================

(define *common:this-exe-fullpath* (common:get-this-exe-fullpath))
(define *common:this-exe-dir*      (pathname-directory *common:this-exe-fullpath*))
(define *common:this-exe-name*     (pathname-strip-directory *common:this-exe-fullpath*))

(define (common:get-sync-lock-filepath)
  (let* ((tmp-area     (dbfile:make-tmpdir-name *toppath* ""))
         (lockfile     (conc tmp-area "/megatest.db.lock")))
    lockfile))

(define *common:logpro-exit-code->status-sym-alist*
  '( ( 0 . pass )
     ( 1 . fail )
     ( 2 . warn )







|







253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
;;======================================================================

(define *common:this-exe-fullpath* (common:get-this-exe-fullpath))
(define *common:this-exe-dir*      (pathname-directory *common:this-exe-fullpath*))
(define *common:this-exe-name*     (pathname-strip-directory *common:this-exe-fullpath*))

(define (common:get-sync-lock-filepath)
  (let* ((tmp-area     (common:make-tmpdir-name *toppath* ""))
         (lockfile     (conc tmp-area "/megatest.db.lock")))
    lockfile))

(define *common:logpro-exit-code->status-sym-alist*
  '( ( 0 . pass )
     ( 1 . fail )
     ( 2 . warn )
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
;;======================================================================
;; lazy-safe get file mod time. on any error (file not existing etc.) return 0
;;
(define (common:lazy-modification-time fpath)
  (handle-exceptions
      exn
    (begin
      (debug:print 0 *default-log-port* "Failed to get modifcation time for " fpath ", treating it as zero. exn=" exn)
      0)
    (if (file-exists? fpath)
	(file-modification-time fpath)
	0)))

;;======================================================================
;; find timestamp of newest file associated with a sqlite db file







|







1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
;;======================================================================
;; lazy-safe get file mod time. on any error (file not existing etc.) return 0
;;
(define (common:lazy-modification-time fpath)
  (handle-exceptions
      exn
    (begin
      (debug:print 2 *default-log-port* "Failed to get modification time for " fpath ", treating it as zero. exn=" exn)
      0)
    (if (file-exists? fpath)
	(file-modification-time fpath)
	0)))

;;======================================================================
;; find timestamp of newest file associated with a sqlite db file
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
;; returns: ok/not dbspace required-space
;;
(define (common:check-db-dir-space)
  (let* ((required (string->number 
                    ;; default is 1GB (or actually a billion bytes) This is the number of 1 kB blocks.
		    (or (configf:lookup *configdat* "setup" "dbdir-space-required")
			"1000000")))
	 (dbdir    (dbfile:make-tmpdir-name *toppath* "")) ;; (db:get-dbdir))
	 (tdbspace (common:check-space-in-dir dbdir required))
	 (mdbspace (common:check-space-in-dir *toppath* required)))
    (sort (list tdbspace mdbspace) (lambda (a b)
				     (< (cadr a)(cadr b))))))

;;======================================================================
;; check available space in dbdir, exit if insufficient







|







2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
;; returns: ok/not dbspace required-space
;;
(define (common:check-db-dir-space)
  (let* ((required (string->number 
                    ;; default is 1GB (or actually a billion bytes) This is the number of 1 kB blocks.
		    (or (configf:lookup *configdat* "setup" "dbdir-space-required")
			"1000000")))
	 (dbdir    (common:make-tmpdir-name *toppath* "")) ;; (db:get-dbdir))
	 (tdbspace (common:check-space-in-dir dbdir required))
	 (mdbspace (common:check-space-in-dir *toppath* required)))
    (sort (list tdbspace mdbspace) (lambda (a b)
				     (< (cadr a)(cadr b))))))

;;======================================================================
;; check available space in dbdir, exit if insufficient

Modified dashboard-tests.scm from [d3d14d0eb8] to [63a55f86f7].

461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
    dlog))


;;======================================================================
;;
;;======================================================================
(define (dashboard-tests:examine-test run-id test-id) ;; run-id run-key origtest)
  (let* ((db-path       (db:dbfile-path)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/db/" run-id ".db"))
	 (dbstruct      #f) ;; NOT USED
	 (testdat        (rmt:get-test-info-by-id run-id test-id)) ;; (db:get-test-info-by-id dbstruct run-id test-id))
	 (db-mod-time   0) ;; (file-modification-time db-path))
	 (last-update   0) ;; (current-seconds))
	 (request-update #t))
    (if (not testdat)
	(begin







|







461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
    dlog))


;;======================================================================
;;
;;======================================================================
(define (dashboard-tests:examine-test run-id test-id) ;; run-id run-key origtest)
  (let* ((db-path       (common:make-tmpdir-name *toppath* "")) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/db/" run-id ".db"))
	 (dbstruct      #f) ;; NOT USED
	 (testdat        (rmt:get-test-info-by-id run-id test-id)) ;; (db:get-test-info-by-id dbstruct run-id test-id))
	 (db-mod-time   0) ;; (file-modification-time db-path))
	 (last-update   0) ;; (current-seconds))
	 (request-update #t))
    (if (not testdat)
	(begin

Modified dashboard.scm from [b1dc6c475c] to [92015a98e3].

400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
(define (dboard:tabdat-make-data)
  (let ((dat (make-dboard:tabdat)))
    (dboard:setup-tabdat dat)
    (dboard:setup-num-rows dat)
    dat))

(define (dboard:setup-tabdat tabdat)
  (dboard:tabdat-dbdir-set! tabdat (dbfile:make-tmpdir-name *toppath* "")) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/.db"))
  (dboard:tabdat-dbfpath-set! tabdat (dbfile:make-tmpdir-name *toppath* ""))
  (dboard:tabdat-monitor-db-path-set! tabdat (conc (dboard:tabdat-dbdir tabdat) "/monitor.db"))


  ;; HACK ALERT: this is a hack, please fix.
  (dboard:tabdat-ro-set! tabdat (not (file-read-access? (dboard:tabdat-dbfpath tabdat))))
  (dboard:tabdat-keys-set! tabdat (rmt:get-keys))
  (dboard:tabdat-dbkeys-set! tabdat (append (dboard:tabdat-keys tabdat) (list "runname")))







|
|







400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
(define (dboard:tabdat-make-data)
  (let ((dat (make-dboard:tabdat)))
    (dboard:setup-tabdat dat)
    (dboard:setup-num-rows dat)
    dat))

(define (dboard:setup-tabdat tabdat)
  (dboard:tabdat-dbdir-set! tabdat (common:make-tmpdir-name *toppath* "")) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/.db"))
  (dboard:tabdat-dbfpath-set! tabdat (common:make-tmpdir-name *toppath* ""))
  (dboard:tabdat-monitor-db-path-set! tabdat (conc (dboard:tabdat-dbdir tabdat) "/monitor.db"))


  ;; HACK ALERT: this is a hack, please fix.
  (dboard:tabdat-ro-set! tabdat (not (file-read-access? (dboard:tabdat-dbfpath tabdat))))
  (dboard:tabdat-keys-set! tabdat (rmt:get-keys))
  (dboard:tabdat-dbkeys-set! tabdat (append (dboard:tabdat-keys tabdat) (list "runname")))
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
	      (if (null? all-test-ids)
		  (hash-table-delete! (dboard:tabdat-allruns-by-id tabdat) run-id)
		  (hash-table-set!    (dboard:tabdat-allruns-by-id tabdat) run-id run-struct))
	      (if (or (null? tal)
		      (> elapsed-time 2)) ;; stop loading data after 5 seconds, on the next call more data *should* be loaded since get-tests-for-run uses last update
		  (begin
		    (when (> elapsed-time 2)   
                      (debug:print 0 *default-log-port* "NOTE: updates are taking a long time, " elapsed-time "s elapsed.")
                      (let* ((old-val (iup:attribute *tim* "TIME"))
                             (new-val (number->string (inexact->exact (floor (* 2  (string->number old-val)))))))
                        (if (< (string->number new-val) 5000)
                            (begin
			      (debug:print 0 *default-log-port* "NOTE: increasing poll interval from "old-val" to "new-val)
			      (iup:attribute-set! *tim* "TIME" new-val)))))
		    (dboard:tabdat-allruns-set! tabdat new-res)
		    maxtests)
		  (if (> (dboard:rundat-run-data-offset run-struct) 0)
		      (loop run tal new-res newmaxtests) ;; not done getting data for this run
		      (loop (car tal)(cdr tal) new-res newmaxtests)))))))
    (dboard:tabdat-filters-changed-set! tabdat #f)







|




|







926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
	      (if (null? all-test-ids)
		  (hash-table-delete! (dboard:tabdat-allruns-by-id tabdat) run-id)
		  (hash-table-set!    (dboard:tabdat-allruns-by-id tabdat) run-id run-struct))
	      (if (or (null? tal)
		      (> elapsed-time 2)) ;; stop loading data after 5 seconds, on the next call more data *should* be loaded since get-tests-for-run uses last update
		  (begin
		    (when (> elapsed-time 2)   
                      (debug:print 2 *default-log-port* "NOTE: updates are taking a long time, " elapsed-time "s elapsed.")
                      (let* ((old-val (iup:attribute *tim* "TIME"))
                             (new-val (number->string (inexact->exact (floor (* 2  (string->number old-val)))))))
                        (if (< (string->number new-val) 5000)
                            (begin
			      (debug:print 2 *default-log-port* "NOTE: increasing poll interval from "old-val" to "new-val)
			      (iup:attribute-set! *tim* "TIME" new-val)))))
		    (dboard:tabdat-allruns-set! tabdat new-res)
		    maxtests)
		  (if (> (dboard:rundat-run-data-offset run-struct) 0)
		      (loop run tal new-res newmaxtests) ;; not done getting data for this run
		      (loop (car tal)(cdr tal) new-res newmaxtests)))))))
    (dboard:tabdat-filters-changed-set! tabdat #f)

Modified db.scm from [6d82fac5fa] to [b1837f1312].

131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
	   (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
	   (print-call-chain (current-error-port))
	   default)))
   (apply sqlite3:first-result db stmt params)))

(define (db:setup do-sync)
  (assert *toppath* "FATAL: db:setup called before launch:setup has been run.")
  (let* ((tmpdir (dbfile:make-tmpdir-name *toppath* "")))
    (if (not *dbstruct-dbs*)
	(dbfile:setup do-sync *toppath* tmpdir)
	*dbstruct-dbs*)))

;; moved from dbfile
;;
;; ADD run-id SUPPORT







|







131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
	   (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
	   (print-call-chain (current-error-port))
	   default)))
   (apply sqlite3:first-result db stmt params)))

(define (db:setup do-sync)
  (assert *toppath* "FATAL: db:setup called before launch:setup has been run.")
  (let* ((tmpdir (common:make-tmpdir-name *toppath* "")))
    (if (not *dbstruct-dbs*)
	(dbfile:setup do-sync *toppath* tmpdir)
	*dbstruct-dbs*)))

;; moved from dbfile
;;
;; ADD run-id SUPPORT
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
    (max (get-mtime fname)
	 (get-mtime wal-file)
	 (get-mtime shm-file))))
	 
;; (define (db:all-db-sync dbstruct)
;;   (let* ((dbdat (db:open-db dbstruct #f db:initialize-main-db))
;; 	 (data-synced       0) ;; count of changed records
;;     (tmp-area       (dbfile:make-tmpdir-name *toppath*))
;;     (dbfiles        (glob (conc tmp-area"/.mtdb/*.db")))
;;     (sync-durations (make-hash-table))
;;     (no-sync-db        (db:open-no-sync-db)))
;;     (for-each
;;      (lambda (file) ;; tmp db file
;;        (debug:print-info 3 *default-log-port* "file: " file)
;;        (let* ((fname       (conc (pathname-file file) ".db")) ;; fname is tmp db file







|







458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
    (max (get-mtime fname)
	 (get-mtime wal-file)
	 (get-mtime shm-file))))
	 
;; (define (db:all-db-sync dbstruct)
;;   (let* ((dbdat (db:open-db dbstruct #f db:initialize-main-db))
;; 	 (data-synced       0) ;; count of changed records
;;     (tmp-area       (common:make-tmpdir-name *toppath*))
;;     (dbfiles        (glob (conc tmp-area"/.mtdb/*.db")))
;;     (sync-durations (make-hash-table))
;;     (no-sync-db        (db:open-no-sync-db)))
;;     (for-each
;;      (lambda (file) ;; tmp db file
;;        (debug:print-info 3 *default-log-port* "file: " file)
;;        (let* ((fname       (conc (pathname-file file) ".db")) ;; fname is tmp db file
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
;;  'closeall     - close all opened dbs
;;  'schema       - attempt to apply schema changes
;;  run-ids: '(1 2 3 ...) or #f (for all)
;;
(define (db:multi-db-sync dbstruct . options)
  (let* (;; (dbdat       (db:open-db dbstruct #f dbfile:db-init-proc))
	 (data-synced 0) ;; count of changed records
	 (tmp-area       (dbfile:make-tmpdir-name *toppath* ""))
	 (old2new (member 'old2new options))
	 (dejunk (member 'dejunk options))
	 (killservers (member 'killservers options))
	 (src-area (if old2new *toppath* tmp-area))
	 (dest-area (if old2new tmp-area *toppath*))
	 (dbfiles        (if old2new (glob (conc *toppath* "/.mtdb/*.db"))
			     (glob (conc tmp-area "/.mtdb/*.db"))))







|







547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
;;  'closeall     - close all opened dbs
;;  'schema       - attempt to apply schema changes
;;  run-ids: '(1 2 3 ...) or #f (for all)
;;
(define (db:multi-db-sync dbstruct . options)
  (let* (;; (dbdat       (db:open-db dbstruct #f dbfile:db-init-proc))
	 (data-synced 0) ;; count of changed records
	 (tmp-area       (common:make-tmpdir-name *toppath* ""))
	 (old2new (member 'old2new options))
	 (dejunk (member 'dejunk options))
	 (killservers (member 'killservers options))
	 (src-area (if old2new *toppath* tmp-area))
	 (dest-area (if old2new tmp-area *toppath*))
	 (dbfiles        (if old2new (glob (conc *toppath* "/.mtdb/*.db"))
			     (glob (conc tmp-area "/.mtdb/*.db"))))
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259

;;======================================================================
;; no-sync.db - small bits of data to be shared between servers
;;======================================================================

(define (db:get-dbsync-path)
  (case (rmt:transport-mode)
    ((http)(dbfile:make-tmpdir-name *toppath* ""))
    ((tcp) (conc *toppath*"/.mtdb"))
    ((nfs) (conc *toppath*"/.mtdb"))
    (else "/tmp/dunno-this-gonna-exist")))

;; This is needed for api.scm
(define (db:open-no-sync-db)
   (dbfile:open-no-sync-db (db:get-dbsync-path)))







|







1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259

;;======================================================================
;; no-sync.db - small bits of data to be shared between servers
;;======================================================================

(define (db:get-dbsync-path)
  (case (rmt:transport-mode)
    ((http)(common:make-tmpdir-name *toppath* ""))
    ((tcp) (conc *toppath*"/.mtdb"))
    ((nfs) (conc *toppath*"/.mtdb"))
    (else "/tmp/dunno-this-gonna-exist")))

;; This is needed for api.scm
(define (db:open-no-sync-db)
   (dbfile:open-no-sync-db (db:get-dbsync-path)))
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
    res))

;; TODO: Switch this to use max(update_time) from each run db? Then if using a server there is no disk traffic (using cachedb db) ???
;;
;; NOTE: This DOESN'T (necessarily) get the real run ids, but the number of the <number>.db!!

(define (db:get-changed-run-ids since-time)
  (let* ((dbdir      (dbfile:make-tmpdir-name *toppath* "")) ;; (configf:lookup *configdat* "setup" "dbdir"))
	 (alldbs     (glob (conc *toppath* "/.mtdb/[0-9]*.db*")))
	 (changed    (filter (lambda (dbfile)
			       (> (file-modification-time dbfile) since-time))
			     alldbs)))
    (delete-duplicates
     (map (lambda (dbfile)
	    (let* ((res (string-match ".*\\/(\\d+)\\.db*" dbfile)))







|







1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
    res))

;; TODO: Switch this to use max(update_time) from each run db? Then if using a server there is no disk traffic (using cachedb db) ???
;;
;; NOTE: This DOESN'T (necessarily) get the real run ids, but the number of the <number>.db!!

(define (db:get-changed-run-ids since-time)
  (let* ((dbdir      (common:make-tmpdir-name *toppath* "")) ;; (configf:lookup *configdat* "setup" "dbdir"))
	 (alldbs     (glob (conc *toppath* "/.mtdb/[0-9]*.db*")))
	 (changed    (filter (lambda (dbfile)
			       (> (file-modification-time dbfile) since-time))
			     alldbs)))
    (delete-duplicates
     (map (lambda (dbfile)
	    (let* ((res (string-match ".*\\/(\\d+)\\.db*" dbfile)))
4305
4306
4307
4308
4309
4310
4311
4312
4313
4314
4315
4316
4317
4318
4319
          (debug:print 0 *default-log-port* "could not get lock for " from-db " from no-sync-db")
	  #f
        ))))

;; sync for filesystem local db writes
;;
(define (db:run-lock-and-sync no-sync-db)
  (let* ((tmp-area       (dbfile:make-tmpdir-name *toppath* ""))
	 (dbfiles        (glob (conc tmp-area"/.mtdb/*.db")))
	 (sync-durations (make-hash-table)))
    ;; (debug:print-info 0 *default-log-port* "lock-and-sync, dbfiles: "dbfiles)
    (for-each
     (lambda (file)
       (let* ((fname (conc (pathname-file file) ".db"))
	      (fulln (conc *toppath*"/.mtdb/"fname))







|







4305
4306
4307
4308
4309
4310
4311
4312
4313
4314
4315
4316
4317
4318
4319
          (debug:print 0 *default-log-port* "could not get lock for " from-db " from no-sync-db")
	  #f
        ))))

;; sync for filesystem local db writes
;;
(define (db:run-lock-and-sync no-sync-db)
  (let* ((tmp-area       (common:make-tmpdir-name *toppath* ""))
	 (dbfiles        (glob (conc tmp-area"/.mtdb/*.db")))
	 (sync-durations (make-hash-table)))
    ;; (debug:print-info 0 *default-log-port* "lock-and-sync, dbfiles: "dbfiles)
    (for-each
     (lambda (file)
       (let* ((fname (conc (pathname-file file) ".db"))
	      (fulln (conc *toppath*"/.mtdb/"fname))
4361
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
  (thread-sleep! 0.05) ;; delay for startup
  (let ((legacy-sync        (common:run-sync?))
	(sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300))
 	(debug-mode         (debug:debug-mode 1))
 	(last-time          (current-seconds))     ;; last time through the sync loop
 	(no-sync-db         (db:open-no-sync-db))
 	(sync-duration      0)  ;; run time of the sync in milliseconds
	(tmp-area           (dbfile:make-tmpdir-name *toppath* "")))
    ;; Sync moved to http-transport keep-running loop
    (debug:print-info 2 *default-log-port* "Periodic copy-based sync thread started. syncer is copy-sync, tmp-area is " tmp-area)
    (debug:print-info 3 *default-log-port* "watchdog starting. syncer is copy-sync pid="(current-process-id));;  " this-wd-num="this-wd-num)
    
    (if (and legacy-sync (not *time-to-exit*))
 	(begin
 	  (debug:print-info 0 *default-log-port* "Server running, periodic copy-based sync started.")







|







4361
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
  (thread-sleep! 0.05) ;; delay for startup
  (let ((legacy-sync        (common:run-sync?))
	(sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300))
 	(debug-mode         (debug:debug-mode 1))
 	(last-time          (current-seconds))     ;; last time through the sync loop
 	(no-sync-db         (db:open-no-sync-db))
 	(sync-duration      0)  ;; run time of the sync in milliseconds
	(tmp-area           (common:make-tmpdir-name *toppath* "")))
    ;; Sync moved to http-transport keep-running loop
    (debug:print-info 2 *default-log-port* "Periodic copy-based sync thread started. syncer is copy-sync, tmp-area is " tmp-area)
    (debug:print-info 3 *default-log-port* "watchdog starting. syncer is copy-sync pid="(current-process-id));;  " this-wd-num="this-wd-num)
    
    (if (and legacy-sync (not *time-to-exit*))
 	(begin
 	  (debug:print-info 0 *default-log-port* "Server running, periodic copy-based sync started.")
4469
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
4482
4483
 		      ;;
 		     
 		       (for-each
 			(lambda (subdb)
 			  (let* (;;(dbstruct (db:setup))
 				 (mtdb       (dbr:subdb-mtdb subdb))
 				 (mtpath     (db:dbdat-get-path mtdb))
 				 (tmp-area   (dbfile:make-tmpdir-name *toppath* ""))
 				 (res        (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
 			    (set! sync-duration (- (current-milliseconds) sync-start))
 			    (if (> res 0) ;; some records were transferred, keep the db alive
 				(begin
 				  (mutex-lock! *heartbeat-mutex*)
 				  (set! *db-last-access* (current-seconds))
 				  (mutex-unlock! *heartbeat-mutex*)







|







4469
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
4482
4483
 		      ;;
 		     
 		       (for-each
 			(lambda (subdb)
 			  (let* (;;(dbstruct (db:setup))
 				 (mtdb       (dbr:subdb-mtdb subdb))
 				 (mtpath     (db:dbdat-get-path mtdb))
 				 (tmp-area   (common:make-tmpdir-name *toppath* ""))
 				 (res        (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
 			    (set! sync-duration (- (current-milliseconds) sync-start))
 			    (if (> res 0) ;; some records were transferred, keep the db alive
 				(begin
 				  (mutex-lock! *heartbeat-mutex*)
 				  (set! *db-last-access* (current-seconds))
 				  (mutex-unlock! *heartbeat-mutex*)

Modified dbmod.scm from [1d31a00395] to [9f0ce614a3].

196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
			    (tmpadj  "")       ;; add to tmp path
			    (syncdir 'todisk)) ;; todisk is used when caching in /tmp and writing data back to MTRAH
  (let* ((dbstruct     (or dbstruct-in (make-dbr:dbstruct areapath: areapath)))
	 (dbfname      (or dbfname-in (dbmod:run-id->dbfname run-id)))
	 (dbpath       (dbmod:get-dbdir dbstruct))             ;; directory where all the .db files are kept
	 (dbfullname   (conc dbpath"/"dbfname)) ;; (dbmod:run-id->full-dbfname dbstruct run-id))
	 (dbexists     (file-exists? dbfullname))
	 (tmpdir       (dbfile:make-tmpdir-name areapath tmpadj))
	 (tmpdb        (let* ((fname (conc tmpdir"/"dbfname)))
			 fname))
	 (cachedb        (dbmod:open-cachedb-db init-proc
					    ;; (if (eq? (dbfile:cache-method) 'cachedb)
					    ;; 	#f
					    tmpdb
					    ;; )







|







196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
			    (tmpadj  "")       ;; add to tmp path
			    (syncdir 'todisk)) ;; todisk is used when caching in /tmp and writing data back to MTRAH
  (let* ((dbstruct     (or dbstruct-in (make-dbr:dbstruct areapath: areapath)))
	 (dbfname      (or dbfname-in (dbmod:run-id->dbfname run-id)))
	 (dbpath       (dbmod:get-dbdir dbstruct))             ;; directory where all the .db files are kept
	 (dbfullname   (conc dbpath"/"dbfname)) ;; (dbmod:run-id->full-dbfname dbstruct run-id))
	 (dbexists     (file-exists? dbfullname))
	 (tmpdir       (common:make-tmpdir-name areapath tmpadj))
	 (tmpdb        (let* ((fname (conc tmpdir"/"dbfname)))
			 fname))
	 (cachedb        (dbmod:open-cachedb-db init-proc
					    ;; (if (eq? (dbfile:cache-method) 'cachedb)
					    ;; 	#f
					    tmpdb
					    ;; )

Modified megatest.scm from [db5cb1955c] to [af8974dd23].

1091
1092
1093
1094
1095
1096
1097

1098

1099
1100
1101
1102
1103
1104
1105
              sfiles
            )
          )
       )
       dbfiles
     )
     ;; remove this db, because otherwise metadata contains records for old servers, and this causes a problem with db:no-sync-get-lock-with-id.

     (delete-file (conc *toppath* "/.mtdb/no-sync.db"))

     (set! *didsomething* #t)
     (exit)  
  )
)

;;======================================================================
;; Weird special calls that need to run *after* the server has started?







>
|
>







1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
              sfiles
            )
          )
       )
       dbfiles
     )
     ;; remove this db, because otherwise metadata contains records for old servers, and this causes a problem with db:no-sync-get-lock-with-id.
     (if (file-exists? (conc *toppath* "/.mtdb/no-sync.db"))
       (delete-file (conc *toppath* "/.mtdb/no-sync.db"))
     )
     (set! *didsomething* #t)
     (exit)  
  )
)

;;======================================================================
;; Weird special calls that need to run *after* the server has started?
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
             (begin 
             (debug:print-info 1 *default-log-port* "Missing required argument -source <archive path>")
             (exit 1)))
         (if (common:file-exists? (conc  *toppath* "/megatest.db"))
             (begin  
               (debug:print-info 1 *default-log-port* "File " (conc  *toppath* "/megatest.db") " already exists. Please remove it before trying to replicate db")
               (exit 1)))
         (if (and (dbfile:make-tmpdir-name *toppath* "") (> (length (directory   (dbfile:make-tmpdir-name *toppath* "") #f)) 0))
           (begin
           (debug:print-info 1 *default-log-port* (dbfile:make-tmpdir-name *toppath* "") " not empty. Please remove it before trying to replicate db")
           (exit 1)))    
          ;; check if timestamp 
          (let* ((source (args:get-arg "-source"))
                (src     (if (not (equal? (substring source 0 1) "/"))
                             (conc (current-directory) "/" source)
                             source))
                (ts (if (args:get-arg "-time-stamp")   (args:get-arg "-time-stamp") "latest")))







|

|







2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
             (begin 
             (debug:print-info 1 *default-log-port* "Missing required argument -source <archive path>")
             (exit 1)))
         (if (common:file-exists? (conc  *toppath* "/megatest.db"))
             (begin  
               (debug:print-info 1 *default-log-port* "File " (conc  *toppath* "/megatest.db") " already exists. Please remove it before trying to replicate db")
               (exit 1)))
         (if (and (common:make-tmpdir-name *toppath* "") (> (length (directory   (common:make-tmpdir-name *toppath* "") #f)) 0))
           (begin
           (debug:print-info 1 *default-log-port* (common:make-tmpdir-name *toppath* "") " not empty. Please remove it before trying to replicate db")
           (exit 1)))    
          ;; check if timestamp 
          (let* ((source (args:get-arg "-source"))
                (src     (if (not (equal? (substring source 0 1) "/"))
                             (conc (current-directory) "/" source)
                             source))
                (ts (if (args:get-arg "-time-stamp")   (args:get-arg "-time-stamp") "latest")))

Modified rmt.scm from [64f3d622e8] to [564930aec3].

165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
				 (cons 'none 0))
			     (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
    (mutex-unlock! *db-stats-mutex*)
    res))

(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
  (let* ((qry-is-write    (not (member cmd api:read-only-queries)))
	 (db-file-path    (db:dbfile-path)) ;;  0))
	 (dbstructs-local (db:setup #t))
	 (read-only       (not (file-write-access? db-file-path)))
	 (start           (current-milliseconds))
	 (resdat          (if (not (and read-only qry-is-write))
			      (let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params))))
			;;	(handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
			;;	 exn               ;;  This is an attempt to detect that situation and recover gracefully







|







165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
				 (cons 'none 0))
			     (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
    (mutex-unlock! *db-stats-mutex*)
    res))

(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
  (let* ((qry-is-write    (not (member cmd api:read-only-queries)))
	 (db-file-path    (common:make-tmpdir-name *toppath* "")) ;;  0))
	 (dbstructs-local (db:setup #t))
	 (read-only       (not (file-write-access? db-file-path)))
	 (start           (current-milliseconds))
	 (resdat          (if (not (and read-only qry-is-write))
			      (let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params))))
			;;	(handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
			;;	 exn               ;;  This is an attempt to detect that situation and recover gracefully

Modified tasks.scm from [4adbc308eb] to [93c938d59a].

82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
	     (debug:print 5 *default-log-port* " exn=" (condition->list exn))
	     (thread-sleep! 1)
	     (tasks:open-db numretries (- numretries 1)))
	   (begin
	     (print-call-chain (current-error-port))
	     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 5 *default-log-port* " exn=" (condition->list exn))))
       (let* ((dbpath        (db:dbfile-path )) ;; (tasks:get-task-db-path))
	      (dbfile       (conc dbpath "/monitor.db"))
	      (avail        (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away
	      (exists       (common:file-exists? dbpath))
	      (write-access (file-write-access? dbpath))
	      (mdb          (cond ;; what the hek is *toppath* doing here?
			     ((and (string? *toppath*)(file-write-access? *toppath*))
			      (sqlite3:open-database dbfile))







|







82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
	     (debug:print 5 *default-log-port* " exn=" (condition->list exn))
	     (thread-sleep! 1)
	     (tasks:open-db numretries (- numretries 1)))
	   (begin
	     (print-call-chain (current-error-port))
	     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 5 *default-log-port* " exn=" (condition->list exn))))
       (let* ((dbpath        (common:make-tmpdir-name *toppath* "")) ;; (tasks:get-task-db-path))
	      (dbfile       (conc dbpath "/monitor.db"))
	      (avail        (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away
	      (exists       (common:file-exists? dbpath))
	      (write-access (file-write-access? dbpath))
	      (mdb          (cond ;; what the hek is *toppath* doing here?
			     ((and (string? *toppath*)(file-write-access? *toppath*))
			      (sqlite3:open-database dbfile))

Modified tcp-transportmod.scm from [1391b0d841] to [04adce729b].

169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193


194
195
196
197
198
199
200
201
202
		 (case ping-res
		   ((running)
                    (debug:print-info 2 *default-log-port* "Setting conn = " conn " in hash table")
		    (hash-table-set! (tt-conns ttdat) dbfname conn) ;;; is this ok to save before validating that the connection is good?
		    conn)
		   ((starting)
		    (thread-sleep! 0.5)
                    (debug:print-info 2 *default-log-port* "server was in starting state, retrying connect")
		    (tt:client-connect-to-server ttdat dbfname run-id testsuite))
		   (else
		    (let* ((curr-secs (current-seconds)))
		      ;; rm the (last server) would go here
		      (if (> (- curr-secs (tt-last-serv-start ttdat)) 10)
			  (begin
			    (tt-last-serv-start-set! ttdat curr-secs)
			    (server-start-proc))) ;; start server if 30 sec since last attempt
		      (thread-sleep! 1)
                      (debug:print-info 2 *default-log-port* "server ping result was neither running nor starting. Retrying connect")
		      (tt:client-connect-to-server ttdat dbfname run-id testsuite)))))))
	    (else ;; no good server found, if haven't started server in > 5 secs, start another
	     (if (> (- (current-seconds) (tt-last-serv-start ttdat)) 5) ;; BUG - grow this number really do not want to swamp the machine with servers
		 (begin
		   (debug:print-info 0 *default-log-port* "No server found. Starting one for run-id "run-id" in dbfile "dbfname)
		   (server-start-proc)
		   (tt-last-serv-start-set! ttdat (current-seconds))))


	     (thread-sleep! 1)
             (debug:print-info 2 *default-log-port* "no good server found, try connect again")
	     (tt:client-connect-to-server ttdat dbfname run-id testsuite)))))))

(define (tt:timed-ping host port server-id)
  (let* ((start-time (current-milliseconds))
	 (result     (tt:ping host port server-id)))
    (cons result (- (current-milliseconds) start-time))))
    







|







|




|

|

|
>
>

|







169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
		 (case ping-res
		   ((running)
                    (debug:print-info 2 *default-log-port* "Setting conn = " conn " in hash table")
		    (hash-table-set! (tt-conns ttdat) dbfname conn) ;;; is this ok to save before validating that the connection is good?
		    conn)
		   ((starting)
		    (thread-sleep! 0.5)
                    (debug:print-info 0 *default-log-port* "server for " dbfname " is in starting state, retrying connect")
		    (tt:client-connect-to-server ttdat dbfname run-id testsuite))
		   (else
		    (let* ((curr-secs (current-seconds)))
		      ;; rm the (last server) would go here
		      (if (> (- curr-secs (tt-last-serv-start ttdat)) 10)
			  (begin
			    (tt-last-serv-start-set! ttdat curr-secs)
			    (server-start-proc))) ;; start server if 10 sec since last attempt
		      (thread-sleep! 1)
                      (debug:print-info 2 *default-log-port* "server ping result was neither running nor starting. Retrying connect")
		      (tt:client-connect-to-server ttdat dbfname run-id testsuite)))))))
	    (else ;; no good server found, if haven't started server in > 5 secs, start another
	     (if (> (- (current-seconds) (tt-last-serv-start ttdat)) 3) ;; BUG - grow this number really do not want to swamp the machine with servers
		 (begin
		   (debug:print-info 0 *default-log-port* "Starting server for "dbfname)
		   (server-start-proc)
		   (tt-last-serv-start-set! ttdat (current-seconds))
                   (thread-sleep! 3)
                   ))
	     (thread-sleep! 1)
             (debug:print-info 0 *default-log-port* "Connect to server for " dbfname)
	     (tt:client-connect-to-server ttdat dbfname run-id testsuite)))))))

(define (tt:timed-ping host port server-id)
  (let* ((start-time (current-milliseconds))
	 (result     (tt:ping host port server-id)))
    (cons result (- (current-milliseconds) start-time))))
    
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
			     " -server - ";; (or target-host "-")
			     " -m testsuite:"testsuite
			     " -db "dbfname ;; (dbmod:run-id->dbfname run-id)
			     " " profile-mode
			     (conc " >> " logfile " 2>&1 &"))))
	    ;; we want the remote server to start in *toppath* so push there
	    ;; (push-directory areapath) ;; use cd in the command line instead
	    (debug:print 0 *default-log-port* "INFO: Trying to start server in tcp mode (" cmdln ") at "(common:human-time)" for "areapath)
	    ;; (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time))

	    (system cmdln)
	    ;; ;; use below to go back to nbfake - nbfake does cause trouble ...
	    ;; (setenv "NBFAKE_QUIET" "yes") ;; BUG: change to with-environment-variable ...
	    ;; (setenv "NBFAKE_LOG" logfile)
	    ;; (system (conc "cd "areapath" ; nbfake " cmdln))







|







795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
			     " -server - ";; (or target-host "-")
			     " -m testsuite:"testsuite
			     " -db "dbfname ;; (dbmod:run-id->dbfname run-id)
			     " " profile-mode
			     (conc " >> " logfile " 2>&1 &"))))
	    ;; we want the remote server to start in *toppath* so push there
	    ;; (push-directory areapath) ;; use cd in the command line instead
	    (debug:print 2 *default-log-port* "INFO: Trying to start server in tcp mode (" cmdln ") at "(common:human-time)" for "areapath)
	    ;; (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time))

	    (system cmdln)
	    ;; ;; use below to go back to nbfake - nbfake does cause trouble ...
	    ;; (setenv "NBFAKE_QUIET" "yes") ;; BUG: change to with-environment-variable ...
	    ;; (setenv "NBFAKE_LOG" logfile)
	    ;; (system (conc "cd "areapath" ; nbfake " cmdln))