Megatest

Check-in [b21843154b]
Login
Overview
Comment:Merged fork
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.80
Files: files | file ages | folders
SHA1: b21843154b0af774a2fbc2630cdeb449c9c72f33
User & Date: mrwellan on 2023-05-11 09:14:12
Other Links: branch diff | manifest | tags
Context
2023-05-11
12:12
Gate running the megatest syncer by existance of lock file. check-in: 12c06b6095 user: matt tags: v1.80
09:14
Merged fork check-in: b21843154b user: mrwellan tags: v1.80
09:12
Added useful target for commits in Makefile check-in: 72d16d9632 user: mrwellan tags: v1.80
09:08
reworked the sync locking, allow parallel servers (needs work to lock in to single machine) check-in: 7fb44b797e user: matt tags: v1.80
Changes

Modified common.scm from [369967adc8] to [8763974ace].

54
55
56
57
58
59
60


61
62
63
64
65
66
67
68
69
;;       (old-exit code)))

(define (stop-the-train)
  (thread-start! (make-thread (lambda ()
				(let loop ()
				  (if (and *toppath*
					   (file-exists? (conc *toppath*"/stop-the-train")))


				      (begin
					(debug:print 0 *default-log-port* "ERROR: found file "*toppath*"/stop-the-train, exiting immediately")
					(exit 1)))
				  (thread-sleep! 5)
				  (loop))))))

;; execute thunk, return value.  If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* .
;; arguments - thunk, message
(define (common:fail-safe thunk warning-message-on-exception)







>
>
|
|







54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
;;       (old-exit code)))

(define (stop-the-train)
  (thread-start! (make-thread (lambda ()
				(let loop ()
				  (if (and *toppath*
					   (file-exists? (conc *toppath*"/stop-the-train")))
				      (let* ((msg (conc "ERROR: found file "*toppath*"/stop-the-train, exiting immediately")))
					;; yes, print to current-output-port AND *default-log-port*, annoying but necessary I think
					(print msg)
					(debug:print 0 *default-log-port* msg)
					(exit 1)))
				  (thread-sleep! 5)
				  (loop))))))

;; execute thunk, return value.  If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* .
;; arguments - thunk, message
(define (common:fail-safe thunk warning-message-on-exception)

Modified dbfile.scm from [c251acff4f] to [27150153ec].

80
81
82
83
84
85
86

87
88
89
90
91
92
93
  ;;
  ;; for the inmem approach (see dbmod.scm)
  ;; this is one db per server
  (inmem     #f)  ;; handle for the in memory copy
  (dbfile    #f)  ;; path to the db file on disk
  (dbfname   #f)  ;; short name of db file on disk (used to validate accessing correct db)
  (ondiskdb  #f)  ;; handle for the on-disk file

  (dbdat     #f)  ;; create a dbdat for the downstream calls such as db:with-db
  (last-update 0)
  (sync-proc #f)
  )

;; NOTE: Need one dbr:subdb per main.db, 1.db ...
;;







>







80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
  ;;
  ;; for the inmem approach (see dbmod.scm)
  ;; this is one db per server
  (inmem     #f)  ;; handle for the in memory copy
  (dbfile    #f)  ;; path to the db file on disk
  (dbfname   #f)  ;; short name of db file on disk (used to validate accessing correct db)
  (ondiskdb  #f)  ;; handle for the on-disk file
  (dbtmpname #f)  ;; path to db file in /tmp (non-imem method)
  (dbdat     #f)  ;; create a dbdat for the downstream calls such as db:with-db
  (last-update 0)
  (sync-proc #f)
  )

;; NOTE: Need one dbr:subdb per main.db, 1.db ...
;;
514
515
516
517
518
519
520


521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547

548
549
550
551
552
553
554
555
556
557
558
559
			  (string->number res)
			  #f)))
          (if newres
              newres
              res))
        res)))



(define (db:extract-time-identifier instr)
  (let ((tokens (string-split instr "+")))
    (match tokens
      ((t i)(cons (string->number t) i))
      ((t)  (cons (string->number t) #f))
      (else
       (assert #f "FATAL: db:extract-time-identifier handed bad data "instr)))))

;; transaction protected lock aquisition
;; either:
;;    fails    returns  (#f . lock-creation-time)
;;    succeeds (returns (#t . lock-creation-time)
;; use (db:no-sync-del! db keyname) to release the lock
;;
(define (db:no-sync-get-lock-with-id db keyname identifier)
  (sqlite3:with-transaction
   db
   (lambda ()
     (condition-case
      (let* ((curr-val (db:no-sync-get/default db keyname #f)))
	(if curr-val
	    (match (db:extract-time-identifier curr-val)
	       ((timestamp ident)
		(if (equal? ident identifier)
		    #t     ;; this *is* my lock
		    #f))   ;; nope, not my lock
	       (else #f))  ;; nope, not my lock

	    (let ((lock-value (if identifier
				  (conc (current-seconds)"+"identifier)
				  (current-seconds))))
	      (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-value)
	      #t)))
      (exn (io-error)  (dbfile:print-err "ERROR: i/o error with no-sync db. Check permissions, disk space etc. and try again."))
      (exn (corrupt)   (dbfile:print-err "ERROR: database no-sync db is corrupt. Repair it to proceed."))
      (exn (busy)      (dbfile:print-err "ERROR: database no-sync db is locked. Try copying to another location, remove original and copy back."))
      (exn (permission)(dbfile:print-err "ERROR: database no-sync db has some permissions problem."))
      (exn () ;; (status done) ;; I don't know how to detect status done but no data!
	   (dbfile:print-err "ERROR: Unknown error with database no-sync db message: exn="(condition->list exn)", \n"
			     ((condition-property-accessor 'exn 'message) exn))







>
>










|
|









|


|
|
|
>
|



|







515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
			  (string->number res)
			  #f)))
          (if newres
              newres
              res))
        res)))

;; timestring+identifier+payload
;; locks are unique on identifier, payload is informational
(define (db:extract-time-identifier instr)
  (let ((tokens (string-split instr "+")))
    (match tokens
      ((t i)(cons (string->number t) i))
      ((t)  (cons (string->number t) #f))
      (else
       (assert #f "FATAL: db:extract-time-identifier handed bad data "instr)))))

;; transaction protected lock aquisition
;; either:
;;    fails    returns  (#f lock-creation-time identifier)
;;    succeeds (returns (#t lock-creation-time identifier)
;; use (db:no-sync-del! db keyname) to release the lock
;;
(define (db:no-sync-get-lock-with-id db keyname identifier)
  (sqlite3:with-transaction
   db
   (lambda ()
     (condition-case
      (let* ((curr-val (db:no-sync-get/default db keyname #f)))
	(if curr-val
	    (match (db:extract-time-identifier curr-val) ;; result->timestamp, identifier
	       ((timestamp ident)
		(if (equal? ident identifier)
		    (cons #t timestamp)    ;; this *is* my lock
		    (cons #f timestamp)))  ;; nope, not my lock
	       (else (cons #f #f)))  ;; nope, not my lock
	    (let ((curr-sec (current-seconds))
		  (lock-value (if identifier
				  (conc (current-seconds)"+"identifier)
				  (current-seconds))))
	      (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-value)
	      (cons #t curr-sec))))
      (exn (io-error)  (dbfile:print-err "ERROR: i/o error with no-sync db. Check permissions, disk space etc. and try again."))
      (exn (corrupt)   (dbfile:print-err "ERROR: database no-sync db is corrupt. Repair it to proceed."))
      (exn (busy)      (dbfile:print-err "ERROR: database no-sync db is locked. Try copying to another location, remove original and copy back."))
      (exn (permission)(dbfile:print-err "ERROR: database no-sync db has some permissions problem."))
      (exn () ;; (status done) ;; I don't know how to detect status done but no data!
	   (dbfile:print-err "ERROR: Unknown error with database no-sync db message: exn="(condition->list exn)", \n"
			     ((condition-property-accessor 'exn 'message) exn))

Modified dbmod.scm from [78b73bdc31] to [e109e9616a].

27
28
29
30
31
32
33

34
35

36
37
38
39
40
41
42
(module dbmod
	*
	
(import scheme
	chicken
	data-structures
	extras


	(prefix sqlite3 sqlite3:)

	posix
	typed-records
	srfi-1
	srfi-18
	srfi-69

	commonmod







>


>







27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
(module dbmod
	*
	
(import scheme
	chicken
	data-structures
	extras
	files

	(prefix sqlite3 sqlite3:)
	matchable
	posix
	typed-records
	srfi-1
	srfi-18
	srfi-69

	commonmod
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
	 (dbdat     (dbmod:open-db dbstruct run-id (dbfile:db-init-proc)))
	 (dbh       (dbr:dbdat-dbh dbdat)) ;; this will be the inmem handle
	 (dbfile    (dbr:dbdat-dbfile dbdat)))
    ;; if nfs mode do a sync if delta > 2
    (let* ((last-update (dbr:dbstruct-last-update dbstruct))
	   (sync-proc   (dbr:dbstruct-sync-proc dbstruct))
	   (curr-secs   (current-seconds)))
      (if (> (- curr-secs last-update) 3)
	  (begin
	    (sync-proc last-update)

	    ;; MOVE THIS CALL TO INSIDE THE sync-proc CALL
	    (dbr:dbstruct-last-update-set! dbstruct curr-secs)
	    )))
    (assert (sqlite3:database? dbh) "FATAL: bad db handle in dbmod:with-db") 







|







95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
	 (dbdat     (dbmod:open-db dbstruct run-id (dbfile:db-init-proc)))
	 (dbh       (dbr:dbdat-dbh dbdat)) ;; this will be the inmem handle
	 (dbfile    (dbr:dbdat-dbfile dbdat)))
    ;; if nfs mode do a sync if delta > 2
    (let* ((last-update (dbr:dbstruct-last-update dbstruct))
	   (sync-proc   (dbr:dbstruct-sync-proc dbstruct))
	   (curr-secs   (current-seconds)))
      (if (> (- curr-secs last-update) 5)
	  (begin
	    (sync-proc last-update)

	    ;; MOVE THIS CALL TO INSIDE THE sync-proc CALL
	    (dbr:dbstruct-last-update-set! dbstruct curr-secs)
	    )))
    (assert (sqlite3:database? dbh) "FATAL: bad db handle in dbmod:with-db") 
174
175
176
177
178
179
180
181

182
183
184
185
186
187
188
189

190
191
192
193
194
195
196
197
198
199
200
201
202
203

204
205
206
207
208
209
210
211

212
213




214
215
216

217
218
219
220
221
222
223
224
225
226
227
228
229
230
231

232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
			    (syncdir 'todisk))
  (let* ((dbstruct     (or dbstruct-in (make-dbr:dbstruct areapath: areapath)))
	 (dbfname      (or dbfname-in (dbmod:run-id->dbfname run-id)))
	 (dbpath       (dbmod:get-dbdir dbstruct))             ;; directory where all the .db files are kept
	 (dbfullname   (conc dbpath"/"dbfname)) ;; (dbmod:run-id->full-dbfname dbstruct run-id))
	 (dbexists     (file-exists? dbfullname))
	 (tmpdir       (conc "/tmp/"(current-user-name)))
	 (tmpdb        (let* ((fname (conc tmpdir"/" (string-translate areapath "/" ".")"-"(current-process-id)"-"dbfname)))

			 (if (not (file-exists? tmpdir))(create-directory tmpdir))
			 ;; check if tmpdb already exists, either delete it or
			 ;; add something to the name
			 fname))
	 (inmem        (dbmod:open-inmem-db init-proc
					    (if (eq? (dbfile:cache-method) 'inmem)
						#f
						tmpdb)

					    ))
	 (write-access (file-write-access? dbpath))
	 (db           (dbmod:safely-open-db dbfullname init-proc write-access))
	 (tables       (db:sync-all-tables-list keys)))
    (if (not (and (sqlite3:database? inmem)
		  (sqlite3:database? db)))
	(begin
	  (debug:print 0 *default-log-port* "ERROR: Failed to properly open "dbfname-in", exiting immediately.")
	  (exit)))
    ;; (assert (sqlite3:database? inmem) "FATAL: open-dbmoddb: inmem is not a db")
    ;; (assert (sqlite3:database? db) "FATAL:  open-dbmoddb: db is not a db")
    (dbr:dbstruct-inmem-set!     dbstruct inmem)
    (dbr:dbstruct-ondiskdb-set!  dbstruct db)
    (dbr:dbstruct-dbfile-set!    dbstruct dbfullname)

    (dbr:dbstruct-dbfname-set!   dbstruct dbfname)
    (dbr:dbstruct-sync-proc-set! dbstruct
				 (lambda (last-update)
				   (if *sync-in-progress*
				       (debug:print 3 *default-log-port* "WARNING: overlapping calls to sync to disk")
				       (begin
					 (mutex-lock! *db-with-db-mutex*) ;; this mutex is used when overloaded or during a query that modifies the db
					 (set! *sync-in-progress* #t)

					 (dbmod:sync-gasket tables last-update inmem db
							    dbfullname syncdir)




					 (mutex-unlock! *db-with-db-mutex*)
					 (thread-sleep! 0.5) ;; ensure at least 1/2 second down time between sync calls
					 (set! *sync-in-progress* #f)))))

    ;; (dbmod:sync-tables tables #f db inmem)
    ;; (if db
    (dbmod:sync-gasket tables #f inmem db dbfullname 'fromdest) ;; ) ;; load into inmem
    (dbr:dbstruct-last-update-set! dbstruct (current-seconds)) ;; should this be offset back in time by one second?
    dbstruct))

;;    (if (eq? syncdir 'todisk) ;; sync to disk normally, sync from in dashboard
;;        (dbmod:sync-tables tables last-update inmem db)
;;        (dbmod:sync-tables tables last-update db inmem))))

;; direction: 'fromdest 'todest
;;
(define (dbmod:sync-gasket tables last-update inmem dbh dbfname direction)
  (assert (sqlite3:database? inmem) "FATAL: sync-gasket: inmem is not a db")
  (assert (sqlite3:database? inmem) "FATAL: sync-gasket: dbh is not a db")

  (case (dbfile:sync-method)
    ((none) #f)
    ((attach)
     (dbmod:attach-sync tables inmem dbfname direction))
    ((newsync)
     (dbmod:new-sync tables inmem dbh dbfname direction))
    (else
     (case direction
       ((todisk)
	(dbmod:sync-tables tables last-update inmem dbh)
	)
       (else
	(dbmod:sync-tables tables last-update dbh inmem))))))

(define (dbmod:close-db dbstruct)
  ;; do final sync to disk file
  ;; (do-sync ...)
  (sqlite3:finalize! (dbr:dbstruct-ondiskdb dbstruct)))

;;======================================================================







|
>





|
|
|
>








<
|




>





|
|

>
|
|
>
>
>
>
|
|
|
>


|









|


>









|


|







176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201

202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
			    (syncdir 'todisk))
  (let* ((dbstruct     (or dbstruct-in (make-dbr:dbstruct areapath: areapath)))
	 (dbfname      (or dbfname-in (dbmod:run-id->dbfname run-id)))
	 (dbpath       (dbmod:get-dbdir dbstruct))             ;; directory where all the .db files are kept
	 (dbfullname   (conc dbpath"/"dbfname)) ;; (dbmod:run-id->full-dbfname dbstruct run-id))
	 (dbexists     (file-exists? dbfullname))
	 (tmpdir       (conc "/tmp/"(current-user-name)))
	 (tmpdb        (let* ((fname (conc tmpdir"/" (string-translate areapath "/" ".") ;; "-"(current-process-id)
					   "-"dbfname)))
			 (if (not (file-exists? tmpdir))(create-directory tmpdir))
			 ;; check if tmpdb already exists, either delete it or
			 ;; add something to the name
			 fname))
	 (inmem        (dbmod:open-inmem-db init-proc
					    ;; (if (eq? (dbfile:cache-method) 'inmem)
					    ;; 	#f
					    tmpdb
					    ;; )
					    ))
	 (write-access (file-write-access? dbpath))
	 (db           (dbmod:safely-open-db dbfullname init-proc write-access))
	 (tables       (db:sync-all-tables-list keys)))
    (if (not (and (sqlite3:database? inmem)
		  (sqlite3:database? db)))
	(begin
	  (debug:print 0 *default-log-port* "ERROR: Failed to properly open "dbfname-in", exiting immediately.")

	  (exit)))    ;; (assert (sqlite3:database? inmem) "FATAL: open-dbmoddb: inmem is not a db")
    ;; (assert (sqlite3:database? db) "FATAL:  open-dbmoddb: db is not a db")
    (dbr:dbstruct-inmem-set!     dbstruct inmem)
    (dbr:dbstruct-ondiskdb-set!  dbstruct db)
    (dbr:dbstruct-dbfile-set!    dbstruct dbfullname)
    (dbr:dbstruct-dbtmpname-set! dbstruct tmpdb)
    (dbr:dbstruct-dbfname-set!   dbstruct dbfname)
    (dbr:dbstruct-sync-proc-set! dbstruct
				 (lambda (last-update)
				   (if *sync-in-progress*
				       (debug:print 3 *default-log-port* "WARNING: overlapping calls to sync to disk")
				       (let* ((sync-cmd (conc "megatest -db2db -from "tmpdb" -to "dbfullname"&")))
					 ;; (mutex-lock! *db-with-db-mutex*) ;; this mutex is used when overloaded or during a query that modifies the db
					 (set! *sync-in-progress* #t)
					 ;; (if (eq? (dbfile:cache-method) 'inmem)
					 ;;     (dbmod:sync-gasket tables last-update inmem db
					 ;; 			dbfullname syncdir keys)
					 (thread-start! (make-thread
							 (lambda ()
							   (debug:print-info "Running "sync-cmd)
							   (system sync-cmd)
							   ;; (mutex-unlock! *db-with-db-mutex*)
							   ;; (thread-sleep! 0.5) ;; ensure at least 1/2 second down time between sync calls
							   (set! *sync-in-progress* #f))))
					 ))))
    ;; (dbmod:sync-tables tables #f db inmem)
    ;; (if db
    (dbmod:sync-gasket tables #f inmem db dbfullname 'fromdest keys) ;; ) ;; load into inmem
    (dbr:dbstruct-last-update-set! dbstruct (current-seconds)) ;; should this be offset back in time by one second?
    dbstruct))

;;    (if (eq? syncdir 'todisk) ;; sync to disk normally, sync from in dashboard
;;        (dbmod:sync-tables tables last-update inmem db)
;;        (dbmod:sync-tables tables last-update db inmem))))

;; direction: 'fromdest 'todest
;;
(define (dbmod:sync-gasket tables last-update inmem dbh dbfname direction keys)
  (assert (sqlite3:database? inmem) "FATAL: sync-gasket: inmem is not a db")
  (assert (sqlite3:database? inmem) "FATAL: sync-gasket: dbh is not a db")
  (debug:print-info 0 *default-log-port* "dbmod:sync-gasket called with sync-method="(dbfile:sync-method))
  (case (dbfile:sync-method)
    ((none) #f)
    ((attach)
     (dbmod:attach-sync tables inmem dbfname direction))
    ((newsync)
     (dbmod:new-sync tables inmem dbh dbfname direction))
    (else
     (case direction
       ((todisk)
	(dbmod:sync-tables tables last-update keys inmem dbh)
	)
       (else
	(dbmod:sync-tables tables last-update keys dbh inmem))))))

(define (dbmod:close-db dbstruct)
  ;; do final sync to disk file
  ;; (do-sync ...)
  (sqlite3:finalize! (dbr:dbstruct-ondiskdb dbstruct)))

;;======================================================================
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
;;
;; if last-update specified ("field-name" . time-in-seconds)
;;    then sync only records where field-name >= time-in-seconds
;;    IFF field-name exists
;;
;; Use (db:sync-all-tables-list keys) to get the tbls input
;;
(define (dbmod:sync-tables tbls last-update fromdb todb)
  (debug:print-info 2 *default-log-port* "dbmod:sync-tables called, from: "fromdb", to: "todb)
  (assert (sqlite3:database? fromdb) "FATAL: dbmod:sync-tables called with fromdb not a database" fromdb)
  (assert (sqlite3:database? todb) "FATAL: dbmod:sync-tables called with fromdb not a database" todb)
  (let ((stmts       (make-hash-table)) ;; table-field => stmt
	(all-stmts   '())              ;; ( ( stmt1 value1 ) ( stml2 value2 ))
	(numrecs     (make-hash-table))
	(start-time  (current-milliseconds))







|







284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
;;
;; if last-update specified ("field-name" . time-in-seconds)
;;    then sync only records where field-name >= time-in-seconds
;;    IFF field-name exists
;;
;; Use (db:sync-all-tables-list keys) to get the tbls input
;;
(define (dbmod:sync-tables tbls last-update keys fromdb todb)
  (debug:print-info 2 *default-log-port* "dbmod:sync-tables called, from: "fromdb", to: "todb)
  (assert (sqlite3:database? fromdb) "FATAL: dbmod:sync-tables called with fromdb not a database" fromdb)
  (assert (sqlite3:database? todb) "FATAL: dbmod:sync-tables called with fromdb not a database" todb)
  (let ((stmts       (make-hash-table)) ;; table-field => stmt
	(all-stmts   '())              ;; ( ( stmt1 value1 ) ( stml2 value2 ))
	(numrecs     (make-hash-table))
	(start-time  (current-milliseconds))
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
;; direction = fromdest, todest
;; mode = 'full, 'incr
;;
;; Idea: youngest in dest is last_update time
;;
(define (dbmod:new-sync tables dbh1 dbh2 destdbfile direction #!key
			   (mode 'full))
  (debug:print 0 *default-log-port* "Doing sync "direction" "destdbfile)
  (if (not (sqlite3:auto-committing? dbh1))
      (debug:print 0 *default-log-port* "Skipping sync due to transaction in flight.")
      (let* ((table-names  (map car tables))
	     (dest-exists  (file-exists? destdbfile)))
	(assert dest-exists "FATAL: sync called with non-existant file, "destdbfile)
	(for-each
	 (lambda (table)







|







533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
;; direction = fromdest, todest
;; mode = 'full, 'incr
;;
;; Idea: youngest in dest is last_update time
;;
(define (dbmod:new-sync tables dbh1 dbh2 destdbfile direction #!key
			   (mode 'full))
  (debug:print 0 *default-log-port* "Doing new-sync "direction" "destdbfile)
  (if (not (sqlite3:auto-committing? dbh1))
      (debug:print 0 *default-log-port* "Skipping sync due to transaction in flight.")
      (let* ((table-names  (map car tables))
	     (dest-exists  (file-exists? destdbfile)))
	(assert dest-exists "FATAL: sync called with non-existant file, "destdbfile)
	(for-each
	 (lambda (table)
750
751
752
753
754
755
756



757
































		  (debug:print-info 0 *default-log-port* "Found old test in LAUNCHED state, test-id=" test-id
				    " 1 day since event_time marked")
                  (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))))
	  stmth3
	  run-id))))
    (list incompleted oldlaunched toplevels)))




)







































>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
		  (debug:print-info 0 *default-log-port* "Found old test in LAUNCHED state, test-id=" test-id
				    " 1 day since event_time marked")
                  (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))))
	  stmth3
	  run-id))))
    (list incompleted oldlaunched toplevels)))

;;======================================================================
;; db to db sync
;;======================================================================

(define (dbmod:db-to-db-sync src-db dest-db last-update init-proc keys)
  (if (and (file-exists? src-db) ;; can't proceed without a source
	   (file-read-access? src-db))
      (let* ((have-dest     (file-exists? dest-db))
	     (dest-file-wr  (and have-dest
				 (file-write-access? dest-db))) ;; exists and writable
	     (dest-dir      (or (pathname-directory dest-db)
				"."))
	     (dest-dir-wr   (and (file-exists? dest-dir)
				 (file-write-access? dest-dir)))
	     (d-wr          (or (and have-dest
				     dest-file-wr)
				dest-dir-wr))
	     (copied        (if (and (not have-dest)
				     dest-dir-wr)
				(begin
				  (file-copy src-db dest-db)
				  #t)
				#f)))
	(if copied
	    (begin
	      (debug:print-info 0 *default-log-port* "db-to-db-sync done with file-copy")
	      #t)
	    (let* ((tables (db:sync-all-tables-list keys))
		   (sdb    (dbmod:safely-open-db src-db init-proc #t))
		   (ddb    (dbmod:safely-open-db dest-db init-proc d-wr))
		   (res    (dbmod:sync-gasket tables last-update sdb ddb dest-db 'todest keys)))
	      (sqlite3:finalize! sdb)
	      (sqlite3:finalize! ddb)
	      res)))
      #f))
)

Modified launch.scm from [7e82dfb83e] to [a591d57e2c].

740
741
742
743
744
745
746

747
748
749
750
751
752
753
		 )
              )


	      ;; for automated creation of the rollup html file this is a good place...
	      (if (not (equal? item-path ""))
		      (tests:summarize-items run-id test-id test-name #f))

	      (tests:summarize-test run-id test-id)  ;; don't force - just update if no
              ;; Leave a .final-status file for the top level test
              (tests:save-final-status run-id test-id)
	      (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) ;; end of let*

	    (mutex-unlock! m)
            (launch:end-of-run-check run-id )







>







740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
		 )
              )


	      ;; for automated creation of the rollup html file this is a good place...
	      (if (not (equal? item-path ""))
		      (tests:summarize-items run-id test-id test-name #f))
	      ;; BUG was this meant to be the antecnt of the if above?
	      (tests:summarize-test run-id test-id)  ;; don't force - just update if no
              ;; Leave a .final-status file for the top level test
              (tests:save-final-status run-id test-id)
	      (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) ;; end of let*

	    (mutex-unlock! m)
            (launch:end-of-run-check run-id )
1153
1154
1155
1156
1157
1158
1159
1160

1161
1162
1163
1164




1165
1166
1167
1168
1169
1170
1171

	;; if have -append-config then read and append here
	(let ((cfname (args:get-arg "-append-config")))
	  (if (and cfname
		   (file-read-access? cfname))
	      (read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special.
	;; have config at this time, this is a good place to set params based on config file settings
	(let*  ((dbmode (configf:lookup *configdat* "setup" "dbcache-mode")))

	  (if dbmode
	      (begin
		(debug:print-info 0 *default-log-port* "Overriding dbmode to "dbmode)
		(dbcache-mode (string->symbol dbmode)))))




	
	*toppath*)))


(define (get-best-disk confdat testconfig)
  (let* ((disks   (or (and testconfig (hash-table-ref/default testconfig "disks" #f))
		      (hash-table-ref/default confdat "disks" #f)))







|
>



|
>
>
>
>







1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177

	;; if have -append-config then read and append here
	(let ((cfname (args:get-arg "-append-config")))
	  (if (and cfname
		   (file-read-access? cfname))
	      (read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special.
	;; have config at this time, this is a good place to set params based on config file settings
	(let*  ((dbmode   (configf:lookup *configdat* "setup" "dbcache-mode"))
		(syncmode (configf:lookup *configdat* "setup" "sync-mode")))
	  (if dbmode
	      (begin
		(debug:print-info 0 *default-log-port* "Overriding dbmode to "dbmode)
		(dbcache-mode (string->symbol dbmode))))
	  (if syncmode
	      (begin
		(debug:print-info 0 *default-log-port* "Overriding syncmode to "syncmode)
		(dbfile:sync-method (string->symbol syncmode)))))
	
	*toppath*)))


(define (get-best-disk confdat testconfig)
  (let* ((disks   (or (and testconfig (hash-table-ref/default testconfig "disks" #f))
		      (hash-table-ref/default confdat "disks" #f)))

Modified megatest.scm from [143383aea7] to [bce410093b].

282
283
284
285
286
287
288

289
290
291
292
293
294
295
  -list-run-time          : list time requered to complete runs. It supports following switches
                            -run-patt <patt> -target-patt <patt> -dumpmode <csv,json,plain-text>
  -list-test-time	  : list time requered to complete each test in a run. It following following arguments
                            -runname <patt> -target <patt> -dumpmode <csv,json,plain-text>
  -syscheck               : do some very basic checks; write access and space in tmp, home, runs, links and 
                            is $DISPLAY valid 
  -list-waivers           : dump waivers for specified target, runname, testpatt to stdout


Diff report
  -diff-rep               : generate diff report (must include -src-target, -src-runname, -target, -runname
                                                  and either -diff-email or -diff-html)
  -src-target <target>
  -src-runname <target>
  -diff-email <emails>    : comma separated list of email addresses to send diff report







>







282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
  -list-run-time          : list time requered to complete runs. It supports following switches
                            -run-patt <patt> -target-patt <patt> -dumpmode <csv,json,plain-text>
  -list-test-time	  : list time requered to complete each test in a run. It following following arguments
                            -runname <patt> -target <patt> -dumpmode <csv,json,plain-text>
  -syscheck               : do some very basic checks; write access and space in tmp, home, runs, links and 
                            is $DISPLAY valid 
  -list-waivers           : dump waivers for specified target, runname, testpatt to stdout
  -db2db                  : sync db to db, use -from, -to for dbs, -period and -timeout for continuous sync

Diff report
  -diff-rep               : generate diff report (must include -src-target, -src-runname, -target, -runname
                                                  and either -diff-email or -diff-html)
  -src-target <target>
  -src-runname <target>
  -diff-email <emails>    : comma separated list of email addresses to send diff report
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350

351
352
353
354
355
356
357
			":state"  
			"-state"
			":status"
			"-status"
			"-list-runs"
                        "-testdata-csv"
			"-testpatt"
                        "--modepatt"
                        "-modepatt"
                        "-tagexpr"
			"-itempatt"
			"-setlog"
			"-set-toplog"
			"-runstep"
			"-logpro"
			"-m"
			"-rerun"

			"-days"
			"-rename-run"

			"-to"
			"-dest"
                        "-source" 
                        "-time-stamp" 
			;; values and messages
			":category"
			":variable"







|












>







332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
			":state"  
			"-state"
			":status"
			"-status"
			"-list-runs"
                        "-testdata-csv"
			"-testpatt"
                        ;; "--modepatt"
                        "-modepatt"
                        "-tagexpr"
			"-itempatt"
			"-setlog"
			"-set-toplog"
			"-runstep"
			"-logpro"
			"-m"
			"-rerun"

			"-days"
			"-rename-run"
			"-from"
			"-to"
			"-dest"
                        "-source" 
                        "-time-stamp" 
			;; values and messages
			":category"
			":variable"
376
377
378
379
380
381
382


383
384
385
386
387
388
389
			"-pathmod"
			"-env2file"
			"-envcap"
			"-envdelta"
			"-setvars"
			"-set-state-status"
			"-import-sexpr"



                        ;; move runs stuff here
                        "-remove-keep"           
			"-set-run-status"
			"-age"

			;; archive 







>
>







378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
			"-pathmod"
			"-env2file"
			"-envcap"
			"-envdelta"
			"-setvars"
			"-set-state-status"
			"-import-sexpr"
			"-period"  ;; sync period in seconds
			"-timeout" ;; exit sync if timeout in seconds exceeded since last change

                        ;; move runs stuff here
                        "-remove-keep"           
			"-set-run-status"
			"-age"

			;; archive 
493
494
495
496
497
498
499

500
501
502
503
504
505
506
			"-create-megatest-area"
			"-mark-incompletes"

			"-convert-to-norm"
			"-convert-to-old"
			"-import-megatest.db"
			"-sync-to-megatest.db"

                        "-sync-brute-force"
			"-logging"
			"-v" ;; verbose 2, more than normal (normal is 1)
			"-q" ;; quiet 0, errors/warnings only

                        "-diff-rep"








>







497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
			"-create-megatest-area"
			"-mark-incompletes"

			"-convert-to-norm"
			"-convert-to-old"
			"-import-megatest.db"
			"-sync-to-megatest.db"
			"-db2db"
                        "-sync-brute-force"
			"-logging"
			"-v" ;; verbose 2, more than normal (normal is 1)
			"-q" ;; quiet 0, errors/warnings only

                        "-diff-rep"

2558
2559
2560
2561
2562
2563
2564


































2565
2566
2567
2568
2569
2570
2571
      (set! *didsomething* #t)))

(if (args:get-arg "-sync-to")
    (let ((toppath (launch:setup)))
      (tasks:sync-to-postgres *configdat* (args:get-arg "-sync-to"))
      (set! *didsomething* #t)))



































(if (args:get-arg "-list-test-time")
     (let* ((toppath (launch:setup))) 
     (task:get-test-times)  
     (set! *didsomething* #t)))

(if (args:get-arg "-list-run-time")
     (let* ((toppath (launch:setup))) 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
      (set! *didsomething* #t)))

(if (args:get-arg "-sync-to")
    (let ((toppath (launch:setup)))
      (tasks:sync-to-postgres *configdat* (args:get-arg "-sync-to"))
      (set! *didsomething* #t)))

;; use with -from and -to
;;
(if (args:get-arg "-db2db")
    (let* ((duh         (launch:setup))
	   (src-db      (args:get-arg "-from"))
	   (dest-db     (args:get-arg "-to"))
	   (sync-period (args:get-arg "-period"))    ;; NOT IMPLEMENTED YET
	   (sync-timeout (args:get-arg "-timeout"))  ;; NOT IMPLEMENTED YET
	   (lockfile    (conc dest-db".sync-lock"))
	   (keys        (db:get-keys #f))
	   )
      
      (if (and src-db dest-db)
	  (if (file-exists? src-db)
	      (if (file-exists? lockfile)
		  (debug:print 0 *default-log-port* "Lock "lockfile" exists, skipping sync...")
		  (begin
		    (with-output-to-file lockfile
		      (lambda ()
			(print (current-process-id))))
		    (debug:print-info 0 *default-log-port* "Attempting to sync data from "src-db" to "dest-db"...")
		    (if (not (file-exists? dest-db))
			(begin
			  (debug:print 0 *default-log-port* "Using copy to create "dest-db" from "src-db)
			  (file-copy src-db dest-db))
			(let ((res (dbmod:db-to-db-sync src-db dest-db 0 (dbfile:db-init-proc) keys)))
			  (if res
			      (debug:print-info 0 *default-log-port* "Synced " res " records from "src-db" to "dest-db)
			      (debug:print-info 0 *default-log-port* "No sync due to permissions or other issue."))))
		    (delete-file* lockfile)))
	      (debug:print 0 *default-log-port* "No sync due to unreadble or non-existant source file"src-db))
	  (debug:print 0 *default-log-port* "Usage for -db2db; -to and -from must be specified"))
      (set! *didsomething* #t)))

(if (args:get-arg "-list-test-time")
     (let* ((toppath (launch:setup))) 
     (task:get-test-times)  
     (set! *didsomething* #t)))

(if (args:get-arg "-list-run-time")
     (let* ((toppath (launch:setup))) 

Modified tcp-transportmod.scm from [9c6068b733] to [f16326ea6c].

487
488
489
490
491
492
493


494


495
496
497
498
499
500
501
	 (areapath     (tt-areapath ttdat))
	 (nosyncdbpath (conc areapath"/.mtdb"))
	 (cleanup (lambda ()
		    (if (tt-cleanup-proc ttdat)
			((tt-cleanup-proc ttdat)))
		    (dbfile:with-no-sync-db nosyncdbpath
					    (lambda (db)


					      (db:no-sync-del! db dbfname))))))


    (set! *server-info* ttdat)
    (let loop ((count 0))
      (if (> count 240)
	  (begin
	    (debug:print 0 *default-log-port* "FATAL: Could not start a tcp server, giving up.")
	    (exit 1))
	  (if (not (tt-port ttdat)) ;; no connection yet







>
>
|
>
>







487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
	 (areapath     (tt-areapath ttdat))
	 (nosyncdbpath (conc areapath"/.mtdb"))
	 (cleanup (lambda ()
		    (if (tt-cleanup-proc ttdat)
			((tt-cleanup-proc ttdat)))
		    (dbfile:with-no-sync-db nosyncdbpath
					    (lambda (db)
					      (let* ((dbtmpname (dbr:dbstruct-dbtmpname dbstruct)))
						(debug:print-info 0 *default-log-port* "Running clean up, including removing db file "dbtmpname)
						(db:no-sync-del! db dbfname)
						#;(if dbtmpname
						    (delete-file dbtmpname))))))))
    (set! *server-info* ttdat)
    (let loop ((count 0))
      (if (> count 240)
	  (begin
	    (debug:print 0 *default-log-port* "FATAL: Could not start a tcp server, giving up.")
	    (exit 1))
	  (if (not (tt-port ttdat)) ;; no connection yet
511
512
513
514
515
516
517

518
519
520
521




522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
      (let* ((servers (tt:get-server-info-sorted ttdat dbfname))
	     (ok      (cond
		       ((null? servers) #f) ;; not ok
		       ((equal? (list-ref (car servers) 6) ;; compare the servinfofile
				(tt-servinf-file ttdat))
			(let* ((res (if db-locked-in
					#t

					(let* ((success (dbfile:with-no-sync-db
							 nosyncdbpath
							 (lambda (db)
							   (db:no-sync-get-lock-with-id db dbfname (tt-servinf-file ttdat))))))




					  (if success
					      (begin
						(tt-state-set! ttdat 'running)
						(debug:print 0 *default-log-port* "Got server lock for "
							     dbfname)
						(set! db-locked-in #t)
						#t)
					      (begin
						(debug:print 0 *default-log-port* "Failed to get server lock for "dbfname)
						#f))))))
			  (if (and res
				   (common:low-noise-print 120 "top server message"))
			      (debug:print-info 0 *default-log-port* "Keep running, I'm the top server for "
						dbfname" on "(tt-host ttdat)":"(tt-port ttdat)))
			  res))
		       (else
			(debug:print-info 0 *default-log-port* "I'm not the lead server: "servers)
			(let* ((leadsrv (car servers)))
			  (match leadsrv







>
|
|
|
|
>
>
>
>










<
|







515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540

541
542
543
544
545
546
547
548
      (let* ((servers (tt:get-server-info-sorted ttdat dbfname))
	     (ok      (cond
		       ((null? servers) #f) ;; not ok
		       ((equal? (list-ref (car servers) 6) ;; compare the servinfofile
				(tt-servinf-file ttdat))
			(let* ((res (if db-locked-in
					#t
					(let* ((lock-result
						(dbfile:with-no-sync-db
						 nosyncdbpath
						 (lambda (db)
						   (db:no-sync-get-lock-with-id db dbfname
										;; (tt-servinf-file ttdat)
										(dbr:dbstruct-dbtmpname dbstruct)
										))))
					       (success (car lock-result)))
					  (if success
					      (begin
						(tt-state-set! ttdat 'running)
						(debug:print 0 *default-log-port* "Got server lock for "
							     dbfname)
						(set! db-locked-in #t)
						#t)
					      (begin
						(debug:print 0 *default-log-port* "Failed to get server lock for "dbfname)
						#f))))))

			  (if (and res (common:low-noise-print 120 "top server message"))
			      (debug:print-info 0 *default-log-port* "Keep running, I'm the top server for "
						dbfname" on "(tt-host ttdat)":"(tt-port ttdat)))
			  res))
		       (else
			(debug:print-info 0 *default-log-port* "I'm not the lead server: "servers)
			(let* ((leadsrv (car servers)))
			  (match leadsrv
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
				       (debug:print 0 *default-log-port* "I'm not the server but could not ping "host":"port", will try again.")
				       (thread-sleep! 1) ;; just because
				       #t)))))
			    (else ;; should never get here
			     (debug:print 0 *default-log-port* "BAD SERVER RECORD: "leadsrv)
			     (assert #f "Bad server record "leadsrv))))))))
	(if ok
	    ;; (if (> *api-process-request-count* 0) ;; have requests in flight
	    ;;	(tt-last-access-set! ttdat (current-seconds)))
	    (tt-last-access-set! ttdat *db-last-access*) ;; bit silly, just use db-last-access
	    (begin
	      (debug:print 0 *default-log-port* "Exiting immediately")
	      (cleanup)
	      (exit)))

	(let* ((last-update (dbr:dbstruct-last-update dbstruct))







<
<







567
568
569
570
571
572
573


574
575
576
577
578
579
580
				       (debug:print 0 *default-log-port* "I'm not the server but could not ping "host":"port", will try again.")
				       (thread-sleep! 1) ;; just because
				       #t)))))
			    (else ;; should never get here
			     (debug:print 0 *default-log-port* "BAD SERVER RECORD: "leadsrv)
			     (assert #f "Bad server record "leadsrv))))))))
	(if ok


	    (tt-last-access-set! ttdat *db-last-access*) ;; bit silly, just use db-last-access
	    (begin
	      (debug:print 0 *default-log-port* "Exiting immediately")
	      (cleanup)
	      (exit)))

	(let* ((last-update (dbr:dbstruct-last-update dbstruct))
642
643
644
645
646
647
648
649

650
651
652
653
654
655
656
  (let* ((areapath (tt-areapath ttdat))
	 (servdir  (tt:get-servinfo-dir areapath))
	 (host     (tt-host ttdat))
	 (port     (tt-port ttdat))
	 (servinf (conc servdir"/"host":"port"-"(current-process-id)":"dbfname))
	 (serv-id (tt:mk-signature areapath))
	 (clean-proc (lambda ()
		       (delete-file* servinf))))

    (assert (and host port) "FATAL: tt:create-server-registration-file called with no conn, dbfname="dbfname)
    (tt-cleanup-proc-set! ttdat clean-proc)
    (tt-servinf-file-set! ttdat servinf)
    (with-output-to-file servinf
      (lambda ()
	(print "SERVER STARTED: "host":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)" dbfname: "dbfname)))
      serv-id))







|
>







648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
  (let* ((areapath (tt-areapath ttdat))
	 (servdir  (tt:get-servinfo-dir areapath))
	 (host     (tt-host ttdat))
	 (port     (tt-port ttdat))
	 (servinf (conc servdir"/"host":"port"-"(current-process-id)":"dbfname))
	 (serv-id (tt:mk-signature areapath))
	 (clean-proc (lambda ()
		       (delete-file* servinf)
		       )))
    (assert (and host port) "FATAL: tt:create-server-registration-file called with no conn, dbfname="dbfname)
    (tt-cleanup-proc-set! ttdat clean-proc)
    (tt-servinf-file-set! ttdat servinf)
    (with-output-to-file servinf
      (lambda ()
	(print "SERVER STARTED: "host":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)" dbfname: "dbfname)))
      serv-id))