Megatest

Check-in [7fb44b797e]
Login
Overview
Comment:reworked the sync locking, allow parallel servers (needs work to lock in to single machine)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.80
Files: files | file ages | folders
SHA1: 7fb44b797e70c15c101f361a2d7c61f4d9852cef
User & Date: matt on 2023-05-11 09:08:14
Other Links: branch diff | manifest | tags
Context
2023-05-11
09:14
Merged fork check-in: b21843154b user: mrwellan tags: v1.80
09:08
reworked the sync locking, allow parallel servers (needs work to lock in to single machine) check-in: 7fb44b797e user: matt tags: v1.80
05:43
Use a key for the db lock-down that is unique to the db in .mtdb, this should elminate duplicate, overlapping servers. check-in: 138a40d18e user: matt tags: v1.80
Changes

Modified common.scm from [10e4ec655c] to [6b6394cde8].

55
56
57
58
59
60
61

62
63
64
65
66
67
68

(define (stop-the-train)
  (thread-start! (make-thread (lambda ()
				(let loop ()
				  (if (and *toppath*
					   (file-exists? (conc *toppath*"/stop-the-train")))
				      (let* ((msg (conc "ERROR: found file "*toppath*"/stop-the-train, exiting immediately")))

					(print msg)
					(debug:print 0 *default-log-port* msg)
					(exit 1)))
				  (thread-sleep! 5)
				  (loop))))))

;; execute thunk, return value.  If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* .







>







55
56
57
58
59
60
61
62
63
64
65
66
67
68
69

(define (stop-the-train)
  (thread-start! (make-thread (lambda ()
				(let loop ()
				  (if (and *toppath*
					   (file-exists? (conc *toppath*"/stop-the-train")))
				      (let* ((msg (conc "ERROR: found file "*toppath*"/stop-the-train, exiting immediately")))
					;; yes, print to current-output-port AND *default-log-port*, annoying but necessary I think
					(print msg)
					(debug:print 0 *default-log-port* msg)
					(exit 1)))
				  (thread-sleep! 5)
				  (loop))))))

;; execute thunk, return value.  If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* .

Modified dbfile.scm from [82b1ce2a6f] to [deb13d8b08].

80
81
82
83
84
85
86

87
88
89
90
91
92
93
  ;;
  ;; for the inmem approach (see dbmod.scm)
  ;; this is one db per server
  (inmem     #f)  ;; handle for the in memory copy
  (dbfile    #f)  ;; path to the db file on disk
  (dbfname   #f)  ;; short name of db file on disk (used to validate accessing correct db)
  (ondiskdb  #f)  ;; handle for the on-disk file

  (dbdat     #f)  ;; create a dbdat for the downstream calls such as db:with-db
  (last-update 0)
  (sync-proc #f)
  )

;; NOTE: Need one dbr:subdb per main.db, 1.db ...
;;







>







80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
  ;;
  ;; for the inmem approach (see dbmod.scm)
  ;; this is one db per server
  (inmem     #f)  ;; handle for the in memory copy
  (dbfile    #f)  ;; path to the db file on disk
  (dbfname   #f)  ;; short name of db file on disk (used to validate accessing correct db)
  (ondiskdb  #f)  ;; handle for the on-disk file
  (dbtmpname #f)  ;; path to db file in /tmp (non-imem method)
  (dbdat     #f)  ;; create a dbdat for the downstream calls such as db:with-db
  (last-update 0)
  (sync-proc #f)
  )

;; NOTE: Need one dbr:subdb per main.db, 1.db ...
;;
514
515
516
517
518
519
520


521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547

548
549
550
551
552
553
554
555
556
557
558
559
			  (string->number res)
			  #f)))
          (if newres
              newres
              res))
        res)))



(define (db:extract-time-identifier instr)
  (let ((tokens (string-split instr "+")))
    (match tokens
      ((t i)(cons (string->number t) i))
      ((t)  (cons (string->number t) #f))
      (else
       (assert #f "FATAL: db:extract-time-identifier handed bad data "instr)))))

;; transaction protected lock aquisition
;; either:
;;    fails    returns  (#f . lock-creation-time)
;;    succeeds (returns (#t . lock-creation-time)
;; use (db:no-sync-del! db keyname) to release the lock
;;
(define (db:no-sync-get-lock-with-id db keyname identifier)
  (sqlite3:with-transaction
   db
   (lambda ()
     (condition-case
      (let* ((curr-val (db:no-sync-get/default db keyname #f)))
	(if curr-val
	    (match (db:extract-time-identifier curr-val) ;; result->timestamp, identifier
	       ((timestamp ident)
		(if (equal? ident identifier)
		    #t     ;; this *is* my lock
		    #f))   ;; nope, not my lock
	       (else #f))  ;; nope, not my lock

	    (let ((lock-value (if identifier
				  (conc (current-seconds)"+"identifier)
				  (current-seconds))))
	      (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-value)
	      #t)))
      (exn (io-error)  (dbfile:print-err "ERROR: i/o error with no-sync db. Check permissions, disk space etc. and try again."))
      (exn (corrupt)   (dbfile:print-err "ERROR: database no-sync db is corrupt. Repair it to proceed."))
      (exn (busy)      (dbfile:print-err "ERROR: database no-sync db is locked. Try copying to another location, remove original and copy back."))
      (exn (permission)(dbfile:print-err "ERROR: database no-sync db has some permissions problem."))
      (exn () ;; (status done) ;; I don't know how to detect status done but no data!
	   (dbfile:print-err "ERROR: Unknown error with database no-sync db message: exn="(condition->list exn)", \n"
			     ((condition-property-accessor 'exn 'message) exn))







>
>










|
|












|
|
|
>
|



|







515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
			  (string->number res)
			  #f)))
          (if newres
              newres
              res))
        res)))

;; timestring+identifier+payload
;; locks are unique on identifier, payload is informational
(define (db:extract-time-identifier instr)
  (let ((tokens (string-split instr "+")))
    (match tokens
      ((t i)(cons (string->number t) i))
      ((t)  (cons (string->number t) #f))
      (else
       (assert #f "FATAL: db:extract-time-identifier handed bad data "instr)))))

;; transaction protected lock aquisition
;; either:
;;    fails    returns  (#f lock-creation-time identifier)
;;    succeeds (returns (#t lock-creation-time identifier)
;; use (db:no-sync-del! db keyname) to release the lock
;;
(define (db:no-sync-get-lock-with-id db keyname identifier)
  (sqlite3:with-transaction
   db
   (lambda ()
     (condition-case
      (let* ((curr-val (db:no-sync-get/default db keyname #f)))
	(if curr-val
	    (match (db:extract-time-identifier curr-val) ;; result->timestamp, identifier
	       ((timestamp ident)
		(if (equal? ident identifier)
		    (cons #t timestamp)    ;; this *is* my lock
		    (cons #f timestamp)))  ;; nope, not my lock
	       (else (cons #f #f)))  ;; nope, not my lock
	    (let ((curr-sec (current-seconds))
		  (lock-value (if identifier
				  (conc (current-seconds)"+"identifier)
				  (current-seconds))))
	      (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-value)
	      (cons #t curr-sec))))
      (exn (io-error)  (dbfile:print-err "ERROR: i/o error with no-sync db. Check permissions, disk space etc. and try again."))
      (exn (corrupt)   (dbfile:print-err "ERROR: database no-sync db is corrupt. Repair it to proceed."))
      (exn (busy)      (dbfile:print-err "ERROR: database no-sync db is locked. Try copying to another location, remove original and copy back."))
      (exn (permission)(dbfile:print-err "ERROR: database no-sync db has some permissions problem."))
      (exn () ;; (status done) ;; I don't know how to detect status done but no data!
	   (dbfile:print-err "ERROR: Unknown error with database no-sync db message: exn="(condition->list exn)", \n"
			     ((condition-property-accessor 'exn 'message) exn))

Modified dbmod.scm from [fa16c38514] to [a52cf6d3b4].

95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
	 (dbdat     (dbmod:open-db dbstruct run-id (dbfile:db-init-proc)))
	 (dbh       (dbr:dbdat-dbh dbdat)) ;; this will be the inmem handle
	 (dbfile    (dbr:dbdat-dbfile dbdat)))
    ;; if nfs mode do a sync if delta > 2
    (let* ((last-update (dbr:dbstruct-last-update dbstruct))
	   (sync-proc   (dbr:dbstruct-sync-proc dbstruct))
	   (curr-secs   (current-seconds)))
      (if (> (- curr-secs last-update) 3)
	  (begin
	    (sync-proc last-update)

	    ;; MOVE THIS CALL TO INSIDE THE sync-proc CALL
	    (dbr:dbstruct-last-update-set! dbstruct curr-secs)
	    )))
    (assert (sqlite3:database? dbh) "FATAL: bad db handle in dbmod:with-db") 







|







95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
	 (dbdat     (dbmod:open-db dbstruct run-id (dbfile:db-init-proc)))
	 (dbh       (dbr:dbdat-dbh dbdat)) ;; this will be the inmem handle
	 (dbfile    (dbr:dbdat-dbfile dbdat)))
    ;; if nfs mode do a sync if delta > 2
    (let* ((last-update (dbr:dbstruct-last-update dbstruct))
	   (sync-proc   (dbr:dbstruct-sync-proc dbstruct))
	   (curr-secs   (current-seconds)))
      (if (> (- curr-secs last-update) 5)
	  (begin
	    (sync-proc last-update)

	    ;; MOVE THIS CALL TO INSIDE THE sync-proc CALL
	    (dbr:dbstruct-last-update-set! dbstruct curr-secs)
	    )))
    (assert (sqlite3:database? dbh) "FATAL: bad db handle in dbmod:with-db") 
176
177
178
179
180
181
182
183

184
185
186
187
188
189
190
			    (syncdir 'todisk))
  (let* ((dbstruct     (or dbstruct-in (make-dbr:dbstruct areapath: areapath)))
	 (dbfname      (or dbfname-in (dbmod:run-id->dbfname run-id)))
	 (dbpath       (dbmod:get-dbdir dbstruct))             ;; directory where all the .db files are kept
	 (dbfullname   (conc dbpath"/"dbfname)) ;; (dbmod:run-id->full-dbfname dbstruct run-id))
	 (dbexists     (file-exists? dbfullname))
	 (tmpdir       (conc "/tmp/"(current-user-name)))
	 (tmpdb        (let* ((fname (conc tmpdir"/" (string-translate areapath "/" ".")"-"(current-process-id)"-"dbfname)))

			 (if (not (file-exists? tmpdir))(create-directory tmpdir))
			 ;; check if tmpdb already exists, either delete it or
			 ;; add something to the name
			 fname))
	 (inmem        (dbmod:open-inmem-db init-proc
					    ;; (if (eq? (dbfile:cache-method) 'inmem)
					    ;; 	#f







|
>







176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
			    (syncdir 'todisk))
  (let* ((dbstruct     (or dbstruct-in (make-dbr:dbstruct areapath: areapath)))
	 (dbfname      (or dbfname-in (dbmod:run-id->dbfname run-id)))
	 (dbpath       (dbmod:get-dbdir dbstruct))             ;; directory where all the .db files are kept
	 (dbfullname   (conc dbpath"/"dbfname)) ;; (dbmod:run-id->full-dbfname dbstruct run-id))
	 (dbexists     (file-exists? dbfullname))
	 (tmpdir       (conc "/tmp/"(current-user-name)))
	 (tmpdb        (let* ((fname (conc tmpdir"/" (string-translate areapath "/" ".") ;; "-"(current-process-id)
					   "-"dbfname)))
			 (if (not (file-exists? tmpdir))(create-directory tmpdir))
			 ;; check if tmpdb already exists, either delete it or
			 ;; add something to the name
			 fname))
	 (inmem        (dbmod:open-inmem-db init-proc
					    ;; (if (eq? (dbfile:cache-method) 'inmem)
					    ;; 	#f
199
200
201
202
203
204
205

206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223

224
225
226
227
228
229
230
	(begin
	  (debug:print 0 *default-log-port* "ERROR: Failed to properly open "dbfname-in", exiting immediately.")
	  (exit)))    ;; (assert (sqlite3:database? inmem) "FATAL: open-dbmoddb: inmem is not a db")
    ;; (assert (sqlite3:database? db) "FATAL:  open-dbmoddb: db is not a db")
    (dbr:dbstruct-inmem-set!     dbstruct inmem)
    (dbr:dbstruct-ondiskdb-set!  dbstruct db)
    (dbr:dbstruct-dbfile-set!    dbstruct dbfullname)

    (dbr:dbstruct-dbfname-set!   dbstruct dbfname)
    (dbr:dbstruct-sync-proc-set! dbstruct
				 (lambda (last-update)
				   (if *sync-in-progress*
				       (debug:print 3 *default-log-port* "WARNING: overlapping calls to sync to disk")
				       (let* ((sync-cmd (conc "megatest -db2db -from "tmpdb" -to "dbfullname)))
					 (mutex-lock! *db-with-db-mutex*) ;; this mutex is used when overloaded or during a query that modifies the db
					 (set! *sync-in-progress* #t)
					 ;; (if (eq? (dbfile:cache-method) 'inmem)
					 ;;     (dbmod:sync-gasket tables last-update inmem db
					 ;; 			dbfullname syncdir keys)
					 (thread-start! (make-thread
							 (lambda ()
							   (debug:print-info "Running "sync-cmd)
							   (system sync-cmd))))
					 (mutex-unlock! *db-with-db-mutex*)
					 (thread-sleep! 0.5) ;; ensure at least 1/2 second down time between sync calls
					 (set! *sync-in-progress* #f)))))

    ;; (dbmod:sync-tables tables #f db inmem)
    ;; (if db
    (dbmod:sync-gasket tables #f inmem db dbfullname 'fromdest keys) ;; ) ;; load into inmem
    (dbr:dbstruct-last-update-set! dbstruct (current-seconds)) ;; should this be offset back in time by one second?
    dbstruct))

;;    (if (eq? syncdir 'todisk) ;; sync to disk normally, sync from in dashboard







>





|
|







|
|
|
|
>







200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
	(begin
	  (debug:print 0 *default-log-port* "ERROR: Failed to properly open "dbfname-in", exiting immediately.")
	  (exit)))    ;; (assert (sqlite3:database? inmem) "FATAL: open-dbmoddb: inmem is not a db")
    ;; (assert (sqlite3:database? db) "FATAL:  open-dbmoddb: db is not a db")
    (dbr:dbstruct-inmem-set!     dbstruct inmem)
    (dbr:dbstruct-ondiskdb-set!  dbstruct db)
    (dbr:dbstruct-dbfile-set!    dbstruct dbfullname)
    (dbr:dbstruct-dbtmpname-set! dbstruct tmpdb)
    (dbr:dbstruct-dbfname-set!   dbstruct dbfname)
    (dbr:dbstruct-sync-proc-set! dbstruct
				 (lambda (last-update)
				   (if *sync-in-progress*
				       (debug:print 3 *default-log-port* "WARNING: overlapping calls to sync to disk")
				       (let* ((sync-cmd (conc "megatest -db2db -from "tmpdb" -to "dbfullname"&")))
					 ;; (mutex-lock! *db-with-db-mutex*) ;; this mutex is used when overloaded or during a query that modifies the db
					 (set! *sync-in-progress* #t)
					 ;; (if (eq? (dbfile:cache-method) 'inmem)
					 ;;     (dbmod:sync-gasket tables last-update inmem db
					 ;; 			dbfullname syncdir keys)
					 (thread-start! (make-thread
							 (lambda ()
							   (debug:print-info "Running "sync-cmd)
							   (system sync-cmd)
							   ;; (mutex-unlock! *db-with-db-mutex*)
							   ;; (thread-sleep! 0.5) ;; ensure at least 1/2 second down time between sync calls
							   (set! *sync-in-progress* #f))))
					 ))))
    ;; (dbmod:sync-tables tables #f db inmem)
    ;; (if db
    (dbmod:sync-gasket tables #f inmem db dbfullname 'fromdest keys) ;; ) ;; load into inmem
    (dbr:dbstruct-last-update-set! dbstruct (current-seconds)) ;; should this be offset back in time by one second?
    dbstruct))

;;    (if (eq? syncdir 'todisk) ;; sync to disk normally, sync from in dashboard

Modified megatest.scm from [e2f14e189c] to [612430ed4b].

2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583


2584



2585
2586
2587
2588
2589
2590
2591
2592
2593

2594
2595
2596
2597
2598
2599
2600
;;
(if (args:get-arg "-db2db")
    (let* ((duh         (launch:setup))
	   (src-db      (args:get-arg "-from"))
	   (dest-db     (args:get-arg "-to"))
	   (sync-period (args:get-arg "-period"))    ;; NOT IMPLEMENTED YET
	   (sync-timeout (args:get-arg "-timeout"))  ;; NOT IMPLEMENTED YET
	   (lockfile    (conc dest-db".lock"))
	   (keys        (db:get-keys #f))
	   )
      
      (if (and src-db dest-db)
	  (if (file-exists? src-db)


	      (begin



		(debug:print-info 0 *default-log-port* "Attempting to sync data from "src-db" to "dest-db"...")
		(if (not (file-exists? dest-db))
		    (begin
		      (debug:print 0 *default-log-port* "Using copy to create "dest-db" from "src-db)
		      (file-copy src-db dest-db))
		    (let ((res (dbmod:db-to-db-sync src-db dest-db 0 (dbfile:db-init-proc) keys)))
		      (if res
			  (debug:print-info 0 *default-log-port* "Synced " res " records from "src-db" to "dest-db)
			  (debug:print-info 0 *default-log-port* "No sync due to permissions or other issue.")))))

	      (debug:print 0 *default-log-port* "No sync due to unreadble or non-existant source file"src-db))
	  (debug:print 0 *default-log-port* "Usage for -db2db; -to and -from must be specified"))
      (set! *didsomething* #t)))

(if (args:get-arg "-list-test-time")
     (let* ((toppath (launch:setup))) 
     (task:get-test-times)  







|





>
>
|
>
>
>
|
|
|
|
|
|
|
|
|
>







2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
;;
(if (args:get-arg "-db2db")
    (let* ((duh         (launch:setup))
	   (src-db      (args:get-arg "-from"))
	   (dest-db     (args:get-arg "-to"))
	   (sync-period (args:get-arg "-period"))    ;; NOT IMPLEMENTED YET
	   (sync-timeout (args:get-arg "-timeout"))  ;; NOT IMPLEMENTED YET
	   (lockfile    (conc dest-db".sync-lock"))
	   (keys        (db:get-keys #f))
	   )
      
      (if (and src-db dest-db)
	  (if (file-exists? src-db)
	      (if (file-exists? lockfile)
		  (debug:print 0 *default-log-port* "Lock "lockfile" exists, skipping sync...")
		  (begin
		    (with-output-to-file lockfile
		      (lambda ()
			(print (current-process-id))))
		    (debug:print-info 0 *default-log-port* "Attempting to sync data from "src-db" to "dest-db"...")
		    (if (not (file-exists? dest-db))
			(begin
			  (debug:print 0 *default-log-port* "Using copy to create "dest-db" from "src-db)
			  (file-copy src-db dest-db))
			(let ((res (dbmod:db-to-db-sync src-db dest-db 0 (dbfile:db-init-proc) keys)))
			  (if res
			      (debug:print-info 0 *default-log-port* "Synced " res " records from "src-db" to "dest-db)
			      (debug:print-info 0 *default-log-port* "No sync due to permissions or other issue."))))
		    (delete-file* lockfile)))
	      (debug:print 0 *default-log-port* "No sync due to unreadble or non-existant source file"src-db))
	  (debug:print 0 *default-log-port* "Usage for -db2db; -to and -from must be specified"))
      (set! *didsomething* #t)))

(if (args:get-arg "-list-test-time")
     (let* ((toppath (launch:setup))) 
     (task:get-test-times)  

Modified tcp-transportmod.scm from [3d0d8b0130] to [f16326ea6c].

487
488
489
490
491
492
493


494


495
496
497
498
499
500
501
	 (areapath     (tt-areapath ttdat))
	 (nosyncdbpath (conc areapath"/.mtdb"))
	 (cleanup (lambda ()
		    (if (tt-cleanup-proc ttdat)
			((tt-cleanup-proc ttdat)))
		    (dbfile:with-no-sync-db nosyncdbpath
					    (lambda (db)


					      (db:no-sync-del! db dbfname))))))


    (set! *server-info* ttdat)
    (let loop ((count 0))
      (if (> count 240)
	  (begin
	    (debug:print 0 *default-log-port* "FATAL: Could not start a tcp server, giving up.")
	    (exit 1))
	  (if (not (tt-port ttdat)) ;; no connection yet







>
>
|
>
>







487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
	 (areapath     (tt-areapath ttdat))
	 (nosyncdbpath (conc areapath"/.mtdb"))
	 (cleanup (lambda ()
		    (if (tt-cleanup-proc ttdat)
			((tt-cleanup-proc ttdat)))
		    (dbfile:with-no-sync-db nosyncdbpath
					    (lambda (db)
					      (let* ((dbtmpname (dbr:dbstruct-dbtmpname dbstruct)))
						(debug:print-info 0 *default-log-port* "Running clean up, including removing db file "dbtmpname)
						(db:no-sync-del! db dbfname)
						#;(if dbtmpname
						    (delete-file dbtmpname))))))))
    (set! *server-info* ttdat)
    (let loop ((count 0))
      (if (> count 240)
	  (begin
	    (debug:print 0 *default-log-port* "FATAL: Could not start a tcp server, giving up.")
	    (exit 1))
	  (if (not (tt-port ttdat)) ;; no connection yet
511
512
513
514
515
516
517

518
519
520
521
522
523

524

525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
      (let* ((servers (tt:get-server-info-sorted ttdat dbfname))
	     (ok      (cond
		       ((null? servers) #f) ;; not ok
		       ((equal? (list-ref (car servers) 6) ;; compare the servinfofile
				(tt-servinf-file ttdat))
			(let* ((res (if db-locked-in
					#t

					(let* ((success (dbfile:with-no-sync-db
							 nosyncdbpath
							 (lambda (db)
							   (db:no-sync-get-lock-with-id db dbfname
											;; (tt-servinf-file ttdat) ;; does NOT work, must be unique to the dbname which seems silly but makes sense!
											areapath ;; as good as anything

											)))))

					  (if success
					      (begin
						(tt-state-set! ttdat 'running)
						(debug:print 0 *default-log-port* "Got server lock for "
							     dbfname)
						(set! db-locked-in #t)
						#t)
					      (begin
						(debug:print 0 *default-log-port* "Failed to get server lock for "dbfname)
						#f))))))
			  (if (and res
				   (common:low-noise-print 120 "top server message"))
			      (debug:print-info 0 *default-log-port* "Keep running, I'm the top server for "
						dbfname" on "(tt-host ttdat)":"(tt-port ttdat)))
			  res))
		       (else
			(debug:print-info 0 *default-log-port* "I'm not the lead server: "servers)
			(let* ((leadsrv (car servers)))
			  (match leadsrv







>
|
|
|
|
|
<
>
|
>










<
|







515
516
517
518
519
520
521
522
523
524
525
526
527

528
529
530
531
532
533
534
535
536
537
538
539
540

541
542
543
544
545
546
547
548
      (let* ((servers (tt:get-server-info-sorted ttdat dbfname))
	     (ok      (cond
		       ((null? servers) #f) ;; not ok
		       ((equal? (list-ref (car servers) 6) ;; compare the servinfofile
				(tt-servinf-file ttdat))
			(let* ((res (if db-locked-in
					#t
					(let* ((lock-result
						(dbfile:with-no-sync-db
						 nosyncdbpath
						 (lambda (db)
						   (db:no-sync-get-lock-with-id db dbfname
										;; (tt-servinf-file ttdat)

										(dbr:dbstruct-dbtmpname dbstruct)
										))))
					       (success (car lock-result)))
					  (if success
					      (begin
						(tt-state-set! ttdat 'running)
						(debug:print 0 *default-log-port* "Got server lock for "
							     dbfname)
						(set! db-locked-in #t)
						#t)
					      (begin
						(debug:print 0 *default-log-port* "Failed to get server lock for "dbfname)
						#f))))))

			  (if (and res (common:low-noise-print 120 "top server message"))
			      (debug:print-info 0 *default-log-port* "Keep running, I'm the top server for "
						dbfname" on "(tt-host ttdat)":"(tt-port ttdat)))
			  res))
		       (else
			(debug:print-info 0 *default-log-port* "I'm not the lead server: "servers)
			(let* ((leadsrv (car servers)))
			  (match leadsrv
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
				       (debug:print 0 *default-log-port* "I'm not the server but could not ping "host":"port", will try again.")
				       (thread-sleep! 1) ;; just because
				       #t)))))
			    (else ;; should never get here
			     (debug:print 0 *default-log-port* "BAD SERVER RECORD: "leadsrv)
			     (assert #f "Bad server record "leadsrv))))))))
	(if ok
	    ;; (if (> *api-process-request-count* 0) ;; have requests in flight
	    ;;	(tt-last-access-set! ttdat (current-seconds)))
	    (tt-last-access-set! ttdat *db-last-access*) ;; bit silly, just use db-last-access
	    (begin
	      (debug:print 0 *default-log-port* "Exiting immediately")
	      (cleanup)
	      (exit)))

	(let* ((last-update (dbr:dbstruct-last-update dbstruct))







<
<







567
568
569
570
571
572
573


574
575
576
577
578
579
580
				       (debug:print 0 *default-log-port* "I'm not the server but could not ping "host":"port", will try again.")
				       (thread-sleep! 1) ;; just because
				       #t)))))
			    (else ;; should never get here
			     (debug:print 0 *default-log-port* "BAD SERVER RECORD: "leadsrv)
			     (assert #f "Bad server record "leadsrv))))))))
	(if ok


	    (tt-last-access-set! ttdat *db-last-access*) ;; bit silly, just use db-last-access
	    (begin
	      (debug:print 0 *default-log-port* "Exiting immediately")
	      (cleanup)
	      (exit)))

	(let* ((last-update (dbr:dbstruct-last-update dbstruct))
645
646
647
648
649
650
651
652

653
654
655
656
657
658
659
  (let* ((areapath (tt-areapath ttdat))
	 (servdir  (tt:get-servinfo-dir areapath))
	 (host     (tt-host ttdat))
	 (port     (tt-port ttdat))
	 (servinf (conc servdir"/"host":"port"-"(current-process-id)":"dbfname))
	 (serv-id (tt:mk-signature areapath))
	 (clean-proc (lambda ()
		       (delete-file* servinf))))

    (assert (and host port) "FATAL: tt:create-server-registration-file called with no conn, dbfname="dbfname)
    (tt-cleanup-proc-set! ttdat clean-proc)
    (tt-servinf-file-set! ttdat servinf)
    (with-output-to-file servinf
      (lambda ()
	(print "SERVER STARTED: "host":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)" dbfname: "dbfname)))
      serv-id))







|
>







648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
  (let* ((areapath (tt-areapath ttdat))
	 (servdir  (tt:get-servinfo-dir areapath))
	 (host     (tt-host ttdat))
	 (port     (tt-port ttdat))
	 (servinf (conc servdir"/"host":"port"-"(current-process-id)":"dbfname))
	 (serv-id (tt:mk-signature areapath))
	 (clean-proc (lambda ()
		       (delete-file* servinf)
		       )))
    (assert (and host port) "FATAL: tt:create-server-registration-file called with no conn, dbfname="dbfname)
    (tt-cleanup-proc-set! ttdat clean-proc)
    (tt-servinf-file-set! ttdat servinf)
    (with-output-to-file servinf
      (lambda ()
	(print "SERVER STARTED: "host":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)" dbfname: "dbfname)))
      serv-id))