Megatest

Check-in [ac41427eb2]
Login
Overview
Comment:Fixed server lock. Noticed sync running from dashboard only - seems wrong.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.80
Files: files | file ages | folders
SHA1: ac41427eb2f8bfe8ff6dfec886e89be53fa1b8e5
User & Date: matt on 2023-05-13 00:00:51
Other Links: branch diff | manifest | tags
Context
2023-05-13
00:27
Turned off sync when direct-to-db mode 'none is used check-in: 49befe770d user: matt tags: v1.80
00:00
Fixed server lock. Noticed sync running from dashboard only - seems wrong. check-in: ac41427eb2 user: matt tags: v1.80
2023-05-11
12:12
Gate running the megatest syncer by existance of lock file. check-in: 12c06b6095 user: matt tags: v1.80
Changes

Modified dbfile.scm from [27150153ec] to [5aff2586ce].

539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565




















566
567
568
569
570
571
572
  (sqlite3:with-transaction
   db
   (lambda ()
     (condition-case
      (let* ((curr-val (db:no-sync-get/default db keyname #f)))
	(if curr-val
	    (match (db:extract-time-identifier curr-val) ;; result->timestamp, identifier
	       ((timestamp ident)
		(if (equal? ident identifier)
		    (cons #t timestamp)    ;; this *is* my lock
		    (cons #f timestamp)))  ;; nope, not my lock
	       (else (cons #f #f)))  ;; nope, not my lock
	    (let ((curr-sec (current-seconds))
		  (lock-value (if identifier
				  (conc (current-seconds)"+"identifier)
				  (current-seconds))))
	      (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-value)
	      (cons #t curr-sec))))
      (exn (io-error)  (dbfile:print-err "ERROR: i/o error with no-sync db. Check permissions, disk space etc. and try again."))
      (exn (corrupt)   (dbfile:print-err "ERROR: database no-sync db is corrupt. Repair it to proceed."))
      (exn (busy)      (dbfile:print-err "ERROR: database no-sync db is locked. Try copying to another location, remove original and copy back."))
      (exn (permission)(dbfile:print-err "ERROR: database no-sync db has some permissions problem."))
      (exn () ;; (status done) ;; I don't know how to detect status done but no data!
	   (dbfile:print-err "ERROR: Unknown error with database no-sync db message: exn="(condition->list exn)", \n"
			     ((condition-property-accessor 'exn 'message) exn))
	   #f)))))





















;; transaction protected lock aquisition
;; either:
;;    fails    returns  (#f . lock-creation-time)
;;    succeeds (returns (#t . lock-creation-time)
;; use (db:no-sync-del! db keyname) to release the lock
;;
(define (db:no-sync-get-lock db keyname)







|
|
<
<
|













|

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







539
540
541
542
543
544
545
546
547


548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
  (sqlite3:with-transaction
   db
   (lambda ()
     (condition-case
      (let* ((curr-val (db:no-sync-get/default db keyname #f)))
	(if curr-val
	    (match (db:extract-time-identifier curr-val) ;; result->timestamp, identifier
	       ((timestamp . ident)
		(cons (equal? ident identifier) timestamp))


	       (else (cons #f 'malformed-lock)))  ;; lock malformed
	    (let ((curr-sec (current-seconds))
		  (lock-value (if identifier
				  (conc (current-seconds)"+"identifier)
				  (current-seconds))))
	      (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-value)
	      (cons #t curr-sec))))
      (exn (io-error)  (dbfile:print-err "ERROR: i/o error with no-sync db. Check permissions, disk space etc. and try again."))
      (exn (corrupt)   (dbfile:print-err "ERROR: database no-sync db is corrupt. Repair it to proceed."))
      (exn (busy)      (dbfile:print-err "ERROR: database no-sync db is locked. Try copying to another location, remove original and copy back."))
      (exn (permission)(dbfile:print-err "ERROR: database no-sync db has some permissions problem."))
      (exn () ;; (status done) ;; I don't know how to detect status done but no data!
	   (dbfile:print-err "ERROR: Unknown error with database no-sync db message: exn="(condition->list exn)", \n"
			     ((condition-property-accessor 'exn 'message) exn))
	   (cons #f #f))))))

(define (db:no-sync-check-lock db keyname identifier)
  (let* ((curr-val (db:no-sync-get/default db keyname #f)))
    (match (db:extract-time-identifier curr-val) ;; result->timestamp, identifier
      ((timestamp . ident)
       (cons (equal? ident identifier) ident))
      (else  (cons #f 'no-lock)))))

;; get the lock, wait 0.25 seconds and verify still have it.
;; this should not be necessary given the use of transaction in
;; db:no-sync-get-lock-with-id but it does seem to be needed
;;
(define (db:no-sync-lock-and-check db keyname identifier)
  (let* ((result  (db:no-sync-get-lock-with-id db keyname identifier))
	 (gotlock (car result)))
    (if gotlock
	(begin
	  (thread-sleep! 0.25)
	  (db:no-sync-check-lock db keyname identifier))
	result)))
    
;; transaction protected lock aquisition
;; either:
;;    fails    returns  (#f . lock-creation-time)
;;    succeeds (returns (#t . lock-creation-time)
;; use (db:no-sync-del! db keyname) to release the lock
;;
(define (db:no-sync-get-lock db keyname)

Modified tcp-transportmod.scm from [f16326ea6c] to [8dca985e36].

515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
      (let* ((servers (tt:get-server-info-sorted ttdat dbfname))
	     (ok      (cond
		       ((null? servers) #f) ;; not ok
		       ((equal? (list-ref (car servers) 6) ;; compare the servinfofile
				(tt-servinf-file ttdat))
			(let* ((res (if db-locked-in
					#t
					(let* ((lock-result
						(dbfile:with-no-sync-db
						 nosyncdbpath
						 (lambda (db)
						   (db:no-sync-get-lock-with-id db dbfname
										;; (tt-servinf-file ttdat)
										(dbr:dbstruct-dbtmpname dbstruct)
										))))
					       (success (car lock-result)))
					  (if success
					      (begin
						(tt-state-set! ttdat 'running)
						(debug:print 0 *default-log-port* "Got server lock for "
							     dbfname)
						(set! db-locked-in #t)
						#t)
					      (begin
						(debug:print 0 *default-log-port* "Failed to get server lock for "dbfname)
						#f))))))
			  (if (and res (common:low-noise-print 120 "top server message"))
			      (debug:print-info 0 *default-log-port* "Keep running, I'm the top server for "







|



|
|
|
|




|
<







515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534

535
536
537
538
539
540
541
      (let* ((servers (tt:get-server-info-sorted ttdat dbfname))
	     (ok      (cond
		       ((null? servers) #f) ;; not ok
		       ((equal? (list-ref (car servers) 6) ;; compare the servinfofile
				(tt-servinf-file ttdat))
			(let* ((res (if db-locked-in
					#t
					(let* ((lock-result  ;; this is the primary lock - need to double verify that got it
						(dbfile:with-no-sync-db
						 nosyncdbpath
						 (lambda (db)
						   (db:no-sync-lock-and-check db dbfname
									      (tt-servinf-file ttdat)
									      ;; (dbr:dbstruct-dbtmpname dbstruct)
									      ))))
					       (success (car lock-result)))
					  (if success
					      (begin
						(tt-state-set! ttdat 'running)
						(debug:print 0 *default-log-port* "Got server lock for " dbfname)

						(set! db-locked-in #t)
						#t)
					      (begin
						(debug:print 0 *default-log-port* "Failed to get server lock for "dbfname)
						#f))))))
			  (if (and res (common:low-noise-print 120 "top server message"))
			      (debug:print-info 0 *default-log-port* "Keep running, I'm the top server for "