Megatest

Check-in [024073632a]
Login
Overview
Comment:Added with-lock options for both simple and no-sync locking systems.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.70-refactor-procedures
Files: files | file ages | folders
SHA1: 024073632ac13c3fe0b44705f95d3a08e80fe9d2
User & Date: matt on 2022-06-07 08:30:51
Other Links: branch diff | manifest | tags
Context
2022-06-07
18:41
merged changes from 024073632a and 9a5898a74e. Fixes for db locking, syncing and dashboard startup check-in: 20a2d7904d user: mmgraham tags: v1.70
08:30
Added with-lock options for both simple and no-sync locking systems. Leaf check-in: 024073632a user: matt tags: v1.70-refactor-procedures
2022-06-06
20:25
Fixed (kind-of) not working no-sync db based lock check-in: 79a7f8d883 user: matt tags: v1.70-refactor-procedures
Changes

Modified dbfile.scm from [a601d0637e] to [3ddda7a78f].

240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
	  (set! *dbfile:num-handles-in-use* (+ *dbfile:num-handles-in-use* 1))
	  (stack-pop! (dbr:subdb-dbstack subdb))))))

;; return a previously opened db handle to the stack of available handles
(define (dbfile:add-dbdat dbstruct run-id dbdat)
  (let* ((subdb (dbfile:get-subdb dbstruct run-id))
	 (age   (- (current-seconds)(dbr:dbdat-birth-sec dbdat))))
    (if (> age 30) ;; just testing - discard and close after 30 sec
	(begin
	  ;; (map sqlite3:finalize! (hash-table-values (dbr:dbdat-stmt-cache dbdat)))
	  ;; (sqlite3:finalize! (dbr:dbdat-dbh dbdat))
	  (dbfile:print-err "INFO: Discarded dbdat over 30 sec old ("age"s)"))
	(begin
	  (set! *dbfile:num-handles-in-use* (- *dbfile:num-handles-in-use* 1))
	  (stack-push! (dbr:subdb-dbstack subdb) dbdat)))))







|







240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
	  (set! *dbfile:num-handles-in-use* (+ *dbfile:num-handles-in-use* 1))
	  (stack-pop! (dbr:subdb-dbstack subdb))))))

;; return a previously opened db handle to the stack of available handles
(define (dbfile:add-dbdat dbstruct run-id dbdat)
  (let* ((subdb (dbfile:get-subdb dbstruct run-id))
	 (age   (- (current-seconds)(dbr:dbdat-birth-sec dbdat))))
    (if (> age 300) ;; just testing - discard and close after 30 sec
	(begin
	  ;; (map sqlite3:finalize! (hash-table-values (dbr:dbdat-stmt-cache dbdat)))
	  ;; (sqlite3:finalize! (dbr:dbdat-dbh dbdat))
	  (dbfile:print-err "INFO: Discarded dbdat over 30 sec old ("age"s)"))
	(begin
	  (set! *dbfile:num-handles-in-use* (- *dbfile:num-handles-in-use* 1))
	  (stack-push! (dbr:subdb-dbstack subdb) dbdat)))))
499
500
501
502
503
504
505
506
507
508
509

510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
	  (dbfile:print-err "INFO: dbfile:cautious-open-database: journal file " busy-file " exists, trying again in few seconds.")
	  (thread-sleep! 1)
	  (if (eq? tries-left 2)
	      (begin
		(dbfile:print-err "INFO: forcing journal rollup "busy-file)
		(dbfile:brute-force-salvage-db fname)))
	  (dbfile:cautious-open-database fname init-proc (- tries-left 1)))
	(let* ((db-exists (file-exists? fname))
	       (result (condition-case
			   (let* ((is-no-sync (substring-index "no-sync.db" fname))
				  (nosyncdb   *no-sync-db*)

				  (lockname   (conc fname ".lock"))
				  (db (begin
					(dbfile:simple-file-lock-and-wait lockname expire-time: 5)
					(if (and (not is-no-sync)
						 nosyncdb)
					    (db:no-sync-get-lock nosyncdb fname))
					(sqlite3:open-database fname))))
			     (if (and init-proc (not db-exists))
				 (init-proc db))
			     (if (and (not is-no-sync)
				      nosyncdb)
				 (db:no-sync-del! nosyncdb fname))
			     (dbfile:simple-file-release-lock lockname)
			     db)
			(exn (io-error)
			     (dbfile:print-err exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")
			     (retry))
			(exn (corrupt)
			     (dbfile:print-err exn "ERROR: database " fname " is corrupt. Repair it to proceed.")
			     (retry))
			(exn (busy)
			     (dbfile:print-err exn "ERROR: database " fname
					       " is locked. Try copying to another location, remove original and copy back.")
			     (retry))
			(exn (permission)(dbfile:print-err exn "ERROR: database " fname " has some permissions problem.")
			     (retry))
			(exn ()
			     (dbfile:print-err exn "ERROR: Unknown error with database " fname " message: "
					       ((condition-property-accessor 'exn 'message) exn))
			     (retry)))))
          #;(if (file-write-access? fname)
	    (dbfile:simple-file-release-lock lock-file))
	  result))))

(define (dbfile:brute-force-salvage-db fname)
  (let* ((backupfname (conc fname"-"(current-process-id)".bak"))
	 (cmd (conc "cp "fname" "backupfname";mv "fname" "(conc fname ".delme;")
		    "cp "backupfname" "fname)))
    (dbfile:print-err "WARNING: attempting recovery of file "fname" by running commands:\n"







<
|
<
<
>
|
|
<
<
<
|
|
|
|
<
<
<
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

|







499
500
501
502
503
504
505

506


507
508
509



510
511
512
513




514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
	  (dbfile:print-err "INFO: dbfile:cautious-open-database: journal file " busy-file " exists, trying again in few seconds.")
	  (thread-sleep! 1)
	  (if (eq? tries-left 2)
	      (begin
		(dbfile:print-err "INFO: forcing journal rollup "busy-file)
		(dbfile:brute-force-salvage-db fname)))
	  (dbfile:cautious-open-database fname init-proc (- tries-left 1)))

	(let* ((result (condition-case


			   (dbfile:with-simple-file-lock
			    (conc fname ".lock")
			    (lambda ()



			      (let* ((db-exists (file-exists? fname))
				     (db        (sqlite3:open-database fname)))
				(if (and init-proc (not db-exists))
				    (init-proc db))




				db)))
			 (exn (io-error)
			      (dbfile:print-err exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")
			      (retry))
			 (exn (corrupt)
			      (dbfile:print-err exn "ERROR: database " fname " is corrupt. Repair it to proceed.")
			      (retry))
			 (exn (busy)
			      (dbfile:print-err exn "ERROR: database " fname
						" is locked. Try copying to another location, remove original and copy back.")
			      (retry))
			 (exn (permission)(dbfile:print-err exn "ERROR: database " fname " has some permissions problem.")
			      (retry))
			 (exn ()
			      (dbfile:print-err exn "ERROR: Unknown error with database " fname " message: "
						((condition-property-accessor 'exn 'message) exn))
			      (retry)))))
          #;(if (file-write-access? fname)
	  (dbfile:simple-file-release-lock lock-file))
	  result))))

(define (dbfile:brute-force-salvage-db fname)
  (let* ((backupfname (conc fname"-"(current-process-id)".bak"))
	 (cmd (conc "cp "fname" "backupfname";mv "fname" "(conc fname ".delme;")
		    "cp "backupfname" "fname)))
    (dbfile:print-err "WARNING: attempting recovery of file "fname" by running commands:\n"
644
645
646
647
648
649
650


651
652


653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678











679
680
681
682
683
684
685
;; use (db:no-sync-del! db keyname) to release the lock
;;
(define (db:no-sync-get-lock db keyname)
  (sqlite3:with-transaction
   db
   (lambda ()
     (condition-case


	 `(#f . ,(sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname))
       


       (exn (io-error)  (dbfile:print-err "ERROR: i/o error with no-sync db. Check permissions, disk space etc. and try again."))
       (exn (corrupt)   (dbfile:print-err "ERROR: database no-sync db is corrupt. Repair it to proceed."))
       (exn (busy)      (dbfile:print-err "ERROR: database no-sync db is locked. Try copying to another location, remove original and copy back."))
       (exn (permission)(dbfile:print-err "ERROR: database no-sync db has some permissions problem."))
       (exn () ;; (status done) ;; I don't know how to detect status done but no data!
	    (let ((lock-time (current-seconds)))
	      ;; (debug:print-info 2 *default-log-port* "db:no-sync-get-lock keyname=" keyname ", lock-time=" lock-time ", exn=" exn)
	      (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time)
	      `(#t . ,lock-time)))
       #;(exn ()
	    (dbfile:print-err "ERROR: Unknown error with database no-sync db message: exn="(condition->list exn)", \n"
			      ((condition-property-accessor 'exn 'message) exn))
	    `(#f . ,(current-seconds)))))))

(define (db:no-sync-get-lock-timeout db keyname timeout)
  (let* ((lockdat (db:no-sync-get-lock db keyname)))
    (match lockdat
      ((#f . lock-time)
       (if (> (- (current-seconds) (if (string? lock-time)(string->number lock-time)lock-time)) timeout)
	   (let ((lock-time (current-seconds)))
	     ;; (debug:print-info 2 *default-log-port* "db:no-sync-get-lock keyname=" keyname ", lock-time=" lock-time ", exn=" exn)
	     (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time)
	     `(#t . ,lock-time))
	   lockdat))
      (else lockdat))))












;;======================================================================
;; sync back functions pulled from db.scm
;;======================================================================

;; Get a lock from the no-sync-db for the from-db, then delta sync the from-db to the to-db, otherwise return #f
;;
(define (db:lock-and-delta-sync no-sync-db dbstruct from-db-file runid keys dbinit)







>
>
|
|
>
>





<
<
<
<
<
















>
>
>
>
>
>
>
>
>
>
>







635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652





653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
;; use (db:no-sync-del! db keyname) to release the lock
;;
(define (db:no-sync-get-lock db keyname)
  (sqlite3:with-transaction
   db
   (lambda ()
     (condition-case
	 (let* ((curr-val (db:no-sync-get/default db keyname #f)))
	   (if curr-val
	       `(#f . ,curr-val)   ;; (sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname))
	       (let ((lock-time (current-seconds)))
		 (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time)
		 `(#t . ,lock-time))))
       (exn (io-error)  (dbfile:print-err "ERROR: i/o error with no-sync db. Check permissions, disk space etc. and try again."))
       (exn (corrupt)   (dbfile:print-err "ERROR: database no-sync db is corrupt. Repair it to proceed."))
       (exn (busy)      (dbfile:print-err "ERROR: database no-sync db is locked. Try copying to another location, remove original and copy back."))
       (exn (permission)(dbfile:print-err "ERROR: database no-sync db has some permissions problem."))
       (exn () ;; (status done) ;; I don't know how to detect status done but no data!





	    (dbfile:print-err "ERROR: Unknown error with database no-sync db message: exn="(condition->list exn)", \n"
			      ((condition-property-accessor 'exn 'message) exn))
	    `(#f . ,(current-seconds)))))))

(define (db:no-sync-get-lock-timeout db keyname timeout)
  (let* ((lockdat (db:no-sync-get-lock db keyname)))
    (match lockdat
      ((#f . lock-time)
       (if (> (- (current-seconds) (if (string? lock-time)(string->number lock-time)lock-time)) timeout)
	   (let ((lock-time (current-seconds)))
	     ;; (debug:print-info 2 *default-log-port* "db:no-sync-get-lock keyname=" keyname ", lock-time=" lock-time ", exn=" exn)
	     (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time)
	     `(#t . ,lock-time))
	   lockdat))
      (else lockdat))))

;; NOTE: This will steal the lock after timeout of waiting.
;;
(define (db:with-no-sync-lock db keyname timeout proc)
  (let* ((lockdat  (db:no-sync-get-lock-timeout db keyname))
	 (gotlock  (car lockdat))
	 (locktime (cdr lockdat)))
    (if gotlock
	(let ((res (proc)))
	  (db:no-sync-del! db keyname)
	  res))))
  
;;======================================================================
;; sync back functions pulled from db.scm
;;======================================================================

;; Get a lock from the no-sync-db for the from-db, then delta sync the from-db to the to-db, otherwise return #f
;;
(define (db:lock-and-delta-sync no-sync-db dbstruct from-db-file runid keys dbinit)
1314
1315
1316
1317
1318
1319
1320







1321
1322

(define (dbfile:simple-file-release-lock fname)
  (handle-exceptions
      exn
      #f ;; I don't really care why this failed (at least for now)
    (delete-file* fname)))









)







>
>
>
>
>
>
>
|

1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330

(define (dbfile:simple-file-release-lock fname)
  (handle-exceptions
      exn
      #f ;; I don't really care why this failed (at least for now)
    (delete-file* fname)))

(define (dbfile:with-simple-file-lock fname proc #!key (expire-time 300))
  (let ((gotlock (dbfile:simple-file-lock-and-wait fname expire-time: expire-time)))
    (if gotlock
	(let ((res (proc)))
	  (dbfile:simple-file-release-lock fname)
	  res)
	(assert #t "FATAL: simple file lock never got a lock."))))
  
)