Megatest

Check-in [494cb9b035]
Login
Overview
Comment:Locking of db
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.80
Files: files | file ages | folders
SHA1: 494cb9b03533ac1081fb65ccec93dccc0f3e3e11
User & Date: mrwellan on 2023-03-02 19:12:46
Other Links: branch diff | manifest | tags
Context
2023-03-02
19:19
Better message check-in: c3879943f0 user: mrwellan tags: v1.80
19:12
Locking of db check-in: 494cb9b035 user: mrwellan tags: v1.80
2023-03-01
21:29
Speculative fix for transaction crash. Check if there is a transaction before proceeding. check-in: e8d8a38e49 user: mrwellan tags: v1.80
Changes

Modified db.scm from [e3db33630a] to [5ed64563c8].

1535
1536
1537
1538
1539
1540
1541

1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553


1554
1555
1556
1557
1558
1559
1560
1561
1562

;; extract index number given a header/data structure
(define (db:get-index-by-header header field)
  (list-index (lambda (x)(equal? x field)) header))

;; look up values in a header/data structure
(define (db:get-value-by-header row header field)

  (if (or (null? header) (not row))
      #f
      (let loop ((hed (car header))
                 (tal (cdr header))
                 (n   0))
        (if (equal? hed field)
            (handle-exceptions
             exn
             (begin
               (debug:print 0 *default-log-port* "WARNING: attempt to read non-existant field, row="
			    row " header=" header " field=" field ", exn=" exn)
               #f)


             (vector-ref row n))
	    (if (null? tal) #f (loop (car tal)(cdr tal)(+ n 1)))))))

;; Accessors for the header/data structure
;; get rows and header from 
(define (db:get-header vec)(vector-ref vec 0))
(define (db:get-rows   vec)(vector-ref vec 1))

;;======================================================================







>
|
|
|
|
|
|
|
|
|
|
|
|
>
>
|
|







1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565

;; extract index number given a header/data structure
(define (db:get-index-by-header header field)
  (list-index (lambda (x)(equal? x field)) header))

;; look up values in a header/data structure
(define (db:get-value-by-header row header field)
  (let ((len (vector-length row)))
    (if (or (null? header) (not row))
	#f
	(let loop ((hed (car header))
		   (tal (cdr header))
		   (n   0))
	  (if (equal? hed field)
	      (handle-exceptions
	       exn
	       (begin
		 (debug:print 0 *default-log-port* "WARNING: attempt to read non-existant field, row="
			      row " header=" header " field=" field ", exn=" exn)
		 #f)
	       (if (>= n len)
		   #f
		   (vector-ref row n)))
	      (if (null? tal) #f (loop (car tal)(cdr tal)(+ n 1))))))))

;; Accessors for the header/data structure
;; get rows and header from 
(define (db:get-header vec)(vector-ref vec 0))
(define (db:get-rows   vec)(vector-ref vec 1))

;;======================================================================
3359
3360
3361
3362
3363
3364
3365


3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
         (if (and test-id state status (equal? status "AUTO")) 
             (db:test-data-rollup dbstruct run-id test-id status))
	 (if new-state-eh ;; moved from db:test-set-state-status
	      (mt:process-triggers dbstruct run-id test-id new-state-eh new-status-eh))
         tr-res)))))

(define (db:roll-up-rules state-status-counts state status)


  (let* ((running     (length (filter (lambda (x)
					(member (dbr:counts-state x) *common:running-states*))
				      state-status-counts)))
	 (bad-not-started      (length (filter (lambda (x)
						 (and (equal? (dbr:counts-state x) "NOT_STARTED") 
						      (not (member (dbr:counts-status x)  *common:not-started-ok-statuses*))))
					       state-status-counts)))
	 (all-curr-states      (common:special-sort  ;; worst -> best (sort of)
				(delete-duplicates
				 (if (and state (not (member state *common:dont-roll-up-states*)))
				     (cons state (map dbr:counts-state state-status-counts))
				     (map dbr:counts-state state-status-counts)))
				*common:std-states* >))
	 (all-curr-statuses    (common:special-sort  ;; worst -> best
				(delete-duplicates
				 (if (and state status (not (member state *common:dont-roll-up-states*)))
				     (cons status (map dbr:counts-status state-status-counts))
				     (map dbr:counts-status state-status-counts)))
				*common:std-statuses* >))
	 (non-completes        (filter (lambda (x)
					 (not (member x (cons "COMPLETED" *common:dont-roll-up-states*))))
				       all-curr-states))
	 (preq-fails        (filter (lambda (x)
				      (equal? x "PREQ_FAIL"))
				    all-curr-statuses))
	 (num-non-completes (length non-completes))
	 (newstate          (cond
			     ((> running 0)           "RUNNING")            ;; anything running, call the situation running
			     ((> (length preq-fails) 0) "NOT_STARTED")
			     ((> bad-not-started 0)   "COMPLETED")          ;; we have an ugly situation, it is completed in the sense we cannot do more.
			     ((> num-non-completes 0) (car non-completes))  ;;  (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED
			     (else                    (car all-curr-states))))
	 (newstatus         (cond
			     ((> (length preq-fails) 0)  "PREQ_FAIL")
			     ((or (> bad-not-started 0)
				  (and (equal? newstate "NOT_STARTED")
				       (> num-non-completes 0)))
			      "STARTED")
			     (else (car all-curr-statuses)))))
    (debug:print-info 2 *default-log-port*
		      "\n--> probe db:set-state-status-and-roll-up-items: "
		      "\n--> state-status-counts: "(map dbr:counts->alist state-status-counts)
		      "\n--> running:             "running
		      "\n--> bad-not-started:     "bad-not-started
		      "\n--> non-non-completes:   "num-non-completes
		      "\n--> non-completes:       "non-completes
		      "\n--> all-curr-states:     "all-curr-states
		      "\n--> all-curr-statuses:     "all-curr-statuses
		      "\n--> newstate              "newstate
		      "\n--> newstatus            "newstatus
		      "\n\n")
    
    ;; NB// Pass the db so it is part of the transaction
    (list newstate newstatus)))

(define (db:set-state-status-and-roll-up-run dbstruct run-id curr-state curr-status)
    (mutex-lock! *db-transaction-mutex*)
    (db:with-db
     dbstruct run-id #f
     (lambda (dbdat db)
       (let ((tr-res







>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
         (if (and test-id state status (equal? status "AUTO")) 
             (db:test-data-rollup dbstruct run-id test-id status))
	 (if new-state-eh ;; moved from db:test-set-state-status
	      (mt:process-triggers dbstruct run-id test-id new-state-eh new-status-eh))
         tr-res)))))

(define (db:roll-up-rules state-status-counts state status)
  (if (null? state-status-counts)
      '(#f #f)
      (let* ((running     (length (filter (lambda (x)
					    (member (dbr:counts-state x) *common:running-states*))
					  state-status-counts)))
	     (bad-not-started      (length (filter (lambda (x)
						     (and (equal? (dbr:counts-state x) "NOT_STARTED") 
							  (not (member (dbr:counts-status x)  *common:not-started-ok-statuses*))))
						   state-status-counts)))
	     (all-curr-states      (common:special-sort  ;; worst -> best (sort of)
				    (delete-duplicates
				     (if (and state (not (member state *common:dont-roll-up-states*)))
					 (cons state (map dbr:counts-state state-status-counts))
					 (map dbr:counts-state state-status-counts)))
				    *common:std-states* >))
	     (all-curr-statuses    (common:special-sort  ;; worst -> best
				    (delete-duplicates
				     (if (and state status (not (member state *common:dont-roll-up-states*)))
					 (cons status (map dbr:counts-status state-status-counts))
					 (map dbr:counts-status state-status-counts)))
				    *common:std-statuses* >))
	     (non-completes        (filter (lambda (x)
					     (not (member x (cons "COMPLETED" *common:dont-roll-up-states*))))
					   all-curr-states))
	     (preq-fails        (filter (lambda (x)
					  (equal? x "PREQ_FAIL"))
					all-curr-statuses))
	     (num-non-completes (length non-completes))
	     (newstate          (cond
				 ((> running 0)           "RUNNING")            ;; anything running, call the situation running
				 ((> (length preq-fails) 0) "NOT_STARTED")
				 ((> bad-not-started 0)   "COMPLETED")          ;; we have an ugly situation, it is completed in the sense we cannot do more.
				 ((> num-non-completes 0) (car non-completes))  ;;  (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED
				 (else                    (car all-curr-states))))
	     (newstatus         (cond
				 ((> (length preq-fails) 0)  "PREQ_FAIL")
				 ((or (> bad-not-started 0)
				      (and (equal? newstate "NOT_STARTED")
					   (> num-non-completes 0)))
				  "STARTED")
				 (else (car all-curr-statuses)))))
	(debug:print-info 2 *default-log-port*
			  "\n--> probe db:set-state-status-and-roll-up-items: "
			  "\n--> state-status-counts: "(map dbr:counts->alist state-status-counts)
			  "\n--> running:             "running
			  "\n--> bad-not-started:     "bad-not-started
			  "\n--> non-non-completes:   "num-non-completes
			  "\n--> non-completes:       "non-completes
			  "\n--> all-curr-states:     "all-curr-states
			  "\n--> all-curr-statuses:     "all-curr-statuses
			  "\n--> newstate              "newstate
			  "\n--> newstatus            "newstatus
			  "\n\n")
	
	;; NB// Pass the db so it is part of the transaction
	(list newstate newstatus))))

(define (db:set-state-status-and-roll-up-run dbstruct run-id curr-state curr-status)
    (mutex-lock! *db-transaction-mutex*)
    (db:with-db
     dbstruct run-id #f
     (lambda (dbdat db)
       (let ((tr-res

Modified dbfile.scm from [a3424eedef] to [776ffe336b].

399
400
401
402
403
404
405
406


407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429












430
431
432
433
434
435
436
437
438
(define (dbfile:brute-force-salvage-db fname)
  (let* ((backupfname (conc fname"-"(current-process-id)".bak"))
	 (cmd (conc "cp "fname" "backupfname";mv "fname" "(conc fname ".delme;")
		    "cp "backupfname" "fname)))
    (dbfile:print-err "WARNING: attempting recovery of file "fname" by running commands:\n"
		      "  "cmd)
    (system cmd)))



(define (dbfile:open-no-sync-db dbpath)
  (if *no-sync-db*
      *no-sync-db*
      (begin
	(if (not (file-exists? dbpath))
	    (create-directory dbpath #t))
	(let* ((dbname    (conc dbpath "/no-sync.db"))
	       (db-exists (file-exists? dbname))
	       (init-proc (lambda (db)
			    (if (not db-exists)
				(begin
				  (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));"))
				)))
	       (on-tmp      (equal? (car (string-split dbpath "/")) "tmp"))
	       (db        (if on-tmp
			      (dbfile:cautious-open-database dbname init-proc 0 "WAL")
			      (dbfile:cautious-open-database dbname init-proc 0 #f)
			      ;; (sqlite3:open-database dbname)
			      )))
	  (if on-tmp	      ;; done in cautious-open-database
	      (begin
		(sqlite3:execute db "PRAGMA synchronous = 0;")
		(sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))))












	  (set! *no-sync-db* db)
	  db))))

(define (db:no-sync-set db var val)
  (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val))

(define (db:no-sync-del! db var)
  (sqlite3:execute db "DELETE FROM no_sync_metadat WHERE var=?;" var))









>
>
|
<
<
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
|
|







399
400
401
402
403
404
405
406
407
408
409



410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
(define (dbfile:brute-force-salvage-db fname)
  (let* ((backupfname (conc fname"-"(current-process-id)".bak"))
	 (cmd (conc "cp "fname" "backupfname";mv "fname" "(conc fname ".delme;")
		    "cp "backupfname" "fname)))
    (dbfile:print-err "WARNING: attempting recovery of file "fname" by running commands:\n"
		      "  "cmd)
    (system cmd)))

;; opens and returns handle and nothing else
;;
(define (dbfile:raw-open-no-sync-db dbpath)



  (if (not (file-exists? dbpath))
      (create-directory dbpath #t))
  (let* ((dbname    (conc dbpath "/no-sync.db"))
	 (db-exists (file-exists? dbname))
	 (init-proc (lambda (db)
		      (if (not db-exists)
			  (begin
			    (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));"))
			  )))
	 (on-tmp      (equal? (car (string-split dbpath "/")) "tmp"))
	 (db        (if on-tmp
			(dbfile:cautious-open-database dbname init-proc 0 "WAL")
			(dbfile:cautious-open-database dbname init-proc 0 #f)
			;; (sqlite3:open-database dbname)
			)))
    (if on-tmp	      ;; done in cautious-open-database
	(begin
	  (sqlite3:execute db "PRAGMA synchronous = 0;")
	  (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))))
    db))

(define (dbfile:with-no-sync-db dbpath proc)
  (let* ((db  (dbfile:raw-open-no-sync-db dbpath))
	 (res (proc db)))
    (sqlite3:finalize! db)
    res))

(define (dbfile:open-no-sync-db dbpath)
  (if *no-sync-db*
      *no-sync-db*
      (let* ((db (dbfile:raw-open-no-sync-db dbpath)))
	(set! *no-sync-db* db)
	db)))

(define (db:no-sync-set db var val)
  (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val))

(define (db:no-sync-del! db var)
  (sqlite3:execute db "DELETE FROM no_sync_metadat WHERE var=?;" var))

Modified dbmod.scm from [982e41ee49] to [cf9c562387].

337
338
339
340
341
342
343


344
345
346
347
348
349
350
     (conc "SELECT name FROM pragma_table_info('"tablename"') as tblInfo;"))
    has-last))

;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) )
;;
;; direction = fromdest, todest
;; mode = 'full, 'incr


;;
(define (dbmod:attach-sync tables dbh destdbfile direction #!key
			   (mode 'full)
			   (no-update '("keys")) ;; do
			   )
  (debug:print 0 *default-log-port* "Doing sync "direction" "destdbfile)
  (if (not (sqlite3:auto-committing? dbh))







>
>







337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
     (conc "SELECT name FROM pragma_table_info('"tablename"') as tblInfo;"))
    has-last))

;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) )
;;
;; direction = fromdest, todest
;; mode = 'full, 'incr
;;
;; Idea: youngest in dest is last_update time
;;
(define (dbmod:attach-sync tables dbh destdbfile direction #!key
			   (mode 'full)
			   (no-update '("keys")) ;; do
			   )
  (debug:print 0 *default-log-port* "Doing sync "direction" "destdbfile)
  (if (not (sqlite3:auto-committing? dbh))

Modified tcp-transportmod.scm from [e26313be2b] to [0f2615acc5].

338
339
340
341
342
343
344



345
346
347



348
349
350
351
352
353
354
    ))

(define (tt:keep-running ttdat dbfname dbstruct)
  ;; verfiy conn for ready
  ;; listener socket has been started by this stage
  ;; wait for a port before creating the registration file
  ;;



  (let* ((cleanup (lambda ()
		    (if (tt-cleanup-proc ttdat)
			((tt-cleanup-proc ttdat))))))



    (let loop ((count 0))
      (if (> count 240)
	  (begin
	    (debug:print 0 *default-log-port* "FATAL: Could not start a tcp server, giving up.")
	    (exit 1))
	  (if (not (tt-port ttdat)) ;; no connection yet
	      (begin







>
>
>
|

|
>
>
>







338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
    ))

(define (tt:keep-running ttdat dbfname dbstruct)
  ;; verfiy conn for ready
  ;; listener socket has been started by this stage
  ;; wait for a port before creating the registration file
  ;;
  (let* ((db-locked-in #f)
	 (areapath     (tt-areapath ttdat))
	 (nosyncdbpath (conc areapath"/.megatest"))
	 (cleanup (lambda ()
		    (if (tt-cleanup-proc ttdat)
			((tt-cleanup-proc ttdat)))
		    (dbfile:with-no-sync-db nosyncdbpath
					    (lambda (db)
					      (db:no-sync-del! db dbfname))))))
    (let loop ((count 0))
      (if (> count 240)
	  (begin
	    (debug:print 0 *default-log-port* "FATAL: Could not start a tcp server, giving up.")
	    (exit 1))
	  (if (not (tt-port ttdat)) ;; no connection yet
	      (begin
365
366
367
368
369
370
371

372





373
374
375
376
377
378
379
    (let loop ()
      (let* ((servers (tt:get-server-info-sorted ttdat dbfname))
	     (ok      (cond
		       ((null? servers) #f) ;; not ok
		       ((equal? (list-ref (car servers) 6) ;; compare the servinfofile
				(tt-servinf-file ttdat))
			(debug:print-info 0 *default-log-port* "Keep running, I'm the top server.")

			#t)





		       (else
			(debug:print-info 0 *default-log-port* "I'm not the lead server: "servers)
			(let* ((leadsrv (car servers)))
			  (match leadsrv
			    ((host port startseconds server-id pid dbfname servinfofile)
			     (if (tt:ping host port server-id)
				 #f ;; not the server, but all good, want to exit







>
|
>
>
>
>
>







371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
    (let loop ()
      (let* ((servers (tt:get-server-info-sorted ttdat dbfname))
	     (ok      (cond
		       ((null? servers) #f) ;; not ok
		       ((equal? (list-ref (car servers) 6) ;; compare the servinfofile
				(tt-servinf-file ttdat))
			(debug:print-info 0 *default-log-port* "Keep running, I'm the top server.")
			(if db-locked-in
			    #t
			    (let* ((lockinfo  (dbfile:with-no-sync-db nosyncdbpath
								      (lambda (db)
									(db:no-sync-get-lock db dbfname))))
				   (success   (car lockinfo)))
			      success)))
		       (else
			(debug:print-info 0 *default-log-port* "I'm not the lead server: "servers)
			(let* ((leadsrv (car servers)))
			  (match leadsrv
			    ((host port startseconds server-id pid dbfname servinfofile)
			     (if (tt:ping host port server-id)
				 #f ;; not the server, but all good, want to exit