Megatest

Check-in [c423bc098e]
Login
Overview
Comment:Fixed bunch of issues with main.db server startup.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v2.001
Files: files | file ages | folders
SHA1: c423bc098e2280ab1fbe1f1ebca71195862c59fb
User & Date: matt on 2021-12-13 19:56:21
Other Links: branch diff | manifest | tags
Context
2021-12-14
20:05
Mostly got unit tests working again. check-in: c7197c92dd user: matt tags: v2.001
2021-12-13
19:56
Fixed bunch of issues with main.db server startup. check-in: c423bc098e user: matt tags: v2.001
2021-12-08
21:40
Fixed bunch of build issues with autoload and dbi check-in: ffe0a27e42 user: matt tags: v2.001
Changes

Modified build-assist/README from [ff327b7591] to [d108fc0f5b].

16
17
18
19
20
21
22
23

24
25
26
27
28
16
17
18
19
20
21
22

23
24
25
26
27
28







-
+






Get chicken:

wget https://code.call-cc.org/releases/5.3.0/chicken-5.3.0.tar.gz

Extract, build, and install chicken:

tar xf chicken-5.3.0.tar.gz; cd chicken; make PLATFORM=linux PREFIX=$PREFIX install; cd ..
tar xf chicken-5.3.0.tar.gz; cd chicken-5.3.0; make PLATFORM=linux PREFIX=$PREFIX install; cd ..

Install all needed eggs.
for egg in $(cat ../ck5-egg.list);do echo $egg;ck5 chicken-install $egg;done

Now run the script ../iup-compile.sh for remaining instructions

Modified dbmod.scm from [7cd57dd118] to [ddeeddaa42].

42
43
44
45
46
47
48
49

50
51
52
53

54
55
56
57
58
59
60
42
43
44
45
46
47
48

49
50
51
52
53
54
55
56
57
58
59
60
61







-
+




+







db:get-dbdat
db:get-inmem
db:get-ddb
db:open-dbdat
db:open-run-db
db:open-inmem-db
db:setup
db:get-main-lock
;; db:get-main-lock
db:with-lock-db
db:get-iam-server-lock
db:get-locker
db:take-lock
db:steal-lock-db
db:release-lock
db:general-sqlite-error-dump
db:first-result-default
db:generic-error-printout
db:with-db
db:set-sync
db:get-last-update-time
509
510
511
512
513
514
515
516

517
518
519
520
521
522
523
524
525
526
527
528
529

530
531
532
533
534
535
536


537
538
539
540
541

542
543
544
545
546

547
548
549

550
551
552





553
554
555
556
557
558
559
510
511
512
513
514
515
516

517
518
519
520
521
522
523
524
525
526
527
528
529

530
531
532
533
534
535


536
537
538
539
540
541

542
543
544
545
546

547
548
549

550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565







-
+












-
+





-
-
+
+




-
+




-
+


-
+



+
+
+
+
+







;;  NOTE:
;;       These operate directly on the disk file, NOT on the inmemory db
;;       The lockname is the filename (can have many to one, run-id to fname 
;;======================================================================

;; only use for main.db - need to re-write some of this :(
;;
(define (db:get-main-lock dbfile)
#;(define (db:get-main-lock dbfile)
  (db:with-lock-db dbfile
		(lambda (dbh dbfile)
		  (db:get-iam-server-lock dbh dbfile))))

(define (db:with-lock-db dbfile proc)
  (let* ((dbh (db:open-run-db dbfile db:initialize-db))
	 (res (proc dbh dbfile)))
    ;; (sqlite3:finalize! dbh)
    res))

;; called before db is open?
;;
(define (db:get-iam-server-lock dbh dbfname)
(define (db:get-iam-server-lock dbh dbfname port)
  (sqlite3:with-transaction
   dbh
   (lambda ()
     (let* ((locker (db:get-locker dbh dbfname)))
       (if locker
	   #f
	   (db:take-lock dbh dbfname))))))
	   locker
	   (db:take-lock dbh dbfname port))))))
	     
;; (exn sqlite3) 
(define (db:get-locker dbh dbfname)
  (condition-case
   (sqlite3:first-row dbh "SELECT owner_pid,owner_host,event_time FROM locks WHERE lockname=?;" dbfname)
   (sqlite3:first-row dbh "SELECT owner_pid,owner_host,owner_port,event_time FROM locks WHERE lockname=?;" dbfname)
   (exn (sqlite3) #f)))

;; should never fail because it is run in a transaction with a test for the lock
;;
(define (db:take-lock dbh dbfname)
(define (db:take-lock dbh dbfname port)
  ;; (condition-case
  ;;  (begin
     (sqlite3:execute dbh "INSERT INTO locks (lockname,owner_pid,owner_host) VALUES (?,?,?);" dbfname (current-process-id) (get-host-name))
     (sqlite3:execute dbh "INSERT INTO locks (lockname,owner_pid,owner_host,owner_port) VALUES (?,?,?,?);" dbfname (current-process-id) (get-host-name) port)
   ;;   #t)
     ;; (exn (sqlite3) #f)))
     #t)

(define (db:steal-lock-db dbh dbfname port)
  (sqlite3:execute dbh "DELETE FROM locks WHERE lockname=?;" dbfname)
  (sqlite3:execute dbh "INSERT INTO locks (lockname,owner_pid,owner_host,owner_port) VALUES (?,?,?,?);" dbfname (current-process-id) (get-host-name) port)
  #t)

(define (db:release-lock dbh dbfname)
  (sqlite3:execute dbh "DELETE FROM locks WHERE lockname=?;" dbfname))

;;======================================================================
;; SQLITE3 HELPERS
;;======================================================================
1513
1514
1515
1516
1517
1518
1519

1520
1521
1522
1523
1524
1525
1526
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533







+







     db
     (lambda ()
       (sqlite3:execute db "CREATE TABLE IF NOT EXISTS locks
                                   (id INTEGER PRIMARY KEY,
                                    lockname TEXT,
                                    owner_pid INTEGER,
                                    owner_host TEXT,
                                    owner_port TEXT,
                                    event_time TIMESTAMP DEFAULT (strftime('%s','now')),
                               CONSTRAINT lock_constraint UNIQUE (lockname));")

       ;; maps to *srvpktspec* from http-transportmod
       (sqlite3:execute db "CREATE TABLE IF NOT EXISTS servers
                                   (id INTEGER PRIMARY KEY,
                                    host TEXT,

Modified rmtmod.scm from [7c7b81e269] to [a1b386ab00].

1486
1487
1488
1489
1490
1491
1492

1493
1494





1495
1496
1497
1498
1499
1500
1501
1486
1487
1488
1489
1490
1491
1492
1493


1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505







+
-
-
+
+
+
+
+







	       (inmem      (dbr:dbdat-db dbdat))
	       )
	  ;; do a final sync here
	  (debug:print-info 0 *default-log-port* "Doing final sync for "apath" "dbfile" at "(current-seconds))
	  (db:sync-inmem->disk *dbstruct-db* apath dbfile force-sync: #t)
	  ;; let's finalize here
	  (debug:print-info 0 *default-log-port* "Finalizing db and inmem")
	  (if (sqlite3:database? db)
	  (sqlite3:finalize! db)
	  (sqlite3:finalize! inmem)
	      (sqlite3:finalize! db)
	      (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, db is not a database, not finalizing..."))
	  (if (sqlite3:database? inmem)
	      (sqlite3:finalize! inmem)
	      (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, inmem is not a database, not finalizing..."))
	  (debug:print-info 0 *default-log-port* "Finalizing db and inmem complete")
	  (if am-server
	      (if (string-match ".*/main.db$" dbfile)
		  (let ((pkt-file (conc (get-pkts-dir *toppath*)
					"/" (servdat-uuid *server-info*)
					".pkt")))
		    (debug:print-info 0 *default-log-port* "removing pkt "pkt-file)
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824















1825
1826
1827
1828
1829
1830
1831
1819
1820
1821
1822
1823
1824
1825



1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847







-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+








;;======================================================================
;; NEW SERVER METHOD
;;======================================================================

;; only use for main.db - need to re-write some of this :(
;;
(define (get-lock-db sdat dbfile)
  (let* ((dbh (db:open-run-db dbfile db:initialize-db))
	 (res (db:get-iam-server-lock dbh dbfile)))
(define (get-lock-db sdat dbfile port)
  (let* ((dbh (db:open-run-db dbfile db:initialize-db)) ;; open-run-db creates a standard db with schema used by all situations
	 (res (db:get-iam-server-lock dbh dbfile port)))
    ;; res => list then already locked, check server is responsive
    ;;     => #t then sucessfully got the lock
    ;;     => #f reserved for future use as to indicate something went wrong
    (match res
      ((owner_pid owner_host owner_port event_time)
       (if (server-ready? owner_host owner_port "abc")
	   #f
	   (begin
	     (debug:print 0 *default-log-port* "WARNING: stale lock - have to steal it. This may fail.")
	     (db:steal-lock-db dbh dbfile port))))
      (#t  #t) ;; placeholder so that we don't touch res if it is #t
      (else (set! res #f)))
    (sqlite3:finalize! dbh)
    res))


(define (register-server pkts-dir pkt-spec host port servkey ipaddr dbpath)
  (let* ((pkt-dat `((host    . ,host)
		    (port    . ,port)
1917
1918
1919
1920
1921
1922
1923



















1924
1925
1926
1927
1928
1929
1930
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







    (if (null? tail)
	res ;; NOTE: sort by age so oldest is considered first
	(let* ((spkt (car tail)))
	  (loop (cdr tail)
		(if (equal? dbpath (alist-ref 'dbpath spkt))
		    (cons spkt res)
		    res))))))

(define (remove-pkts-if-not-alive serv-pkts)
  (filter (lambda (pkt)
	    (let* ((host (alist-ref 'host pkt))
		   (port (alist-ref 'port pkt))
		   (key  (alist-ref 'servkey  pkt))
		   (pktz (alist-ref 'Z        pkt))
		   (res  (handle-exceptions
			     exn
			     #f
			   (server-ready? host port key))))
	      (if res
		  res
		  (let* ((pktsdir (get-pkts-dir *toppath*))
			 (pktpath (conc pktsdir"/"pktz".pkt")))
		    (debug:print 0 *default-log-port* "WARNING: pkt with no server "pktpath)
		    (delete-file* pktpath)
		    #f))))
	  serv-pkts))

;; from viable servers get one that is alive and ready
;;
(define (get-the-server apath serv-pkts)
  (let loop ((tail serv-pkts))
    (if (null? tail)
	#f
2004
2005
2006
2007
2008
2009
2010

2011
2012
2013










2014
2015
2016


2017
2018
2019
2020
2021
2022
2023

2024
2025
2026
2027
2028
2029
2030

2031
2032
2033
2034
2035
2036
2037
2038
2039
2039
2040
2041
2042
2043
2044
2045
2046



2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057


2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073

2074


2075
2076
2077
2078
2079
2080
2081







+
-
-
-
+
+
+
+
+
+
+
+
+
+

-
-
+
+







+






-
+
-
-







				  (get-host-name)
				  (servdat-port sdat) server-key
				  (servdat-host sdat) db-file))
	      ;; (set! *my-signature* (servdat-uuid sdat)) ;; replace with Z, no, stick with proper key
	      ;; now read pkts and see if we are a contender
	      (let* ((all-pkts     (get-all-server-pkts pkts-dir *srvpktspec*))
		     (viables      (get-viable-servers all-pkts db-file))
		     (alive        (remove-pkts-if-not-alive viables))
		     (best-srv     (get-best-candidate viables db-file))
		     (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f)))
		(debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key)
		     (best-srv     (get-best-candidate alive db-file))
		     (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f))
		     (i-am-srv     (equal? best-srv-key server-key))
		     (delete-pkt   (lambda ()
				     (let* ((pktfile (conc (get-pkts-dir *toppath*)
							 "/" (servdat-uuid *server-info*)
							 ".pkt")))
				       (debug:print-info 0 *default-log-port* "Attempting to remove bogus pkt file "pktfile)
				       (delete-file* pktfile))))) ;; remove immediately instead of waiting for on-exit
		(debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key", i-am-srv: "i-am-srv)
		;; am I the best-srv, compare server-keys to know
		(if (equal? best-srv-key server-key)
		    (if (get-lock-db sdat db-file) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id)
		(if i-am-srv
		    (if (get-lock-db sdat db-file (servdat-port sdat)) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id)
			(begin
			  (debug:print 0 *default-log-port* "I'm the server!")
			  (servdat-dbfile-set! sdat db-file)
			  (servdat-status-set! sdat 'db-locked))
			(begin
			  (debug:print 0 *default-log-port* "I'm not the server, exiting.")
			  (bdat-time-to-exit-set! *bdat* #t)
			  (delete-pkt)
			  (thread-sleep! 0.2)
			  (exit)))
		    (begin
		      (debug:print 0 *default-log-port*
				   "Keys do not match "best-srv-key", "server-key", exiting.")
		      (bdat-time-to-exit-set! *bdat* #t)
		      (delete-file* (conc (get-pkts-dir *toppath*)
		      (delete-pkt)
					  "/" (servdat-uuid *server-info*)
					  ".pkt")) ;; remove immediately instead of waiting for on-exit
		      (thread-sleep! 0.2)
		      (exit)))
		sdat))
	    (begin ;; sdat not yet contains server info
	      (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
	      (sleep 4)
	      (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes