Index: build-assist/README ================================================================== --- build-assist/README +++ build-assist/README @@ -18,11 +18,11 @@ wget https://code.call-cc.org/releases/5.3.0/chicken-5.3.0.tar.gz Extract, build, and install chicken: -tar xf chicken-5.3.0.tar.gz; cd chicken; make PLATFORM=linux PREFIX=$PREFIX install; cd .. +tar xf chicken-5.3.0.tar.gz; cd chicken-5.3.0; make PLATFORM=linux PREFIX=$PREFIX install; cd .. Install all needed eggs. for egg in $(cat ../ck5-egg.list);do echo $egg;ck5 chicken-install $egg;done Now run the script ../iup-compile.sh for remaining instructions Index: dbmod.scm ================================================================== --- dbmod.scm +++ dbmod.scm @@ -44,15 +44,16 @@ db:get-ddb db:open-dbdat db:open-run-db db:open-inmem-db db:setup -db:get-main-lock +;; db:get-main-lock db:with-lock-db db:get-iam-server-lock db:get-locker db:take-lock +db:steal-lock-db db:release-lock db:general-sqlite-error-dump db:first-result-default db:generic-error-printout db:with-db @@ -511,11 +512,11 @@ ;; The lockname is the filename (can have many to one, run-id to fname ;;====================================================================== ;; only use for main.db - need to re-write some of this :( ;; -(define (db:get-main-lock dbfile) +#;(define (db:get-main-lock dbfile) (db:with-lock-db dbfile (lambda (dbh dbfile) (db:get-iam-server-lock dbh dbfile)))) (define (db:with-lock-db dbfile proc) @@ -524,34 +525,39 @@ ;; (sqlite3:finalize! dbh) res)) ;; called before db is open? ;; -(define (db:get-iam-server-lock dbh dbfname) +(define (db:get-iam-server-lock dbh dbfname port) (sqlite3:with-transaction dbh (lambda () (let* ((locker (db:get-locker dbh dbfname))) (if locker - #f - (db:take-lock dbh dbfname)))))) + locker + (db:take-lock dbh dbfname port)))))) ;; (exn sqlite3) (define (db:get-locker dbh dbfname) (condition-case - (sqlite3:first-row dbh "SELECT owner_pid,owner_host,event_time FROM locks WHERE lockname=?;" dbfname) + (sqlite3:first-row dbh "SELECT owner_pid,owner_host,owner_port,event_time FROM locks WHERE lockname=?;" dbfname) (exn (sqlite3) #f))) ;; should never fail because it is run in a transaction with a test for the lock ;; -(define (db:take-lock dbh dbfname) +(define (db:take-lock dbh dbfname port) ;; (condition-case ;; (begin - (sqlite3:execute dbh "INSERT INTO locks (lockname,owner_pid,owner_host) VALUES (?,?,?);" dbfname (current-process-id) (get-host-name)) + (sqlite3:execute dbh "INSERT INTO locks (lockname,owner_pid,owner_host,owner_port) VALUES (?,?,?,?);" dbfname (current-process-id) (get-host-name) port) ;; #t) ;; (exn (sqlite3) #f))) #t) + +(define (db:steal-lock-db dbh dbfname port) + (sqlite3:execute dbh "DELETE FROM locks WHERE lockname=?;" dbfname) + (sqlite3:execute dbh "INSERT INTO locks (lockname,owner_pid,owner_host,owner_port) VALUES (?,?,?,?);" dbfname (current-process-id) (get-host-name) port) + #t) (define (db:release-lock dbh dbfname) (sqlite3:execute dbh "DELETE FROM locks WHERE lockname=?;" dbfname)) ;;====================================================================== @@ -1515,10 +1521,11 @@ (sqlite3:execute db "CREATE TABLE IF NOT EXISTS locks (id INTEGER PRIMARY KEY, lockname TEXT, owner_pid INTEGER, owner_host TEXT, + owner_port TEXT, event_time TIMESTAMP DEFAULT (strftime('%s','now')), CONSTRAINT lock_constraint UNIQUE (lockname));") ;; maps to *srvpktspec* from http-transportmod (sqlite3:execute db "CREATE TABLE IF NOT EXISTS servers Index: rmtmod.scm ================================================================== --- rmtmod.scm +++ rmtmod.scm @@ -1488,12 +1488,16 @@ ;; do a final sync here (debug:print-info 0 *default-log-port* "Doing final sync for "apath" "dbfile" at "(current-seconds)) (db:sync-inmem->disk *dbstruct-db* apath dbfile force-sync: #t) ;; let's finalize here (debug:print-info 0 *default-log-port* "Finalizing db and inmem") - (sqlite3:finalize! db) - (sqlite3:finalize! inmem) + (if (sqlite3:database? db) + (sqlite3:finalize! db) + (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, db is not a database, not finalizing...")) + (if (sqlite3:database? inmem) + (sqlite3:finalize! inmem) + (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, inmem is not a database, not finalizing...")) (debug:print-info 0 *default-log-port* "Finalizing db and inmem complete") (if am-server (if (string-match ".*/main.db$" dbfile) (let ((pkt-file (conc (get-pkts-dir *toppath*) "/" (servdat-uuid *server-info*) @@ -1817,13 +1821,25 @@ ;; NEW SERVER METHOD ;;====================================================================== ;; only use for main.db - need to re-write some of this :( ;; -(define (get-lock-db sdat dbfile) - (let* ((dbh (db:open-run-db dbfile db:initialize-db)) - (res (db:get-iam-server-lock dbh dbfile))) +(define (get-lock-db sdat dbfile port) + (let* ((dbh (db:open-run-db dbfile db:initialize-db)) ;; open-run-db creates a standard db with schema used by all situations + (res (db:get-iam-server-lock dbh dbfile port))) + ;; res => list then already locked, check server is responsive + ;; => #t then sucessfully got the lock + ;; => #f reserved for future use as to indicate something went wrong + (match res + ((owner_pid owner_host owner_port event_time) + (if (server-ready? owner_host owner_port "abc") + #f + (begin + (debug:print 0 *default-log-port* "WARNING: stale lock - have to steal it. This may fail.") + (db:steal-lock-db dbh dbfile port)))) + (#t #t) ;; placeholder so that we don't touch res if it is #t + (else (set! res #f))) (sqlite3:finalize! dbh) res)) (define (register-server pkts-dir pkt-spec host port servkey ipaddr dbpath) @@ -1919,10 +1935,29 @@ (let* ((spkt (car tail))) (loop (cdr tail) (if (equal? dbpath (alist-ref 'dbpath spkt)) (cons spkt res) res)))))) + +(define (remove-pkts-if-not-alive serv-pkts) + (filter (lambda (pkt) + (let* ((host (alist-ref 'host pkt)) + (port (alist-ref 'port pkt)) + (key (alist-ref 'servkey pkt)) + (pktz (alist-ref 'Z pkt)) + (res (handle-exceptions + exn + #f + (server-ready? host port key)))) + (if res + res + (let* ((pktsdir (get-pkts-dir *toppath*)) + (pktpath (conc pktsdir"/"pktz".pkt"))) + (debug:print 0 *default-log-port* "WARNING: pkt with no server "pktpath) + (delete-file* pktpath) + #f)))) + serv-pkts)) ;; from viable servers get one that is alive and ready ;; (define (get-the-server apath serv-pkts) (let loop ((tail serv-pkts)) @@ -2006,32 +2041,39 @@ (servdat-host sdat) db-file)) ;; (set! *my-signature* (servdat-uuid sdat)) ;; replace with Z, no, stick with proper key ;; now read pkts and see if we are a contender (let* ((all-pkts (get-all-server-pkts pkts-dir *srvpktspec*)) (viables (get-viable-servers all-pkts db-file)) - (best-srv (get-best-candidate viables db-file)) - (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f))) - (debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key) + (alive (remove-pkts-if-not-alive viables)) + (best-srv (get-best-candidate alive db-file)) + (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f)) + (i-am-srv (equal? best-srv-key server-key)) + (delete-pkt (lambda () + (let* ((pktfile (conc (get-pkts-dir *toppath*) + "/" (servdat-uuid *server-info*) + ".pkt"))) + (debug:print-info 0 *default-log-port* "Attempting to remove bogus pkt file "pktfile) + (delete-file* pktfile))))) ;; remove immediately instead of waiting for on-exit + (debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key", i-am-srv: "i-am-srv) ;; am I the best-srv, compare server-keys to know - (if (equal? best-srv-key server-key) - (if (get-lock-db sdat db-file) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id) + (if i-am-srv + (if (get-lock-db sdat db-file (servdat-port sdat)) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id) (begin (debug:print 0 *default-log-port* "I'm the server!") (servdat-dbfile-set! sdat db-file) (servdat-status-set! sdat 'db-locked)) (begin (debug:print 0 *default-log-port* "I'm not the server, exiting.") (bdat-time-to-exit-set! *bdat* #t) + (delete-pkt) (thread-sleep! 0.2) (exit))) (begin (debug:print 0 *default-log-port* "Keys do not match "best-srv-key", "server-key", exiting.") (bdat-time-to-exit-set! *bdat* #t) - (delete-file* (conc (get-pkts-dir *toppath*) - "/" (servdat-uuid *server-info*) - ".pkt")) ;; remove immediately instead of waiting for on-exit + (delete-pkt) (thread-sleep! 0.2) (exit))) sdat)) (begin ;; sdat not yet contains server info (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)