Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -152,31 +152,19 @@ ;; - keep it simple, only return the actual result of the call, i.e. no meta info here ;; ;; - returns #( flag result ) ;; (define (api:execute-requests dbstruct dat) - (db:open-no-sync-db) ;; sets *no-sync-db* - ;; (handle-exceptions - ;; exn - ;; (let ((call-chain (get-call-chain))) - ;; (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat ", exn=" exn) - ;; (print-call-chain (current-error-port)) - ;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - ;; (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens (if (> *api-process-request-count* 200) (begin (if (common:low-noise-print 30 "too many threads") (debug:print 0 *default-log-port* "WARNING: "*api-process-request-count*" threads, potential overload, adding 0.5 sec delay.")) (thread-sleep! 0.5) ;; take a nap )) (cond ((not (vector? dat)) ;; it is an error to not receive a vector (vector #f (vector #f "remote must be called with a vector"))) - #;((> *api-process-request-count* 200) ;; 20) - (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.") - (set! *server-overloaded* #t) - (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor! (else (let* ((cmd-in (vector-ref dat 0)) (cmd (if (symbol? cmd-in) cmd-in (string->symbol cmd-in))) @@ -278,11 +266,13 @@ (else (assert #f "FATAL: failed to deserialize indat "indat)))))) (define (api:dispatch-request dbstruct cmd run-id params) - (db:open-no-sync-db) + (if (and (not *no-sync-db*) + (member cmd '(db:no-sync-set db:no-sync-get/default db:no-sync-del! db:no-sync-get-lock ))) + (db:open-no-sync-db)) (case cmd ;;=============================================== ;; READ/WRITE QUERIES ;;=============================================== Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -167,12 +167,10 @@ ;; (define *db-access-mutex* (make-mutex)) ;; moved to dbfile (define *db-transaction-mutex* (make-mutex)) (define *db-cache-path* #f) ;; (define *db-with-db-mutex* (make-mutex)) (define *db-api-call-time* (make-hash-table)) ;; hash of command => (list of times) -;; no sync db -;; (define *no-sync-db* #f) ;; moved to dbfile ;; SERVER (define *my-client-signature* #f) (define *transport-type* 'http) ;; override with [server] transport http|rpc|nmsg (define *runremote* #f) ;; if set up for server communication this will hold Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -1483,57 +1483,19 @@ ((http)(common:get-db-tmp-area)) ((tcp) (conc *toppath*"/.megatest")) ((nfs) (conc *toppath*"/.megatest")) (else "/tmp/dunno-this-gonna-exist"))) - ;; (define (db:no-sync-db db-in) - ;; (if db-in - ;; db-in - ;; (if *no-sync-db* - ;; *no-sync-db* - ;; (begin - ;; (mutex-lock! *db-access-mutex*) - ;; (let ((dbpath (db:get-dbsync-path)) - ;; (db (dbfile:open-no-sync-db dbpath))) - ;; (assert (sqlite3:database? db) "FATAL: db:no-sync-db failed to open a database") - ;; (set! *no-sync-db* db) - ;; (mutex-unlock! *db-access-mutex*) - ;; db))))) - -(define (with-no-sync-db proc) - (let* ((db (db:open-no-sync-db))) - (proc db))) - +;; This is needed for api.scm (define (db:open-no-sync-db) - (dbfile:open-no-sync-db (db:get-dbsync-path))) - -(define (db:no-sync-close-db db stmt-cache) - (db:safely-close-sqlite3-db db stmt-cache)) - - -;; use a global for some primitive caching, it is just silly to -;; re-read the db over and over again for the keys since they never -;; change - + (dbfile:open-no-sync-db (db:get-dbsync-path))) + ;; why get the keys from the db? why not get from the *configdat* ;; using keys:config-get-fields? (define (db:get-keys dbstruct) - (keys:config-get-fields *configdat*) -) - -;; (if *db-keys* *db-keys* -;; (let ((res '())) -;; (db:with-db dbstruct #f #f -;; (lambda (dbdat db) -;; (sqlite3:for-each-row -;; (lambda (key) -;; (set! res (cons key res))) -;; db -;; "SELECT fieldname FROM keys ORDER BY id DESC;"))) -;; (set! *db-keys* res) -;; res))) + (keys:config-get-fields *configdat*)) ;; extract index number given a header/data structure (define (db:get-index-by-header header field) (list-index (lambda (x)(equal? x field)) header)) @@ -4598,11 +4560,10 @@ (last-time (current-seconds)) ;; last time through the sync loop (no-sync-db (db:open-no-sync-db)) (sync-duration 0) ;; run time of the sync in milliseconds (tmp-area (common:get-db-tmp-area))) ;; Sync moved to http-transport keep-running loop - (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls (debug:print-info 2 *default-log-port* "Periodic copy-based sync thread started. syncer is copy-sync, tmp-area is " tmp-area) (debug:print-info 3 *default-log-port* "watchdog starting. syncer is copy-sync pid="(current-process-id));; " this-wd-num="this-wd-num) (if (and legacy-sync (not *time-to-exit*)) (begin @@ -4641,11 +4602,10 @@ (last-time (current-seconds)) (no-sync-db (db:open-no-sync-db)) (stmt-cache #f) ;; (dbr:dbstruct-stmt-cache dbstruct)) (sync-duration 0) ;; run time of the sync in milliseconds (subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct)))) - (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls (debug:print-info 2 *default-log-port* "Periodic sync thread started.") (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id) );; " this-wd-num="this-wd-num) (if (and legacy-sync (not *time-to-exit*)) (begin Index: dbfile.scm ================================================================== --- dbfile.scm +++ dbfile.scm @@ -408,18 +408,25 @@ " "cmd) (system cmd))) ;; opens and returns handle and nothing else ;; +;; NOTE: this is already protected by mutex *no-sync-db-mutex* +;; (define (dbfile:raw-open-no-sync-db dbpath) (if (not (file-exists? dbpath)) (create-directory dbpath #t)) (debug:print-info 0 *default-log-port* "Opening "dbpath"/no-sync.db") (let* ((dbname (conc dbpath "/no-sync.db")) (db-exists (file-exists? dbname)) (init-proc (lambda (db) - (if (not db-exists) + (sqlite3:with-transaction + db + (lambda () + ;; I have been having trouble with init of no-sync.db so + ;; doing the init in a transaction every time (no gating + ;; on file existance. (for-each (lambda (stmt) (sqlite3:execute db stmt)) (list "CREATE TABLE IF NOT EXISTS no_sync_metadat @@ -427,11 +434,11 @@ val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));" "CREATE TABLE IF NOT EXISTS no_sync_locks (key TEXT, val TEXT, - CONSTRAINT no_sync_metadat_constraint UNIQUE (var));"))))) + CONSTRAINT no_sync_metadat_constraint UNIQUE (key));")))))) (on-tmp (equal? (car (string-split dbpath "/")) "tmp")) (db (if on-tmp (dbfile:cautious-open-database dbname init-proc 0 "WAL") (dbfile:cautious-open-database dbname init-proc 0 #f) ;; (sqlite3:open-database dbname) @@ -441,16 +448,21 @@ (sqlite3:execute db "PRAGMA synchronous = 0;") (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)))) db)) (define (dbfile:with-no-sync-db dbpath proc) - (let* ((db (dbfile:raw-open-no-sync-db dbpath)) + (mutex-lock! *no-sync-db-mutex*) + (let* ((already-open *no-sync-db*) + (db (or already-open (dbfile:raw-open-no-sync-db dbpath))) (res (proc db))) - (sqlite3:finalize! db) + (if (not already-open) + (sqlite3:finalize! db)) + (mutex-unlock! *no-sync-db-mutex*) res)) (define *no-sync-db-mutex* (make-mutex)) + (define (dbfile:open-no-sync-db dbpath) (mutex-lock! *no-sync-db-mutex*) (let* ((res (if *no-sync-db* *no-sync-db* (let* ((db (dbfile:raw-open-no-sync-db dbpath))) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -750,125 +750,13 @@ (string-intersperse (map number->string (u8vector->list (if res res (hostname->ip hostname)))) "."))) -;; (define server:sync-lock-token "SERVER_SYNC_LOCK") -;; (define (server:release-sync-lock) -;; (db:no-sync-del! *no-sync-db* server:sync-lock-token)) -;; (define (server:have-sync-lock?) -;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token)) -;; (have-lock? (car have-lock-pair)) -;; (lock-time (cdr have-lock-pair)) -;; (lock-age (- (current-seconds) lock-time))) -;; (cond -;; (have-lock? #t) -;; ((>lock-age -;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180))) -;; (server:release-sync-lock) -;; (server:have-sync-lock?)) -;; (else #f)))) - ;; moving this here as it needs access to db and cannot be in common. ;; (define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f)) (debug:print "WARNING: bruteforce-syncer is called but has been disabled!") (lambda () - (debug:print "WARNING: bruteforce-syncer is called but has been disabled!")) - #;(let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh - (sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log"))) - (tmp-area (common:get-db-tmp-area)) - (tmp-db (conc tmp-area "/megatest.db")) - (staging-file (conc *toppath* "/.megatest.db")) - (mtdbfile (conc *toppath* "/megatest.db")) - (lockfile (common:get-sync-lock-filepath)) - (sync-cmd-core (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log)) - (sync-cmd (if fork-to-background - (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake \""sync-cmd-core" && /bin/mv -f " staging-file " " mtdbfile" \"") - sync-cmd-core)) - (default-min-intersync-delay 2) - (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: default-min-intersync-delay)) - (default-duty-cycle 0.1) - (duty-cycle (configf:lookup-number *configdat* "server" "sync-duty-cycle" default: default-duty-cycle)) - (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle) - (calculate-off-time (lambda (work-duration duty-cycle) - (* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds))) - (off-time min-intersync-delay) ;; adjusted in closure below. - (do-a-sync - (lambda () - (BB> "Start do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync) - (let* ((finalres - (let retry-loop ((num-tries 0)) - (if (common:simple-file-lock lockfile) - (begin - (cond - ((not (or fork-to-background persist-until-sync)) - (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for max of (server.minimum-intersync-delay="min-intersync-delay - " , off-time="off-time" seconds ]") - (thread-sleep! (max off-time min-intersync-delay))) - (else - (debug:print 0 *default-log-port* "INFO: syncer thread NOT sleeping ; maybe time-to-exit..."))) - - (if (not (configf:lookup *configdat* "server" "disable-db-snapshot")) - (common:snapshot-file mtdbfile subdir: ".db-snapshot")) - (delete-file* staging-file) - (let* ((start-time (current-milliseconds)) - (res (system sync-cmd)) - (dbbackupfile (conc mtdbfile ".backup")) - (res2 - (cond - ((eq? 0 res ) - (handle-exceptions - exn - #f - (if (file-exists? dbbackupfile) - (delete-file* dbbackupfile) - ) - (if (eq? 0 (file-size sync-log)) - (delete-file* sync-log)) - (system (conc "/bin/mv " staging-file " " mtdbfile)) - - (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000)) - (set! off-time (calculate-off-time - last-sync-seconds - (cond - ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1)) - duty-cycle) - (else - (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. Using default value: "default-duty-cycle) - default-duty-cycle)))) - - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec") - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time) - 'sync-completed)) - (else - (system (conc "/bin/cp "sync-log" "sync-log".fail")) - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail") - (if (file-exists? (conc mtdbfile ".backup")) - (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile))) - #f)))) - (common:simple-file-release-lock lockfile) - (BB> "released lockfile: " lockfile) - (when (common:file-exists? lockfile) - (BB> "DID NOT ACTUALLY RELEASE LOCKFILE")) - res2) ;; end let - );; end begin - ;; else - (cond - (persist-until-sync - (thread-sleep! 1) - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; we're in a fork-to-background so we need to succeed. Let's wait a jiffy and and try again. num-tries="num-tries" (waiting for lockfile="lockfile" to disappear)") - (retry-loop (add1 num-tries))) - (else - (thread-sleep! (max off-time (+ last-sync-seconds min-intersync-delay))) - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.") - 'parallel-sync-in-progress)) - ) ;; end if got lockfile - ) - )) - (BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres) - finalres) - ) ;; end lambda - )) - do-a-sync)) + (debug:print "WARNING: bruteforce-syncer is called but has been disabled!")))