Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -146,10 +146,11 @@ ;; - keep it simple, only return the actual result of the call, i.e. no meta info here ;; ;; - returns #( flag result ) ;; (define (api:execute-requests dbstruct dat) + (db:open-no-sync-db) ;; sets *no-sync-db* (handle-exceptions exn (let ((call-chain (get-call-chain))) (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat ", exn=" exn) (print-call-chain (current-error-port)) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -212,28 +212,28 @@ (common:low-noise-print 120 "over-50-parallel-api-requests")) (debug:print-info 0 *default-log-port* *api-process-request-count* " parallel api requests being processed in process " (current-process-id) ", throttling access")) (if (common:low-noise-print 600 (conc "parallel-api-requests" *max-api-process-requests*)) (debug:print-info 2 *default-log-port* "Parallel api request count: " *api-process-request-count* " max parallel requests: " *max-api-process-requests*)) (condition-case - (begin - (if use-mutex (mutex-lock! *db-with-db-mutex*)) - (let ((res (apply proc dbdat db params))) - (if use-mutex (mutex-unlock! *db-with-db-mutex*)) - ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w)) - (if dbdat (stack-push! (dbr:subdb-dbstack subdb) dbdat)) - res)) - (exn (io-error) - (db:generic-error-printout exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")) - (exn (corrupt) - (db:generic-error-printout exn "ERROR: database " fname " is corrupt. Repair it to proceed.")) - (exn (busy) - (db:generic-error-printout exn "ERROR: database " fname - " is locked. Try copying to another location, remove original and copy back.")) - (exn (permission)(db:generic-error-printout exn "ERROR: database " fname " has some permissions problem.")) - (exn () - (db:generic-error-printout exn "ERROR: Unknown error with database " fname " message: " - ((condition-property-accessor 'exn 'message) exn)))))) + (begin + (if use-mutex (mutex-lock! *db-with-db-mutex*)) + (let ((res (apply proc dbdat db params))) + (if use-mutex (mutex-unlock! *db-with-db-mutex*)) + ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w)) + (if dbdat (stack-push! (dbr:subdb-dbstack subdb) dbdat)) + res)) + (exn (io-error) + (db:generic-error-printout exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")) + (exn (corrupt) + (db:generic-error-printout exn "ERROR: database " fname " is corrupt. Repair it to proceed.")) + (exn (busy) + (db:generic-error-printout exn "ERROR: database " fname + " is locked. Try copying to another location, remove original and copy back.")) + (exn (permission)(db:generic-error-printout exn "ERROR: database " fname " has some permissions problem.")) + (exn () + (db:generic-error-printout exn "ERROR: Unknown error with database " fname " message: " + ((condition-property-accessor 'exn 'message) exn)))))) ;; NB// #f => return dbdir only ;; (was planned to be; zeroth db with name=main.db) ;; @@ -2105,10 +2105,27 @@ (sqlite3:execute db "DELETE FROM metadat WHERE var=?;" var)))) ;;====================================================================== ;; no-sync.db - small bits of data to be shared between servers ;;====================================================================== + +(define (db:no-sync-db db-in) + (if db-in + db-in + (if *no-sync-db* + *no-sync-db* + (begin + (mutex-lock! *db-access-mutex*) + (let ((dbpath (common:get-db-tmp-area)) + (db (dbfile:open-no-sync-db dbpath))) + (set! *no-sync-db* db) + (mutex-unlock! *db-access-mutex*) + db))))) + +(define (with-no-sync-db proc) + (let* ((db (db:no-sync-db *no-sync-db*))) + (proc db))) (define (db:open-no-sync-db) (dbfile:open-no-sync-db (db:dbfile-path))) (define (db:no-sync-close-db db stmt-cache) @@ -5009,12 +5026,20 @@ ;; (debug:print-info 0 *default-log-port* "lock-and-sync, dbfiles: "dbfiles) (for-each (lambda (file) (let* ((fname (conc (pathname-file file) ".db")) (fulln (conc *toppath*"/.db/"fname)) - (time1 (file-modification-time file)) - (time2 (file-modification-time fulln)) + (time1 (if (file-exists? file) + (file-modification-time file) + (begin + (debug:print-info 0 *default-log-port* "Sync - I do not see file "file) + 1))) + (time2 (if (file-exists? fulln) + (file-modification-time fulln) + (begin + (debug:print-info 0 *default-log-port* "Sync - I do not see file "fulln) + 0))) (changed (> time1 time2)) (do-cp (cond ((not (file-exists? fulln)) ;; shouldn't happen, but this might recover (debug:print-info 0 *default-log-port* "File "fulln" not found! Copying "fname" to "fulln) #t) Index: dbfile.scm ================================================================== --- dbfile.scm +++ dbfile.scm @@ -444,47 +444,62 @@ ;; if we are not a server create a db handle. this is not finalized ;; so watch for problems. I'm still not clear if it is needed to manually ;; finalize sqlite3 dbs with the sqlite3 egg. ;; -(define (db:no-sync-db db-in) - (if db-in - db-in - (if *no-sync-db* - *no-sync-db* - (begin - (mutex-lock! *db-access-mutex*) - (let ((db (dbfile:open-no-sync-db))) - (set! *no-sync-db* db) - (mutex-unlock! *db-access-mutex*) - db))))) + +(define (dbfile:cautious-open-database fname #!optional (tries-left 5)) + (let* ((retry (lambda () + (thread-sleep! 0.5) + (if (> tries-left 0) + (dbfile:cautious-open-database fname (- tries-left 1)))))) + (condition-case + (sqlite3:open-database fname) + (exn (io-error) + (dbfile:print-err exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.") + (retry)) + (exn (corrupt) + (dbfile:print-err exn "ERROR: database " fname " is corrupt. Repair it to proceed.") + (retry)) + (exn (busy) + (dbfile:print-err exn "ERROR: database " fname + " is locked. Try copying to another location, remove original and copy back.") + (retry)) + (exn (permission)(dbfile:print-err exn "ERROR: database " fname " has some permissions problem.") + (retry)) + (exn () + (dbfile:print-err exn "ERROR: Unknown error with database " fname " message: " + ((condition-property-accessor 'exn 'message) exn)) + (retry))))) + (define (dbfile:open-no-sync-db dbpath) - (let* (;; (dbpath (db:dbfile-path)) - (dbname (conc dbpath "/no-sync.db")) + (if (not (file-exists? dbpath)) + (create-directory dbpath #t)) + (let* ((dbname (conc dbpath "/no-sync.db")) (db-exists (file-exists? dbname)) - (db (sqlite3:open-database dbname))) + (db (dbfile:cautious-open-database dbname))) ;; (sqlite3:open-database dbname))) (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) (if (not db-exists) (begin (sqlite3:execute db "PRAGMA synchronous = 0;") (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));") - (sqlite3:execute db "PRAGMA journal_mode=WAL;"))) + #;(sqlite3:execute db "PRAGMA journal_mode=WAL;"))) db)) (define (db:no-sync-set db var val) - (sqlite3:execute (db:no-sync-db db) "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val)) + (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val)) (define (db:no-sync-del! db var) - (sqlite3:execute (db:no-sync-db db) "DELETE FROM no_sync_metadat WHERE var=?;" var)) + (sqlite3:execute db "DELETE FROM no_sync_metadat WHERE var=?;" var)) (define (db:no-sync-get/default db var default) (let ((res default)) (sqlite3:for-each-row (lambda (val) (set! res val)) - (db:no-sync-db db) + db "SELECT val FROM no_sync_metadat WHERE var=?;" var) (if res (let ((newres (if (string? res) (string->number res) @@ -498,22 +513,21 @@ ;; either: ;; fails returns (#f . lock-creation-time) ;; succeeds (returns (#t . lock-creation-time) ;; use (db:no-sync-del! db keyname) to release the lock ;; -(define (db:no-sync-get-lock db-in keyname) - (let ((db (db:no-sync-db db-in))) - (sqlite3:with-transaction - db - (lambda () - (handle-exceptions - exn +(define (db:no-sync-get-lock db keyname) + (sqlite3:with-transaction + db + (lambda () + (handle-exceptions + exn (let ((lock-time (current-seconds))) ;; (debug:print-info 2 *default-log-port* "db:no-sync-get-lock keyname=" keyname ", lock-time=" lock-time ", exn=" exn) (sqlite3:execute db "INSERT INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time) `(#t . ,lock-time)) - `(#f . ,(sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname))))))) + `(#f . ,(sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname)))))) ;;====================================================================== ;; file utils ;;====================================================================== Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -403,10 +403,11 @@ ;; if none running or if > 20 seconds since ;; server last used then start shutdown ;; This thread waits for the server to come alive (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") (let* ((sdat #f) + (no-sync-db (db:open-no-sync-db)) (tmp-area (common:get-db-tmp-area)) (started-file (conc tmp-area "/.server-started")) (server-start-time (current-seconds)) (server-info (let loop ((start-time (current-seconds)) (changed #t) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -514,24 +514,23 @@ (sqlite3:first-row db "SELECT id,action,owner,state,target,name,testpatt,keylock,params WHERE params LIKE ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;" param-key state-patt action-patt test-patt))))) (define (tasks:find-task-queue-records dbstruct target run-name test-patt state-patt action-patt) - ;; (handle-exceptions - ;; exn - ;; '() - ;; (sqlite3:first-row - (let ((db (db:delay-if-busy (db:get-db dbstruct))) - (res '())) - (sqlite3:for-each-row - (lambda (a . b) - (set! res (cons (cons a b) res))) - db "SELECT id,action,owner,state,target,name,testpatt,keylock,params FROM tasks_queue + (db:with-db + dbstruct + #f #f + (lambda (dbdat db) + (let ((res '())) + (sqlite3:for-each-row + (lambda (a . b) + (set! res (cons (cons a b) res))) + db "SELECT id,action,owner,state,target,name,testpatt,keylock,params FROM tasks_queue WHERE target = ? AND name = ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;" - target run-name state-patt action-patt test-patt) - res)) ;; ) + target run-name state-patt action-patt test-patt) + res)))) ;; kill any runner processes (i.e. processes handling -runtests) that match target/runname ;; ;; do a remote call to get the task queue info but do the killing as self here. ;; Index: tests/simplerun/Makefile ================================================================== --- tests/simplerun/Makefile +++ tests/simplerun/Makefile @@ -1,5 +1,5 @@ cleanup : killall mtest dboard -v -9 || true - rm -rf *.log *.bak NB* logs/* .meta .db ../simpleruns/* lt + rm -rf *.log *.bak NB* logs/* .meta .db /tmp/$(USER)/megatest_localdb/simplerun ../simpleruns/* lt