Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -425,12 +425,11 @@ 'adj-target 'new2old '(dejunk) )) ((tcp nfs) - (debug:print 0 *default-log-port* "WARNING: cleanup-db NOT implemented yet for tcp and nfs.") - #;(apply db:multi-db-sync + (apply db:multi-db-sync dbstruct 'schema 'killservers 'adj-target 'new2old Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -131,15 +131,15 @@ (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) (print-call-chain (current-error-port)) default))) (apply sqlite3:first-result db stmt params))) -(define (db:setup do-sync) +(define (db:setup) (assert *toppath* "FATAL: db:setup called before launch:setup has been run.") (let* ((tmpdir (common:make-tmpdir-name *toppath* ""))) (if (not *dbstruct-dbs*) - (dbfile:setup do-sync *toppath* tmpdir) + (dbfile:setup (conc *toppath* "/.mtdb") tmpdir) *dbstruct-dbs*))) ;; moved from dbfile ;; ;; ADD run-id SUPPORT @@ -521,23 +521,63 @@ ;; (if dbdat (dbfile:add-dbdat dbstruct #f dbdat)) ;; ) ;; #t) (define (db:kill-servers) - (let* ((servers (server:choose-server *toppath* 'all-valid))) ;; (server:get-list *toppath*)) - (for-each - (lambda (server) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " server ", exn=" exn) - #f) - (match-let (((mod-time host port start-time server-id pid) server)) - (if (and host pid) - (tasks:kill-server host pid))))) - servers) - (delete-file* (common:get-sync-lock-filepath)))) + (let* ((tl (launch:setup)) ;; need this to initialize *toppath* + (servdir (conc *toppath* "/.servinfo")) + (servfiles (glob (conc servdir "/*:*.db"))) + (fmtstr "~10a~22a~10a~25a~25a~8a\n") + (dbfiles (append (glob (conc *toppath* "/.mtdb/main.db")) (glob (conc *toppath* "/.mtdb/?.db"))(glob (conc *toppath* "/.mtdb/??.db")))) + (ttdat (make-tt areapath: *toppath*)) + ) + (format #t fmtstr "DB" "host:port" "PID" "age" "last mod" "state") + (for-each + (lambda (dbfile) + (let* ( + (dbfname (conc (pathname-file dbfile) ".db")) + (sfiles (tt:find-server *toppath* dbfname)) + ) + (for-each + (lambda (sfile) + (let ( + (sinfos (tt:get-server-info-sorted ttdat dbfname)) + ) + (for-each + (lambda (sinfo) + (let* ( + (db (list-ref sinfo 5)) + (pid (list-ref sinfo 4)) + (host (list-ref sinfo 0)) + (port (list-ref sinfo 1)) + (server-id (list-ref sinfo 3)) + (age (seconds->hr-min-sec (- (current-seconds) (list-ref sinfo 2)))) + (last-mod (seconds->string (list-ref sinfo 2))) + (killed (system (conc "ssh " host " kill " pid " > /dev/null"))) + (dummy2 (sleep 1)) + (state (if (> (system (conc "ssh " host " ps " pid " > /dev/null")) 0) "dead" "alive")) + ) + (format #t fmtstr db (conc host ":" port) pid age last-mod state) + (system (conc "rm " sfile)) + ) + ) + sinfos + ) + ) + ) + sfiles + ) + ) + ) + dbfiles + ) + ;; remove this db, because otherwise metadata contains records for old servers, and this causes a problem with db:no-sync-get-lock-with-id. + (if (file-exists? (conc *toppath* "/.mtdb/no-sync.db")) + (delete-file (conc *toppath* "/.mtdb/no-sync.db")) + ) + ) +) ;; options: ;; ;; 'killservers - kills all servers ;; 'dejunk - removes junk records @@ -554,29 +594,29 @@ (tmp-area (common:make-tmpdir-name *toppath* "")) (old2new (member 'old2new options)) (dejunk (member 'dejunk options)) (killservers (member 'killservers options)) (src-area (if old2new *toppath* tmp-area)) - (dest-area (if old2new tmp-area *toppath*)) + (dest-area (if old2new tmp-area (conc *toppath* "/.mtdb"))) (dbfiles (if old2new (glob (conc *toppath* "/.mtdb/*.db")) - (glob (conc tmp-area "/.mtdb/*.db")))) + (glob (conc tmp-area "/*.db")))) (keys (db:get-keys dbstruct)) (sync-durations (make-hash-table))) ;; kill servers - (if killservers (db:kill-servers)) + ;; (if killservers (db:kill-servers)) (if (not dbfiles) (debug:print-error 0 *default-log-port* "no dbfiles found in " (conc *toppath* "/.mtdb")) (for-each (lambda (srcfile) (debug:print-info 3 *default-log-port* "file: " srcfile) (let* ((fname (conc (pathname-file srcfile) ".db")) (basename (pathname-file srcfile)) (run-id (if (string= basename "main") #f (string->number basename))) - (destfile (conc dest-area "/.mtdb/" fname)) - (dest-directory (conc dest-area "/.mtdb/")) + (destfile (conc dest-area "/" fname)) + (dest-directory dest-area) (time1 (file-modification-time srcfile)) (time2 (if (file-exists? destfile) (begin (debug:print-info 2 *default-log-port* "destfile " destfile " exists") (file-modification-time destfile)) @@ -598,14 +638,12 @@ #t) (else #f)))) (if (or dejunk do-cp) (let* ((start-time (current-milliseconds)) - ;; subdb is misnamed - should be dbdat (I think...) - (subdb (dbfile:open-db dbstruct run-id dbfile:db-init-proc)) - ;; (or (dbfile:get-subdb dbstruct run-id) - ;; (dbfile:init-subdb dbstruct run-id dbfile:db-init-proc))) + (subdb (or (dbfile:get-subdb dbstruct run-id) (dbfile:init-subdb dbstruct run-id dbfile:db-init-proc))) + (dbdat (or (dbfile:get-dbdat dbstruct run-id) (dbfile:open-db dbstruct run-id dbfile:db-init-proc))) (mtdb (dbr:subdb-mtdbdat subdb)) ;; ;; BUG: -mrw- I think this next line is wrong. run-id should be the path to .mtdb/.db ;; (tmpdb (dbfile:open-db dbstruct run-id dbfile:db-init-proc))) @@ -613,16 +651,15 @@ (debug:print-info 2 *default-log-port* "delta syncing file: " srcfile ", time diff: " (- time1 time2) " seconds") (if old2new (begin (if dejunk (db:clean-up run-id mtdb)) (db:sync-tables (db:sync-all-tables-list - dbstruct (db:get-keys dbstruct)) #f mtdb tmpdb)) (begin (if dejunk (db:clean-up run-id tmpdb)) - (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) #f tmpdb mtdb))) + (db:sync-tables (db:sync-all-tables-list (db:get-keys dbstruct)) #f tmpdb mtdb))) (hash-table-set! sync-durations (conc srcfile ".db") (- (current-milliseconds) start-time))) (debug:print-info 2 *default-log-port* "skipping delta sync. " srcfile " is up to date")))) dbfiles)) data-synced)) @@ -634,11 +671,11 @@ (for-each (lambda (subdb) (let* ((mtdb (dbr:subdb-mtdb subdb)) (tmpdb (db:get-subdb dbstruct run-id)) (refndb (dbr:subdb-refndb subdb)) - (newres (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) last-update tmpdb refndb mtdb))) + (newres (db:sync-tables (db:sync-all-tables-list (db:get-keys dbstruct)) last-update tmpdb refndb mtdb))) ;; (stack-push! (dbr:subdb-dbstack subdb) tmpdb) ;; BUG: verify this is really needed (dbfile:add-dbdat dbstruct run-id tmpdb) (set! res (cons newres res)))) subdbs) @@ -1142,13 +1179,10 @@ ;; 2. Look at run records ;; a. If have tests that are not deleted, set state='unknown' ;; b. .... ;; (define (db:clean-up run-id dbdat) - (debug:print 2 *default-log-port* "db:clean-up") - - (if run-id (db:clean-up-rundb dbdat) (db:clean-up-maindb dbdat) ) ) Index: dbfile.scm ================================================================== --- dbfile.scm +++ dbfile.scm @@ -268,18 +268,18 @@ (define (dbfile:run-id->dbfname run-id) (conc (dbfile:run-id->dbnum run-id)".db")) ;; the path in MTRAH with the filename (define (dbfile:run-id->dbname run-id) - (conc ".mtdb/"(dbfile:run-id->dbfname run-id))) + (conc (dbfile:run-id->dbfname run-id))) ;; Make the dbstruct, setup up auxillary db's and call for main db at least once ;; ;; called in http-transport and replicated in rmt.scm for *local* access. ;; -(define (dbfile:setup do-sync areapath tmppath) +(define (dbfile:setup areapath tmppath) (cond (*dbstruct-dbs* (dbfile:print-err "WARNING: dbfile:setup called when *dbstruct-dbs* is already initialized") *dbstruct-dbs*) ;; TODO: when multiple areas are supported, this optimization will be a hazard (else @@ -359,11 +359,12 @@ (if dbdat dbdat (let* ((tmppath (dbr:dbstruct-tmppath dbstruct)) (tmpdbpath (dbfile:run-id->path tmppath run-id)) (dbdat (dbfile:open-sqlite3-db tmpdbpath init-proc sync-mode: 0 journal-mode: "WAL"))) - ;; the following line short-circuits the "one db handle per thread" model + + ;; the following line short-circuits the "one db handle per thread" model ;; ;; (dbfile:add-dbdat dbstruct run-id dbdat) ;; dbdat)))))) @@ -454,11 +455,11 @@ (let ((db (sqlite3:open-database fname))) ;; pragmas synchronous not needed because this db is used read-only ;; (sqlite3:execute db (conc "PRAGMA synchronous = "mode";") (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 30000)) ;; read-only but still need timeout db ) - (print "file doesn't exist: " fname)))) + (print "cautious-open-database: file doesn't exist: " fname)))) (exn (io-error) (dbfile:print-err exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.") (retry)) (exn (corrupt) (dbfile:print-err exn "ERROR: database " fname " is corrupt. Repair it to proceed.") Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -2450,11 +2450,11 @@ ;; (if (not (server:choose-server *toppath* 'home?)) ;; (begin ;; (debug:print 0 *default-log-port* "Servers are not running on this host or no servers alive. Cannot run cleanup-db") ;; (exit 1))) - (let ((dbstructs (db:setup #f))) + (let ((dbstructs (db:setup))) (common:cleanup-db dbstructs)) (set! *didsomething* #t))) (if (args:get-arg "-mark-incompletes") (begin