Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -56,12 +56,14 @@ (define (stop-the-train) (thread-start! (make-thread (lambda () (let loop () (if (and *toppath* (file-exists? (conc *toppath*"/stop-the-train"))) - (begin - (debug:print 0 *default-log-port* "ERROR: found file "*toppath*"/stop-the-train, exiting immediately") + (let* ((msg (conc "ERROR: found file "*toppath*"/stop-the-train, exiting immediately"))) + ;; yes, print to current-output-port AND *default-log-port*, annoying but necessary I think + (print msg) + (debug:print 0 *default-log-port* msg) (exit 1))) (thread-sleep! 5) (loop)))))) ;; execute thunk, return value. If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* . Index: dbfile.scm ================================================================== --- dbfile.scm +++ dbfile.scm @@ -82,10 +82,11 @@ ;; this is one db per server (inmem #f) ;; handle for the in memory copy (dbfile #f) ;; path to the db file on disk (dbfname #f) ;; short name of db file on disk (used to validate accessing correct db) (ondiskdb #f) ;; handle for the on-disk file + (dbtmpname #f) ;; path to db file in /tmp (non-imem method) (dbdat #f) ;; create a dbdat for the downstream calls such as db:with-db (last-update 0) (sync-proc #f) ) @@ -516,10 +517,12 @@ (if newres newres res)) res))) +;; timestring+identifier+payload +;; locks are unique on identifier, payload is informational (define (db:extract-time-identifier instr) (let ((tokens (string-split instr "+"))) (match tokens ((t i)(cons (string->number t) i)) ((t) (cons (string->number t) #f)) @@ -526,32 +529,33 @@ (else (assert #f "FATAL: db:extract-time-identifier handed bad data "instr))))) ;; transaction protected lock aquisition ;; either: -;; fails returns (#f . lock-creation-time) -;; succeeds (returns (#t . lock-creation-time) +;; fails returns (#f lock-creation-time identifier) +;; succeeds (returns (#t lock-creation-time identifier) ;; use (db:no-sync-del! db keyname) to release the lock ;; (define (db:no-sync-get-lock-with-id db keyname identifier) (sqlite3:with-transaction db (lambda () (condition-case (let* ((curr-val (db:no-sync-get/default db keyname #f))) (if curr-val - (match (db:extract-time-identifier curr-val) + (match (db:extract-time-identifier curr-val) ;; result->timestamp, identifier ((timestamp ident) (if (equal? ident identifier) - #t ;; this *is* my lock - #f)) ;; nope, not my lock - (else #f)) ;; nope, not my lock - (let ((lock-value (if identifier + (cons #t timestamp) ;; this *is* my lock + (cons #f timestamp))) ;; nope, not my lock + (else (cons #f #f))) ;; nope, not my lock + (let ((curr-sec (current-seconds)) + (lock-value (if identifier (conc (current-seconds)"+"identifier) (current-seconds)))) (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-value) - #t))) + (cons #t curr-sec)))) (exn (io-error) (dbfile:print-err "ERROR: i/o error with no-sync db. Check permissions, disk space etc. and try again.")) (exn (corrupt) (dbfile:print-err "ERROR: database no-sync db is corrupt. Repair it to proceed.")) (exn (busy) (dbfile:print-err "ERROR: database no-sync db is locked. Try copying to another location, remove original and copy back.")) (exn (permission)(dbfile:print-err "ERROR: database no-sync db has some permissions problem.")) (exn () ;; (status done) ;; I don't know how to detect status done but no data! Index: dbmod.scm ================================================================== --- dbmod.scm +++ dbmod.scm @@ -29,12 +29,14 @@ (import scheme chicken data-structures extras + files (prefix sqlite3 sqlite3:) + matchable posix typed-records srfi-1 srfi-18 srfi-69 @@ -95,11 +97,11 @@ (dbfile (dbr:dbdat-dbfile dbdat))) ;; if nfs mode do a sync if delta > 2 (let* ((last-update (dbr:dbstruct-last-update dbstruct)) (sync-proc (dbr:dbstruct-sync-proc dbstruct)) (curr-secs (current-seconds))) - (if (> (- curr-secs last-update) 3) + (if (> (- curr-secs last-update) 5) (begin (sync-proc last-update) ;; MOVE THIS CALL TO INSIDE THE sync-proc CALL (dbr:dbstruct-last-update-set! dbstruct curr-secs) @@ -176,74 +178,83 @@ (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id))) (dbpath (dbmod:get-dbdir dbstruct)) ;; directory where all the .db files are kept (dbfullname (conc dbpath"/"dbfname)) ;; (dbmod:run-id->full-dbfname dbstruct run-id)) (dbexists (file-exists? dbfullname)) (tmpdir (conc "/tmp/"(current-user-name))) - (tmpdb (let* ((fname (conc tmpdir"/" (string-translate areapath "/" ".")"-"(current-process-id)"-"dbfname))) + (tmpdb (let* ((fname (conc tmpdir"/" (string-translate areapath "/" ".") ;; "-"(current-process-id) + "-"dbfname))) (if (not (file-exists? tmpdir))(create-directory tmpdir)) ;; check if tmpdb already exists, either delete it or ;; add something to the name fname)) (inmem (dbmod:open-inmem-db init-proc - (if (eq? (dbfile:cache-method) 'inmem) - #f - tmpdb) + ;; (if (eq? (dbfile:cache-method) 'inmem) + ;; #f + tmpdb + ;; ) )) (write-access (file-write-access? dbpath)) (db (dbmod:safely-open-db dbfullname init-proc write-access)) (tables (db:sync-all-tables-list keys))) (if (not (and (sqlite3:database? inmem) (sqlite3:database? db))) (begin (debug:print 0 *default-log-port* "ERROR: Failed to properly open "dbfname-in", exiting immediately.") - (exit))) - ;; (assert (sqlite3:database? inmem) "FATAL: open-dbmoddb: inmem is not a db") + (exit))) ;; (assert (sqlite3:database? inmem) "FATAL: open-dbmoddb: inmem is not a db") ;; (assert (sqlite3:database? db) "FATAL: open-dbmoddb: db is not a db") (dbr:dbstruct-inmem-set! dbstruct inmem) (dbr:dbstruct-ondiskdb-set! dbstruct db) (dbr:dbstruct-dbfile-set! dbstruct dbfullname) + (dbr:dbstruct-dbtmpname-set! dbstruct tmpdb) (dbr:dbstruct-dbfname-set! dbstruct dbfname) (dbr:dbstruct-sync-proc-set! dbstruct (lambda (last-update) (if *sync-in-progress* (debug:print 3 *default-log-port* "WARNING: overlapping calls to sync to disk") - (begin - (mutex-lock! *db-with-db-mutex*) ;; this mutex is used when overloaded or during a query that modifies the db + (let* ((sync-cmd (conc "megatest -db2db -from "tmpdb" -to "dbfullname"&"))) + ;; (mutex-lock! *db-with-db-mutex*) ;; this mutex is used when overloaded or during a query that modifies the db (set! *sync-in-progress* #t) - (dbmod:sync-gasket tables last-update inmem db - dbfullname syncdir) - (mutex-unlock! *db-with-db-mutex*) - (thread-sleep! 0.5) ;; ensure at least 1/2 second down time between sync calls - (set! *sync-in-progress* #f))))) + ;; (if (eq? (dbfile:cache-method) 'inmem) + ;; (dbmod:sync-gasket tables last-update inmem db + ;; dbfullname syncdir keys) + (thread-start! (make-thread + (lambda () + (debug:print-info "Running "sync-cmd) + (system sync-cmd) + ;; (mutex-unlock! *db-with-db-mutex*) + ;; (thread-sleep! 0.5) ;; ensure at least 1/2 second down time between sync calls + (set! *sync-in-progress* #f)))) + )))) ;; (dbmod:sync-tables tables #f db inmem) ;; (if db - (dbmod:sync-gasket tables #f inmem db dbfullname 'fromdest) ;; ) ;; load into inmem + (dbmod:sync-gasket tables #f inmem db dbfullname 'fromdest keys) ;; ) ;; load into inmem (dbr:dbstruct-last-update-set! dbstruct (current-seconds)) ;; should this be offset back in time by one second? dbstruct)) ;; (if (eq? syncdir 'todisk) ;; sync to disk normally, sync from in dashboard ;; (dbmod:sync-tables tables last-update inmem db) ;; (dbmod:sync-tables tables last-update db inmem)))) ;; direction: 'fromdest 'todest ;; -(define (dbmod:sync-gasket tables last-update inmem dbh dbfname direction) +(define (dbmod:sync-gasket tables last-update inmem dbh dbfname direction keys) (assert (sqlite3:database? inmem) "FATAL: sync-gasket: inmem is not a db") (assert (sqlite3:database? inmem) "FATAL: sync-gasket: dbh is not a db") + (debug:print-info 0 *default-log-port* "dbmod:sync-gasket called with sync-method="(dbfile:sync-method)) (case (dbfile:sync-method) ((none) #f) ((attach) (dbmod:attach-sync tables inmem dbfname direction)) ((newsync) (dbmod:new-sync tables inmem dbh dbfname direction)) (else (case direction ((todisk) - (dbmod:sync-tables tables last-update inmem dbh) + (dbmod:sync-tables tables last-update keys inmem dbh) ) (else - (dbmod:sync-tables tables last-update dbh inmem)))))) + (dbmod:sync-tables tables last-update keys dbh inmem)))))) (define (dbmod:close-db dbstruct) ;; do final sync to disk file ;; (do-sync ...) (sqlite3:finalize! (dbr:dbstruct-ondiskdb dbstruct))) @@ -275,11 +286,11 @@ ;; then sync only records where field-name >= time-in-seconds ;; IFF field-name exists ;; ;; Use (db:sync-all-tables-list keys) to get the tbls input ;; -(define (dbmod:sync-tables tbls last-update fromdb todb) +(define (dbmod:sync-tables tbls last-update keys fromdb todb) (debug:print-info 2 *default-log-port* "dbmod:sync-tables called, from: "fromdb", to: "todb) (assert (sqlite3:database? fromdb) "FATAL: dbmod:sync-tables called with fromdb not a database" fromdb) (assert (sqlite3:database? todb) "FATAL: dbmod:sync-tables called with fromdb not a database" todb) (let ((stmts (make-hash-table)) ;; table-field => stmt (all-stmts '()) ;; ( ( stmt1 value1 ) ( stml2 value2 )) @@ -524,11 +535,11 @@ ;; ;; Idea: youngest in dest is last_update time ;; (define (dbmod:new-sync tables dbh1 dbh2 destdbfile direction #!key (mode 'full)) - (debug:print 0 *default-log-port* "Doing sync "direction" "destdbfile) + (debug:print 0 *default-log-port* "Doing new-sync "direction" "destdbfile) (if (not (sqlite3:auto-committing? dbh1)) (debug:print 0 *default-log-port* "Skipping sync due to transaction in flight.") (let* ((table-names (map car tables)) (dest-exists (file-exists? destdbfile))) (assert dest-exists "FATAL: sync called with non-existant file, "destdbfile) @@ -752,6 +763,41 @@ (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))) stmth3 run-id)))) (list incompleted oldlaunched toplevels))) +;;====================================================================== +;; db to db sync +;;====================================================================== + +(define (dbmod:db-to-db-sync src-db dest-db last-update init-proc keys) + (if (and (file-exists? src-db) ;; can't proceed without a source + (file-read-access? src-db)) + (let* ((have-dest (file-exists? dest-db)) + (dest-file-wr (and have-dest + (file-write-access? dest-db))) ;; exists and writable + (dest-dir (or (pathname-directory dest-db) + ".")) + (dest-dir-wr (and (file-exists? dest-dir) + (file-write-access? dest-dir))) + (d-wr (or (and have-dest + dest-file-wr) + dest-dir-wr)) + (copied (if (and (not have-dest) + dest-dir-wr) + (begin + (file-copy src-db dest-db) + #t) + #f))) + (if copied + (begin + (debug:print-info 0 *default-log-port* "db-to-db-sync done with file-copy") + #t) + (let* ((tables (db:sync-all-tables-list keys)) + (sdb (dbmod:safely-open-db src-db init-proc #t)) + (ddb (dbmod:safely-open-db dest-db init-proc d-wr)) + (res (dbmod:sync-gasket tables last-update sdb ddb dest-db 'todest keys))) + (sqlite3:finalize! sdb) + (sqlite3:finalize! ddb) + res))) + #f)) ) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -742,10 +742,11 @@ ;; for automated creation of the rollup html file this is a good place... (if (not (equal? item-path "")) (tests:summarize-items run-id test-id test-name #f)) + ;; BUG was this meant to be the antecnt of the if above? (tests:summarize-test run-id test-id) ;; don't force - just update if no ;; Leave a .final-status file for the top level test (tests:save-final-status run-id test-id) (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) ;; end of let* @@ -1155,15 +1156,20 @@ (let ((cfname (args:get-arg "-append-config"))) (if (and cfname (file-read-access? cfname)) (read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special. ;; have config at this time, this is a good place to set params based on config file settings - (let* ((dbmode (configf:lookup *configdat* "setup" "dbcache-mode"))) + (let* ((dbmode (configf:lookup *configdat* "setup" "dbcache-mode")) + (syncmode (configf:lookup *configdat* "setup" "sync-mode"))) (if dbmode (begin (debug:print-info 0 *default-log-port* "Overriding dbmode to "dbmode) - (dbcache-mode (string->symbol dbmode))))) + (dbcache-mode (string->symbol dbmode)))) + (if syncmode + (begin + (debug:print-info 0 *default-log-port* "Overriding syncmode to "syncmode) + (dbfile:sync-method (string->symbol syncmode))))) *toppath*))) (define (get-best-disk confdat testconfig) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -284,10 +284,11 @@ -list-test-time : list time requered to complete each test in a run. It following following arguments -runname -target -dumpmode -syscheck : do some very basic checks; write access and space in tmp, home, runs, links and is $DISPLAY valid -list-waivers : dump waivers for specified target, runname, testpatt to stdout + -db2db : sync db to db, use -from, -to for dbs, -period and -timeout for continuous sync Diff report -diff-rep : generate diff report (must include -src-target, -src-runname, -target, -runname and either -diff-email or -diff-html) -src-target @@ -333,11 +334,11 @@ ":status" "-status" "-list-runs" "-testdata-csv" "-testpatt" - "--modepatt" + ;; "--modepatt" "-modepatt" "-tagexpr" "-itempatt" "-setlog" "-set-toplog" @@ -346,10 +347,11 @@ "-m" "-rerun" "-days" "-rename-run" + "-from" "-to" "-dest" "-source" "-time-stamp" ;; values and messages @@ -378,10 +380,12 @@ "-envcap" "-envdelta" "-setvars" "-set-state-status" "-import-sexpr" + "-period" ;; sync period in seconds + "-timeout" ;; exit sync if timeout in seconds exceeded since last change ;; move runs stuff here "-remove-keep" "-set-run-status" "-age" @@ -495,10 +499,11 @@ "-convert-to-norm" "-convert-to-old" "-import-megatest.db" "-sync-to-megatest.db" + "-db2db" "-sync-brute-force" "-logging" "-v" ;; verbose 2, more than normal (normal is 1) "-q" ;; quiet 0, errors/warnings only @@ -2560,10 +2565,44 @@ (if (args:get-arg "-sync-to") (let ((toppath (launch:setup))) (tasks:sync-to-postgres *configdat* (args:get-arg "-sync-to")) (set! *didsomething* #t))) +;; use with -from and -to +;; +(if (args:get-arg "-db2db") + (let* ((duh (launch:setup)) + (src-db (args:get-arg "-from")) + (dest-db (args:get-arg "-to")) + (sync-period (args:get-arg "-period")) ;; NOT IMPLEMENTED YET + (sync-timeout (args:get-arg "-timeout")) ;; NOT IMPLEMENTED YET + (lockfile (conc dest-db".sync-lock")) + (keys (db:get-keys #f)) + ) + + (if (and src-db dest-db) + (if (file-exists? src-db) + (if (file-exists? lockfile) + (debug:print 0 *default-log-port* "Lock "lockfile" exists, skipping sync...") + (begin + (with-output-to-file lockfile + (lambda () + (print (current-process-id)))) + (debug:print-info 0 *default-log-port* "Attempting to sync data from "src-db" to "dest-db"...") + (if (not (file-exists? dest-db)) + (begin + (debug:print 0 *default-log-port* "Using copy to create "dest-db" from "src-db) + (file-copy src-db dest-db)) + (let ((res (dbmod:db-to-db-sync src-db dest-db 0 (dbfile:db-init-proc) keys))) + (if res + (debug:print-info 0 *default-log-port* "Synced " res " records from "src-db" to "dest-db) + (debug:print-info 0 *default-log-port* "No sync due to permissions or other issue.")))) + (delete-file* lockfile))) + (debug:print 0 *default-log-port* "No sync due to unreadble or non-existant source file"src-db)) + (debug:print 0 *default-log-port* "Usage for -db2db; -to and -from must be specified")) + (set! *didsomething* #t))) + (if (args:get-arg "-list-test-time") (let* ((toppath (launch:setup))) (task:get-test-times) (set! *didsomething* #t))) Index: tcp-transportmod.scm ================================================================== --- tcp-transportmod.scm +++ tcp-transportmod.scm @@ -489,11 +489,15 @@ (cleanup (lambda () (if (tt-cleanup-proc ttdat) ((tt-cleanup-proc ttdat))) (dbfile:with-no-sync-db nosyncdbpath (lambda (db) - (db:no-sync-del! db dbfname)))))) + (let* ((dbtmpname (dbr:dbstruct-dbtmpname dbstruct))) + (debug:print-info 0 *default-log-port* "Running clean up, including removing db file "dbtmpname) + (db:no-sync-del! db dbfname) + #;(if dbtmpname + (delete-file dbtmpname)))))))) (set! *server-info* ttdat) (let loop ((count 0)) (if (> count 240) (begin (debug:print 0 *default-log-port* "FATAL: Could not start a tcp server, giving up.") @@ -513,14 +517,19 @@ ((null? servers) #f) ;; not ok ((equal? (list-ref (car servers) 6) ;; compare the servinfofile (tt-servinf-file ttdat)) (let* ((res (if db-locked-in #t - (let* ((success (dbfile:with-no-sync-db - nosyncdbpath - (lambda (db) - (db:no-sync-get-lock-with-id db dbfname (tt-servinf-file ttdat)))))) + (let* ((lock-result + (dbfile:with-no-sync-db + nosyncdbpath + (lambda (db) + (db:no-sync-get-lock-with-id db dbfname + ;; (tt-servinf-file ttdat) + (dbr:dbstruct-dbtmpname dbstruct) + )))) + (success (car lock-result))) (if success (begin (tt-state-set! ttdat 'running) (debug:print 0 *default-log-port* "Got server lock for " dbfname) @@ -527,12 +536,11 @@ (set! db-locked-in #t) #t) (begin (debug:print 0 *default-log-port* "Failed to get server lock for "dbfname) #f)))))) - (if (and res - (common:low-noise-print 120 "top server message")) + (if (and res (common:low-noise-print 120 "top server message")) (debug:print-info 0 *default-log-port* "Keep running, I'm the top server for " dbfname" on "(tt-host ttdat)":"(tt-port ttdat))) res)) (else (debug:print-info 0 *default-log-port* "I'm not the lead server: "servers) @@ -561,12 +569,10 @@ #t))))) (else ;; should never get here (debug:print 0 *default-log-port* "BAD SERVER RECORD: "leadsrv) (assert #f "Bad server record "leadsrv)))))))) (if ok - ;; (if (> *api-process-request-count* 0) ;; have requests in flight - ;; (tt-last-access-set! ttdat (current-seconds))) (tt-last-access-set! ttdat *db-last-access*) ;; bit silly, just use db-last-access (begin (debug:print 0 *default-log-port* "Exiting immediately") (cleanup) (exit))) @@ -644,11 +650,12 @@ (host (tt-host ttdat)) (port (tt-port ttdat)) (servinf (conc servdir"/"host":"port"-"(current-process-id)":"dbfname)) (serv-id (tt:mk-signature areapath)) (clean-proc (lambda () - (delete-file* servinf)))) + (delete-file* servinf) + ))) (assert (and host port) "FATAL: tt:create-server-registration-file called with no conn, dbfname="dbfname) (tt-cleanup-proc-set! ttdat clean-proc) (tt-servinf-file-set! ttdat servinf) (with-output-to-file servinf (lambda ()