Index: archive.scm ================================================================== --- archive.scm +++ archive.scm @@ -359,11 +359,11 @@ (archive-dir (if archive-info (cdr archive-info) #f)) (archive-id (if archive-info (car archive-info) -1)) (home-host (server:choose-server *toppath* 'homehost)) (archive-time (seconds->std-time-str (current-seconds))) (archive-staging-db (conc *toppath* "/.db-snapshot/archive_" archive-time)) - (tmp-db-path (conc (common:get-db-tmp-area) "/megatest.db")) + (tmp-db-path (conc (dbfile:make-tmpdir-name *toppath* "") "/megatest.db")) (dbfile (conc archive-staging-db "/megatest.db"))) (create-directory archive-staging-db #t) (let-values (((pid-val exit-status exit-code) (run-n-wait rsync-exe params: (list "-v" (conc (car home-host) ":"tmp-db-path) archive-staging-db) print-cmd: print-prefix))) (if (eq? exit-code 0) (case archiver Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -247,11 +247,11 @@ (define *common:this-exe-fullpath* (common:get-this-exe-fullpath)) (define *common:this-exe-dir* (pathname-directory *common:this-exe-fullpath*)) (define *common:this-exe-name* (pathname-strip-directory *common:this-exe-fullpath*)) (define (common:get-sync-lock-filepath) - (let* ((tmp-area (common:get-db-tmp-area)) + (let* ((tmp-area (dbfile:make-tmpdir-name *toppath* "")) (lockfile (conc tmp-area "/megatest.db.lock"))) lockfile)) (define *common:logpro-exit-code->status-sym-alist* '( ( 0 . pass ) @@ -2280,11 +2280,11 @@ (define (common:check-db-dir-space) (let* ((required (string->number ;; default is 1GB (or actually a billion bytes) This is the number of 1 kB blocks. (or (configf:lookup *configdat* "setup" "dbdir-space-required") "1000000"))) - (dbdir (common:get-db-tmp-area)) ;; (db:get-dbdir)) + (dbdir (dbfile:make-tmpdir-name *toppath* "")) ;; (db:get-dbdir)) (tdbspace (common:check-space-in-dir dbdir required)) (mdbspace (common:check-space-in-dir *toppath* required))) (sort (list tdbspace mdbspace) (lambda (a b) (< (cadr a)(cadr b)))))) Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -402,12 +402,12 @@ (dboard:setup-tabdat dat) (dboard:setup-num-rows dat) dat)) (define (dboard:setup-tabdat tabdat) - (dboard:tabdat-dbdir-set! tabdat (db:dbfile-path)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/.db")) - (dboard:tabdat-dbfpath-set! tabdat (db:dbfile-path)) + (dboard:tabdat-dbdir-set! tabdat (dbfile:make-tmpdir-name *toppath* "")) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/.db")) + (dboard:tabdat-dbfpath-set! tabdat (dbfile:make-tmpdir-name *toppath* "")) (dboard:tabdat-monitor-db-path-set! tabdat (conc (dboard:tabdat-dbdir tabdat) "/monitor.db")) ;; HACK ALERT: this is a hack, please fix. (dboard:tabdat-ro-set! tabdat (not (file-read-access? (dboard:tabdat-dbfpath tabdat)))) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -133,11 +133,11 @@ default))) (apply sqlite3:first-result db stmt params))) (define (db:setup do-sync) (assert *toppath* "FATAL: db:setup called before launch:setup has been run.") - (let* ((tmpdir (common:get-db-tmp-area))) + (let* ((tmpdir (dbfile:make-tmpdir-name *toppath* ""))) (if (not *dbstruct-dbs*) (dbfile:setup do-sync *toppath* tmpdir) *dbstruct-dbs*))) ;; moved from dbfile @@ -267,17 +267,10 @@ ", arguments: " ((condition-property-accessor 'exn 'arguments) exn) ", location: " ((condition-property-accessor 'exn 'location) exn) )) -;; NB// #f => return dbdir only -;; (was planned to be; zeroth db with name=main.db) -;; -;; If run-id is #f return to create and retrieve the path where the db will live. -;; -(define db:dbfile-path common:get-db-tmp-area) - (define (db:set-sync db) (let ((syncprag (configf:lookup *configdat* "setup" "sychronous"))) (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 0) ";")))) @@ -467,11 +460,11 @@ (get-mtime shm-file)))) ;; (define (db:all-db-sync dbstruct) ;; (let* ((dbdat (db:open-db dbstruct #f db:initialize-main-db)) ;; (data-synced 0) ;; count of changed records -;; (tmp-area (common:get-db-tmp-area)) +;; (tmp-area (dbfile:make-tmpdir-name *toppath*)) ;; (dbfiles (glob (conc tmp-area"/.mtdb/*.db"))) ;; (sync-durations (make-hash-table)) ;; (no-sync-db (db:open-no-sync-db))) ;; (for-each ;; (lambda (file) ;; tmp db file @@ -556,11 +549,11 @@ ;; run-ids: '(1 2 3 ...) or #f (for all) ;; (define (db:multi-db-sync dbstruct . options) (let* (;; (dbdat (db:open-db dbstruct #f dbfile:db-init-proc)) (data-synced 0) ;; count of changed records - (tmp-area (common:get-db-tmp-area)) + (tmp-area (dbfile:make-tmpdir-name *toppath* "")) (old2new (member 'old2new options)) (dejunk (member 'dejunk options)) (killservers (member 'killservers options)) (src-area (if old2new *toppath* tmp-area)) (dest-area (if old2new tmp-area *toppath*)) @@ -1254,11 +1247,11 @@ ;; no-sync.db - small bits of data to be shared between servers ;;====================================================================== (define (db:get-dbsync-path) (case (rmt:transport-mode) - ((http)(common:get-db-tmp-area)) + ((http)(dbfile:make-tmpdir-name *toppath* "")) ((tcp) (conc *toppath*"/.mtdb")) ((nfs) (conc *toppath*"/.mtdb")) (else "/tmp/dunno-this-gonna-exist"))) ;; This is needed for api.scm @@ -1580,11 +1573,11 @@ ;; TODO: Switch this to use max(update_time) from each run db? Then if using a server there is no disk traffic (using cachedb db) ??? ;; ;; NOTE: This DOESN'T (necessarily) get the real run ids, but the number of the .db!! (define (db:get-changed-run-ids since-time) - (let* ((dbdir (db:dbfile-path)) ;; (configf:lookup *configdat* "setup" "dbdir")) + (let* ((dbdir (dbfile:make-tmpdir-name *toppath* "")) ;; (configf:lookup *configdat* "setup" "dbdir")) (alldbs (glob (conc *toppath* "/.mtdb/[0-9]*.db*"))) (changed (filter (lambda (dbfile) (> (file-modification-time dbfile) since-time)) alldbs))) (delete-duplicates @@ -4314,11 +4307,11 @@ )))) ;; sync for filesystem local db writes ;; (define (db:run-lock-and-sync no-sync-db) - (let* ((tmp-area (common:get-db-tmp-area)) + (let* ((tmp-area (dbfile:make-tmpdir-name *toppath* "")) (dbfiles (glob (conc tmp-area"/.mtdb/*.db"))) (sync-durations (make-hash-table))) ;; (debug:print-info 0 *default-log-port* "lock-and-sync, dbfiles: "dbfiles) (for-each (lambda (file) @@ -4370,11 +4363,11 @@ (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300)) (debug-mode (debug:debug-mode 1)) (last-time (current-seconds)) ;; last time through the sync loop (no-sync-db (db:open-no-sync-db)) (sync-duration 0) ;; run time of the sync in milliseconds - (tmp-area (common:get-db-tmp-area))) + (tmp-area (dbfile:make-tmpdir-name *toppath* ""))) ;; Sync moved to http-transport keep-running loop (debug:print-info 2 *default-log-port* "Periodic copy-based sync thread started. syncer is copy-sync, tmp-area is " tmp-area) (debug:print-info 3 *default-log-port* "watchdog starting. syncer is copy-sync pid="(current-process-id));; " this-wd-num="this-wd-num) (if (and legacy-sync (not *time-to-exit*)) @@ -4478,11 +4471,11 @@ (for-each (lambda (subdb) (let* (;;(dbstruct (db:setup)) (mtdb (dbr:subdb-mtdb subdb)) (mtpath (db:dbdat-get-path mtdb)) - (tmp-area (common:get-db-tmp-area)) + (tmp-area (dbfile:make-tmpdir-name *toppath* "")) (res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive (set! sync-duration (- (current-milliseconds) sync-start)) (if (> res 0) ;; some records were transferred, keep the db alive (begin (mutex-lock! *heartbeat-mutex*) Index: dbfile.scm ================================================================== --- dbfile.scm +++ dbfile.scm @@ -242,11 +242,12 @@ #f ) ) (define (dbfile:make-tmpdir-name areapath tmpadj) - (let* ((dname (conc "/tmp/"(current-user-name)"/" (string-translate areapath "/" ".") tmpadj))) + (let* ((area (pathname-file areapath)) + (dname (conc "/tmp/"(current-user-name)"/megatest_localdb/" area "/" (string-translate areapath "/" ".") tmpadj "/.mtdb"))) (unless (directory-exists? dname) (create-directory dname #t)) dname)) (define (dbfile:run-id->path apath run-id) @@ -487,11 +488,11 @@ ;; NOTE: this is already protected by mutex *no-sync-db-mutex* ;; (define (dbfile:raw-open-no-sync-db dbpath) (if (not (file-exists? dbpath)) (create-directory dbpath #t)) - (debug:print-info 0 *default-log-port* "Opening "dbpath"/no-sync.db") + (debug:print-info 2 *default-log-port* "(dbfile:raw-open-no-sync-db: Opening "dbpath"/no-sync.db") (let* ((dbname (conc dbpath "/no-sync.db")) (db-exists (file-exists? dbname)) (init-proc (lambda (db) (sqlite3:with-transaction db @@ -675,20 +676,26 @@ ;; fails returns (#f lock-creation-time identifier) ;; succeeds (returns (#t lock-creation-time identifier) ;; use (db:no-sync-del! db keyname) to release the lock ;; (define (db:no-sync-get-lock-with-id db keyname identifier) + (debug:print 2 *default-log-port* "db:no-sync-get-lock-with-id: db: " db " keyname: " keyname " identifier: " identifier) (sqlite3:with-transaction db (lambda () (condition-case (let* ((curr-val (db:no-sync-get/default db keyname #f))) + (debug:print 2 *default-log-port* "db:no-sync-get-lock-with-id: curr-val: " curr-val) (if curr-val (match (db:extract-time-identifier curr-val) ;; result->timestamp, identifier ((timestamp . ident) (cons (equal? ident identifier) timestamp)) - (else (cons #f 'malformed-lock))) ;; lock malformed + (else + (debug:print 2 *default-log-port* "db:no-sync-get-lock-with-id: malformed lock") + (cons #f 'malformed-lock) + ) + ) ;; lock malformed (let ((curr-sec (current-seconds)) (lock-value (if identifier (conc (current-seconds)"+"identifier) (current-seconds)))) (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-value) Index: dbmod.scm ================================================================== --- dbmod.scm +++ dbmod.scm @@ -224,11 +224,11 @@ (dbr:dbstruct-dbtmpname-set! dbstruct tmpdb) (dbr:dbstruct-dbfname-set! dbstruct dbfname) (dbr:dbstruct-sync-proc-set! dbstruct (lambda (last-update) (if *sync-in-progress* - (debug:print 3 *default-log-port* "WARNING: overlapping calls to sync to disk") + (debug:print 0 *default-log-port* "WARNING: overlapping calls to sync to disk") (let* ((syncer-logfile (conc areapath"/logs/"dbfname"-syncer.log")) (sync-cmd (if (eq? syncdir 'todisk) (conc "(NBFAKE_LOG="syncer-logfile" nbfake megatest -db2db -from "tmpdb" -to "dbfullname" -period 5 -timeout 10 > /dev/null 2&>1)&") (conc "(NBFAKE_LOG="syncer-logfile" nbfake megatest -db2db -from "dbfullname" -to "tmpdb" -period 5 -timeout 10 > /dev/null 2&>1)&"))) (synclock-file (conc dbfullname".lock")) @@ -242,11 +242,11 @@ (thethread (lambda () (thread-start! (make-thread (lambda () (set! *sync-in-progress* #t) - (debug:print-info "Running "sync-cmd) + (debug:print 0 *default-log-port* "Running "sync-cmd) (if (file-exists? syncer-running-file) (debug:print-info 0 *default-log-port* "Syncer still running, skipping syncer start.") (system sync-cmd)) (set! *sync-in-progress* #f))))))) (if ((if (eq? syncdir 'todisk) < >) ;; use less than for todisk, greater than for from disk @@ -474,47 +474,63 @@ (set! has-last #t))) dbh (conc "SELECT name FROM pragma_table_info('"tablename"') as tblInfo;")) has-last)) +(define (replace-question-marks-with-number str num) + (define (replace-helper str index result) + (if (>= index (string-length str)) + result + (let ((char (string-ref str index))) + (if (char=? char #\?) + (replace-helper str (+ index 1) (string-append result (number->string num))) + (replace-helper str (+ index 1) (string-append result (string char))))))) + + (replace-helper str 0 "")) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) ) ;; ;; direction = fromdest, todisk ;; mode = 'full, 'incr ;; ;; Idea: youngest in dest is last_update time -;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + (define (dbmod:attach-sync tables dbh destdbfile direction #!key (mode 'full) (no-update '("keys")) ;; do ) - (let* ((num-changes 0) + (debug:print-info 2 *default-log-port* "dbmod:attach-sync") + (let* ((num-changes 0) (update-changed (lambda (num-changed table qryname) (if (> num-changed 0) (begin (debug:print-info 0 *default-log-port* "Changed "num-changed" rows for table "table", qry "qryname) (set! num-changes (+ num-changes num-changed))))))) - (debug:print 0 *default-log-port* "Doing sync "direction" "destdbfile) + (debug:print 2 *default-log-port* "Doing sync "direction" "destdbfile) (if (not (sqlite3:auto-committing? dbh)) (debug:print 0 *default-log-port* "Skipping sync due to transaction in flight.") (let* ((table-names (map car tables)) (dest-exists (file-exists? destdbfile))) (assert dest-exists "FATAL: sync called with non-existant file, "destdbfile) ;; attach the destdbfile ;; for each table ;; insert into dest. select * from src.
where last_update>last_update ;; done - (debug:print 0 *default-log-port* "Attaching "destdbfile" as auxdb") + (debug:print 2 *default-log-port* "Attaching "destdbfile" as auxdb") (handle-exceptions exn (begin (debug:print 0 "ATTACH failed, exiting. exn="(condition->list exn)) (exit 1)) (sqlite3:execute dbh (conc "ATTACH '"destdbfile"' AS auxdb;"))) (for-each (lambda (table) - (let* ((tbldat (alist-ref table tables equal?)) + (let* ((dummy (debug:print 2 *default-log-port* "Doing table " table)) + (tbldat (alist-ref table tables equal?)) (fields (map car tbldat)) (no-id-fields (filter (lambda (x)(not (equal? x "id"))) fields)) (fields-str (string-intersperse fields ",")) (no-id-fields-str (string-intersperse no-id-fields ",")) (dir (eq? direction 'todisk)) @@ -529,27 +545,38 @@ " SELECT * FROM "fromdb table";")) (stmt2 (conc "INSERT OR IGNORE INTO "todb table " SELECT * FROM "fromdb table" WHERE "fromdb table".id=?;")) (stmt8 (conc "UPDATE "todb table" SET ("no-id-fields-str") = (SELECT "no-id-fields-str" FROM "fromdb table" WHERE "todb table".id="fromdb table".id" (conc " AND "fromdb table".last_update > "todb table".last_update);") - ");")) - (stmt9 (conc "UPDATE "todb table" SET ("no-id-fields-str") = " + ");")) + (update-string (conc "UPDATE "todb table" SET ")) + (split-update + (let () + (for-each + (lambda (column) + (set! update-string (conc update-string column" = (SELECT "column" FROM "fromdb table" WHERE "fromdb table".id=?), ")) + ) + no-id-fields + ) + ;; drop the last ", " + (conc (substring update-string 0 (-(string-length update-string) 2)) " WHERE "todb table".id=? ") + ) + ) + + + (stmt9 (conc "UPDATE "todb table" SET ("no-id-fields-str") = " "(SELECT "no-id-fields-str" FROM "fromdb table" WHERE "fromdb table".id=?)" " WHERE "todb table".id=?")) (newrec (conc "SELECT id FROM "fromdb table" WHERE id NOT IN (SELECT id FROM "todb table");")) - #;(changedrec (conc "SELECT id FROM "fromdb table" WHERE "fromdb table".last_update > "todb table".last_update AND " - fromdb table".id="todb table".id;")) ;; main = fromdb (changedrec (conc "SELECT "fromdb table".id FROM "fromdb table" join "todb table" on "fromdb table".id="todb table".id WHERE "fromdb table".last_update > "todb table".last_update;")) - ;; SELECT main.tests.id FROM main.tests join auxdb.tests on main.tests.id=auxdb.tests.id WHERE main.tests.last_update > auxdb.tests.last_update;" (start-ms (current-milliseconds)) (new-ids (sqlite3:fold-row (lambda (res id)(cons id res)) '() dbh newrec))) - ;; (debug:print 0 *default-log-port* "Got "(length aux-ids)" in aux-ids and "(length main-ids)" in main-ids") (update-changed (length new-ids) table "new records") (mutex-lock! *db-transaction-mutex*) (handle-exceptions exn - (debug:print 0 *default-log-port* "Transaction update of "table" failed.") + (debug:print 0 *default-log-port* "Transaction update of id fields in "table" failed.") (sqlite3:with-transaction dbh (lambda () (for-each (lambda (id) (sqlite3:execute dbh stmt2 id)) @@ -556,23 +583,40 @@ new-ids)))) (if (member "last_update" fields) (handle-exceptions exn - (debug:print 0 *default-log-port* "Transaction update of "table" failed.") + (debug:print 0 *default-log-port* "Transaction update of non id fields in "table" failed.") (sqlite3:with-transaction dbh (lambda () - (let* ((changed-ids (sqlite3:fold-row (lambda (res id)(cons id res)) '() dbh changedrec))) + (let* ((changed-ids (sqlite3:fold-row (lambda (res id)(cons id res)) '() dbh changedrec)) + (sql-query "") + ) (update-changed (length changed-ids) table "changed records") (for-each (lambda (id) - (sqlite3:execute dbh stmt9 id id)) - changed-ids)))))) - + (let* ((update-with-ids (replace-question-marks-with-number split-update id)) + ) + (debug:print 2 *default-log-port* "about to do sqlite3:execute " dbh " " update-with-ids ) + (handle-exceptions + exn + (debug:print 0 *default-log-port* "update from " fromdb table " to " todb table " failed: " ((condition-property-accessor 'exn 'message) exn)) + (sqlite3:execute dbh update-with-ids) + ) + (debug:print 2 *default-log-port* "after sqlite3:execute") + ) + ) + changed-ids + ) + ) + ) + ) + ) + ) (mutex-unlock! *db-transaction-mutex*) - (debug:print 0 *default-log-port* "Synced table "table + (debug:print 2 *default-log-port* "Synced table "table " in "(- (current-milliseconds) start-ms)"ms") )) table-names) (sqlite3:execute dbh "DETACH auxdb;"))) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -968,11 +968,11 @@ (tl (launch:setup)) (keys (keys:config-get-fields *configdat*))) (case (rmt:transport-mode) ((tcp) (let* ((timeout (server:expiration-timeout))) - (debug:print 0 *default-log-port* "INFO: Running using tcp method with server timeout of "timeout) + (debug:print 0 *default-log-port* "INFO: Starting server for " dbfname " using tcp method with server timeout of "timeout) (tt-server-timeout-param timeout) (if dbfname (tt:start-server tl #f dbfname api:tcp-dispatch-request-make-handler keys) (begin (debug:print 0 *default-log-port* "ERROR: transport mode is tcp - -db is required.") @@ -1092,10 +1092,12 @@ ) ) ) dbfiles ) + ;; remove this db, because otherwise metadata contains records for old servers, and this causes a problem with db:no-sync-get-lock-with-id. + (delete-file (conc *toppath* "/.mtdb/no-sync.db")) (set! *didsomething* #t) (exit) ) ) @@ -2132,13 +2134,13 @@ (exit 1))) (if (common:file-exists? (conc *toppath* "/megatest.db")) (begin (debug:print-info 1 *default-log-port* "File " (conc *toppath* "/megatest.db") " already exists. Please remove it before trying to replicate db") (exit 1))) - (if (and (common:get-db-tmp-area) (> (length (directory (common:get-db-tmp-area) #f)) 0)) + (if (and (dbfile:make-tmpdir-name *toppath* "") (> (length (directory (dbfile:make-tmpdir-name *toppath* "") #f)) 0)) (begin - (debug:print-info 1 *default-log-port* (common:get-db-tmp-area) " not empty. Please remove it before trying to replicate db") + (debug:print-info 1 *default-log-port* (dbfile:make-tmpdir-name *toppath* "") " not empty. Please remove it before trying to replicate db") (exit 1))) ;; check if timestamp (let* ((source (args:get-arg "-source")) (src (if (not (equal? (substring source 0 1) "/")) (conc (current-directory) "/" source) @@ -2633,10 +2635,11 @@ (if (args:get-arg "-sync-to") (let ((toppath (launch:setup))) (tasks:sync-to-postgres *configdat* (args:get-arg "-sync-to")) (set! *didsomething* #t))) + ;; use with -from and -to ;; (if (args:get-arg "-db2db") (let* ((duh (launch:setup)) @@ -2650,18 +2653,19 @@ (sync-timeout (if sync-timeout-in (string->number sync-timeout-in) #f)) (lockfile (conc dest-db".sync-lock")) (keys (db:get-keys #f)) (thesync (lambda (last-update) (debug:print-info 0 *default-log-port* "Attempting to sync data from "src-db" to "dest-db"...") + (debug:print-info 0 *default-log-port* "PID = " (current-process-id)) (if (not (file-exists? dest-db)) (begin (debug:print 0 *default-log-port* "Using copy to create "dest-db" from "src-db) (file-copy src-db dest-db) 1) (let ((res (dbmod:db-to-db-sync src-db dest-db last-update (dbfile:db-init-proc) keys))) (if res - (debug:print-info 0 *default-log-port* "Synced " res " records from "src-db" to "dest-db) + (debug:print-info 2 *default-log-port* "Synced " res " records from "src-db" to "dest-db) (debug:print-info 0 *default-log-port* "No sync due to permissions or other issue.")) res)))) (start-time (current-seconds)) (synclock-mod-time (if (file-exists? lockfile) (handle-exceptions @@ -2673,11 +2677,17 @@ ) (if (and src-db dest-db) (if (file-exists? src-db) (if (and (file-exists? lockfile) (< age 20)) (debug:print 0 *default-log-port* "Lock "lockfile" exists, skipping sync...") - (begin + (begin + (if (file-exists? lockfile) + (begin + (debug:print 0 *default-log-port* "Deleting old lock file " lockfile) + (delete-file lockfile) + ) + ) (dbfile:with-simple-file-lock lockfile (lambda () (let loop ((last-changed (current-seconds)) (last-update 0)) @@ -2694,11 +2704,11 @@ (> sync-timeout (- now-time last-changed))) (begin (if sync-period (thread-sleep! sync-period)) (loop (if (> changes 0) now-time last-changed) now-time)))))))) (debug:print 0 *default-log-port* "Releasing lock file " lockfile) - ) + ) ) (debug:print 0 *default-log-port* "No sync due to unreadble or non-existant source file"src-db)) (debug:print 0 *default-log-port* "Usage for -db2db; -to and -from must be specified")) (set! *didsomething* #t))) Index: tcp-transportmod.scm ================================================================== --- tcp-transportmod.scm +++ tcp-transportmod.scm @@ -132,26 +132,27 @@ ;; do all the busy work of finding and setting up conn for ;; connecting to a server ;; (define (tt:client-connect-to-server ttdat dbfname run-id testsuite) (assert (tt:valid-run-id run-id dbfname) "FATAL: invalid run-id "run-id) + (debug:print-info 2 *default-log-port* "tt:client-connect-to-server " dbfname " " run-id) (let* ((conn (hash-table-ref/default (tt-conns ttdat) dbfname #f)) (server-start-proc (lambda () (tt:server-process-run (tt-areapath ttdat) testsuite ;; (dbfile:testsuite-name) (common:find-local-megatest) run-id)))) (if conn (begin - ; (debug:print-info 0 *default-log-port* "already connected to the server") + (debug:print-info 2 *default-log-port* "already connected to a server") conn) ;; we are already connected to the server (let* ((sdat (tt:get-current-server-info ttdat dbfname))) (match sdat ((host port start-time server-id pid dbfname2 servinffile) (assert (equal? dbfname dbfname2) "FATAL: read server info from wrong file.") - ;(debug:print-info 0 *default-log-port* "in match servinffile:" servinffile) + (debug:print-info 2 *default-log-port* "no conn - in match servinffile:" servinffile) (let* ((host-port (conc host":"port)) (conn (make-tt-conn host: host port: port host-port: host-port @@ -162,34 +163,38 @@ pid: pid))) ;; verify we can talk to this server (let* ((result (tt:timed-ping host port server-id)) (ping-res (car result)) (ping (cdr result))) - (debug:print-info 0 *default-log-port* "ping time: " ping) + (debug:print-info 2 *default-log-port* "host " host " port " port " ping time: " ping " result " ping-res) (case ping-res ((running) + (debug:print-info 2 *default-log-port* "Setting conn = " conn " in hash table") (hash-table-set! (tt-conns ttdat) dbfname conn) ;;; is this ok to save before validating that the connection is good? conn) ((starting) (thread-sleep! 0.5) + (debug:print-info 2 *default-log-port* "server was in starting state, retrying connect") (tt:client-connect-to-server ttdat dbfname run-id testsuite)) (else (let* ((curr-secs (current-seconds))) ;; rm the (last server) would go here (if (> (- curr-secs (tt-last-serv-start ttdat)) 10) (begin (tt-last-serv-start-set! ttdat curr-secs) (server-start-proc))) ;; start server if 30 sec since last attempt (thread-sleep! 1) + (debug:print-info 2 *default-log-port* "server ping result was neither running nor starting. Retrying connect") (tt:client-connect-to-server ttdat dbfname run-id testsuite))))))) (else ;; no good server found, if haven't started server in > 5 secs, start another (if (> (- (current-seconds) (tt-last-serv-start ttdat)) 5) ;; BUG - grow this number really do not want to swamp the machine with servers (begin (debug:print-info 0 *default-log-port* "No server found. Starting one for run-id "run-id" in dbfile "dbfname) (server-start-proc) (tt-last-serv-start-set! ttdat (current-seconds)))) (thread-sleep! 1) + (debug:print-info 2 *default-log-port* "no good server found, try connect again") (tt:client-connect-to-server ttdat dbfname run-id testsuite))))))) (define (tt:timed-ping host port server-id) (let* ((start-time (current-milliseconds)) (result (tt:ping host port server-id))) @@ -223,10 +228,11 @@ ;; client side handler ;; ;;(tt:handler # get-keys #f () 2 #f "/home/matt/data/megatest/ext-tests" #f "main.db" "ext-tests" "/home/matt/data/megatest/bin/.22.04/../megatest") ;; (define (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe) + (debug:print 2 *default-log-port* "tt:handler cmd: " cmd " run-id: " run-id " attemptnum: " attemptnum) ;; NOTE: areapath is passed in and in tt struct. We'll use passed in value for now. (let* ((conn (tt:client-connect-to-server ttdat dbfname run-id testsuite))) ;; (hash-table-ref/default (tt-conns ttdat) dbfname #f))) (if conn ;; have connection, call the server (let* ((res (tt:send-receive ttdat conn cmd run-id params))) @@ -464,10 +470,11 @@ ;; Server viability is checked in keep-running. Blindly start and run here. ;; (define (tt:start-server areapath run-id dbfname-in handler keys) (assert areapath "FATAL: areapath not provided for tt:start-server") ;; is there already a server for this dbfile? Then exit. + (debug:print 2 *default-log-port* "tt:start-server: " dbfname-in) (let* ((ttdat (make-tt areapath: areapath)) (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id))) (servers (tt:find-server areapath dbfname))) ;; should use tt:get-current-server-info instead (if (> (length servers) 4) (begin