Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -28,11 +28,11 @@ ezsteps.scm lock-queue.scm rmt.scm api.scm \ subrun.scm portlogger.scm archive.scm env.scm \ diff-report.scm cgisetup/models/pgdb.scm # module source files -MSRCFILES = dbmod.scm dbfile.scm debugprint.scm mtargs.scm +MSRCFILES = dbmod.scm dbfile.scm debugprint.scm mtargs.scm commonmod.scm mofiles/dbfile.o : mofiles/debugprint.o mofiles/debugprint.o : mofiles/mtargs.o # ftail.scm rmtmod.scm commonmod.scm removed @@ -154,11 +154,12 @@ $(INSTALL) mt-pg.sql $(PREFIX)/share/db/mt-pg.sql # Special dependencies for the includes $(MOFILE) $(MOIMPFILES) : megatest-fossil-hash.scm -# common.o : mofiles/commonmod.o megatest-fossil-hash.scm +common.o : mofiles/commonmod.o megatest-fossil-hash.scm + # mofiles/dbmod.o : mofiles/configfmod.o # commonmod.o dashboard.o megatest.o tcmt.o apimod.o : megatest-fossil-hash.scm tests.o db.o launch.o runs.o dashboard-tests.o \ @@ -166,11 +167,11 @@ monitor.o dashboard.o archive.o megatest.o : db_records.scm megatest-fossil-hash.scm tests.o runs.o dashboard.o dashboard-tests.o dashboard-context-menu.o dashboard-main.o : run_records.scm db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm -db.o api.o : mofiles/dbmod.o mofiles/dbfile.o mofiles/debugprint.o +db.o api.o : mofiles/dbmod.o mofiles/dbfile.o mofiles/debugprint.o mofiles/commonmod.o tests.o tasks.o dashboard-tasks.o : task_records.scm runs.o : test_records.scm @@ -177,10 +178,12 @@ megatest.o : megatest-fossil-hash.scm megatest-version.scm rmt.scm client.scm common.scm configf.scm dashboard-guimonitor.scm dashboard-tests.scm dashboard.scm db.scm dcommon.scm ezsteps.scm http-transport.scm index-tree.scm items.scm keys.scm launch.scm megatest.scm monitor.scm mt.scm newdashboard.scm runconfig.scm runs.scm server.scm tdb.scm tests.scm tree.scm : common_records.scm megatest-version.scm common_records.scm : altdb.scm + +mofiles/dbfile.o : mofiles/commonmod.o # mofiles/stml2.o : mofiles/cookie.o # configf.o : mofiles/commonmod.o vg.o dashboard.o : vg_records.scm megatest-version.scm Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -147,17 +147,17 @@ ;; ;; - returns #( flag result ) ;; (define (api:execute-requests dbstruct dat) (db:open-no-sync-db) ;; sets *no-sync-db* - (handle-exceptions - exn - (let ((call-chain (get-call-chain))) - (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat ", exn=" exn) - (print-call-chain (current-error-port)) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens +;; (handle-exceptions +;; exn +;; (let ((call-chain (get-call-chain))) +;; (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat ", exn=" exn) +;; (print-call-chain (current-error-port)) +;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) +;; (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens (cond ((not (vector? dat)) ;; it is an error to not receive a vector (vector #f (vector #f "remote must be called with a vector"))) ((> *api-process-request-count* 200) ;; 20) (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.") @@ -239,11 +239,11 @@ ((test-data-rollup) (apply db:test-data-rollup dbstruct params)) ((csv->test-data) (apply db:csv->test-data dbstruct params)) ;; MISC ((sync-inmem->db) (let ((run-id (car params))) - (db:sync-touched dbstruct run-id force-sync: #t))) + (db:sync-touched dbstruct run-id db:initialize-main-db force-sync: #t))) ((mark-incomplete) (apply db:find-and-mark-incomplete dbstruct params)) ((create-all-triggers) (db:create-all-triggers dbstruct)) ((drop-all-triggers) (db:drop-all-triggers dbstruct)) ;; TESTMETA @@ -375,11 +375,11 @@ (vector #f res)) (begin #;(common:telemetry-log (conc "api-out:"(->string cmd)) payload: `((params . ,params) (ok-res . #f))) - (vector #t res)))))))) + (vector #t res))))))) ;; ) ;; http-server send-response ;; api:process-request ;; db:* ;; Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -71,12 +71,18 @@ ;; ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; - + (define (client:setup-http areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f)) + (mutex-lock! *rmt-mutex*) + (let ((res (client:setup-http-baby areapath remaining-tries: remaining-tries failed-connects: failed-connects area-dat: area-dat))) + (mutex-unlock! *rmt-mutex*) + res)) + +(define (client:setup-http-baby areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f)) (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries) (server:start-and-wait areapath) (if (<= remaining-tries 0) (begin (debug:print-error 0 *default-log-port* "failed to start or connect to server") @@ -86,11 +92,11 @@ ;; through them searching for a good one. ;; (let* ((server-dat (server:get-rand-best areapath)) ;; (server:get-first-best areapath)) (runremote (or area-dat *runremote*))) (if (not server-dat) ;; no server found - (client:setup-http areapath remaining-tries: (- remaining-tries 1)) + (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)) (let ((host (cadr server-dat)) (port (caddr server-dat)) (server-id (caddr (cddr server-dat)))) (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) (if (and (not area-dat) @@ -108,23 +114,26 @@ (ping-res (case *transport-type* ((http)(rmt:login-no-auto-client-setup start-res))))) (if (and start-res ping-res) (let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago - (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res) - (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res)) - start-res) + (if runremote + (begin + (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res) + (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res)) + start-res) + (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)))) (begin ;; login failed but have a server record, clean out the record and try again (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid. Fixes part of Randy;s ticket 1405717332 (case *transport-type* ((http)(http-transport:close-connections))) (remote-conndat-set! runremote #f) ;; (hash-table-delete! runremote run-id) (thread-sleep! 1) - (client:setup-http areapath remaining-tries: (- remaining-tries 1)) + (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)) ))) (begin ;; no server registered ;; (server:kind-run areapath) (server:start-and-wait areapath) (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries) (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms. - (client:setup-http areapath remaining-tries: (- remaining-tries 1))))))))) + (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))))))))) Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -26,12 +26,12 @@ (prefix sqlite3 sqlite3:) pkts (prefix dbi dbi:) ) (declare (unit common)) -;; (declare (uses commonmod)) -;; (import commonmod) +(declare (uses commonmod)) +(import commonmod) (include "common_records.scm") ;; (require-library margs) @@ -129,11 +129,11 @@ (define *test-meta-updated* (make-hash-table)) (define *globalexitstatus* 0) ;; attempt to work around possible thread issues (define *passnum* 0) ;; when running track calls to run-tests or similar ;; (define *alt-log-file* #f) ;; used by -log -(define *common:denoise* (make-hash-table)) ;; for low noise printing +;; (define *common:denoise* (make-hash-table)) ;; for low noise printing (define *default-log-port* (current-error-port)) (define *time-zero* (current-seconds)) ;; for the watchdog (define *default-area-tag* "local") ;; DATABASE @@ -141,22 +141,22 @@ ;; db stats (define *db-stats* (make-hash-table)) ;; hash of vectors < count duration-total > (define *db-stats-mutex* (make-mutex)) ;; db access (define *db-last-access* (current-seconds)) ;; last db access, used in server -(define *db-write-access* #t) +;; (define *db-write-access* #t) ;; db sync -(define *db-last-sync* 0) ;; last time the sync to megatest.db happened +;; (define *db-last-sync* 0) ;; last time the sync to megatest.db happened (define *db-sync-in-progress* #f) ;; if there is a sync in progress do not try to start another -(define *db-multi-sync-mutex* (make-mutex)) ;; protect access to *db-sync-in-progress*, *db-last-sync* +;; (define *db-multi-sync-mutex* (make-mutex)) ;; protect access to *db-sync-in-progress*, *db-last-sync* ;; task db (define *task-db* #f) ;; (vector db path-to-db) (define *db-access-allowed* #t) ;; flag to allow access ;; (define *db-access-mutex* (make-mutex)) ;; moved to dbfile (define *db-transaction-mutex* (make-mutex)) (define *db-cache-path* #f) -(define *db-with-db-mutex* (make-mutex)) +;; (define *db-with-db-mutex* (make-mutex)) (define *db-api-call-time* (make-hash-table)) ;; hash of command => (list of times) ;; no sync db ;; (define *no-sync-db* #f) ;; moved to dbfile ;; SERVER @@ -172,12 +172,12 @@ (define *run-id* #f) (define *server-kind-run* (make-hash-table)) (define *home-host* #f) ;; (define *total-non-write-delay* 0) (define *heartbeat-mutex* (make-mutex)) -(define *api-process-request-count* 0) -(define *max-api-process-requests* 0) +;; (define *api-process-request-count* 0) +;; (define *max-api-process-requests* 0) (define *server-overloaded* #f) ;; client (define *rmt-mutex* (make-mutex)) ;; remote access calls mutex @@ -310,10 +310,11 @@ (hh-dat (common:get-homehost)) ;; homehost record ( addr . hhflag ) (server-url #f) ;; (server:check-if-running *toppath*) #f)) (server-id #f) (server-info (if *toppath* (server:check-if-running *toppath*) #f)) (last-server-check 0) ;; last time we checked to see if the server was alive + (connect-time (current-seconds)) (conndat #f) (transport *transport-type*) (server-timeout (server:expiration-timeout)) (force-server #f) (ro-mode #f) @@ -725,54 +726,10 @@ (print-call-chain (current-error-port)) #f) (read (open-input-string (base64:base64-decode instr)))) (read (open-input-string (z3:decode-buffer (base64:base64-decode instr)))))) -;; dot-locking egg seems not to work, using this for now -;; if lock is older than expire-time then remove it and try again -;; to get the lock -;; -(define (common:simple-file-lock fname #!key (expire-time 300)) - (let ((fmod-time (handle-exceptions - ext - (current-seconds) - (file-modification-time fname)))) - (if (common:file-exists? fname) - (if (> (- (current-seconds) fmod-time) expire-time) - (begin - (handle-exceptions exn #f (delete-file* fname)) - (common:simple-file-lock fname expire-time: expire-time)) - #f) - (let ((key-string (conc (get-host-name) "-" (current-process-id)))) - (with-output-to-file fname - (lambda () - (print key-string))) - (thread-sleep! 0.25) - (if (common:file-exists? fname) - (handle-exceptions exn - #f - (with-input-from-file fname - (lambda () - (equal? key-string (read-line))))) - #f))))) - -(define (common:simple-file-lock-and-wait fname #!key (expire-time 300)) - (let ((end-time (+ expire-time (current-seconds)))) - (let loop ((got-lock (common:simple-file-lock fname expire-time: expire-time))) - (if got-lock - #t - (if (> end-time (current-seconds)) - (begin - (thread-sleep! 3) - (loop (common:simple-file-lock fname expire-time: expire-time))) - #f))))) - -(define (common:simple-file-release-lock fname) - (handle-exceptions - exn - #f ;; I don't really care why this failed (at least for now) - (delete-file* fname))) ;;====================================================================== ;; S T A T E S A N D S T A T U S E S ;;====================================================================== Index: commonmod.scm ================================================================== --- commonmod.scm +++ commonmod.scm @@ -17,16 +17,19 @@ ;; along with Megatest. If not, see . ;;====================================================================== (declare (unit commonmod)) + +(use srfi-69) (module commonmod * - + (import scheme chicken data-structures extras files) -(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 +(import (prefix sqlite3 sqlite3:) + posix typed-records srfi-18 srfi-69 md5 message-digest regex srfi-1) ;;====================================================================== ;; CONTENTS @@ -44,10 +47,21 @@ (conc megatest-version "-" megatest-fossil-hash)) (define (version-signature) (conc megatest-version "-" (substring megatest-fossil-hash 0 4))) +(define *common:denoise* (make-hash-table)) ;; for low noise printing + +(define (common:low-noise-print waitval . keys) + (let* ((key (string-intersperse (map conc keys) "-" )) + (lasttime (hash-table-ref/default *common:denoise* key 0)) + (currtime (current-seconds))) + (if (> (- currtime lasttime) waitval) + (begin + (hash-table-set! *common:denoise* key currtime) + #t) + #f))) ;;====================================================================== ;; config file utils ;;====================================================================== @@ -72,10 +86,55 @@ '()))) ;; should it return empty list or #f to indicate not set? (define (get-section cfgdat section) (hash-table-ref/default cfgdat section '())) + +;; dot-locking egg seems not to work, using this for now +;; if lock is older than expire-time then remove it and try again +;; to get the lock +;; +(define (common:simple-file-lock fname #!key (expire-time 300)) + (let ((fmod-time (handle-exceptions + ext + (current-seconds) + (file-modification-time fname)))) + (if (file-exists? fname) ;; (common:file-exists? fname) + (if (> (- (current-seconds) fmod-time) expire-time) + (begin + (handle-exceptions exn #f (delete-file* fname)) + (common:simple-file-lock fname expire-time: expire-time)) + #f) + (let ((key-string (conc (get-host-name) "-" (current-process-id)))) + (with-output-to-file fname + (lambda () + (print key-string))) + (thread-sleep! 0.25) + (if (file-exists? fname) ;; (common:file-exists? fname) + (handle-exceptions exn + #f + (with-input-from-file fname + (lambda () + (equal? key-string (read-line))))) + #f))))) + +(define (common:simple-file-lock-and-wait fname #!key (expire-time 300)) + (let ((end-time (+ expire-time (current-seconds)))) + (let loop ((got-lock (common:simple-file-lock fname expire-time: expire-time))) + (if got-lock + #t + (if (> end-time (current-seconds)) + (begin + (thread-sleep! 3) + (loop (common:simple-file-lock fname expire-time: expire-time))) + #f))))) + +(define (common:simple-file-release-lock fname) + (handle-exceptions + exn + #f ;; I don't really care why this failed (at least for now) + (delete-file* fname))) ;;====================================================================== ;; misc conversion, data manipulation functions ;;====================================================================== Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -26,10 +26,11 @@ (use canvas-draw) (import canvas-draw-iup) (use ducttape-lib) (use sqlite3 srfi-1 posix regex regex-case srfi-69 typed-records sparse-vectors) ;; defstruct (import (prefix sqlite3 sqlite3:)) +(import dbfile) (declare (uses common)) (declare (uses margs)) (declare (uses keys)) (declare (uses items)) @@ -45,18 +46,21 @@ (declare (uses dashboard-context-menu)) (declare (uses vg)) (declare (uses subrun)) ;; (declare (uses dashboard-main)) (declare (uses mt)) +(declare (uses dbfile)) (include "common_records.scm") (include "db_records.scm") (include "run_records.scm") (include "task_records.scm") (include "megatest-version.scm") (include "megatest-fossil-hash.scm") (include "vg_records.scm") + +(dbfile:db-init-proc db:initialize-main-db) (define help (conc "Megatest Dashboard, documentation at http://www.kiatoa.com/fossils/megatest version " megatest-version " license GPL, Copyright (C) Matt Welland 2012-2017 Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -71,10 +71,11 @@ ;; (defstruct dbr:counts (state #f) (status #f) (count 0)) + ;;====================================================================== ;; hash of hashs ;;====================================================================== @@ -125,23 +126,16 @@ (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) (print-call-chain (current-error-port)) default))) (apply sqlite3:first-result db stmt params))) -(define (db:generic-error-printout exn . message) - (print-call-chain (current-error-port)) - (apply debug:print-error 0 *default-log-port* message) - (debug:print-error 0 *default-log-port* ;; " params: " params - ", error: " ((condition-property-accessor 'exn 'message) exn) - ", arguments: " ((condition-property-accessor 'exn 'arguments) exn) - ", location: " ((condition-property-accessor 'exn 'location) exn) - )) - (define (db:setup do-sync) (assert *toppath* "FATAL: db:setup called before launch:setup has been run.") (let* ((tmpdir (common:get-db-tmp-area))) - (dbfile:setup do-sync *toppath* tmpdir))) + (if (not *dbstruct-dbs*) + (dbfile:setup do-sync *toppath* tmpdir) + *dbstruct-dbs*))) ;; looks up subdb and returns it, if not found then set up ;; and then return it. ;; #;(define (db:get-db dbstruct run-id) @@ -194,59 +188,10 @@ ", error: " ((condition-property-accessor 'exn 'message) exn) ", arguments: " ((condition-property-accessor 'exn 'arguments) exn) ", location: " ((condition-property-accessor 'exn 'location) exn) )) -(define (db:open-db dbstruct run-id) - (let* ((dbdat (dbfile:open-db dbstruct run-id db:initialize-main-db))) - (set! *db-write-access* (not (dbr:dbdat-read-only dbdat))) - dbdat)) - -;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;") -;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no -;; -(define (db:with-db dbstruct run-id r/w proc . params) - (let* ((have-struct (dbr:dbstruct? dbstruct)) - (dbdat (if have-struct ;; this stuff just allows us to call with a db handle directly - (db:open-db dbstruct run-id) ;; (dbfile:get-subdb dbstruct run-id) - #f)) - (db (if have-struct ;; this stuff just allows us to call with a db handle directly - (dbr:dbdat-dbh dbdat) - dbstruct)) - (fname (if dbdat - (dbr:dbdat-dbfile dbdat) - "nofilenameavailable")) - #;(subdb (if have-struct - (dbfile:get-subdb dbstruct run-id) - #f)) - (use-mutex (> *api-process-request-count* 25))) ;; was 25 - (if (and use-mutex - (common:low-noise-print 120 "over-50-parallel-api-requests")) - (debug:print-info 0 *default-log-port* *api-process-request-count* " parallel api requests being processed in process " (current-process-id) ", throttling access")) - (if (common:low-noise-print 600 (conc "parallel-api-requests" *max-api-process-requests*)) - (debug:print-info 1 *default-log-port* "Parallel api request count: " *api-process-request-count* " max parallel requests: " *max-api-process-requests*)) - (condition-case - (begin - (if use-mutex (mutex-lock! *db-with-db-mutex*)) - (let ((res (apply proc dbdat db params))) - (if use-mutex (mutex-unlock! *db-with-db-mutex*)) - ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w)) - (if dbdat - (dbfile:add-dbdat dbstruct run-id dbdat)) - res)) - (exn (io-error) - (db:generic-error-printout exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")) - (exn (corrupt) - (db:generic-error-printout exn "ERROR: database " fname " is corrupt. Repair it to proceed.")) - (exn (busy) - (db:generic-error-printout exn "ERROR: database " fname - " is locked. Try copying to another location, remove original and copy back.")) - (exn (permission)(db:generic-error-printout exn "ERROR: database " fname " has some permissions problem.")) - (exn () - (db:generic-error-printout exn "ERROR: Unknown error with database " fname " message: " - ((condition-property-accessor 'exn 'message) exn)))))) - ;; NB// #f => return dbdir only ;; (was planned to be; zeroth db with name=main.db) ;; ;; If run-id is #f return to create and retrieve the path where the db will live. @@ -430,31 +375,10 @@ (if (and dbexists (not write-access)) (set! *db-write-access* #f)) ;; (cons db dbpath))) (make-dbr:dbdat dbfile: dbpath dbh: db read-only: (not write-access)))) -;; sync run from tmp disk to nfs disk if touched -;; -(define (db:sync-touched dbstruct run-id #!key (force-sync #f)) - (debug:print-info 0 *default-log-port* "db:sync-touched Syncing: " (conc (if run-id run-id "main") ".db")) - (let* (;; the subdb is needed to access the mtdbdat - (subdb (or (dbfile:get-subdb dbstruct run-id) - (dbfile:init-subdb dbstruct run-id db:initialize-main-db))) - (tmpdbfile (dbr:subdb-tmpdbfile subdb)) - (mtdb (dbr:subdb-mtdbdat subdb)) - (tmpdb (dbfile:open-db dbstruct run-id db:initialize-main-db)) ;; sqlite3-db tmpdbfile #f)) - (start-t (current-seconds))) - (mutex-lock! *db-multi-sync-mutex*) - (let ((update_info (cons "last_update" (if force-sync 0 *db-last-sync*) ))) - (mutex-unlock! *db-multi-sync-mutex*) - (db:sync-tables (db:sync-all-tables-list dbstruct) update_info tmpdb mtdb)) - (mutex-lock! *db-multi-sync-mutex*) - (set! *db-last-sync* start-t) - (set! *db-last-access* start-t) - (mutex-unlock! *db-multi-sync-mutex*) - (dbfile:add-dbdat dbstruct run-id tmpdb) - #t)) ;; db:safely-close-sqlite3-db and db:close-all were here, moved to dbfile ;; (let ((locdbs (dbr:dbstruct-locdbs dbstruct))) @@ -468,110 +392,10 @@ ;; (handler (make-busy-timeout 3600))) ;; (sqlite3:set-busy-handler! db handler) ;; (db:initialize-run-id-db db) ;; (cons db #f))) -;; just tests, test_steps and test_data tables -(define db:sync-tests-only - (list - ;; (list "strs" - ;; '("id" #f) - ;; '("str" #f)) - (list "tests" - '("id" #f) - '("run_id" #f) - '("testname" #f) - '("host" #f) - '("cpuload" #f) - '("diskfree" #f) - '("uname" #f) - '("rundir" #f) - '("shortdir" #f) - '("item_path" #f) - '("state" #f) - '("status" #f) - '("attemptnum" #f) - '("final_logf" #f) - '("logdat" #f) - '("run_duration" #f) - '("comment" #f) - '("event_time" #f) - '("fail_count" #f) - '("pass_count" #f) - '("archived" #f) - '("last_update" #f)) - (list "test_steps" - '("id" #f) - '("test_id" #f) - '("stepname" #f) - '("state" #f) - '("status" #f) - '("event_time" #f) - '("comment" #f) - '("logfile" #f) - '("last_update" #f)) - (list "test_data" - '("id" #f) - '("test_id" #f) - '("category" #f) - '("variable" #f) - '("value" #f) - '("expected" #f) - '("tol" #f) - '("units" #f) - '("comment" #f) - '("status" #f) - '("type" #f) - '("last_update" #f)))) - -;; needs db to get keys, this is for syncing all tables -;; -(define (db:sync-main-list dbstruct) - (let ((keys (db:get-keys dbstruct))) - (list - (list "keys" - '("id" #f) - '("fieldname" #f) - '("fieldtype" #f)) - (list "metadat" '("var" #f) '("val" #f)) - (append (list "runs" - '("id" #f)) - (map (lambda (k)(list k #f)) - (append keys - (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update")))) - (list "archive_disks" - '("id" #f) - '("archive_area_name" #f) - '("disk_path" #f) - '("last_df" #f) - '("last_df_time" #f) - '("creation_time" #f)) - - (list "archive_blocks" - '("id" #f) - '("archive_disk_id" #f) - '("disk_path" #f) - '("last_du" #f) - '("last_du_time" #f) - '("creation_time" #f)) - - (list "test_meta" - '("id" #f) - '("testname" #f) - '("owner" #f) - '("description" #f) - '("reviewed" #f) - '("iterated" #f) - '("avg_runtime" #f) - '("avg_disk" #f) - '("tags" #f) - '("jobgroup" #f))))) - -(define (db:sync-all-tables-list dbstruct) - (append (db:sync-main-list dbstruct) - db:sync-tests-only)) - ;; use bunch of Unix commands to try to break the lock and recreate the db ;; (define (db:move-and-recreate-db dbdat) (let* ((dbpath (dbr:dbdat-dbfile dbdat)) (dbdir (pathname-directory dbpath)) @@ -645,240 +469,10 @@ (sqlite3:execute db "vacuum;"))) (sqlite3:finalize! db) #t)))))) -;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) ) -;; db's are dbdat's -;; -;; if last-update specified ("field-name" . time-in-seconds) -;; then sync only records where field-name >= time-in-seconds -;; IFF field-name exists -;; -(define (db:sync-tables tbls last-update fromdb todb . slave-dbs) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable in db:sync-tables.") - (print-call-chain (current-error-port)) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 *default-log-port* "exn=" (condition->list exn)) - (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status) exn)) - (debug:print 0 *default-log-port* " src db: " (dbr:dbdat-dbfile fromdb)) - (for-each (lambda (dbdat) - (let ((dbpath (dbr:dbdat-dbfile dbdat))) - (debug:print 0 *default-log-port* " dbpath: " dbpath) - (if (not (db:repair-db dbdat)) - (begin - (debug:print-error 0 *default-log-port* "Failed to rebuild " dbpath ", exiting now.") - (exit))))) - (cons todb slave-dbs)) - - 0) - - ;; this is the work to be done") - (cond - ((not fromdb) (debug:print 0 *default-log-port* "WARNING: db:sync-tables called with fromdb missing") - -1) - ((not todb) (debug:print 0 *default-log-port* "WARNING: db:sync-tables called with todb missing") - -2) - ((not (sqlite3:database? (dbr:dbdat-dbh fromdb))) - (debug:print-error 0 *default-log-port* "db:sync-tables called with fromdb not a database " fromdb) - -3) - ((not (sqlite3:database? (dbr:dbdat-dbh todb))) - (debug:print-error 0 *default-log-port* "db:sync-tables called with todb not a database " todb) - -4) - - ((not (file-write-access? (dbr:dbdat-dbfile todb))) - (debug:print-error 0 *default-log-port* "db:sync-tables called with todb not a read-only database " todb) - -5) - ((not (null? (let ((readonly-slave-dbs - (filter - (lambda (dbdat) - (not (file-write-access? (dbr:dbdat-dbfile todb)))) - slave-dbs))) - (for-each - (lambda (bad-dbdat) - (debug:print-error - 0 *default-log-port* "db:sync-tables called with todb not a read-only database " bad-dbdat)) - readonly-slave-dbs) - readonly-slave-dbs))) -6) - (else - (debug:print 3 *default-log-port* "db:sync-tables: args are good") - - (let ((stmts (make-hash-table)) ;; table-field => stmt - (all-stmts '()) ;; ( ( stmt1 value1 ) ( stml2 value2 )) - (numrecs (make-hash-table)) - (start-time (current-milliseconds)) - (tot-count 0)) - (for-each ;; table - (lambda (tabledat) - (let* ((tablename (car tabledat)) - (fields (cdr tabledat)) - (has-last-update (member "last_update" fields)) - (use-last-update (cond - ((and has-last-update - (member "last_update" fields)) - #t) ;; if given a number, just use it for all fields - ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table - ((and (pair? last-update) - (member (car last-update) ;; last-update field name - (map car fields))) - #t) - ((and last-update (not (pair? last-update)) (not (number? last-update))) - (debug:print 0 *default-log-port* "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update);; found in fields - #f) - (else - #f))) - (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for - (if (number? last-update) - last-update - (cdr last-update)) - #f)) - (last-update-field (if use-last-update - (if (number? last-update) - "last_update" - (car last-update)) - #f)) - (num-fields (length fields)) - (field->num (make-hash-table)) - (num->field (apply vector (map car fields))) ;; BBHERE - (full-sel (conc "SELECT " (string-intersperse (map car fields) ",") - " FROM " tablename (if use-last-update ;; apply last-update criteria - (conc " WHERE " last-update-field " >= " last-update-value) - "") - ";")) - (full-ins (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) " - " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );")) - (fromdat '()) - (fromdats '()) - (totrecords 0) - (batch-len (string->number (or (configf:lookup *configdat* "sync" "batchsize") "100"))) - (todat (make-hash-table)) - (count 0) - (field-names (map car fields)) - (delay-handicap (string->number (or (configf:lookup *configdat* "sync" "delay-handicap") "0"))) - ) - - ;; set up the field->num table - (for-each - (lambda (field) - (hash-table-set! field->num field count) - (set! count (+ count 1))) - fields) - - ;; read the source table - ;; store a list of all rows in the table in fromdat, up to batch-len. - ;; Then add fromdat to the fromdats list, clear fromdat and repeat. - (sqlite3:for-each-row - (lambda (a . b) - (set! fromdat (cons (apply vector a b) fromdat)) - (if (> (length fromdat) batch-len) - (begin - (set! fromdats (cons fromdat fromdats)) - (set! fromdat '()) - (set! totrecords (+ totrecords 1))) - ) - ) - (dbr:dbdat-dbh fromdb) - full-sel) - - ;; Count less than batch-len as a record - (if (> (length fromdat) 0) - (set! totrecords (+ totrecords 1))) - - ;; tack on remaining records in fromdat - (if (not (null? fromdat)) - (set! fromdats (cons fromdat fromdats))) - - (if (common:low-noise-print 120 "sync-records") - (debug:print 0 *default-log-port* "found " totrecords " records to sync")) - - (sqlite3:for-each-row - (lambda (a . b) - (hash-table-set! todat a (apply vector a b))) - (dbr:dbdat-dbh todb) - full-sel) - - (when (and delay-handicap (> delay-handicap 0)) - (debug:print-info 0 *default-log-port* "imposing synthetic sync delay of "delay-handicap" seconds since sync/delay-handicap is configured") - (thread-sleep! delay-handicap) - (debug:print-info 0 *default-log-port* "synthetic sync delay of "delay-handicap" seconds completed") - ) - - ;; first pass implementation, just insert all changed rows - - (for-each - (lambda (targdb) - (let* ((db (dbr:dbdat-dbh targdb)) - (drp-trigger (if (member "last_update" field-names) - (db:drop-trigger db tablename) - #f)) - (is-trigger-dropped (if (member "last_update" field-names) - (db:is-trigger-dropped db tablename) - #f)) - (stmth (sqlite3:prepare db full-ins)) - (changed-rows 0)) - ;; (db:delay-if-busy targdb) ;; NO WAITING - (if (member "last_update" field-names) - (debug:print-info 3 *default-log-port* "is-trigger-dropped: " is-trigger-dropped)) - - (for-each - (lambda (fromdat-lst) - (sqlite3:with-transaction - db - (lambda () - (for-each ;; - (lambda (fromrow) - (let* ((a (vector-ref fromrow 0)) - (curr (hash-table-ref/default todat a #f)) - (same #t)) - (let loop ((i 0)) - (if (or (not curr) - (not (equal? (vector-ref fromrow i)(vector-ref curr i)))) - (set! same #f)) - (if (and same - (< i (- num-fields 1))) - (loop (+ i 1)))) - (if (not same) - (begin - (apply sqlite3:execute stmth (vector->list fromrow)) - (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0))) - (set! changed-rows (+ changed-rows 1)) - ) - ) - )) - fromdat-lst)))) - fromdats) - - - (if (> changed-rows 0) - (debug:print 0 *default-log-port* "table " tablename " changed rows: " changed-rows) - ) - - - (sqlite3:finalize! stmth) - (if (member "last_update" field-names) - (db:create-trigger db tablename)))) - (append (list todb) slave-dbs) - ) - ) - ) - tbls) - (let* ((runtime (- (current-milliseconds) start-time)) - (should-print (or (debug:debug-mode 12) - (common:low-noise-print 120 "db sync" (> runtime 500))))) ;; low and high sync times treated as separate. - (if should-print (debug:print 0 *default-log-port* "INFO: db sync, total run time " runtime " ms")) - (for-each - (lambda (dat) - (let ((tblname (car dat)) - (count (cdr dat))) - (set! tot-count (+ tot-count count)) - (if (> count 0) - (if should-print (debug:print 0 *default-log-port* (format #f " ~10a ~5a" tblname count)))))) - (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b)))))) - tot-count))))) (define (db:patch-schema-rundb frundb) ;; ;; remove this some time after September 2016 (added in version v1.6031 ;; @@ -1039,11 +633,11 @@ ;; (lambda () ;; (if (and (common:file-exists? megatest-db) ;; (file-write-access? megatest-db)) ;; (begin ;; (db:sync-to-megatest.db dbstruct 'timestamps) ;; internally mutexes on *db-local-sync* -;; (debug:print-info 2 *default-log-port* "Done syncing to megatest.db")))) +;; (debug:print-info 2 *default-log-port* "Done syncing to megatest.db")))) ;; "call-with-cached-db sync-to-megatest.db")) ;; (cache-db (db:cache-for-read-only ;; megatest-db ;; (conc cache-dir "/" fname) ;; use-last-update: #t))) @@ -1051,37 +645,13 @@ ;; (apply proc cache-db params) ;; )))) - -;; Get a lock from the no-sync-db for the from-db, then delta sync the from-db to the to-db, otherwise return #f - -(define (db:lock-and-delta-sync no-sync-db dbstruct from-db-file runid) - (assert (not *db-sync-in-progress*) "FATAL: db:lock-and-sync called while a sync is in progress.") - (debug:print-info 3 *default-log-port* "db:lock-and-delta-sync") - (let* ((lockdat (db:no-sync-get-lock no-sync-db from-db-file)) - (gotlock (car lockdat)) - (locktime (cdr lockdat))) - - (debug:print-info 3 *default-log-port* "db:lock-and-delta-sync: got lock?") - (if gotlock - (begin - (debug:print 0 *default-log-port* "db:lock-and-delta-sync copying db "runid" at "(current-seconds)) - (db:sync-touched dbstruct runid) - (db:no-sync-del! no-sync-db from-db-file) - #t) - (begin - (debug:print 0 *default-log-port* "could not get lock for " from-db-file " from no-sync-db") - #f - )))) - - - (define (db:all-db-sync dbstruct) - (let* ((dbdat (db:open-db dbstruct #f)) + (let* ((dbdat (db:open-db dbstruct #f db:initialize-main-db)) (data-synced 0) ;; count of changed records (tmp-area (common:get-db-tmp-area)) (dbfiles (glob (conc tmp-area"/.db/*.db"))) (sync-durations (make-hash-table)) (no-sync-db (db:open-no-sync-db))) @@ -1117,11 +687,11 @@ (fname (pathname-file file)) (runid (if (string= fname "main") #f (string->number fname))) ) (debug:print-info 3 *default-log-port* "db:all-db-sync: fname: " fname", delta: " (- time1 time2) " seconds") - (db:lock-and-delta-sync no-sync-db dbstruct fname runid) + (db:lock-and-delta-sync no-sync-db dbstruct fname runid (db:get-keys dbstruct) db:initialize-main-db) (hash-table-set! sync-durations (conc fname".db") (- (current-milliseconds) start-time))) (debug:print-info 3 *default-log-port* "skipping sync. " file " is up to date") ))) dbfiles @@ -1142,125 +712,67 @@ ;; (define (db:multi-db-sync dbstruct . options) (let* ((dbdat (db:open-db dbstruct #f)) (data-synced 0) ;; count of changed records (tmp-area (common:get-db-tmp-area)) - (dbfiles (glob (conc tmp-area"/.db/*.db"))) - (sync-durations (make-hash-table))) - (for-each - (lambda (file) - (debug:print-info 0 *default-log-port* "file: " file) - (let* ((fname (conc (pathname-file file) ".db")) - (fulln (conc *toppath*"/.db/"fname)) - (time1 (if (file-exists? file) - (file-modification-time file) - (begin - (debug:print-info 0 *default-log-port* "Sync - I do not see file "file) - 1))) - (time2 (if (file-exists? fulln) - (file-modification-time fulln) - (begin - (debug:print-info 0 *default-log-port* "Sync - I do not see file "fulln) + (old2new (member 'old2new options)) + (src-area (if old2new *toppath* tmp-area)) + (dest-area (if old2new tmp-area *toppath*)) + (dbfiles (glob (conc tmp-area"/.db/*.db"))) + (keys (db:get-keys dbstruct)) + (sync-durations (make-hash-table))) + + (for-each + (lambda (srcfile) + (debug:print-info 0 *default-log-port* "file: " srcfile) + (let* ((fname (conc (pathname-file srcfile) ".db")) + (basename (pathname-file srcfile)) + (run-id (if (string= basename "main") #f (string->number basename))) + (destfile (conc dest-area "/.db/" fname)) + (time1 (file-modification-time srcfile)) + + (time2 (if (file-exists? destfile) + (file-modification-time destfile) + (begin + (debug:print-info 0 *default-log-port* "Sync - I do not see file " destfile) 0))) (changed (> time1 time2)) - (do-cp (cond - ((not (file-exists? fulln)) ;; shouldn't happen, but this might recover - (debug:print-info 0 *default-log-port* "File "fulln" not found! Copying "fname" to "fulln) + + (do-cp (cond + ((not (file-exists? destfile)) ;; shouldn't happen, but this might recover + (debug:print-info 0 *default-log-port* "File " destfile " not found! Copying "srcfile" to "destfile) #t) (changed ;; (and changed ;; (> (- (current-seconds) time1) 3)) ;; if file is changed and three seconds have passed. #t) ((and changed *time-to-exit*) ;; last sync #t) (else #f)))) - (if do-cp - (let* ((start-time (current-milliseconds))) - (debug:print-info 0 *default-log-port* "delta sync delta file: " fname", delta: " (- time1 time2) " seconds") - (db:lock-and-delta-sync *no-sync-db* file fulln) - (hash-table-set! sync-durations (conc fname".db") (- (current-milliseconds) start-time))) - (debug:print-info 0 *default-log-port* "skipping delta sync. " file " is up to date") - ) + (if do-cp + (let* ( + (start-time (current-milliseconds)) + + (subdb (or (dbfile:get-subdb dbstruct run-id) (dbfile:init-subdb dbstruct run-id dbfile:db-init-proc))) + (mtdb (dbr:subdb-mtdbdat subdb)) + (tmpdb (dbfile:open-db dbstruct run-id dbfile:db-init-proc)) + + ) + (debug:print-info 0 *default-log-port* "delta syncing file: " srcfile ", time diff: " (- time1 time2) " seconds") + (if old2new + (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) #f mtdb tmpdb) + (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) #f tmpdb mtdb) + ) + (hash-table-set! sync-durations (conc srcfile ".db") (- (current-milliseconds) start-time))) + (debug:print-info 0 *default-log-port* "skipping delta sync. " srcfile " is up to date") + ) ) ) dbfiles ) - - - (hash-table->alist sync-durations) - - - - (debug:print 0 *default-log-port* "db:multi-db-sync subdbs: " (hash-table-values (dbr:dbstruct-subdbs dbstruct))) - (for-each - (lambda (subdb) - (let* ((mtdb (dbr:subdb-mtdbdat subdb)) - (tmpdbfile (dbr:subdb-tmpdbfile subdb)) - (main-tmpdb (dbfile:open-db dbstruct #f db:initialize-main-db)) - (allow-cleanup #t) ;; (if run-ids #f #t)) - (servers (server:get-list *toppath*)) ;; (tasks:get-all-servers (db:delay-if-busy tdbdat))) - ) - (debug:print 0 *default-log-port* "db:multi-db-sync mtdb: " mtdb " tmpdbfile:" tmpdbfile ) - (for-each - (lambda (option) - - (case option - ;; kill servers - ((killservers) - (for-each - (lambda (server) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " server ", exn=" exn) - #f) - (match-let (((mod-time host port start-time server-id pid) server)) - (if (and host pid) - (tasks:kill-server host pid))))) - servers) - - ;; /tmp/bjbarcla/megatest_localdb/fullrun/.nfs.pdx.disks.icf_env_disk001.bjbarcla.gwa.issues.mtdev.matt-bisect.megatest.ext-tests.runs.all.v1.65.1.6524.dev.bb-24-justrollup-f8.rollup.fullrun/megatest.db.lock - (delete-file* (common:get-sync-lock-filepath))) - - ;; clear out junk records - ;; - ((dejunk) - ;; (db:delay-if-busy mtdb) ;; ok to delay on mtdb - (when (file-write-access? (dbr:dbdat-dbfile mtdb)) (db:clean-up mtdb)) - (db:clean-up main-tmpdb) - ) - ;; sync from main dbs to /tmp ones. - ;; - ((old2new) - (set! data-synced - (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb main-tmpdb) - data-synced))) - - ;; sync from /tmp dbs to main ones. - ;; - ((new2old) - (set! data-synced - (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f main-tmpdb mtdb) - data-synced))) - - ((adj-target) - (db:adj-target (dbr:dbdat-dbh mtdb)) - (db:adj-target (dbr:dbdat-dbh main-tmpdb)) - ) - - ((schema) - (db:patch-schema-maindb (dbr:dbdat-dbh mtdb)) - (db:patch-schema-maindb (dbr:dbdat-dbh main-tmpdb)) - (db:patch-schema-rundb (dbr:dbdat-dbh mtdb)) - (db:patch-schema-rundb (dbr:dbdat-dbh main-tmpdb)) - ) - ) - (dbfile:add-dbdat dbstruct #f main-tmpdb)) - options))) - (hash-table-values (dbr:dbstruct-subdbs dbstruct))) - (if dbdat (dbfile:add-dbdat dbstruct #f dbdat)) - data-synced) + data-synced + ) ) ;; Sync all changed db's ;; (define (db:tmp->megatest.db-sync dbstruct run-id last-update) @@ -1270,11 +782,11 @@ (lambda (subdb) (let* ((dbname (db:run-id->dbname run-id)) (mtdb (dbr:subdb-mtdb subdb)) (tmpdb (db:get-subdb dbstruct run-id)) (refndb (dbr:subdb-refndb subdb)) - (newres (db:sync-tables (db:sync-all-tables-list dbstruct) last-update tmpdb refndb mtdb))) + (newres (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) last-update tmpdb refndb mtdb))) ;; (stack-push! (dbr:subdb-dbstack subdb) tmpdb) (dbfile:add-dbdat dbstruct run-id tmpdb) (set! res (cons newres res)))) subdbs) res)) @@ -1357,99 +869,10 @@ #;(define open-run-close open-run-close-exception-handling) ;; open-run-close-no-exception-handling ;; open-run-close-exception-handling) ;;) -(define db:trigger-list - (list (list "update_runs_trigger" "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs - FOR EACH ROW - BEGIN - UPDATE runs SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;" ) - (list "update_run_stats_trigger" "CREATE TRIGGER IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats - FOR EACH ROW - BEGIN - UPDATE run_stats SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;" ) - (list "update_tests_trigger" "CREATE TRIGGER IF NOT EXISTS update_tests_trigger AFTER UPDATE ON tests - FOR EACH ROW - BEGIN - UPDATE tests SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;" ) - (list "update_teststeps_trigger" "CREATE TRIGGER IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps - FOR EACH ROW - BEGIN - UPDATE test_steps SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;" ) - (list "update_test_data_trigger" "CREATE TRIGGER IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data - FOR EACH ROW - BEGIN - UPDATE test_data SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;" ))) -;; -;; ADD run-id SUPPORT -;; -(define (db:create-all-triggers dbstruct) -(db:with-db - dbstruct #f #f - (lambda (dbdat db) -(db:create-triggers db)))) - -(define (db:create-triggers db) - (for-each (lambda (key) - (sqlite3:execute db (cadr key))) - db:trigger-list)) - -(define (db:drop-all-triggers dbstruct) - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (db:drop-triggers db)))) - -(define (db:is-trigger-dropped db tbl-name) - (let* ((trigger-name (if (equal? tbl-name "test_steps") - "update_teststeps_trigger" - (conc "update_" tbl-name "_trigger"))) - (res #f)) - (sqlite3:for-each-row - (lambda (name) - (if (equal? name trigger-name) - (set! res #t))) - db - "SELECT name FROM sqlite_master WHERE type = 'trigger' ;" - ))) - -(define (db:drop-triggers db) - (for-each - (lambda (key) - (sqlite3:execute db (conc "drop trigger if exists " (car key)))) - db:trigger-list)) - -(define (db:drop-trigger db tbl-name) - (let* ((trigger-name (if (equal? tbl-name "test_steps") - "update_teststeps_trigger" - (conc "update_" tbl-name "_trigger")))) - (for-each - (lambda (key) - (if (equal? (car key) trigger-name) - (sqlite3:execute db (conc "drop trigger if exists " trigger-name)))) - db:trigger-list))) - -(define (db:create-trigger db tbl-name) - (let* ((trigger-name (if (equal? tbl-name "test_steps") - "update_teststeps_trigger" - (conc "update_" tbl-name "_trigger")))) - (for-each (lambda (key) - (if (equal? (car key) trigger-name) - (sqlite3:execute db (cadr key)))) - db:trigger-list))) - (define (db:initialize-main-db db) (when (not *configinfo*) (launch:setup)) ;; added because Elena was getting stack dump because *configinfo* below was #f. (let* ((configdat (car *configinfo*)) ;; tut tut, global warning... @@ -2308,10 +1731,11 @@ ;; using keys:config-get-fields? (define (db:get-keys dbstruct) (keys:config-get-fields *configdat*) ) + ;; (if *db-keys* *db-keys* ;; (let ((res '())) ;; (db:with-db dbstruct #f #f ;; (lambda (dbdat db) ;; (sqlite3:for-each-row @@ -2438,16 +1862,18 @@ (andstr (if (> (length keys) 0) " AND " "")) (valslots (keys->valslots keys)) ;; ?,?,? ... (allvals (append (list runname state status user contour) (map cadr keyvals))) (qryvals (append (list runname) (map cadr keyvals))) (key=?str (string-intersperse (map (lambda (k)(conc k "=?")) keys) " AND "))) + (debug:print 0 *default-log-port* "Got here 0.") (debug:print 3 *default-log-port* "keys: " keys " allvals: " allvals " keyvals: " keyvals " key=?str is " key=?str) (debug:print 2 *default-log-port* "NOTE: using target " (string-intersperse (map cadr keyvals) "/") " for this run") (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and" (db:with-db dbstruct #f #f (lambda (dbdat db) + (debug:print 0 *default-log-port* "Got here 1.") (let ((res #f)) (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time,contour" comma keystr ") VALUES (?,?,?,?,strftime('%s','now'),?" comma valslots ");") allvals) (apply sqlite3:for-each-row (lambda (id) Index: dbfile.scm ================================================================== --- dbfile.scm +++ dbfile.scm @@ -18,28 +18,29 @@ ;;====================================================================== (declare (unit dbfile)) ;; (declare (uses debugprint)) -;; (declare (uses commonmod)) +(declare (uses commonmod)) (module dbfile * (import scheme chicken data-structures - extras) + extras + matchable) (import (prefix sqlite3 sqlite3:) - posix typed-records srfi-18 + posix typed-records srfi-18 srfi-1 srfi-69 stack files ports - ;; commonmod + commonmod ) ;; (import debugprint) ;;====================================================================== @@ -77,15 +78,32 @@ ;; need to keep dbhandles and cached statements together (defstruct dbr:dbdat (dbfile #f) (dbh #f) (stmt-cache (make-hash-table)) - (read-only #f)) + (read-only #f) + (birth-sec (current-seconds))) (define *dbstruct-dbs* #f) (define *db-access-mutex* (make-mutex)) (define *no-sync-db* #f) +(define *db-sync-in-progress* #f) +(define *db-with-db-mutex* (make-mutex)) +(define *max-api-process-requests* 0) +(define *api-process-request-count* 0) +(define *db-write-access* #t) +(define *db-last-sync* 0) ;; last time the sync to megatest.db happened +(define *db-multi-sync-mutex* (make-mutex)) ;; protect access to *db-sync-in-progress*, *db-last-sync* + +(define (db:generic-error-printout exn . message) + (print-call-chain (current-error-port)) + (apply dbfile:print-err message) + (dbfile:print-err + ", error: " ((condition-property-accessor 'exn 'message) exn) + ", arguments: " ((condition-property-accessor 'exn 'arguments) exn) + ", location: " ((condition-property-accessor 'exn 'location) exn) + )) (define (dbfile:run-id->key run-id) (or run-id 'main)) (define (db:safely-close-sqlite3-db db stmt-cache #!key (try-num 3)) @@ -186,11 +204,11 @@ ;; called in http-transport and replicated in rmt.scm for *local* access. ;; (define (dbfile:setup do-sync areapath tmppath) (cond (*dbstruct-dbs* - ;; (dbfile:print-err "WARNING: dbfile:setup called when *dbstruct-dbs* is already initialized") + (dbfile:print-err "WARNING: dbfile:setup called when *dbstruct-dbs* is already initialized") *dbstruct-dbs*) ;; TODO: when multiple areas are supported, this optimization will be a hazard (else (let* ((dbstruct (make-dbr:dbstruct))) (set! *dbstruct-dbs* dbstruct) (dbr:dbstruct-areapath-set! dbstruct areapath) @@ -222,13 +240,20 @@ (set! *dbfile:num-handles-in-use* (+ *dbfile:num-handles-in-use* 1)) (stack-pop! (dbr:subdb-dbstack subdb)))))) ;; return a previously opened db handle to the stack of available handles (define (dbfile:add-dbdat dbstruct run-id dbdat) - (let* ((subdb (dbfile:get-subdb dbstruct run-id))) - (set! *dbfile:num-handles-in-use* (- *dbfile:num-handles-in-use* 1)) - (stack-push! (dbr:subdb-dbstack subdb) dbdat))) + (let* ((subdb (dbfile:get-subdb dbstruct run-id)) + (age (- (current-seconds)(dbr:dbdat-birth-sec dbdat)))) + (if (> age 300) ;; just testing - discard and close after 30 sec + (begin + ;; (map sqlite3:finalize! (hash-table-values (dbr:dbdat-stmt-cache dbdat))) + ;; (sqlite3:finalize! (dbr:dbdat-dbh dbdat)) + (dbfile:print-err "INFO: Discarded dbdat over 30 sec old ("age"s)")) + (begin + (set! *dbfile:num-handles-in-use* (- *dbfile:num-handles-in-use* 1)) + (stack-push! (dbr:subdb-dbstack subdb) dbdat))))) ;; set up a subdb ;; (define (dbfile:init-subdb dbstruct run-id init-proc) (let* ((dbname (dbfile:run-id->dbname run-id)) @@ -459,71 +484,65 @@ ;; so watch for problems. I'm still not clear if it is needed to manually ;; finalize sqlite3 dbs with the sqlite3 egg. ;; (define (dbfile:cautious-open-database fname init-proc #!optional (tries-left 50)) - (let* ((parent-dir (or (pathname-directory fname)(current-directory))) ;; no parent? go local - (raw-fname (pathname-file fname)) - (dir-writable (file-write-access? parent-dir)) - (file-exists (file-exists? fname)) - (file-write (if file-exists - (file-write-access? fname) - dir-writable ))) - ;; (mutex-lock! *db-open-mutex*) ;; tried this mutex, not clear it helped. - (if file-write ;; dir-writable - (condition-case - (let* ((lockfname (conc fname ".lock")) - (readyfname (conc parent-dir "/.ready-" raw-fname)) - (readyexists (file-exists? readyfname))) - (if (not readyexists) - (dbfile:simple-file-lock-and-wait lockfname) - ) - (let ((db (sqlite3:open-database fname))) - (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) - (sqlite3:execute db "PRAGMA synchronous = 0;") - (if (not file-exists) - (init-proc db)) - (if (not readyexists) - (begin - (dbfile:simple-file-release-lock lockfname) - (with-output-to-file - readyfname - (lambda () - (print "Ready at " (current-seconds)))))) - db)) - (exn (io-error) (dbfile:print-err "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")) - (exn (corrupt) (dbfile:print-err "ERROR: database " fname " is corrupt. Repair it to proceed.")) - (exn (busy) (dbfile:print-err "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back.")) - (exn (permission)(dbfile:print-err "ERROR: database " fname " has some permissions problem.")) - (exn () (dbfile:print-err "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn)))) - - - (condition-case - (begin - (dbfile:print-err "WARNING: opening db in non-writable dir " fname) - (let ((db (sqlite3:open-database fname))) - (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) - (sqlite3:execute db "PRAGMA synchronous = 0;") - ;; (mutex-unlock! *db-open-mutex*) - db)) - (exn (io-error) (dbfile:print-err "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")) - (exn (corrupt) (dbfile:print-err "ERROR: database " fname " is corrupt. Repair it to proceed.")) - (exn (busy) (dbfile:print-err "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back.")) - (exn (permission)(dbfile:print-err "ERROR: database " fname " has some permissions problem.")) - (exn () (dbfile:print-err "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn)))) - ))) - + (let* ((busy-file (conc fname"-journal")) + (delay-time (* (- 51 tries-left) 1.1)) + (retry (lambda () + (thread-sleep! delay-time) + (if (> tries-left 0) + (dbfile:cautious-open-database fname init-proc (- tries-left 1)))))) + (assert (>= tries-left 0) (conc "FATAL: too many attempts in dbfile:cautious-open-database of "fname", giving up.")) + (if (and (file-write-access? fname) + (file-exists? busy-file)) + (begin + (dbfile:print-err "INFO: dbfile:cautious-open-database: journal file " busy-file " exists, trying again in few seconds.") + (thread-sleep! 1) + (if (eq? tries-left 2) + (begin + (dbfile:print-err "INFO: forcing journal rollup "busy-file) + (dbfile:brute-force-salvage-db fname))) + (dbfile:cautious-open-database fname init-proc (- tries-left 1))) + (let* ((result (condition-case + (dbfile:with-simple-file-lock + (conc fname ".lock") + (lambda () + (let* ((db-exists (file-exists? fname)) + (db (sqlite3:open-database fname))) + (if (and init-proc (not db-exists)) + (init-proc db)) + db))) + (exn (io-error) + (dbfile:print-err exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.") + (retry)) + (exn (corrupt) + (dbfile:print-err exn "ERROR: database " fname " is corrupt. Repair it to proceed.") + (retry)) + (exn (busy) + (dbfile:print-err exn "ERROR: database " fname + " is locked. Try copying to another location, remove original and copy back.") + (retry)) + (exn (permission)(dbfile:print-err exn "ERROR: database " fname " has some permissions problem.") + (retry)) + (exn () + (dbfile:print-err exn "ERROR: Unknown error with database " fname " message: " + ((condition-property-accessor 'exn 'message) exn)) + (retry))))) + #;(if (file-write-access? fname) + (dbfile:simple-file-release-lock lock-file)) + result)))) (define (dbfile:brute-force-salvage-db fname) (let* ((backupfname (conc fname"-"(current-process-id)".bak")) (cmd (conc "cp "fname" "backupfname";mv "fname" "(conc fname ".delme;") "cp "backupfname" "fname))) (dbfile:print-err "WARNING: attempting recovery of file "fname" by running commands:\n" " "cmd) (system cmd))) -(define (dbfile:cautious-open-database-orig fname init-proc #!optional (tries-left 50)) +#;(define (dbfile:cautious-open-database-orig fname init-proc #!optional (tries-left 50)) (let* ((lock-file (conc fname".lock")) (delay-time (* (- 51 tries-left) 1.1)) (retry (lambda () (thread-sleep! delay-time) (if (> tries-left 0) @@ -617,18 +636,601 @@ ;; (define (db:no-sync-get-lock db keyname) (sqlite3:with-transaction db (lambda () - (handle-exceptions - exn - (let ((lock-time (current-seconds))) - ;; (debug:print-info 2 *default-log-port* "db:no-sync-get-lock keyname=" keyname ", lock-time=" lock-time ", exn=" exn) - (sqlite3:execute db "INSERT INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time) - `(#t . ,lock-time)) - `(#f . ,(sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname)))))) - + (condition-case + (let* ((curr-val (db:no-sync-get/default db keyname #f))) + (if curr-val + `(#f . ,curr-val) ;; (sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname)) + (let ((lock-time (current-seconds))) + (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time) + `(#t . ,lock-time)))) + (exn (io-error) (dbfile:print-err "ERROR: i/o error with no-sync db. Check permissions, disk space etc. and try again.")) + (exn (corrupt) (dbfile:print-err "ERROR: database no-sync db is corrupt. Repair it to proceed.")) + (exn (busy) (dbfile:print-err "ERROR: database no-sync db is locked. Try copying to another location, remove original and copy back.")) + (exn (permission)(dbfile:print-err "ERROR: database no-sync db has some permissions problem.")) + (exn () ;; (status done) ;; I don't know how to detect status done but no data! + (dbfile:print-err "ERROR: Unknown error with database no-sync db message: exn="(condition->list exn)", \n" + ((condition-property-accessor 'exn 'message) exn)) + `(#f . ,(current-seconds))))))) + +(define (db:no-sync-get-lock-timeout db keyname timeout) + (let* ((lockdat (db:no-sync-get-lock db keyname))) + (match lockdat + ((#f . lock-time) + (if (> (- (current-seconds) (if (string? lock-time)(string->number lock-time)lock-time)) timeout) + (let ((lock-time (current-seconds))) + ;; (debug:print-info 2 *default-log-port* "db:no-sync-get-lock keyname=" keyname ", lock-time=" lock-time ", exn=" exn) + (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time) + `(#t . ,lock-time)) + lockdat)) + (else lockdat)))) + +;; NOTE: This will steal the lock after timeout of waiting. +;; +(define (db:with-no-sync-lock db keyname timeout proc) + (let* ((lockdat (db:no-sync-get-lock-timeout db keyname)) + (gotlock (car lockdat)) + (locktime (cdr lockdat))) + (if gotlock + (let ((res (proc))) + (db:no-sync-del! db keyname) + res)))) + +;;====================================================================== +;; sync back functions pulled from db.scm +;;====================================================================== + +;; Get a lock from the no-sync-db for the from-db, then delta sync the from-db to the to-db, otherwise return #f +;; +(define (db:lock-and-delta-sync no-sync-db dbstruct from-db-file runid keys dbinit) + (assert (not *db-sync-in-progress*) "FATAL: db:lock-and-sync called while a sync is in progress.") + ;; (dbfile:print-err *default-log-port* "db:lock-and-delta-sync") + (let* ((lock-file (conc from-db-file ".lock"))) + (if (common:simple-file-lock lock-file) + (begin + (dbfile:print-err "INFO: db:lock-and-delta-sync copying db "runid" at "(current-seconds)) + (set! *db-sync-in-progress* #t) + (db:sync-touched dbstruct runid keys dbinit) + (set! *db-sync-in-progress* #f) + (delete-file* lock-file) + #t) + (begin + (dbfile:print-err "INFO: could not get lock for " from-db-file ", sync likely in progress.") + #f + )))) + +;; ;; Get a lock from the no-sync-db for the from-db, then delta sync the from-db to the to-db, otherwise return #f +;; ;; +;; (define (db:lock-and-delta-sync-orig no-sync-db dbstruct from-db-file runid keys dbinit) +;; (assert (not *db-sync-in-progress*) "FATAL: db:lock-and-sync called while a sync is in progress.") +;; ;; (dbfile:print-err *default-log-port* "db:lock-and-delta-sync") +;; (let* ((lockdat (db:no-sync-get-lock-timeout no-sync-db from-db-file 60)) +;; (gotlock (car lockdat)) +;; (locktime (cdr lockdat))) +;; ;; (debug:print-info 3 *default-log-port* "db:lock-and-delta-sync: got lock?") +;; +;; (if gotlock +;; (begin +;; (dbfile:print-err "INFO: db:lock-and-delta-sync copying db "runid" at "(current-seconds)) +;; (set! *db-sync-in-progress* #t) +;; (db:sync-touched dbstruct runid keys dbinit) +;; (set! *db-sync-in-progress* #f) +;; (db:no-sync-del! no-sync-db from-db-file) +;; #t) +;; (begin +;; (dbfile:print-err "ERROR: could not get lock for " from-db-file " from no-sync-db") +;; #f +;; )))) + +;; sync run from tmp disk to nfs disk if touched +;; +;; call with dbinit=db:initialize-main-db +;; +(define (db:sync-touched dbstruct run-id keys #!key dbinit (force-sync #f)) + (dbfile:print-err "db:sync-touched Syncing: " (conc (if run-id run-id "main") ".db")) + (let* (;; the subdb is needed to access the mtdbdat + (subdb (or (dbfile:get-subdb dbstruct run-id) + (dbfile:init-subdb dbstruct run-id dbinit))) + (tmpdbfile (dbr:subdb-tmpdbfile subdb)) + (mtdb (dbr:subdb-mtdbdat subdb)) + (tmpdb (dbfile:open-db dbstruct run-id dbinit)) ;; sqlite3-db tmpdbfile #f)) + (start-t (current-seconds))) + (mutex-lock! *db-multi-sync-mutex*) + (let ((update_info (cons "last_update" (if force-sync 0 *db-last-sync*) ))) + (mutex-unlock! *db-multi-sync-mutex*) + (db:sync-tables (db:sync-all-tables-list dbstruct keys) update_info tmpdb mtdb)) + (mutex-lock! *db-multi-sync-mutex*) + (set! *db-last-sync* start-t) + (set! *db-last-access* start-t) + (mutex-unlock! *db-multi-sync-mutex*) + (dbfile:add-dbdat dbstruct run-id tmpdb) + #t)) + +;; just tests, test_steps and test_data tables +(define db:sync-tests-only + (list + ;; (list "strs" + ;; '("id" #f) + ;; '("str" #f)) + (list "tests" + '("id" #f) + '("run_id" #f) + '("testname" #f) + '("host" #f) + '("cpuload" #f) + '("diskfree" #f) + '("uname" #f) + '("rundir" #f) + '("shortdir" #f) + '("item_path" #f) + '("state" #f) + '("status" #f) + '("attemptnum" #f) + '("final_logf" #f) + '("logdat" #f) + '("run_duration" #f) + '("comment" #f) + '("event_time" #f) + '("fail_count" #f) + '("pass_count" #f) + '("archived" #f) + '("last_update" #f)) + (list "test_steps" + '("id" #f) + '("test_id" #f) + '("stepname" #f) + '("state" #f) + '("status" #f) + '("event_time" #f) + '("comment" #f) + '("logfile" #f) + '("last_update" #f)) + (list "test_data" + '("id" #f) + '("test_id" #f) + '("category" #f) + '("variable" #f) + '("value" #f) + '("expected" #f) + '("tol" #f) + '("units" #f) + '("comment" #f) + '("status" #f) + '("type" #f) + '("last_update" #f)))) + +;; needs db to get keys, this is for syncing all tables +;; +(define (db:sync-main-list dbstruct keys) + (let ((keys keys)) ;; (db:get-keys dbstruct))) + (list + (list "keys" + '("id" #f) + '("fieldname" #f) + '("fieldtype" #f)) + (list "metadat" '("var" #f) '("val" #f)) + (append (list "runs" + '("id" #f)) + (map (lambda (k)(list k #f)) + (append keys + (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update")))) + (list "archive_disks" + '("id" #f) + '("archive_area_name" #f) + '("disk_path" #f) + '("last_df" #f) + '("last_df_time" #f) + '("creation_time" #f)) + + (list "archive_blocks" + '("id" #f) + '("archive_disk_id" #f) + '("disk_path" #f) + '("last_du" #f) + '("last_du_time" #f) + '("creation_time" #f)) + + (list "test_meta" + '("id" #f) + '("testname" #f) + '("owner" #f) + '("description" #f) + '("reviewed" #f) + '("iterated" #f) + '("avg_runtime" #f) + '("avg_disk" #f) + '("tags" #f) + '("jobgroup" #f))))) + +(define (db:sync-all-tables-list dbstruct keys) + (append (db:sync-main-list dbstruct keys) + db:sync-tests-only)) + +;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) ) +;; db's are dbdat's +;; +;; if last-update specified ("field-name" . time-in-seconds) +;; then sync only records where field-name >= time-in-seconds +;; IFF field-name exists +;; +(define (db:sync-tables tbls last-update fromdb todb . slave-dbs) + (handle-exceptions + exn + (begin + (dbfile:print-err "EXCEPTION: database probably overloaded or unreadable in db:sync-tables.") + (print-call-chain (current-error-port)) + (dbfile:print-err " message: " ((condition-property-accessor 'exn 'message) exn)) + (dbfile:print-err "exn=" (condition->list exn)) + (dbfile:print-err " status: " ((condition-property-accessor 'sqlite3 'status) exn)) + (dbfile:print-err " src db: " (dbr:dbdat-dbfile fromdb)) + (for-each (lambda (dbdat) + (let ((dbpath (dbr:dbdat-dbfile dbdat))) + (dbfile:print-err " dbpath: " dbpath) + (if #t ;; (not (db:repair-db dbdat)) + (begin + (dbfile:print-err "Failed to rebuild (repair is turned off) " dbpath ", exiting now.") + (exit))))) + (cons todb slave-dbs)) + + 0) + + ;; this is the work to be done") + (cond + ((not fromdb) (dbfile:print-err "WARNING: db:sync-tables called with fromdb missing") + -1) + ((not todb) (dbfile:print-err "WARNING: db:sync-tables called with todb missing") + -2) + ((not (sqlite3:database? (dbr:dbdat-dbh fromdb))) + (dbfile:print-err "db:sync-tables called with fromdb not a database " fromdb) + -3) + ((not (sqlite3:database? (dbr:dbdat-dbh todb))) + (dbfile:print-err "db:sync-tables called with todb not a database " todb) + -4) + + ((not (file-write-access? (dbr:dbdat-dbfile todb))) + (dbfile:print-err "db:sync-tables called with todb not a read-only database " todb) + -5) + ((not (null? (let ((readonly-slave-dbs + (filter + (lambda (dbdat) + (not (file-write-access? (dbr:dbdat-dbfile todb)))) + slave-dbs))) + (for-each + (lambda (bad-dbdat) + (dbfile:print-err "db:sync-tables called with todb not a read-only database " bad-dbdat)) + readonly-slave-dbs) + readonly-slave-dbs))) -6) + (else + ;; (dbfile:print-err "db:sync-tables: args are good") + + (let ((stmts (make-hash-table)) ;; table-field => stmt + (all-stmts '()) ;; ( ( stmt1 value1 ) ( stml2 value2 )) + (numrecs (make-hash-table)) + (start-time (current-milliseconds)) + (tot-count 0)) + (for-each ;; table + (lambda (tabledat) + (let* ((tablename (car tabledat)) + (fields (cdr tabledat)) + (has-last-update (member "last_update" fields)) + (use-last-update (cond + ((and has-last-update + (member "last_update" fields)) + #t) ;; if given a number, just use it for all fields + ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table + ((and (pair? last-update) + (member (car last-update) ;; last-update field name + (map car fields))) + #t) + ((and last-update (not (pair? last-update)) (not (number? last-update))) + (dbfile:print-err "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update);; found in fields + #f) + (else + #f))) + (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for + (if (number? last-update) + last-update + (cdr last-update)) + #f)) + (last-update-field (if use-last-update + (if (number? last-update) + "last_update" + (car last-update)) + #f)) + (num-fields (length fields)) + (field->num (make-hash-table)) + (num->field (apply vector (map car fields))) ;; BBHERE + (full-sel (conc "SELECT " (string-intersperse (map car fields) ",") + " FROM " tablename (if use-last-update ;; apply last-update criteria + (conc " WHERE " last-update-field " >= " last-update-value) + "") + ";")) + (full-ins (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) " + " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );")) + (fromdat '()) + (fromdats '()) + (totrecords 0) + (batch-len 100) ;; (string->number (or (configf:lookup *configdat* "sync" "batchsize") "100"))) + (todat (make-hash-table)) + (count 0) + (field-names (map car fields)) + (delay-handicap 0) ;; (string->number (or (configf:lookup *configdat* "sync" "delay-handicap") "0"))) + ) + + ;; set up the field->num table + (for-each + (lambda (field) + (hash-table-set! field->num field count) + (set! count (+ count 1))) + fields) + + ;; read the source table + ;; store a list of all rows in the table in fromdat, up to batch-len. + ;; Then add fromdat to the fromdats list, clear fromdat and repeat. + (sqlite3:for-each-row + (lambda (a . b) + (set! fromdat (cons (apply vector a b) fromdat)) + (if (> (length fromdat) batch-len) + (begin + (set! fromdats (cons fromdat fromdats)) + (set! fromdat '()) + (set! totrecords (+ totrecords 1))) + ) + ) + (dbr:dbdat-dbh fromdb) + full-sel) + + ;; Count less than batch-len as a record + (if (> (length fromdat) 0) + (set! totrecords (+ totrecords 1))) + + ;; tack on remaining records in fromdat + (if (not (null? fromdat)) + (set! fromdats (cons fromdat fromdats))) + + (if (common:low-noise-print 120 "sync-records") + (dbfile:print-err "found " totrecords " records to sync")) + + (sqlite3:for-each-row + (lambda (a . b) + (hash-table-set! todat a (apply vector a b))) + (dbr:dbdat-dbh todb) + full-sel) + + (when (and delay-handicap (> delay-handicap 0)) + (dbfile:print-err "imposing synthetic sync delay of "delay-handicap" seconds since sync/delay-handicap is configured") + (thread-sleep! delay-handicap) + (dbfile:print-err "synthetic sync delay of "delay-handicap" seconds completed") + ) + + ;; first pass implementation, just insert all changed rows + + (for-each + (lambda (targdb) + (let* ((db (dbr:dbdat-dbh targdb)) + (drp-trigger (if (member "last_update" field-names) + (db:drop-trigger db tablename) + #f)) + (has-last-update (member "last_update" field-names)) + (is-trigger-dropped (if has-last-update + (db:is-trigger-dropped db tablename) + #f)) + (stmth (sqlite3:prepare db full-ins)) + (changed-rows 0)) + ;; (db:delay-if-busy targdb) ;; NO WAITING + (if (and + has-last-update + (common:low-noise-print 120 "is-trigger-dropped")) + (dbfile:print-err "is-trigger-dropped: " is-trigger-dropped)) + (for-each + (lambda (fromdat-lst) + (sqlite3:with-transaction + db + (lambda () + (for-each ;; + (lambda (fromrow) + (let* ((a (vector-ref fromrow 0)) + (curr (hash-table-ref/default todat a #f)) + (same #t)) + (let loop ((i 0)) + (if (or (not curr) + (not (equal? (vector-ref fromrow i)(vector-ref curr i)))) + (set! same #f)) + (if (and same + (< i (- num-fields 1))) + (loop (+ i 1)))) + (if (not same) + (begin + (apply sqlite3:execute stmth (vector->list fromrow)) + (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0))) + (set! changed-rows (+ changed-rows 1)) + ) + ) + )) + fromdat-lst)))) + fromdats) + + + (if (> changed-rows 0) + (dbfile:print-err "table " tablename " changed rows: " changed-rows) + ) + + + (sqlite3:finalize! stmth) + (if (member "last_update" field-names) + (db:create-trigger db tablename)))) + (append (list todb) slave-dbs) + ) + ) + ) + tbls) + (let* ((runtime (- (current-milliseconds) start-time)) + (should-print (or ;; (debug:debug-mode 12) + (common:low-noise-print 120 "db sync" (> runtime 500))))) ;; low and high sync times treated as separate. + (if should-print (dbfile:print-err "INFO: db sync, total run time " runtime " ms")) + (for-each + (lambda (dat) + (let ((tblname (car dat)) + (count (cdr dat))) + (set! tot-count (+ tot-count count)) + (if (> count 0) + (if should-print (dbfile:print-err "FIXME: tblname: " tblname", count: "count" "))))) ;; (format #f " ~10a ~5a" tblname count)))))) + (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b)))))) + tot-count))))) + +;;====================================================================== +;; trigger setup/takedown +;;====================================================================== + +(define db:trigger-list + (list (list "update_runs_trigger" "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs + FOR EACH ROW + BEGIN + UPDATE runs SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;" ) + (list "update_run_stats_trigger" "CREATE TRIGGER IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats + FOR EACH ROW + BEGIN + UPDATE run_stats SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;" ) + (list "update_tests_trigger" "CREATE TRIGGER IF NOT EXISTS update_tests_trigger AFTER UPDATE ON tests + FOR EACH ROW + BEGIN + UPDATE tests SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;" ) + (list "update_teststeps_trigger" "CREATE TRIGGER IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps + FOR EACH ROW + BEGIN + UPDATE test_steps SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;" ) + (list "update_test_data_trigger" "CREATE TRIGGER IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data + FOR EACH ROW + BEGIN + UPDATE test_data SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;" ))) +;; +;; ADD run-id SUPPORT +;; +(define (db:create-all-triggers dbstruct) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (db:create-triggers db)))) + +(define (db:create-triggers db) + (for-each (lambda (key) + (sqlite3:execute db (cadr key))) + db:trigger-list)) + +(define (db:drop-all-triggers dbstruct) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (db:drop-triggers db)))) + +(define (db:is-trigger-dropped db tbl-name) + (let* ((trigger-name (if (equal? tbl-name "test_steps") + "update_teststeps_trigger" + (conc "update_" tbl-name "_trigger"))) + (res #f)) + (sqlite3:for-each-row + (lambda (name) + (if (equal? name trigger-name) + (set! res #t))) + db + "SELECT name FROM sqlite_master WHERE type = 'trigger' ;") + res)) + +(define (db:drop-triggers db) + (for-each + (lambda (key) + (sqlite3:execute db (conc "drop trigger if exists " (car key)))) + db:trigger-list)) + +(define (db:drop-trigger db tbl-name) + (let* ((trigger-name (if (equal? tbl-name "test_steps") + "update_teststeps_trigger" + (conc "update_" tbl-name "_trigger")))) + (for-each + (lambda (key) + (if (equal? (car key) trigger-name) + (sqlite3:execute db (conc "drop trigger if exists " trigger-name)))) + db:trigger-list))) + +(define (db:create-trigger db tbl-name) + (let* ((trigger-name (if (equal? tbl-name "test_steps") + "update_teststeps_trigger" + (conc "update_" tbl-name "_trigger")))) + (for-each (lambda (key) + (if (equal? (car key) trigger-name) + (sqlite3:execute db (cadr key)))) + db:trigger-list))) + +;;====================================================================== +;; db access stuff +;;====================================================================== + +;; call with dbinit=db:initialize-main-db +;; +(define (db:open-db dbstruct run-id dbinit) + (let* ((dbdat (dbfile:open-db dbstruct run-id dbinit))) + (set! *db-write-access* (not (dbr:dbdat-read-only dbdat))) + dbdat)) + +(define dbfile:db-init-proc (make-parameter #f)) + +;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;") +;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no +;; +(define (db:with-db dbstruct run-id r/w proc . params) + (let* ((have-struct (dbr:dbstruct? dbstruct)) + (dbdat (if have-struct ;; this stuff just allows us to call with a db handle directly + (db:open-db dbstruct run-id (dbfile:db-init-proc)) ;; (dbfile:get-subdb dbstruct run-id) + #f)) + (db (if have-struct ;; this stuff just allows us to call with a db handle directly + (dbr:dbdat-dbh dbdat) + dbstruct)) + (fname (if dbdat + (dbr:dbdat-dbfile dbdat) + "nofilenameavailable")) + #;(subdb (if have-struct + (dbfile:get-subdb dbstruct run-id) + #f)) + (use-mutex (> *api-process-request-count* 25))) ;; was 25 + (if (and use-mutex + (common:low-noise-print 120 "over-50-parallel-api-requests")) + (dbfile:print-err *api-process-request-count* " parallel api requests being processed in process " (current-process-id) ", throttling access")) + (if (common:low-noise-print 600 (conc "parallel-api-requests" *max-api-process-requests*)) + (dbfile:print-err "Parallel api request count: " *api-process-request-count* " max parallel requests: " *max-api-process-requests*)) + (condition-case + (begin + (if use-mutex (mutex-lock! *db-with-db-mutex*)) + (let ((res (apply proc dbdat db params))) + (if use-mutex (mutex-unlock! *db-with-db-mutex*)) + ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w)) + (if dbdat + (dbfile:add-dbdat dbstruct run-id dbdat)) + res)) + (exn (io-error) + (db:generic-error-printout exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")) + (exn (corrupt) + (db:generic-error-printout exn "ERROR: database " fname " is corrupt. Repair it to proceed.")) + (exn (busy) + (db:generic-error-printout exn "ERROR: database " fname + " is locked. Try copying to another location, remove original and copy back.")) + (exn (permission)(db:generic-error-printout exn "ERROR: database " fname " has some permissions problem.")) + (exn () + (db:generic-error-printout exn "ERROR: Unknown error with database " fname " message: " + ((condition-property-accessor 'exn 'message) exn)))))) + ;;====================================================================== ;; file utils ;;====================================================================== @@ -715,7 +1317,14 @@ (handle-exceptions exn #f ;; I don't really care why this failed (at least for now) (delete-file* fname))) - +(define (dbfile:with-simple-file-lock fname proc #!key (expire-time 300)) + (let ((gotlock (dbfile:simple-file-lock-and-wait fname expire-time: expire-time))) + (if gotlock + (let ((res (proc))) + (dbfile:simple-file-release-lock fname) + res) + (assert #t "FATAL: simple file lock never got a lock.")))) + ) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -36,16 +36,17 @@ (declare (uses server)) ;; (declare (uses daemon)) (declare (uses portlogger)) (declare (uses rmt)) (declare (uses dbfile)) +(declare (uses commonmod)) (include "common_records.scm") (include "db_records.scm") (include "js-path.scm") -(import dbfile) +(import dbfile commonmod) (require-library stml) (define (http-transport:make-server-url hostport) (if (not hostport) #f @@ -284,29 +285,31 @@ (begin (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".") (debug:print 0 *default-log-port* " message: " msg ", exn=" exn) (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey")) (debug:print 0 *default-log-port* " call-chain: " call-chain))) - (if runremote - (remote-conndat-set! runremote #f)) + (set! *runremote* #f) + (set! runremote #f) + ;; (if runremote + ;; (remote-conndat-set! runremote #f)) ;; Killing associated server to allow clean retry.") ;; (tasks:kill-server-run-id run-id) ;; better to kill the server in the logic that called this routine? (mutex-unlock! *http-mutex*) - ;;; (signal (make-composite-condition - ;;; (make-property-condition 'commfail 'message "failed to connect to server"))) - ;;; "communications failed" + ;; (signal (make-composite-condition + ;; (make-property-condition 'commfail 'message "failed to connect to server"))) + ;; "communications failed" (db:obj->string #f)) - (with-input-from-request ;; was dat - fullurl - (list (cons 'key (or server-id "thekey")) - (cons 'cmd cmd) - (cons 'params sparams)) - read-string)) + (with-input-from-request ;; was dat + fullurl + (list (cons 'key (or server-id "thekey")) + (cons 'cmd cmd) + (cons 'params sparams)) + read-string)) transport: 'http) - 0)) ;; added this speculatively + 0)) ;; added this speculatively ;; Shouldn't this be a call to the managed call-all-connections stuff above? - (close-all-connections!) + (close-all-connections!) ;; BUG? WHY IS THIS HERE? Are we failing to reuse connections? (mutex-unlock! *http-mutex*) )) (time-out (lambda () (thread-sleep! 45) (debug:print 0 *default-log-port* "WARNING: send-receive took more than 45 seconds!!") @@ -552,12 +555,16 @@ (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) (let ((curr-time (current-seconds))) (handle-exceptions exn (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk? exn=" exn) - (if (not *server-overloaded*) - (change-file-times server-log-file curr-time curr-time))))) + (if (and (< (- (current-seconds) server-start-time) 600) ;; run for ten minutes for experiment, 3600 thereafter + (not *server-overloaded*)) + (change-file-times server-log-file curr-time curr-time) + (if (common:low-noise-print 120 "start new server") + (server:kind-run *toppath*) ;; server:kind-run uses [servers] numservers + ))))) (loop 0 server-state bad-sync-count (current-milliseconds))) (else (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) (http-transport:server-shutdown port))))))) Index: megatest-version.scm ================================================================== --- megatest-version.scm +++ megatest-version.scm @@ -18,6 +18,6 @@ ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00.. ;; (declare (unit megatest-version)) -(define megatest-version 1.7001) +(define megatest-version 1.7002) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -40,12 +40,15 @@ (declare (uses mt)) (declare (uses api)) (declare (uses tasks)) ;; only used for debugging. (declare (uses env)) (declare (uses diff-report)) +(declare (uses db)) (declare (uses dbmod)) (declare (uses dbmod.import)) +(declare (uses commonmod)) +(declare (uses commonmod.import)) (declare (uses dbfile)) (declare (uses dbfile.import)) ;; (declare (uses debugprint)) ;; (declare (uses debugprint.import)) ;; (declare (uses mtargs)) @@ -53,10 +56,11 @@ ;; (declare (uses ftail)) ;; (import ftail) (import dbmod + commonmod dbfile) (define *db* #f) ;; this is only for the repl, do not use in general!!!! (include "common_records.scm") @@ -75,10 +79,12 @@ (require-library mutils) (define *usage-log-file* #f) ;; put path to file for logging usage in this var in the ~/.megatestrc file (define *usage-use-seconds* #t) ;; for Epoc seconds in usage logging change this to #t in ~/.megatestrc file + +(dbfile:db-init-proc db:initialize-main-db) ;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file ;; (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc"))) (if (common:file-exists? debugcontrolf) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -21,14 +21,15 @@ (use format typed-records) ;; RADT => purpose of json format?? (declare (unit rmt)) (declare (uses api)) (declare (uses http-transport)) +(declare (uses dbfile)) (include "common_records.scm") ;; (declare (uses rmtmod)) -;; (import rmtmod) +(import dbfile) ;; rmtmod) ;; ;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!! ;; @@ -63,11 +64,11 @@ (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected #;(common:telemetry-log (conc "rmt:"(->string cmd)) payload: `((rid . ,rid) (params . ,params))) - + (if (> attemptnum 2) (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum)) (cond ((> attemptnum 2) (thread-sleep! 0.05)) @@ -119,10 +120,17 @@ (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little (remote-hh-dat-set! runremote (common:get-homehost))) ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) (cond + ((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds + (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.") + (set! *runremote* #f) + ;; BUG: close-connections should go here? + (mutex-unlock! *rmt-mutex*) + (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat)) + ;;DOT EXIT; ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" } ;; give up if more than 150 attempts ((> attemptnum 150) (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.") @@ -375,20 +383,20 @@ (dbstructs-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t))) (read-only (not (file-write-access? db-file-path))) (start (current-milliseconds)) (resdat (if (not (and read-only qry-is-write)) (let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params)))) - (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong.. - exn ;; This is an attempt to detect that situation and recover gracefully - (begin - (debug:print 0 *default-log-port* "ERROR: bad data from server " v " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) - (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy + ;; (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong.. + ;; exn ;; This is an attempt to detect that situation and recover gracefully + ;; (begin + ;; (debug:print 0 *default-log-port* "ERROR: bad data from server " v " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) + ;; (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy (if (and (vector? v) (> (vector-length v) 1)) (let ((newvec (vector (vector-ref v 0)(vector-ref v 1)))) newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record - (vector #t '())))) ;; we could also check that the returned types are valid + (vector #t '()))) ;; ) ;; we could also check that the returned types are valid (vector #t '()))) (success (vector-ref resdat 0)) (res (vector-ref resdat 1)) (duration (- (current-milliseconds) start))) (if (and read-only qry-is-write) @@ -412,16 +420,16 @@ (mutex-unlock! *db-multi-sync-mutex*))))) res)) (define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params) (let* ((run-id (if run-id run-id 0)) - (res (handle-exceptions - exn - (begin - (print "transport failed. exn=" exn) - #f) - (http-transport:client-api-send-receive run-id connection-info cmd params)))) + (res ;; (handle-exceptions + ;; exn + ;; (begin + ;; (print "transport failed. exn=" exn) + ;; #f) + (http-transport:client-api-send-receive run-id connection-info cmd params))) ;; ) (if (and res (vector-ref res 0)) (vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!! #f))) ;;====================================================================== Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -23,18 +23,22 @@ (use spiffy uri-common intarweb http-client spiffy-request-vars) (declare (unit server)) +(declare (uses commonmod)) + (declare (uses common)) (declare (uses db)) (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. ;; (declare (uses synchash)) (declare (uses http-transport)) ;;(declare (uses rpc-transport)) (declare (uses launch)) ;; (declare (uses daemon)) + +(import commonmod) (include "common_records.scm") (include "db_records.scm") (define (server:make-server-url hostport) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -20,15 +20,17 @@ (use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking format) (import (prefix sqlite3 sqlite3:)) (declare (unit tasks)) +(declare (uses dbfile)) (declare (uses db)) (declare (uses rmt)) (declare (uses common)) (declare (uses pgdb)) +(import dbfile) ;; (import pgdb) ;; pgdb is a module (include "task_records.scm") (include "db_records.scm")