Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -28,11 +28,15 @@ ezsteps.scm lock-queue.scm sdb.scm rmt.scm api.scm \ subrun.scm portlogger.scm archive.scm env.scm \ diff-report.scm cgisetup/models/pgdb.scm # module source files -MSRCFILES = dbmod.scm dbfile.scm +MSRCFILES = dbmod.scm dbfile.scm # debugprint.scm mtargs.scm + +# mofiles/dbfile.o : mofiles/debugprint.o +# mofiles/debugprint.o : mofiles/mtargs.o + # ftail.scm rmtmod.scm commonmod.scm removed # MSRCFILES = ducttape-lib.scm pkts.scm stml2.scm cookie.scm mutils.scm \ # mtargs.scm commonmod.scm dbmod.scm adjutant.scm ulex.scm \ # rmtmod.scm apimod.scm @@ -162,11 +166,11 @@ monitor.o dashboard.o archive.o megatest.o : db_records.scm megatest-fossil-hash.scm tests.o runs.o dashboard.o dashboard-tests.o dashboard-context-menu.o dashboard-main.o : run_records.scm db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm -db.o api.o : mofiles/dbmod.o mofiles/dbfile.o +db.o api.o : mofiles/dbmod.o mofiles/dbfile.o mofiles/debugprint.o tests.o tasks.o dashboard-tasks.o : task_records.scm runs.o : test_records.scm Index: archive.scm ================================================================== --- archive.scm +++ archive.scm @@ -397,11 +397,11 @@ (bup-restore-params (list "-d" archive-path "restore" "-C" *toppath* archive-internal-path))) (debug:print-info 2 *default-log-port* "Restoring archived data to " *toppath* " from archive in " archive-path " ... " archive-internal-path) (run-n-wait bup-exe params: bup-restore-params print-cmd: "Running:")) (sleep 2) (db:multi-db-sync - (db:setup-db *dbstruct-dbs* *toppath* #f) + (db:setup #t) ;; (db:setup-db *dbstruct-dbs* *toppath* #f) 'killservers ;'dejunk ;'adj-testids 'old2new ) Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -593,11 +593,11 @@ (if (common:on-homehost?) (if (common:api-changed?) (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config")) (dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db")) (read-only (not (file-write-access? dbfile))) - (dbstruct (db:setup-db *dbstruct-dbs* *toppath* #f))) ;; #t))) + (dbstruct (db:setup #t))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;; #t))) (debug:print 0 *default-log-port* "WARNING: Version mismatch!\n" " expected: " (common:version-signature) "\n" " got: " (common:get-last-run-version)) (cond @@ -1007,70 +1007,10 @@ (args:get-arg "-server"))) (define (common:human-time) (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S")) -;;====================================================================== -;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp -;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server) -;; -(define (common:readonly-watchdog dbstruct) - (thread-sleep! 0.05) ;; delay for startup - (debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.") - ;; sync megatest.db to /tmp/.../megatst.db - (let* ((sync-cool-off-duration 3) - (golden-mtdb (dbr:dbstruct-mtdb dbstruct)) - (golden-mtpath (db:dbdat-get-path golden-mtdb)) - (tmp-mtdb (dbr:dbstruct-tmpdb dbstruct)) - (tmp-mtpath (db:dbdat-get-path tmp-mtdb))) - (debug:print-info 0 *default-log-port* "Read-only periodic sync thread started.") - (let loop ((last-sync-time 0)) - (debug:print-info 13 *default-log-port* "loop top tmp-mtpath="tmp-mtpath" golden-mtpath="golden-mtpath) - (let* ((duration-since-last-sync (- (current-seconds) last-sync-time))) - (debug:print-info 13 *default-log-port* "duration-since-last-sync="duration-since-last-sync) - (if (and (not *time-to-exit*) - (< duration-since-last-sync sync-cool-off-duration)) - (thread-sleep! (- sync-cool-off-duration duration-since-last-sync))) - (if (not *time-to-exit*) - (let ((golden-mtdb-mtime (file-modification-time golden-mtpath)) - (tmp-mtdb-mtime (file-modification-time tmp-mtpath))) - (if (> golden-mtdb-mtime tmp-mtdb-mtime) - (if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back - (let ((res (db:multi-db-sync dbstruct 'old2new))) - (debug:print-info 13 *default-log-port* "rosync called, " res " records transferred.")))) - (loop (current-seconds))) - #t))) - (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath))) - -;;====================================================================== -;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage -(define (common:watchdog) - (debug:print-info 13 *default-log-port* "common:watchdog entered.") - (if (launch:setup) - (if (common:on-homehost?) - (let ((dbstruct (db:setup-db *dbstruct-dbs* *toppath* #f))) ;; #t))) - (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct) - (cond - ((dbr:dbstruct-read-only dbstruct) - (debug:print-info 13 *default-log-port* "loading read-only watchdog") - (common:readonly-watchdog dbstruct)) - (else - (debug:print-info 13 *default-log-port* "loading writable-watchdog.") - (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "delta-sync"))) ;; "brute-force-sync"))) - (cond - ((equal? syncer "brute-force-sync") - (server:writable-watchdog-bruteforce dbstruct)) - ((equal? syncer "delta-sync") - (server:writable-watchdog-deltasync dbstruct)) - (else - (debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are brute-force-sync and delta-sync.") - (exit 1))) - ;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")") - ))) - (debug:print-info 13 *default-log-port* "watchdog done.")) - (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost")))) - (define (std-exit-procedure) ;;(common:telemetry-log-close) (on-exit (lambda () 0)) ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -46,10 +46,11 @@ matchable) (declare (unit db)) (declare (uses common)) (declare (uses dbmod)) +;; (declare (uses debugprint)) (declare (uses dbfile)) (declare (uses keys)) (declare (uses ods)) (declare (uses client)) (declare (uses mt)) @@ -429,51 +430,11 @@ (set! *db-last-sync* start-t) (set! *db-last-access* start-t) (mutex-unlock! *db-multi-sync-mutex*) (stack-push! (dbr:subdb-dbstack subdb) tmpdb))) -(define (db:safely-close-sqlite3-db db stmt-cache #!key (try-num 3)) - (if (<= try-num 0) - #f - (handle-exceptions - exn - (begin - (print "Attempt to safely close sqlite3 db failed. Trying again. exn=" exn) - (thread-sleep! 3) - (sqlite3:interrupt! db) - (db:safely-close-sqlite3-db db stmt-cache try-num: (- try-num 1))) - (if (sqlite3:database? db) - (let* ((stmts (and stmt-cache (hash-table-ref/default stmt-cache db #f)))) - (if stmts (map sqlite3:finalize! (hash-table-values stmts))) - (sqlite3:finalize! db) - #t) - #f)))) - -;; close all opened run-id dbs -(define (db:close-all dbstruct) - (if (dbr:dbstruct? dbstruct) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: Finalizing failed, " ((condition-property-accessor 'exn 'message) exn) ", note - exn=" exn) - (print-call-chain *default-log-port*)) - ;; (db:sync-touched dbstruct 0 force-sync: #t) ;; NO. Do not do this here. Instead we rely on a server to be started when there are writes, even if the server itself is not going to be used as a server. - (let* ((subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct)))) - (for-each - (lambda (subdb) - (let* ((tdbs (stack->list (dbr:subdb-dbstack subdb))) - (mdb (dbr:dbdat-dbh (dbr:subdb-mtdb subdb))) - (rdb (dbr:dbdat-dbh (dbr:subdb-refndb subdb)))) - - (map (lambda (dbdat) - (let* ((stmt-cache (dbr:dbdat-stmt-cache dbdat)) - (dbh (dbr:dbdat-dbh dbdat))) - (db:safely-close-sqlite3-db dbh stmt-cache))) - tdbs))) - subdbs) - (db:safely-close-sqlite3-db mdb stmt-cache) ;; (if (sqlite3:database? mdb) (sqlite3:finalize! mdb)) - (db:safely-close-sqlite3-db rdb stmt-cache))))) ;; (if (sqlite3:database? rdb) (sqlite3:finalize! rdb)))))) +;; db:safely-close-sqlite3-db and db:close-all were here, moved to dbfile ;; (let ((locdbs (dbr:dbstruct-locdbs dbstruct))) ;; (if (hash-table? locdbs) ;; (for-each (lambda (run-id) ;; (db:close-run-db dbstruct run-id)) @@ -5000,6 +4961,235 @@ (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) (system "rm -rf tempdir"))) ;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%") +;;====================================================================== +;; moving watch dogs here due to dependencies +;;====================================================================== + +;;====================================================================== +;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp +;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server) +;; +(define (common:readonly-watchdog dbstruct) + (thread-sleep! 0.05) ;; delay for startup + (debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.") + ;; sync megatest.db to /tmp/.../megatst.db + (let* ((sync-cool-off-duration 3) + (golden-mtdb (dbr:dbstruct-mtdb dbstruct)) + (golden-mtpath (db:dbdat-get-path golden-mtdb)) + (tmp-mtdb (dbr:dbstruct-tmpdb dbstruct)) + (tmp-mtpath (db:dbdat-get-path tmp-mtdb))) + (debug:print-info 0 *default-log-port* "Read-only periodic sync thread started.") + (let loop ((last-sync-time 0)) + (debug:print-info 13 *default-log-port* "loop top tmp-mtpath="tmp-mtpath" golden-mtpath="golden-mtpath) + (let* ((duration-since-last-sync (- (current-seconds) last-sync-time))) + (debug:print-info 13 *default-log-port* "duration-since-last-sync="duration-since-last-sync) + (if (and (not *time-to-exit*) + (< duration-since-last-sync sync-cool-off-duration)) + (thread-sleep! (- sync-cool-off-duration duration-since-last-sync))) + (if (not *time-to-exit*) + (let ((golden-mtdb-mtime (file-modification-time golden-mtpath)) + (tmp-mtdb-mtime (file-modification-time tmp-mtpath))) + (if (> golden-mtdb-mtime tmp-mtdb-mtime) + (if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back + (let ((res (db:multi-db-sync dbstruct 'old2new))) + (debug:print-info 13 *default-log-port* "rosync called, " res " records transferred.")))) + (loop (current-seconds))) + #t))) + (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath))) + +;;====================================================================== +;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage +(define (common:watchdog) + (debug:print-info 13 *default-log-port* "common:watchdog entered.") + (if (launch:setup) + (if (common:on-homehost?) + (let ((dbstruct (db:setup #t))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;; #t))) + (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct) + (cond + ((dbr:dbstruct-read-only dbstruct) + (debug:print-info 13 *default-log-port* "loading read-only watchdog") + (common:readonly-watchdog dbstruct)) + (else + (debug:print-info 13 *default-log-port* "loading writable-watchdog.") + (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "delta-sync"))) ;; "brute-force-sync"))) + (cond + ((equal? syncer "brute-force-sync") + (server:writable-watchdog-bruteforce dbstruct)) + ((equal? syncer "delta-sync") + (server:writable-watchdog-deltasync dbstruct)) + (else + (debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are brute-force-sync and delta-sync.") + (exit 1))) + ;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")") + ))) + (debug:print-info 13 *default-log-port* "watchdog done.")) + (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost")))) + +(define (server:writable-watchdog-bruteforce dbstruct) + (thread-sleep! 1) ;; delay for startup + (let* ((do-a-sync (server:get-bruteforce-syncer dbstruct)) + (final-sync (server:get-bruteforce-syncer dbstruct fork-to-background: #t persist-until-sync: #t))) + (when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync + (args:get-arg "-server")) + + (let loop () + (do-a-sync) + (if (not *time-to-exit*) (loop))) ;; keep going unless time to exit + + ;; time to exit, close the no-sync db here + (final-sync) + + (if (common:low-noise-print 30) + (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) + ))))) + +(define (server:writable-watchdog-deltasync dbstruct) + ;; This is awful complex and convoluted. Plan to redo? + ;; for now ... skip it. +;; ==> +;; ==> (thread-sleep! 0.05) ;; delay for startup +;; ==> (let ((legacy-sync (common:run-sync?)) +;; ==> (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300)) +;; ==> (debug-mode (debug:debug-mode 1)) +;; ==> (last-time (current-seconds)) +;; ==> (no-sync-db (db:open-no-sync-db)) +;; ==> (stmt-cache #f) ;; (dbr:dbstruct-stmt-cache dbstruct)) +;; ==> (sync-duration 0) ;; run time of the sync in milliseconds +;; ==> (subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct)))) +;; ==> (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls +;; ==> (debug:print-info 2 *default-log-port* "Periodic sync thread started.") +;; ==> (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id) );; " this-wd-num="this-wd-num) +;; ==> +;; ==> (if (and legacy-sync (not *time-to-exit*)) +;; ==> (begin +;; ==> (debug:print-info 0 *default-log-port* "Server running, periodic sync started.") + (let loop () +;; ==> ;; sync for filesystem local db writes +;; ==> ;; +;; ==> (mutex-lock! *db-multi-sync-mutex*) +;; ==> (let* ((start-file (conc tmp-area "/.start-sync")) +;; ==> (end-file (conc tmp-area "/.end-sync")) +;; ==> +;; ==> (need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write +;; ==> (sync-in-progress *db-sync-in-progress*) +;; ==> (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5)) +;; ==> (should-sync (and (not *time-to-exit*) +;; ==> (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed +;; ==> (start-time (current-seconds)) +;; ==> (cpu-load-adj (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f))) +;; ==> (mt-mod-time (file-modification-time mtpath)) +;; ==> (last-sync-start (if (common:file-exists? start-file) +;; ==> (file-modification-time start-file) +;; ==> 0)) +;; ==> (last-sync-end (if (common:file-exists? end-file) +;; ==> (file-modification-time end-file) +;; ==> 10)) +;; ==> (sync-period (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period +;; ==> (recently-synced (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db! +;; ==> (< mt-mod-time last-sync-start))) +;; ==> (sync-done (<= last-sync-start last-sync-end)) +;; ==> (sync-stale (> start-time (+ last-sync-start sync-stale-seconds))) +;; ==> (will-sync (and (not *time-to-exit*) ;; do not start a sync if we are in the process of exiting +;; ==> (or need-sync should-sync) +;; ==> (or sync-done sync-stale) +;; ==> (not sync-in-progress) +;; ==> (not recently-synced)))) +;; ==> (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. need-sync="need-sync" sync-in-progress=" sync-in-progress +;; ==> " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync +;; ==> " sync-done=" sync-done " sync-period=" sync-period) +;; ==> (if (and (> sync-period 5) +;; ==> (common:low-noise-print 30 "sync-period")) +;; ==> (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds.")) +;; ==> ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced)) +;; ==> ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) +;; ==> (if will-sync (set! *db-sync-in-progress* #t)) +;; ==> (mutex-unlock! *db-multi-sync-mutex*) +;; ==> (if will-sync +;; ==> (let (;; (max-sync-duration (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK! +;; ==> (sync-start (current-milliseconds))) +;; ==> (with-output-to-file start-file (lambda ()(print (current-process-id)))) +;; ==> +;; ==> ;; put lock here +;; ==> +;; ==> ;; (if (or (not max-sync-duration) +;; ==> ;; (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally +;; ==> +;; ==> ;; +;; ==> +;; ==> (for-each +;; ==> (lambda (subdb) +;; ==> (let* (;;(dbstruct (db:setup)) +;; ==> (mtdb (dbr:subdb-mtdb subdb)) +;; ==> (mtpath (db:dbdat-get-path mtdb)) +;; ==> (tmp-area (common:get-db-tmp-area)) +;; ==> (res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive +;; ==> (set! sync-duration (- (current-milliseconds) sync-start)) +;; ==> (if (> res 0) ;; some records were transferred, keep the db alive +;; ==> (begin +;; ==> (mutex-lock! *heartbeat-mutex*) +;; ==> (set! *db-last-access* (current-seconds)) +;; ==> (mutex-unlock! *heartbeat-mutex*) +;; ==> (debug:print-info 0 *default-log-port* "sync called, " res " records transferred.")) +;; ==> (debug:print-info 2 *default-log-port* "sync called but zero records transferred"))) +;; ==> ) +;; ==> subdbs))) +;; ==> ;; ;; TODO: factor this next routine out into a function +;; ==> ;; (with-input-from-pipe ;; this should not block other threads but need to verify this +;; ==> ;; (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*) +;; ==> ;; (lambda () +;; ==> ;; (let loop ((inl (read-line)) +;; ==> ;; (res #f)) +;; ==> ;; (if (eof-object? inl) +;; ==> ;; (begin +;; ==> ;; (set! sync-duration (- (current-milliseconds) sync-start)) +;; ==> ;; (cond +;; ==> ;; ((not res) +;; ==> ;; (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\"")) +;; ==> ;; ((> res 0) +;; ==> ;; (mutex-lock! *heartbeat-mutex*) +;; ==> ;; (set! *db-last-access* (current-seconds)) +;; ==> ;; (mutex-unlock! *heartbeat-mutex*)))) +;; ==> ;; (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl))) +;; ==> ;; (if matches +;; ==> ;; (string->number (cadr matches)) +;; ==> ;; #f)))) +;; ==> ;; (loop (read-line) +;; ==> ;; (or num-synced res)))))))))) +;; ==> +;; ==> (if will-sync +;; ==> (begin +;; ==> (mutex-lock! *db-multi-sync-mutex*) +;; ==> (set! *db-sync-in-progress* #f) +;; ==> (set! *db-last-sync* start-time) +;; ==> (with-output-to-file end-file (lambda ()(print (current-process-id)))) +;; ==> +;; ==> ;; release lock here +;; ==> +;; ==> (mutex-unlock! *db-multi-sync-mutex*))) +;; ==> (if (and debug-mode +;; ==> (> (- start-time last-time) 60)) +;; ==> (begin +;; ==> (set! last-time start-time) +;; ==> (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) +;; ==> + ;; keep going unless time to exit + ;; + (if (not *time-to-exit*) + (let delay-loop ((count 0)) + ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) + + (if (and (not *time-to-exit*) + (< count 6)) ;; was 11, changing to 4. + (begin + (thread-sleep! 1) + (delay-loop (+ count 1)))) + (if (not *time-to-exit*) (loop)))) + +;; ==> ;; time to exit, close the no-sync db here +;; ==> (db:no-sync-close-db no-sync-db stmt-cache) + (if (common:low-noise-print 30) + (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) )))) ;; ))) ;;" this-wd-num="this-wd-num))))))) + Index: dbfile.scm ================================================================== --- dbfile.scm +++ dbfile.scm @@ -17,18 +17,23 @@ ;; along with Megatest. If not, see . ;;====================================================================== (declare (unit dbfile)) +;; (declare (uses debugprint)) (module dbfile * (import scheme chicken data-structures extras) (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 - srfi-69) + srfi-69 + stack + ) + +;; (import debugprint) ;;====================================================================== ;; R E C O R D S ;;====================================================================== @@ -65,10 +70,53 @@ (stmt-cache (make-hash-table)) (read-only #f)) (define (dbfile:run-id->key run-id) (or run-id 'main)) + +(define (db:safely-close-sqlite3-db db stmt-cache #!key (try-num 3)) + (if (<= try-num 0) + #f + (handle-exceptions + exn + (begin + (print "Attempt to safely close sqlite3 db failed. Trying again. exn=" exn) + (thread-sleep! 3) + (sqlite3:interrupt! db) + (db:safely-close-sqlite3-db db stmt-cache try-num: (- try-num 1))) + (if (sqlite3:database? db) + (let* ((stmts (and stmt-cache (hash-table-ref/default stmt-cache db #f)))) + (if stmts (map sqlite3:finalize! (hash-table-values stmts))) + (sqlite3:finalize! db) + #t) + #f)))) + +;; close all opened run-id dbs +(define (db:close-all dbstruct) + (if (dbr:dbstruct? dbstruct) +;; (handle-exceptions +;; exn +;; (begin +;; (debug:print 0 *default-log-port* "WARNING: Finalizing failed, " ((condition-property-accessor 'exn 'message) exn) ", note - exn=" exn) +;; (print-call-chain *default-log-port*)) + ;; (db:sync-touched dbstruct 0 force-sync: #t) ;; NO. Do not do this here. Instead we rely on a server to be started when there are writes, even if the server itself is not going to be used as a server. + (let* ((subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct)))) + (for-each + (lambda (subdb) + (let* ((tdbs (stack->list (dbr:subdb-dbstack subdb))) + (mdb (dbr:dbdat-dbh (dbr:subdb-mtdb subdb))) + (rdb (dbr:dbdat-dbh (dbr:subdb-refndb subdb)))) + + (map (lambda (dbdat) + (let* ((stmt-cache (dbr:dbdat-stmt-cache dbdat)) + (dbh (dbr:dbdat-dbh dbdat))) + (db:safely-close-sqlite3-db dbh stmt-cache))) + tdbs) + (db:safely-close-sqlite3-db mdb #f) ;; stmt-cache) ;; (if (sqlite3:database? mdb) (sqlite3:finalize! mdb)) + (db:safely-close-sqlite3-db rdb #f))) ;; stmt-cache))))) ;; (if (sqlite3:database? rdb) (sqlite3:finalize! rdb)))))) + subdbs)))) +;; ) ;; ;; set up a single db (e.g. main.db, 1.db ... etc.) ;; ;; ;; (define (db:setup-db dbstruct areapath run-id) ;; (let* ((dbname (db:run-id->dbname run-id)) ADDED debugprint.scm Index: debugprint.scm ================================================================== --- /dev/null +++ debugprint.scm @@ -0,0 +1,175 @@ + +(declare (unit debugprint)) +(declare (uses mtargs)) + +(module debugprint + * + +;;(import scheme chicken data-structures extras files ports) + (import + scheme + chicken + data-structures + posix + ports + extras + + ;; scheme + ;; chicken.base + ;; chicken.string + ;; chicken.time + ;; chicken.time.posix + ;; chicken.port + ;; chicken.process-context + ;; chicken.process-context.posix + + (prefix mtargs args:) + srfi-1 + ;; system-information + ) + +;;====================================================================== +;; debug stuff +;;====================================================================== + +(define verbosity (make-parameter '())) +(define *default-log-port* (current-error-port)) +(define debug:print-logger (make-parameter #f)) ;; set to a proc to call on every logging print + +(define (debug:setup) + (let ((debugstr (or (args:get-arg "-debug") + (args:get-arg "-debug-noprop") + (get-environment-variable "MT_DEBUG_MODE")))) + (verbosity (debug:calc-verbosity debugstr 'q)) + (debug:check-verbosity (verbosity) debugstr) + ;; if we were handed a bad verbosity rule then we will override it with 1 and continue + (if (not (verbosity))(verbosity 1)) + (if (and (not (args:get-arg "-debug-noprop")) + (or (args:get-arg "-debug") + (not (get-environment-variable "MT_DEBUG_MODE")))) + (setenv #;set-environment-variable! "MT_DEBUG_MODE" (if (list? (verbosity)) + (string-intersperse (map conc (verbosity)) ",") + (conc (verbosity))))))) + +;; check verbosity, #t is ok +(define (debug:check-verbosity verbosity vstr) + (if (not (or (number? verbosity) + (list? verbosity))) + (begin + (print "ERROR: Invalid debug value \"" vstr "\"") + #f) + #t)) + +;;====================================================================== +;; (define (debug:print . params) #f) +;; (define (debug:print-info . params) #f) +;; +;; (define (set-functions dbgp dbgpinfo) +;; (set! debug:print dbgp) +;; (set! debug:print-info dbgpinfo)) + +;;====================================================================== +;; this was cached based on results from profiling but it turned out the profiling +;; somehow went wrong - perhaps too many processes writing to it. Leaving the caching +;; in for now but can probably take it out later. +;; +(define (debug:calc-verbosity vstr arg) ;; arg is 'v (verbose) or 'q (quiet) + (let* ((res (cond + ((number? vstr) vstr) + ((not (string? vstr)) 1) + ;; ((string-match "^\\s*$" vstr) 1) + (vstr (let ((debugvals (filter number? (map string->number (string-split vstr ","))))) + (cond + ((> (length debugvals) 1) debugvals) + ((> (length debugvals) 0)(car debugvals)) + (else 1)))) + ((eq? arg 'v) 2) ;; verbose + ((eq? arg 'q) 0) ;; quiet + (else 1)))) + (verbosity res) + res)) + +;;====================================================================== +;; check verbosity, #t is ok +#;(define (debug-check-verbosity verbosity vstr) + (if (not (or (number? verbosity) + (list? verbosity))) + (begin + (print "ERROR: Invalid debug value \"" vstr "\"") + #f) + #t)) + +(define (debug:debug-mode n) + (let* ((vb (verbosity))) + (cond + ((and (number? vb) ;; number number + (number? n)) + (<= n vb)) + ((and (list? vb) ;; list number + (number? n)) + (member n vb)) + ((and (list? vb) ;; list list + (list? n)) + (not (null? (lset-intersection! eq? vb n)))) + ((and (number? vb) + (list? n)) + (member vb n)) + (else #f)))) + +(define (debug:handle-remote-logging params) + (if (debug:print-logger) ;; NOTE: turn params into string to keep things simple for now + ((debug:print-logger)(conc "REMOTE ("(get-host-name)", pid="(current-process-id)") " + (string-intersperse (map conc params) " ") "; " + (string-intersperse (command-line-arguments) " "))))) + +(define debug:enable-timestamp (make-parameter #t)) + +(define (debug:timestamp) + (if (debug:enable-timestamp) + (conc (time->string + (seconds->local-time (current-seconds)) "%H:%M:%S") " ") + "")) + + (define (debug:print n e . params) + (if (debug:debug-mode n) + (with-output-to-port (or e (current-error-port)) + (lambda () + ;; (if *logging* + ;; (db:log-event (apply conc params)) + (apply print (debug:timestamp) params) + ;; (debug:handle-remote-logging params) + ))) + #t ;; only here to make remote stuff happy. It'd be nice to fix that ... + ) + +(define (debug:print-error n e . params) + ;; normal print + (if (debug:debug-mode n) + (with-output-to-port (if (port? e) e (current-error-port)) + (lambda () + (apply print "ERROR: " (debug:timestamp) params) + ;; (debug:handle-remote-logging (cons "ERROR: " params)) + ))) + ;; pass important messages to stderr + (if (and (eq? n 0)(not (eq? e (current-error-port)))) + (with-output-to-port (current-error-port) + (lambda () + (apply print "ERROR: " (debug:timestamp) params) + )))) + +(define (debug:print-info n e . params) + (if (debug:debug-mode n) + (with-output-to-port (if (port? e) e (current-error-port)) + (lambda () + (apply print "INFO: (" n ") "(debug:timestamp) params) ;; res) + ;; (debug:handle-remote-logging (cons "INFO: " params)) + )))) + +(define (debug:print-warn n e . params) + (if (debug:debug-mode n) + (with-output-to-port (if (port? e) e (current-error-port)) + (lambda () + (apply print "WARN: (" n ") " (debug:timestamp) params) ;; res) + ;; (debug:handle-remote-logging (cons "WARN: " params)) + )))) +) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -30,10 +30,11 @@ (declare (uses server)) (declare (uses client)) (declare (uses tests)) (declare (uses genexample)) ;; (declare (uses daemon)) + (declare (uses db)) ;; (declare (uses dcommon)) (declare (uses tdb)) (declare (uses mt)) @@ -43,10 +44,14 @@ (declare (uses diff-report)) (declare (uses dbmod)) (declare (uses dbmod.import)) (declare (uses dbfile)) (declare (uses dbfile.import)) +;; (declare (uses debugprint)) +;; (declare (uses debugprint.import)) +;; (declare (uses mtargs)) +;; (declare (uses mtargs.import)) ;; (declare (uses ftail)) ;; (import ftail) (import dbmod Index: mtargs/mtargs.scm ================================================================== --- mtargs/mtargs.scm +++ mtargs/mtargs.scm @@ -56,10 +56,20 @@ (if (string? help) (print help) (print "Usage: " (car (argv)) " ... ")) (exit 0)) + ;; one-of args defined +(define (args:any-defined? . param) + (let ((res #f)) + (for-each + (lambda (arg) + (if (get-arg arg)(set! res #t))) + param) + res)) + +;; args: (define (get-args args params switches arg-hash num-needed) (let* ((numtargs (length args)) (adj-num-needed (if num-needed (+ num-needed 2) #f))) (if (< numtargs (if adj-num-needed adj-num-needed 2)) (if (>= num-needed 1) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -703,155 +703,5 @@ finalres) ) ;; end lambda )) do-a-sync)) -(define (server:writable-watchdog-bruteforce dbstruct) - (thread-sleep! 1) ;; delay for startup - (let* ((do-a-sync (server:get-bruteforce-syncer dbstruct)) - (final-sync (server:get-bruteforce-syncer dbstruct fork-to-background: #t persist-until-sync: #t))) - (when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync - (args:get-arg "-server")) - - (let loop () - (do-a-sync) - (if (not *time-to-exit*) (loop))) ;; keep going unless time to exit - - ;; time to exit, close the no-sync db here - (final-sync) - - (if (common:low-noise-print 30) - (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) - ))))) - -(define (server:writable-watchdog-deltasync dbstruct) - (thread-sleep! 0.05) ;; delay for startup - (let ((legacy-sync (common:run-sync?)) - (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300)) - (debug-mode (debug:debug-mode 1)) - (last-time (current-seconds)) - (no-sync-db (db:open-no-sync-db)) - (stmt-cache (dbr:dbstruct-stmt-cache dbstruct)) - (sync-duration 0) ;; run time of the sync in milliseconds - ) - (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls - (debug:print-info 2 *default-log-port* "Periodic sync thread started.") - (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id) );; " this-wd-num="this-wd-num) - (if (and legacy-sync (not *time-to-exit*)) - (let* (;;(dbstruct (db:setup)) - (mtdb (dbr:dbstruct-mtdb dbstruct)) - (mtpath (db:dbdat-get-path mtdb)) - (tmp-area (common:get-db-tmp-area)) - (start-file (conc tmp-area "/.start-sync")) - (end-file (conc tmp-area "/.end-sync"))) - (debug:print-info 0 *default-log-port* "Server running, periodic sync started.") - (let loop () - ;; sync for filesystem local db writes - ;; - (mutex-lock! *db-multi-sync-mutex*) - (let* ((need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write - (sync-in-progress *db-sync-in-progress*) - (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5)) - (should-sync (and (not *time-to-exit*) - (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed - (start-time (current-seconds)) - (cpu-load-adj (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f))) - (mt-mod-time (file-modification-time mtpath)) - (last-sync-start (if (common:file-exists? start-file) - (file-modification-time start-file) - 0)) - (last-sync-end (if (common:file-exists? end-file) - (file-modification-time end-file) - 10)) - (sync-period (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period - (recently-synced (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db! - (< mt-mod-time last-sync-start))) - (sync-done (<= last-sync-start last-sync-end)) - (sync-stale (> start-time (+ last-sync-start sync-stale-seconds))) - (will-sync (and (not *time-to-exit*) ;; do not start a sync if we are in the process of exiting - (or need-sync should-sync) - (or sync-done sync-stale) - (not sync-in-progress) - (not recently-synced)))) - (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. need-sync="need-sync" sync-in-progress=" sync-in-progress - " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync - " sync-done=" sync-done " sync-period=" sync-period) - (if (and (> sync-period 5) - (common:low-noise-print 30 "sync-period")) - (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds.")) - ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced)) - ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) - (if will-sync (set! *db-sync-in-progress* #t)) - (mutex-unlock! *db-multi-sync-mutex*) - (if will-sync - (let (;; (max-sync-duration (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK! - (sync-start (current-milliseconds))) - (with-output-to-file start-file (lambda ()(print (current-process-id)))) - - ;; put lock here - - ;; (if (or (not max-sync-duration) - ;; (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally - (let ((res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive - (set! sync-duration (- (current-milliseconds) sync-start)) - (if (> res 0) ;; some records were transferred, keep the db alive - (begin - (mutex-lock! *heartbeat-mutex*) - (set! *db-last-access* (current-seconds)) - (mutex-unlock! *heartbeat-mutex*) - (debug:print-info 0 *default-log-port* "sync called, " res " records transferred.")) - (debug:print-info 2 *default-log-port* "sync called but zero records transferred"))))) -;; ;; TODO: factor this next routine out into a function -;; (with-input-from-pipe ;; this should not block other threads but need to verify this -;; (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*) -;; (lambda () -;; (let loop ((inl (read-line)) -;; (res #f)) -;; (if (eof-object? inl) -;; (begin -;; (set! sync-duration (- (current-milliseconds) sync-start)) -;; (cond -;; ((not res) -;; (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\"")) -;; ((> res 0) -;; (mutex-lock! *heartbeat-mutex*) -;; (set! *db-last-access* (current-seconds)) -;; (mutex-unlock! *heartbeat-mutex*)))) -;; (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl))) -;; (if matches -;; (string->number (cadr matches)) -;; #f)))) -;; (loop (read-line) -;; (or num-synced res)))))))))) - (if will-sync - (begin - (mutex-lock! *db-multi-sync-mutex*) - (set! *db-sync-in-progress* #f) - (set! *db-last-sync* start-time) - (with-output-to-file end-file (lambda ()(print (current-process-id)))) - - ;; release lock here - - (mutex-unlock! *db-multi-sync-mutex*))) - (if (and debug-mode - (> (- start-time last-time) 60)) - (begin - (set! last-time start-time) - (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) - - ;; keep going unless time to exit - ;; - (if (not *time-to-exit*) - (let delay-loop ((count 0)) - ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) - - (if (and (not *time-to-exit*) - (< count 6)) ;; was 11, changing to 4. - (begin - (thread-sleep! 1) - (delay-loop (+ count 1)))) - (if (not *time-to-exit*) (loop)))) - ;; time to exit, close the no-sync db here - (db:no-sync-close-db no-sync-db stmt-cache) - (if (common:low-noise-print 30) - (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num))))))) -