Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -299,10 +299,11 @@ (handle-exceptions exn (begin (debug:print 0 "WARNING: database files may not have been closed correctly. Consider running -cleanup-db") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 " db: " rundb) (print-call-chain) #f) (sqlite3:interrupt! rundb) (sqlite3:finalize! rundb #t)))) ;; (mutex-unlock! *db-sync-mutex*) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -8,11 +8,13 @@ ;; PURPOSE. ;; (include "common.scm") ;; (include "megatest-version.scm") -(use sqlite3 srfi-1 posix regex regex-case srfi-69 base64 format readline apropos json http-client directory-utils z3) ;; (srfi 18) extras) +(use sqlite3 srfi-1 posix regex regex-case srfi-69 base64 format readline apropos json + http-client directory-utils z3 srfi-18) ;; extras) + (import (prefix sqlite3 sqlite3:)) (import (prefix base64 base64:)) ;; (use zmq) @@ -278,10 +280,44 @@ "-v" ;; verbose 2, more than normal (normal is 1) "-q" ;; quiet 0, errors/warnings only ) args:arg-hash 0)) + +;; The watchdog is to keep an eye on things like db sync etc. +;; +(define *watchdog* + (make-thread + (lambda () + (let loop () + (thread-sleep! 5) ;; five second resolution is only a minor burden and should be tolerable + + ;; sync for filesystem local db writes + ;; + (let ((start-time (current-seconds))) + (mutex-lock! *db-multi-sync-mutex*) + (for-each + (lambda (run-id) + (let ((last-write (hash-table-ref/default *db-local-sync* run-id 0))) + (if ;; (and + (> (- start-time last-write) 5) ;; every five seconds + ;; (common:db-access-allowed?)) + (begin + (db:multi-db-sync (list run-id) 'new2old) + (if (common:low-noise-print 30 "sync new to old") + (debug:print-info 0 "Sync of newdb to olddb for run-id " run-id " completed in " (- (current-seconds) start-time) " seconds")) + (hash-table-delete! *db-local-sync* run-id))))) + (hash-table-keys *db-local-sync*)) + (mutex-unlock! *db-multi-sync-mutex*)) + + ;; keep going unless time to exit + ;; + (if (not *time-to-exit*) + (loop)))) + "Watchdog thread")) + +(thread-start! *watchdog*) (define (std-exit-procedure) (rmt:print-db-stats) (let ((run-ids (hash-table-keys *db-local-sync*))) (if (not (null? run-ids)) @@ -1353,10 +1389,13 @@ (if *runremote* (close-all-connections!)) (if (not *didsomething*) (debug:print 0 help)) +(set! *time-to-exit* #t) +(thread-join! *watchdog*) + (if (not (eq? *globalexitstatus* 0)) (if (or (args:get-arg "-runtests")(args:get-arg "-runall")) (begin (debug:print 0 "NOTE: Subprocesses with non-zero exit code detected: " *globalexitstatus*) (exit 0)) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -153,14 +153,14 @@ (loop (car tal)(cdr tal) newmax-cmd currmax))))))) (mutex-unlock! *db-stats-mutex*) res)) (define (rmt:open-qry-close-locally cmd run-id params) - (let* ((dbdir (conc (configf:lookup *configdat* "setup" "linktree") "/.db")) - (dbstruct-local (if *dbstruct-db* + (let* ((dbstruct-local (if *dbstruct-db* *dbstruct-db* - (let ((db (make-dbr:dbstruct path: dbdir local: #t))) + (let* ((dbdir (conc (configf:lookup *configdat* "setup" "linktree") "/.db")) + (db (make-dbr:dbstruct path: dbdir local: #t))) (set! *dbstruct-db* db) db))) (db-file-path (db:dbfile-path 0))) ;; (read-only (not (file-read-access? db-file-path))) (let* ((start (current-milliseconds)) @@ -169,20 +169,12 @@ (rmt:update-db-stats cmd params duration) ;; mark this run as dirty if this was a write (if (not (member cmd api:read-only-queries)) (let ((start-time (current-seconds))) (mutex-lock! *db-multi-sync-mutex*) - (let ((last-sync (hash-table-ref/default *db-local-sync* run-id 0))) - (if ;; (and - (> (- start-time last-sync) 5) ;; every five seconds - ;; (common:db-access-allowed?)) - (begin - ;; MOVE THIS TO A THREAD? - (db:multi-db-sync (list run-id) 'new2old) - (if (common:low-noise-print 30 "sync new to old") - (debug:print-info 0 "Sync of newdb to olddb for run-id " run-id " completed in " (- (current-seconds) start-time) " seconds")) - (hash-table-set! *db-local-sync* run-id start-time)))) + (if (not (hash-table-ref/default *db-local-sync* run-id #f)) + (hash-table-set! *db-local-sync* run-id start-time)) ;; the oldest "write" (mutex-unlock! *db-multi-sync-mutex*))) res))) (define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params) (let* ((run-id (if run-id run-id 0))