Megatest

Artifact [baab89656e]
Login

Artifact baab89656e93171ea0cfb5de1d6d501c1dc4bfce:


;;======================================================================
;; Copyright 2017, Matthew Welland.
;; 
;; This file is part of Megatest.
;; 
;;     Megatest is free software: you can redistribute it and/or modify
;;     it under the terms of the GNU General Public License as published by
;;     the Free Software Foundation, either version 3 of the License, or
;;     (at your option) any later version.
;; 
;;     Megatest is distributed in the hope that it will be useful,
;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;     GNU General Public License for more details.
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================

(declare (unit dbmod))
(declare (uses commonmod))

(module dbmod
	*
	
(import commonmod)

(import scheme chicken data-structures extras)
(import (prefix sqlite3 sqlite3:)
	posix typed-records srfi-18 srfi-69
	stack
	files
	srfi-13
	srfi-1
	)

;;======================================================================
;;  R E C O R D S
;;======================================================================

;; each db entry is a pair ( db . dbfilepath )
;; I propose this record evolves into the area record
;;
(defstruct dbr:dbstruct 
  (tmpdb       #f)
  (dbstack     #f) ;; stack for tmp db handles, do not initialize with a stack
  (mtdb        #f)
  (refndb      #f)
  (homehost    #f) ;; not used yet
  (on-homehost #f) ;; not used yet
  (read-only   #f)
  (stmt-cache  (make-hash-table))
  )                ;; goal is to converge on one struct for an area but for now it is too confusing
  

;; record for keeping state,status and count for doing roll-ups in
;; iterated tests
;;
(defstruct dbr:counts
  (state #f)
  (status #f)
  (count  0)) 

;;======================================================================
;; hash of hashs
;;======================================================================

(define (db:hoh-set! dat key1 key2 val)
  (let* ((subhash (hash-table-ref/default dat key1 #f)))
    (if subhash
	(hash-table-set! subhash key2 val)
	(begin
	  (hash-table-set! dat key1 (make-hash-table))
	  (db:hoh-set! dat key1 key2 val)))))

(define (db:hoh-get dat key1 key2)
  (let* ((subhash (hash-table-ref/default dat key1 #f)))
    (and subhash
	 (hash-table-ref/default subhash key2 #f))))

(define (db:get-cache-stmth dbstruct db stmt)
  (let* ((stmt-cache        (dbr:dbstruct-stmt-cache dbstruct))
	 (stmth             (db:hoh-get stmt-cache db stmt)))
    (or stmth
	(let* ((newstmth (sqlite3:prepare db stmt)))
	  (db:hoh-set! stmt-cache db stmt newstmth)
	  newstmth))))

;;======================================================================
;; SQLITE3 HELPERS
;;======================================================================

(define (db:general-sqlite-error-dump  exn stmt . params)
  (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) ;; RADT ... how does this work?
    ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn)
    ;; (print "err-status: " err-status)
    (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn))
    (print-call-chain (current-error-port))))

;; convert to -inline 
;;
(define (db:first-result-default db stmt default . params)
  (handle-exceptions
   exn
   (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
     ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn)
     (if (eq? err-status 'done)
	 default
	 (begin
	   (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
	   (print-call-chain (current-error-port))
	   default)))
   (apply sqlite3:first-result db stmt params)))

;; Open the classic megatest.db file (defaults to open in toppath)
;;
;;   NOTE: returns a dbdat not a dbstruct!
;;
(define (db:open-megatest-db #!key (path #f)(name #f))
  (let* ((dbdir        (or path *toppath*))
         (dbpath       (conc  dbdir "/" (or name "megatest.db")))
	 (dbexists     (file-exists? dbpath))
	 (db           (db:lock-create-open dbpath
					    (lambda (db)
                                              (db:initialize-main-db db)
					      ;;(db:initialize-run-id-db db)
					      )))
	 (write-access (file-write-access? dbpath)))
    (debug:print-info 13 *default-log-port* "db:open-megatest-db "dbpath)
    (if (and dbexists (not write-access))
	(set! *db-write-access* #f))
    (cons db dbpath)))

;; open an sql database inside a file lock
;; returns: db existed-prior-to-opening
;; RA => Returns a db handler; sets the lock if opened in writable mode
;;
;; (define *db-open-mutex* (make-mutex))

(define (db:lock-create-open fname initproc)
  (let* ((parent-dir   (or (pathname-directory fname)(current-directory))) ;; no parent? go local
         (raw-fname    (pathname-file fname))
	 (dir-writable (file-write-access? parent-dir))
	 (file-exists  (common:file-exists? fname))
	 (file-write   (if file-exists
			   (file-write-access? fname)
			   dir-writable )))
    ;; (mutex-lock! *db-open-mutex*) ;; tried this mutex, not clear it helped.
    (if file-write ;; dir-writable
	(condition-case
         (let* ((lockfname   (conc fname ".lock"))
                (readyfname  (conc parent-dir "/.ready-" raw-fname))
                (readyexists (common:file-exists? readyfname)))
           (if (not readyexists)
               (common:simple-file-lock-and-wait lockfname))
           (let ((db      (sqlite3:open-database fname)))
             (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))
             (sqlite3:execute db "PRAGMA synchronous = 0;")
             #;(if (and (configf:lookup *configdat* "setup" "tmp_mode") (string-match "^/tmp/.*" fname))
                 (begin
                   ;;(print "DEBUG: Setting tmp_mode for " fname) 
                   (sqlite3:execute db (configf:lookup *configdat* "setup" "tmp_mode"))
                   )
                 )  
             #;(if (and (configf:lookup *configdat* "setup" "nfs_mode") (not (string-match "^/tmp/.*" fname)))
                 (begin
                   ;;(print "DEBUG: Setting nfs_mode for " fname)
                   (sqlite3:execute db (configf:lookup *configdat* "setup" "nfs_mode"))
                   )
                 )  
             #;(if (and (not (or (configf:lookup *configdat* "setup" "tmp_mode") (configf:lookup *configdat* "setup" "nfs_mode")))  
                      (configf:lookup *configdat* "setup" "use-wal")
                      (string-match "^/tmp/.*" fname)) ;; this is a file in /tmp
                 (sqlite3:execute db "PRAGMA journal_mode=WAL;")
                 (debug:print 2 *default-log-port* "Creating " fname " in NON-WAL mode."))
             (if (not file-exists)
                 (initproc db))
             (if (not readyexists)
                 (begin
                   (common:simple-file-release-lock lockfname)
                   (with-output-to-file
                       readyfname
                     (lambda ()
                       (print "Ready at " 
                              (seconds->year-work-week/day-time 
                               (current-seconds)))))))
             db))
         (exn (io-error)  (debug:print 0 *default-log-port* "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again."))
         (exn (corrupt)   (debug:print 0 *default-log-port* "ERROR: database " fname " is corrupt. Repair it to proceed."))
         (exn (busy)      (debug:print 0 *default-log-port* "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back."))
         (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem."))
         (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn))))
        
	(condition-case
         (begin
           (debug:print 2 *default-log-port* "WARNING: opening db in non-writable dir " fname)
           (let ((db (sqlite3:open-database fname)))
             (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))
             (sqlite3:execute db "PRAGMA synchronous = 0;")
	     ;; (mutex-unlock! *db-open-mutex*)
             db))
         (exn (io-error)  (debug:print 0 *default-log-port* "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again."))
         (exn (corrupt)   (debug:print 0 *default-log-port* "ERROR: database " fname " is corrupt. Repair it to proceed."))
         (exn (busy)      (debug:print 0 *default-log-port* "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back."))
         (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem."))
         (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn))))
	)))

(define (db:initialize-main-db dbdat)
  (assert *db-keys* "Attempt to initialize main db before configs are properly processed")
  ;; (if (not *configinfo*)(launch:setup)) ;; added because Elena was getting stack dump because *configinfo* below was #f.
  (let* ((keys     *db-keys*)
	 (havekeys (> (length keys) 0))
	 (keystr   (keys->keystr keys))
	 (fieldstr (keys:make-key/field-string keys)) ;;; configdat))
	 (db       (db:dbdat-get-db dbdat)))
    (for-each (lambda (key)
		(let ((keyn key))
		  (if (member (string-downcase keyn)
			      (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count"
				    "pass_count" "contour"))
		      (begin
			(print "ERROR: your key cannot be named " keyn " as this conflicts with the same named field in the runs table, you must remove your megatest.db and <linktree>/.db before trying again.")
			(exit 1)))))
	      keys)
    (sqlite3:with-transaction
     db
     (lambda ()
      ;; handle-exceptions
      ;; exn
      ;; (begin
      ;;   (debug:print 0 "ERROR: Failed to create tables. Look at your [fields] section, should be: fieldname TEXT DEFAULT 'yourdefault'")
      ;;   (exit))
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS keys (id INTEGER PRIMARY KEY, fieldname TEXT, fieldtype TEXT, CONSTRAINT keyconstraint UNIQUE (fieldname));")
	(for-each (lambda (key)
		    (sqlite3:execute db "INSERT OR REPLACE INTO keys (fieldname,fieldtype) VALUES (?,?);" key "TEXT"))
		  keys)
	(sqlite3:execute db (conc 
			     "CREATE TABLE IF NOT EXISTS runs (id INTEGER PRIMARY KEY, \n			 " 
			     fieldstr (if havekeys "," "") "
			 runname    TEXT DEFAULT 'norun',
                         contour    TEXT DEFAULT '',
			 state      TEXT DEFAULT '',
			 status     TEXT DEFAULT '',
			 owner      TEXT DEFAULT '',
			 event_time TIMESTAMP DEFAULT (strftime('%s','now')),
			 comment    TEXT DEFAULT '',
			 fail_count INTEGER DEFAULT 0,
			 pass_count INTEGER DEFAULT 0,
                         last_update INTEGER DEFAULT (strftime('%s','now')),
			 CONSTRAINT runsconstraint UNIQUE (runname" (if havekeys "," "") keystr "));"))
        ;; All triggers created at once in end
	;;(sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs
        ;;                    FOR EACH ROW
        ;;                       BEGIN 
        ;;                         UPDATE runs SET last_update=(strftime('%s','now'))
        ;;                           WHERE id=old.id;
        ;;                       END;")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS run_stats (
                              id     INTEGER PRIMARY KEY,
                              run_id INTEGER,
                              state  TEXT,
                              status TEXT,
                              count  INTEGER,
                              last_update INTEGER DEFAULT (strftime('%s','now')))")
         ;; All triggers created at once in end
         ;; (sqlite3:execute db "CREATE TRIGGER  IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats
         ;;                   FOR EACH ROW
         ;;                     BEGIN 
         ;;                       UPDATE run_stats SET last_update=(strftime('%s','now'))
         ;;                         WHERE id=old.id;
         ;;                      END;")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_meta (
                                     id          INTEGER PRIMARY KEY,
                                     testname    TEXT DEFAULT '',
                                     author      TEXT DEFAULT '',
                                     owner       TEXT DEFAULT '',
                                     description TEXT DEFAULT '',
                                     reviewed    TIMESTAMP,
                                     iterated    TEXT DEFAULT '',
                                     avg_runtime REAL,
                                     avg_disk    REAL,
                                     tags        TEXT DEFAULT '',
                                     jobgroup    TEXT DEFAULT 'default',
                                CONSTRAINT test_meta_constraint UNIQUE (testname));")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY,
                                action TEXT DEFAULT '',
                                owner TEXT,
                                state TEXT DEFAULT 'new',
                                target TEXT DEFAULT '',
                                name TEXT DEFAULT '',
                                testpatt TEXT DEFAULT '',
                                keylock TEXT,
                                params TEXT,
                                creation_time TIMESTAMP DEFAULT (strftime('%s','now')),
                                execution_time TIMESTAMP);")
	;; archive disk areas, cached info from [archive-disks]
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_disks (
                                id INTEGER PRIMARY KEY,
                                archive_area_name TEXT,
                                disk_path TEXT,
                                last_df INTEGER DEFAULT -1,
                                last_df_time TIMESTAMP DEFAULT (strftime('%s','now')),
                                creation_time TIMESTAMP DEFAULT (strftime('%s','now')));")
	;; individual bup (or tar) data chunks
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_blocks (
                                id INTEGER PRIMARY KEY,
                                archive_disk_id INTEGER,
                                disk_path TEXT,
                                last_du INTEGER DEFAULT -1,
                                last_du_time TIMESTAMP DEFAULT (strftime('%s','now')),
                                creation_time TIMESTAMP DEFAULT (strftime('%s','now')));")
	;; tests allocated to what chunks. reusing a chunk for a test/item_path is very efficient
	;; NB// the per run/test recording of where the archive is stored is done in the test
	;;      record. 
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_allocations (
                                id INTEGER PRIMARY KEY,
                                archive_block_id INTEGER,
                                testname TEXT,
                                item_path TEXT,
                                creation_time TIMESTAMP DEFAULT (strftime('%s','now')));")
	;; move this clean up call somewhere else
	(sqlite3:execute db "DELETE FROM tasks_queue WHERE state='done' AND creation_time < ?;" (- (current-seconds)(* 24 60 60))) ;; remove older than 24 hrs
	(sqlite3:execute db (conc "CREATE INDEX IF NOT EXISTS runs_index ON runs (runname" (if havekeys "," "") keystr ");"))
	;; (sqlite3:execute db "CREATE VIEW runs_tests AS SELECT * FROM runs INNER JOIN tests ON runs.id=tests.run_id;")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS extradat (id INTEGER PRIMARY KEY, run_id INTEGER, key TEXT, val TEXT);")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS metadat (id INTEGER PRIMARY KEY, var TEXT, val TEXT,
                                  CONSTRAINT metadat_constraint UNIQUE (var));")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS access_log (id INTEGER PRIMARY KEY, user TEXT, accessed TIMESTAMP, args TEXT);")
	;; Must do this *after* running patch db !! No more. 
	;; cannot use db:set-var since it will deadlock, hardwire the code here
	(sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" "MEGATEST_VERSION" (common:version-signature))
	(debug:print-info 11 *default-log-port* "db:initialize END") ;; ))))

	;;======================================================================
	;; R U N   S P E C I F I C   D B 
	;;======================================================================
	
	;; (define (db:initialize-run-id-db db)
	;;   (sqlite3:with-transaction 
	;;    db
	;;    (lambda ()
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS tests 
                    (id INTEGER PRIMARY KEY,
                     run_id       INTEGER   DEFAULT -1,
                     testname     TEXT      DEFAULT 'noname',
                     host         TEXT      DEFAULT 'n/a',
                     cpuload      REAL      DEFAULT -1,
                     diskfree     INTEGER   DEFAULT -1,
                     uname        TEXT      DEFAULT 'n/a', 
                     rundir       TEXT      DEFAULT '/tmp/badname',
                     shortdir     TEXT      DEFAULT '/tmp/badname',
                     item_path    TEXT      DEFAULT '',
                     state        TEXT      DEFAULT 'NOT_STARTED',
                     status       TEXT      DEFAULT 'FAIL',
                     attemptnum   INTEGER   DEFAULT 0,
                     final_logf   TEXT      DEFAULT 'logs/final.log',
                     logdat       TEXT      DEFAULT '', 
                     run_duration INTEGER   DEFAULT 0,
                     comment      TEXT      DEFAULT '',
                     event_time   TIMESTAMP DEFAULT (strftime('%s','now')),
                     fail_count   INTEGER   DEFAULT 0,
                     pass_count   INTEGER   DEFAULT 0,
                     archived     INTEGER   DEFAULT 0, -- 0=no, > 1=archive block id where test data can be found
                     last_update  INTEGER DEFAULT (strftime('%s','now')),
                        CONSTRAINT testsconstraint UNIQUE (run_id, testname, item_path));")
	;; deprecated -- (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_index ON tests (run_id, testname, item_path, uname);")
        
        (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_run_id_index ON tests (run_id);")  ;; new
        (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_testname_index ON tests (testname,item_path);") ;; new
        (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_state_status_index ON tests (state, status); ") ;; new

         ;; All triggers created at once in end
	;;(sqlite3:execute db "CREATE TRIGGER  IF NOT EXISTS update_tests_trigger AFTER UPDATE ON tests
        ;;                     FOR EACH ROW
        ;;                       BEGIN 
        ;;                         UPDATE tests SET last_update=(strftime('%s','now'))
        ;;                           WHERE id=old.id;
        ;;                       END;")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_steps 
                              (id INTEGER PRIMARY KEY,
                               test_id INTEGER, 
                               stepname TEXT, 
                               state TEXT DEFAULT 'NOT_STARTED', 
                               status TEXT DEFAULT 'n/a',
                               event_time TIMESTAMP,
                               comment TEXT DEFAULT '',
                               logfile TEXT DEFAULT '',
                               last_update  INTEGER DEFAULT (strftime('%s','now')),
                               CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state));")
	(sqlite3:execute db "CREATE INDEX IF NOT EXISTS teststeps_index ON tests (run_id, testname, item_path);")
        ;; All triggers created at once in end
	;;(sqlite3:execute db "CREATE TRIGGER  IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps
        ;;                     FOR EACH ROW
        ;;                       BEGIN 
        ;;                         UPDATE test_steps SET last_update=(strftime('%s','now'))
        ;;                           WHERE id=old.id;
        ;;                       END;")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_data (id INTEGER PRIMARY KEY,
                                test_id INTEGER,
                                category TEXT DEFAULT '',
                                variable TEXT,
	                        value REAL,
	                        expected REAL,
	                        tol REAL,
                                units TEXT,
                                comment TEXT DEFAULT '',
                                status TEXT DEFAULT 'n/a',
                                type TEXT DEFAULT '',
                                last_update  INTEGER DEFAULT (strftime('%s','now')),
                              CONSTRAINT test_data_constraint UNIQUE (test_id,category,variable));")
	(sqlite3:execute db "CREATE INDEX IF NOT EXISTS test_data_index ON test_data (test_id);")
        ;; All triggers created at once in end
	;;(sqlite3:execute db "CREATE TRIGGER  IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data
        ;;                     FOR EACH ROW
        ;;                       BEGIN 
        ;;                         UPDATE test_data SET last_update=(strftime('%s','now'))
        ;;                           WHERE id=old.id;
        ;;                       END;")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_rundat (
                              id           INTEGER PRIMARY KEY,
                              test_id      INTEGER,
                              update_time  TIMESTAMP,
                              cpuload      INTEGER DEFAULT -1,
                              diskfree     INTEGER DEFAULT -1,
                              diskusage    INTGER DEFAULT -1,
                              run_duration INTEGER DEFAULT 0);")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS archives (
                              id           INTEGER PRIMARY KEY,
                              test_id      INTEGER,
                              state        TEXT DEFAULT 'new',
                              status       TEXT DEFAULT 'n/a',
                              archive_type TEXT DEFAULT 'bup',
                              du           INTEGER,
                              archive_path TEXT);")))
         ;; (print "creating trigges from init") 
        (db:create-triggers db)    
     db)) ;; )

(define (db:create-all-triggers dbstruct)
(db:with-db
   dbstruct #f #f
   (lambda (db)
(db:create-triggers db))))

(define (db:create-triggers db)
    (for-each (lambda (key)
              (sqlite3:execute db (cadr key)))
          db:trigger-list))

(define (db:drop-all-triggers dbstruct)
  (db:with-db
   dbstruct #f #f
   (lambda (db)
     (db:drop-triggers db))))

(define (db:is-trigger-dropped db tbl-name)
  (let* ((trigger-name (if (equal? tbl-name "test_steps")
			   "update_teststeps_trigger" 
                           (conc "update_" tbl-name "_trigger")))
	 (res          #f))
    (sqlite3:for-each-row
     (lambda (name)
       (if (equal? name trigger-name)
	   (set! res #t)))
     db 
     "SELECT name FROM sqlite_master WHERE type = 'trigger' ;" 
     )))

(define (db:drop-triggers db)
  (for-each
   (lambda (key) 
     (sqlite3:execute db (conc "drop trigger if exists " (car key))))
   db:trigger-list))

(define  (db:drop-trigger db tbl-name)
  (let* ((trigger-name (if (equal? tbl-name "test_steps")
			   "update_teststeps_trigger" 
                           (conc "update_" tbl-name "_trigger"))))
    (for-each
     (lambda (key) 
       (if (equal? (car key) trigger-name)
           (sqlite3:execute db (conc "drop trigger if exists " trigger-name))))
     db:trigger-list)))

(define  (db:create-trigger db tbl-name)
      (let* ((trigger-name (if (equal? tbl-name "test_steps")
                              "update_teststeps_trigger" 
                              (conc "update_" tbl-name "_trigger"))))
       (for-each (lambda (key) 
             (if (equal? (car key) trigger-name)
             (sqlite3:execute db (cadr key))))
      db:trigger-list))) 

;; Get/open a database
;;    if run-id => get run specific db
;;    if #f     => get main db
;;    if db already open - return inmem
;;    if db not open, open inmem, rundb and sync then return inmem
;;    inuse gets set automatically for rundb's
;;
(define (db:get-db dbstruct) ;;  run-id)
  (assert *db-cache-path* "ERROR: Attempt to call db:get-db before /tmp area is set up")
  (if (stack? (dbr:dbstruct-dbstack dbstruct))
      (if (stack-empty? (dbr:dbstruct-dbstack dbstruct))
          (let ((newdb (db:open-megatest-db path: *db-cache-path*))) ;; (common:get-db-tmp-area)))) ;; db:dbfile-path))))
            ;; (stack-push! (dbr:dbstruct-dbstack dbstruct) newdb)
            newdb)
          (stack-pop! (dbr:dbstruct-dbstack dbstruct)))
      (db:open-db dbstruct)))

;; ;; legacy handling of structure for managing db's. Refactor this into dbr:?
(define (db:dbdat-get-db dbdat)
  (if (pair? dbdat)
      (car dbdat)
      dbdat))

(define (db:dbdat-get-path dbdat)
  (if (pair? dbdat)
      (cdr dbdat)
      #f))

(define-inline (db:generic-error-printout exn . message)
  (print-call-chain (current-error-port))
  (apply debug:print-error 0 *default-log-port* message)
  (debug:print-error 0 *default-log-port* "   params: " params
		     ", error: "     ((condition-property-accessor 'exn 'message)   exn)
		     ", arguments: " ((condition-property-accessor 'exn 'arguments) exn)
		     ", location: "  ((condition-property-accessor 'exn 'location)  exn)
		     ))

;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;")
;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no
;;
(define (db:with-db dbstruct run-id r/w proc . params)
  (let* ((have-struct (dbr:dbstruct? dbstruct))
         (dbdat     (if have-struct 
			(db:get-db dbstruct)
			#f))
	 (db        (if have-struct
			(db:dbdat-get-db dbdat)
			dbstruct))
	 (fname     (db:dbdat-get-path dbdat))
	 (use-mutex (> *api-process-request-count* 25))) ;; was 25
    (if (and use-mutex
	     (common:low-noise-print 120 "over-50-parallel-api-requests"))
	(debug:print-info 0 *default-log-port* *api-process-request-count* " parallel api requests being processed in process " (current-process-id) ", throttling access"))
    (if (common:low-noise-print 600 (conc "parallel-api-requests" *max-api-process-requests*))
	(debug:print-info 2 *default-log-port* "Parallel api request count: " *api-process-request-count* " max parallel requests: " *max-api-process-requests*))
    (condition-case
     (begin
       (if use-mutex (mutex-lock! *db-with-db-mutex*))
       (let ((res (apply proc db params)))
	 (if use-mutex (mutex-unlock! *db-with-db-mutex*))
	 ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w))
	 (if dbdat (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat))
	 res))
     (exn (io-error)
	  (db:generic-error-printout exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again."))
     (exn (corrupt)
	  (db:generic-error-printout exn "ERROR: database " fname " is corrupt. Repair it to proceed."))
     (exn (busy)
	  (db:generic-error-printout exn "ERROR: database " fname
				     " is locked. Try copying to another location, remove original and copy back."))
     (exn (permission)(db:generic-error-printout exn "ERROR: database " fname " has some permissions problem."))
     (exn ()
	  (db:generic-error-printout exn "ERROR: Unknown error with database " fname " message: "
		       ((condition-property-accessor 'exn 'message) exn))))))
      
;; This routine creates the db if not already present. It is only called if the db is not already opened
;;
(define (db:open-db dbstruct #!key (areapath #f)(do-sync #t)) ;; TODO: actually use areapath
  (assert *db-cache-path* "ERROR: db:open-db called before /tmp area has been setup")
  (let ((tmpdb-stack (dbr:dbstruct-dbstack dbstruct))) ;; RA => Returns the first reference in dbstruct
    (if (stack? tmpdb-stack)
	(db:get-db tmpdb-stack) ;; get previously opened db (will create new db handle if all in the stack are already used
        (let* ((max-stale-tmp 10) ;; (configf:lookup-number *configdat* "server" "filling-db-max-stale-seconds" default: 10))
               (dbpath       *db-cache-path*) ;;; (db:dbfile-path ))      ;; path to tmp db area
               (dbexists     (common:file-exists? dbpath))
	       (tmpdbfname   (conc dbpath "/megatest.db"))
	       (dbfexists    (common:file-exists? tmpdbfname))  ;; (conc dbpath "/megatest.db")))
               (mtdbexists   (common:file-exists? (conc *toppath* "/megatest.db")))
							 
               (mtdbmodtime  (if mtdbexists (common:lazy-sqlite-db-modification-time (conc *toppath* "/megatest.db"))   #f))
	        		 (tmpdbmodtime (if dbfexists  (common:lazy-sqlite-db-modification-time tmpdbfname) #f)) 
               (mtdb         (db:open-megatest-db))
               (mtdbpath     (db:dbdat-get-path mtdb))
               (tmpdb        (db:open-megatest-db path: dbpath)) ;; lock-create-open dbpath db:initialize-main-db))
               (refndb       (db:open-megatest-db path: dbpath name: "megatest_ref.db"))
               (write-access (file-write-access? mtdbpath))
	       ;(mtdbmodtime  (if mtdbexists (common:lazy-sqlite-db-modification-time mtdbpath)   #f)) ; moving this before db:open-megatest-db is called. if wal mode is on -WAL and -shm file get created with causing the  tmpdbmodtime timestamp always greater than mtdbmodtime
	       ;(tmpdbmodtime (if dbfexists  (common:lazy-sqlite-db-modification-time tmpdbfname) #f)) 
					;if wal mode is on -WAL and -shm file get created when db:open-megatest-db is called. modtimedelta will always be < 10 so db in tmp not get synced
          ;(tmpdbmodtime (if dbfexists (db:get-last-update-time (car tmpdb)) #f))    
          ;(fmt (file-modification-time tmpdbfname))
	       (modtimedelta (and mtdbmodtime tmpdbmodtime (- mtdbmodtime tmpdbmodtime))))

	  (assert (sqlite3:database? (car mtdb)) "ERROR: in db:open-db and mtdb does not contain database!")
          (when write-access
            (sqlite3:execute (car mtdb) "drop trigger if exists update_tests_trigger")
            (sqlite3:execute (car mtdb) "drop trigger if exists update_runs_trigger"))
          
         ;(print "mtdbmodtime " mtdbmodtime " tmpdbmodtime " tmpdbmodtime " mtdbpath " mtdbpath " " (conc *toppath* "/megatest.db"))
	        ;;(debug:print-info 13 *default-log-port* "db:open-db>> mtdbpath="mtdbpath" mtdbexists="mtdbexists" and write-access="write-access)
          (if (and dbexists (not write-access))
              (begin
                (set! *db-write-access* #f)
                (dbr:dbstruct-read-only-set! dbstruct #t)))
          (dbr:dbstruct-mtdb-set!   dbstruct mtdb)
          (dbr:dbstruct-tmpdb-set!  dbstruct tmpdb)
          (dbr:dbstruct-dbstack-set! dbstruct (make-stack)) ;; BB: why a stack?  Why would the number of db's be indeterminate?  Is this a legacy of 1.db 2.db .. ?
          (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb) ;; olddb is already a (cons db path)
          (dbr:dbstruct-refndb-set! dbstruct refndb)
          (if (and  (or (not dbfexists)
			(and modtimedelta
			     (> modtimedelta max-stale-tmp))) ;; if db in tmp is over ten seconds older than the file in MTRA then do a sync back
		    do-sync)
	      (begin
		(debug:print 1 *default-log-port* "filling db " (db:dbdat-get-path tmpdb) " with data \n    from " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta)
		(db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb refndb tmpdb)
    ;touch tmp db to avoid wal mode wierdness  
     (set! (file-modification-time tmpdbfname) (current-seconds))  
                (debug:print-info 13 *default-log-port* "db:sync-all-tables-list done.")
                )
	      (debug:print 4 *default-log-port* " db, " (db:dbdat-get-path tmpdb) " already exists or fresh enough, not propogating data from\n     " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) )
	  ;; (db:multi-db-sync dbstruct 'old2new))  ;; migrate data from megatest.db automatically
          tmpdb))))

;; just tests, test_steps and test_data tables
(define db:sync-tests-only
  (list
   ;; (list "strs"
   ;;       '("id"             #f)
   ;;       '("str"            #f))
   (list "tests" 
	 '("id"             #f)
	 '("run_id"         #f)
	 '("testname"       #f)
	 '("host"           #f)
	 '("cpuload"        #f)
	 '("diskfree"       #f)
	 '("uname"          #f)
	 '("rundir"         #f)
	 '("shortdir"       #f)
	 '("item_path"      #f)
	 '("state"          #f)
	 '("status"         #f)
	 '("attemptnum"     #f)
	 '("final_logf"     #f)
	 '("logdat"         #f)
	 '("run_duration"   #f)
	 '("comment"        #f)
	 '("event_time"     #f)
	 '("fail_count"     #f)
	 '("pass_count"     #f)
	 '("archived"       #f)
         '("last_update"    #f))
  (list "test_steps"
	 '("id"             #f)
	 '("test_id"        #f)
	 '("stepname"       #f)
	 '("state"          #f)
	 '("status"         #f)
	 '("event_time"     #f)
	 '("comment"        #f)
	 '("logfile"        #f)
         '("last_update"    #f))
   (list "test_data"
	 '("id"             #f)
	 '("test_id"        #f)
	 '("category"       #f)
	 '("variable"       #f)
	 '("value"          #f)
	 '("expected"       #f)
	 '("tol"            #f)
	 '("units"          #f)
	 '("comment"        #f)
	 '("status"         #f)
	 '("type"           #f)
         '("last_update"    #f))))

;; needs db to get keys, this is for syncing all tables
;;
(define (db:sync-main-list dbstruct)
  (assert *db-keys* "ERROR: db:sync-main-list called before *db-keys* has been setup")
  (let ((keys  *db-keys*)) ;; (db:get-keys dbstruct)))
    (list
     (list "keys"
	   '("id"        #f)
	   '("fieldname" #f)
	   '("fieldtype" #f))
     (list "metadat" '("var" #f) '("val" #f))
     (append (list "runs" 
		   '("id"  #f))
	     (map (lambda (k)(list k #f))
		  (append keys
			  (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update"))))
     (list "archive_disks"
           '("id" #f)
           '("archive_area_name" #f) 
           '("disk_path" #f)
           '("last_df" #f)
           '("last_df_time" #f)
           '("creation_time" #f)) 

     (list "archive_blocks"
           '("id" #f)
           '("archive_disk_id" #f) 
           '("disk_path" #f)
           '("last_du" #f)
           '("last_du_time" #f)
           '("creation_time" #f)) 

     (list "test_meta"
	   '("id"             #f)
	   '("testname"       #f)
	   '("owner"          #f)
	   '("description"    #f)
	   '("reviewed"       #f)
	   '("iterated"       #f)
	   '("avg_runtime"    #f)
	   '("avg_disk"       #f)
	   '("tags"           #f)
	   '("jobgroup"       #f)))))

(define (db:sync-all-tables-list dbstruct)
  (append (db:sync-main-list dbstruct)
	  db:sync-tests-only))

;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) )
;; db's are dbdat's
;;
;; if last-update specified ("field-name" . time-in-seconds)
;;    then sync only records where field-name >= time-in-seconds
;;    IFF field-name exists
;;
(define (db:sync-tables tbls last-update fromdb todb . slave-dbs)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable in db:sync-tables.")
     (print-call-chain (current-error-port))
     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
     (debug:print 5 *default-log-port* "exn=" (condition->list exn))
     (debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
     (debug:print 0 *default-log-port* " src db:  " (db:dbdat-get-path fromdb))
     (for-each (lambda (dbdat)
		 (let ((dbpath (db:dbdat-get-path dbdat)))
		   (debug:print 0 *default-log-port* " dbpath:  " dbpath)
		   (if (not (db:repair-db dbdat))
		       (begin
			 (debug:print-error 0 *default-log-port* "Failed to rebuild " dbpath ", exiting now.")
			 (exit)))))
	       (cons todb slave-dbs))
     
     0)
   ;; this is the work to be done
   (cond
    ((not fromdb) (debug:print 3 *default-log-port* "WARNING: db:sync-tables called with fromdb missing")
     -1)
    ((not todb)   (debug:print 3 *default-log-port* "WARNING: db:sync-tables called with todb missing")
     -2)
    ((not (sqlite3:database? (db:dbdat-get-db fromdb)))
     (debug:print-error 0 *default-log-port* "db:sync-tables called with fromdb not a database " fromdb)
     -3)
    ((not (sqlite3:database? (db:dbdat-get-db todb)))
     (debug:print-error 0 *default-log-port* "db:sync-tables called with todb not a database " todb)
     -4)

    ((not (file-write-access? (db:dbdat-get-path todb)))
     (debug:print-error 0 *default-log-port* "db:sync-tables called with todb not a read-only database " todb)
     -5)
    ((not (null? (let ((readonly-slave-dbs
                        (filter
                         (lambda (dbdat)
                           (not (file-write-access? (db:dbdat-get-path todb))))
                         slave-dbs)))
                   (for-each
                    (lambda (bad-dbdat)
                      (debug:print-error
                       0 *default-log-port* "db:sync-tables called with todb not a read-only database " bad-dbdat))
                    readonly-slave-dbs)
                   readonly-slave-dbs))) -6)
    (else
     (let ((stmts       (make-hash-table)) ;; table-field => stmt
	   (all-stmts   '())              ;; ( ( stmt1 value1 ) ( stml2 value2 ))
	   (numrecs     (make-hash-table))
	   (start-time  (current-milliseconds))
	   (tot-count   0))
       (for-each ;; table
	(lambda (tabledat)
	  (let* ((tablename        (car tabledat))
		 (fields           (cdr tabledat))
		 (has-last-update  (member "last_update" fields))
		 (use-last-update  (cond
				    ((and has-last-update
					  (member "last_update" fields))
				     #t) ;; if given a number, just use it for all fields
				    ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table
				    ((and (pair? last-update)
					  (member (car last-update)    ;; last-update field name
						  (map car fields)))
                                        #t)
				    (last-update
				     (debug:print 0 *default-log-port* "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update);; found in fields
				     #f)
				    (else
				     #f)))
		 (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for
					(if (number? last-update)
					    last-update
					    (cdr last-update))
					#f))
		 (last-update-field (if use-last-update
					(if (number? last-update)
					    "last_update"
					    (car last-update))
					#f))
		 (num-fields (length fields))
		 (field->num (make-hash-table))
		 (num->field (apply vector (map car fields))) ;; BBHERE
		 (full-sel   (conc "SELECT " (string-intersperse (map car fields) ",") 
				   " FROM " tablename (if use-last-update ;; apply last-update criteria
							  (conc " WHERE " last-update-field " >= " last-update-value)
							  "")
				   ";"))
		 (full-ins   (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) "
				   " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );"))
		 (fromdat    '())
		 (fromdats   '())
		 (totrecords 0)
		 (batch-len  100) ;; (string->number (or (configf:lookup *configdat* "sync" "batchsize") "100")))
		 (todat      (make-hash-table))
		 (count      0)
                 (field-names (map car fields))
                 (delay-handicap 0) ;; (string->number (or (configf:lookup *configdat* "sync" "delay-handicap") "0")))
                 )

	    ;; set up the field->num table
	    (for-each
	     (lambda (field)
	       (hash-table-set! field->num field count)
	       (set! count (+ count 1)))
	     fields)

	    ;; read the source table
	    (sqlite3:for-each-row
	     (lambda (a . b)
	       (set! fromdat (cons (apply vector a b) fromdat))
	       (if (> (length fromdat) batch-len)
		   (begin
		     (set! fromdats (cons fromdat fromdats))
		     (set! fromdat  '())
		     (set! totrecords (+ totrecords 1)))))
	     (db:dbdat-get-db fromdb)
	     full-sel)
	    
	    ;; tack on remaining records in fromdat
	    (if (not (null? fromdat))
		(set! fromdats (cons fromdat fromdats)))

	    (if (common:low-noise-print 120 "sync-records")
		(debug:print-info 4 *default-log-port* "found " totrecords " records to sync"))

	    ;; read the target table; BBHERE
	    (sqlite3:for-each-row
	     (lambda (a . b)
	       (hash-table-set! todat a (apply vector a b)))
	     (db:dbdat-get-db todb)
	     full-sel)

            (when (and delay-handicap (> delay-handicap 0))
              (debug:print-info 0 *default-log-port* "imposing synthetic sync delay of "delay-handicap" seconds since sync/delay-handicap is configured")
              (thread-sleep! delay-handicap)
              (debug:print-info 0 *default-log-port* "synthetic sync delay of "delay-handicap" seconds completed")
              )
            
	    ;; first pass implementation, just insert all changed rows
	    (for-each 
	     (lambda (targdb)
	       (let* ((db                 (db:dbdat-get-db targdb))
                      (drp-trigger        (if (member "last_update" field-names)
					      (db:drop-trigger db tablename) 
					      #f))
                      (is-trigger-dropped (if (member "last_update" field-names)
                                              (db:is-trigger-dropped db tablename)
					      #f)) 
		      (stmth  (sqlite3:prepare db full-ins)))
		 ;; (db:delay-if-busy targdb) ;; NO WAITING
                 (if (member "last_update" field-names)
                     (debug:print-info 3 *default-log-port* "is-trigger-dropped: " is-trigger-dropped)) 
		 (for-each
		  (lambda (fromdat-lst)
		    (sqlite3:with-transaction
		     db
		     (lambda ()
		       (for-each ;; 
			(lambda (fromrow)
			  (let* ((a    (vector-ref fromrow 0))
				 (curr (hash-table-ref/default todat a #f))
				 (same #t))
			    (let loop ((i 0))
			      (if (or (not curr)
				      (not (equal? (vector-ref fromrow i)(vector-ref curr i))))
				  (set! same #f))
			      (if (and same
				       (< i (- num-fields 1)))
				  (loop (+ i 1))))
			    (if (not same)
				(begin
				  (apply sqlite3:execute stmth (vector->list fromrow))
				  (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0)))))))
			fromdat-lst))))
		  fromdats)
		 (sqlite3:finalize! stmth)
                 (if (member "last_update" field-names)
                    (db:create-trigger db tablename))))
	     (append (list todb) slave-dbs))))
	tbls)
       (let* ((runtime      (- (current-milliseconds) start-time))
	      (should-print (or (debug:debug-mode 12)
				(common:low-noise-print 120 "db sync" (> runtime 500))))) ;; low and high sync times treated as separate.
	 (if should-print (debug:print 3 *default-log-port* "INFO: db sync, total run time " runtime " ms"))
	 (for-each 
	  (lambda (dat)
	    (let ((tblname (car dat))
		  (count   (cdr dat)))
	      (set! tot-count (+ tot-count count))
	      (if (> count 0)
		  (if should-print (debug:print 0 *default-log-port* (format #f "    ~10a ~5a" tblname count))))))
	  (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b))))))
       tot-count)))))

(define db:trigger-list 
     (list (list "update_runs_trigger"  "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs
                             FOR EACH ROW
                               BEGIN 
                                 UPDATE runs SET last_update=(strftime('%s','now'))
                                   WHERE id=old.id;
                               END;" ) 
	   (list "update_run_stats_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats
                             FOR EACH ROW
                               BEGIN 
                                 UPDATE run_stats SET last_update=(strftime('%s','now'))
                                   WHERE id=old.id;
                               END;" )
	   (list "update_tests_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_tests_trigger AFTER UPDATE ON tests
                             FOR EACH ROW
                               BEGIN 
                                 UPDATE tests SET last_update=(strftime('%s','now'))
                                   WHERE id=old.id;
                               END;" )
	   (list "update_teststeps_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps
                             FOR EACH ROW
                               BEGIN 
                                 UPDATE test_steps SET last_update=(strftime('%s','now'))
                                   WHERE id=old.id;
                               END;" )
	   (list "update_test_data_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data
                             FOR EACH ROW
                               BEGIN 
                                 UPDATE test_data SET last_update=(strftime('%s','now'))
                                   WHERE id=old.id;
                               END;" )))

;; return #f to indicate the dbdat should be closed/reopened
;; else return dbdat
;;
(define (db:repair-db dbdat #!key (numtries 1))
  (let* ((dbpath   (db:dbdat-get-path        dbdat))
	 (dbdir    (pathname-directory       dbpath))
	 (fname    (pathname-strip-directory dbpath)))
    (debug:print-info 0 *default-log-port* "Checking db " dbpath " for errors.")
    (cond
     ((not (file-write-access? dbdir))
      (debug:print 0 *default-log-port* "WARNING: can't write to " dbdir ", can't fix " fname)
      #f)

     ;; handle special cases, megatest.db and monitor.db
     ;; 
     ;;  NOPE: apply this same approach to all db files
     ;;
     (else ;; ((equal? fname "megatest.db") ;; this file can be regenerated if needed
      (handle-exceptions
       exn
       (begin
	 (print "Problems trying to repair the db, exn=" exn)
	 ;; (db:move-and-recreate-db dbdat)
	 (if (> numtries 0)
	     (db:repair-db dbdat numtries: (- numtries 1))
	     #f)
	 (debug:print 0 *default-log-port* "FATAL: file " dbpath " was found corrupted, an attempt to fix has been made but you must start over.")
	 (debug:print 0 *default-log-port*
		      "   check the following:\n"
		      "      1. full directories, look in ~/ /tmp and " dbdir "\n"
		      "      2. write access to " dbdir "\n\n"
		      "   if the automatic recovery failed you may be able to recover data by doing \"" 
		      (if (member fname '("megatest.db" "monitor.db"))
			  "megatest -cleanup-db"
			  "megatest -import-megatest.db;megatest -cleanup-db")
		      "\"\n")
	 (exit) ;; we can not safely continue when a db was corrupted - even if fixed.
	 )
       ;; test read/write access to the database
       (let ((db (sqlite3:open-database dbpath)))
	 (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))
	 (sqlite3:execute db "PRAGMA synchronous = 0;")
	 (cond
	  ((equal? fname "megatest.db")
	   (sqlite3:execute db "DELETE FROM tests WHERE state='DELETED';"))
	  ((equal? fname "main.db")
	   (sqlite3:execute db "DELETE FROM runs WHERE state='deleted';"))
	  ;; this was for the 1.db, 2.db ...
	  #;((string-match "\\d.db" fname)
	   (sqlite3:execute db "UPDATE tests SET state='DELETED' WHERE state='DELETED';"))
	  ((equal? fname "monitor.db")
	   (sqlite3:execute "DELETE FROM servers WHERE state LIKE 'defunct%';"))
	  (else
	   (sqlite3:execute db "vacuum;")))
	 
	 (sqlite3:finalize! db)
	 #t))))))
    
)