Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -19,15 +19,17 @@ ;;====================================================================== (declare (unit api)) (declare (uses db)) (declare (uses debugprint)) +(declare (uses commonmod)) (declare (uses dbmod)) (declare (uses dbfile)) (declare (uses tasks)) (declare (uses tcp-transportmod)) +(import commonmod) (import dbmod) (import dbfile) (import debugprint) (import tcp-transportmod) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -68,12 +68,12 @@ (define *number-of-writes* 0) (define *number-non-write-queries* 0) (import debugprint) -(import dbmod) (import dbfile) +(import dbmod) (import rmtmod) ;; record for keeping state,status and count for doing roll-ups in ;; iterated tests ;; @@ -80,17 +80,16 @@ (defstruct dbr:counts (state #f) (status #f) (count 0)) - -(define (db:with-db dbstruct run-id r/w proc . params) - (case (rmt:transport-mode) - ((http)(dbfile:with-db dbstruct run-id r/w proc params)) - ((tcp) (dbmod:with-db dbstruct run-id r/w proc params)) - ((nfs) (dbmod:with-db dbstruct run-id r/w proc params)) - (else (assert #f "FATAL: db:with-db called with non-existant transport mode")))) +;; (define (db:with-db dbstruct run-id r/w proc . params) +;; (case (rmt:transport-mode) +;; ((http)(dbfile:with-db dbstruct run-id r/w proc params)) +;; ((tcp) (dbmod:with-db dbstruct run-id r/w proc params)) +;; ((nfs) (dbmod:with-db dbstruct run-id r/w proc params)) +;; (else (assert #f "FATAL: db:with-db called with non-existant transport mode")))) ;;====================================================================== ;; hash of hashs ;;====================================================================== @@ -1128,11 +1127,13 @@ ;; select end_time-now from ;; (select testname,item_path,event_time+run_duration as ;; end_time,strftime('%s','now') as now from tests where state in ;; ('RUNNING','REMOTEHOSTSTART','LAUNCHED')); - +;; +;; NOT EASY TO MIGRATE TO db{file,mod} +;; (define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime) (let* ((incompleted '()) (oldlaunched '()) (toplevels '()) ;; The default running-deadtime is 720 seconds = 12 minutes. @@ -1384,70 +1385,10 @@ ;; (db:find-and-mark-incomplete db) ;; (db:delay-if-busy dbdat) (sqlite3:execute db "VACUUM;") dead-runs)) -;;====================================================================== -;; M E T A G E T A N D S E T V A R S -;;====================================================================== - -;; returns number if string->number is successful, string otherwise -;; also updates *global-delta* -;; -(define (db:get-var dbstruct var) - (let* ((res #f)) - (db:with-db - dbstruct #f #f ;; for the moment vars are only stored in main.db - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (val) - (set! res val)) - db - "SELECT val FROM metadat WHERE var=?;" var) - ;; convert to number if can - (if (string? res) - (let ((valnum (string->number res))) - (if valnum (set! res valnum)))) - res)))) - -(define (db:inc-var dbstruct var) - (db:with-db dbstruct #f #t - (lambda (dbdat db) - (sqlite3:execute db "UPDATE metadat SET val=val+1 WHERE var=?;" var)))) - -(define (db:dec-var dbstruct var) - (db:with-db dbstruct #f #t - (lambda (dbdat db) - (sqlite3:execute db "UPDATE metadat SET val=val-1 WHERE var=?;" var)))) - -;; This was part of db:get-var. It was used to estimate the load on -;; the database files. -;; -;; scale by 10, average with current value. -;; (set! *global-delta* (/ (+ *global-delta* (* (- (current-milliseconds) start-ms) -;; (if throttle throttle 0.01))) -;; 2)) -;; (if (> (abs (- *last-global-delta-printed* *global-delta*)) 0.08) ;; don't print all the time, only if it changes a bit -;; (begin -;; (debug:print-info 4 *default-log-port* "launch throttle factor=" *global-delta*) -;; (set! *last-global-delta-printed* *global-delta*))) - -(define (db:set-var dbstruct var val) - (db:with-db dbstruct #f #t - (lambda (dbdat db) - (sqlite3:execute (db:get-cache-stmth dbdat db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);") var val)))) - -(define (db:add-var dbstruct var val) - (db:with-db dbstruct #f #t - (lambda (dbdat db) - (sqlite3:execute (db:get-cache-stmth dbdat db "UPDATE metadat SET val=val+? WHERE var=?;") val var)))) - -(define (db:del-var dbstruct var) - (db:with-db dbstruct #f #t - (lambda (dbdat db) - (sqlite3:execute (db:get-cache-stmth dbdat db "DELETE FROM metadat WHERE var=?;") var)))) - ;;====================================================================== ;; no-sync.db - small bits of data to be shared between servers ;;====================================================================== (define (db:get-dbsync-path) @@ -1500,33 +1441,26 @@ ;;====================================================================== ;; R U N S ;;====================================================================== - - - - (define (db:get-run-times dbstruct run-patt target-patt) (let ((res `()) - (qry (conc "select runname, (max(end_time)-min(event_time))/60 as runtime, target from (select runname, run_id,tests.event_time,tests.event_time+run_duration AS end_time, " (string-join (db:get-keys dbstruct) " || '/' || ") " as target from tests inner join runs on tests.run_id = runs.id where runs.runname like ? and target like ?) group by run_id ;"))) -;(print qry) -(db:with-db + (qry (conc "select runname, (max(end_time)-min(event_time))/60 as runtime, target from (select runname, run_id,tests.event_time,tests.event_time+run_duration AS end_time, " (string-join (db:get-keys dbstruct) " || '/' || ") " as target from tests inner join runs on tests.run_id = runs.id where runs.runname like ? and target like ?) group by run_id ;"))) + ;(print qry) + (db:with-db dbstruct #f ;; this is for the main runs db #f ;; does not modify db (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (runname runtime target ) - (set! res (cons (vector runname runtime target) res))) - db - qry - run-patt target-patt) - - res)))) - - + (sqlite3:for-each-row + (lambda (runname runtime target ) + (set! res (cons (vector runname runtime target) res))) + db + qry + run-patt target-patt) + res)))) (define (db:get-run-name-from-id dbstruct run-id) (db:with-db dbstruct #f ;; this is for the main runs db Index: dbfile.scm ================================================================== --- dbfile.scm +++ dbfile.scm @@ -1143,87 +1143,89 @@ (lambda () (print fname" run-id="run-id" params="params) )) crumbn)) -;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;") -;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no -;; -(define (dbfile:with-db dbstruct run-id r/w proc params) - (assert dbstruct "FATAL: db:with-db called with dbstruct "#f) - (assert (dbr:dbstruct? dbstruct) "FATAL: dbstruct is "dbstruct) - ;; Testing 2023, March 14th. I went from full time use of the mutext to no use at all and - ;; didn't see much change in the frequency of the messages: - ;; Warning (#): in thread: (bind!) bad parameter or other API misuse - ;; allowing request count to go up to 1000 and other crashes showed up: - ;; Warning (#): in thread: (deserialize) unexpected end of input: # - ;; - ;; leave it fully on for now, test later if there is a performance issue - ;; - (let* ((use-mutex #t) ;;(> *api-process-request-count* 50)) ;; risk of db corruption - (have-struct (dbr:dbstruct? dbstruct)) - (dbdat (if have-struct ;; this stuff just allows us to call with a db handle directly - (db:open-db dbstruct run-id (dbfile:db-init-proc)) ;; (dbfile:get-subdb dbstruct run-id) - #f)) - (db (if have-struct ;; this stuff just allows us to call with a db handle directly - (dbr:dbdat-dbh dbdat) - dbstruct)) - (fname (if dbdat - (dbr:dbdat-dbfile dbdat) - "nofilenameavailable")) - (jfile (conc fname"-journal")) - (qryproc (lambda () - (if use-mutex (mutex-lock! *db-with-db-mutex*)) - (let ((res (apply proc dbdat db params))) ;; the actual call is here. - (if use-mutex (mutex-unlock! *db-with-db-mutex*)) - ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w)) - (if dbdat - (dbfile:add-dbdat dbstruct run-id dbdat)) - ;; (delete-file* crumbfile) - res))) - (stop-train (conc (dbr:dbstruct-areapath dbstruct)"/stop-the-train"))) - - (assert (sqlite3:database? db) "FATAL: db:with-db, db is not a database, db="db - ", fname="fname) - (if (file-exists? jfile) - (begin - (dbfile:print-err "INFO: "jfile" exists, delaying to reduce database load") - (thread-sleep! 0.2))) - (if (and use-mutex - (common:low-noise-print 120 "over-50-parallel-api-requests")) - (dbfile:print-err *api-process-request-count* - " parallel api requests being processed in process " - (current-process-id))) ;; ", throttling access")) - (case (no-condition-db-with-db) - ((production)(qryproc)) - ((suicide-mode) - (handle-exceptions - exn - (with-output-to-file stop-train - (lambda () - (db:generic-error-printout exn "Stop train mode, run-id: "run-id - " params: "params" proc: "proc))) - (qryproc))) - (else - (condition-case - (qryproc) - (exn (io-error) - (db:generic-error-printout exn "ERROR: i/o error with "fname - ". Check permissions, disk space etc. and try again.")) - (exn (corrupt) - (db:generic-error-printout exn "ERROR: database "fname - " is corrupt. Repair it to proceed.")) - (exn (busy) - (db:generic-error-printout exn "ERROR: database "fname - " is locked. Try copying to another location," - " remove original and copy back.")) - (exn (permission)(db:generic-error-printout exn "ERROR: database "fname - " has some permissions problem.")) - (exn () - (db:generic-error-printout exn "ERROR: Unknown error with database "fname - " message: " - ((condition-property-accessor 'exn 'message) exn)))))))) +;; ;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;") +;; ;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no +;; ;; +;; ;; Used only with http - to be removed +;; ;; +;; (define (dbfile:with-db dbstruct run-id r/w proc params) +;; (assert dbstruct "FATAL: db:with-db called with dbstruct "#f) +;; (assert (dbr:dbstruct? dbstruct) "FATAL: dbstruct is "dbstruct) +;; ;; Testing 2023, March 14th. I went from full time use of the mutext to no use at all and +;; ;; didn't see much change in the frequency of the messages: +;; ;; Warning (#): in thread: (bind!) bad parameter or other API misuse +;; ;; allowing request count to go up to 1000 and other crashes showed up: +;; ;; Warning (#): in thread: (deserialize) unexpected end of input: # +;; ;; +;; ;; leave it fully on for now, test later if there is a performance issue +;; ;; +;; (let* ((use-mutex #t) ;;(> *api-process-request-count* 50)) ;; risk of db corruption +;; (have-struct (dbr:dbstruct? dbstruct)) +;; (dbdat (if have-struct ;; this stuff just allows us to call with a db handle directly +;; (db:open-db dbstruct run-id (dbfile:db-init-proc)) ;; (dbfile:get-subdb dbstruct run-id) +;; #f)) +;; (db (if have-struct ;; this stuff just allows us to call with a db handle directly +;; (dbr:dbdat-dbh dbdat) +;; dbstruct)) +;; (fname (if dbdat +;; (dbr:dbdat-dbfile dbdat) +;; "nofilenameavailable")) +;; (jfile (conc fname"-journal")) +;; (qryproc (lambda () +;; (if use-mutex (mutex-lock! *db-with-db-mutex*)) +;; (let ((res (apply proc dbdat db params))) ;; the actual call is here. +;; (if use-mutex (mutex-unlock! *db-with-db-mutex*)) +;; ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w)) +;; (if dbdat +;; (dbfile:add-dbdat dbstruct run-id dbdat)) +;; ;; (delete-file* crumbfile) +;; res))) +;; (stop-train (conc (dbr:dbstruct-areapath dbstruct)"/stop-the-train"))) +;; +;; (assert (sqlite3:database? db) "FATAL: db:with-db, db is not a database, db="db +;; ", fname="fname) +;; (if (file-exists? jfile) +;; (begin +;; (dbfile:print-err "INFO: "jfile" exists, delaying to reduce database load") +;; (thread-sleep! 0.2))) +;; (if (and use-mutex +;; (common:low-noise-print 120 "over-50-parallel-api-requests")) +;; (dbfile:print-err *api-process-request-count* +;; " parallel api requests being processed in process " +;; (current-process-id))) ;; ", throttling access")) +;; (case (no-condition-db-with-db) +;; ((production)(qryproc)) +;; ((suicide-mode) +;; (handle-exceptions +;; exn +;; (with-output-to-file stop-train +;; (lambda () +;; (db:generic-error-printout exn "Stop train mode, run-id: "run-id +;; " params: "params" proc: "proc))) +;; (qryproc))) +;; (else +;; (condition-case +;; (qryproc) +;; (exn (io-error) +;; (db:generic-error-printout exn "ERROR: i/o error with "fname +;; ". Check permissions, disk space etc. and try again.")) +;; (exn (corrupt) +;; (db:generic-error-printout exn "ERROR: database "fname +;; " is corrupt. Repair it to proceed.")) +;; (exn (busy) +;; (db:generic-error-printout exn "ERROR: database "fname +;; " is locked. Try copying to another location," +;; " remove original and copy back.")) +;; (exn (permission)(db:generic-error-printout exn "ERROR: database "fname +;; " has some permissions problem.")) +;; (exn () +;; (db:generic-error-printout exn "ERROR: Unknown error with database "fname +;; " message: " +;; ((condition-property-accessor 'exn 'message) exn)))))))) ;;====================================================================== ;; another attempt at a transactionized queue ;;====================================================================== Index: dbmod.scm ================================================================== --- dbmod.scm +++ dbmod.scm @@ -84,28 +84,37 @@ ;;====================================================================== ;; The inmem one-db file per server method goes in here ;;====================================================================== (define (dbmod:with-db dbstruct run-id r/w proc params) - (let* ((dbdat (dbmod:open-db dbstruct run-id (dbfile:db-init-proc))) - (dbh (dbr:dbdat-dbh dbdat)) ;; this will be the inmem handle - (dbfile (dbr:dbdat-dbfile dbdat))) + (let* ((use-mutex (> *api-process-request-count* 15)) + (dbdat (dbmod:open-db dbstruct run-id (dbfile:db-init-proc))) + (dbh (dbr:dbdat-dbh dbdat)) ;; this will be the inmem handle + (dbfile (dbr:dbdat-dbfile dbdat))) ;; if nfs mode do a sync if delta > 2 (let* ((last-update (dbr:dbstruct-last-update dbstruct)) (sync-proc (dbr:dbstruct-sync-proc dbstruct)) (curr-secs (current-seconds))) (if (> (- curr-secs last-update) 3) (begin (sync-proc last-update) (dbr:dbstruct-last-update-set! dbstruct curr-secs)))) - (apply proc dbdat dbh params))) + (if use-mutex (mutex-lock! *db-with-db-mutex*)) + (let* ((res (apply proc dbdat dbh params))) + (if use-mutex (mutex-unlock! *db-with-db-mutex*)) + res))) + +(define (db:with-db dbstruct run-id r/w proc . params) + (dbmod:with-db dbstruct run-id r/w proc params)) -(define (dbmod:open-inmem-db initproc) - (let* ((db (sqlite3:open-database ":memory:")) +(define (dbmod:open-inmem-db init-proc #!optional (dbfullname #f)) + (let* ((db (if dbfullname + (dbmod:safely-open-db dbfullname init-proc #t) + (sqlite3:open-database ":memory:"))) (handler (sqlite3:make-busy-timeout 3600))) (sqlite3:set-busy-handler! db handler) - (initproc db) + (init-proc db) db)) (define (dbmod:open-db dbstruct run-id dbinit) (or (dbr:dbstruct-dbdat dbstruct) (let* ((dbdat (make-dbr:dbdat @@ -127,10 +136,21 @@ (dbfile:sync-method))))) (else (debug:print 0 *default-log-port* "Unknown dbfile:cache-method setting: " (dbfile:cache-method)) #f))) + +(define (dbmod:safely-open-db dbfullname init-proc write-access) + (dbfile:with-simple-file-lock + (conc dbfullname".lock") + (lambda () + (let* ((db (sqlite3:open-database dbfullname)) + (handler (sqlite3:make-busy-timeout 136000))) + (sqlite3:set-busy-handler! db handler) + (if write-access + (init-proc db)) + db)))) ;; Open the inmem db and the on-disk db ;; populate the inmem db with data ;; ;; Updates fields in dbstruct @@ -145,21 +165,15 @@ (let* ((dbstruct (or dbstruct-in (make-dbr:dbstruct areapath: areapath))) (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id))) (dbpath (dbmod:get-dbdir dbstruct)) ;; directory where all the .db files are kept (dbfullname (conc dbpath"/"dbfname)) ;; (dbmod:run-id->full-dbfname dbstruct run-id)) (dbexists (file-exists? dbfullname)) - (inmem (dbmod:open-inmem-db init-proc)) + (inmem (dbmod:open-inmem-db init-proc + ;; (conc "/tmp/"dbfname) ;; will create /tmp file + )) (write-access (file-write-access? dbpath)) - (db (dbfile:with-simple-file-lock - (conc dbfullname".lock") - (lambda () - (let* ((db (sqlite3:open-database dbfullname)) - (handler (sqlite3:make-busy-timeout 136000))) - (sqlite3:set-busy-handler! db handler) - (if write-access - (init-proc db)) - db)))) + (db (dbmod:safely-open-db dbfullname init-proc write-access)) (tables (db:sync-all-tables-list keys))) (assert (sqlite3:database? inmem) "FATAL: open-dbmoddb: inmem is not a db") (assert (sqlite3:database? db) "FATAL: open-dbmoddb: db is not a db") (dbr:dbstruct-inmem-set! dbstruct inmem) (dbr:dbstruct-ondiskdb-set! dbstruct db) @@ -569,8 +583,71 @@ (begin (debug:print 0 *default-log-port* "ERROR: removing the journal file " fullpath ", this is not good. Look for disk full, write access and other issues.") (if remove (system (conc "rm -rf " fullpath))) #f))) #t)))))) + + +;;====================================================================== +;; M E T A G E T A N D S E T V A R S +;;====================================================================== + +;; returns number if string->number is successful, string otherwise +;; also updates *global-delta* +;; +(define (db:get-var dbstruct var) + (let* ((res #f)) + (db:with-db + dbstruct #f #f ;; for the moment vars are only stored in main.db + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (val) + (set! res val)) + db + "SELECT val FROM metadat WHERE var=?;" var) + ;; convert to number if can + (if (string? res) + (let ((valnum (string->number res))) + (if valnum (set! res valnum)))) + res)))) + +(define (db:inc-var dbstruct var) + (db:with-db dbstruct #f #t + (lambda (dbdat db) + (sqlite3:execute db "UPDATE metadat SET val=val+1 WHERE var=?;" var)))) + +(define (db:dec-var dbstruct var) + (db:with-db dbstruct #f #t + (lambda (dbdat db) + (sqlite3:execute db "UPDATE metadat SET val=val-1 WHERE var=?;" var)))) + +;; This was part of db:get-var. It was used to estimate the load on +;; the database files. +;; +;; scale by 10, average with current value. +;; (set! *global-delta* (/ (+ *global-delta* (* (- (current-milliseconds) start-ms) +;; (if throttle throttle 0.01))) +;; 2)) +;; (if (> (abs (- *last-global-delta-printed* *global-delta*)) 0.08) ;; don't print all the time, only if it changes a bit +;; (begin +;; (debug:print-info 4 *default-log-port* "launch throttle factor=" *global-delta*) +;; (set! *last-global-delta-printed* *global-delta*))) + +(define (db:set-var dbstruct var val) + (db:with-db dbstruct #f #t + (lambda (dbdat db) + (sqlite3:execute (db:get-cache-stmth dbdat db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);") + var val)))) + +(define (db:add-var dbstruct var val) + (db:with-db dbstruct #f #t + (lambda (dbdat db) + (sqlite3:execute (db:get-cache-stmth dbdat db "UPDATE metadat SET val=val+? WHERE var=?;") val var)))) + +(define (db:del-var dbstruct var) + (db:with-db dbstruct #f #t + (lambda (dbdat db) + (sqlite3:execute (db:get-cache-stmth dbdat db "DELETE FROM metadat WHERE var=?;") var)))) + ) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -20,10 +20,11 @@ (declare (unit tasks)) (declare (uses debugprint)) (declare (uses dbfile)) (declare (uses db)) +(declare (uses dbmod)) (declare (uses rmt)) (declare (uses rmtmod)) (declare (uses common)) (declare (uses pgdb)) (declare (uses commonmod)) @@ -32,10 +33,11 @@ (use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking format) (import (prefix sqlite3 sqlite3:)) (import commonmod debugprint + dbmod rmtmod (prefix mtargs args:)) (import dbfile) ;; (import pgdb) ;; pgdb is a module