Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -96,12 +96,12 @@ (define api:write-queries '( get-keys-write ;; dummy "write" query to force server start ;; SERVERS - start-server - kill-server + ;; start-server + ;; kill-server ;; TESTS test-set-state-status-by-id delete-test-records delete-old-deleted-test-records @@ -139,10 +139,12 @@ ;; TASKS tasks-add tasks-set-state-given-param-key )) + +(define *db-write-mutexes* (make-hash-table)) ;; These are called by the server on recipt of /api calls ;; - keep it simple, only return the actual result of the call, i.e. no meta info here ;; ;; - returns #( flag result ) @@ -156,12 +158,14 @@ ;; (print-call-chain (current-error-port)) ;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) ;; (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens (if (> *api-process-request-count* 200) (begin - (debug:print 0 *default-log-port* "WARNING: Over 200 threads, overload, taking a five second nap.") - (thread-sleep! 5))) ;; take a nap + (if (common:low-noise-print 30 "too many threads") + (debug:print 0 *default-log-port* "WARNING: "*api-process-request-count*" threads, potential overload, adding 0.5 sec delay.")) + (thread-sleep! 0.5) ;; take a nap + )) (cond ((not (vector? dat)) ;; it is an error to not receive a vector (vector #f (vector #f "remote must be called with a vector"))) #;((> *api-process-request-count* 200) ;; 20) (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.") @@ -171,222 +175,228 @@ (let* ((cmd-in (vector-ref dat 0)) (cmd (if (symbol? cmd-in) cmd-in (string->symbol cmd-in))) (params (vector-ref dat 1)) + (run-id (if (null? params) + 0 + (car params))) + (write-mutex (if (hash-table-exists? *db-write-mutexes* run-id) + (hash-table-ref *db-write-mutexes* run-id) + (let* ((newmutex (make-mutex))) + (hash-table-set! *db-write-mutexes* run-id newmutex) + newmutex))) (start-t (current-milliseconds)) (readonly-mode (dbr:dbstruct-read-only dbstruct)) (readonly-command (member cmd api:read-only-queries)) - (writecmd-in-readonly-mode (and readonly-mode (not readonly-command))) - #;(foo (begin - (common:telemetry-log (conc "api-in:"(->string cmd)) - payload: `((params . ,params))) - - #t)) - (res - (if writecmd-in-readonly-mode - (conc "attempt to run write command "cmd" on a read-only database") - (case cmd - ;;=============================================== - ;; READ/WRITE QUERIES - ;;=============================================== - - ((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl - - ;; SERVERS - ((start-server) (apply server:kind-run params)) - ((kill-server) (set! *server-run* #f)) - - ;; TESTS - - ;;((test-set-state-status-by-id) (apply mt:test-set-state-status-by-id dbstruct params)) - ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items. - ((test-set-state-status-by-id) - - ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) - (db:set-state-status-and-roll-up-items - dbstruct - (list-ref params 0) ; run-id - (list-ref params 1) ; test-name - #f ; item-path - (list-ref params 2) ; state - (list-ref params 3) ; status - (list-ref params 4) ; comment - )) - - ((delete-test-records) (apply db:delete-test-records dbstruct params)) - ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params)) - ((test-set-state-status) (apply db:test-set-state-status dbstruct params)) - ((test-set-top-process-pid) (apply db:test-set-top-process-pid dbstruct params)) - ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params)) - ((set-state-status-and-roll-up-run) (apply db:set-state-status-and-roll-up-run dbstruct params)) - ((top-test-set-per-pf-counts) (apply db:top-test-set-per-pf-counts dbstruct params)) - ((test-set-archive-block-id) (apply db:test-set-archive-block-id dbstruct params)) - - ;; RUNS - ((register-run) (apply db:register-run dbstruct params)) - ((set-tests-state-status) (apply db:set-tests-state-status dbstruct params)) - ((delete-run) (apply db:delete-run dbstruct params)) - ((lock/unlock-run) (apply db:lock/unlock-run dbstruct params)) - ((update-run-event_time) (apply db:update-run-event_time dbstruct params)) - ((update-run-stats) (apply db:update-run-stats dbstruct params)) - ((set-var) (apply db:set-var dbstruct params)) - ((inc-var) (apply db:inc-var dbstruct params)) - ((dec-var) (apply db:dec-var dbstruct params)) - ((del-var) (apply db:del-var dbstruct params)) - ((add-var) (apply db:add-var dbstruct params)) - - ;; STEPS - ((teststep-set-status!) (apply db:teststep-set-status! dbstruct params)) - ((delete-steps-for-test!) (apply db:delete-steps-for-test! dbstruct params)) - - ;; TEST DATA - ((test-data-rollup) (apply db:test-data-rollup dbstruct params)) - ((csv->test-data) (apply db:csv->test-data dbstruct params)) - - ;; MISC - ((sync-inmem->db) (let ((run-id (car params))) - (db:sync-touched dbstruct run-id db:initialize-main-db force-sync: #t))) - ((mark-incomplete) (apply db:find-and-mark-incomplete dbstruct params)) - ((create-all-triggers) (db:create-all-triggers dbstruct)) - ((drop-all-triggers) (db:drop-all-triggers dbstruct)) - - ;; TESTMETA - ((testmeta-add-record) (apply db:testmeta-add-record dbstruct params)) - ((testmeta-update-field) (apply db:testmeta-update-field dbstruct params)) - ((get-tests-tags) (db:get-tests-tags dbstruct)) - - ;; TASKS - ((tasks-add) (apply tasks:add dbstruct params)) - ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params)) - ((tasks-get-last) (apply tasks:get-last dbstruct params)) - - ;; NO SYNC DB - ((no-sync-set) (apply db:no-sync-set *no-sync-db* params)) - ((no-sync-get/default) (apply db:no-sync-get/default *no-sync-db* params)) - ((no-sync-del!) (apply db:no-sync-del! *no-sync-db* params)) - ((no-sync-get-lock) (apply db:no-sync-get-lock *no-sync-db* params)) - - ;; ARCHIVES - ;; ((archive-get-allocations) - ((archive-register-disk) (apply db:archive-register-disk dbstruct params)) - ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params)) - ;; ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey)) - - ;;====================================================================== - ;; READ ONLY QUERIES - ;;====================================================================== - - ;; KEYS - ((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params)) - ((get-keys) (db:get-keys dbstruct)) - ((get-key-vals) (apply db:get-key-vals dbstruct params)) - ((get-target) (apply db:get-target dbstruct params)) - ((get-targets) (db:get-targets dbstruct)) - - ;; ARCHIVES - ((test-get-archive-block-info) (apply db:test-get-archive-block-info dbstruct params)) - - ;; TESTS - ((test-toplevel-num-items) (apply db:test-toplevel-num-items dbstruct params)) - ((get-test-info-by-id) (apply db:get-test-info-by-id dbstruct params)) - ((test-get-rundir-from-test-id) (apply db:test-get-rundir-from-test-id dbstruct params)) - ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params)) - ((get-count-tests-running) (apply db:get-count-tests-running dbstruct params)) - ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params)) - ;; ((delete-test-step-records) (apply db:delete-test-step-records dbstruct params)) - ;; ((get-previous-test-run-record) (apply db:get-previous-test-run-record dbstruct params)) - ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params)) - ((test-get-logfile-info) (apply db:test-get-logfile-info dbstruct params)) - ((test-get-records-for-index-file) (apply db:test-get-records-for-index-file dbstruct params)) - ((get-testinfo-state-status) (apply db:get-testinfo-state-status dbstruct params)) - ((test-get-top-process-pid) (apply db:test-get-top-process-pid dbstruct params)) - ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params)) - ((get-prereqs-not-met) (apply db:get-prereqs-not-met dbstruct params)) - ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params)) - ((get-not-completed-cnt) (apply db:get-not-completed-cnt dbstruct params)) - ;; ((synchash-get) (apply synchash:server-get dbstruct params)) - ((get-raw-run-stats) (apply db:get-raw-run-stats dbstruct params)) - ((get-test-times) (apply db:get-test-times dbstruct params)) - - ;; RUNS - ((get-run-info) (apply db:get-run-info dbstruct params)) - ((get-run-status) (apply db:get-run-status dbstruct params)) - ((get-run-state) (apply db:get-run-state dbstruct params)) - ((set-run-status) (apply db:set-run-status dbstruct params)) - ((set-run-state-status) (apply db:set-run-state-status dbstruct params)) - ((update-tesdata-on-repilcate-db) (apply db:update-tesdata-on-repilcate-db dbstruct params)) - ((get-tests-for-run) (apply db:get-tests-for-run dbstruct params)) - ((get-tests-for-run-state-status) (apply db:get-tests-for-run-state-status dbstruct params)) - ((get-test-id) (apply db:get-test-id dbstruct params)) - ((get-tests-for-run-mindata) (apply db:get-tests-for-run-mindata dbstruct params)) - ;; ((get-tests-for-runs-mindata) (apply db:get-tests-for-runs-mindata dbstruct params)) - ((get-runs) (apply db:get-runs dbstruct params)) - ((simple-get-runs) (apply db:simple-get-runs dbstruct params)) - ((get-num-runs) (apply db:get-num-runs dbstruct params)) - ((get-runs-cnt-by-patt) (apply db:get-runs-cnt-by-patt dbstruct params)) - ((get-all-run-ids) (db:get-all-run-ids dbstruct)) - ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params)) - ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params)) - ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params)) - ((get-run-name-from-id) (apply db:get-run-name-from-id dbstruct params)) - ((get-main-run-stats) (apply db:get-main-run-stats dbstruct params)) - ((get-var) (apply db:get-var dbstruct params)) - ((get-run-stats) (apply db:get-run-stats dbstruct params)) - ((get-run-times) (apply db:get-run-times dbstruct params)) - - ;; STEPS - ((get-steps-data) (apply db:get-steps-data dbstruct params)) - ((get-steps-for-test) (apply db:get-steps-for-test dbstruct params)) - ((get-steps-info-by-id) (apply db:get-steps-info-by-id dbstruct params)) - - ;; TEST DATA - ((read-test-data) (apply db:read-test-data dbstruct params)) - ((read-test-data-varpatt) (apply db:read-test-data-varpatt dbstruct params)) - ((get-data-info-by-id) (apply db:get-data-info-by-id dbstruct params)) - - ;; MISC - ((get-latest-host-load) (apply db:get-latest-host-load dbstruct params)) - ((have-incompletes?) (apply db:have-incompletes? dbstruct params)) - ((login) (apply db:login dbstruct params)) - ((general-call) (let ((stmtname (car params)) - (run-id (cadr params)) - (realparams (cddr params))) - (db:general-call dbstruct run-id stmtname realparams))) - ((sdb-qry) (apply sdb:qry params)) - ((ping) (current-process-id)) - ((get-changed-record-ids) (apply db:get-changed-record-ids dbstruct params)) - ((get-run-record-ids) (apply db:get-run-record-ids dbstruct params)) - ;; TESTMETA - ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params)) - - ;; TASKS - ((find-task-queue-records) (apply tasks:find-task-queue-records dbstruct params)) - (else - (debug:print 0 *default-log-port* "ERROR: bad api call " cmd) - (conc "ERROR: BAD api call " cmd)))))) - - - ;; save all stats - (let ((delta-t (- (current-milliseconds) - start-t)) - (modified-cmd (if (eq? cmd 'general-call) - (string->symbol (conc "general-call-" (car params))) - cmd))) - (hash-table-set! *db-api-call-time* modified-cmd - (cons delta-t (hash-table-ref/default *db-api-call-time* modified-cmd '())))) - (if writecmd-in-readonly-mode - (begin - #;(common:telemetry-log (conc "api-out:"(->string cmd)) - payload: `((params . ,params) - (ok-res . #t))) - (vector #f res)) - (begin - #;(common:telemetry-log (conc "api-out:"(->string cmd)) - payload: `((params . ,params) - (ok-res . #f))) - (vector #t res))))))) ;; ) + (writecmd-in-readonly-mode (and readonly-mode (not readonly-command)))) + (if (not readonly-command) + (mutex-lock! write-mutex)) + (let* ((res + (if writecmd-in-readonly-mode + (conc "attempt to run write command "cmd" on a read-only database") + (case cmd + ;;=============================================== + ;; READ/WRITE QUERIES + ;;=============================================== + + ((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl + + ;; SERVERS + ((start-server) (apply server:kind-run params)) + ((kill-server) (set! *server-run* #f)) + + ;; TESTS + + ;;((test-set-state-status-by-id) (apply mt:test-set-state-status-by-id dbstruct params)) + ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items. + ((test-set-state-status-by-id) + + ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) + (db:set-state-status-and-roll-up-items + dbstruct + (list-ref params 0) ; run-id + (list-ref params 1) ; test-name + #f ; item-path + (list-ref params 2) ; state + (list-ref params 3) ; status + (list-ref params 4) ; comment + )) + + ((delete-test-records) (apply db:delete-test-records dbstruct params)) + ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params)) + ((test-set-state-status) (apply db:test-set-state-status dbstruct params)) + ((test-set-top-process-pid) (apply db:test-set-top-process-pid dbstruct params)) + ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params)) + ((set-state-status-and-roll-up-run) (apply db:set-state-status-and-roll-up-run dbstruct params)) + ((top-test-set-per-pf-counts) (apply db:top-test-set-per-pf-counts dbstruct params)) + ((test-set-archive-block-id) (apply db:test-set-archive-block-id dbstruct params)) + + ;; RUNS + ((register-run) (apply db:register-run dbstruct params)) + ((set-tests-state-status) (apply db:set-tests-state-status dbstruct params)) + ((delete-run) (apply db:delete-run dbstruct params)) + ((lock/unlock-run) (apply db:lock/unlock-run dbstruct params)) + ((update-run-event_time) (apply db:update-run-event_time dbstruct params)) + ((update-run-stats) (apply db:update-run-stats dbstruct params)) + ((set-var) (apply db:set-var dbstruct params)) + ((inc-var) (apply db:inc-var dbstruct params)) + ((dec-var) (apply db:dec-var dbstruct params)) + ((del-var) (apply db:del-var dbstruct params)) + ((add-var) (apply db:add-var dbstruct params)) + + ;; STEPS + ((teststep-set-status!) (apply db:teststep-set-status! dbstruct params)) + ((delete-steps-for-test!) (apply db:delete-steps-for-test! dbstruct params)) + + ;; TEST DATA + ((test-data-rollup) (apply db:test-data-rollup dbstruct params)) + ((csv->test-data) (apply db:csv->test-data dbstruct params)) + + ;; MISC + ((sync-inmem->db) (let ((run-id (car params))) + (db:sync-touched dbstruct run-id db:initialize-main-db force-sync: #t))) + ((mark-incomplete) (apply db:find-and-mark-incomplete dbstruct params)) + ((create-all-triggers) (db:create-all-triggers dbstruct)) + ((drop-all-triggers) (db:drop-all-triggers dbstruct)) + + ;; TESTMETA + ((testmeta-add-record) (apply db:testmeta-add-record dbstruct params)) + ((testmeta-update-field) (apply db:testmeta-update-field dbstruct params)) + ((get-tests-tags) (db:get-tests-tags dbstruct)) + + ;; TASKS + ((tasks-add) (apply tasks:add dbstruct params)) + ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params)) + ((tasks-get-last) (apply tasks:get-last dbstruct params)) + + ;; NO SYNC DB + ((no-sync-set) (apply db:no-sync-set *no-sync-db* params)) + ((no-sync-get/default) (apply db:no-sync-get/default *no-sync-db* params)) + ((no-sync-del!) (apply db:no-sync-del! *no-sync-db* params)) + ((no-sync-get-lock) (apply db:no-sync-get-lock *no-sync-db* params)) + + ;; ARCHIVES + ;; ((archive-get-allocations) + ((archive-register-disk) (apply db:archive-register-disk dbstruct params)) + ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params)) + ;; ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey)) + + ;;====================================================================== + ;; READ ONLY QUERIES + ;;====================================================================== + + ;; KEYS + ((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params)) + ((get-keys) (db:get-keys dbstruct)) + ((get-key-vals) (apply db:get-key-vals dbstruct params)) + ((get-target) (apply db:get-target dbstruct params)) + ((get-targets) (db:get-targets dbstruct)) + + ;; ARCHIVES + ((test-get-archive-block-info) (apply db:test-get-archive-block-info dbstruct params)) + + ;; TESTS + ((test-toplevel-num-items) (apply db:test-toplevel-num-items dbstruct params)) + ((get-test-info-by-id) (apply db:get-test-info-by-id dbstruct params)) + ((test-get-rundir-from-test-id) (apply db:test-get-rundir-from-test-id dbstruct params)) + ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params)) + ((get-count-tests-running) (apply db:get-count-tests-running dbstruct params)) + ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params)) + ;; ((delete-test-step-records) (apply db:delete-test-step-records dbstruct params)) + ;; ((get-previous-test-run-record) (apply db:get-previous-test-run-record dbstruct params)) + ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params)) + ((test-get-logfile-info) (apply db:test-get-logfile-info dbstruct params)) + ((test-get-records-for-index-file) (apply db:test-get-records-for-index-file dbstruct params)) + ((get-testinfo-state-status) (apply db:get-testinfo-state-status dbstruct params)) + ((test-get-top-process-pid) (apply db:test-get-top-process-pid dbstruct params)) + ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params)) + ((get-prereqs-not-met) (apply db:get-prereqs-not-met dbstruct params)) + ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params)) + ((get-not-completed-cnt) (apply db:get-not-completed-cnt dbstruct params)) + ;; ((synchash-get) (apply synchash:server-get dbstruct params)) + ((get-raw-run-stats) (apply db:get-raw-run-stats dbstruct params)) + ((get-test-times) (apply db:get-test-times dbstruct params)) + + ;; RUNS + ((get-run-info) (apply db:get-run-info dbstruct params)) + ((get-run-status) (apply db:get-run-status dbstruct params)) + ((get-run-state) (apply db:get-run-state dbstruct params)) + ((set-run-status) (apply db:set-run-status dbstruct params)) + ((set-run-state-status) (apply db:set-run-state-status dbstruct params)) + ((update-tesdata-on-repilcate-db) (apply db:update-tesdata-on-repilcate-db dbstruct params)) + ((get-tests-for-run) (apply db:get-tests-for-run dbstruct params)) + ((get-tests-for-run-state-status) (apply db:get-tests-for-run-state-status dbstruct params)) + ((get-test-id) (apply db:get-test-id dbstruct params)) + ((get-tests-for-run-mindata) (apply db:get-tests-for-run-mindata dbstruct params)) + ;; ((get-tests-for-runs-mindata) (apply db:get-tests-for-runs-mindata dbstruct params)) + ((get-runs) (apply db:get-runs dbstruct params)) + ((simple-get-runs) (apply db:simple-get-runs dbstruct params)) + ((get-num-runs) (apply db:get-num-runs dbstruct params)) + ((get-runs-cnt-by-patt) (apply db:get-runs-cnt-by-patt dbstruct params)) + ((get-all-run-ids) (db:get-all-run-ids dbstruct)) + ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params)) + ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params)) + ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params)) + ((get-run-name-from-id) (apply db:get-run-name-from-id dbstruct params)) + ((get-main-run-stats) (apply db:get-main-run-stats dbstruct params)) + ((get-var) (apply db:get-var dbstruct params)) + ((get-run-stats) (apply db:get-run-stats dbstruct params)) + ((get-run-times) (apply db:get-run-times dbstruct params)) + + ;; STEPS + ((get-steps-data) (apply db:get-steps-data dbstruct params)) + ((get-steps-for-test) (apply db:get-steps-for-test dbstruct params)) + ((get-steps-info-by-id) (apply db:get-steps-info-by-id dbstruct params)) + + ;; TEST DATA + ((read-test-data) (apply db:read-test-data dbstruct params)) + ((read-test-data-varpatt) (apply db:read-test-data-varpatt dbstruct params)) + ((get-data-info-by-id) (apply db:get-data-info-by-id dbstruct params)) + + ;; MISC + ((get-latest-host-load) (apply db:get-latest-host-load dbstruct params)) + ((have-incompletes?) (apply db:have-incompletes? dbstruct params)) + ((login) (apply db:login dbstruct params)) + ((general-call) (let ((stmtname (car params)) + (run-id (cadr params)) + (realparams (cddr params))) + (db:general-call dbstruct run-id stmtname realparams))) + ((sdb-qry) (apply sdb:qry params)) + ((ping) (current-process-id)) + ((get-changed-record-ids) (apply db:get-changed-record-ids dbstruct params)) + ((get-run-record-ids) (apply db:get-run-record-ids dbstruct params)) + ;; TESTMETA + ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params)) + + ;; TASKS + ((find-task-queue-records) (apply tasks:find-task-queue-records dbstruct params)) + (else + (debug:print 0 *default-log-port* "ERROR: bad api call " cmd) + (conc "ERROR: BAD api call " cmd)))))) + (if (not readonly-command) + (mutex-unlock! write-mutex)) + + ;; save all stats + (let ((delta-t (- (current-milliseconds) + start-t)) + (modified-cmd (if (eq? cmd 'general-call) + (string->symbol (conc "general-call-" (car params))) + cmd))) + (hash-table-set! *db-api-call-time* modified-cmd + (cons delta-t (hash-table-ref/default *db-api-call-time* modified-cmd '())))) + (if writecmd-in-readonly-mode + (begin + #;(common:telemetry-log (conc "api-out:"(->string cmd)) + payload: `((params . ,params) + (ok-res . #t))) + (vector #f res)) + (begin + #;(common:telemetry-log (conc "api-out:"(->string cmd)) + payload: `((params . ,params) + (ok-res . #f))) + (vector #t res)))))))) ;; http-server send-response ;; api:process-request ;; db:* ;; Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -229,26 +229,10 @@ (set! *db-write-access* #f)) ;; (cons db dbpath))) (make-dbr:dbdat dbfile: dbpath dbh: db read-only: (not write-access)))) - -;; db:safely-close-sqlite3-db and db:close-all were here, moved to dbfile - -;; (let ((locdbs (dbr:dbstruct-locdbs dbstruct))) -;; (if (hash-table? locdbs) -;; (for-each (lambda (run-id) -;; (db:close-run-db dbstruct run-id)) -;; (hash-table-keys locdbs))))) - -;; (define (db:open-inmem-db) -;; (let* ((db (sqlite3:open-database ":memory:")) -;; (handler (make-busy-timeout 3600))) -;; (sqlite3:set-busy-handler! db handler) -;; (db:initialize-run-id-db db) -;; (cons db #f))) - ;; use bunch of Unix commands to try to break the lock and recreate the db ;; (define (db:move-and-recreate-db dbdat) (let* ((dbpath (dbr:dbdat-dbfile dbdat)) (dbdir (pathname-directory dbpath)) @@ -323,85 +307,10 @@ (sqlite3:finalize! db) #t)))))) -(define (db:patch-schema-rundb frundb) - ;; - ;; remove this some time after September 2016 (added in version v1.6031 - ;; - (for-each - (lambda (table-name) - (handle-exceptions - exn - (if (string-match ".*duplicate.*" ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 *default-log-port* "Column last_update already added to " table-name " table") - (db:general-sqlite-error-dump exn "alter table " table-name " ..." #f "none")) - (sqlite3:execute - frundb - (conc "ALTER TABLE " table-name " ADD COLUMN last_update INTEGER DEFAULT 0"))) - (sqlite3:execute - frundb - (conc "DROP TRIGGER IF EXISTS update_" table-name "_trigger;")) - (sqlite3:execute - frundb - (conc "CREATE TRIGGER IF NOT EXISTS update_" table-name "_trigger AFTER UPDATE ON " table-name " - FOR EACH ROW - BEGIN - UPDATE " table-name " SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;")) - ) - '("tests" "test_steps" "test_data"))) - -(define (db:patch-schema-maindb maindb) - ;; - ;; remove all these some time after september 2016 (added in v1.6031 - ;; - (for-each - (lambda (column type default) - (handle-exceptions - exn - (if (string-match ".*duplicate.*" ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 *default-log-port* "Column " column " already added to runs table") - (db:general-sqlite-error-dump exn "alter table runs ..." #f "none")) - (sqlite3:execute - maindb - (conc "ALTER TABLE runs ADD COLUMN " column " " type " DEFAULT " default)))) - (list "last_update" "contour") - (list "INTEGER" "TEXT" ) - (list "0" "''" )) - ;; these schema changes don't need exception handling - (sqlite3:execute - maindb - "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs - FOR EACH ROW - BEGIN - UPDATE runs SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;") - (sqlite3:execute maindb "CREATE TABLE IF NOT EXISTS run_stats ( - id INTEGER PRIMARY KEY, - run_id INTEGER, - state TEXT, - status TEXT, - count INTEGER, - last_update INTEGER DEFAULT (strftime('%s','now')))") - (sqlite3:execute maindb "CREATE TRIGGER IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats - FOR EACH ROW - BEGIN - UPDATE run_stats SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;") - (sqlite3:execute maindb "CREATE TABLE IF NOT EXISTS test_rundat ( - id INTEGER PRIMARY KEY, - test_id INTEGER, - update_time TIMESTAMP, - cpuload INTEGER DEFAULT -1, - diskfree INTEGER DEFAULT -1, - diskusage INTGER DEFAULT -1, - run_duration INTEGER DEFAULT 0);")) (define (db:adj-target db) (let ((fields (configf:get-section *configdat* "fields")) (field-num 0)) ;; because we will be refreshing the keys table it is best to clear it here @@ -895,11 +804,11 @@ event_time TIMESTAMP, comment TEXT DEFAULT '', logfile TEXT DEFAULT '', last_update INTEGER DEFAULT (strftime('%s','now')), CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state));") - (sqlite3:execute db "CREATE INDEX IF NOT EXISTS teststeps_index ON tests (run_id, testname, item_path);") + (sqlite3:execute db "CREATE INDEX IF NOT EXISTS teststeps_index ON test_steps (test_id, stepname, state);") ;; All triggers created at once in end ;;(sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps ;; FOR EACH ROW ;; BEGIN ;; UPDATE test_steps SET last_update=(strftime('%s','now')) @@ -1073,43 +982,10 @@ ;; (db (dbr:dbdat-dbh dbdat)) ;; (res '()) ;; (blocks '())) ;; a block is an archive chunck that can be added too if there is space ;; (sqlite3:for-each-row #f) -;;====================================================================== -;; L O G G I N G D B -;;====================================================================== - -(define (open-logging-db) - (let* ((dbpath (conc (if *toppath* (conc *toppath* "/") "") "logging.db")) ;; fname) - (dbexists (common:file-exists? dbpath)) - (db (sqlite3:open-database dbpath)) - (handler (sqlite3:make-busy-timeout (if (args:get-arg "-override-timeout") - (string->number (args:get-arg "-override-timeout")) - 136000)))) ;; 136000))) - (sqlite3:set-busy-handler! db handler) - (if (not dbexists) - (begin - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS log (id INTEGER PRIMARY KEY,event_time TIMESTAMP DEFAULT (strftime('%s','now')),logline TEXT,pwd TEXT,cmdline TEXT,pid INTEGER);") - (db:set-sync db) ;; (sqlite3:execute db (conc "PRAGMA synchronous = 0;")) - )) - db)) - -(define (db:log-local-event . loglst) - (let ((logline (apply conc loglst))) - (db:log-event logline))) - -(define (db:log-event logline) - (let ((db (open-logging-db))) - (sqlite3:execute db "INSERT INTO log (logline,pwd,cmdline,pid) VALUES (?,?,?,?);" - logline - (current-directory) - (string-intersperse (argv) " ") - (current-process-id)) - (sqlite3:finalize! db) - logline)) - ;;====================================================================== ;; D B U T I L S ;;====================================================================== ;;====================================================================== Index: dbfile.scm ================================================================== --- dbfile.scm +++ dbfile.scm @@ -222,11 +222,11 @@ (hash-table-ref/default (dbr:dbstruct-subdbs dbstruct) dbfname #f))) (define (dbfile:set-subdb dbstruct run-id subdb) (hash-table-set! (dbr:dbstruct-subdbs dbstruct) (dbfile:run-id->dbname run-id) subdb)) -(define *dbfile:num-handles-in-use* 0) +;; (define *dbfile:num-handles-in-use* 0) ;; Get/open a database ;; if run-id => get run specific db ;; if #f => get main db ;; if run-id is a string treat it as a filename @@ -237,18 +237,17 @@ (define (dbfile:get-dbdat dbstruct run-id) (let* ((subdb (dbfile:get-subdb dbstruct run-id))) (if (stack-empty? (dbr:subdb-dbstack subdb)) #f (begin - (set! *dbfile:num-handles-in-use* (+ *dbfile:num-handles-in-use* 1)) (stack-pop! (dbr:subdb-dbstack subdb)))))) ;; return a previously opened db handle to the stack of available handles (define (dbfile:add-dbdat dbstruct run-id dbdat) (let* ((subdb (dbfile:get-subdb dbstruct run-id))) - (set! *dbfile:num-handles-in-use* (- *dbfile:num-handles-in-use* 1)) - (stack-push! (dbr:subdb-dbstack subdb) dbdat))) + (stack-push! (dbr:subdb-dbstack subdb) dbdat) + dbdat)) ;; set up a subdb ;; (define (dbfile:init-subdb dbstruct run-id init-proc) (let* ((dbname (dbfile:run-id->dbname run-id)) @@ -272,14 +271,10 @@ ;; 2. if there is no existing db handle in the stack ;; create a new handle and return it (do NOT add ;; it to the stack). ;; (define (dbfile:open-db dbstruct run-id init-proc) - (if (> *dbfile:num-handles-in-use* 10) - (let* ((wait-delay (- *dbfile:num-handles-in-use* 9))) - (dbfile:print-err "INFO: over ten dbfile handle threads in use ("*dbfile:num-handles-in-use*") delaying "wait-delay" second") - (thread-sleep! wait-delay))) (let* ((subdb (dbfile:get-subdb dbstruct run-id))) (if (not subdb) ;; not yet defined (begin (dbfile:init-subdb dbstruct run-id init-proc) (dbfile:open-db dbstruct run-id init-proc)) @@ -289,11 +284,11 @@ (let* ((tmppath (dbr:dbstruct-tmppath dbstruct)) (tmpdbpath (dbfile:run-id->path tmppath run-id)) (dbdat (dbfile:open-sqlite3-db tmpdbpath init-proc sync-mode: 0 journal-mode: "WAL"))) ;; the following line short-circuits the "one db handle per thread" model ;; - (dbfile:add-dbdat dbstruct run-id dbdat) + ;; (dbfile:add-dbdat dbstruct run-id dbdat) ;; dbdat)))))) ;; COMBINE dbfile:open-sqlite-db and dbfile:lock-create-open ;; @@ -301,12 +296,13 @@ ;; this stuff is for initial debugging, please remove it when ;; this code stabilizes (define *dbopens* (make-hash-table)) (define (dbfile:inc-db-open dbfile) (let* ((curr-opens-count (+ (hash-table-ref/default *dbopens* dbfile 0) 1))) - ;; (if (> curr-opens-count 1) ;; this should NOT be happening - ;; (dbfile:print-err "INFO: db "dbfile" has been opened "curr-opens-count" times!")) + (if (and (> curr-opens-count 1) ;; this should NOT be happening + (common:low-noise-print 15 "db-opens")) + (dbfile:print-err "INFO: db "dbfile" has been opened "curr-opens-count" times!")) (hash-table-set! *dbopens* dbfile curr-opens-count) curr-opens-count)) ;; Open the classic megatest.db file (defaults to open in toppath) ;; @@ -313,11 +309,11 @@ ;; NOTE: returns a dbdat not a dbstruct! ;; (define (dbfile:open-sqlite3-db dbpath init-proc #!key (sync-mode 0)(journal-mode #f)) (let* ((dbexists (file-exists? dbpath)) (write-access (file-write-access? dbpath)) - (db (dbfile:cautious-open-database dbpath init-proc sync-mode journal-mode))) #;(sqlite3:open-database dbpath) + (db (dbfile:cautious-open-database dbpath init-proc sync-mode journal-mode))) (dbfile:inc-db-open dbpath) ;; (init-proc db) (make-dbr:dbdat dbfile: dbpath dbh: db read-only: (not write-access)))) (define (dbfile:print-and-exit . params) @@ -331,11 +327,10 @@ (with-output-to-port (current-error-port) (lambda () (apply print params)))) - (define (dbfile:cautious-open-database fname init-proc sync-mode journal-mode #!optional (tries-left 500)) (let* ((busy-file (conc fname"-journal")) (delay-time (* (- 51 tries-left) 1.1)) (write-access (file-write-access? fname)) (dir-access (file-write-access? (pathname-directory fname))) @@ -378,10 +373,11 @@ (begin (if (file-exists? fname ) (let ((db (sqlite3:open-database fname))) ;; pragmas synchronous not needed because this db is used read-only ;; (sqlite3:execute db (conc "PRAGMA synchronous = "mode";") + (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 30000)) ;; read-only but still need timeout db ) (print "file doesn't exist: " fname)))) (exn (io-error) (dbfile:print-err exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.") (retry)) @@ -422,11 +418,11 @@ (begin (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));")) ))) (db (dbfile:cautious-open-database dbname init-proc 0 "WAL"))) ;; (sqlite3:open-database dbname))) ;; (sqlite3:execute db "PRAGMA synchronous = 0;") - (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) + ;; (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) ;; done in cautious-open-database (set! *no-sync-db* db) db)))) (define (db:no-sync-set db var val) (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val)) @@ -996,11 +992,12 @@ ;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;") ;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no ;; (define (db:with-db dbstruct run-id r/w proc . params) - (let* ((have-struct (dbr:dbstruct? dbstruct)) + (let* ((use-mutex (> *api-process-request-count* 25)) ;; risk of db corruption + (have-struct (dbr:dbstruct? dbstruct)) (dbdat (if have-struct ;; this stuff just allows us to call with a db handle directly (db:open-db dbstruct run-id (dbfile:db-init-proc)) ;; (dbfile:get-subdb dbstruct run-id) #f)) (db (if have-struct ;; this stuff just allows us to call with a db handle directly (dbr:dbdat-dbh dbdat) @@ -1010,15 +1007,15 @@ "nofilenameavailable")) (jfile (conc fname"-journal")) #;(subdb (if have-struct (dbfile:get-subdb dbstruct run-id) #f)) - (use-mutex (> *api-process-request-count* 25))) ;; was 25 + ) ;; was 25 (if (file-exists? jfile) (begin - (dbfile:print-err "INFO: "jfile" exists, delaying few seconds to reduce database load") - (thread-sleep! 2))) + (dbfile:print-err "INFO: "jfile" exists, delaying to reduce database load") + (thread-sleep! 0.2))) (if (and use-mutex (common:low-noise-print 120 "over-50-parallel-api-requests")) (dbfile:print-err *api-process-request-count* " parallel api requests being processed in process " (current-process-id) ", throttling access")) (condition-case Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -451,18 +451,18 @@ ;;====================================================================== ;; M I S C ;;====================================================================== (define (rmt:login run-id) - (rmt:send-receive 'login run-id (list *toppath* megatest-version *my-client-signature*))) + (rmt:send-receive 'login run-id (list *toppath* megatest-version (client:get-signature)))) ;; This login does no retries under the hood - it acts a bit like a ping. ;; Deprecated for nmsg-transport. ;; (define (rmt:login-no-auto-client-setup connection-info) (case *transport-type* ;; run-id of 0 is just a placeholder - ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login 0 (list *toppath* megatest-version *my-client-signature*))) + ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login 0 (list *toppath* megatest-version (client:get-signature)))) ;;((nmsg)(nmsg-transport:client-api-send-receive run-id connection-info 'login (list *toppath* megatest-version run-id *my-client-signature*))) )) ;; hand off a call to one of the db:queries statements ;; added run-id to make looking up the correct db possible Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -386,32 +386,28 @@ (server-key (conc (get-host-name) "-" (current-process-id)))) (if (file-exists? start-flag) (let* ((fmodtime (file-modification-time start-flag)) (delta (- (current-seconds) fmodtime)) (old-enough (> delta idletime)) - (new-server-key "") - ) - + (new-server-key "")) ;; write start-flag file, wait 0.25s, then if previously the start-flag file was older than seconds, and the new file still has the same server key as you just wrote, return #t. ;; the intention is to make sure nfs can read the file we just wrote, and make sure it was written by us, and not another process. (if (and old-enough - (begin - (debug:print-info 2 *default-log-port* "Writing " start-flag) - (with-output-to-file start-flag (lambda () (print server-key))) - (thread-sleep! 0.25) - (set! new-server-key (with-input-from-file start-flag (lambda () (read-line)))) - (equal? server-key new-server-key)) - ) - #t - - ;; If either of the above conditions is not true, print a "Gating server start" message, wait + 1, then call this function recursively. - (begin - (debug:print-info 0 *default-log-port* "Gating server start, last start: " - (seconds->time-string fmodtime) ", time since last start: " delta ", required idletime: " idletime ", gating reason:" (if old-enough "another job started a server" "too soon to start another server")) - - (thread-sleep! ( + 1 idletime)) - (server:wait-for-server-start-last-flag areapath))))))) + (begin + (debug:print-info 2 *default-log-port* "Writing " start-flag) + (with-output-to-file start-flag (lambda () (print server-key))) + (thread-sleep! 0.25) + (set! new-server-key (with-input-from-file start-flag (lambda () (read-line)))) + (equal? server-key new-server-key))) + #t + ;; If either of the above conditions is not true, print a "Gating server start" message, wait + 1, then call this function recursively. + (begin + (debug:print-info 0 *default-log-port* "Gating server start, last start: " + (seconds->time-string fmodtime) ", time since last start: " delta ", required idletime: " idletime ", gating reason:" (if old-enough "another job started a server" "too soon to start another server")) + + (thread-sleep! ( + 1 idletime)) + (server:wait-for-server-start-last-flag areapath))))))) ;; kind start up of server, wait before allowing another server for a given ;; area to be launched @@ -419,23 +415,19 @@ (define (server:kind-run areapath) ;; look for $MT_RUN_AREA_HOME/logs/server-start-last ;; and wait for it to be at least seconds old (server:wait-for-server-start-last-flag areapath) (if (not (server:check-if-running areapath)) ;; why try if there is already a server running? - (let* ( - (lock-file (conc areapath "/logs/server-start.lock"))) + (let* ((lock-file (conc areapath "/logs/server-start.lock"))) (let* ((start-flag (conc areapath "/logs/server-start-last"))) (common:simple-file-lock-and-wait lock-file expire-time: 25) (debug:print-info 2 *default-log-port* "server:kind-run: touching " start-flag) (system (conc "touch " start-flag)) ;; lazy but safe (server:run areapath) (thread-sleep! 20) ;; don't release the lock for at least a few seconds. And allow time for the server startup to get to "SERVER STARTED". (common:simple-file-release-lock lock-file))) - - (debug:print-info 0 *default-log-port* "Found server already running. NOT trying to start another.") - ) -) + (debug:print-info 0 *default-log-port* "Found server already running. NOT trying to start another."))) ;; this one seems to be the general entry point ;; (define (server:start-and-wait areapath #!key (timeout 60)) (let ((give-up-time (+ (current-seconds) timeout)))