Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -23,115 +23,12 @@ (declare (unit api)) (declare (uses rmt)) (declare (uses db)) (declare (uses tasks)) -;; allow these queries through without starting a server -;; -(define api:read-only-queries - '(get-key-val-pairs - get-var - get-keys - get-key-vals - test-toplevel-num-items - get-test-info-by-id - get-steps-info-by-id - get-data-info-by-id - test-get-rundir-from-test-id - get-count-tests-running-for-testname - get-count-tests-running - get-count-tests-running-in-jobgroup - get-previous-test-run-record - get-matching-previous-test-run-records - test-get-logfile-info - test-get-records-for-index-file - get-testinfo-state-status - test-get-top-process-pid - test-get-paths-matching-keynames-target-new - get-prereqs-not-met - get-count-tests-running-for-run-id - get-run-info - get-run-status - get-run-state - get-run-stats - get-run-times - get-targets - get-target - ;; register-run - get-tests-tags - get-test-times - get-tests-for-run - get-test-id - get-tests-for-runs-mindata - get-tests-for-run-mindata - get-run-name-from-id - get-runs - simple-get-runs - get-num-runs - get-runs-cnt-by-patt - get-all-run-ids - get-prev-run-ids - get-run-ids-matching-target - get-runs-by-patt - get-steps-data - get-steps-for-test - read-test-data - read-test-data* - login - tasks-get-last - testmeta-get-record - have-incompletes? - synchash-get - get-changed-record-ids - get-run-record-ids - get-not-completed-cnt)) - -(define api:write-queries - '( - get-keys-write ;; dummy "write" query to force server start - - ;; SERVERS - start-server - kill-server - - ;; TESTS - test-set-state-status-by-id - delete-test-records - delete-old-deleted-test-records - test-set-state-status - test-set-top-process-pid - set-state-status-and-roll-up-items - - update-pass-fail-counts - top-test-set-per-pf-counts ;; (db:top-test-set-per-pf-counts (db:get-db *db* 5) 5 "runfirst") - - ;; RUNS - register-run - set-tests-state-status - delete-run - lock/unlock-run - update-run-event_time - mark-incomplete - set-state-status-and-roll-up-run - ;; STEPS - teststep-set-status! - delete-steps-for-test - ;; TEST DATA - test-data-rollup - csv->test-data - - ;; MISC - sync-inmem->db - - ;; TESTMETA - testmeta-add-record - testmeta-update-field - - ;; TASKS - tasks-add - tasks-set-state-given-param-key - )) +;; api:read-only-queries and api:execute-requests have been moved into common_records + ;; These are called by the server on recipt of /api calls ;; - keep it simple, only return the actual result of the call, i.e. no meta info here ;; ;; - returns #( flag result ) @@ -159,11 +56,11 @@ (params (vector-ref dat 1)) (start-t (current-milliseconds)) (readonly-mode (dbr:dbstruct-read-only dbstruct)) (readonly-command (member cmd api:read-only-queries)) (writecmd-in-readonly-mode (and readonly-mode (not readonly-command))) - (foo (begin + #;(foo (begin (common:telemetry-log (conc "api-in:"(->string cmd)) payload: `((params . ,params))) #t)) (res @@ -329,12 +226,12 @@ (run-id (cadr params)) (realparams (cddr params))) (db:general-call dbstruct stmtname realparams))) ((sdb-qry) (apply sdb:qry params)) ((ping) (current-process-id)) - ((get-changed-record-ids) (apply db:get-changed-record-ids dbstruct params)) - ((get-run-record-ids) (apply db:get-run-record-ids dbstruct params)) + ((get-changed-record-ids) (apply db:get-changed-record-ids dbstruct params)) + ((get-run-record-ids) (apply db:get-run-record-ids dbstruct params)) ;; TESTMETA ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params)) ;; TASKS ((find-task-queue-records) (apply tasks:find-task-queue-records dbstruct params)) Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -54,11 +54,11 @@ (else (rpc:client-connect iface port)))) (define (client:setup areapath #!key (remaining-tries 100) (failed-connects 0)) (case (server:get-transport) ((rpc) (rpc-transport:client-setup remaining-tries: remaining-tries failed-connects: failed-connects)) ;;(client:setup-rpc run-id)) - ((http)(client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects)) + ((http)(client:setup-http *runremote* areapath remaining-tries: remaining-tries failed-connects: failed-connects)) (else (rpc-transport:client-setup remaining-tries: remaining-tries failed-connects: failed-connects)))) ;; (client:setup-rpc run-id)))) ;; Do all the connection work, look up the transport type and set up the ;; connection if required. ;; @@ -70,11 +70,11 @@ ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; -(define (client:setup-http areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f)) +(define (client:setup-http runremote areapath #!key (remaining-tries 100) (failed-connects 0)) ;; (area-dat #f)) (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries) (server:start-and-wait areapath) (if (<= remaining-tries 0) (begin (debug:print-error 0 *default-log-port* "failed to start or connect to server") @@ -81,41 +81,37 @@ (exit 1)) ;; ;; Alternatively here, we can get the list of candidate servers and work our way ;; through them searching for a good one. ;; - (let* ((server-dat (server:get-rand-best areapath)) ;; (server:get-first-best areapath)) - (runremote (or area-dat *runremote*))) + (let* ((server-dat (server:get-rand-best areapath))) ;; (server:get-first-best areapath)) (if (not server-dat) ;; no server found - (client:setup-http areapath remaining-tries: (- remaining-tries 1)) + (client:setup-http runremote areapath remaining-tries: (- remaining-tries 1)) (let ((host (cadr server-dat)) (port (caddr server-dat))) (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) - (if (and (not area-dat) - (not *runremote*)) - (set! *runremote* (make-remote))) (if (and host port) (let* ((start-res (case *transport-type* ((http)(http-transport:client-connect host port)))) (ping-res (case *transport-type* ((http)(rmt:login-no-auto-client-setup start-res))))) (if (and start-res ping-res) - (let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago - (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res) + (begin + (remote-conndat-set! runremote start-res) (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res)) start-res) (begin ;; login failed but have a server record, clean out the record and try again (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid. Fixes part of Randy;s ticket 1405717332 (case *transport-type* ((http)(http-transport:close-connections))) (remote-conndat-set! runremote #f) ;; (hash-table-delete! runremote run-id) (thread-sleep! 1) - (client:setup-http areapath remaining-tries: (- remaining-tries 1)) + (client:setup-http runremote areapath remaining-tries: (- remaining-tries 1)) ))) (begin ;; no server registered ;; (server:kind-run areapath) (server:start-and-wait areapath) (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries) (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms. - (client:setup-http areapath remaining-tries: (- remaining-tries 1))))))))) + (client:setup-http runremote areapath remaining-tries: (- remaining-tries 1))))))))) Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -158,11 +158,10 @@ (define *no-sync-db* #f) ;; SERVER (define *my-client-signature* #f) (define *transport-type* 'http) ;; override with [server] transport http|rpc|nmsg -(define *runremote* #f) ;; if set up for server communication this will hold ;; (define *max-cache-size* 0) (define *logged-in-clients* (make-hash-table)) (define *server-id* #f) (define *server-info* #f) ;; good candidate for easily convert to non-global (define *time-to-exit* #f) @@ -270,21 +269,10 @@ (else "FAIL"))) (define (common:logpro-exit-code->test-status exit-code) (status-sym->string (common:logpro-exit-code->status-sym exit-code))) -(defstruct remote - (hh-dat (common:get-homehost)) ;; homehost record ( addr . hhflag ) - (server-url (if *toppath* (server:check-if-running *toppath*))) ;; (server:check-if-running *toppath*) #f)) - (last-server-check 0) ;; last time we checked to see if the server was alive - (conndat #f) - (transport *transport-type*) - (server-timeout (server:expiration-timeout)) - (force-server #f) - (ro-mode #f) - (ro-mode-checked #f)) ;; flag that indicates we have checked for ro-mode - ;; launching and hosts (defstruct host (reachable #f) (last-update 0) (last-used 0) @@ -996,14 +984,10 @@ (sqlite3:interrupt! db) (sqlite3:finalize! db #t) ;; (vector-set! *task-db* 0 #f) (set! *task-db* #f))))) (http-client#close-all-connections!) - ;; (if (and *runremote* - ;; (remote-conndat *runremote*)) - ;; (begin - ;; (http-client#close-all-connections!))) ;; for http-client (if (not (eq? *default-log-port* (current-error-port))) (close-output-port *default-log-port*)) (set! *default-log-port* (current-error-port))) "Cleanup db exit thread")) (th2 (make-thread (lambda () (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...") Index: common_records.scm ================================================================== --- common_records.scm +++ common_records.scm @@ -17,13 +17,197 @@ ;; along with Megatest. If not, see . ;; ;;====================================================================== ;; (use trace) +(use typed-records) + +;; globals - modules that include this need these here +(define *verbosity-cache* (make-hash-table)) +(define *verbosity* 0) +(define *default-log-port* (current-error-port)) +(define *logging* #f) +(define *functions* (make-hash-table)) ;; symbol => fn ### TEMPORARY!!! +;; (define *toppath* #f) +(define *transport-type* 'http) + +(define (exec-fn fn . params) + (if (hash-table-exists? *functions* fn) + (apply (hash-table-ref *functions* fn) params) + (begin + (debug:print-error 0 "exec-fn " fn " not found") + #f))) + +(define (set-fn fn-name fn) + (hash-table-set! *functions* fn-name fn)) (include "altdb.scm") + +(defstruct remote + (hh-dat #f) ;; (exec-fn 'common:get-homehost)) ;; homehost record ( addr . hhflag ) + (server-url #f) ;; (if *toppath* (exec-fn 'server:check-if-running *toppath*))) ;; (server:check-if-running *toppath*) #f)) + (last-server-check 0) ;; last time we checked to see if the server was alive + (conndat #f) + (transport *transport-type*) + (server-timeout #f) ;; (exec-fn 'server:expiration-timeout)) + (force-server #f) + (ro-mode #f) + (ro-mode-checked #f) ;; flag that indicates we have checked for ro-mode + (ulex:conn #f) ;; ulex db conn is not exactly a db connector, more like a network connector + ) + +;; Pulled from http-transport.scm + +(define (make-http-transport:server-dat)(make-vector 6)) +(define (http-transport:server-dat-get-iface vec) (vector-ref vec 0)) +(define (http-transport:server-dat-get-port vec) (vector-ref vec 1)) +(define (http-transport:server-dat-get-api-uri vec) (vector-ref vec 2)) +(define (http-transport:server-dat-get-api-url vec) (vector-ref vec 3)) +(define (http-transport:server-dat-get-api-req vec) (vector-ref vec 4)) +(define (http-transport:server-dat-get-last-access vec) (vector-ref vec 5)) +(define (http-transport:server-dat-get-socket vec) (vector-ref vec 6)) + +(define (http-transport:server-dat-make-url vec) + (if (and (http-transport:server-dat-get-iface vec) + (http-transport:server-dat-get-port vec)) + (conc "http://" + (http-transport:server-dat-get-iface vec) + ":" + (http-transport:server-dat-get-port vec)) + #f)) + +(define (http-transport:server-dat-update-last-access vec) + (if (vector? vec) + (vector-set! vec 5 (current-seconds)) + (begin + (print-call-chain (current-error-port)) + (debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!")))) + +;;====================================================================== +;; +;;====================================================================== + + +;; allow these queries through without starting a server +;; +(define api:read-only-queries + '(get-key-val-pairs + get-var + get-keys + get-key-vals + test-toplevel-num-items + get-test-info-by-id + get-steps-info-by-id + get-data-info-by-id + test-get-rundir-from-test-id + get-count-tests-running-for-testname + get-count-tests-running + get-count-tests-running-in-jobgroup + get-previous-test-run-record + get-matching-previous-test-run-records + test-get-logfile-info + test-get-records-for-index-file + get-testinfo-state-status + test-get-top-process-pid + test-get-paths-matching-keynames-target-new + get-prereqs-not-met + get-count-tests-running-for-run-id + get-run-info + get-run-status + get-run-state + get-run-stats + get-run-times + get-targets + get-target + ;; register-run + get-tests-tags + get-test-times + get-tests-for-run + get-test-id + get-tests-for-runs-mindata + get-tests-for-run-mindata + get-run-name-from-id + get-runs + simple-get-runs + get-num-runs + get-runs-cnt-by-patt + get-all-run-ids + get-prev-run-ids + get-run-ids-matching-target + get-runs-by-patt + get-steps-data + get-steps-for-test + read-test-data + read-test-data* + login + tasks-get-last + testmeta-get-record + have-incompletes? + synchash-get + get-changed-record-ids + get-run-record-ids + get-not-completed-cnt)) + +(define api:write-queries + '( + get-keys-write ;; dummy "write" query to force server start + + ;; SERVERS + start-server + kill-server + + ;; TESTS + test-set-state-status-by-id + delete-test-records + delete-old-deleted-test-records + test-set-state-status + test-set-top-process-pid + set-state-status-and-roll-up-items + + update-pass-fail-counts + top-test-set-per-pf-counts ;; (db:top-test-set-per-pf-counts (db:get-db *db* 5) 5 "runfirst") + + ;; RUNS + register-run + set-tests-state-status + delete-run + lock/unlock-run + update-run-event_time + mark-incomplete + set-state-status-and-roll-up-run + ;; STEPS + teststep-set-status! + delete-steps-for-test + ;; TEST DATA + test-data-rollup + csv->test-data + + ;; MISC + sync-inmem->db + + ;; TESTMETA + testmeta-add-record + testmeta-update-field + + ;; TASKS + tasks-add + tasks-set-state-given-param-key + )) + +;;====================================================================== +;; ALLDATA +;;====================================================================== +;; +;; attempt to consolidate a bunch of global information into one struct to toss around +(defstruct alldat + (toppath #f) + (read-only-queries api:read-only-queries) + (write-queries api:write-queries)) + +(define *alldata* (make-alldat)) + ;; Some of these routines use: ;; ;; http://www.cs.toronto.edu/~gfb/scheme/simple-macros.html ;; ;; Syntax for defining macros in a simple style similar to function definiton, @@ -80,11 +264,11 @@ ;; this was cached based on results from profiling but it turned out the profiling ;; somehow went wrong - perhaps too many processes writing to it. Leaving the caching ;; in for now but can probably take it out later. ;; -(define (debug:calc-verbosity vstr) +(define (debug:calc-verbosity vstr verbose quiet) ;; verbose and quiet are #f or enabled (or (hash-table-ref/default *verbosity-cache* vstr #f) (let ((res (cond ((number? vstr) vstr) ((not (string? vstr)) 1) ;; ((string-match "^\\s*$" vstr) 1) @@ -91,12 +275,12 @@ (vstr (let ((debugvals (filter number? (map string->number (string-split vstr ","))))) (cond ((> (length debugvals) 1) debugvals) ((> (length debugvals) 0)(car debugvals)) (else 1)))) - ((args:get-arg "-v") 2) - ((args:get-arg "-q") 0) + (verbose 2) ;; ((args:get-arg "-v") 2) + (quiet 0) ;; ((args:get-arg "-q") 0) (else 1)))) (hash-table-set! *verbosity-cache* vstr res) res))) ;; check verbosity, #t is ok @@ -121,29 +305,29 @@ (not (null? (lset-intersection! eq? *verbosity* n)))) ((and (number? *verbosity*) (list? n)) (member *verbosity* n)))) -(define (debug:setup) - (let ((debugstr (or (args:get-arg "-debug") - (getenv "MT_DEBUG_MODE")))) - (set! *verbosity* (debug:calc-verbosity debugstr)) +(define (debug:setup dmode verbose quiet) + (let ((debugstr (or dmode ;; (args:get-arg "-debug") + (get-environment-variable "MT_DEBUG_MODE")))) + (set! *verbosity* (debug:calc-verbosity debugstr verbose quiet)) (debug:check-verbosity *verbosity* debugstr) ;; if we were handed a bad verbosity rule then we will override it with 1 and continue (if (not *verbosity*)(set! *verbosity* 1)) - (if (or (args:get-arg "-debug") - (not (getenv "MT_DEBUG_MODE"))) + (if (or dmode ;; (args:get-arg "-debug") + (not (get-environment-variable "MT_DEBUG_MODE"))) (setenv "MT_DEBUG_MODE" (if (list? *verbosity*) (string-intersperse (map conc *verbosity*) ",") (conc *verbosity*)))))) (define (debug:print n e . params) (if (debug:debug-mode n) (with-output-to-port (or e (current-error-port)) (lambda () (if *logging* - (db:log-event (apply conc params)) + (exec-fn 'db:log-event (apply conc params)) (apply print params) ))))) ;; Brandon's debug printer shortcut (indulge me :) (define *BB-process-starttime* (current-milliseconds)) @@ -218,11 +402,11 @@ ;; normal print (if (debug:debug-mode n) (with-output-to-port (if (port? e) e (current-error-port)) (lambda () (if *logging* - (db:log-event (apply conc params)) + (exec-fn 'db:log-event (apply conc params)) ;; (apply print "pid:" (current-process-id) " " params) (apply print "ERROR: " params) )))) ;; pass important messages to stderr (if (and (eq? n 0)(not (eq? e (current-error-port)))) @@ -235,11 +419,11 @@ (if (debug:debug-mode n) (with-output-to-port (if (port? e) e (current-error-port)) (lambda () (if *logging* (let ((res (format#format #f "INFO: (~a) ~a" n (apply conc params)))) - (db:log-event res)) + (exec-fn 'db:log-event res)) ;; (apply print "pid:" (current-process-id) " " "INFO: (" n ") " params) ;; res) (apply print "INFO: (" n ") " params) ;; res) ))))) Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -511,11 +511,11 @@ 3))) (define (get-curr-sort) (vector-ref *tests-sort-options* *tests-sort-reverse*)) -(debug:setup) +(debug:setup (args:get-arg "-debug")(args:get-arg "-v")(args:get-arg "-q")) ;; (define uidat #f) (define-inline (dboard:uidat-get-keycol vec)(vector-ref vec 0)) (define-inline (dboard:uidat-get-lftcol vec)(vector-ref vec 1)) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -38,10 +38,14 @@ (include "common_records.scm") (include "db_records.scm") (include "key_records.scm") (include "run_records.scm") + +(declare (uses rmtmod)) +(import rmtmod) + (define *rundb-mutex* (make-mutex)) ;; prevent problems opening/closing rundb's (define *number-of-writes* 0) (define *number-non-write-queries* 0) @@ -304,13 +308,19 @@ ;if wal mode is on -WAL and -shm file get created when db:open-megatest-db is called. modtimedelta will always be < 10 so db in tmp not get synced ;(tmpdbmodtime (if dbfexists (db:get-last-update-time (car tmpdb)) #f)) ;(fmt (file-modification-time tmpdbfname)) (modtimedelta (and mtdbmodtime tmpdbmodtime (- mtdbmodtime tmpdbmodtime)))) - (when write-access - (sqlite3:execute (car mtdb) "drop trigger if exists update_tests_trigger") - (sqlite3:execute (car mtdb) "drop trigger if exists update_runs_trigger")) + (handle-exceptions + exn + (let ((call-chain (get-call-chain)) + (msg ((condition-property-accessor 'exn 'message) exn))) + (debug:print 0 *default-log-port* "ERROR: attempted to drop triggers on MTRA/megatest.db but failed. Error is " msg) + (set! write-access #f)) ;; if we failed to drop the triggers then we probably don't have write access + (when write-access + (sqlite3:execute (car mtdb) "drop trigger if exists update_tests_trigger") + (sqlite3:execute (car mtdb) "drop trigger if exists update_runs_trigger"))) ;(print "mtdbmodtime " mtdbmodtime " tmpdbmodtime " tmpdbmodtime " mtdbpath " mtdbpath " " (conc *toppath* "/megatest.db")) ;;(debug:print-info 13 *default-log-port* "db:open-db>> mtdbpath="mtdbpath" mtdbexists="mtdbexists" and write-access="write-access) (if (and dbexists (not write-access)) (begin @@ -4744,6 +4754,8 @@ (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) (system "rm -rf tempdir"))) ;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%") - +;; tiresome setup for rmtmod (and other mods) goes here +(set-fn 'db:dbfile-path common:get-db-tmp-area) +(set-fn 'db:setup db:setup) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -335,35 +335,11 @@ (close-connection! api-dat) ;;(close-idle-connections!) #t)) #f))) - -(define (make-http-transport:server-dat)(make-vector 6)) -(define (http-transport:server-dat-get-iface vec) (vector-ref vec 0)) -(define (http-transport:server-dat-get-port vec) (vector-ref vec 1)) -(define (http-transport:server-dat-get-api-uri vec) (vector-ref vec 2)) -(define (http-transport:server-dat-get-api-url vec) (vector-ref vec 3)) -(define (http-transport:server-dat-get-api-req vec) (vector-ref vec 4)) -(define (http-transport:server-dat-get-last-access vec) (vector-ref vec 5)) -(define (http-transport:server-dat-get-socket vec) (vector-ref vec 6)) - -(define (http-transport:server-dat-make-url vec) - (if (and (http-transport:server-dat-get-iface vec) - (http-transport:server-dat-get-port vec)) - (conc "http://" - (http-transport:server-dat-get-iface vec) - ":" - (http-transport:server-dat-get-port vec)) - #f)) - -(define (http-transport:server-dat-update-last-access vec) - (if (vector? vec) - (vector-set! vec 5 (current-seconds)) - (begin - (print-call-chain (current-error-port)) - (debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!")))) +;; http-transport:server-dat definition moved to common_records.scm ;; ;; connect ;; (define (http-transport:client-connect iface port) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -45,10 +45,13 @@ (declare (uses genexample)) ;; (declare (uses daemon)) (declare (uses db)) ;; (declare (uses dcommon)) +(declare (uses commonmod)) +(declare (uses rmtmod)) + (declare (uses tdb)) (declare (uses mt)) (declare (uses api)) (declare (uses tasks)) ;; only used for debugging. (declare (uses env)) @@ -608,11 +611,11 @@ ;;====================================================================== ;; Misc setup stuff ;;====================================================================== -(debug:setup) +(debug:setup (args:get-arg "-debug")(args:get-arg "-v")(args:get-arg "-q")) (if (args:get-arg "-logging")(set! *logging* #t)) (if (debug:debug-mode 3) ;; we are obviously debugging (set! open-run-close open-run-close-no-exception-handling)) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -25,10 +25,14 @@ (declare (uses http-transport)) (include "common_records.scm") (declare (uses rmtmod)) (import rmtmod) +(set-fn 'server:expiration-timeout server:expiration-timeout) +(set-fn 'common:get-homehost common:get-homehost) +(set-fn 'server:check-if-running server:check-if-running) +(set-fn 'api:execute-requests api:execute-requests) ;; ;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!! ;; @@ -38,247 +42,43 @@ ;;====================================================================== ;; S U P P O R T F U N C T I O N S ;;====================================================================== -;; if a server is either running or in the process of starting call client:setup -;; else return #f to let the calling proc know that there is no server available -;; -(define (rmt:get-connection-info areapath #!key (area-dat #f)) ;; TODO: push areapath down. - (let* ((runremote (or area-dat *runremote*)) - (cinfo (if (remote? runremote) - (remote-conndat runremote) - #f))) - (if cinfo - cinfo - (if (server:check-if-running areapath) - (client:setup areapath) - #f)))) - (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id -;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) -;; -(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected - - (common:telemetry-log (conc "rmt:"(->string cmd)) - payload: `((rid . ,rid) - (params . ,params))) - - - ;;DOT digraph megatest_state_status { - ;;DOT ranksep=0; - ;;DOT // rankdir=LR; - ;;DOT node [shape="box"]; - ;;DOT "rmt:send-receive" -> MUTEXLOCK; - ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; } - ;; do all the prep locked under the rmt-mutex - (mutex-lock! *rmt-mutex*) - - ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote - ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. - ;; 3. do the query, if on homehost use local access - ;; - (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value - (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas +(define *runremote* (make-remote)) + +;; this entry point can decide based on cmd whether to dispatch to old api calls via remote or via ulex +;; +(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) + (let* ((areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas (runremote (or area-dat - *runremote*)) - (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*))) - - ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity - ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"]; - ;; DOT INIT_RUNREMOTE -> MUTEXLOCK; + *runremote*))) ;; ensure we have a record for our connection for given area - (if (not runremote) ;; can remove this one. should never get here. + (if (not (remote-hh-dat runremote)) + (begin + (remote-server-timeout-set! runremote (server:expiration-timeout)) + (remote-hh-dat-set! runremote (common:get-homehost)) + )) ;; new runremote will come from this on next iteration + + ;; ensure we have a homehost record, do this here instead of in -orig + (if (or (not (remote-hh-dat runremote)) + (not (pair? (remote-hh-dat runremote)))) ;; not on homehost + (remote-hh-dat-set! runremote (common:get-homehost))) + + (if (member cmd '(blah)) (begin - (set! *runremote* (make-remote)) - (set! runremote *runremote*))) ;; new runremote will come from this on next iteration - - ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity - ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"]; - ;; DOT SET_HOMEHOST -> MUTEXLOCK; - ;; ensure we have a homehost record - (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost - (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little - (remote-hh-dat-set! runremote (common:get-homehost))) - - ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) - (cond - ;;DOT EXIT; - ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" } - ;; give up if more than 15 attempts - ((> attemptnum 15) - (debug:print 0 *default-log-port* "ERROR: 15 tries to start/connect to server. Giving up.") - (exit 1)) - - ;;DOT CASE2 [label="local\nreadonly\nquery"]; - ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2} - ;;DOT CASE2 -> "rmt:open-qry-close-locally"; - ;; readonly mode, read request- handle it - case 2 - ((and readonly-mode - (member cmd api:read-only-queries)) - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2") - (rmt:open-qry-close-locally cmd 0 params) - ) - - ;;DOT CASE3 [label="write in\nread-only mode"]; - ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3} - ;;DOT CASE3 -> "#f"; - ;; readonly mode, write request. Do nothing, return #f - (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params)) - - ;; This block was for pre-emptively resetting the connection if there had been no communication for some time. - ;; I don't think it adds any value. If the server is not there, just fail and start a new connection. - ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout) - ;; - ;;DOT CASE4 [label="reset\nconnection"]; - ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4} - ;;DOT CASE4 -> "rmt:send-receive"; - ;; reset the connection if it has been unused too long - ((and runremote - (remote-conndat runremote) - (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on - (+ (http-transport:server-dat-get-last-access (remote-conndat runremote)) - (remote-server-timeout runremote)))) - (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.") - (http-transport:close-connections area-dat: runremote) - (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection. - (mutex-unlock! *rmt-mutex*) - (rmt:send-receive cmd rid params attemptnum: attemptnum)) - - ;;DOT CASE5 [label="local\nread"]; - ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5}; - ;;DOT CASE5 -> "rmt:open-qry-close-locally"; - - ;; on homehost and this is a read - ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required - (cdr (remote-hh-dat runremote)) ;; on homehost - (member cmd api:read-only-queries)) ;; this is a read - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") - (rmt:open-qry-close-locally cmd 0 params)) - - ;;DOT CASE6 [label="init\nremote"]; - ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6}; - ;;DOT CASE6 -> "rmt:send-receive"; - ;; on homehost and this is a write, we already have a server, but server has died - ((and (cdr (remote-hh-dat runremote)) ;; on homehost - (not (member cmd api:read-only-queries)) ;; this is a write - (remote-server-url runremote) ;; have a server - (not (server:ping (remote-server-url runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. - (set! *runremote* (make-remote)) - (remote-force-server-set! runremote (common:force-server?)) - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6") - (rmt:send-receive cmd rid params attemptnum: attemptnum)) - - ;;DOT CASE7 [label="homehost\nwrite"]; - ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7}; - ;;DOT CASE7 -> "rmt:open-qry-close-locally"; - ;; on homehost and this is a write, we already have a server - ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required - (cdr (remote-hh-dat runremote)) ;; on homehost - (not (member cmd api:read-only-queries)) ;; this is a write - (remote-server-url runremote)) ;; have a server - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") - (rmt:open-qry-close-locally cmd 0 params)) - - ;;DOT CASE8 [label="force\nserver"]; - ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8}; - ;;DOT CASE8 -> "rmt:open-qry-close-locally"; - ;; on homehost, no server contact made and this is a write, passively start a server - ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required - (cdr (remote-hh-dat runremote)) ;; have homehost - (not (remote-server-url runremote)) ;; no connection yet - (not (member cmd api:read-only-queries))) ;; not a read-only query - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8") - (let ((server-url (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call - (if server-url - (remote-server-url-set! runremote server-url) ;; the string can be consumed by the client setup if needed - (if (common:force-server?) - (server:start-and-wait *toppath*) - (server:kind-run *toppath*)))) - (remote-force-server-set! runremote (common:force-server?)) - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8.1") - (rmt:open-qry-close-locally cmd 0 params)) - - ;;DOT CASE9 [label="force server\nnot on homehost"]; - ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9}; - ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive"; - ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one - (not (remote-conndat runremote))) - (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost - (not (remote-conndat runremote)))) ;; and no connection - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote)) - (mutex-unlock! *rmt-mutex*) - (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up? - (server:start-and-wait *toppath*)) - (remote-conndat-set! runremote (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http - (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as - - ;;DOT CASE10 [label="on homehost"]; - ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10}; - ;;DOT CASE10 -> "rmt:open-qry-close-locally"; - ;; all set up if get this far, dispatch the query - ((and (not (remote-force-server runremote)) - (cdr (remote-hh-dat runremote))) ;; we are on homehost - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10") - (rmt:open-qry-close-locally cmd (if rid rid 0) params)) - - ;;DOT CASE11 [label="send_receive"]; - ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11}; - ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"]; - ;;DOT CASE11 -> "RESULT" [label="call succeeded"]; - ;; not on homehost, do server query - (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid))))) - ;;DOT } + (mutex-lock! *send-receive-mutex*) + (let ((ulex:conn (remote-ulex:conn runremote))) + (if (not ulex:conn)(remote-ulex:conn-set! runremote (rmtmod:setup-ulex areapath))) + (rmtmod:send-receive-ulex ulex:conn cmd rid params attemptnum area-dat))) + (rmt:send-receive-orig *default-log-port* runremote *rmt-mutex* areapath *db-multi-sync-mutex* cmd rid params *alldata* attemptnum: attemptnum area-dat: area-dat)))) ;; bunch of small functions factored out of send-receive to make debug easier ;; -(define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid) - ;; (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9") - ;; (mutex-lock! *rmt-mutex*) - (let* ((conninfo (remote-conndat runremote)) - (dat (case (remote-transport runremote) - ((http) (condition-case ;; handling here has - ;; caused a lot of - ;; problems. However it - ;; is needed to deal with - ;; attemtped - ;; communication to - ;; servers that have gone - ;; away - (http-transport:client-api-send-receive 0 conninfo cmd params) - ((commfail)(vector #f "communications fail")) - ((exn)(vector #f "other fail" (print-call-chain))))) - (else - (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported") - (exit)))) - (success (if (vector? dat) (vector-ref dat 0) #f)) - (res (if (vector? dat) (vector-ref dat 1) #f))) - (if (and (vector? conninfo) (< 5 (vector-length conninfo))) - (http-transport:server-dat-update-last-access conninfo) ;; refresh access time - (begin - (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo) - (set! conninfo #f) - (remote-conndat-set! *runremote* #f) ;; NOTE: *runremote* is global copy of runremote. Purpose: factor out global. - (http-transport:close-connections area-dat: runremote))) - (debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote) - (mutex-unlock! *rmt-mutex*) - (if success ;; success only tells us that the transport was - ;; successful, have to examine the data to see if - ;; there was a detected issue at the other end - (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) - (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params) - ))) - ;; (define (rmt:update-db-stats run-id rawcmd params duration) ;; (mutex-lock! *db-stats-mutex*) ;; (handle-exceptions ;; exn ;; (begin @@ -331,53 +131,10 @@ (cons 'none 0)) (loop (car tal)(cdr tal) newmax-cmd currmax))))))) (mutex-unlock! *db-stats-mutex*) res)) -(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5)) - (let* ((qry-is-write (not (member cmd api:read-only-queries))) - (db-file-path (db:dbfile-path)) ;; 0)) - (dbstruct-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t))) - (read-only (not (file-write-access? db-file-path))) - (start (current-milliseconds)) - (resdat (if (not (and read-only qry-is-write)) - (let ((v (api:execute-requests dbstruct-local (vector (symbol->string cmd) params)))) - (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong.. - exn ;; This is an attempt to detect that situation and recover gracefully - (begin - (debug:print0 *default-log-port* "ERROR: bad data from server " v " message: " ((condition-property-accessor 'exn 'message) exn)) - (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy - (if (and (vector? v) - (> (vector-length v) 1)) - (let ((newvec (vector (vector-ref v 0)(vector-ref v 1)))) - newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record - (vector #t '())))) ;; we could also check that the returned types are valid - (vector #t '()))) - (success (vector-ref resdat 0)) - (res (vector-ref resdat 1)) - (duration (- (current-milliseconds) start))) - (if (and read-only qry-is-write) - (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd)) - (if (not success) - (if (> remretries 0) - (begin - (debug:print-error 0 *default-log-port* "local query failed. Trying again.") - (thread-sleep! (/ (random 5000) 1000)) ;; some random delay - (rmt:open-qry-close-locally cmd run-id params remretries: (- remretries 1))) - (begin - (debug:print-error 0 *default-log-port* "too many retries in rmt:open-qry-close-locally, giving up") - #f)) - (begin - ;; (rmt:update-db-stats run-id cmd params duration) - ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it - (if qry-is-write - (let ((start-time (current-seconds))) - (mutex-lock! *db-multi-sync-mutex*) -/ (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client) - (mutex-unlock! *db-multi-sync-mutex*))))) - res)) - (define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params) (let* ((run-id (if run-id run-id 0)) (res (handle-exceptions exn #f @@ -931,10 +688,16 @@ (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id))) (define (rmt:test-get-archive-block-info archive-block-id) (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id))) -(set-functions rmt:send-receive remote-server-url-set! - http-transport:close-connections remote-conndat-set! - debug:print debug:print-info - remote-ro-mode remote-ro-mode-set! - remote-ro-mode-checked-set! remote-ro-mode-checked) +(set-functions http-transport:client-api-send-receive ;; a + http-transport:close-connections ;; b + api:execute-requests ;; c + #f + client:setup ;; e + server:kind-run ;; f + server:start-and-wait ;; g + server:check-if-running ;; h + server:ping ;; i + common:force-server? ;; j + ) Index: rmtmod.scm ================================================================== --- rmtmod.scm +++ rmtmod.scm @@ -1,7 +1,7 @@ ;;====================================================================== -;; Copyright 2017, Matthew Welland. +;; Copyright 2019, Matthew Welland. ;; ;; This file is part of Megatest. ;; ;; Megatest is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by @@ -23,73 +23,121 @@ (module rmtmod * (import scheme chicken data-structures extras) -(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18) +(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable) (import commonmod) +(use (prefix ulex ulex:)) + +(include "common_records.scm") ;; Hack to make these functions visible to the refactored code, goal is to eliminate these over time. -(define (rmt:send-receive . params) #f) +;; (define (rmt:send-receive . params) #f) (define (http-transport:close-connections . params) #f) ;; from remote defstruct in common.scm -(define (remote-conndat-set! . params) #f) -(define (remote-server-url-set! . params) #f) -(define (remote-ro-mode . params) #f) -(define (remote-ro-mode-set! . params) #f) -(define (remote-ro-mode-checked-set! . params) #f) -(define (remote-ro-mode-checked . params) #f) -(define (debug:print . params) #f) -(define (debug:print-info . params) #f) - -(define (set-functions send-receive rsus - close-connections rcs - dbgp dbgpinfo - ro-mode ro-mode-set - ro-mode-checked-set ro-mode-checked - ) - (set! rmt:send-receive send-receive) - (set! remote-server-url-set! rsus) - (set! http-transport:close-connections close-connections) - (set! remote-conndat-set! rcs) - (set! debug:print dbgp) - (set! debug:print-info dbgpinfo) - (set! remote-ro-mode ro-mode) - (set! remote-ro-mode-set! ro-mode-set) - (set! remote-ro-mode-checked-set! ro-mode-checked-set) - (set! remote-ro-mode-checked ro-mode-checked)) - -(define (rmtmod:calc-ro-mode runremote *toppath*) +;; (define (api:execute-requests . params) #f) +(define (http-transport:client-api-send-receive . params) #f) +(define (client:setup . params) #f) +(define (server:kind-run . params) #f) +(define (server:start-and-wait . params) #f) +(define (server:check-if-running . params) #f) +(define (server:ping . params) #f) +(define (common:force-server? . params) #f) +;; 'send-receive rmt:send-receive ... +(define (set-functions . alldata) + (match + alldata + ((a b c d e f g h i j) ;; e f g h i j k l) + (set! http-transport:client-api-send-receive a) + (set! http-transport:close-connections b) + ;; (set! api:execute-requests c) + ;; d + (set! client:setup e) + (set! server:kind-run f) + (set! server:start-and-wait g) + (set! server:check-if-running h) + (set! server:ping i) + (set! common:force-server? j) + ))) + +(define (rmt:open-qry-close-locally log-port multi-sync-mutex cmd run-id params alldat #!key (remretries 5)) + (let* ((ro-queries (alldat-read-only-queries alldat)) + (qry-is-write (not (member cmd ro-queries))) + (db-file-path (exec-fn 'db:dbfile-path)) ;; 0)) + (dbstruct-local (exec-fn 'db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t))) + (read-only (not (file-write-access? db-file-path))) + (start (current-milliseconds)) + (resdat (if (not (and read-only qry-is-write)) + (let ((v (exec-fn 'api:execute-requests dbstruct-local (vector (symbol->string cmd) params)))) + (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong.. + exn ;; This is an attempt to detect that situation and recover gracefully + (begin + (debug:print 0 log-port "ERROR: bad data from server " v " message: " ((condition-property-accessor 'exn 'message) exn)) + (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy + (if (and (vector? v) + (> (vector-length v) 1)) + (let ((newvec (vector (vector-ref v 0)(vector-ref v 1)))) + newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record + (vector #t '())))) ;; we could also check that the returned types are valid + (vector #t '()))) + (success (vector-ref resdat 0)) + (res (vector-ref resdat 1)) + (duration (- (current-milliseconds) start))) + (if (and read-only qry-is-write) + (debug:print 0 log-port "ERROR: attempt to write to read-only database ignored. cmd=" cmd)) + (if (not success) + (if (> remretries 0) + (begin + (debug:print-error 0 log-port "local query failed. Trying again.") + (thread-sleep! (/ (random 5000) 1000)) ;; some random delay + (rmt:open-qry-close-locally log-port multi-sync-mutex cmd run-id params alldat remretries: (- remretries 1))) + (begin + (debug:print-error 0 log-port "too many retries in rmt:open-qry-close-locally, giving up") + #f)) + (begin + ;; (rmt:update-db-stats run-id cmd params duration) + ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it + #;(if qry-is-write + (let ((start-time (current-seconds))) + (mutex-lock! multi-sync-mutex) + (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client) + (mutex-unlock! multi-sync-mutex))))) + res)) + + + +(define (rmtmod:calc-ro-mode runremote toppath) (if (and runremote (remote-ro-mode-checked runremote)) (remote-ro-mode runremote) - (let* ((dbfile (conc *toppath* "/megatest.db")) + (let* ((dbfile (conc toppath "/megatest.db")) (ro-mode (not (file-write-access? dbfile)))) ;; TODO: use dbstruct or runremote to figure this out in future (if runremote (begin (remote-ro-mode-set! runremote ro-mode) (remote-ro-mode-checked-set! runremote #t) ro-mode) ro-mode)))) (define (extras-readonly-mode rmt-mutex log-port cmd params) - (mutex-unlock! rmt-mutex) + ;;(mutex-unlock! rmt-mutex) (debug:print-info 12 log-port "rmt:send-receive, case 3") (debug:print 0 log-port "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params) #f) -(define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params) - (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum) - (mutex-lock! *rmt-mutex*) +(define (extras-transport-failed log-port rmt-mutex attemptnum runremote areapath cmd rid params alldat) + (debug:print 0 log-port "WARNING: communication failed. Trying again, try num: " attemptnum) + ;;(mutex-lock! rmt-mutex) (remote-conndat-set! runremote #f) (http-transport:close-connections area-dat: runremote) (remote-server-url-set! runremote #f) - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1") - (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) + ;;(mutex-unlock! rmt-mutex) + (debug:print-info 12 log-port "rmt:send-receive, case 9.1") + (rmt:send-receive-orig log-port runremote rmt-mutex areapath cmd rid params alldat attemptnum: (+ attemptnum 1))) -(define (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) +(define (extras-transport-succeded log-port rmt-mutex attemptnum runremote areapath res params rid cmd alldat) (if (and (vector? res) (eq? (vector-length res) 2) (eq? (vector-ref res 1) 'overloaded)) ;; since we are ;; looking at the ;; data to carry the @@ -105,15 +153,214 @@ ;; server is ;; overloaded and we ;; want to ease off ;; the queries (let ((wait-delay (+ attemptnum (* attemptnum 10)))) - (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.") - (mutex-lock! *rmt-mutex*) + (debug:print 0 log-port "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.") + ;;(mutex-lock! rmt-mutex) (http-transport:close-connections area-dat: runremote) - (set! *runremote* #f) ;; force starting over - (mutex-unlock! *rmt-mutex*) + ;; (set! *runremote* #f) ;; force starting over + (remote-server-url-set! runremote #f) ;; I am hoping this will force a redo on server connection. NOT TESTED + ;;(mutex-unlock! rmt-mutex) (thread-sleep! wait-delay) - (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) + (rmt:send-receive-orig log-port runremote rmt-mutex areapath cmd rid params alldat attemptnum: (+ attemptnum 1))) res)) ;; All good, return res + +;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) +;; +;; add multi-sync-mutex +;; +(define (rmt:send-receive-orig log-port runremote rmt-mutex toppath multi-sync-mutex cmd rid params alldat #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected + + #;(common:telemetry-log (conc "rmt:"(->string cmd)) + payload: `((rid . ,rid) + (params . ,params))) + + + ;; do all the prep locked under the rmt-mutex + ;;(mutex-lock! rmt-mutex) + + ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote + ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. + ;; 3. do the query, if on homehost use local access + ;; + (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value + + (readonly-mode (rmtmod:calc-ro-mode runremote toppath))) + + ;; (assert (not (pair? (remote-hh-dat runremote)))) + + ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) + (cond + ;; give up if more than 15 attempts + ((> attemptnum 15) + (debug:print 0 log-port "ERROR: 15 tries to start/connect to server. Giving up.") + (exit 1)) + + ;; readonly mode, read request- handle it - case 2 + ((and readonly-mode + (member cmd api:read-only-queries)) + ;; (mutex-unlock! rmt-mutex) + (debug:print-info 12 log-port "rmt:send-receive, case 2") + (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params alldat) + ) + + ;; readonly mode, write request. Do nothing, return #f + (readonly-mode (extras-readonly-mode rmt-mutex log-port cmd params)) + + ;; This block was for pre-emptively resetting the connection if there had been no communication for some time. + ;; I don't think it adds any value. If the server is not there, just fail and start a new connection. + ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout) + ;; + ;; reset the connection if it has been unused too long + ((and runremote + (remote-conndat runremote) + (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on + (+ (http-transport:server-dat-get-last-access (remote-conndat runremote)) + (remote-server-timeout runremote)))) + (debug:print-info 0 log-port "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.") + (http-transport:close-connections area-dat: runremote) + (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection. + ;; (mutex-unlock! rmt-mutex) + (rmt:send-receive-orig log-port runremote rmt-mutex toppath multi-sync-mutex cmd rid params alldat attemptnum: attemptnum)) + + + ;; on homehost and this is a read + ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required + (pair? (remote-hh-dat runremote)) + (cdr (remote-hh-dat runremote)) ;; on homehost + (member cmd api:read-only-queries)) ;; this is a read + ;; (mutex-unlock! rmt-mutex) + (debug:print-info 12 log-port "rmt:send-receive, case 5") + (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params alldat)) + + ;; on homehost and this is a write, we already have a server, but server has died + ((and (cdr (remote-hh-dat runremote)) ;; on homehost + (not (member cmd api:read-only-queries)) ;; this is a write + (remote-server-url runremote) ;; have a server + (not (server:ping (remote-server-url runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. + ;; (set! *runremote* (make-remote)) ;; WARNING - broken this. + (remote-force-server-set! runremote (common:force-server?)) + ;; (mutex-unlock! rmt-mutex) + (debug:print-info 12 log-port "rmt:send-receive, case 6") + (rmt:send-receive-orig log-port runremote rmt-mutex toppath multi-sync-mutex cmd rid params alldat attemptnum: attemptnum)) + + ;; on homehost and this is a write, we already have a server + ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required + (cdr (remote-hh-dat runremote)) ;; on homehost + (not (member cmd api:read-only-queries)) ;; this is a write + (remote-server-url runremote)) ;; have a server + ;;(mutex-unlock! rmt-mutex) + (debug:print-info 12 log-port "rmt:send-receive, case 4.1") + (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params alldat)) + + ;; on homehost, no server contact made and this is a write, passively start a server + ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required + (cdr (remote-hh-dat runremote)) ;; have homehost + (not (remote-server-url runremote)) ;; no connection yet + (not (member cmd api:read-only-queries))) ;; not a read-only query + (debug:print-info 12 log-port "rmt:send-receive, case 8") + (let ((server-url (server:check-if-running toppath))) ;; (server:read-dotserver->url toppath))) ;; (server:check-if-running toppath))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call + (if server-url + (remote-server-url-set! runremote server-url) ;; the string can be consumed by the client setup if needed + (if (common:force-server?) + (server:start-and-wait toppath) + (server:kind-run toppath)))) + (remote-force-server-set! runremote (common:force-server?)) + ;; (mutex-unlock! rmt-mutex) + (debug:print-info 12 log-port "rmt:send-receive, case 8.1") + (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params alldat)) + + ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one + (not (remote-conndat runremote))) + (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost + (not (remote-conndat runremote)))) ;; and no connection + (debug:print-info 12 log-port "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote)) + ;;(mutex-unlock! rmt-mutex) + (if (not (server:check-if-running toppath)) ;; who knows, maybe one has started up? + (server:start-and-wait toppath)) + (remote-conndat-set! runremote (rmt:get-connection-info runremote toppath)) ;; calls client:setup which calls client:setup-http + (rmt:send-receive-orig log-port runremote rmt-mutex toppath multi-sync-mutex cmd rid params alldat attemptnum: attemptnum)) ;; TODO: add back-off timeout as + + ;; all set up if get this far, dispatch the query + ((and (not (remote-force-server runremote)) + (cdr (remote-hh-dat runremote))) ;; we are on homehost + ;;(mutex-unlock! rmt-mutex) + (debug:print-info 12 log-port "rmt:send-receive, case 10") + (rmt:open-qry-close-locally log-port multi-sync-mutex cmd (if rid rid 0) params alldat)) + + ;; not on homehost, do server query + (else (extras-case-11 log-port rmt-mutex runremote toppath cmd params attemptnum rid alldat))))) + +(define (extras-case-11 log-port rmt-mutex runremote areapath cmd params attemptnum rid alldat) + ;; (mutex-unlock! rmt-mutex) + (debug:print-info 12 log-port "rmt:send-receive, case 9") + ;; (mutex-lock! rmt-mutex) + (let* ((conninfo (remote-conndat runremote)) + (dat (case (remote-transport runremote) + ((http) (condition-case ;; handling here has + ;; caused a lot of + ;; problems. However it + ;; is needed to deal with + ;; attemtped + ;; communication to + ;; servers that have gone + ;; away + (http-transport:client-api-send-receive 0 conninfo cmd params) + ((commfail)(vector #f "communications fail")) + ((exn)(vector #f "other fail" (print-call-chain))))) + (else + (debug:print 0 log-port "ERROR: transport " (remote-transport runremote) " not supported") + (exit)))) + (success (if (vector? dat) (vector-ref dat 0) #f)) + (res (if (vector? dat) (vector-ref dat 1) #f))) + (if (and (vector? conninfo) (< 5 (vector-length conninfo))) + (http-transport:server-dat-update-last-access conninfo) ;; refresh access time + (begin + (debug:print 0 log-port "INFO: Should not get here! conninfo=" conninfo) + (set! conninfo #f) + (remote-conndat-set! runremote #f) ;; NOTE: *runremote* is global copy of runremote. Purpose: factor out global. + (http-transport:close-connections area-dat: runremote))) + (debug:print-info 13 log-port "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote) + ;; (mutex-unlock! rmt-mutex) + (if success ;; success only tells us that the transport was + ;; successful, have to examine the data to see if + ;; there was a detected issue at the other end + (extras-transport-succeded log-port rmt-mutex attemptnum runremote areapath res params rid cmd alldat) + (extras-transport-failed log-port rmt-mutex attemptnum runremote areapath cmd rid params alldat) + ))) + +;; if a server is either running or in the process of starting call client:setup +;; else return #f to let the calling proc know that there is no server available +;; +(define (rmt:get-connection-info runremote areapath #!key (area-dat #f)) ;; TODO: push areapath down. + (let* (;; (runremote (or area-dat runremote)) + (cinfo (if (remote? runremote) + (remote-conndat runremote) + #f))) + (if cinfo + cinfo + (if (server:check-if-running areapath) + (client:setup runremote areapath) + #f)))) + + + +;;====================================================================== +;; ulex and steps stuff +;;====================================================================== + +(define (rmtmod:setup-ulex toppath) + (ulex:make-area + dbdir: (conc toppath "/ulexdb") + pktsdir: (conc toppath "/pkts") + )) + + + +(define (rmtmod:send-receive-ulex ulex:conn cmd rid params attemptnum area-dat) + #f) + +(use trace)(trace-call-sites #t) +;; (trace member rmtmod:calc-ro-mode rmt:open-qry-close-locally) )