Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -157,10 +157,15 @@ (params (vector-ref dat 1)) (start-t (current-milliseconds)) (readonly-mode (dbr:dbstruct-read-only dbstruct)) (readonly-command (member cmd api:read-only-queries)) (writecmd-in-readonly-mode (and readonly-mode (not readonly-command))) + (foo (begin + (common:telemetry-log (conc "api-in:"(->string cmd)) + payload: `((params . ,params))) + + #t)) (res (if writecmd-in-readonly-mode (conc "attempt to run write command "cmd" on a read-only database") (case cmd ;;=============================================== @@ -327,19 +332,28 @@ ;; TASKS ((find-task-queue-records) (apply tasks:find-task-queue-records dbstruct params)) (else (debug:print 0 *default-log-port* "ERROR: bad api call " cmd) (conc "ERROR: BAD api call " cmd)))))) + ;; save all stats (let ((delta-t (- (current-milliseconds) start-t))) (hash-table-set! *db-api-call-time* cmd (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '())))) (if writecmd-in-readonly-mode - (vector #f res) - (vector #t res))))))) + (begin + (common:telemetry-log (conc "api-out:"(->string cmd)) + payload: `((params . ,params) + (ok-res . #t))) + (vector #f res)) + (begin + (common:telemetry-log (conc "api-out:"(->string cmd)) + payload: `((params . ,params) + (ok-res . #f))) + (vector #t res)))))))) ;; http-server send-response ;; api:process-request ;; db:* ;; Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -888,10 +888,11 @@ (debug:print-info 13 *default-log-port* "watchdog done.")) (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost")))) (define (std-exit-procedure) + ;;(common:telemetry-log-close) (on-exit (lambda () 0)) ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*) (let ((no-hurry (if *time-to-exit* ;; hurry up #f (begin @@ -3051,28 +3052,57 @@ #t ;; just ignore it, it might have died in the meantime so joining it will throw an exception (thread-join! thread)) ))) (hash-table-keys *common:thread-punchlist*))) +(define *common:telemetry-log-state* 'startup) +(define *common:telemetry-log-socket* #f) + +(define (common:telemetry-log-open) + (if (eq? *common:telemetry-log-state* 'startup) + (let* ((serverhost (configf:lookup *configdat* "telemetry" "host")) + (serverport (configf:lookup-number *configdat* "telemetry" "port")) + (user (or (get-environment-variable "USER") "unknown")) + (host (or (get-environment-variable "HOST") "unknown"))) + (set! *common:telemetry-log-state* + (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "common-telemetry-log get udp port failure") + 'broken) + (if (and serverhost serverport user host) + (let* ((s (udp-open-socket))) + ;;(udp-bind! s #f 0) + (udp-connect! s serverhost serverport) + (set! *common:telemetry-log-socket* s) + 'open) + 'not-needed)))))) + (define (common:telemetry-log event #!key (payload '())) - (let* ((serverhost (configf:lookup *configdat* "telemetry" "host")) - (serverport (configf:lookup-number *configdat* "telemetry" "port")) - (user (or (get-environment-variable "USER") "unknown")) - (host (or (get-environment-variable "HOST") "unknown"))) + (if (eq? *common:telemetry-log-state* 'startup) + (common:telemetry-log-open)) + (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "common-telemetry-log failure")) + (if (and *common:telemetry-log-socket* event) + (let* ((user (or (get-environment-variable "USER") "unknown")) + (host (or (get-environment-variable "HOST") "unknown")) + (start (conc "[megatest "event"]")) + (toppath (or *toppath* "/dev/null")) + (payload-serialized + (base64:base64-encode + (z3:encode-buffer + (with-output-to-string (lambda () (pp payload)))))) + (msg (conc user":"host":"start":"(current-process-id)":" + toppath":"payload-serialized))) + (udp-send *common:telemetry-log-socket* msg))))) +(define (common:telemetry-log-close) + (when (and (eq? *common:telemetry-log-state* 'open) *common:telemetry-log-socket*) (handle-exceptions exn - (debug:print-info 0 *default-log-port* "common-telemetry-log failure") - (if (and serverhost serverport user host event) - (let* ((start (conc "[megatest "event"]")) - (toppath (or *toppath* "/dev/null")) - (payload-serialized - (base64:base64-encode - (z3:encode-buffer - (with-output-to-string (lambda () (pp payload)))))) - (msg (conc user":"host":"start":" - toppath":"payload-serialized)) - (s (udp-open-socket))) - (udp-bind! s #f 0) - (udp-connect! s serverhost serverport) - (udp-send s msg) - (udp-close-socket s)))))) + (begin + (debug:print-info 0 *default-log-port* "common-telemetry-log closure failure")) + (begin + (udp-close-socket *common:telemetry-log-socket*) + (set! *common:telemetry-log-socket* #f))))) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -55,10 +55,15 @@ ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected + (common:telemetry-log (conc "rmt:"(->string cmd)) + payload: `((rid . ,rid) + (params . ,params))) + + ;;DOT digraph megatest_state_status { ;;DOT ranksep=0; ;;DOT // rankdir=LR; ;;DOT node [shape="box"]; ;;DOT "rmt:send-receive" -> MUTEXLOCK;