Megatest

Check-in [f5bedac3fb]
Login
Overview
Comment:added telemetry on rmt: and api:
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.65-telemetry
Files: files | file ages | folders
SHA1: f5bedac3fbb783517f59c4b877c69f9ee237df46
User & Date: bjbarcla on 2019-02-13 18:27:00
Other Links: branch diff | manifest | tags
Context
2019-02-13
18:28
added telemetry-daemon check-in: 610250e3f0 user: bjbarcla tags: v1.65-telemetry
18:27
added telemetry on rmt: and api: check-in: f5bedac3fb user: bjbarcla tags: v1.65-telemetry
00:53
removed stray prints check-in: 673fea48bc user: bjbarcla tags: v1.65-telemetry
Changes

Modified api.scm from [1541791de9] to [cf3fabb928].

155
156
157
158
159
160
161





162
163
164
165
166
167
168
				   cmd-in
				   (string->symbol cmd-in)))
            (params            (vector-ref dat 1))
            (start-t           (current-milliseconds))
            (readonly-mode     (dbr:dbstruct-read-only dbstruct))
            (readonly-command  (member cmd api:read-only-queries))
            (writecmd-in-readonly-mode (and readonly-mode (not readonly-command)))





            (res    
             (if writecmd-in-readonly-mode
                 (conc "attempt to run write command "cmd" on a read-only database")
                 (case cmd
                   ;;===============================================
                   ;; READ/WRITE QUERIES
                   ;;===============================================







>
>
>
>
>







155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
				   cmd-in
				   (string->symbol cmd-in)))
            (params            (vector-ref dat 1))
            (start-t           (current-milliseconds))
            (readonly-mode     (dbr:dbstruct-read-only dbstruct))
            (readonly-command  (member cmd api:read-only-queries))
            (writecmd-in-readonly-mode (and readonly-mode (not readonly-command)))
            (foo               (begin
                                 (common:telemetry-log (conc "api-in:"(->string cmd))
                                                       payload: `((params . ,params)))
                                 
                                 #t))
            (res    
             (if writecmd-in-readonly-mode
                 (conc "attempt to run write command "cmd" on a read-only database")
                 (case cmd
                   ;;===============================================
                   ;; READ/WRITE QUERIES
                   ;;===============================================
325
326
327
328
329
330
331

332
333
334
335
336
337
338




339




340
341
342
343
344
345
346
347
                   ((testmeta-get-record)       (apply db:testmeta-get-record dbstruct params))

                   ;; TASKS 
                   ((find-task-queue-records)   (apply tasks:find-task-queue-records dbstruct params))
		   (else
		    (debug:print 0 *default-log-port* "ERROR: bad api call " cmd)
		    (conc "ERROR: BAD api call " cmd))))))

       
       ;; save all stats
       (let ((delta-t (- (current-milliseconds)
			 start-t)))
	 (hash-table-set! *db-api-call-time* cmd
			  (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '()))))
       (if writecmd-in-readonly-mode




	   (vector #f res)




           (vector #t res)))))))

;; http-server  send-response
;;                 api:process-request
;;                    db:*
;;
;; NB// Runs on the server as part of the server loop
;;







>







>
>
>
>
|
>
>
>
>
|







330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
                   ((testmeta-get-record)       (apply db:testmeta-get-record dbstruct params))

                   ;; TASKS 
                   ((find-task-queue-records)   (apply tasks:find-task-queue-records dbstruct params))
		   (else
		    (debug:print 0 *default-log-port* "ERROR: bad api call " cmd)
		    (conc "ERROR: BAD api call " cmd))))))

       
       ;; save all stats
       (let ((delta-t (- (current-milliseconds)
			 start-t)))
	 (hash-table-set! *db-api-call-time* cmd
			  (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '()))))
       (if writecmd-in-readonly-mode
           (begin
             (common:telemetry-log (conc "api-out:"(->string cmd))
                                   payload: `((params . ,params)
                                              (ok-res . #t)))
	     (vector #f res))
           (begin
             (common:telemetry-log (conc "api-out:"(->string cmd))
                                   payload: `((params . ,params)
                                              (ok-res . #f)))
             (vector #t res))))))))

;; http-server  send-response
;;                 api:process-request
;;                    db:*
;;
;; NB// Runs on the server as part of the server loop
;;

Modified common.scm from [5eb5f0d32c] to [2f4d86191c].

886
887
888
889
890
891
892

893
894
895
896
897
898
899
	      (debug:print-info 13 *default-log-port* "loading writable-watchdog.")
	      (server:writable-watchdog dbstruct)))
	    (debug:print-info 13 *default-log-port* "watchdog done."))
	  (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost"))))


(define (std-exit-procedure)

  (on-exit (lambda () 0))
  ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*)
  (let ((no-hurry  (if *time-to-exit* ;; hurry up
		       #f
		       (begin
			 (set! *time-to-exit* #t)
			 #t))))







>







886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
	      (debug:print-info 13 *default-log-port* "loading writable-watchdog.")
	      (server:writable-watchdog dbstruct)))
	    (debug:print-info 13 *default-log-port* "watchdog done."))
	  (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost"))))


(define (std-exit-procedure)
  ;;(common:telemetry-log-close)
  (on-exit (lambda () 0))
  ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*)
  (let ((no-hurry  (if *time-to-exit* ;; hurry up
		       #f
		       (begin
			 (set! *time-to-exit* #t)
			 #t))))
3049
3050
3051
3052
3053
3054
3055



3056

3057
3058
3059
3060
3061
3062
3063

3064

3065

















3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076


3077




3078

           (handle-exceptions
           exn
           #t ;; just ignore it, it might have died in the meantime so joining it will throw an exception
           (thread-join! thread))
           )))
   (hash-table-keys *common:thread-punchlist*)))




(define (common:telemetry-log event #!key (payload '()))

  (let* ((serverhost (configf:lookup *configdat* "telemetry" "host"))
         (serverport (configf:lookup-number *configdat* "telemetry" "port"))
         (user (or (get-environment-variable "USER") "unknown"))
         (host (or (get-environment-variable "HOST") "unknown")))

    (handle-exceptions
     exn

     (debug:print-info 0 *default-log-port* "common-telemetry-log failure")

     (if (and serverhost serverport user host event)

















         (let* ((start (conc "[megatest "event"]"))
                (toppath (or *toppath* "/dev/null"))
                (payload-serialized
                 (base64:base64-encode
                  (z3:encode-buffer
                   (with-output-to-string (lambda () (pp payload))))))
                (msg     (conc user":"host":"start":"
                               toppath":"payload-serialized))
                (s (udp-open-socket)))
           (udp-bind! s #f 0)
           (udp-connect! s serverhost serverport)


           (udp-send s msg)




           (udp-close-socket s))))))








>
>
>
|
>
|
|
|
|
|
|
|
>
|
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|
|
|
|
|
|
<
|
|
>
>
|
>
>
>
>
|
>
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097

3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
           (handle-exceptions
           exn
           #t ;; just ignore it, it might have died in the meantime so joining it will throw an exception
           (thread-join! thread))
           )))
   (hash-table-keys *common:thread-punchlist*)))

;; Lifecycle marker for the telemetry UDP logger. It starts out as 'startup;
;; common:telemetry-log-open only acts while it is still 'startup and then
;; replaces it with 'open (socket connected), 'not-needed (no telemetry
;; host/port configured) or 'broken (an exception occurred while setting up).
(define *common:telemetry-log-state* 'startup)
;; The UDP socket used by common:telemetry-log. It is #f until
;; common:telemetry-log-open succeeds, and common:telemetry-log-close resets
;; it to #f again; common:telemetry-log sends nothing while it is #f.
(define *common:telemetry-log-socket* #f)

;; Open the telemetry UDP socket, once.
;;
;; Does nothing unless *common:telemetry-log-state* is still 'startup.
;; Otherwise reads the "host" and "port" settings from the [telemetry]
;; section of *configdat* and moves the state to:
;;   'open       - socket created, connected and stored in
;;                 *common:telemetry-log-socket*
;;   'not-needed - host or port is not configured
;;   'broken     - any exception was raised during setup
;;
;; Telemetry is best-effort: no exception escapes from this procedure.
;; The return value is unspecified.
(define (common:telemetry-log-open)
  (if (eq? *common:telemetry-log-state* 'startup)
      (set! *common:telemetry-log-state*
            (handle-exceptions
             exn
             (begin
               (debug:print-info 0 *default-log-port* "common-telemetry-log get udp port failure")
               'broken)
             ;; The config lookups live inside the handler so that a failure
             ;; here marks telemetry 'broken instead of propagating into the
             ;; caller of common:telemetry-log.
             (let ((serverhost (configf:lookup *configdat* "telemetry" "host"))
                   (serverport (configf:lookup-number *configdat* "telemetry" "port")))
               (if (and serverhost serverport)
                   (let ((s (udp-open-socket)))
                     ;;(udp-bind! s #f 0)
                     (handle-exceptions
                      connect-exn
                      (begin
                        ;; Do not leak the descriptor when connecting fails;
                        ;; a failure of the close itself is ignored.
                        (handle-exceptions close-exn #f (udp-close-socket s))
                        ;; Re-raise so the outer handler logs and yields 'broken.
                        (abort connect-exn))
                      (udp-connect! s serverhost serverport))
                     (set! *common:telemetry-log-socket* s)
                     'open)
                   'not-needed))))))
  
;; Send one telemetry datagram describing EVENT.
;;
;;   event   - value spliced into the "[megatest <event>]" tag; when it is
;;             #f nothing is sent
;;   payload - keyword argument, default '(); pretty-printed with pp, then
;;             passed through z3:encode-buffer and base64:base64-encode
;;
;; The datagram text is
;;   USER:HOST:[megatest EVENT]:PID:TOPPATH:PAYLOAD
;; where USER and HOST come from the environment ("unknown" when unset) and
;; TOPPATH falls back to "/dev/null" when *toppath* is #f.
;;
;; The socket is opened lazily on the first call.  Any exception raised
;; while building or sending the message is caught and reported through
;; debug:print-info, so it does not reach the caller.
(define (common:telemetry-log event #!key (payload '()))
  ;; First use: try to set up the socket.
  (when (eq? *common:telemetry-log-state* 'startup)
    (common:telemetry-log-open))
  (handle-exceptions
   exn
   (debug:print-info 0 *default-log-port* "common-telemetry-log failure")
   (when (and *common:telemetry-log-socket* event)
     (let* ((sender  (or (get-environment-variable "USER") "unknown"))
            (machine (or (get-environment-variable "HOST") "unknown"))
            (tag     (conc "[megatest " event "]"))
            (area    (or *toppath* "/dev/null"))
            (pp-text (with-output-to-string
                       (lambda ()
                         (pp payload))))
            (blob    (base64:base64-encode (z3:encode-buffer pp-text)))
            (packet  (conc sender ":" machine ":" tag ":" (current-process-id) ":"
                           area ":" blob)))
       (udp-send *common:telemetry-log-socket* packet)))))

;; Close the telemetry UDP socket if one is open.
;;
;; Acts only when *common:telemetry-log-state* is 'open and a socket is
;; stored in *common:telemetry-log-socket*.  The global socket reference is
;; cleared and the state set to 'closed BEFORE the close is attempted, so
;; that a failing udp-close-socket cannot leave a dead socket behind for
;; common:telemetry-log to keep sending on.  A failure while closing is
;; reported through debug:print-info and otherwise ignored.
;;
;; Calling this again afterwards is a no-op.  The return value is unspecified.
(define (common:telemetry-log-close)
  (when (and (eq? *common:telemetry-log-state* 'open) *common:telemetry-log-socket*)
    (let ((s *common:telemetry-log-socket*))
      ;; Detach first: later log calls see #f and send nothing.
      (set! *common:telemetry-log-socket* #f)
      ;; Not 'startup, so common:telemetry-log will not reopen the socket.
      (set! *common:telemetry-log-state* 'closed)
      (handle-exceptions
       exn
       (debug:print-info 0 *default-log-port* "common-telemetry-log closure failure")
       (udp-close-socket s)))))

Modified rmt.scm from [0a05f35135] to [bc89e0120c].

53
54
55
56
57
58
59





60
61
62
63
64
65
66

(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected






  ;;DOT digraph megatest_state_status {
  ;;DOT   ranksep=0;
  ;;DOT   // rankdir=LR;
  ;;DOT   node [shape="box"];
  ;;DOT "rmt:send-receive" -> MUTEXLOCK;
  ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; }
  ;; do all the prep locked under the rmt-mutex







>
>
>
>
>







53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71

(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected

  (common:telemetry-log (conc "rmt:"(->string cmd))
                        payload: `((rid . ,rid)
                                   (params . ,params)))
                          
  
  ;;DOT digraph megatest_state_status {
  ;;DOT   ranksep=0;
  ;;DOT   // rankdir=LR;
  ;;DOT   node [shape="box"];
  ;;DOT "rmt:send-receive" -> MUTEXLOCK;
  ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; }
  ;; do all the prep locked under the rmt-mutex