Megatest

Check-in [70f5d2ff31]
Login
Overview
Comment:fixes to crashes with server id mismatch
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.6569-refactor-server-key-chk
Files: files | file ages | folders
SHA1: 70f5d2ff31202b3bc6a1e6bcaec3de9576651c50
User & Date: pjhatwal on 2021-01-28 18:40:38
Other Links: branch diff | manifest | tags
Context
2021-01-29
10:28
changed info debug msgs with keyword failed check-in: 577abe1ca7 user: pjhatwal tags: v1.6569-refactor-server-key-chk
2021-01-28
18:40
fixes to crashes with server id mismatch check-in: 70f5d2ff31 user: pjhatwal tags: v1.6569-refactor-server-key-chk
2021-01-27
15:30
fix for crash with error (match) no matching pattern check-in: 4c4d63c3e0 user: pjhatwal tags: v1.6569-refactor-server-key-chk
Changes

Modified api.scm from [e3eb999523] to [68ac71805c].

376
377
378
379
380
381
382
383
384
385
386
387
388
389
390


391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
;; http-server  send-response
;;                 api:process-request
;;                    db:*
;;
;; NB// Runs on the server as part of the server loop
;;
(define (api:process-request dbstruct $) ;; the $ is the request vars proc
  (set! *api-process-request-count* (+ *api-process-request-count* 1))
  (debug:print 4 *default-log-port* "server-id:"  *server-id*)
  (let* ((cmd     ($ 'cmd))
	 (paramsj ($ 'params))
         (key     ($ 'key))   
	 (params  (db:string->obj paramsj transport: 'http))) ;; incoming data from the POST (or is it a GET?)
    (debug:print 4 *default-log-port* "cmd:" cmd " with params " params "key " key)
    (if (equal? key *server-id*)


	(let* ((resdat  (api:execute-requests dbstruct (vector cmd params))) ;; process the request, resdat = #( flag result )
	       (success (vector-ref resdat 0))
	       (res     (vector-ref resdat 1))) ;; (vector flag payload), get the payload, ignore the flag (why?)
	  (debug:print 4 *default-log-port* "res:" res)
	  (if (not success)
	      (debug:print 0 *default-log-port* "ERROR: success flag is #f for " cmd " with params " params))
	  (if (> *api-process-request-count* *max-api-process-requests*)
	      (set! *max-api-process-requests* *api-process-request-count*))
	  (set! *api-process-request-count* (- *api-process-request-count* 1))
	  ;; This can be here but needs controls to ensure it doesn't run more than every 4 seconds
	  ;; (rmt:dat->json-str
	  ;;  (if (or (string? res)
	  ;;          (list?   res)
	  ;;          (number? res)
	  ;;          (boolean? res))
	  ;;      res 
	  ;;      (list "ERROR, not string, list, number or boolean" 1 cmd params res)))))
	  (db:obj->string res transport: 'http))
	(begin
	  (debug:print 0 *default-log-port*   "Server refused to process request. Sever id mismatch. recived " key " expected:  " *server-id* ".\nOther arguments recived: cmd=" cmd " params = " params) 
	  (db:obj->string (conc "Server refused to process request server-id mismatch: " key ", " *server-id*) transport: 'http)))))








<





|

>
>
|


|













|




376
377
378
379
380
381
382

383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
;; http-server  send-response
;;                 api:process-request
;;                    db:*
;;
;; NB// Runs on the server as part of the server loop
;;
(define (api:process-request dbstruct $) ;; the $ is the request vars proc

  (debug:print 4 *default-log-port* "server-id:"  *server-id*)
  (let* ((cmd     ($ 'cmd))
	 (paramsj ($ 'params))
         (key     ($ 'key))   
	 (params  (db:string->obj paramsj transport: 'http))) ;; incoming data from the POST (or is it a GET?)
    (debug:print 0 *default-log-port* "cmd:" cmd " with params " params "key " key)
    (if (equal? key *server-id*)
      (begin
        (set! *api-process-request-count* (+ *api-process-request-count* 1))
 	(let* ((resdat  (api:execute-requests dbstruct (vector cmd params))) ;; process the request, resdat = #( flag result )
	       (success (vector-ref resdat 0))
	       (res     (vector-ref resdat 1))) ;; (vector flag payload), get the payload, ignore the flag (why?)
	  (debug:print 0 *default-log-port* "res:" res)
	  (if (not success)
	      (debug:print 0 *default-log-port* "ERROR: success flag is #f for " cmd " with params " params))
	  (if (> *api-process-request-count* *max-api-process-requests*)
	      (set! *max-api-process-requests* *api-process-request-count*))
	  (set! *api-process-request-count* (- *api-process-request-count* 1))
	  ;; This can be here but needs controls to ensure it doesn't run more than every 4 seconds
	  ;; (rmt:dat->json-str
	  ;;  (if (or (string? res)
	  ;;          (list?   res)
	  ;;          (number? res)
	  ;;          (boolean? res))
	  ;;      res 
	  ;;      (list "ERROR, not string, list, number or boolean" 1 cmd params res)))))
	  (db:obj->string res transport: 'http)))
	(begin
	  (debug:print 0 *default-log-port*   "Server refused to process request. Sever id mismatch. recived " key " expected:  " *server-id* ".\nOther arguments recived: cmd=" cmd " params = " params) 
	  (db:obj->string (conc "Server refused to process request server-id mismatch: " key ", " *server-id*) transport: 'http)))))

Modified common.scm from [2f158b8f7d] to [bf0a0a25ad].

296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
(define (common:logpro-exit-code->test-status exit-code)
  (status-sym->string (common:logpro-exit-code->status-sym exit-code)))

(defstruct remote
  (hh-dat            (common:get-homehost)) ;; homehost record ( addr . hhflag )
  (server-url        #f) ;; (server:check-if-running *toppath*) #f))
  (server-id         #f)
  (server-info       (if *toppath* (server:check-if-running *toppath*)))
  (last-server-check 0)  ;; last time we checked to see if the server was alive
  (conndat           #f)
  (transport         *transport-type*)
  (server-timeout    (server:expiration-timeout))
  (force-server      #f)
  (ro-mode           #f)  
  (ro-mode-checked   #f)) ;; flag that indicates we have checked for ro-mode







|







296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
(define (common:logpro-exit-code->test-status exit-code)
  (status-sym->string (common:logpro-exit-code->status-sym exit-code)))

(defstruct remote
  (hh-dat            (common:get-homehost)) ;; homehost record ( addr . hhflag )
  (server-url        #f) ;; (server:check-if-running *toppath*) #f))
  (server-id         #f)
  (server-info       (if *toppath* (server:check-if-running *toppath*) #f))
  (last-server-check 0)  ;; last time we checked to see if the server was alive
  (conndat           #f)
  (transport         *transport-type*)
  (server-timeout    (server:expiration-timeout))
  (force-server      #f)
  (ro-mode           #f)  
  (ro-mode-checked   #f)) ;; flag that indicates we have checked for ro-mode

Modified http-transport.scm from [c4772ba536] to [2202b22e9f].

290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
						(mutex-unlock! *http-mutex*)
					     ;;; (signal (make-composite-condition
					     ;;;          (make-property-condition 'commfail 'message "failed to connect to server")))
					     ;;; "communications failed"
						(db:obj->string #f))
					    (with-input-from-request ;; was dat
					     fullurl 
					     (list (cons 'key (or server-id "thekey"))
						   (cons 'cmd cmd)
						   (cons 'params sparams))
					     read-string))
					  transport: 'http)
                                         0)) ;; added this speculatively
			      ;; Shouldn't this be a call to the managed call-all-connections stuff above?
			      (close-all-connections!)







|







290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
						(mutex-unlock! *http-mutex*)
					     ;;; (signal (make-composite-condition
					     ;;;          (make-property-condition 'commfail 'message "failed to connect to server")))
					     ;;; "communications failed"
						(db:obj->string #f))
					    (with-input-from-request ;; was dat
					     fullurl 
					     (list (cons 'key (or server-id   "thekey"))
						   (cons 'cmd cmd)
						   (cons 'params sparams))
					     read-string))
					  transport: 'http)
                                         0)) ;; added this speculatively
			      ;; Shouldn't this be a call to the managed call-all-connections stuff above?
			      (close-all-connections!)
312
313
314
315
316
317
318
319






320
321
322
323
324
325
326
327
	      (th2 (make-thread time-out     "time out")))
	 (thread-start! th1)
	 (thread-start! th2)
	 (thread-join! th1)
          (vector-set! res 0 success)
	 (thread-terminate! th2)
	 (if (vector? res)
	     (if (vector-ref res 0) ;; this is the first flag or the second flag?






		 res ;; this is the *inner* vector? seriously? why?
                 (if (debug:debug-mode 11)
                     (let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it
                       (print-call-chain (current-error-port))
                       (debug:print-error 11 *default-log-port* "error above occured at server, res=" res) ;; " message: " ((condition-property-accessor 'exn 'message) exn))
                       (debug:print 11 *default-log-port* " server call chain:")
                       (pp (vector-ref res 1) (current-error-port))
                       (signal (vector-ref res 0)))







|
>
>
>
>
>
>
|







312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
	      (th2 (make-thread time-out     "time out")))
	 (thread-start! th1)
	 (thread-start! th2)
	 (thread-join! th1)
          (vector-set! res 0 success)
	 (thread-terminate! th2)
	 (if (vector? res)
	     (if (vector-ref res 0) ;; this is the first flag or the second flag? 
                 (let* ((res-dat (vector-ref res 1)))
                    (if (and (string? res-dat) (string-contains res-dat "server-id mismatch"))
                     (signal (make-composite-condition
		          (make-property-condition 
		       'servermismatch
		       'message  (vector-ref res 1))))       
		      res)) ;; this is the *inner* vector? seriously? why?
                 (if (debug:debug-mode 11)
                     (let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it
                       (print-call-chain (current-error-port))
                       (debug:print-error 11 *default-log-port* "error above occured at server, res=" res) ;; " message: " ((condition-property-accessor 'exn 'message) exn))
                       (debug:print 11 *default-log-port* " server call chain:")
                       (pp (vector-ref res 1) (current-error-port))
                       (signal (vector-ref res 0)))

Modified rmt.scm from [2391f353fe] to [bcbb74efcc].

273
274
275
276
277
278
279

280
281
282
283
284
285
286
					     ;; problems. However it
					     ;; is needed to deal with
					     ;; attemtped
					     ;; communication to
					     ;; servers that have gone
					     ;; away
			      (http-transport:client-api-send-receive 0 conninfo cmd params)

			      ((commfail)(vector #f "communications fail"))
			      ((exn)(vector #f "other fail" (print-call-chain)))))
		     (else
		      (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported")
		      (exit))))

;; No Title 







>







273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
					     ;; problems. However it
					     ;; is needed to deal with
					     ;; attemtped
					     ;; communication to
					     ;; servers that have gone
					     ;; away
			      (http-transport:client-api-send-receive 0 conninfo cmd params)
                              ((servermismatch)  (vector #f "Server id mismatch" ))
			      ((commfail)(vector #f "communications fail"))
			      ((exn)(vector #f "other fail" (print-call-chain)))))
		     (else
		      (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported")
		      (exit))))

;; No Title 
320
321
322
323
324
325
326


327
328
329
330
331
332
333
334
	  (http-transport:close-connections  area-dat: runremote)))
    (debug:print-info 13 *default-log-port* "rmt:send-receive, case  9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
    (mutex-unlock! *rmt-mutex*)
    (if success ;; success only tells us that the transport was
	;; successful, have to examine the data to see if
	;; there was a detected issue at the other end
	(extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)


	(extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params)
	)))

(define (rmt:print-db-stats)
  (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
    (debug:print 18 *default-log-port* "DB Stats\n========")
    (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))
    (for-each (lambda (cmd)







>
>
|







321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
	  (http-transport:close-connections  area-dat: runremote)))
    (debug:print-info 13 *default-log-port* "rmt:send-receive, case  9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
    (mutex-unlock! *rmt-mutex*)
    (if success ;; success only tells us that the transport was
	;; successful, have to examine the data to see if
	;; there was a detected issue at the other end
	(extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
	(begin
           (debug:print-error 0 *default-log-port* " dat=" dat) 
           (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params))
	)))

(define (rmt:print-db-stats)
  (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
    (debug:print 18 *default-log-port* "DB Stats\n========")
    (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))
    (for-each (lambda (cmd)

Modified server.scm from [2d034b6d97] to [d9eac964a8].

318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
	  (list-ref srvrs idx))
	#f)))

(define (server:record->id servr)
  (handle-exceptions
   exn
   (begin 
     (debug:print-info 0 *default-log-port*  "failed to get server id from " server ", exn=" exn)     
   #f)
  (match-let (((mod-time host port start-time server-id pid)
	       servr))
    (if server-id
	server-id
	#f))))

(define (server:record->url servr)
  (handle-exceptions
   exn
   (begin 
     (debug:print-info 0 *default-log-port*  "failed to get server url from " server ", exn=" exn)     
   #f)
  (match-let (((mod-time host port start-time server-id pid)
	       servr))
    (if (and host port)
	(conc host ":" port)
	#f))))








|











|







318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
	  (list-ref srvrs idx))
	#f)))

(define (server:record->id servr)
  (handle-exceptions
   exn
   (begin 
     (debug:print-info 0 *default-log-port*  "failed to get server id from " servr ", exn=" exn)     
   #f)
  (match-let (((mod-time host port start-time server-id pid)
	       servr))
    (if server-id
	server-id
	#f))))

(define (server:record->url servr)
  (handle-exceptions
   exn
   (begin 
     (debug:print-info 0 *default-log-port*  "failed to get server url from " servr ", exn=" exn)     
   #f)
  (match-let (((mod-time host port start-time server-id pid)
	       servr))
    (if (and host port)
	(conc host ":" port)
	#f))))

460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
        server-url
	#f)))

(define (server:kill servr)
  (handle-exceptions
    exn
    (begin 
      (debug:print-info 0 *default-log-port*  "failed to get host and/or port from " server ", exn=" exn)     
    #f)
  (match-let (((mod-time hostname port start-time pid)
	       servr))
    (tasks:kill-server hostname pid))))

;; called in megatest.scm, host-port is string hostname:port
;;







|







460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
        server-url
	#f)))

(define (server:kill servr)
  (handle-exceptions
    exn
    (begin 
      (debug:print-info 0 *default-log-port*  "failed to get host and/or port from " servr ", exn=" exn)     
    #f)
  (match-let (((mod-time hostname port start-time pid)
	       servr))
    (tasks:kill-server hostname pid))))

;; called in megatest.scm, host-port is string hostname:port
;;