Megatest

Check-in [70f5d2ff31]
Login
Overview
Comment:fixes to crashes with server id mismatch
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.6569-refactor-server-key-chk
Files: files | file ages | folders
SHA1: 70f5d2ff31202b3bc6a1e6bcaec3de9576651c50
User & Date: pjhatwal on 2021-01-28 18:40:38
Other Links: branch diff | manifest | tags
Context
2021-01-29
10:28
changed info debug msgs with keyword failed check-in: 577abe1ca7 user: pjhatwal tags: v1.6569-refactor-server-key-chk
2021-01-28
18:40
fixes to crashes with server id mismatch check-in: 70f5d2ff31 user: pjhatwal tags: v1.6569-refactor-server-key-chk
2021-01-27
15:30
fix for crash with error (match) no matching pattern check-in: 4c4d63c3e0 user: pjhatwal tags: v1.6569-refactor-server-key-chk
Changes

Modified api.scm from [e3eb999523] to [68ac71805c].

376
377
378
379
380
381
382
383
384
385
386
387
388
389

390


391

392
393
394

395
396
397
398
399
400
401
402
403
404
405
406
407
408

409
410
411
412
376
377
378
379
380
381
382

383
384
385
386
387

388
389
390
391

392
393
394

395
396
397
398
399
400
401
402
403
404
405
406
407
408

409
410
411
412
413







-





-
+

+
+
-
+


-
+













-
+




;; http-server  send-response
;;                 api:process-request
;;                    db:*
;;
;; NB// Runs on the server as part of the server loop
;;
(define (api:process-request dbstruct $) ;; the $ is the request vars proc
  (set! *api-process-request-count* (+ *api-process-request-count* 1))
  (debug:print 4 *default-log-port* "server-id:"  *server-id*)
  (let* ((cmd     ($ 'cmd))
	 (paramsj ($ 'params))
         (key     ($ 'key))   
	 (params  (db:string->obj paramsj transport: 'http))) ;; incoming data from the POST (or is it a GET?)
    (debug:print 4 *default-log-port* "cmd:" cmd " with params " params "key " key)
    (debug:print 0 *default-log-port* "cmd:" cmd " with params " params "key " key)
    (if (equal? key *server-id*)
      (begin
        (set! *api-process-request-count* (+ *api-process-request-count* 1))
	(let* ((resdat  (api:execute-requests dbstruct (vector cmd params))) ;; process the request, resdat = #( flag result )
 	(let* ((resdat  (api:execute-requests dbstruct (vector cmd params))) ;; process the request, resdat = #( flag result )
	       (success (vector-ref resdat 0))
	       (res     (vector-ref resdat 1))) ;; (vector flag payload), get the payload, ignore the flag (why?)
	  (debug:print 4 *default-log-port* "res:" res)
	  (debug:print 0 *default-log-port* "res:" res)
	  (if (not success)
	      (debug:print 0 *default-log-port* "ERROR: success flag is #f for " cmd " with params " params))
	  (if (> *api-process-request-count* *max-api-process-requests*)
	      (set! *max-api-process-requests* *api-process-request-count*))
	  (set! *api-process-request-count* (- *api-process-request-count* 1))
	  ;; This can be here but needs controls to ensure it doesn't run more than every 4 seconds
	  ;; (rmt:dat->json-str
	  ;;  (if (or (string? res)
	  ;;          (list?   res)
	  ;;          (number? res)
	  ;;          (boolean? res))
	  ;;      res 
	  ;;      (list "ERROR, not string, list, number or boolean" 1 cmd params res)))))
	  (db:obj->string res transport: 'http))
	  (db:obj->string res transport: 'http)))
	(begin
	  (debug:print 0 *default-log-port*   "Server refused to process request. Sever id mismatch. recived " key " expected:  " *server-id* ".\nOther arguments recived: cmd=" cmd " params = " params) 
	  (db:obj->string (conc "Server refused to process request server-id mismatch: " key ", " *server-id*) transport: 'http)))))

Modified common.scm from [2f158b8f7d] to [bf0a0a25ad].

296
297
298
299
300
301
302
303

304
305
306
307
308
309
310
296
297
298
299
300
301
302

303
304
305
306
307
308
309
310







-
+







(define (common:logpro-exit-code->test-status exit-code)
  (status-sym->string (common:logpro-exit-code->status-sym exit-code)))

(defstruct remote
  (hh-dat            (common:get-homehost)) ;; homehost record ( addr . hhflag )
  (server-url        #f) ;; (server:check-if-running *toppath*) #f))
  (server-id         #f)
  (server-info       (if *toppath* (server:check-if-running *toppath*)))
  (server-info       (if *toppath* (server:check-if-running *toppath*) #f))
  (last-server-check 0)  ;; last time we checked to see if the server was alive
  (conndat           #f)
  (transport         *transport-type*)
  (server-timeout    (server:expiration-timeout))
  (force-server      #f)
  (ro-mode           #f)  
  (ro-mode-checked   #f)) ;; flag that indicates we have checked for ro-mode

Modified http-transport.scm from [c4772ba536] to [2202b22e9f].

290
291
292
293
294
295
296
297

298
299
300
301
302
303
304
290
291
292
293
294
295
296

297
298
299
300
301
302
303
304







-
+







						(mutex-unlock! *http-mutex*)
					     ;;; (signal (make-composite-condition
					     ;;;          (make-property-condition 'commfail 'message "failed to connect to server")))
					     ;;; "communications failed"
						(db:obj->string #f))
					    (with-input-from-request ;; was dat
					     fullurl 
					     (list (cons 'key (or server-id "thekey"))
					     (list (cons 'key (or server-id   "thekey"))
						   (cons 'cmd cmd)
						   (cons 'params sparams))
					     read-string))
					  transport: 'http)
                                         0)) ;; added this speculatively
			      ;; Shouldn't this be a call to the managed call-all-connections stuff above?
			      (close-all-connections!)
312
313
314
315
316
317
318
319
320








321
322
323
324
325
326
327
312
313
314
315
316
317
318


319
320
321
322
323
324
325
326
327
328
329
330
331
332
333







-
-
+
+
+
+
+
+
+
+







	      (th2 (make-thread time-out     "time out")))
	 (thread-start! th1)
	 (thread-start! th2)
	 (thread-join! th1)
          (vector-set! res 0 success)
	 (thread-terminate! th2)
	 (if (vector? res)
	     (if (vector-ref res 0) ;; this is the first flag or the second flag?
		 res ;; this is the *inner* vector? seriously? why?
	     (if (vector-ref res 0) ;; this is the first flag or the second flag? 
                 (let* ((res-dat (vector-ref res 1)))
                    (if (and (string? res-dat) (string-contains res-dat "server-id mismatch"))
                     (signal (make-composite-condition
		          (make-property-condition 
		       'servermismatch
		       'message  (vector-ref res 1))))       
		      res)) ;; this is the *inner* vector? seriously? why?
                 (if (debug:debug-mode 11)
                     (let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it
                       (print-call-chain (current-error-port))
                       (debug:print-error 11 *default-log-port* "error above occured at server, res=" res) ;; " message: " ((condition-property-accessor 'exn 'message) exn))
                       (debug:print 11 *default-log-port* " server call chain:")
                       (pp (vector-ref res 1) (current-error-port))
                       (signal (vector-ref res 0)))

Modified rmt.scm from [2391f353fe] to [bcbb74efcc].

273
274
275
276
277
278
279

280
281
282
283
284
285
286
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287







+







					     ;; problems. However it
					     ;; is needed to deal with
					     ;; attemtped
					     ;; communication to
					     ;; servers that have gone
					     ;; away
			      (http-transport:client-api-send-receive 0 conninfo cmd params)
                              ((servermismatch)  (vector #f "Server id mismatch" ))
			      ((commfail)(vector #f "communications fail"))
			      ((exn)(vector #f "other fail" (print-call-chain)))))
		     (else
		      (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported")
		      (exit))))

;; No Title 
320
321
322
323
324
325
326


327

328
329
330
331
332
333
334
321
322
323
324
325
326
327
328
329

330
331
332
333
334
335
336
337







+
+
-
+







	  (http-transport:close-connections  area-dat: runremote)))
    (debug:print-info 13 *default-log-port* "rmt:send-receive, case  9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
    (mutex-unlock! *rmt-mutex*)
    (if success ;; success only tells us that the transport was
	;; successful, have to examine the data to see if
	;; there was a detected issue at the other end
	(extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
	(begin
           (debug:print-error 0 *default-log-port* " dat=" dat) 
	(extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params)
           (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params))
	)))

(define (rmt:print-db-stats)
  (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
    (debug:print 18 *default-log-port* "DB Stats\n========")
    (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))
    (for-each (lambda (cmd)

Modified server.scm from [2d034b6d97] to [d9eac964a8].

318
319
320
321
322
323
324
325

326
327
328
329
330
331
332
333
334
335
336
337

338
339
340
341
342
343
344
318
319
320
321
322
323
324

325
326
327
328
329
330
331
332
333
334
335
336

337
338
339
340
341
342
343
344







-
+











-
+







	  (list-ref srvrs idx))
	#f)))

(define (server:record->id servr)
  (handle-exceptions
   exn
   (begin 
     (debug:print-info 0 *default-log-port*  "failed to get server id from " server ", exn=" exn)     
     (debug:print-info 0 *default-log-port*  "failed to get server id from " servr ", exn=" exn)     
   #f)
  (match-let (((mod-time host port start-time server-id pid)
	       servr))
    (if server-id
	server-id
	#f))))

(define (server:record->url servr)
  (handle-exceptions
   exn
   (begin 
     (debug:print-info 0 *default-log-port*  "failed to get server url from " server ", exn=" exn)     
     (debug:print-info 0 *default-log-port*  "failed to get server url from " servr ", exn=" exn)     
   #f)
  (match-let (((mod-time host port start-time server-id pid)
	       servr))
    (if (and host port)
	(conc host ":" port)
	#f))))

460
461
462
463
464
465
466
467

468
469
470
471
472
473
474
460
461
462
463
464
465
466

467
468
469
470
471
472
473
474







-
+







        server-url
	#f)))

(define (server:kill servr)
  (handle-exceptions
    exn
    (begin 
      (debug:print-info 0 *default-log-port*  "failed to get host and/or port from " server ", exn=" exn)     
      (debug:print-info 0 *default-log-port*  "failed to get host and/or port from " servr ", exn=" exn)     
    #f)
  (match-let (((mod-time hostname port start-time pid)
	       servr))
    (tasks:kill-server hostname pid))))

;; called in megatest.scm, host-port is string hostname:port
;;