Megatest

Check-in [33e9582eb3]
Login
Overview
Comment:factored out two functions from http-transport:keep-running to simplify: http-transport:get-server-info,http-transport:sync-inmemdb-to-db
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | rpc-transport | rpc-transport-bbtest
Files: files | file ages | folders
SHA1: 33e9582eb3ff65ce5ef2d78982810f9d5120ad43
User & Date: bjbarcla on 2016-01-08 16:55:04
Other Links: branch diff | manifest | tags
Context
2016-01-14
15:39
wip Closed-Leaf check-in: 9e927a9211 user: bjbarcla tags: rpc-transport, rpc-transport-bbtest
2016-01-08
16:55
factored out two functions from http-transport:keep-running to simplify: http-transport:get-server-info,http-transport:sync-inmemdb-to-db check-in: 33e9582eb3 user: bjbarcla tags: rpc-transport, rpc-transport-bbtest
2016-01-07
17:13
add bbtest rmt api func for testing check-in: 8b8c692892 user: bjbarcla tags: rpc-transport, rpc-transport-bbtest
Changes

Modified http-transport.scm from [d387fec12a] to [e7099e7d7f].

345
346
347
348
349
350
351



















































































352
353
354
355
356
357
358
359
360
361
362
363

364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390

391
392
393
394
395
396

397
398

399
400
401
402
403
404
405
406
407
408

409
410
411
412
413
414
415
416

417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445

446























447
448
449
450
451
452
453
454
455
456

457


458










459








460























461
462
463
464
465
466
467







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+











-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-




+





-
+
-
-
+
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-







;;
;; Build the connection record used to talk to the server at iface:port.
;;
;; Returns a vector laid out as:
;;   #(iface port api-uri api-url api-req creation-time)
;; where api-url is the "http://<iface>:<port>/api" string, api-uri is the
;; result of uri-reference on that string, api-req is a POST request made
;; for that uri and creation-time is (current-seconds) at call time.
(define (http-transport:client-connect iface port)
  (let* ((url-string (conc "http://" iface ":" port "/api"))
         (uri        (uri-reference url-string))
         (request    (make-request method: 'POST uri: uri)))
    (vector iface port uri url-string request (current-seconds))))

;;; factored out of http-transport:keep-running
;;
;; One sync/initialisation pass of the keep-running loop.
;;
;;   tdbdat         - tasks db handle, passed through db:delay-if-busy before use
;;   server-state   - loop state symbol; initialisation only runs for 'available
;;   run-id         - run this server is serving
;;   server-id      - id of this server's record in the tasks db
;;   bad-sync-count - number of failed syncs seen so far by the caller
;;
;; When *inmemdb* exists it is synced back with db:sync-touched and the call
;; then sleeps for the remainder of a roughly four second period.
;; When *inmemdb* does not exist yet and server-state is 'available, the
;; in-memory db is created (this mirrors the original keep-running code,
;; where this step was the else branch of the *inmemdb* test).
;;
;; return #t if a bad sync occurred and a retry is warranted
;; return #f otherwise
;; side effect - cleans up and exits on any exception other than sync-failed.
(define (http-transport:sync-inmemdb-to-db tdbdat server-state run-id server-id bad-sync-count)
  ;; keep-running used to supply `port` lexically as (cadr server-info); this
  ;; procedure has no such binding, so read it from *server-info* (guarded by
  ;; the heartbeat mutex, as elsewhere in this file) only when it is needed.
  (define (current-server-port)
    (mutex-lock! *heartbeat-mutex*)
    (let ((sdat *server-info*))
      (mutex-unlock! *heartbeat-mutex*)
      (and (pair? sdat)
           (pair? (cdr sdat))
           (cadr sdat))))
  (if *inmemdb*
      (let ((start-time (current-milliseconds))
            (sync-retry #f))
        ;; inmemdb is a dbstruct
        ;; NOTE(review): the sync uses the global *run-id*, not the run-id
        ;; parameter, exactly as the pre-refactor code did - confirm intended.
        (condition-case
         (db:sync-touched *inmemdb* *run-id* force-sync: #t)
         ((sync-failed)
          (cond
           ((> bad-sync-count 10) ;; time to give up
            (http-transport:server-shutdown server-id (current-server-port)))
           (else ;; we've had a fail or two, delay and ask the caller to loop
            (thread-sleep! 5)
            (set! sync-retry #t))))
         ((exn)
          (debug:print 0 "ERROR: error from sync code other than 'sync-failed. Attempting to gracefully shutdown the server")
          (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running crashed")
          (exit)))
        (if sync-retry
            #t ; retry requested
            (let* ((sync-time (- (current-milliseconds) start-time))
                   (rem-time  (quotient (- 4000 sync-time) 1000)))
              (debug:print 4 "SYNC: time= " sync-time ", rem-time=" rem-time)
              (if (and (<= rem-time 4)
                       (> rem-time 0))
                  (thread-sleep! rem-time)
                  (thread-sleep! 4)) ;; fallback for if the math is changed ...
              #f))) ; synced, no retry
      (begin
        ;;
        ;; no *inmemdb* yet, set running after our first pass through and start the db
        ;;
        (if (eq? server-state 'available)
            (let ((new-server-id (tasks:server-am-i-the-server? (db:delay-if-busy tdbdat) run-id))) ;; try to ensure no double registering of servers
              (if (equal? new-server-id server-id)
                  (begin
                    (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep")
                    (thread-sleep! 0.5) ;; give some margin for queries to complete before switching from file based access to server based access
                    (set! *inmemdb* (db:setup run-id))
                    ;; force initialization
                    (db:get-db *inmemdb* run-id)
                    (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running"))
                  (begin ;; gotta exit nicely
                    (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "collision")
                    (http-transport:server-shutdown server-id (current-server-port))))))
        #f))) ; nothing was synced, so there is nothing to retry


;;; factored out of http-transport:keep-running
;;
;; Poll *server-info* (read under *heartbeat-mutex*) until it is usable and
;; return it. If more than 120 seconds pass first, the server record is
;; deleted from the tasks db and the process exits.
;;
;; server-start-time is accepted but not used by this procedure.
(define (http-transport:get-server-info tdbdat server-start-time server-id run-id)
  ;; Snapshot the shared server info while holding the heartbeat mutex.
  (define (read-server-info)
    (mutex-lock!   *heartbeat-mutex*)
    (let ((snapshot *server-info*))
      (mutex-unlock! *heartbeat-mutex*)
      snapshot))
  ;; Whole seconds elapsed since the given timestamp.
  (define (seconds-since then)
    (- (current-seconds) then))
  ;; NOTE(review): same-as-last? is #t when the previous two samples were
  ;; equal, and a sample is only accepted when it is #f. That looks inverted
  ;; relative to "wait until stable" - confirm the intent before changing.
  (let poll ((began         (current-seconds))
             (same-as-last? #t)
             (previous      "not this"))
    (thread-sleep! 0.01)
    (debug:print-info 0 "Waiting for server alive signature")
    (let ((current (read-server-info)))
      (cond
       ((and current
             (not same-as-last?)
             (> (seconds-since began) 2))
        current)
       (else
        (debug:print-info 0 "Still waiting, last-sdat=" previous)
        (sleep 4)
        (cond
         ((> (seconds-since began) 120) ;; been waiting for two minutes
          (debug:print 0 "ERROR: transport appears to have died, exiting server " server-id " for run " run-id)
          (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id "failed to start, never received server alive signature")
          (exit))
         (else
          (poll began
                (equal? current previous)
                current))))))))

;; Run http-transport:keep-running in a parallel thread to monitor that the db is being
;; used, and to shut the server down after some time if it is not.
;;
(define (http-transport:keep-running server-id run-id)
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (debug:print-info 0 "Starting the sync-back, keep alive thread in server for run-id=" run-id)
  (let* ((tdbdat      (tasks:open-db))
	 (server-start-time (current-seconds))
	 (server-info (let loop ((start-time (current-seconds))
	 (server-info (http-transport:get-server-info tdbdat server-start-time server-id run-id))
				 (changed    #t)
				 (last-sdat  "not this"))
                        (let ((sdat #f))
			  (thread-sleep! 0.01)
			  (debug:print-info 0 "Waiting for server alive signature")
                          (mutex-lock! *heartbeat-mutex*)
                          (set! sdat *server-info*)
                          (mutex-unlock! *heartbeat-mutex*)
                          (if (and sdat
				   (not changed)
				   (> (- (current-seconds) start-time) 2))
			      sdat
                              (begin
				(debug:print-info 0 "Still waiting, last-sdat=" last-sdat)
                                (sleep 4)
				(if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
				    (begin
				      (debug:print 0 "ERROR: transport appears to have died, exiting server " server-id " for run " run-id)
				      (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id "failed to start, never received server alive signature")
				      (exit))
				    (loop start-time
					  (equal? sdat last-sdat)
					  sdat)))))))
         (iface       (car server-info))
         (port        (cadr server-info))
         (last-access 0)
	 (server-timeout (server:get-timeout)))

    (let loop ((count         0)
	       (server-state 'available)
	       (bad-sync-count 0))

      ;; Use this opportunity to sync the inmemdb to db
      (if *inmemdb* 
      (let ((sync-retry (http-transport:sync-inmemdb-to-db tdbdat server-state run-id server-id bad-sync-count)))
	  (let ((start-time (current-milliseconds))
		(sync-time  #f)
        (if sync-retry
		(rem-time   #f))
	    ;; inmemdb is a dbstruct
	    (condition-case
	     (db:sync-touched *inmemdb* *run-id* force-sync: #t)
	     ((sync-failed)(cond
			    ((> bad-sync-count 10) ;; time to give up
			     (http-transport:server-shutdown server-id port))
			    (else ;; (> bad-sync-count 0)  ;; we've had a fail or two, delay and loop
			     (thread-sleep! 5)
			     (loop count server-state (+ bad-sync-count 1)))))
            (loop count server-state (+ bad-sync-count 1))))
	     ((exn)
	      (debug:print 0 "ERROR: error from sync code other than 'sync-failed. Attempting to gracefully shutdown the server")
	      (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running crashed")
	      (exit)))
	    (set! sync-time  (- (current-milliseconds) start-time))
	    (set! rem-time (quotient (- 4000 sync-time) 1000))
	    (debug:print 4 "SYNC: time= " sync-time ", rem-time=" rem-time)
	    
            
	    (if (and (<= rem-time 4)
		     (> rem-time 0))
		(thread-sleep! rem-time)
		(thread-sleep! 4))) ;; fallback for if the math is changed ...

	  ;;
	  ;; no *inmemdb* yet, set running after our first pass through and start the db
	  ;;
	  (if (eq? server-state 'available)
	      (let ((new-server-id (tasks:server-am-i-the-server? (db:delay-if-busy tdbdat) run-id))) ;; try to ensure no double registering of servers
		(if (equal? new-server-id server-id)
		    (begin
		      (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep")
		      (thread-sleep! 0.5) ;; give some margin for queries to complete before switching from file based access to server based access
		      (set! *inmemdb*  (db:setup run-id))
		      ;; force initialization
		      ;; (db:get-db *inmemdb* #t)
		      (db:get-db *inmemdb* run-id)
		      (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running"))
		    (begin ;; gotta exit nicely
		      (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "collision")
		      (http-transport:server-shutdown server-id port))))))
      
      (if (< count 1) ;; 3x3 = 9 secs aprox
	  (loop (+ count 1) 'running bad-sync-count))
      
      ;; Check that iface and port have not changed (can happen if server port collides)
      (mutex-lock! *heartbeat-mutex*)
      (set! sdat *server-info*)
      (mutex-unlock! *heartbeat-mutex*)