Megatest

Diff
Login

Differences From Artifact [e7099e7d7f]:

To Artifact [09b12e976e]:


355
356
357
358
359
360
361
362

363
364
365
366
367
368
369
370
371
372
373
374
375
376

377
378
379
380
381
382
383
384
385

386
387
388
389
390

391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
;; return #f otherwise
;; side effect - cleans up and exits on exception.
(define (http-transport:sync-inmemdb-to-db tdbdat server-state run-id server-id bad-sync-count)
  (if *inmemdb* 
      (let ((start-time (current-milliseconds))
            (sync-time  #f)
            (rem-time   #f)
            (sync-retry #f))

        ;; inmemdb is a dbstruct
        (condition-case
         (db:sync-touched *inmemdb* *run-id* force-sync: #t)
         ((sync-failed)(cond
                        ((> bad-sync-count 10) ;; time to give up
                         (http-transport:server-shutdown server-id port))
                        (else ;; (> bad-sync-count 0)  ;; we've had a fail or two, delay and loop
                         (thread-sleep! 5)
                         (set! sync-retry #t))))
         ((exn)
          (debug:print 0 "ERROR: error from sync code other than 'sync-failed. Attempting to gracefully shutdown the server")
          (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running crashed")
          (exit)))
        (if sync-retry

            #t ; return true - retry
            (begin
              (set! sync-time  (- (current-milliseconds) start-time))
              (set! rem-time (quotient (- 4000 sync-time) 1000))
              (debug:print 4 "SYNC: time= " sync-time ", rem-time=" rem-time)
              
              (if (and (<= rem-time 4)
                       (> rem-time 0))
                  (thread-sleep! rem-time)

                  (thread-sleep! 4)) ;; fallback for if the math is changed ...
              
              ;;
              ;; no *inmemdb* yet, set running after our first pass through and start the db
              ;;

              (if (eq? server-state 'available)
                  (let ((new-server-id (tasks:server-am-i-the-server? (db:delay-if-busy tdbdat) run-id))) ;; try to ensure no double registering of servers
                    (if (equal? new-server-id server-id)
                        (begin
                          (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep")
                          (thread-sleep! 0.5) ;; give some margin for queries to complete before switching from file based access to server based access
                          (set! *inmemdb*  (db:setup run-id))
                          ;; force initialization
                          ;; (db:get-db *inmemdb* #t)
                          (db:get-db *inmemdb* run-id)
                          (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running"))
                        (begin ;; gotta exit nicely
                          (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "collision")
                          (http-transport:server-shutdown server-id port)))))
              #f))) ; return #f - don't retry
      #f)) ; return #f - don't retry since there is no inmemdb


;;; factored out of http-transport:keep-running
(define (http-transport:get-server-info tdbdat server-start-time server-id run-id)
  (let loop ((start-time (current-seconds))
             (changed    #t)
             (last-sdat  "not this"))
    (let ((sdat #f))
      (thread-sleep! 0.01)
      (debug:print-info 0 "Waiting for server alive signature")
      (mutex-lock!   *heartbeat-mutex*)
      (set! sdat     *server-info*)
      (mutex-unlock! *heartbeat-mutex*)
      (if (and sdat
               (not changed)
               (> (- (current-seconds) start-time) 2))
          sdat
          (begin
            (debug:print-info 0 "Still waiting, last-sdat=" last-sdat)
            (sleep 4)
            (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
                (begin
                  (debug:print 0 "ERROR: transport appears to have died, exiting server " server-id " for run " run-id)
                  (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id "failed to start, never received server alive signature")
                  (exit))
                (loop start-time
                      (equal? sdat last-sdat)







|
>

|
|











>
|








>
|
|
|
|
|
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<
<
|














|


|
|







355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408


409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
;; return #f otherwise
;; side effect - cleans up and exits on exception.
(define (http-transport:sync-inmemdb-to-db tdbdat server-state run-id server-id bad-sync-count)
  (if *inmemdb* 
      (let ((start-time (current-milliseconds))
            (sync-time  #f)
            (rem-time   #f)
            (sync-retry #f)
            (sync-touched (db:sync-touched *inmemdb* *run-id* force-sync: #t)))
        ;; inmemdb is a dbstruct
        (condition-case sync-touched
         
         ((sync-failed)(cond
                        ((> bad-sync-count 10) ;; time to give up
                         (http-transport:server-shutdown server-id port))
                        (else ;; (> bad-sync-count 0)  ;; we've had a fail or two, delay and loop
                         (thread-sleep! 5)
                         (set! sync-retry #t))))
         ((exn)
          (debug:print 0 "ERROR: error from sync code other than 'sync-failed. Attempting to gracefully shutdown the server")
          (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running crashed")
          (exit)))
        (if sync-retry
            (begin
              #t) ; return true - retry
            (begin
              (set! sync-time  (- (current-milliseconds) start-time))
              (set! rem-time (quotient (- 4000 sync-time) 1000))
              (debug:print 4 "SYNC: time= " sync-time ", rem-time=" rem-time)
              
              (if (and (<= rem-time 4)
                       (> rem-time 0))
                  (thread-sleep! rem-time)
                  (thread-sleep! 4))))
        #f) ;; fallback for if the math is changed ...
      
      ;;
      ;; no *inmemdb* yet, set running after our first pass through and start the db
      ;;
      (begin
        (if (eq? server-state 'available)
            (let ((new-server-id (tasks:server-am-i-the-server? (db:delay-if-busy tdbdat) run-id))) ;; try to ensure no double registering of servers
              (if (equal? new-server-id server-id)
                  (begin
                    (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep")
                    (thread-sleep! 0.5) ;; give some margin for queries to complete before switching from file based access to server based access
                    (set! *inmemdb*  (db:setup run-id))
                    ;; force initialization
                    ;; (db:get-db *inmemdb* #t)
                    (db:get-db *inmemdb* run-id)
                    (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running"))
                  (begin ;; gotta exit nicely
                    (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "collision")
                    (http-transport:server-shutdown server-id port)))))))


  #f)

;;; factored out of http-transport:keep-running
(define (http-transport:get-server-info tdbdat server-start-time server-id run-id)
  (let loop ((start-time (current-seconds))
             (changed    #t)
             (last-sdat  "not this"))
    (let ((sdat #f))
      (thread-sleep! 0.01)
      (debug:print-info 0 "Waiting for server alive signature")
      (mutex-lock!   *heartbeat-mutex*)
      (set! sdat     *server-info*)
      (mutex-unlock! *heartbeat-mutex*)
      (if (and sdat
               (not changed)
               (> (- (current-seconds) start-time) (- (tasks:update-pause-seconds) 1) ))
          sdat
          (begin
            (debug:print-info 0 "Still waiting, sdat="sdat" last-sdat=" last-sdat)
            (sleep (tasks:update-pause-seconds))
            (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
                (begin
                  (debug:print 0 "ERROR: transport appears to have died, exiting server " server-id " for run " run-id)
                  (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id "failed to start, never received server alive signature")
                  (exit))
                (loop start-time
                      (equal? sdat last-sdat)