Megatest

Diff
Login

Differences From Artifact [c5ac09a5b9]:

To Artifact [55e6935b48]:


467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
     ptype: 'server)))

;; Placeholder registration step: nothing is written anywhere yet, the
;; procedure ignores db-file and always answers #t (i.e. "registered").
;;
(define register-server-in-db
  (lambda (db-file)
    #t))

;; Open the database file db-file with db:open-db and return the handle
;; that call produces.
;; NOTE(review): the earlier comment described this as loading the db
;; "into inmem"; only the db:open-db call is visible here - confirm.
;;
(define (load-up-database db-file)
  (db:open-db db-file))

(define (get-pkts-dir)
  (assert *toppath* "ERROR: get-pkts-dir called without *toppath* set. Exiting.")
  (let* ((pdir (conc *toppath* "/.meta/srvpkts")))
     (if (file-exists? pdir)
	 pdir
	 (begin
	   (create-directory pdir #t)







<
<
<
<
<
<







467
468
469
470
471
472
473






474
475
476
477
478
479
480
     ptype: 'server)))

;; Placeholder registration step: nothing is written anywhere yet, the
;; procedure ignores db-file and always answers #t (i.e. "registered").
;;
(define register-server-in-db
  (lambda (db-file)
    #t))







(define (get-pkts-dir)
  (assert *toppath* "ERROR: get-pkts-dir called without *toppath* set. Exiting.")
  (let* ((pdir (conc *toppath* "/.meta/srvpkts")))
     (if (file-exists? pdir)
	 pdir
	 (begin
	   (create-directory pdir #t)
574
575
576
577
578
579
580





581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603

604
605
606
607
608
609
610
611
612
613

614
615
616
617
618
619
620
621
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running) 
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")





  (let* ((sdat              #f)
	 (tmp-area          (common:get-db-tmp-area))
	 (started-file      (conc tmp-area "/.server-started"))
	 (server-start-time (current-seconds))
	 (pkts-dir          (get-pkts-dir))
	 (server-key        (server:mk-signature))
	 (db-file           (conc *toppath* "/.db/" (or (args:get-arg "-db") "main.db")))
	 (server-info (let loop ((start-time (current-seconds))
				 (changed    #t)
				 (last-sdat  "not this"))
                        (begin ;; let ((sdat #f))
			  (thread-sleep! 0.01)
			  (debug:print-info 0 *default-log-port* "Waiting for server alive signature")
                          (mutex-lock! *heartbeat-mutex*)
                          (set! sdat *server-info*)
                          (mutex-unlock! *heartbeat-mutex*)
                          (if (and sdat
				   (not changed)
				   (> (- (current-seconds) start-time) 2))
			      (begin
				(debug:print-info 0 *default-log-port* "Received server alive signature, now attempting to lock in server")
				;; create a server pkt in *toppath*/.meta/srvpkts
				(register-server pkts-dir *srvpktspec* (get-host-name) (cadr sdat) server-key (car sdat) db-file)


				;; now read pkts and see if we are a contender
				(let* ((all-pkts     (get-all-server-pkts pkts-dir *srvpktspec*))
				       (viables      (get-viable-servers all-pkts db-file))
				       (best-srv     (get-best-candidate viables db-file))
				       (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f)))
				  ;; am I the best-srv, compare server-keys to know
				  (if (and (equal? best-srv-key server-key)
					   (register-server-in-db db-file))
				      (load-up-database db-file)          ;; ready to go!

				      (bdat-time-to-exit-set! *bdat* #t)) ;; nope, we are not needed, exit when can do
				  sdat))
                              (begin
				(debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
                                (sleep 4)
				(if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
				    (begin
				      (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")







>
>
>
>
>
|

<



<















|
>









|
>
|







568
569
570
571
572
573
574
575
576
577
578
579
580
581

582
583
584

585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running) 
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")
  (let* ((run-id            (let ((rid (args:get-arg "-run-id")))
			      (if rid
				  (string->number rid)
				  #f)))
	 (db-file           (db:run-id->path run-id))
	 (sdat              #f)
	 (tmp-area          (common:get-db-tmp-area))

	 (server-start-time (current-seconds))
	 (pkts-dir          (get-pkts-dir))
	 (server-key        (server:mk-signature))

	 (server-info (let loop ((start-time (current-seconds))
				 (changed    #t)
				 (last-sdat  "not this"))
                        (begin ;; let ((sdat #f))
			  (thread-sleep! 0.01)
			  (debug:print-info 0 *default-log-port* "Waiting for server alive signature")
                          (mutex-lock! *heartbeat-mutex*)
                          (set! sdat *server-info*)
                          (mutex-unlock! *heartbeat-mutex*)
                          (if (and sdat
				   (not changed)
				   (> (- (current-seconds) start-time) 2))
			      (begin
				(debug:print-info 0 *default-log-port* "Received server alive signature, now attempting to lock in server")
				;; create a server pkt in *toppath*/.meta/srvpkts
				(register-server pkts-dir *srvpktspec* (get-host-name)
						 (cadr sdat) server-key (car sdat) db-file)

				;; now read pkts and see if we are a contender
				(let* ((all-pkts     (get-all-server-pkts pkts-dir *srvpktspec*))
				       (viables      (get-viable-servers all-pkts db-file))
				       (best-srv     (get-best-candidate viables db-file))
				       (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f)))
				  ;; am I the best-srv, compare server-keys to know
				  (if (and (equal? best-srv-key server-key)
					   (register-server-in-db db-file))
				      (if (db:get-iam-server-lock *dbstruct-db* run-id)
					  (debug:print 0 *default-log-port* "I'm the server!")
					  (bdat-time-to-exit-set! *bdat* #t))) ;; nope, we are not needed, exit when can do
				  sdat))
                              (begin
				(debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
                                (sleep 4)
				(if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
				    (begin
				      (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
				    (loop start-time
					  (equal? sdat last-sdat)
					  sdat)))))))
	 (iface       (car server-info))
         (port        (cadr server-info))
         (last-access 0)
	 (server-timeout (server:expiration-timeout))
	 (server-going  #f)
	 (server-log-file (args:get-arg "-log"))) ;; always set when we are a server

    (handle-exceptions
	exn
      (debug:print 0 *default-log-port* "Failed to create " started-file ", exn=" exn)
      (with-output-to-file started-file (lambda ()(print (current-process-id)))))

    (let loop ((count         0)
	       (server-state 'available)
	       (bad-sync-count 0)
	       (start-time     (current-milliseconds)))
      ;; Use this opportunity to sync the tmp db to megatest.db
      (if (not server-going) ;; *dbstruct-db* 
	  (let ((watchdog (bdat-watchdog *bdat*)))
	    (debug:print 0 *default-log-port* "SERVER: dbprep")
	    (set! *dbstruct-db*  (db:setup #t)) ;;  run-id))
	    (set! server-going #t)
	    (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine.
	    (if watchdog
		(if (not (member (thread-state watchdog) '(ready running blocked sleeping dead)))
		    (begin
		      (debug:print-info 0 "Starting watchdog thread (in state "(thread-state watchdog)")")
		      (thread-start! watchdog)))
		(debug:print 0 *default-log-port* "ERROR: *watchdog* not setup, cannot start it."))))







<


<
<
<
<
<





|


|
<







629
630
631
632
633
634
635

636
637





638
639
640
641
642
643
644
645
646

647
648
649
650
651
652
653
				    (loop start-time
					  (equal? sdat last-sdat)
					  sdat)))))))
	 (iface       (car server-info))
         (port        (cadr server-info))
         (last-access 0)
	 (server-timeout (server:expiration-timeout))

	 (server-log-file (args:get-arg "-log"))) ;; always set when we are a server






    (let loop ((count         0)
	       (server-state 'available)
	       (bad-sync-count 0)
	       (start-time     (current-milliseconds)))
      ;; Use this opportunity to sync the tmp db to megatest.db
      (if (not *dbstruct-db* )
	  (let ((watchdog (bdat-watchdog *bdat*)))
	    (debug:print 0 *default-log-port* "SERVER: dbprep")
	    (db:setup run-id) ;; sets *dbstruct-db* as side effect

	    (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine.
	    (if watchdog
		(if (not (member (thread-state watchdog) '(ready running blocked sleeping dead)))
		    (begin
		      (debug:print-info 0 "Starting watchdog thread (in state "(thread-state watchdog)")")
		      (thread-start! watchdog)))
		(debug:print 0 *default-log-port* "ERROR: *watchdog* not setup, cannot start it."))))