Megatest

Check-in [cc546c7dfe]
Login
Overview
Comment:server:choose-server now working.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.70-nohomehost
Files: files | file ages | folders
SHA1: cc546c7dfe884140bfc839a8066cea7de21e5575
User & Date: matt on 2022-11-12 18:34:58
Other Links: branch diff | manifest | tags
Context
2022-11-12
19:25
Most routines needed for no-homehost updated. check-in: ed25403d77 user: matt tags: v1.70-nohomehost
18:34
server:choose-server now working. check-in: cc546c7dfe user: matt tags: v1.70-nohomehost
07:03
Steps towards eliminating homehost check-in: a0384d728b user: matt tags: v1.70-nohomehost
Changes

Modified megatest.scm from [7c70251ef1] to [b4770c25e0].

654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
;; for some switches always print the command to stderr
;;
(if (args:any? "-run" "-runall" "-remove-runs" "-set-state-status" "-kill-runs" "-kill-rerun")
    (debug:print 0 *default-log-port* (string-intersperse (argv) " ")))

;; some switches imply homehost. Exit here if not on homehost
;;
(let ((homehost-required  (list "-cleanup-db" "-server")))
  (if (apply args:any? homehost-required)
      (if (not (common:on-homehost?))
	  (for-each
	   (lambda (switch)
	     (if (args:get-arg switch)
		 (begin
		   (debug:print 0 *default-log-port* "ERROR: you must be on the homehost to run with " switch







|







654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
;; for some switches always print the command to stderr
;;
(if (args:any? "-run" "-runall" "-remove-runs" "-set-state-status" "-kill-runs" "-kill-rerun")
    (debug:print 0 *default-log-port* (string-intersperse (argv) " ")))

;; some switches imply homehost. Exit here if not on homehost
;;
(let ((homehost-required  (list "-cleanup-db")))
  (if (apply args:any? homehost-required)
      (if (not (common:on-homehost?))
	  (for-each
	   (lambda (switch)
	     (if (args:get-arg switch)
		 (begin
		   (debug:print 0 *default-log-port* "ERROR: you must be on the homehost to run with " switch

Modified server.scm from [08cd1512fb] to [b8fe843658].

443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462

463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520




521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538

539
540
541
542
543
544
545
546
	 (by-time-asc (sort (hash-table-keys serversdat) ;; list of "host:port"
			    (lambda (a b)
			      (>= (list-ref (hash-table-ref serversdat a) 2)
				  (list-ref (hash-table-ref serversdat b) 2))))))
    (if (not (null? by-time-asc))
	(let* ((oldest     (last by-time-asc))
	       (oldest-dat (hash-table-ref serversdat oldest))
	       (host       (list-ref oldest-dat 1))
	       (all-valid  (filter (lambda (x)
				     (equal? host (list-ref (hash-table-ref serversdat x) 1)))
				   by-time-asc)))
	  (case mode
	    ((info)
	     (print "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid)
	     (print "youngest: "(hash-table-ref serversdat (car all-valid))))
	    ((home) host)
	    ((best)(if (> (length all-valid) 5)
		       (map (lambda (x)
			      (hash-table-ref serversdat x))
			    (take all-valid 5))))

	    (else
	     (debug:print 0 *default-log-port* "ERROR: invalid command "mode)
	     #f))
	  #f))))

(define (server:get-homehost #!key (trynum 5))
  ;; called often especially at start up. use mutex to eliminate collisions
  (mutex-lock! *homehost-mutex*)
  (cond
   (*home-host*
    (mutex-unlock! *homehost-mutex*)
    *home-host*)
   ((not *toppath*)
    (mutex-unlock! *homehost-mutex*)
    (launch:setup) ;; safely mutexed now
    (if (> trynum 0)
	(begin
	  (thread-sleep! 2)
	  (server:get-homehost trynum: (- trynum 1)))
	#f))
   (else
    (let* ((currhost (get-host-name))
	   (bestadrs (server:get-best-guess-address currhost))
	   ;; first look in config, then look in file .homehost, create it if not found
	   (homehost (or (configf:lookup *configdat* "server" "homehost" )
			 (handle-exceptions
			     exn
			     (if (> trynum 0)
				 (let ((delay-time (* (- 5 trynum) 5)))
				   (mutex-unlock! *homehost-mutex*)
				   (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying "
						delay-time " seconds and trying again, message: "  ((condition-property-accessor 'exn 'message) exn)
						", exn=" exn)
				   (thread-sleep! delay-time)
				   (server:get-homehost trynum: (- trynum 1)))
				 (begin
				   (mutex-unlock! *homehost-mutex*)
				   (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)
						"] Failed to read .homehost file after trying five times. Giving up and exiting, message: "
						((condition-property-accessor 'exn 'message) exn))
				   (exit 1)))
			   (let ((hhf (conc *toppath* "/.homehost")))
			     (if (common:file-exists? hhf)
				 (with-input-from-file hhf read-line)
				 (if (file-write-access? *toppath*)
				     (begin
				       (with-output-to-file hhf
					 (lambda ()
					   (print bestadrs)))
				       (begin
					 (mutex-unlock! *homehost-mutex*)
					 (car (server:get-homehost))))
				     #f))))))
	   (at-home  (or (equal? homehost currhost)
			 (equal? homehost bestadrs))))
      (set! *home-host* (cons homehost at-home))
      (mutex-unlock! *homehost-mutex*)
      *home-host*))))





;;======================================================================
;; am I on the homehost?
;;
(define (common:on-homehost?)
  (let ((hh (server:get-homehost)))
    (if hh
	(cdr hh)
	#f)))

	  
;; kind start up of server, wait before allowing another server for a given
;; area to be launched
;;
(define (server:kind-run areapath)
  ;; look for $MT_RUN_AREA_HOME/logs/server-start-last
  ;; and wait for it to be at least <server idletime> seconds old
  (server:wait-for-server-start-last-flag areapath)

  (if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
      (let* ((lock-file    (conc areapath "/logs/server-start.lock")))
	(let* ((start-flag (conc areapath "/logs/server-start-last")))
	  (common:simple-file-lock-and-wait lock-file expire-time: 25)
	  (debug:print-info  2 *default-log-port* "server:kind-run: touching " start-flag)
	  (system (conc "touch " start-flag)) ;; lazy but safe
	  (server:run areapath)
	  (thread-sleep! 20) ;; don't release the lock for at least a few seconds. And allow time for the server startup to get to "SERVER STARTED".







|

|









|
>


|
|



















<
|
<
<
<
<
|
<
<
<
<
|
|
|
<
<
<
<
|
<
<
<
|
<
<
<
<
<
<
<
<
<
|
|
|
>
>
>
>

















|
>
|







443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486

487




488




489
490
491




492



493









494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
	 (by-time-asc (sort (hash-table-keys serversdat) ;; list of "host:port"
			    (lambda (a b)
			      (>= (list-ref (hash-table-ref serversdat a) 2)
				  (list-ref (hash-table-ref serversdat b) 2))))))
    (if (not (null? by-time-asc))
	(let* ((oldest     (last by-time-asc))
	       (oldest-dat (hash-table-ref serversdat oldest))
	       (host       (list-ref oldest-dat 0))
	       (all-valid  (filter (lambda (x)
				     (equal? host (list-ref (hash-table-ref serversdat x) 0)))
				   by-time-asc)))
	  (case mode
	    ((info)
	     (print "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid)
	     (print "youngest: "(hash-table-ref serversdat (car all-valid))))
	    ((home) host)
	    ((best)(if (> (length all-valid) 5)
		       (map (lambda (x)
			      (hash-table-ref serversdat x))
			    (take all-valid 5))
		       all-valid))
	    (else
	     (debug:print 0 *default-log-port* "ERROR: invalid command "mode)
	     #f)))
	#f)))

(define (server:get-homehost #!key (trynum 5))
  ;; called often especially at start up. use mutex to eliminate collisions
  (mutex-lock! *homehost-mutex*)
  (cond
   (*home-host*
    (mutex-unlock! *homehost-mutex*)
    *home-host*)
   ((not *toppath*)
    (mutex-unlock! *homehost-mutex*)
    (launch:setup) ;; safely mutexed now
    (if (> trynum 0)
	(begin
	  (thread-sleep! 2)
	  (server:get-homehost trynum: (- trynum 1)))
	#f))
   (else
    (let* ((currhost (get-host-name))
	   (bestadrs (server:get-best-guess-address currhost))

	   (homehost (server:choose-server *toppath* 'home))




	   (at-home  (or (equal? homehost currhost)




			 (equal? homehost bestadrs))))

      ;; if no homehost start server, wait a bit and check again




      (if homehost



	  (begin









	    (set! *home-host* (cons homehost at-home))
	    (mutex-unlock! *homehost-mutex*)
	    *home-host*)
	  (begin
	    (server:kind-run *toppath*)
	    (thread-sleep! 5)
	    (server:get-homehost trynum: (- trynum 1))))))))

;;======================================================================
;; am I on the homehost?
;;
(define (common:on-homehost?)
  (let ((hh (server:get-homehost)))
    (if hh
	(cdr hh)
	#f)))

	  
;; kind start up of server, wait before allowing another server for a given
;; area to be launched
;;
(define (server:kind-run areapath)
  ;; look for $MT_RUN_AREA_HOME/logs/server-start-last
  ;; and wait for it to be at least <server idletime> seconds old
  ;; (server:wait-for-server-start-last-flag areapath)
  (server:run areapath)
  #;(if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
      (let* ((lock-file    (conc areapath "/logs/server-start.lock")))
	(let* ((start-flag (conc areapath "/logs/server-start-last")))
	  (common:simple-file-lock-and-wait lock-file expire-time: 25)
	  (debug:print-info  2 *default-log-port* "server:kind-run: touching " start-flag)
	  (system (conc "touch " start-flag)) ;; lazy but safe
	  (server:run areapath)
	  (thread-sleep! 20) ;; don't release the lock for at least a few seconds. And allow time for the server startup to get to "SERVER STARTED".