Megatest

Check-in [cc546c7dfe]
Login
Overview
Comment:server:choose-server now working.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.70-nohomehost
Files: files | file ages | folders
SHA1: cc546c7dfe884140bfc839a8066cea7de21e5575
User & Date: matt on 2022-11-12 18:34:58
Other Links: branch diff | manifest | tags
Context
2022-11-12
19:25
Most routines needed for no-homehost updated. check-in: ed25403d77 user: matt tags: v1.70-nohomehost
18:34
server:choose-server now working. check-in: cc546c7dfe user: matt tags: v1.70-nohomehost
07:03
Steps towards eliminating homehost check-in: a0384d728b user: matt tags: v1.70-nohomehost
Changes

Modified megatest.scm from [7c70251ef1] to [b4770c25e0].

654
655
656
657
658
659
660
661

662
663
664
665
666
667
668
654
655
656
657
658
659
660

661
662
663
664
665
666
667
668







-
+







;; for some switches always print the command to stderr
;;
(if (args:any? "-run" "-runall" "-remove-runs" "-set-state-status" "-kill-runs" "-kill-rerun")
    (debug:print 0 *default-log-port* (string-intersperse (argv) " ")))

;; some switches imply homehost. Exit here if not on homehost
;;
(let ((homehost-required  (list "-cleanup-db" "-server")))
(let ((homehost-required  (list "-cleanup-db")))
  (if (apply args:any? homehost-required)
      (if (not (common:on-homehost?))
	  (for-each
	   (lambda (switch)
	     (if (args:get-arg switch)
		 (begin
		   (debug:print 0 *default-log-port* "ERROR: you must be on the homehost to run with " switch

Modified server.scm from [08cd1512fb] to [b8fe843658].

443
444
445
446
447
448
449
450

451
452

453
454
455
456
457
458
459
460
461
462


463
464
465
466


467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487

488
489
490
491
492

493
494
495
496
497
498
499



500
501
502
503
504

505
506
507
508

509
510
511
512
513
514
515
516
517
518
519
520







521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539



540
541
542
543
544
545
546
443
444
445
446
447
448
449

450
451

452
453
454
455
456
457
458
459
460
461

462
463
464
465


466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486


487





488







489
490
491





492




493












494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517


518
519
520
521
522
523
524
525
526
527







-
+

-
+









-
+
+


-
-
+
+



















-
-
+
-
-
-
-
-
+
-
-
-
-
-
-
-
+
+
+
-
-
-
-
-
+
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+

















-
-
+
+
+







	 (by-time-asc (sort (hash-table-keys serversdat) ;; list of "host:port"
			    (lambda (a b)
			      (>= (list-ref (hash-table-ref serversdat a) 2)
				  (list-ref (hash-table-ref serversdat b) 2))))))
    (if (not (null? by-time-asc))
	(let* ((oldest     (last by-time-asc))
	       (oldest-dat (hash-table-ref serversdat oldest))
	       (host       (list-ref oldest-dat 1))
	       (host       (list-ref oldest-dat 0))
	       (all-valid  (filter (lambda (x)
				     (equal? host (list-ref (hash-table-ref serversdat x) 1)))
				     (equal? host (list-ref (hash-table-ref serversdat x) 0)))
				   by-time-asc)))
	  (case mode
	    ((info)
	     (print "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid)
	     (print "youngest: "(hash-table-ref serversdat (car all-valid))))
	    ((home) host)
	    ((best)(if (> (length all-valid) 5)
		       (map (lambda (x)
			      (hash-table-ref serversdat x))
			    (take all-valid 5))))
			    (take all-valid 5))
		       all-valid))
	    (else
	     (debug:print 0 *default-log-port* "ERROR: invalid command "mode)
	     #f))
	  #f))))
	     #f)))
	#f)))

(define (server:get-homehost #!key (trynum 5))
  ;; called often especially at start up. use mutex to eliminate collisions
  (mutex-lock! *homehost-mutex*)
  (cond
   (*home-host*
    (mutex-unlock! *homehost-mutex*)
    *home-host*)
   ((not *toppath*)
    (mutex-unlock! *homehost-mutex*)
    (launch:setup) ;; safely mutexed now
    (if (> trynum 0)
	(begin
	  (thread-sleep! 2)
	  (server:get-homehost trynum: (- trynum 1)))
	#f))
   (else
    (let* ((currhost (get-host-name))
	   (bestadrs (server:get-best-guess-address currhost))
	   ;; first look in config, then look in file .homehost, create it if not found
	   (homehost (or (configf:lookup *configdat* "server" "homehost" )
	   (homehost (server:choose-server *toppath* 'home))
			 (handle-exceptions
			     exn
			     (if (> trynum 0)
				 (let ((delay-time (* (- 5 trynum) 5)))
				   (mutex-unlock! *homehost-mutex*)
	   (at-home  (or (equal? homehost currhost)
				   (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying "
						delay-time " seconds and trying again, message: "  ((condition-property-accessor 'exn 'message) exn)
						", exn=" exn)
				   (thread-sleep! delay-time)
				   (server:get-homehost trynum: (- trynum 1)))
				 (begin
				   (mutex-unlock! *homehost-mutex*)
			 (equal? homehost bestadrs))))

      ;; if no homehost start server, wait a bit and check again
				   (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)
						"] Failed to read .homehost file after trying five times. Giving up and exiting, message: "
						((condition-property-accessor 'exn 'message) exn))
				   (exit 1)))
			   (let ((hhf (conc *toppath* "/.homehost")))
      (if homehost
			     (if (common:file-exists? hhf)
				 (with-input-from-file hhf read-line)
				 (if (file-write-access? *toppath*)
				     (begin
	  (begin
				       (with-output-to-file hhf
					 (lambda ()
					   (print bestadrs)))
				       (begin
					 (mutex-unlock! *homehost-mutex*)
					 (car (server:get-homehost))))
				     #f))))))
	   (at-home  (or (equal? homehost currhost)
			 (equal? homehost bestadrs))))
      (set! *home-host* (cons homehost at-home))
      (mutex-unlock! *homehost-mutex*)
      *home-host*))))
	    (set! *home-host* (cons homehost at-home))
	    (mutex-unlock! *homehost-mutex*)
	    *home-host*)
	  (begin
	    (server:kind-run *toppath*)
	    (thread-sleep! 5)
	    (server:get-homehost trynum: (- trynum 1))))))))

;;======================================================================
;; am I on the homehost?
;;
(define (common:on-homehost?)
  (let ((hh (server:get-homehost)))
    (if hh
	(cdr hh)
	#f)))

	  
;; kind start up of server, wait before allowing another server for a given
;; area to be launched
;;
(define (server:kind-run areapath)
  ;; look for $MT_RUN_AREA_HOME/logs/server-start-last
  ;; and wait for it to be at least <server idletime> seconds old
  (server:wait-for-server-start-last-flag areapath)
  (if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
  ;; (server:wait-for-server-start-last-flag areapath)
  (server:run areapath)
  #;(if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
      (let* ((lock-file    (conc areapath "/logs/server-start.lock")))
	(let* ((start-flag (conc areapath "/logs/server-start-last")))
	  (common:simple-file-lock-and-wait lock-file expire-time: 25)
	  (debug:print-info  2 *default-log-port* "server:kind-run: touching " start-flag)
	  (system (conc "touch " start-flag)) ;; lazy but safe
	  (server:run areapath)
	  (thread-sleep! 20) ;; don't release the lock for at least a few seconds. And allow time for the server startup to get to "SERVER STARTED".