Megatest

Check-in [8adb8a0e7e]
Login
Overview
Comment:Get .servinfo path in a function, create dir if needed. gate server start on high load
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.80 | v1.8003
Files: files | file ages | folders
SHA1: 8adb8a0e7eff704bff37ec1fa4877dc6a1ffb397
User & Date: mrwellan on 2022-12-12 15:25:39
Other Links: branch diff | manifest | tags
Context
2023-01-06
09:23
Synced mtargs with opensrc. Reduced noise in server output check-in: 7f6ea52b90 user: mrwellan tags: v1.80
2022-12-14
17:36
fixed a few crashes in the server log check-in: 5fd84cf5e2 user: mmgraham tags: v1.80
2022-12-12
15:25
Get .servinfo path in a function, create dir if needed. gate server start on high load check-in: 8adb8a0e7e user: mrwellan tags: v1.80, v1.8003
10:41
Changed mt version to 1.8003 check-in: e09f8d91d0 user: mmgraham tags: v1.80
Changes

Modified http-transport.scm from [76afa47739] to [d351abf6cf].

427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
			  (debug:print-info 0 *default-log-port* "Waiting for server alive signature")
                          (mutex-lock! *heartbeat-mutex*)
                          (set! sdat *server-info*)
                          (mutex-unlock! *heartbeat-mutex*)
                          (if (and sdat
				   (not changed)
				   (> (- (current-seconds) start-time) 2))
			      (let* ((servinfodir (conc *toppath*"/.servinfo"))
				     (ipaddr      (car sdat))
				     (port        (cadr sdat))
				     (servinf     (conc servinfodir"/"ipaddr":"port)))
				(set! servinfofile servinf)
				(if (not (file-exists? servinfodir))
				    (create-directory servinfodir #t))
				(with-output-to-file servinf







|







427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
			  (debug:print-info 0 *default-log-port* "Waiting for server alive signature")
                          (mutex-lock! *heartbeat-mutex*)
                          (set! sdat *server-info*)
                          (mutex-unlock! *heartbeat-mutex*)
                          (if (and sdat
				   (not changed)
				   (> (- (current-seconds) start-time) 2))
			      (let* ((servinfodir (server:get-servinfo-dir *toppath*)) ;; (conc *toppath*"/.servinfo"))
				     (ipaddr      (car sdat))
				     (port        (cadr sdat))
				     (servinf     (conc servinfodir"/"ipaddr":"port)))
				(set! servinfofile servinf)
				(if (not (file-exists? servinfodir))
				    (create-directory servinfodir #t))
				(with-output-to-file servinf
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
				sdat)
                              (begin
				(debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
                                (sleep 4)
				(if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
				    (let* ((ipaddr  (car sdat))
					   (port    (cadr sdat))
					   (servinf (conc *toppath*"/.servinfo/"ipaddr":"port)))
				      (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
				      ;; (delete-file* servinf) ;; handled by on-exit, can be removed
                                      #;(common:save-pkt `((action . died)
                                                         (T      . server)
                                                         (pid    . ,(current-process-id))
                                                         (ipaddr . ,(car sdat))
                                                         (port   . ,(cadr sdat))







|







459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
				sdat)
                              (begin
				(debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
                                (sleep 4)
				(if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
				    (let* ((ipaddr  (car sdat))
					   (port    (cadr sdat))
					   (servinf (conc (server:get-servinfo-dir *toppath*)"/"ipaddr":"port)))
				      (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
				      ;; (delete-file* servinf) ;; handled by on-exit, can be removed
                                      #;(common:save-pkt `((action . died)
                                                         (T      . server)
                                                         (pid    . ,(current-process-id))
                                                         (ipaddr . ,(car sdat))
                                                         (port   . ,(cadr sdat))

Modified server.scm from [ac77c0b1d1] to [80dd7784f0].

402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
		 (server:wait-for-server-start-last-flag areapath)))))))

;; oldest server alive determines host then choose random of youngest
;; five servers on that host
;;
(define (server:get-servers-info areapath)
  (assert *toppath* "FATAL: server:get-servers-info called before *toppath* has been set.")
  (let* ((servinfodir (conc *toppath*"/.servinfo")))
    (if (not (file-exists? servinfodir))
	(create-directory servinfodir))
    (let* ((allfiles    (glob (conc servinfodir"/*")))
	   (res         (make-hash-table)))
      (for-each
       (lambda (f)
	 (let* ((hostport  (pathname-strip-directory f))







|







402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
		 (server:wait-for-server-start-last-flag areapath)))))))

;; oldest server alive determines host then choose random of youngest
;; five servers on that host
;;
(define (server:get-servers-info areapath)
  (assert *toppath* "FATAL: server:get-servers-info called before *toppath* has been set.")
  (let* ((servinfodir (server:get-servinfo-dir areapath))) ;; (conc *toppath*"/.servinfo")))
    (if (not (file-exists? servinfodir))
	(create-directory servinfodir))
    (let* ((allfiles    (glob (conc servinfodir"/*")))
	   (res         (make-hash-table)))
      (for-each
       (lambda (f)
	 (let* ((hostport  (pathname-strip-directory f))
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
;; or another host?
;;
;; returns #t => ok to start another server
;;         #f => not ok to start another server
;;
(define (server:minimal-check areapath)
  (server:clean-up-old areapath)
  (let* ((srvdir      (conc areapath"/.servinfo"))
	 (servrs      (glob (conc srvdir"/*")))
	 (thishostip  (server:get-best-guess-address (get-host-name)))
	 (thisservrs  (glob (conc srvdir"/"thishostip":*")))
	 (homehostinf (server:choose-server areapath 'homehost))
	 (havehome    (car homehostinf))
	 (wearehome   (cdr homehostinf)))
    (debug:print-info 0 *default-log-port* thishostip", have homehost: "havehome", we are homehost: "wearehome







|







429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
;; or another host?
;;
;; returns #t => ok to start another server
;;         #f => not ok to start another server
;;
(define (server:minimal-check areapath)
  (server:clean-up-old areapath)
  (let* ((srvdir      (server:get-servinfo-dir areapath)) ;; (conc areapath"/.servinfo"))
	 (servrs      (glob (conc srvdir"/*")))
	 (thishostip  (server:get-best-guess-address (get-host-name)))
	 (thisservrs  (glob (conc srvdir"/"thishostip":*")))
	 (homehostinf (server:choose-server areapath 'homehost))
	 (havehome    (car homehostinf))
	 (wearehome   (cdr homehostinf)))
    (debug:print-info 0 *default-log-port* thishostip", have homehost: "havehome", we are homehost: "wearehome
516
517
518
519
520
521
522






523
524
525
526
527
528
529
530
531
532
533
	     #f)))
	(begin
	  (server:run areapath)
	  (thread-sleep! 3)
	  (case mode
	    ((homehost) (cons #f #f))
	    (else	#f))))))







(define (server:clean-up-old areapath)
  ;; any server file that has not been touched in ten minutes is effectively dead
  (let* ((sfiles (glob (conc areapath"/.servinfo/*"))))
    (for-each
     (lambda (sfile)
       (let* ((modtime (handle-exceptions
			   exn
			 (begin
			   (debug:print 0 *default-log-port* "WARNING: failed to get modification file for "sfile)
			   (current-seconds))







>
>
>
>
>
>



|







516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
	     #f)))
	(begin
	  (server:run areapath)
	  (thread-sleep! 3)
	  (case mode
	    ((homehost) (cons #f #f))
	    (else	#f))))))

(define (server:get-servinfo-dir areapath)
  (let* ((spath (conc areapath"/.servinfo")))
    (if (not (file-exists? spath))
	(create-directory spath #t))
    spath))

(define (server:clean-up-old areapath)
  ;; any server file that has not been touched in ten minutes is effectively dead
  (let* ((sfiles (glob (conc (server:get-servinfo-dir areapath)"/*"))))
    (for-each
     (lambda (sfile)
       (let* ((modtime (handle-exceptions
			   exn
			 (begin
			   (debug:print 0 *default-log-port* "WARNING: failed to get modification file for "sfile)
			   (current-seconds))
551
552
553
554
555
556
557






558
559
560
561
562
563
564
;; kind start up of server, wait before allowing another server for a given
;; area to be launched
;;
(define (server:kind-run areapath)
  ;; look for $MT_RUN_AREA_HOME/logs/server-start-last
  ;; and wait for it to be at least <server idletime> seconds old
  ;; (server:wait-for-server-start-last-flag areapath)






  (if (< (server:choose-server areapath 'count) 20)
      (server:run areapath))
  #;(if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
      (let* ((lock-file    (conc areapath "/logs/server-start.lock")))
	(let* ((start-flag (conc areapath "/logs/server-start-last")))
	  (common:simple-file-lock-and-wait lock-file expire-time: 25)
	  (debug:print-info  2 *default-log-port* "server:kind-run: touching " start-flag)







>
>
>
>
>
>







557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
;; kind start up of server, wait before allowing another server for a given
;; area to be launched
;;
(define (server:kind-run areapath)
  ;; look for $MT_RUN_AREA_HOME/logs/server-start-last
  ;; and wait for it to be at least <server idletime> seconds old
  ;; (server:wait-for-server-start-last-flag areapath)
  (let loop ()
    (if (> (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)) 2)
	(begin
	  (if (common:low-noise-print 30 "our-host-load")
	      (debug:print 0 *default-log-port* "WARNING: system load is high, waiting to start server."))
	  (loop))))
  (if (< (server:choose-server areapath 'count) 20)
      (server:run areapath))
  #;(if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
      (let* ((lock-file    (conc areapath "/logs/server-start.lock")))
	(let* ((start-flag (conc areapath "/logs/server-start-last")))
	  (common:simple-file-lock-and-wait lock-file expire-time: 25)
	  (debug:print-info  2 *default-log-port* "server:kind-run: touching " start-flag)