Megatest

Check-in [889e2e71ef]
Login
Overview
Comment: Turn off waiting in kind-run, rely on gating from wait-for-server-start-last-flag
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.65
Files: files | file ages | folders
SHA1: 889e2e71ef5e685aa8ce4250ae038d578f4edc57
User & Date: matt on 2021-03-12 14:25:17
Other Links: branch diff | manifest | tags
Context
2021-03-12
18:53
Backed out cleanup of config: functions check-in: ae1ac4c3b4 user: mmgraham tags: v1.65
14:25
Turn off waiting in kind-run, rely on gating from wait-for-server-start-last-flag check-in: 889e2e71ef user: matt tags: v1.65
2021-03-11
17:52
updated delay to kind-run check-in: dd2cea12eb user: pjhatwal tags: v1.65, THIS-IS-REAL-1.65
Changes

Modified server.scm from [33c77942b5] to [19fd15cc14].

200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
			      (string->number (cadr dat)) ;; port
			      (string->number (caddr dat))
                              (cadr (cddr dat))))))
                (begin 
                   (if dbprep-found
                      (begin
                         (debug:print-info 0 *default-log-port* "Server is in dbprep at " (current-seconds))
                         (thread-sleep! 25)
                      )
                      (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (current-seconds))
                   )
		    (list #f #f #f #f)))))))))

;; get a list of servers with all relevant data
;; ( mod-time host port start-time pid )







|







200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
			      (string->number (cadr dat)) ;; port
			      (string->number (caddr dat))
                              (cadr (cddr dat))))))
                (begin 
                   (if dbprep-found
                      (begin
                         (debug:print-info 0 *default-log-port* "Server is in dbprep at " (current-seconds))
                         (thread-sleep! 0.5) ;; was 25 sec but that blocked things from starting?
                      )
                      (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (current-seconds))
                   )
		    (list #f #f #f #f)))))))))

;; get a list of servers with all relevant data
;; ( mod-time host port start-time pid )
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426

427
428
429
430
431
432
433
434
  ;; look for $MT_RUN_AREA_HOME/logs/server-start-last
  ;; and wait for it to be at least 3 seconds old
  (server:wait-for-server-start-last-flag areapath)
  (if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
      (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun
	     (call-num     (car last-run-dat))
	     (when-run     (cadr last-run-dat))
	     (run-delay    (+ (case call-num
				((0)    0)
				((1)   20)
				((2)  300)
				(else 600))
			      ;(random 5)
                              0))   ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously

	     (lock-file    (conc areapath "/logs/server-start.lock")))
	(if	(> (- (current-seconds) when-run) run-delay)
		(let* ((start-flag (conc areapath "/logs/server-start-last")))
		  (common:simple-file-lock-and-wait lock-file expire-time: 15)
                  (debug:print-info  0 *default-log-port* "server:kind-run: touching " start-flag)
		  (system (conc "touch " start-flag)) ;; lazy but safe
		  (server:run areapath)
		  (thread-sleep! 2) ;; don't release the lock for at least a few seconds
		  (common:simple-file-release-lock lock-file)))

	(hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds))))))

;; this one seems to be the general entry point
;;
(define (server:start-and-wait areapath #!key (timeout 60))
  (let ((give-up-time (+ (current-seconds) timeout)))
    (let loop ((server-info (server:check-if-running areapath))
	       (try-num    0))







|




|



|
|
|
|
|
|
|
|
>
|







403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
  ;; look for $MT_RUN_AREA_HOME/logs/server-start-last
  ;; and wait for it to be at least 3 seconds old
  (server:wait-for-server-start-last-flag areapath)
  (if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
      (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun
	     (call-num     (car last-run-dat))
	     (when-run     (cadr last-run-dat))
	     (run-delay    (+ (case call-num ;; NOT USED. Waiting is handled by wait-for-server
				((0)    0)
				((1)   20)
				((2)  300)
				(else 600))
			      (random 5)
                              0))   ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously

	     (lock-file    (conc areapath "/logs/server-start.lock")))
	;; (if	(> (- (current-seconds) when-run) run-delay)
	(let* ((start-flag (conc areapath "/logs/server-start-last")))
	  (common:simple-file-lock-and-wait lock-file expire-time: 15)
	  (debug:print-info  0 *default-log-port* "server:kind-run: touching " start-flag)
	  (system (conc "touch " start-flag)) ;; lazy but safe
	  (server:run areapath)
	  (thread-sleep! 2) ;; don't release the lock for at least a few seconds
	  (common:simple-file-release-lock lock-file)))
      (debug:print-info 0 *default-log-port* "Found server already running. NOT trying to start another.")))
	;; (hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds))))))

;; this one seems to be the general entry point
;;
(define (server:start-and-wait areapath #!key (timeout 60))
  (let ((give-up-time (+ (current-seconds) timeout)))
    (let loop ((server-info (server:check-if-running areapath))
	       (try-num    0))