Megatest

Diff
Login

Differences From Artifact [2182e473e2]:

To Artifact [8018e361e3]:


206
207
208
209
210
211
212
213

214
215
216
217

218
219
220
221
222
223
224
206
207
208
209
210
211
212

213
214
215
216

217
218
219
220
221
222
223
224







-
+



-
+







		   (if (and (number? delay-wait)
			    (> delay-wait 0))
		       (begin
			 (debug:print 0 *default-log-port* "Server is loaded, delaying "delay-wait" seconds")
			 (thread-sleep! delay-wait)))))
	     (case status
	       ((busy) ;; result will be how long the server wants you to delay
		(debug:print 0 *default-log-port* "WARNING: server is overloaded, will try again in "result" seconds.")
		(debug:print 0 *default-log-port* "WARNING: server for "dbfname" is overloaded, will try again in "result" seconds.")
		(thread-sleep! (if (number? result) result 2))
		(tt:handler  ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
	       ((loaded)
		(debug:print 0 *default-log-port* "WARNING: server is loaded, will try again in a 1/4 second.")
		(debug:print 0 *default-log-port* "WARNING: server for "dbfname" is loaded, will try again in a 1/4 second.")
		(thread-sleep! 0.25)
		(tt:handler  ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
	       (else
		result)))
	    (else
	     (if (not res)
		 (let* ((host    (tt-conn-host conn))
265
266
267
268
269
270
271
272







273
274
275
276
277
278
279
265
266
267
268
269
270
271

272
273
274
275
276
277
278
279
280
281
282
283
284
285







-
+
+
+
+
+
+
+







;; returns list of (host port startseconds server-id servinfofile)
;;
(define (tt:get-server-info-sorted ttdat dbfname)
  (let* ((areapath (tt-areapath ttdat))
	 (sfiles   (tt:find-server areapath dbfname))
	 (sdats    (filter car (map tt:server-get-info sfiles))) ;; first element is #f if the file disappeared while being read
	 (sorted   (sort sdats (lambda (a b)
				 (< (list-ref a 2)(list-ref b 2))))))
				 (< (list-ref a 2)(list-ref b 2)))))
	 (count    0))
    (for-each
     (lambda (rec)
       (debug:print 0 *default-log-port* "SERVER #"count": "(string-intersperse (map conc sorted) ", "))
       (set! count (+ count 1)))
     sorted)
    sorted))
    
(define (tt:get-current-server-info ttdat dbfname)
  (assert (tt-areapath ttdat) "FATAL: areapath not set in ttdat.")
  ;;
  ;; TODO - replace most of below with tt;get-server-info-sorted
  ;;
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407





















408
409
410
411
412
413
414
415
416







417
418

419
420
421
422
423
424



425
426
427
428
429
430
431
432
433
434
435
436
437

438

439

440
441
442
443
444
445
446
391
392
393
394
395
396
397
















398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423




424
425
426
427
428
429
430
431

432
433
434
435
436


437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453

454
455
456
457
458
459
460
461
462
463







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+





-
-
-
-
+
+
+
+
+
+
+

-
+




-
-
+
+
+













+
-
+

+







    (thread-sleep! 0.05) ;; any real need for delay here?
    (let loop ()
      (let* ((servers (tt:get-server-info-sorted ttdat dbfname))
	     (ok      (cond
		       ((null? servers) #f) ;; not ok
		       ((equal? (list-ref (car servers) 6) ;; compare the servinfofile
				(tt-servinf-file ttdat))
			(debug:print-info 0 *default-log-port* "Keep running, I'm the top server on "(tt-host ttdat)":"(tt-port ttdat))
			(if db-locked-in
			    #t
			    (let* ((lockinfo  (dbfile:with-no-sync-db nosyncdbpath
								      (lambda (db)
									(db:no-sync-get-lock db dbfname))))
				   (success   (car lockinfo)))
			      (if success
				  (begin
				    (tt-state-set! ttdat 'running)
				    (debug:print 0 *default-log-port* "Got server lock for "dbfname)
				    (set! db-locked-in #t)
				    #t)
				  (begin
				    (debug:print 0 *default-log-port* "Failed to get server lock for "dbfname)
				    #f)))))
			(let* ((res (if db-locked-in
					#t
					(let* ((lockinfo  (dbfile:with-no-sync-db
							   nosyncdbpath
							   (lambda (db)
							     (db:no-sync-get-lock db dbfname))))
					       (success   (car lockinfo)))
					  (if success
					      (begin
						(tt-state-set! ttdat 'running)
						(debug:print 0 *default-log-port* "Got server lock for "
							     dbfname)
						(set! db-locked-in #t)
						#t)
					      (begin
						(debug:print 0 *default-log-port* "Failed to get server lock for "dbfname)
						#f))))))
			  (if res
			      (debug:print-info 0 *default-log-port* "Keep running, I'm the top server for "
						dbfname" on "(tt-host ttdat)":"(tt-port ttdat)))
			  res))
		       (else
			(debug:print-info 0 *default-log-port* "I'm not the lead server: "servers)
			(let* ((leadsrv (car servers)))
			  (match leadsrv
			    ((host port startseconds server-id pid dbfname servinfofile)
			     (if (tt:ping host port server-id)
				 #f ;; not the server, but all good, want to exit
				 (if (and (file-exists? servinfofile)
					  (> (- (current-seconds)(file-modification-time servinfofile)) 15))
			     (let* ((res  (tt:ping host port server-id)))
			       (debug:print-info 0 *default-log-port* "Ping to "host":"port", with server-id "server-id
						 ", and file "servinfofile" returned "res)
			       (if res
				   #f ;; not the server, but all good, want to exit
				   (if (and (file-exists? servinfofile)
					  (> (- (current-seconds)(file-modification-time servinfofile)) 30))
				     (begin
				       ;; can't ping and file has been on disk 5 seconds, go ahead and try to remove it
				       ;; can't ping and file has been on disk 15 seconds, go ahead and try to remove it
				       (debug:print-info 0 *default-log-port* "Removing apparently dead server info file: "servinfofile)
				       (delete-file* servinfofile)
				       #t) ;; not the server but the server is not reachable
				     (begin
				       (debug:print 0 *default-log-port* "I'm not the server but will try again since "servinfofile" is fresh")
				       #t))))
				       (debug:print 0 *default-log-port* "I'm not the server but could not ping "host":"port", trying again.")
				       (thread-sleep! 1) ;; just because
				       #t)))))
			    (else ;; should never get here
			     (debug:print 0 *default-log-port* "BAD SERVER RECORD: "leadsrv)
			     (assert #f "Bad server record "leadsrv))))))))
	(if ok
	    ;; (if (> *api-process-request-count* 0) ;; have requests in flight
	    ;;	(tt-last-access-set! ttdat (current-seconds)))
	    (tt-last-access-set! ttdat *db-last-access*) ;; bit silly, just use db-last-access
	    (begin
	      (cleanup)
	      (exit)))

	(let* ((last-update (dbr:dbstruct-last-update dbstruct))
	       (curr-secs   (current-seconds)))
	  (if (and (eq? (tt-state ttdat) 'running)
	  (if (> (- curr-secs last-update) 3) ;; every 3-4 seconds update the db? maybe this should be refresh the inmem?
		   (> (- curr-secs last-update) 3)) ;; every 3-4 seconds update the db? maybe this should be refresh the inmem?
	      (begin
		(set! (file-modification-time (tt-servinf-file ttdat)) (current-seconds))
		((dbr:dbstruct-sync-proc dbstruct) last-update)
		(dbr:dbstruct-last-update-set! dbstruct curr-secs))))
	  
	(if (< (- (current-seconds) (tt-last-access ttdat)) 60)
	    (begin
	      (thread-sleep! 5)
	      (loop)))))