Megatest

Check-in [047f95fcaf]
Login
Overview
Comment:Corrected arg errors in calls to cautious-open-database, made choose-server wait 10 seconds after starting a server before starting another. Corrected busy server logic in keep-running.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.80
Files: files | file ages | folders
SHA1: 047f95fcaf1f9cef5903cf9244c450814a9cc25d
User & Date: mmgraham on 2022-12-16 15:49:02
Other Links: branch diff | manifest | tags
Context
2022-12-30
16:18
add wal and shm file times for sync time comparison. Added checking for existence of servers and db files. check-in: 999f3281a2 user: mmgraham tags: v1.80
2022-12-16
15:49
Corrected arg errors in calls to cautious-open-database, made choose-server wait 10 seconds after starting a server before starting another. Corrected busy server logic in keep-running. check-in: 047f95fcaf user: mmgraham tags: v1.80
2022-12-14
17:36
fixed a few crashes in the server log check-in: 5fd84cf5e2 user: mmgraham tags: v1.80
Changes

Modified db.scm from [3cf6da4c07] to [9144f93c2d].

520
521
522
523
524
525
526

527
528
529
530
531
532
533
			   (debug:print-info 0 *default-log-port* "Sync - I do not see file " destfile)
			   0)))
	      (changed ( < (- time2 time1) 6.0)) ;; dest db not updated within last 6 seconds

      (do-cp (cond
		      ((not (file-exists? destfile)) ;; shouldn't happen, but this might recover
		       (debug:print-info 2 *default-log-port* "File " destfile " not found. Copying "srcfile" to "destfile)

                       (system (conc "/bin/mkdir -p " dest-directory))
                       (system (conc "/bin/cp " srcfile " " destfile))
		       #t)
		      (changed ;; (and changed
		       ;; (> (- (current-seconds) time1) 3)) ;; if file is changed and three seconds have passed.
		       #t)
		      ((and changed *time-to-exit*) ;; last sync







>







520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
			   (debug:print-info 0 *default-log-port* "Sync - I do not see file " destfile)
			   0)))
	      (changed ( < (- time2 time1) 6.0)) ;; dest db not updated within last 6 seconds

      (do-cp (cond
		      ((not (file-exists? destfile)) ;; shouldn't happen, but this might recover
		       (debug:print-info 2 *default-log-port* "File " destfile " not found. Copying "srcfile" to "destfile)
                       ;; TODO: Need to fix this for WAL mod. Can't just copy.
                       (system (conc "/bin/mkdir -p " dest-directory))
                       (system (conc "/bin/cp " srcfile " " destfile))
		       #t)
		      (changed ;; (and changed
		       ;; (> (- (current-seconds) time1) 3)) ;; if file is changed and three seconds have passed.
		       #t)
		      ((and changed *time-to-exit*) ;; last sync

Modified dbfile.scm from [e7b0035e4c] to [fc2ee87f83].

349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
	      (dbfile:print-err "INFO: dbfile:cautious-open-database: journal file "
				busy-file" exists, trying again in few seconds."))
	  (thread-sleep! 1)
	  (if (eq? tries-left 2)
	      (begin
	  	(dbfile:print-err "INFO: forcing journal rollup "busy-file)
	  	(dbfile:brute-force-salvage-db fname)))
	  (dbfile:cautious-open-database fname init-proc sync-mode: sync-mode journal-mode (- tries-left 1)))
	
	(let* ((result (condition-case
		        (if dir-access
			    (dbfile:with-simple-file-lock
			     (conc fname ".lock")
			     (lambda ()
			       (let* ((db-exists (file-exists? fname))







|







349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
	      (dbfile:print-err "INFO: dbfile:cautious-open-database: journal file "
				busy-file" exists, trying again in few seconds."))
	  (thread-sleep! 1)
	  (if (eq? tries-left 2)
	      (begin
	  	(dbfile:print-err "INFO: forcing journal rollup "busy-file)
	  	(dbfile:brute-force-salvage-db fname)))
	  (dbfile:cautious-open-database fname init-proc sync-mode journal-mode (- tries-left 1)))
	
	(let* ((result (condition-case
		        (if dir-access
			    (dbfile:with-simple-file-lock
			     (conc fname ".lock")
			     (lambda ()
			       (let* ((db-exists (file-exists? fname))

Modified http-transport.scm from [11dd7abcab] to [e5186c35f3].

151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
	(config-use-proxy (equal? (configf:lookup *configdat* "client" "use-http_proxy") "yes")))
    (if (not config-use-proxy)
	(determine-proxy (constantly #f)))
    (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname)
    (handle-exceptions
	exn
	(begin
	  (print-error-message exn)
	  (if (< portnum 64000)
	      (begin 
		(debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...")
		(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
		(debug:print 5 *default-log-port* "exn=" (condition->list exn))
		(portlogger:open-run-close portlogger:set-failed portnum)
		(debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port")







|







151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
	(config-use-proxy (equal? (configf:lookup *configdat* "client" "use-http_proxy") "yes")))
    (if (not config-use-proxy)
	(determine-proxy (constantly #f)))
    (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname)
    (handle-exceptions
	exn
	(begin
	  ;; (print-error-message exn)
	  (if (< portnum 64000)
	      (begin 
		(debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...")
		(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
		(debug:print 5 *default-log-port* "exn=" (condition->list exn))
		(portlogger:open-run-close portlogger:set-failed portnum)
		(debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port")
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
		(handle-exceptions
		    exn
		    (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on info file " servinfofile ". Are you out of space on that disk? exn=" exn)
		    (if (and ;; (< (- (current-seconds) server-start-time) 600) ;; run for ten minutes for experiment, 3600 thereafter
			     (not *server-overloaded*)
			     (file-exists? servinfofile))
			(change-file-times servinfofile curr-time curr-time)))
		(if (or (common:low-noise-print 120 "start new server")
			(> *api-process-request-count* 50)) ;; if this server is kind of busy start up another
		    (begin
		      (debug:print-info 0 *default-log-port* "Server is busy, parallel-api-count "*api-process-request-count*", start another if possible...")
		      (server:kind-run *toppath*)
		      (if (> *api-process-request-count* 100)
			  (begin
			    (debug:print-info 0 *default-log-port* "Server is overloaded at parallel-api-count="*api-process-request-count*", removing "servinfofile) 
			    (delete-file* servinfofile)))))))
          (loop 0 server-state bad-sync-count (current-milliseconds)))
         (else
          (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
          (http-transport:server-shutdown port)))))))

(define (http-transport:server-shutdown port)







|


|



|







563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
		(handle-exceptions
		    exn
		    (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on info file " servinfofile ". Are you out of space on that disk? exn=" exn)
		    (if (and ;; (< (- (current-seconds) server-start-time) 600) ;; run for ten minutes for experiment, 3600 thereafter
			     (not *server-overloaded*)
			     (file-exists? servinfofile))
			(change-file-times servinfofile curr-time curr-time)))
		(if (and (common:low-noise-print 120 "start new server")
			(> *api-process-request-count* 50)) ;; if this server is kind of busy start up another
		    (begin
		      (debug:print-info 0 *default-log-port* "Server is busy, api-count "*api-process-request-count*", start another if possible...")
		      (server:kind-run *toppath*)
		      (if (> *api-process-request-count* 100)
			  (begin
			    (debug:print-info 0 *default-log-port* "Server is overloaded at api-count=" *api-process-request-count*", removing "servinfofile) 
			    (delete-file* servinfofile)))))))
          (loop 0 server-state bad-sync-count (current-milliseconds)))
         (else
          (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
          (http-transport:server-shutdown port)))))))

(define (http-transport:server-shutdown port)

Modified server.scm from [80dd7784f0] to [c1568ea04e].

447
448
449
450
451
452
453




454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470











471
472
473
474
475
476
477
478


479
480
481
482
483
484
485
     ((and havehome wearehome (< (length thisservrs) 20)) #t) ;; we are home and less than 20 servers, ok to start another
     ((and havehome (not wearehome)) #f)     ;; we are not the home host
     ((and havehome wearehome (>= (length thisservrs) 20)) #f) ;; have enough running
     (else
      (debug:print 0 *default-log-port* "WARNING: Unrecognised scenario, servrs="servrs", thishostip="thishostip", thisservrs="thisservrs)
      #t))))
	 




;; oldest server alive determines host then choose random of youngest
;; five servers on that host
;;
;; mode:
;;   best - get best server (random of newest five)
;;   home - get home host based on oldest server
;;   info - print info
(define (server:choose-server areapath #!optional (mode 'best))
  ;; age is current-starttime
  ;; find oldest alive
  ;;   1. sort by age ascending and ping until good
  ;; find alive rand from youngest
  ;;   1. sort by age descending
  ;;   2. take five
  ;;   3. check alive, discard if not and repeat
  ;; first we clean up old server files
  (server:clean-up-old areapath)











  (let* ((serversdat  (server:get-servers-info areapath))
	 (servkeys    (hash-table-keys serversdat))
	 (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last
			  (sort servkeys ;; list of "host:port"
				(lambda (a b)
				  (>= (list-ref (hash-table-ref serversdat a) 2)
				      (list-ref (hash-table-ref serversdat b) 2))))
			  '())))


    (if (not (null? by-time-asc))
	(let* ((oldest     (last by-time-asc))
	       (oldest-dat (hash-table-ref serversdat oldest))
	       (host       (list-ref oldest-dat 0))
	       (all-valid  (filter (lambda (x)
				     (equal? host (list-ref (hash-table-ref serversdat x) 0)))
				   by-time-asc))







>
>
>
>

















>
>
>
>
>
>
>
>
>
>
>








>
>







447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
     ((and havehome wearehome (< (length thisservrs) 20)) #t) ;; we are home and less than 20 servers, ok to start another
     ((and havehome (not wearehome)) #f)     ;; we are not the home host
     ((and havehome wearehome (>= (length thisservrs) 20)) #f) ;; have enough running
     (else
      (debug:print 0 *default-log-port* "WARNING: Unrecognised scenario, servrs="servrs", thishostip="thishostip", thisservrs="thisservrs)
      #t))))
	 

(define server-last-start 0)


;; oldest server alive determines host then choose random of youngest
;; five servers on that host
;;
;; mode:
;;   best - get best server (random of newest five)
;;   home - get home host based on oldest server
;;   info - print info
(define (server:choose-server areapath #!optional (mode 'best))
  ;; age is current-starttime
  ;; find oldest alive
  ;;   1. sort by age ascending and ping until good
  ;; find alive rand from youngest
  ;;   1. sort by age descending
  ;;   2. take five
  ;;   3. check alive, discard if not and repeat
  ;; first we clean up old server files
  (server:clean-up-old areapath)
  (let* ((since-last (- (current-seconds) server-last-start))
        (server-start-delay 10))     
    (if ( < (- (current-seconds) server-last-start) 10 )
      (begin
        (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
        (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds")
        (thread-sleep! server-start-delay)
      )
      (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
    )
  )
  (let* ((serversdat  (server:get-servers-info areapath))
	 (servkeys    (hash-table-keys serversdat))
	 (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last
			  (sort servkeys ;; list of "host:port"
				(lambda (a b)
				  (>= (list-ref (hash-table-ref serversdat a) 2)
				      (list-ref (hash-table-ref serversdat b) 2))))
			  '())))
    (debug:print 2 *default-log-port* "server:choose-server: serversdat: " serversdat)
    (debug:print 2 *default-log-port* "server:choose-server: servkeys: " servkeys)
    (if (not (null? by-time-asc))
	(let* ((oldest     (last by-time-asc))
	       (oldest-dat (hash-table-ref serversdat oldest))
	       (host       (list-ref oldest-dat 0))
	       (all-valid  (filter (lambda (x)
				     (equal? host (list-ref (hash-table-ref serversdat x) 0)))
				   by-time-asc))
512
513
514
515
516
517
518

519
520
521
522
523
524
525
526
			  (hash-table-ref serversdat (list-ref best-ten (random len)))))
	    ((count)(length all-valid))
	    (else
	     (debug:print 0 *default-log-port* "ERROR: invalid command "mode)
	     #f)))
	(begin
	  (server:run areapath)

	  (thread-sleep! 3)
	  (case mode
	    ((homehost) (cons #f #f))
	    (else	#f))))))

(define (server:get-servinfo-dir areapath)
  (let* ((spath (conc areapath"/.servinfo")))
    (if (not (file-exists? spath))







>
|







529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
			  (hash-table-ref serversdat (list-ref best-ten (random len)))))
	    ((count)(length all-valid))
	    (else
	     (debug:print 0 *default-log-port* "ERROR: invalid command "mode)
	     #f)))
	(begin
	  (server:run areapath)
          (set! server-last-start (current-seconds))
	  ;; (thread-sleep! 3)
	  (case mode
	    ((homehost) (cons #f #f))
	    (else	#f))))))

(define (server:get-servinfo-dir areapath)
  (let* ((spath (conc areapath"/.servinfo")))
    (if (not (file-exists? spath))