Index: dashboard-transport-mode.scm ================================================================== --- dashboard-transport-mode.scm +++ dashboard-transport-mode.scm @@ -3,10 +3,13 @@ ;; ;; sync-method: 'original, 'attach or 'none ;; cache-method: 'tmp or 'none ;; rmt:transport-mode: 'http, 'tcp, 'nfs ;; +;; 'auto +;; read-only query and no servers started - mrah/ +;; ;; NOTE: NOT ALL COMBINATIONS WORK ;; ;;====================================================================== ;; uncomment this block to test without tcp or cachedb Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -1026,10 +1026,11 @@ (if dbfname (tt:start-server tl #f dbfname api:tcp-dispatch-request-make-handler keys) (begin (debug:print 0 *default-log-port* "ERROR: transport mode is tcp - -db is required.") (exit 1))))) + ((nfs)(debug:print 0 *default-log-port* "WARNING: server start called in nfs mode '"(rmt:transport-mode))) (else (debug:print 0 *default-log-port* "ERROR: rmt:transport-mode value not recognised "(rmt:transport-mode)))) (set! *didsomething* #t))) ;; The adjutant is a bit different, it does NOT run (launch:setup) as it is not necessarily tied to ;; a specific Megatest area. Detail are being hashed out and this may change. Index: rmtmod.scm ================================================================== --- rmtmod.scm +++ rmtmod.scm @@ -950,11 +950,10 @@ #f))) #f))) ;; not true strickly speaking, might be runremote was not yet initialized. (define (make-and-init-remote areapath) (case (rmt:transport-mode) - ((http)(make-remote)) ((tcp) (tt:make-remote areapath)) (else #f))) ;; how to make area-dat (define (rmt:set-ttdat areapath ttdat) Index: runsmod.scm ================================================================== --- runsmod.scm +++ runsmod.scm @@ -1188,13 +1188,15 @@ (if (and (not (rmt:on-homehost?)) maxload) ;; only gate if maxload is specified, NOTE: maxload is normalized, i.e. load=1 means all cpus fully utilized (common:wait-for-normalized-load maxload "Waiting for load to drop before starting more tests" #f)) ;; jobtools maxhomehostload is intended to prevent overloading on the homehost which can cause database corruption issues - (if maxhomehostload - (common:wait-for-homehost-load maxhomehostload - (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload)))))) + ;; (if maxhomehostload + ;; (common:wait-for-homehost-load + ;; maxhomehostload + ;; (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload))) + ))) (if (and (not (null? prereqs-not-met)) (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) Index: servermod.scm ================================================================== --- servermod.scm +++ servermod.scm @@ -272,29 +272,29 @@ ;; or another host? ;; ;; returns #t => ok to start another server ;; #f => not ok to start another server ;; -(define (server:minimal-check areapath) - (server:clean-up-old areapath) - (let* ((srvdir (server:get-servinfo-dir areapath)) ;; (conc areapath"/.servinfo")) - (servrs (glob (conc srvdir"/*"))) - (thishostip (server:get-best-guess-address (get-host-name))) - (thisservrs (glob (conc srvdir"/"thishostip":*"))) - (homehostinf (server:choose-server areapath 'homehost)) - (havehome (car homehostinf)) - (wearehome (cdr homehostinf))) - (debug:print-info 0 *default-log-port* thishostip", have homehost: "havehome", we are homehost: "wearehome - ", numservers: "(length thisservrs)) - (cond - ((not havehome) #t) ;; no homehost yet, go for it - ((and havehome wearehome (< (length thisservrs) 20)) #t) ;; we are home and less than 20 servers, ok to start another - ((and havehome (not wearehome)) #f) ;; we are not the home host - ((and havehome wearehome (>= (length thisservrs) 20)) #f) ;; have enough running - (else - (debug:print 0 *default-log-port* "WARNING: Unrecognised scenario, servrs="servrs", thishostip="thishostip", thisservrs="thisservrs) - #t)))) +;; (define (server:minimal-check areapath) +;; (server:clean-up-old areapath) +;; (let* ((srvdir (server:get-servinfo-dir areapath)) ;; (conc areapath"/.servinfo")) +;; (servrs (glob (conc srvdir"/*"))) +;; (thishostip (server:get-best-guess-address (get-host-name))) +;; (thisservrs (glob (conc srvdir"/"thishostip":*"))) +;; (homehostinf (server:choose-server areapath 'homehost)) +;; (havehome (car homehostinf)) +;; (wearehome (cdr homehostinf))) +;; (debug:print-info 0 *default-log-port* thishostip", have homehost: "havehome", we are homehost: "wearehome +;; ", numservers: "(length thisservrs)) +;; (cond +;; ((not havehome) #t) ;; no homehost yet, go for it +;; ((and havehome wearehome (< (length thisservrs) 20)) #t) ;; we are home and less than 20 servers, ok to start another +;; ((and havehome (not wearehome)) #f) ;; we are not the home host +;; ((and havehome wearehome (>= (length thisservrs) 20)) #f) ;; have enough running +;; (else +;; (debug:print 0 *default-log-port* "WARNING: Unrecognised scenario, servrs="servrs", thishostip="thishostip", thisservrs="thisservrs) +;; #t)))) (define server-last-start 0) @@ -303,87 +303,87 @@ ;; ;; mode: ;; best - get best server (random of newest five) ;; home - get home host based on oldest server ;; info - print info -(define (server:choose-server areapath #!optional (mode 'best)) - ;; age is current-starttime - ;; find oldest alive - ;; 1. sort by age ascending and ping until good - ;; find alive rand from youngest - ;; 1. sort by age descending - ;; 2. take five - ;; 3. check alive, discard if not and repeat - ;; first we clean up old server files - (assert (eq? (rmt:transport-mode) 'http) "FATAL: server:run called with rmt:transport-mode="(rmt:transport-mode)) - (server:clean-up-old areapath) - (let* ((since-last (- (current-seconds) server-last-start)) - (server-start-delay 10)) - (if ( < (- (current-seconds) server-last-start) 10 ) - (begin - (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start)) - (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds") - (thread-sleep! server-start-delay) - ) - (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start)) - ) - ) - (let* ((serversdat (server:get-servers-info areapath)) - (servkeys (hash-table-keys serversdat)) - (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last - (sort servkeys ;; list of "host:port" - (lambda (a b) - (>= (list-ref (hash-table-ref serversdat a) 2) - (list-ref (hash-table-ref serversdat b) 2)))) - '()))) - (debug:print 2 *default-log-port* "server:choose-server: serversdat: " serversdat) - (debug:print 2 *default-log-port* "server:choose-server: servkeys: " servkeys) - (if (not (null? by-time-asc)) - (let* ((oldest (last by-time-asc)) - (oldest-dat (hash-table-ref serversdat oldest)) - (host (list-ref oldest-dat 0)) - (all-valid (filter (lambda (x) - (equal? host (list-ref (hash-table-ref serversdat x) 0))) - by-time-asc)) - (best-ten (lambda () - (if (> (length all-valid) 11) - (take (drop-right all-valid 1) 10) ;; remove the oldest from consideration so it can age out - (if (> (length all-valid) 8) - (drop-right all-valid 1) - all-valid)))) - (names->dats (lambda (names) - (map (lambda (x) - (hash-table-ref serversdat x)) - names))) - (am-home? (lambda () - (let* ((currhost (get-host-name)) - (bestadrs (server:get-best-guess-address currhost))) - (or (equal? host currhost) - (equal? host bestadrs)))))) - (case mode - ((info) - (debug:print 0 *default-log-port* "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid) - (debug:print 0 *default-log-port* "youngest: "(hash-table-ref serversdat (car all-valid)))) - ((home) host) - ((homehost) (cons host (am-home?))) ;; shut up old code - ((home?) (am-home?)) - ((best-ten)(names->dats (best-ten))) - ((all-valid)(names->dats all-valid)) - ((best) (let* ((best-ten (best-ten)) - (len (length best-ten))) - (hash-table-ref serversdat (list-ref best-ten (random len))))) - ((count)(length all-valid)) - (else - (debug:print 0 *default-log-port* "ERROR: invalid command "mode) - #f))) - (begin - (server:run areapath) - (set! server-last-start (current-seconds)) - ;; (thread-sleep! 3) - (case mode - ((homehost) (cons #f #f)) - (else #f)))))) +;; (define (server:choose-server areapath #!optional (mode 'best)) +;; ;; age is current-starttime +;; ;; find oldest alive +;; ;; 1. sort by age ascending and ping until good +;; ;; find alive rand from youngest +;; ;; 1. sort by age descending +;; ;; 2. take five +;; ;; 3. check alive, discard if not and repeat +;; ;; first we clean up old server files +;; (assert (eq? (rmt:transport-mode) 'http) "FATAL: server:run called with rmt:transport-mode="(rmt:transport-mode)) +;; (server:clean-up-old areapath) +;; (let* ((since-last (- (current-seconds) server-last-start)) +;; (server-start-delay 10)) +;; (if ( < (- (current-seconds) server-last-start) 10 ) +;; (begin +;; (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start)) +;; (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds") +;; (thread-sleep! server-start-delay) +;; ) +;; (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start)) +;; ) +;; ) +;; (let* ((serversdat (server:get-servers-info areapath)) +;; (servkeys (hash-table-keys serversdat)) +;; (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last +;; (sort servkeys ;; list of "host:port" +;; (lambda (a b) +;; (>= (list-ref (hash-table-ref serversdat a) 2) +;; (list-ref (hash-table-ref serversdat b) 2)))) +;; '()))) +;; (debug:print 2 *default-log-port* "server:choose-server: serversdat: " serversdat) +;; (debug:print 2 *default-log-port* "server:choose-server: servkeys: " servkeys) +;; (if (not (null? by-time-asc)) +;; (let* ((oldest (last by-time-asc)) +;; (oldest-dat (hash-table-ref serversdat oldest)) +;; (host (list-ref oldest-dat 0)) +;; (all-valid (filter (lambda (x) +;; (equal? host (list-ref (hash-table-ref serversdat x) 0))) +;; by-time-asc)) +;; (best-ten (lambda () +;; (if (> (length all-valid) 11) +;; (take (drop-right all-valid 1) 10) ;; remove the oldest from consideration so it can age out +;; (if (> (length all-valid) 8) +;; (drop-right all-valid 1) +;; all-valid)))) +;; (names->dats (lambda (names) +;; (map (lambda (x) +;; (hash-table-ref serversdat x)) +;; names))) +;; (am-home? (lambda () +;; (let* ((currhost (get-host-name)) +;; (bestadrs (server:get-best-guess-address currhost))) +;; (or (equal? host currhost) +;; (equal? host bestadrs)))))) +;; (case mode +;; ((info) +;; (debug:print 0 *default-log-port* "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid) +;; (debug:print 0 *default-log-port* "youngest: "(hash-table-ref serversdat (car all-valid)))) +;; ((home) host) +;; ((homehost) (cons host (am-home?))) ;; shut up old code +;; ((home?) (am-home?)) +;; ((best-ten)(names->dats (best-ten))) +;; ((all-valid)(names->dats all-valid)) +;; ((best) (let* ((best-ten (best-ten)) +;; (len (length best-ten))) +;; (hash-table-ref serversdat (list-ref best-ten (random len))))) +;; ((count)(length all-valid)) +;; (else +;; (debug:print 0 *default-log-port* "ERROR: invalid command "mode) +;; #f))) +;; (begin +;; (server:run areapath) +;; (set! server-last-start (current-seconds)) +;; ;; (thread-sleep! 3) +;; (case mode +;; ((homehost) (cons #f #f)) +;; (else #f)))))) (define (server:get-servinfo-dir areapath) (let* ((spath (conc areapath"/.servinfo"))) (if (not (file-exists? spath)) (create-directory spath #t)) @@ -451,11 +451,11 @@ ;; transport to be used ;; http - use http-transport ;; http-read-cached - use http-transport for writes but in-mem cached for reads (rmode 'http) - (hh-dat (let ((res (or (server:choose-server *toppath* 'homehost) + (hh-dat (let ((res (or ;; (server:choose-server *toppath* 'homehost) (cons #f #f)))) (assert (pair? res)(conc "FATAL: hh-dat should be a pair, got "res)) res)) (server-url #f) ;; (server:check-if-running *toppath*) #f)) (server-id #f) Index: tasksmod.scm ================================================================== --- tasksmod.scm +++ tasksmod.scm @@ -1220,27 +1220,27 @@ ;;====================================================================== ;; (begin ;; (debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.") ;; (exit 1)))) -(define (common:wait-for-homehost-load maxnormload msg) - (let loop ((start-time (current-seconds))) ;; we saw some instances of this being called before *toppath* was set. This might be an early setup race. This delay should help but it is impossible to test... - (if (not *toppath*) - (begin - (debug:print 0 *default-log-port* "ERROR: common:wait-for-homehost-load called before *toppath* set.") - (thread-sleep! 30) - (if (< (- (current-seconds) start-time) 300) - (loop start-time))))) - (case (rmt:transport-mode) - ((http) - (let* ((hh-dat (if (rmt:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. - #f - (server:choose-server *toppath* 'homehost))) - (hh (if hh-dat (car hh-dat) #f))) - (common:wait-for-normalized-load maxnormload msg hh))) - (else - (common:wait-for-normalized-load maxnormload msg (get-host-name))))) +;; (define (common:wait-for-homehost-load maxnormload msg) +;; (let loop ((start-time (current-seconds))) ;; we saw some instances of this being called before *toppath* was set. This might be an early setup race. This delay should help but it is impossible to test... +;; (if (not *toppath*) +;; (begin +;; (debug:print 0 *default-log-port* "ERROR: common:wait-for-homehost-load called before *toppath* set.") +;; (thread-sleep! 30) +;; (if (< (- (current-seconds) start-time) 300) +;; (loop start-time))))) +;; (case (rmt:transport-mode) +;; ((http) +;; (let* ((hh-dat (if (rmt:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. +;; #f +;; (server:choose-server *toppath* 'homehost))) +;; (hh (if hh-dat (car hh-dat) #f))) +;; (common:wait-for-normalized-load maxnormload msg hh))) +;; (else +;; (common:wait-for-normalized-load maxnormload msg (get-host-name))))) (define (configf:write-alist cdat fname) (if (not (common:faux-lock fname)) (debug:print 0 *default-log-port* "INFO: Could not get lock on " fname))