Index: archive.scm ================================================================== --- archive.scm +++ archive.scm @@ -346,11 +346,11 @@ (rsync-exe (or (configf:lookup *configdat* "archive" "rsync") "rsync")) (print-prefix "Running: ") (archive-info (archive:allocate-new-archive-block blockid-cache *toppath* tsname min-space target-patt run-patt "megatest-db")) (archive-dir (if archive-info (cdr archive-info) #f)) (archive-id (if archive-info (car archive-info) -1)) - (home-host (common:get-homehost)) + (home-host (server:get-homehost)) (archive-time (seconds->std-time-str (current-seconds))) (archive-staging-db (conc *toppath* "/.db-snapshot/archive_" archive-time)) (tmp-db-path (conc (common:get-db-tmp-area) "/megatest.db")) (dbfile (conc archive-staging-db "/megatest.db"))) (create-directory archive-staging-db #t) Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -316,11 +316,11 @@ (define (common:logpro-exit-code->test-status exit-code) (status-sym->string (common:logpro-exit-code->status-sym exit-code))) (defstruct remote - (hh-dat (common:get-homehost)) ;; homehost record ( addr . hhflag ) + (hh-dat (server:get-homehost)) ;; homehost record ( addr . hhflag ) (server-url #f) ;; (server:check-if-running *toppath*) #f)) (server-id #f) (server-info (if *toppath* (server:check-if-running *toppath*) #f)) (last-server-check 0) ;; last time we checked to see if the server was alive (connect-time (current-seconds)) @@ -1306,72 +1306,10 @@ ;;====================================================================== ;; logic for getting homehost. Returns (host . at-home) ;; IF *toppath* is not set, wait up to five seconds trying every two seconds ;; (this is to accomodate the watchdog) ;; -(define (common:get-homehost #!key (trynum 5)) - ;; called often especially at start up. use mutex to eliminate collisions - (mutex-lock! *homehost-mutex*) - (cond - (*home-host* - (mutex-unlock! *homehost-mutex*) - *home-host*) - ((not *toppath*) - (mutex-unlock! *homehost-mutex*) - (launch:setup) ;; safely mutexed now - (if (> trynum 0) - (begin - (thread-sleep! 2) - (common:get-homehost trynum: (- trynum 1))) - #f)) - (else - (let* ((currhost (get-host-name)) - (bestadrs (server:get-best-guess-address currhost)) - ;; first look in config, then look in file .homehost, create it if not found - (homehost (or (configf:lookup *configdat* "server" "homehost" ) - (handle-exceptions - exn - (if (> trynum 0) - (let ((delay-time (* (- 5 trynum) 5))) - (mutex-unlock! *homehost-mutex*) - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying " - delay-time " seconds and trying again, message: " ((condition-property-accessor 'exn 'message) exn) - ", exn=" exn) - (thread-sleep! delay-time) - (common:get-homehost trynum: (- trynum 1))) - (begin - (mutex-unlock! *homehost-mutex*) - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time) - "] Failed to read .homehost file after trying five times. Giving up and exiting, message: " - ((condition-property-accessor 'exn 'message) exn)) - (exit 1))) - (let ((hhf (conc *toppath* "/.homehost"))) - (if (common:file-exists? hhf) - (with-input-from-file hhf read-line) - (if (file-write-access? *toppath*) - (begin - (with-output-to-file hhf - (lambda () - (print bestadrs))) - (begin - (mutex-unlock! *homehost-mutex*) - (car (common:get-homehost)))) - #f)))))) - (at-home (or (equal? homehost currhost) - (equal? homehost bestadrs)))) - (set! *home-host* (cons homehost at-home)) - (mutex-unlock! *homehost-mutex*) - *home-host*)))) - -;;====================================================================== -;; am I on the homehost? -;; -(define (common:on-homehost?) - (let ((hh (common:get-homehost))) - (if hh - (cdr hh) - #f))) ;;====================================================================== ;; do we honor the caches of the config files? ;; (define (common:use-cache?) @@ -2050,11 +1988,11 @@ (if (null? tal) #f (loop (car tal)(cdr tal) best-host))))))))) (define (common:wait-for-homehost-load maxnormload msg) (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. #f - (common:get-homehost))) + (server:get-homehost))) (hh (if hh-dat (car hh-dat) #f))) (common:wait-for-normalized-load maxnormload msg hh))) (define (common:get-num-cpus remote-host) (let* ((actual-host (or remote-host (get-host-name)))) Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -3810,11 +3810,11 @@ ) ) (if (not (common:on-homehost?)) (begin - (debug:print 0 *default-log-port* "WARNING: You are starting the dashboard on a machine that is not the homehost:" (common:get-homehost)) + (debug:print 0 *default-log-port* "WARNING: You are starting the dashboard on a machine that is not the homehost:" (server:get-homehost)) (debug:print 0 *default-log-port* "It will be slower.") )) (if (and (common:file-exists? mtdb-path) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -1563,11 +1563,11 @@ (with-output-to-string (lambda () ;; (list 'hosts hosts) (write (list (list 'testpath test-path) ;; (list 'transport (conc *transport-type*)) ;; (list 'serverinf *server-info*) - (list 'homehost (let* ((hhdat (common:get-homehost))) + (list 'homehost (let* ((hhdat (server:get-homehost))) (if hhdat (car hhdat) #f))) (list 'serverurl (if *runremote* (remote-server-url *runremote*) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -116,11 +116,11 @@ ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"]; ;; DOT SET_HOMEHOST -> MUTEXLOCK; ;; ensure we have a homehost record (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little - (remote-hh-dat-set! runremote (common:get-homehost))) + (remote-hh-dat-set! runremote (server:get-homehost))) ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) (cond #;((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.") Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -126,11 +126,11 @@ (let* ((curr-host (get-host-name)) ;; (attempt-in-progress (server:start-attempted? areapath)) ;; (dot-server-url (server:check-if-running areapath)) (curr-ip (server:get-best-guess-address curr-host)) (curr-pid (current-process-id)) - (homehost (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" )) + (homehost (server:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" )) (target-host (car homehost)) (testsuite (common:get-testsuite-name)) (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log")) (profile-mode (or (configf:lookup *configdat* "misc" "profilesw") "")) @@ -425,33 +425,112 @@ res)) ;; oldest server alive determines host then choose random of youngest ;; five servers on that host ;; -(define (server:choose-server areapath) +;; mode: +;; best - get best server (random of newest five) +;; home - get home host based on oldest server +;; info - print info +(define (server:choose-server areapath #!optional (mode 'best)) ;; age is current-starttime ;; find oldest alive ;; 1. sort by age ascending and ping until good ;; find alive rand from youngest ;; 1. sort by age descending ;; 2. take five ;; 3. check alive, discard if not and repeat (let* ((serversdat (server:get-servers-info areapath)) - (by-time-asc (sort (hash-table-keys serversdat) + (by-time-asc (sort (hash-table-keys serversdat) ;; list of "host:port" (lambda (a b) (>= (list-ref (hash-table-ref serversdat a) 2) (list-ref (hash-table-ref serversdat b) 2)))))) (if (not (null? by-time-asc)) (let* ((oldest (last by-time-asc)) (oldest-dat (hash-table-ref serversdat oldest)) (host (list-ref oldest-dat 1)) - (all-valid (filter (lambda (x)(equal? host (list-ref x 1))) by-time-asc))) - (print "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid) - (print "youngest: "(hash-table-ref serversdat (car all-valid))) - (car all-valid)) + (all-valid (filter (lambda (x) + (equal? host (list-ref (hash-table-ref serversdat x) 1))) + by-time-asc))) + (case mode + ((info) + (print "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid) + (print "youngest: "(hash-table-ref serversdat (car all-valid)))) + ((home) host) + ((best)(if (> (length all-valid) 5) + (map (lambda (x) + (hash-table-ref serversdat x)) + (take all-valid 5)))) + (else + (debug:print 0 *default-log-port* "ERROR: invalid command "mode) + #f)) + #f)))) + +(define (server:get-homehost #!key (trynum 5)) + ;; called often especially at start up. use mutex to eliminate collisions + (mutex-lock! *homehost-mutex*) + (cond + (*home-host* + (mutex-unlock! *homehost-mutex*) + *home-host*) + ((not *toppath*) + (mutex-unlock! *homehost-mutex*) + (launch:setup) ;; safely mutexed now + (if (> trynum 0) + (begin + (thread-sleep! 2) + (server:get-homehost trynum: (- trynum 1))) + #f)) + (else + (let* ((currhost (get-host-name)) + (bestadrs (server:get-best-guess-address currhost)) + ;; first look in config, then look in file .homehost, create it if not found + (homehost (or (configf:lookup *configdat* "server" "homehost" ) + (handle-exceptions + exn + (if (> trynum 0) + (let ((delay-time (* (- 5 trynum) 5))) + (mutex-unlock! *homehost-mutex*) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying " + delay-time " seconds and trying again, message: " ((condition-property-accessor 'exn 'message) exn) + ", exn=" exn) + (thread-sleep! delay-time) + (server:get-homehost trynum: (- trynum 1))) + (begin + (mutex-unlock! *homehost-mutex*) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time) + "] Failed to read .homehost file after trying five times. Giving up and exiting, message: " + ((condition-property-accessor 'exn 'message) exn)) + (exit 1))) + (let ((hhf (conc *toppath* "/.homehost"))) + (if (common:file-exists? hhf) + (with-input-from-file hhf read-line) + (if (file-write-access? *toppath*) + (begin + (with-output-to-file hhf + (lambda () + (print bestadrs))) + (begin + (mutex-unlock! *homehost-mutex*) + (car (server:get-homehost)))) + #f)))))) + (at-home (or (equal? homehost currhost) + (equal? homehost bestadrs)))) + (set! *home-host* (cons homehost at-home)) + (mutex-unlock! *homehost-mutex*) + *home-host*)))) + +;;====================================================================== +;; am I on the homehost? +;; +(define (common:on-homehost?) + (let ((hh (server:get-homehost))) + (if hh + (cdr hh) #f))) - + + ;; kind start up of server, wait before allowing another server for a given ;; area to be launched ;; (define (server:kind-run areapath) ;; look for $MT_RUN_AREA_HOME/logs/server-start-last