Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -656,11 +656,11 @@ (if (args:any? "-run" "-runall" "-remove-runs" "-set-state-status" "-kill-runs" "-kill-rerun") (debug:print 0 *default-log-port* (string-intersperse (argv) " "))) ;; some switches imply homehost. Exit here if not on homehost ;; -(let ((homehost-required (list "-cleanup-db" "-server"))) +(let ((homehost-required (list "-cleanup-db"))) (if (apply args:any? homehost-required) (if (not (common:on-homehost?)) (for-each (lambda (switch) (if (args:get-arg switch) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -445,27 +445,28 @@ (>= (list-ref (hash-table-ref serversdat a) 2) (list-ref (hash-table-ref serversdat b) 2)))))) (if (not (null? by-time-asc)) (let* ((oldest (last by-time-asc)) (oldest-dat (hash-table-ref serversdat oldest)) - (host (list-ref oldest-dat 1)) + (host (list-ref oldest-dat 0)) (all-valid (filter (lambda (x) - (equal? host (list-ref (hash-table-ref serversdat x) 1))) + (equal? host (list-ref (hash-table-ref serversdat x) 0))) by-time-asc))) (case mode ((info) (print "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid) (print "youngest: "(hash-table-ref serversdat (car all-valid)))) ((home) host) ((best)(if (> (length all-valid) 5) (map (lambda (x) (hash-table-ref serversdat x)) - (take all-valid 5)))) + (take all-valid 5)) + all-valid)) (else (debug:print 0 *default-log-port* "ERROR: invalid command "mode) - #f)) - #f)))) + #f))) + #f))) (define (server:get-homehost #!key (trynum 5)) ;; called often especially at start up. use mutex to eliminate collisions (mutex-lock! *homehost-mutex*) (cond @@ -481,45 +482,24 @@ (server:get-homehost trynum: (- trynum 1))) #f)) (else (let* ((currhost (get-host-name)) (bestadrs (server:get-best-guess-address currhost)) - ;; first look in config, then look in file .homehost, create it if not found - (homehost (or (configf:lookup *configdat* "server" "homehost" ) - (handle-exceptions - exn - (if (> trynum 0) - (let ((delay-time (* (- 5 trynum) 5))) - (mutex-unlock! *homehost-mutex*) - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying " - delay-time " seconds and trying again, message: " ((condition-property-accessor 'exn 'message) exn) - ", exn=" exn) - (thread-sleep! delay-time) - (server:get-homehost trynum: (- trynum 1))) - (begin - (mutex-unlock! *homehost-mutex*) - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time) - "] Failed to read .homehost file after trying five times. Giving up and exiting, message: " - ((condition-property-accessor 'exn 'message) exn)) - (exit 1))) - (let ((hhf (conc *toppath* "/.homehost"))) - (if (common:file-exists? hhf) - (with-input-from-file hhf read-line) - (if (file-write-access? *toppath*) - (begin - (with-output-to-file hhf - (lambda () - (print bestadrs))) - (begin - (mutex-unlock! *homehost-mutex*) - (car (server:get-homehost)))) - #f)))))) + (homehost (server:choose-server *toppath* 'home)) (at-home (or (equal? homehost currhost) (equal? homehost bestadrs)))) - (set! *home-host* (cons homehost at-home)) - (mutex-unlock! *homehost-mutex*) - *home-host*)))) + + ;; if no homehost start server, wait a bit and check again + (if homehost + (begin + (set! *home-host* (cons homehost at-home)) + (mutex-unlock! *homehost-mutex*) + *home-host*) + (begin + (server:kind-run *toppath*) + (thread-sleep! 5) + (server:get-homehost trynum: (- trynum 1)))))))) ;;====================================================================== ;; am I on the homehost? ;; (define (common:on-homehost?) @@ -533,12 +513,13 @@ ;; area to be launched ;; (define (server:kind-run areapath) ;; look for $MT_RUN_AREA_HOME/logs/server-start-last ;; and wait for it to be at least seconds old - (server:wait-for-server-start-last-flag areapath) - (if (not (server:check-if-running areapath)) ;; why try if there is already a server running? + ;; (server:wait-for-server-start-last-flag areapath) + (server:run areapath) + #;(if (not (server:check-if-running areapath)) ;; why try if there is already a server running? (let* ((lock-file (conc areapath "/logs/server-start.lock"))) (let* ((start-flag (conc areapath "/logs/server-start-last"))) (common:simple-file-lock-and-wait lock-file expire-time: 25) (debug:print-info 2 *default-log-port* "server:kind-run: touching " start-flag) (system (conc "touch " start-flag)) ;; lazy but safe