Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -446,11 +446,12 @@ ;; On NFS it can be slow and unreliable to get needed startup information. ;; i. Check if we are on the homehost, if so, proceed ;; ii. Check if host and port passed in via CMDINFO are valid and if ;; possible use them. - (let ((bestadrs (server:get-best-guess-address (get-host-name)))) + (let ((bestadrs (server:get-best-guess-address (get-host-name))) + (needcare #f)) (if (equal? homehost bestadrs) ;; we are likely on the homehost (debug:print-info 0 *default-log-port* "test " test-name " appears to be running on the homehost " homehost) (let ((host-port (if serverurl (string-split serverurl ":") #f))) (if (not *runremote*)(set! *runremote* (make-remote))) ;; init *runremote* (if (string? homehost) @@ -465,11 +466,27 @@ (let ((url (http-transport:server-dat-make-url start-res))) (remote-conndat-set! *runremote* start-res) (remote-server-url-set! *runremote* url) (debug:print-info 0 *default-log-port* "connected to " url " using CMDINFO data.")) (debug:print-info 0 *default-log-port* "received " host ":" port " for url but could not connect.") - ))))))) + )) + (begin + (debug:print-info 0 *default-log-port* (if host-port + (conc "received invalid host-port information " host-port) + "no host-port information received")) + ;; potential for bad situation if simultaneous starting of hundreds of jobs on servers, set needcare. + (set! needcare #t))) + (begin + (debug:print-info 0 *default-log-port* "received no homehost information. Please report this to support as it should not happen.") + (set! needcare #t))))) + (if needcare ;; due to very slow NFS we will do a brute force mkdir to ensure that the directory inode it truly available on this host + (let ((logdir (conc top-path "/logs"))) ;; we'll try to create this directory + (handle-exceptions + exn + (debug:print 0 *default-log-port* "Failed to create directory " logdir " expect problems, message: " ((condition-property-accessor 'exn 'message) exn)) + (create-directory logdir #t))))) + ;; NFS might not have propagated the directory meta data to the run host - give it time if needed (let loop ((count 0)) (if (or (file-exists? top-path) (> count 10)) (change-directory top-path)