Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -414,38 +414,46 @@ ;; dot-locking egg seems not to work, using this for now ;; if lock is older than expire-time then remove it and try again ;; to get the lock ;; (define (common:simple-file-lock fname #!key (expire-time 300)) - (if (file-exists? fname) - (if (> (- (current-seconds)(file-modification-time fname)) expire-time) - (begin - (delete-file* fname) - (common:simple-file-lock fname expire-time: expire-time)) - #f) - (let ((key-string (conc (get-host-name) "-" (current-process-id)))) - (with-output-to-file fname - (lambda () - (print key-string))) - (thread-sleep! 0.25) - (if (file-exists? fname) - (with-input-from-file fname - (lambda () - (equal? key-string (read-line)))) - #f)))) + (handle-exceptions + exn + #f ;; don't really care what went wrong right now. NOTE: I have not seen this one actually fail. + (if (file-exists? fname) + (if (> (- (current-seconds)(file-modification-time fname)) expire-time) + (begin + (delete-file* fname) + (common:simple-file-lock fname expire-time: expire-time)) + #f) + (let ((key-string (conc (get-host-name) "-" (current-process-id)))) + (with-output-to-file fname + (lambda () + (print key-string))) + (thread-sleep! 0.25) + (if (file-exists? fname) + (with-input-from-file fname + (lambda () + (equal? key-string (read-line)))) + #f))))) (define (common:simple-file-lock-and-wait fname #!key (expire-time 300)) (let ((end-time (+ expire-time (current-seconds)))) (let loop ((got-lock (common:simple-file-lock fname expire-time: expire-time))) (if got-lock #t (if (> end-time (current-seconds)) - (loop (common:simple-file-lock fname expire-time: expire-time)) + (begin + (thread-sleep! 3) + (loop (common:simple-file-lock fname expire-time: expire-time))) #f))))) (define (common:simple-file-release-lock fname) - (delete-file* fname)) + (handle-exceptions + exn + #f ;; I don't really care why this failed (at least for now) + (delete-file* fname))) ;;====================================================================== ;; S T A T E S A N D S T A T U S E S ;;====================================================================== Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -2718,11 +2718,11 @@ (define (dashboard:get-youngest-run-db-mod-time dbdir) (handle-exceptions exn (begin - (debug:print 0 *default-log-port* "WARNING: error in accessing databases in get-youngest-run-db-mod-time: " ((condition-property-accessor 'exn 'message) exn) " db-dir="dbdir) + (debug:print 2 *default-log-port* "WARNING: error in accessing databases in get-youngest-run-db-mod-time: " ((condition-property-accessor 'exn 'message) exn) " db-dir="dbdir) (current-seconds)) ;; something went wrong - just print an error and return current-seconds (common:max (map (lambda (filen) (file-modification-time filen)) (glob (conc dbdir "/*.db*")))))) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -471,11 +471,16 @@ (if (and start-res ping-res) (let ((url (http-transport:server-dat-make-url start-res))) (remote-conndat-set! *runremote* start-res) (remote-server-url-set! *runremote* url) - (debug:print-info 0 *default-log-port* "connected to " url " using CMDINFO data.")) + (if (server:ping url) + (debug:print-info 0 *default-log-port* "connected to " url " using CMDINFO data.") + (begin + (debug:print-info 0 *default-log-port* "have CMDINFO data but failed to connect to " url) + (remote-conndat-set! *runremote* #f) + (remote-server-url-set! *runremote* #f)))) (debug:print-info 0 *default-log-port* "received " host ":" port " for url but could not connect.") ))))))) ;; NFS might not have propagated the directory meta data to the run host - give it time if needed (let loop ((count 0)) (if (or (file-exists? top-path)