Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -1969,30 +1969,36 @@ (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (random 3600))) ;; hosts had better not be changing the number of cpus too often! (let* ((proc (lambda () (let loop ((numcpu 0) (inl (read-line))) (if (eof-object? inl) - (begin - (common:write-cached-info actual-host "num-cpus" numcpu) - numcpu) + (if (> numcpu 0) + numcpu + #f) ;; if zero return #f so caller knows that things are not working (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl) (+ numcpu 1) numcpu) (read-line)))))) (result (if remote-host (with-input-from-pipe (conc "ssh " remote-host " cat /proc/cpuinfo") proc) (with-input-from-file "/proc/cpuinfo" proc)))) - (common:write-cached-info actual-host "num-cpus" result) + (if (> result 0)(common:write-cached-info actual-host "num-cpus" result)) result)))) ;; wait for normalized cpu load to drop below maxload ;; -(define (common:wait-for-normalized-load maxload msg remote-host) +(define (common:wait-for-normalized-load maxload msg remote-host #!optional (rem-tries 5)) (let ((num-cpus (common:get-num-cpus remote-host))) - (common:wait-for-cpuload maxload num-cpus 15 msg: msg remote-host: remote-host))) + (if num-cpus + (common:wait-for-cpuload maxload num-cpus 15 msg: msg remote-host: remote-host) + (begin + (thread-sleep! (random 60)) ;; we failed to get num cpus. wait a bit and try again + (if (> rem-tries 0) + (common:wait-for-normalized-load maxload msg remote-host (- rem-tries 1)) + #f))))) ;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load ;; (define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)(force-maxload #f)) (let* ((loadavg (common:get-cpu-load remote-host))