Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -1813,30 +1813,44 @@ ;; get values from cached info from dropping file in logs dir ;; e.g. key is host and dtype is normalized-load ;; (define (common:get-cached-info key dtype #!key (age 10)) (if *toppath* - (let* ((fullpath (conc *toppath* "/.sysdata/" key "-" dtype ".log"))) + (let* ((fullpath (conc *toppath* "/.sysdata/" key "-" dtype ".log")) + (delfile (lambda () + (debug:print-info 1 *default-log-port* " removing bad file " fullpath ", exn=" exn) + (delete-file* fullpath) + #f))) (if (and (file-exists? fullpath) (file-read-access? fullpath)) (handle-exceptions exn (begin (debug:print 0 *default-log-port* "failed to get cached info from " fullpath ", exn=" exn) #f) (debug:print 2 *default-log-port* "reading file " fullpath) - (let ((real-age (- (current-seconds)(file-change-time fullpath)))) + (let ((real-age (- (current-seconds) + (handle-exceptions + exn + (begin + (debug:print 1 *default-log-port* "Failed to read mod time on file " + fullpath ", using 0, exn=" exn) + 0) + (file-change-time fullpath))))) (if (< real-age age) (handle-exceptions exn - (begin - (debug:print-info 1 *default-log-port* " removing bad file " fullpath ", exn=" exn) - (delete-file* fullpath) - #f) - (with-input-from-file fullpath read)) + (delfile) + (let* ((res (with-input-from-file fullpath read))) + (if (eof-object? res) + (begin + (delfile) + #f) + res))) (begin - (debug:print-info 2 *default-log-port* "file " fullpath " is too old (" real-age" seconds) to trust, skipping reading it") + (debug:print-info 2 *default-log-port* "file " fullpath + " is too old (" real-age" seconds) to trust, skipping reading it") #f)))) (begin (debug:print 2 *default-log-port* "not reading file " fullpath) #f))) #f)) @@ -2102,33 +2116,38 @@ #f (common:get-homehost))) (hh (if hh-dat (car hh-dat) #f))) (common:wait-for-normalized-load maxnormload msg hh))) +(define *numcpus-cache* (make-hash-table)) (define (common:get-num-cpus remote-host) (let* ((actual-host (or remote-host (get-host-name)))) - (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (random 3600))) ;; hosts had better not be changing the number of cpus too often! - (let* ((proc (lambda () - (let loop ((numcpu 0) - (inl (read-line))) - (if (eof-object? inl) - (if (> numcpu 0) - numcpu - #f) ;; if zero return #f so caller knows that things are not working - (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl) - (+ numcpu 1) - numcpu) - (read-line)))))) - (result (if remote-host - (with-input-from-pipe - (conc "ssh " remote-host " cat /proc/cpuinfo") - proc) - (with-input-from-file "/proc/cpuinfo" proc)))) - (if (and (number? result) - (> result 0)) - (common:write-cached-info actual-host "num-cpus" result)) - result)))) + ;; hosts had better not be changing the number of cpus too often! + (or (hash-table-ref/default *numcpus-cache* actual-host #f) + (let* ((numcpus (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (random 3600))) + (let* ((proc (lambda () + (let loop ((numcpu 0) + (inl (read-line))) + (if (eof-object? inl) + (if (> numcpu 0) + numcpu + #f) ;; if zero return #f so caller knows that things are not working + (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl) + (+ numcpu 1) + numcpu) + (read-line)))))) + (result (if remote-host + (with-input-from-pipe + (conc "ssh " remote-host " cat /proc/cpuinfo") + proc) + (with-input-from-file "/proc/cpuinfo" proc)))) + (if (and (number? result) + (> result 0)) + (common:write-cached-info actual-host "num-cpus" result)) + result)))) + (hash-table-set! *numcpus-cache* actual-host numcpus) + numcpus)))) ;; wait for normalized cpu load to drop below maxload ;; (define (common:wait-for-normalized-load maxnormload msg remote-host #!optional (rem-tries 5)) (let ((num-cpus (common:get-num-cpus remote-host))) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -566,17 +566,17 @@ (debug:print 0 *default-log-port* msg) (if (common:file-exists? full-serv-fname) (system (conc "sleep 1;mv -f " full-serv-fname " " new-serv-fname)) (debug:print 0 *default-log-port* "INFO: cannot move " full-serv-fname " to " new-serv-fname)) (exit))))) - (if (and (not start-time-old) ;; last server start try was less than five seconds ago + #;(if (and (not start-time-old) ;; last server start try was less than five seconds ago (not server-starting)) (begin (cleanup-proc "NOT starting server, there is either a recently started server or a server in process of starting") (exit))) ;; lets not even bother to start if there are already three or more server files ready to go - (let* ((num-alive (server:get-num-alive (server:get-list *toppath*)))) + #;(let* ((num-alive (server:get-num-alive (server:get-list *toppath*)))) (if (> num-alive 3) (begin (cleanup-proc (conc "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up")) (exit)))) (common:save-pkt `((action . start) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -2099,18 +2099,20 @@ (debug:print-error 0 *default-log-port* "You must specify :state and :status with every call to -step") (exit 6)))))) (if (args:get-arg "-step") (begin + (thread-sleep! 1.5) (megatest:step (args:get-arg "-step") (or (args:get-arg "-state")(args:get-arg ":state")) (or (args:get-arg "-status")(args:get-arg ":status")) (args:get-arg "-setlog") (args:get-arg "-m")) ;; (if db (sqlite3:finalize! db)) - (set! *didsomething* #t))) + (set! *didsomething* #t) + (thread-sleep! 1.5))) (if (or (args:get-arg "-setlog") ;; since setting up is so costly lets piggyback on -test-status ;; (not (args:get-arg "-step"))) ;; -setlog may have been processed already in the "-step" previous ;; NEW POLICY - -setlog sets test overall log on every call. (args:get-arg "-set-toplog")