Megatest

Diff
Login

Differences From Artifact [bc77148d79]:

To Artifact [e3a13b4dea]:


2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127

2128
2129
2130
2131
2132
2133
2134
					       ;; at least use 1
	 (loadjmp (- first (if (> next (* numcpus 0.7)) ;; could do something with average of first and next?
			       0
			       next))) ;; we will force a conservative calculation any time next is large.
	 (first-next-avg    (/ (+ first next) 2))
	 ;; add some randomness to the time to break any alignment
	 ;; where netbatch dumps many jobs to machines simultaneously
         (adjwait (min (+ 300 (random 10)) (abs (* (+ (random 10)
						      (/ (- 1000 count) 10)
						      waitdelay)
						   (- first adjmaxload) ))  )))

    ;; let's let the user know once in a long while that load checking
    ;; is happening but not constantly report it
    (if (common:low-noise-print 30 (conc "cpuload" (or remote-host "localhost"))) ;; (> (random 100) 75) ;; about 25% of the time
	(debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload
			  ", load: " first ", adjmaxload: " adjmaxload ", loadjmp: " loadjmp))
    (cond
     ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable







|
|
|
|
>







2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
					       ;; at least use 1
	 (loadjmp (- first (if (> next (* numcpus 0.7)) ;; could do something with average of first and next?
			       0
			       next))) ;; we will force a conservative calculation any time next is large.
	 (first-next-avg    (/ (+ first next) 2))
	 ;; add some randomness to the time to break any alignment
	 ;; where netbatch dumps many jobs to machines simultaneously
         (adjwait           (min (+ 300 (random 10)) (abs (* (+ (random 10)
								(/ (- 1000 count) 10)
								waitdelay)
							     (- first adjmaxload) ))))
	 (load-jump-limit   (configf:lookup-number *configdat* "setup" "load-jump-limit")))
    ;; let's let the user know once in a long while that load checking
    ;; is happening but not constantly report it
    (if (common:low-noise-print 30 (conc "cpuload" (or remote-host "localhost"))) ;; (> (random 100) 75) ;; about 25% of the time
	(debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload
			  ", load: " first ", adjmaxload: " adjmaxload ", loadjmp: " loadjmp))
    (cond
     ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable
2144
2145
2146
2147
2148
2149
2150

2151
2152
2153
2154
2155
2156
2157
2158
2159
			" seconds due to load " first
			" exceeding max of " adjmaxload
			" on server " (or remote-host (get-host-name))
			" (normalized load-limit: " maxload ") " (if msg msg ""))
      (thread-sleep! adjwait)
      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))
     ((and (> loadjmp (cond

		       ((> numcpus 8)(/ numcpus 4))
		       ((> numcpus 4)(/ numcpus 2))
		       (else 0.5)))
	   (> count 0))
      (debug:print-info 0 *default-log-port* "waiting " adjwait " seconds due to possible load jump " loadjmp ". "
			(if msg msg ""))
      (thread-sleep! adjwait)
      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))
     (else







>
|
|







2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
			" seconds due to load " first
			" exceeding max of " adjmaxload
			" on server " (or remote-host (get-host-name))
			" (normalized load-limit: " maxload ") " (if msg msg ""))
      (thread-sleep! adjwait)
      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))
     ((and (> loadjmp (cond
		       (load-jump-limit load-jump-limit)
		       ((> numcpus 8)(/ numcpus 2))
		       ((> numcpus 4)(/ numcpus 1.2))
		       (else 0.5)))
	   (> count 0))
      (debug:print-info 0 *default-log-port* "waiting " adjwait " seconds due to possible load jump " loadjmp ". "
			(if msg msg ""))
      (thread-sleep! adjwait)
      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))
     (else