Megatest

Diff
Login

Differences From Artifact [bc77148d79]:

To Artifact [e3a13b4dea]:


2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127





2128
2129
2130
2131
2132
2133
2134
2117
2118
2119
2120
2121
2122
2123




2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135







-
-
-
-
+
+
+
+
+







					       ;; at least use 1
	 (loadjmp (- first (if (> next (* numcpus 0.7)) ;; could do something with average of first and next?
			       0
			       next))) ;; we will force a conservative calculation any time next is large.
	 (first-next-avg    (/ (+ first next) 2))
	 ;; add some randomness to the time to break any alignment
	 ;; where netbatch dumps many jobs to machines simultaneously
         (adjwait (min (+ 300 (random 10)) (abs (* (+ (random 10)
						      (/ (- 1000 count) 10)
						      waitdelay)
						   (- first adjmaxload) ))  )))
         (adjwait           (min (+ 300 (random 10)) (abs (* (+ (random 10)
								(/ (- 1000 count) 10)
								waitdelay)
							     (- first adjmaxload) ))))
	 (load-jump-limit   (configf:lookup-number *configdat* "setup" "load-jump-limit")))
    ;; let's let the user know once in a long while that load checking
    ;; is happening but not constantly report it
    (if (common:low-noise-print 30 (conc "cpuload" (or remote-host "localhost"))) ;; (> (random 100) 75) ;; about 25% of the time
	(debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload
			  ", load: " first ", adjmaxload: " adjmaxload ", loadjmp: " loadjmp))
    (cond
     ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable
2144
2145
2146
2147
2148
2149
2150

2151
2152


2153
2154
2155
2156
2157
2158
2159
2145
2146
2147
2148
2149
2150
2151
2152


2153
2154
2155
2156
2157
2158
2159
2160
2161







+
-
-
+
+







			" seconds due to load " first
			" exceeding max of " adjmaxload
			" on server " (or remote-host (get-host-name))
			" (normalized load-limit: " maxload ") " (if msg msg ""))
      (thread-sleep! adjwait)
      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))
     ((and (> loadjmp (cond
		       (load-jump-limit load-jump-limit)
		       ((> numcpus 8)(/ numcpus 4))
		       ((> numcpus 4)(/ numcpus 2))
		       ((> numcpus 8)(/ numcpus 2))
		       ((> numcpus 4)(/ numcpus 1.2))
		       (else 0.5)))
	   (> count 0))
      (debug:print-info 0 *default-log-port* "waiting " adjwait " seconds due to possible load jump " loadjmp ". "
			(if msg msg ""))
      (thread-sleep! adjwait)
      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))
     (else