Megatest

Check-in [64d9352976]
Login
Overview
Comment:Add a little randomness
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.65
Files: files | file ages | folders
SHA1: 64d93529768e368d3dbf59e3da39a1750ab556bb
User & Date: mrwellan on 2018-02-02 16:41:26
Other Links: branch diff | manifest | tags
Context
2018-02-03
22:11
Added general small data caching mechanism and setup for caching normalized cpu load check-in: cd9d9347c2 user: matt tags: v1.65
2018-02-02
16:41
Add a little randomness check-in: 64d9352976 user: mrwellan tags: v1.65
15:13
Use a different variable to control starting up servers on overloaded hosts. check-in: 30b0302862 user: mrwellan tags: v1.65
Changes

Modified common.scm from [642e988dd2] to [756b082c80].

1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
		      (common:get-num-cpus remote-host)
		      numcpus-in))
	 (maxload (max maxload-in 0.5)) ;; so maxload must be greater than 0.5 for now BUG - FIXME?
	 (first   (car loadavg))
	 (next    (cadr loadavg))
	 (adjload (* maxload (max 1 numcpus))) ;; possible bug where numcpus (or could be maxload) is zero, crude fallback is to at least use 1
	 (loadjmp (- first next))
         (adjwait (+ (/ (- 1000 count) 10) waitdelay)))
    (cond
     ((and (> first adjload)
	   (> count 0))
      (debug:print-info 0 *default-log-port* "server start delayed " adjwait " seconds due to load " first " exceeding max of " adjload " on server " (or remote-host (get-host-name)) " (normalized load-limit: " maxload ") " (if msg msg ""))
      (thread-sleep! adjwait)
      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))
     ((and (> loadjmp numcpus)







|







1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
		      (common:get-num-cpus remote-host)
		      numcpus-in))
	 (maxload (max maxload-in 0.5)) ;; so maxload must be greater than 0.5 for now BUG - FIXME?
	 (first   (car loadavg))
	 (next    (cadr loadavg))
	 (adjload (* maxload (max 1 numcpus))) ;; possible bug where numcpus (or could be maxload) is zero, crude fallback is to at least use 1
	 (loadjmp (- first next))
         (adjwait (+ (random 10)(/ (- 1000 count) 10) waitdelay))) ;; add some randomness to the time to break any alignment where netbatch dumps many jobs to machines simultaneously
    (cond
     ((and (> first adjload)
	   (> count 0))
      (debug:print-info 0 *default-log-port* "server start delayed " adjwait " seconds due to load " first " exceeding max of " adjload " on server " (or remote-host (get-host-name)) " (normalized load-limit: " maxload ") " (if msg msg ""))
      (thread-sleep! adjwait)
      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))
     ((and (> loadjmp numcpus)

Modified tests/fdktestqa/fdk.config from [5922f41e2b] to [42bf4ada3c].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
[fields]
SYSTEM TEXT
RELEASE TEXT

[setup]
# Adjust max_concurrent_jobs to limit how much you load your machines
# max_concurrent_jobs 150
max_concurrent_jobs 1000

# This is your link path, you can move it but it is generally better to keep it stable
linktree #{shell readlink -f #{getenv MT_RUN_AREA_HOME}/../simplelinks}

[include testqa/configs/megatest.abc.config]

[jobtools]







|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
[fields]
SYSTEM TEXT
RELEASE TEXT

[setup]
# Adjust max_concurrent_jobs to limit how much you load your machines
# max_concurrent_jobs 150
max_concurrent_jobs 3000

# This is your link path, you can move it but it is generally better to keep it stable
linktree #{shell readlink -f #{getenv MT_RUN_AREA_HOME}/../simplelinks}

[include testqa/configs/megatest.abc.config]

[jobtools]

Modified tests/fdktestqa/testqa/Makefile from [b03429fb6d] to [d6828dd3fb].

1
2
3
4
5
6
7

8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# Tool locations: the bin directory three levels above the directory named by
# $(PWD), and the executables inside it.
BINDIR    = $(PWD)/../../../bin
# Put that bin directory first on PATH.
PATH     := $(BINDIR):$(PATH)
MEGATEST  = $(BINDIR)/megatest
DASHBOARD = $(BINDIR)/dashboard
NEWDASHBOARD = $(BINDIR)/newdashboard
RUNNAME   = a
NUMTESTS  = 20


# all: invoke megatest -remove-runs for target a/b, run name c, test pattern
# %/%, then megatest -run for the same target and run name with pattern %.
all :
	$(MEGATEST) -remove-runs -targe​t a/b -runname c -testpatt %/%
	$(MEGATEST) -run -testpatt % -target a/b -runname c

# bigbig: start four megatest -run invocations (run names a, b, c and d) on
# target a/b, each with NUMTESTS=1000 in its environment. Every invocation is
# backgrounded inside its own subshell, so the loop does not wait for them.
bigbig :
	for tn in a b c d;do \
	   (NUMTESTS=1000 $(MEGATEST) -run -testpatt % -target a/b -runname $$tn & ) ; \
	done

# waitonpatt: invoke megatest -remove-runs for run name waitonpatt on target
# a/b, then megatest -run restricted to pattern bigrun3/%8 with NUMTESTS=15.
# NOTE(review): these recipes call bare `megatest`, found through PATH,
# whereas the rules above use $(MEGATEST).
waitonpatt :
	megatest -remove-runs -runname waitonpatt -target a/b -testpatt %
	NUMTESTS=15 megatest -run -target a/b -runname waitonpatt -testpatt bigrun3/%8

# waitonall: invoke megatest -remove-runs for run name waitonall on target
# a/b, then megatest -run restricted to pattern alltop with NUMTESTS=20.
waitonall :
	megatest -remove-runs -runname waitonall -target a/b -testpatt %
	NUMTESTS=20 megatest -run -target a/b -runname waitonall -testpatt alltop

bigrun :







>







|



|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Tool locations. $(CURDIR) is make's own record of its working directory, so
# these paths stay correct under `make -C <dir>` and when PWD is unset or
# stale in the calling environment (PWD is only an inherited shell variable).
BINDIR    = $(CURDIR)/../../../bin
# Put the tool directory first on PATH.
PATH     := $(BINDIR):$(PATH)
MEGATEST  = $(BINDIR)/megatest
DASHBOARD = $(BINDIR)/dashboard
NEWDASHBOARD = $(BINDIR)/newdashboard
RUNNAME   = a
NUMTESTS  = 20
# Second component of the a/<SUBTARG> megatest target used by some rules;
# override on the command line, e.g. `make SUBTARG=x bigbig`.
SUBTARG = b

# all: invoke megatest -remove-runs for target a/b, run name c, test pattern
# %/%, then megatest -run for the same target and run name with pattern %.
# The target is hard-coded to a/b here; $(SUBTARG) is not consulted.
# NOTE(review): no .PHONY declaration is visible in this excerpt - confirm one
# exists elsewhere, otherwise a file named `all` would suppress this rule.
all :
	$(MEGATEST) -remove-runs -target a/b -runname c -testpatt %/%
	$(MEGATEST) -run -testpatt % -target a/b -runname c

# bigbig: start four megatest -run invocations (run names a, b, c and d) on
# target a/$(SUBTARG), each with NUMTESTS=1000 in its environment. Every
# invocation is backgrounded inside its own subshell, so the loop does not
# wait for them. $$tn is the shell loop variable, escaped for make.
bigbig :
	for tn in a b c d;do \
	   (NUMTESTS=1000 $(MEGATEST) -run -testpatt % -target a/$(SUBTARG) -runname $$tn & ) ; \
	done

# waitonpatt: invoke megatest -remove-runs for run name waitonpatt, then
# megatest -run restricted to pattern bigrun3/%8 with NUMTESTS=15 in its
# environment.
# Both steps address the same a/$(SUBTARG) target, so overriding SUBTARG can
# no longer remove runs from one target and then execute against a/b. With the
# default SUBTARG=b the commands are unchanged.
# $(MEGATEST) is used, as in the `all` and `bigbig` rules, so the binary does
# not depend on how PATH lookup resolves `megatest`.
waitonpatt :
	$(MEGATEST) -remove-runs -runname waitonpatt -target a/$(SUBTARG) -testpatt %
	NUMTESTS=15 $(MEGATEST) -run -target a/$(SUBTARG) -runname waitonpatt -testpatt bigrun3/%8

# waitonall: invoke megatest -remove-runs for run name waitonall on target
# a/b, then megatest -run restricted to pattern alltop with NUMTESTS=20 in its
# environment.
# $(MEGATEST) is used, as in the `all` and `bigbig` rules, so the binary does
# not depend on how PATH lookup resolves `megatest`.
# The target stays hard-coded to a/b; $(SUBTARG) is not consulted here.
waitonall :
	$(MEGATEST) -remove-runs -runname waitonall -target a/b -testpatt %
	NUMTESTS=20 $(MEGATEST) -run -target a/b -runname waitonall -testpatt alltop

bigrun :