Megatest

Check-in [5f418512e9]
Login
Overview
Comment: Trimmed more delays
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | development
Files: files | file ages | folders
SHA1: 5f418512e96d476781768cc499adda93c3e2a955
User & Date: matt on 2013-04-17 00:00:00
Other Links: branch diff | manifest | tags
Context
2013-04-17
08:51
bumped version check-in: f8584f2d62 user: icfadm tags: development, v1.5413
00:00
Trimmed more delays check-in: 5f418512e9 user: matt tags: development
2013-04-16
22:34
Cut back growth rate on the can-run-more-tests-delay check-in: ef113f8fda user: matt tags: development
Changes

Modified db.scm from [1adaf7b548] to [597b101439].

1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440

    (debug:print-info 7 "Current write queue length is " queue-len)

    ;; poll for the write to complete, timeout after 10 seconds
    ;; periodic flushing of the queue is taken care of by 
    ;; db:flush-queue
    (let loop ()
      (thread-sleep! 0.002)
      (mutex-lock! *completed-mutex*)
      (if (hash-table-ref/default *completed-writes* qry-sig #f)
	  (begin
	    (hash-table-delete! *completed-writes* qry-sig)
	    (set! got-it #t)))
      (mutex-unlock! *completed-mutex*)
      (if (and (not got-it)







|







1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440

    (debug:print-info 7 "Current write queue length is " queue-len)

    ;; poll for the write to complete, timeout after 10 seconds
    ;; periodic flushing of the queue is taken care of by 
    ;; db:flush-queue
    (let loop ()
      (thread-sleep! 0.001)
      (mutex-lock! *completed-mutex*)
      (if (hash-table-ref/default *completed-writes* qry-sig #f)
	  (begin
	    (hash-table-delete! *completed-writes* qry-sig)
	    (set! got-it #t)))
      (mutex-unlock! *completed-mutex*)
      (if (and (not got-it)

Modified megatest.scm from [33113834d2] to [f7af8ee8db].

31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "megatest-fossil-hash.scm")

;; (use trace dot-locking)
;; (trace
;;  thread-sleep!
;;  sqlite3:execute
;;  sqlite3:for-each-row
;;  open-run-close
;;  runs:can-run-more-tests
;;  cdb:remote-run
;;  nice-path
;;  read-config







|







31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "megatest-fossil-hash.scm")

;; (use trace dot-locking)
;; (trace
;;   thread-sleep!
;;  sqlite3:execute
;;  sqlite3:for-each-row
;;  open-run-close
;;  runs:can-run-more-tests
;;  cdb:remote-run
;;  nice-path
;;  read-config

Modified runs.scm from [f326c575c1] to [f136285a97].

136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
	 (num-running-in-jobgroup (cdb:remote-run db:get-count-tests-running-in-jobgroup #f jobgroup))
	 (max-concurrent-jobs     (let ((mcj (config-lookup *configdat* "setup"     "max_concurrent_jobs")))
				    (if (and mcj (string->number mcj))
					(string->number mcj)
					1)))
	 (job-group-limit         (config-lookup *configdat* "jobgroups" jobgroup)))
    (if (and (> (+ num-running num-running-in-jobgroup) 0)
	     (< *runs:can-run-more-tests-delay* 2))
	(begin
	  (set! *runs:can-run-more-tests-delay* (+ *runs:can-run-more-tests-delay* 0.01))
	  (debug:print-info 14 "can-run-more-tests-delay: " *runs:can-run-more-tests-delay*)))
    (if (not (eq? *last-num-running-tests* num-running))
	(begin
	  (debug:print 2 "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	  (set! *last-num-running-tests* num-running)))
    (if (not (eq? 0 *globalexitstatus*))
	(list #f num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit)







|

|







136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
	 (num-running-in-jobgroup (cdb:remote-run db:get-count-tests-running-in-jobgroup #f jobgroup))
	 (max-concurrent-jobs     (let ((mcj (config-lookup *configdat* "setup"     "max_concurrent_jobs")))
				    (if (and mcj (string->number mcj))
					(string->number mcj)
					1)))
	 (job-group-limit         (config-lookup *configdat* "jobgroups" jobgroup)))
    (if (and (> (+ num-running num-running-in-jobgroup) 0)
	     (< *runs:can-run-more-tests-delay* 1))
	(begin
	  (set! *runs:can-run-more-tests-delay* (+ *runs:can-run-more-tests-delay* 0.009))
	  (debug:print-info 14 "can-run-more-tests-delay: " *runs:can-run-more-tests-delay*)))
    (if (not (eq? *last-num-running-tests* num-running))
	(begin
	  (debug:print 2 "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	  (set! *last-num-running-tests* num-running)))
    (if (not (eq? 0 *globalexitstatus*))
	(list #f num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit)
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
			  (setenv "MT_TEST_NAME" test-name) ;; 
			  (setenv "MT_RUNNAME"   runname)
			  (set-megatest-env-vars run-id) ;; these may be needed by the launching process
			  (let ((items-list (items:get-items-from-config tconfig)))
			    (if (list? items-list)
				(begin
				  (tests:testqueue-set-items! test-record items-list)
				  (thread-sleep! *global-delta*)
				  (loop hed tal reruns))
				(begin
				  (debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this")
				  (exit 1))))))
		       ((null? fails)
			(debug:print-info 4 "fails is null, moving on in the queue but keeping " hed " for now")
			;; only increment num-retries when there are no tests running







|







543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
			  (setenv "MT_TEST_NAME" test-name) ;; 
			  (setenv "MT_RUNNAME"   runname)
			  (set-megatest-env-vars run-id) ;; these may be needed by the launching process
			  (let ((items-list (items:get-items-from-config tconfig)))
			    (if (list? items-list)
				(begin
				  (tests:testqueue-set-items! test-record items-list)
				  ;; (thread-sleep! *global-delta*)
				  (loop hed tal reruns))
				(begin
				  (debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this")
				  (exit 1))))))
		       ((null? fails)
			(debug:print-info 4 "fails is null, moving on in the queue but keeping " hed " for now")
			;; only increment num-retries when there are no tests running
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
			    (loop (car newtal)(cdr newtal) reruns))) ;; an issue with prereqs not yet met?
		       ((and (not (null? fails))(eq? testmode 'normal))
			(debug:print-info 1 "test "  hed " (mode=" testmode ") has failed prerequisite(s); "
				     (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ")
				     ", removing it from to-do list")
			(if (not (null? tal))
			    (begin
			      (thread-sleep! *global-delta*)
			      (loop (car tal)(cdr tal)(cons hed reruns)))))
		       (else
			(debug:print 8 "ERROR: No handler for this condition.")
			(thread-sleep! (+ 1 *global-delta*))
			(loop (car newtal)(cdr newtal) reruns)))) ;; END OF IF CAN RUN MORE

		    ;; if can't run more just loop with next possible test
		    (begin
		      (debug:print-info 4 "processing the case with a lambda for items or 'have-procedure. Moving through the queue without dropping " hed)
		      (thread-sleep! (+ 2 *global-delta*))
		      (loop (car newtal)(cdr newtal) reruns))))) ;; END OF (or (procedure? items)(eq? items 'have-procedure))
	     
	     ;; this case should not happen, added to help catch any bugs
	     ((and (list? items) itemdat)
	      (debug:print 0 "ERROR: Should not have a list of items in a test and the itemspath set - please report this")
	      (exit 1))
	     ((not (null? reruns))
	      (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED,
		     (junked (lset-difference equal? tal newlst)))
		(debug:print-info 4 "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal)
		(if (< num-retries max-retries)
		    (set! newlst (append reruns newlst)))
		(set! num-retries (+ num-retries 1))
		(thread-sleep! (+ 1 *global-delta*))
		(if (not (null? newlst))
		    ;; since reruns have been tacked on to newlst create new reruns from junked
		    (loop (car newlst)(cdr newlst)(delete-duplicates junked)))))
	     ((not (null? tal))
	      (debug:print-info 4 "I'm pretty sure I shouldn't get here."))
	     (else
	      (debug:print-info 4 "Exiting loop with...\n  hed=" hed "\n  tal=" tal "\n  reruns=" reruns))







|









|













|







567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
			    (loop (car newtal)(cdr newtal) reruns))) ;; an issue with prereqs not yet met?
		       ((and (not (null? fails))(eq? testmode 'normal))
			(debug:print-info 1 "test "  hed " (mode=" testmode ") has failed prerequisite(s); "
				     (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ")
				     ", removing it from to-do list")
			(if (not (null? tal))
			    (begin
                              ;; (thread-sleep! *global-delta*)
			      (loop (car tal)(cdr tal)(cons hed reruns)))))
		       (else
			(debug:print 8 "ERROR: No handler for this condition.")
			(thread-sleep! (+ 1 *global-delta*))
			(loop (car newtal)(cdr newtal) reruns)))) ;; END OF IF CAN RUN MORE

		    ;; if can't run more just loop with next possible test
		    (begin
		      (debug:print-info 4 "processing the case with a lambda for items or 'have-procedure. Moving through the queue without dropping " hed)
		      ;; (thread-sleep! (+ 2 *global-delta*))
		      (loop (car newtal)(cdr newtal) reruns))))) ;; END OF (or (procedure? items)(eq? items 'have-procedure))
	     
	     ;; this case should not happen, added to help catch any bugs
	     ((and (list? items) itemdat)
	      (debug:print 0 "ERROR: Should not have a list of items in a test and the itemspath set - please report this")
	      (exit 1))
	     ((not (null? reruns))
	      (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED,
		     (junked (lset-difference equal? tal newlst)))
		(debug:print-info 4 "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal)
		(if (< num-retries max-retries)
		    (set! newlst (append reruns newlst)))
		(set! num-retries (+ num-retries 1))
		;; (thread-sleep! (+ 1 *global-delta*))
		(if (not (null? newlst))
		    ;; since reruns have been tacked on to newlst create new reruns from junked
		    (loop (car newlst)(cdr newlst)(delete-duplicates junked)))))
	     ((not (null? tal))
	      (debug:print-info 4 "I'm pretty sure I shouldn't get here."))
	     (else
	      (debug:print-info 4 "Exiting loop with...\n  hed=" hed "\n  tal=" tal "\n  reruns=" reruns))

Modified server.scm from [2c2aefbe73] to [a854b85066].

77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
	    (set! last-write-flush-time *server:last-write-flush*)
	    (mutex-unlock! *incoming-mutex*)
	    (if (> (- (current-milliseconds) last-write-flush-time) 10)
		(begin
		  (mutex-lock! *db:process-queue-mutex*)
		  (db:process-cached-writes db)
		  (mutex-unlock! *db:process-queue-mutex*)
		  (thread-sleep! 0.01))))
	  (loop)))
      (begin
	(debug:print 0 "ERROR: failed to setup for Megatest in server:write-queue-handler")
	(exit 1))))
    
;;======================================================================
;; S E R V E R   U T I L I T I E S 







|







77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
	    (set! last-write-flush-time *server:last-write-flush*)
	    (mutex-unlock! *incoming-mutex*)
	    (if (> (- (current-milliseconds) last-write-flush-time) 10)
		(begin
		  (mutex-lock! *db:process-queue-mutex*)
		  (db:process-cached-writes db)
		  (mutex-unlock! *db:process-queue-mutex*)
		  (thread-sleep! 0.005))))
	  (loop)))
      (begin
	(debug:print 0 "ERROR: failed to setup for Megatest in server:write-queue-handler")
	(exit 1))))
    
;;======================================================================
;; S E R V E R   U T I L I T I E S 

Modified tests/fdktestqa/testqa/tests/bigrun/testconfig from [b5be798984] to [25b4432948].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Add additional steps here. Format is "stepname script"
[ezsteps]
step1 step1.sh

# Test requirements are specified here
[requirements]
# waiton setup
priority 0

# Iteration for your tests is controlled by the items section
[items]
NUMBER #{scheme (string-intersperse (map number->string (sort (let loop ((a 0)(res '()))(if (< a 500)(loop (+ a 1)(cons a res)) res)) >)) " ")}

# test_meta is a section for storing additional data on your test
[test_meta]
author matt
owner  matt
description An example test
tags tagone,tagtwo











|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Add additional steps here. Format is "stepname script"
[ezsteps]
step1 step1.sh

# Test requirements are specified here
[requirements]
# waiton setup
priority 0

# Iteration for your tests is controlled by the items section
[items]
NUMBER #{scheme (string-intersperse (map number->string (sort (let loop ((a 0)(res '()))(if (< a 120)(loop (+ a 1)(cons a res)) res)) >)) " ")}

# test_meta is a section for storing additional data on your test
[test_meta]
author matt
owner  matt
description An example test
tags tagone,tagtwo