Megatest

Check-in [a0d9704d2f]
Login
Overview
Comment:Moved delay in can-run-more-tests out of the main path
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.60
Files: files | file ages | folders
SHA1: a0d9704d2fcac7b0596e7f91104e5fae3e44cbb3
User & Date: mrwellan on 2015-03-04 08:41:07
Other Links: branch diff | manifest | tags
Context
2015-03-09
05:19
Merging better-html-update-control to v1.60 check-in: bd663b5d13 user: matt tags: v1.60
05:18
Attempt to improve server recovery in bad situations Closed-Leaf check-in: 4125fe9193 user: matt tags: server-robustness
2015-03-04
18:42
Better method for locking on html update check-in: 150c672a53 user: mrwellan tags: better-html-update-control
08:41
Moved delay in can-run-more-tests out of the main path check-in: a0d9704d2f user: mrwellan tags: v1.60
06:55
Merged in the static-html branch check-in: 16e4ac3a73 user: mrwellan tags: v1.60
Changes

Modified runs.scm from [7e3be6327e] to [61baf1971a].

160
161
162
163
164
165
166
167



168
169
170
171
172
173
174
	(begin
	  (hash-table-set! *runs:denoise* key currtime)
	  #t)
	#f)))

(define (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)
  (thread-sleep! (cond
        	  ((> *runs:can-run-more-tests-count* 20) 2);; obviously haven't had any work to do for a while



        	  (else 0)))
  (let* ((num-running             (rmt:get-count-tests-running run-id))
	 (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup))
	 (job-group-limit         (let ((jobg-count (config-lookup *configdat* "jobgroups" jobgroup)))
				    (if (string? jobg-count)
					(string->number jobg-count)
					jobg-count))))







|
>
>
>







160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
	(begin
	  (hash-table-set! *runs:denoise* key currtime)
	  #t)
	#f)))

(define (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)
  (thread-sleep! (cond
        	  ((> *runs:can-run-more-tests-count* 20)
		   (if (runs:lownoise "waiting on tasks" 60)
		       (debug:print-info 2 "waiting for tasks to complete, sleeping briefly ..."))
		   2);; obviously haven't had any work to do for a while
        	  (else 0)))
  (let* ((num-running             (rmt:get-count-tests-running run-id))
	 (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup))
	 (job-group-limit         (let ((jobg-count (config-lookup *configdat* "jobgroups" jobgroup)))
				    (if (string? jobg-count)
					(string->number jobg-count)
					jobg-count))))
689
690
691
692
693
694
695
696
697
698
699


700
701
702
703
704
705
706
707
     ;; Register tests 
     ;;
     ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f))
      (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" )
      ;; always do firm registration now in v1.60 and greater ;; (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs
      (let register-loop ((numtries 15))
	(rmt:general-call 'register-test run-id run-id test-name item-path)
	(thread-sleep! 0.5)
	(if (rmt:get-test-id run-id test-name item-path)
	    (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'done)
	    (if (> numtries 0)


		(register-loop (- numtries 1))
		(debug:print 0 "ERROR: failed to register test " (db:test-make-full-name test-name item-path)))))
      (if (not (eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f) 'done))
	  (begin
	    (rmt:general-call 'register-test run-id run-id test-name "")
	    (if (rmt:get-test-id run-id test-name "")
		(hash-table-set! test-registry (db:test-make-full-name test-name "") 'done))))
      (runs:shrink-can-run-more-tests-count)   ;; DELAY TWEAKER (still needed?)







<



>
>
|







692
693
694
695
696
697
698

699
700
701
702
703
704
705
706
707
708
709
710
711
     ;; Register tests 
     ;;
     ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f))
      (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" )
      ;; always do firm registration now in v1.60 and greater ;; (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs
      (let register-loop ((numtries 15))
	(rmt:general-call 'register-test run-id run-id test-name item-path)

	(if (rmt:get-test-id run-id test-name item-path)
	    (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'done)
	    (if (> numtries 0)
		(begin
		  (thread-sleep! 0.5)
		  (register-loop (- numtries 1)))
		(debug:print 0 "ERROR: failed to register test " (db:test-make-full-name test-name item-path)))))
      (if (not (eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f) 'done))
	  (begin
	    (rmt:general-call 'register-test run-id run-id test-name "")
	    (if (rmt:get-test-id run-id test-name "")
		(hash-table-set! test-registry (db:test-make-full-name test-name "") 'done))))
      (runs:shrink-can-run-more-tests-count)   ;; DELAY TWEAKER (still needed?)
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
	   'start)
      (debug:print-info 0 "Waiting on test registration(s): "
			(string-intersperse 
			 (filter (lambda (x)
				   (eq? (hash-table-ref/default test-registry x #f) 'start))
				 (hash-table-keys test-registry))
			 ", "))
      (thread-sleep! 0.1)
      (list hed tal reg reruns))
     
     ;; If no resources are available just kill time and loop again
     ;;
     ((not have-resources) ;; simply try again after waiting a second
      (if (runs:lownoise "no resources" 60)
	  (debug:print-info 1 "no resources to run new tests, waiting ..."))







|







726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
	   'start)
      (debug:print-info 0 "Waiting on test registration(s): "
			(string-intersperse 
			 (filter (lambda (x)
				   (eq? (hash-table-ref/default test-registry x #f) 'start))
				 (hash-table-keys test-registry))
			 ", "))
      (thread-sleep! 0.051)
      (list hed tal reg reruns))
     
     ;; If no resources are available just kill time and loop again
     ;;
     ((not have-resources) ;; simply try again after waiting a second
      (if (runs:lownoise "no resources" 60)
	  (debug:print-info 1 "no resources to run new tests, waiting ..."))