Megatest

Diff
Login

Differences From Artifact [8568995d03]:

To Artifact [0fc56a6830]:


460
461
462
463
464
465
466


467
468
469
470
471
472
473
	      (debug:print-info 0 *default-log-port* "running run-post-hook: \"" run-post-hook "\", log is " actual-logf)
	      (system (conc run-post-hook " >> " actual-logf " 2>&1"))
	      (debug:print-info 0 *default-log-port* "post-hook \"" run-post-hook "\" took " (- (current-seconds) start-time) " seconds to run."))))))

;; return #t when all items in waitors-upon list are represented in test-patt, #f otherwise.
(define (runs:testpatts-mention-waitors-upon? test-patt waitors-upon)
  (null? (tests:filter-test-names-not-matched waitors-upon test-patt)))



;;======================================================================
;; runs:run-tests is called from megatest.scm and itself
;;======================================================================
;;
;;  test-names: Comma separated patterns same as test-patts but used in selection 
;;              of tests to run. The item portions are not respected.







>
>







460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
	      (debug:print-info 0 *default-log-port* "running run-post-hook: \"" run-post-hook "\", log is " actual-logf)
	      (system (conc run-post-hook " >> " actual-logf " 2>&1"))
	      (debug:print-info 0 *default-log-port* "post-hook \"" run-post-hook "\" took " (- (current-seconds) start-time) " seconds to run."))))))

;; return #t when all items in waitors-upon list are represented in test-patt, #f otherwise.
(define (runs:testpatts-mention-waitors-upon? test-patt waitors-upon)
  (null? (tests:filter-test-names-not-matched waitors-upon test-patt)))

(define *find-and-mark-incomplete-last-run* (make-hash-table))

;;======================================================================
;; runs:run-tests is called from megatest.scm and itself
;;======================================================================
;;
;;  test-names: Comma separated patterns same as test-patts but used in selection 
;;              of tests to run. The item portions are not respected.
770
771
772
773
774
775
776
777
778





779
780
781
782
783
784
785
786
					  "runs:run-tests-queue"))
		 (th2        (make-thread (lambda ()			 ;; BBQ: why are we visiting ALL runs here?	    
					    ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...
					    (let ((run-ids (rmt:get-all-run-ids)))
					      (for-each (lambda (run-id)
							  (if keep-going
							      (handle-exceptions
							       exn
							       (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn)





							       (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27)
							run-ids)))
					  "runs: mark-incompletes")))
	    ;; (thread-start! th1)
	    (thread-start! th2)
	    ;; (thread-join! th1)
	    ;; just do the main stuff in the main thread
	    (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests







|
|
>
>
>
>
>
|







772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
					  "runs:run-tests-queue"))
		 (th2        (make-thread (lambda ()			 ;; BBQ: why are we visiting ALL runs here?	    
					    ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...
					    (let ((run-ids (rmt:get-all-run-ids)))
					      (for-each (lambda (run-id)
							  (if keep-going
							      (handle-exceptions
								  exn
								  (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn)
								;; lets run this only if a run has been NOT seen for more than 900 seconds
								(if (> (- (current-seconds)(hash-table-ref/default *find-and-mark-incomplete-last-run* run-id 0)) 900)
								    (begin
								      (rmt:find-and-mark-incomplete run-id #f)
								      (hash-table-set! *find-and-mark-incomplete-last-run* run-id (current-seconds)))
								    )))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27)
							run-ids)))
					  "runs: mark-incompletes")))
	    ;; (thread-start! th1)
	    (thread-start! th2)
	    ;; (thread-join! th1)
	    ;; just do the main stuff in the main thread
	    (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
1868
1869
1870
1871
1872
1873
1874

1875
1876
1877
1878
1879
1880

1881
1882
1883
1884
1885
1886
1887
      ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running)
      (if (and (or (args:get-arg "-run-wait")
		   (equal? (configf:lookup *configdat* "setup" "run-wait") "yes"))
	       (> num-running 0))
	  (begin
	    ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
	    ;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0))

	    (if (> (current-seconds)(+ last-time-incomplete 900))
		(let ((actual-num-running (rmt:get-count-tests-running-for-run-id run-id)))
		  (debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id
				    ". Running as pid " (current-process-id) " on " (get-host-name))
		  (set! last-time-incomplete (current-seconds)) ;; FIXME, this might be causing slow down - use of set!
		  (rmt:find-and-mark-incomplete run-id #f)

		  (debug:print-info 0 *default-log-port* "run-wait specified, waiting on " actual-num-running
				    " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at "
				    (time->string (seconds->local-time (current-seconds))))))
	    ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
	    (thread-sleep! 5) ;; (if (>= num-running max-concurrent-jobs) 5 1))
	    (wait-loop (rmt:get-count-tests-running-for-run-id run-id)
		       num-running))))







>
|



|

>







1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
      ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running)
      (if (and (or (args:get-arg "-run-wait")
		   (equal? (configf:lookup *configdat* "setup" "run-wait") "yes"))
	       (> num-running 0))
	  (begin
	    ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
	    ;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0))
	    (if (> (- (current-seconds)(hash-table-ref/default *find-and-mark-incomplete-last-run* run-id 0)) 900)
		;; (begin(if (> (current-seconds)(+ last-time-incomplete 900))
		(let ((actual-num-running (rmt:get-count-tests-running-for-run-id run-id)))
		  (debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id
				    ". Running as pid " (current-process-id) " on " (get-host-name))
		  ;; (set! last-time-incomplete (current-seconds)) ;; FIXME, this might be causing slow down - use of set!
		  (rmt:find-and-mark-incomplete run-id #f)
		  (hash-table-set! *find-and-mark-complete-last-run* run-id (current-seconds))
		  (debug:print-info 0 *default-log-port* "run-wait specified, waiting on " actual-num-running
				    " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at "
				    (time->string (seconds->local-time (current-seconds))))))
	    ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
	    (thread-sleep! 5) ;; (if (>= num-running max-concurrent-jobs) 5 1))
	    (wait-loop (rmt:get-count-tests-running-for-run-id run-id)
		       num-running))))