Megatest

Diff
Login

Differences From Artifact [8568995d03]:

To Artifact [0fc56a6830]:


460
461
462
463
464
465
466


467
468
469
470
471
472
473
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475







+
+







	      (debug:print-info 0 *default-log-port* "running run-post-hook: \"" run-post-hook "\", log is " actual-logf)
	      (system (conc run-post-hook " >> " actual-logf " 2>&1"))
	      (debug:print-info 0 *default-log-port* "post-hook \"" run-post-hook "\" took " (- (current-seconds) start-time) " seconds to run."))))))

;; return #t when all items in waitors-upon list are represented in test-patt, #f otherwise.
(define (runs:testpatts-mention-waitors-upon? test-patt waitors-upon)
  (null? (tests:filter-test-names-not-matched waitors-upon test-patt)))

(define *find-and-mark-incomplete-last-run* (make-hash-table))

;;======================================================================
;; runs:run-tests is called from megatest.scm and itself
;;======================================================================
;;
;;  test-names: Comma separated patterns same as test-patts but used in selection 
;;              of tests to run. The item portions are not respected.
770
771
772
773
774
775
776
777
778
779








780
781
782
783
784
785
786
772
773
774
775
776
777
778



779
780
781
782
783
784
785
786
787
788
789
790
791
792
793







-
-
-
+
+
+
+
+
+
+
+







					  "runs:run-tests-queue"))
		 (th2        (make-thread (lambda ()			 ;; BBQ: why are we visiting ALL runs here?	    
					    ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...
					    (let ((run-ids (rmt:get-all-run-ids)))
					      (for-each (lambda (run-id)
							  (if keep-going
							      (handle-exceptions
							       exn
							       (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn)
							       (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27)
								  exn
								  (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn)
								;; lets run this only if a run has been NOT seen for more than 900 seconds
								(if (> (- (current-seconds)(hash-table-ref/default *find-and-mark-incomplete-last-run* run-id 0)) 900)
								    (begin
								      (rmt:find-and-mark-incomplete run-id #f)
								      (hash-table-set! *find-and-mark-incomplete-last-run* run-id (current-seconds)))
								    )))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27)
							run-ids)))
					  "runs: mark-incompletes")))
	    ;; (thread-start! th1)
	    (thread-start! th2)
	    ;; (thread-join! th1)
	    ;; just do the main stuff in the main thread
	    (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
1868
1869
1870
1871
1872
1873
1874

1875

1876
1877
1878
1879

1880

1881
1882
1883
1884
1885
1886
1887
1875
1876
1877
1878
1879
1880
1881
1882

1883
1884
1885
1886

1887
1888
1889
1890
1891
1892
1893
1894
1895
1896







+
-
+



-
+

+







      ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running)
      (if (and (or (args:get-arg "-run-wait")
		   (equal? (configf:lookup *configdat* "setup" "run-wait") "yes"))
	       (> num-running 0))
	  (begin
	    ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
	    ;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0))
	    (if (> (- (current-seconds)(hash-table-ref/default *find-and-mark-incomplete-last-run* run-id 0)) 900)
	    (if (> (current-seconds)(+ last-time-incomplete 900))
		;; (begin(if (> (current-seconds)(+ last-time-incomplete 900))
		(let ((actual-num-running (rmt:get-count-tests-running-for-run-id run-id)))
		  (debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id
				    ". Running as pid " (current-process-id) " on " (get-host-name))
		  (set! last-time-incomplete (current-seconds)) ;; FIXME, this might be causing slow down - use of set!
		  ;; (set! last-time-incomplete (current-seconds)) ;; FIXME, this might be causing slow down - use of set!
		  (rmt:find-and-mark-incomplete run-id #f)
		  (hash-table-set! *find-and-mark-complete-last-run* run-id (current-seconds))
		  (debug:print-info 0 *default-log-port* "run-wait specified, waiting on " actual-num-running
				    " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at "
				    (time->string (seconds->local-time (current-seconds))))))
	    ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
	    (thread-sleep! 5) ;; (if (>= num-running max-concurrent-jobs) 5 1))
	    (wait-loop (rmt:get-count-tests-running-for-run-id run-id)
		       num-running))))