Megatest

Diff
Login

Differences From Artifact [c4f748fe17]:

To Artifact [ac1e77f103]:


175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'synchash-get run-id (list run-id proc synckey keynum params)))

(define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in)))
  
;; state and status are extra hints not usually used in the calculation
;;
(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment)))


;;======================================================================
;; Maintenance
;;======================================================================


(define (rmt:get-toplevels-and-incompletes run-id running-deadtime remotehoststart-deadtime)
  (rmtmod:send-receive 'get-toplevels-and-incompletes run-id (list run-id running-deadtime remotehoststart-deadtime)))

(define (rmt:get-status-from-final-status-file run-dir)
  (let ((infile (conc run-dir "/.final-status")))
    ;; first verify we are able to write the output file
    (if (not (file-read-access? infile))
        (begin 
	  (debug:print 2 *default-log-port* "ERROR: cannot read " infile)
          (debug:print 2 *default-log-port* "ERROR: run-dir is " run-dir)
          #f
          )
        (with-input-from-file infile read-lines)
	)))
  
;;  select end_time-now from
;;      (select testname,item_path,event_time+run_duration as
;;                          end_time,strftime('%s','now') as now from tests where state in
;;      ('RUNNING','REMOTEHOSTSTART','LAUNCHED'));
;;
;; NOT EASY TO MIGRATE TO db{file,mod}
;;
(define (rmt:find-and-mark-incomplete-engine run-id ovr-deadtime cfg-deadtime test-stats-update-period)
  (let* ((incompleted '())
	 (oldlaunched '())
	 (toplevels   '())
          ;; The default running-deadtime is 720 seconds = 12 minutes.
          ;; "(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))" = 200 + (2 * (200 + 30 + 30))
         (deadtime-trim (or ovr-deadtime cfg-deadtime))
         (server-start-allowance 200)
         (server-overloaded-budget 200)
         (launch-monitor-off-time (or test-stats-update-period 30))
         (launch-monitor-on-time-budget 30)
         (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget))
         (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30))
         (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default))
         (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))
         (running-deadtime (or deadtime-trim running-deadtime-default))) ;; two minutes (30 seconds between updates, this leaves 3x grace period)

    (debug:print-info 4  *default-log-port* "running-deadtime = " running-deadtime)
    (debug:print-info 4  *default-log-port* "deadtime-trim = " deadtime-trim)

    (let* ((dat (rmt:get-toplevels-and-incompletes run-id running-deadtime remotehoststart-deadtime)))
      (set! oldlaunched (list-ref dat 1))
      (set! toplevels   (list-ref dat 2))
      (set! incompleted (list-ref dat 0)))

    (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, "
		      (length toplevels) " old LAUNCHED toplevel tests and "
		      (length incompleted) " tests marked RUNNING but apparently dead.")
  
    ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE.
    ;;
    ;; (db:delay-if-busy dbdat)
    (let* ((min-incompleted-ids (map car incompleted)) ;; do 'em all
	   (all-ids             (append min-incompleted-ids (map car oldlaunched))))
      (if (> (length all-ids) 0)
	  (begin
	    ;; (launch:is-test-alive "localhost" 435)
	    (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ")
			 " as DEAD")
	    (for-each
             (lambda (test-id)
               (let* ((tinfo   (rmt:get-test-info-by-id run-id test-id))
		      (run-dir (db:test-get-rundir     tinfo))
		      (host    (db:test-get-host       tinfo))
		      (pid     (db:test-get-process_id tinfo))
		      (result (rmt:get-status-from-final-status-file run-dir)))
		 (if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result))) 
		     (begin
		       (debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD")
		       (rmt:set-state-status-and-roll-up-items
			run-id test-id 'foo "COMPLETED" "PASS"
			"Test stopped responding but it has PASSED; marking it PASS in the DB."))
		     (let ((is-alive (and (not (eq? pid 0))  ;; 0 is default in re-used field "attemptnum" where pid stored.
					  (commonmod:is-test-alive host pid))))
		       (if is-alive
			   (debug:print 0 *default-log-port* "INFO: test " test-id " on host " host
					" has a process on pid " pid ", NOT setting to DEAD.")
			   (begin
			     (debug:print 0 *default-log-port* "INFO: test " test-id
					  " final state/status is not COMPLETED/PASS. It is " result)
			     (rmt:set-state-status-and-roll-up-items
			      run-id test-id 'foo "COMPLETED" "DEAD"
			      "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead.")))))))
	     ;; call end of eud of run detection for posthook - from merge, is it needed?
	     ;; (launch:end-of-run-check run-id)
	     all-ids)
	    )))))


)







<
<
<
<
<
<
<




















<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<

<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
175
176
177
178
179
180
181







182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201























202


203


















































  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'synchash-get run-id (list run-id proc synckey keynum params)))

(define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in)))
  







;;======================================================================
;; Maintenance
;;======================================================================


(define (rmt:get-toplevels-and-incompletes run-id running-deadtime remotehoststart-deadtime)
  (rmtmod:send-receive 'get-toplevels-and-incompletes run-id (list run-id running-deadtime remotehoststart-deadtime)))

(define (rmt:get-status-from-final-status-file run-dir)
  (let ((infile (conc run-dir "/.final-status")))
    ;; first verify we are able to write the output file
    (if (not (file-read-access? infile))
        (begin 
	  (debug:print 2 *default-log-port* "ERROR: cannot read " infile)
          (debug:print 2 *default-log-port* "ERROR: run-dir is " run-dir)
          #f
          )
        (with-input-from-file infile read-lines)
	)))
  


























)