Megatest

Diff
Login

Differences From Artifact [60a12faf67]:

To Artifact [b5df931833]:


248
249
250
251
252
253
254






255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274

275
276
277
278

279
280
281
282
283
284
285
	      tests)
    res))

;; run-tests: try to launch each test named in test-names under a single run.
;;
;;   db         - database handle handed to every db helper called here
;;   test-names - list of test names, each passed to run-one-test
;;
;; The run id is obtained from register-run using the keys read from db.
;; A test is only handed to run-one-test while the number of running tests
;; is below the "max_concurrent_jobs" value from the "setup" config section;
;; when that setting is absent no limit applies. When -keepgoing is given
;; the whole list is retried every 10 seconds for as long as
;; db:estimated-tests-remaining reports a positive count.
(define (run-tests db test-names)
  (let* ((keys      (db-get-keys db))
         (keyvallst (keys->vallist keys #t))
         (run-id    (register-run db keys)))

    ;; Launch one test unless the concurrency limit says otherwise.
    (define (launch-if-room test-name)
      (let* ((num-running         (db:get-count-tests-running db))
             (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs")))
        (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
        ;; limit is #f when the setting is missing or is not a number
        (let ((limit (and max-concurrent-jobs
                          (string->number max-concurrent-jobs))))
          ;; Blocked when a setting exists and is either unparseable or
          ;; already reached; otherwise go ahead and launch.
          (if (and max-concurrent-jobs
                   (or (not limit)
                       (>= num-running limit)))
              (print "WARNING: Max running jobs exceeded, current number running: " num-running
                     ", max_concurrent_jobs: \"" max-concurrent-jobs "\"")
              (run-one-test db run-id test-name keyvallst)))))

    ;; One pass over all tests, repeated while -keepgoing has work left.
    (let next-pass ((numtimes 0))
      (for-each launch-if-room test-names)
      (if (args:get-arg "-keepgoing")
          (let ((remaining (db:estimated-tests-remaining db run-id)))
            (if (> remaining 0)
                (begin
                  (print "Keep going, estimated " remaining " tests remaining to run, will continue in 10 seconds ...")
                  (sleep 10)
                  (next-pass (+ numtimes 1)))))))))
	   
;; VERY INEFFICIENT! Move stuff that should be done once up to calling proc
(define (run-one-test db run-id test-name keyvallst)

  (print "Launching test " test-name)
  ;; All these vars might be referenced by the testconfig file reader
  (setenv "MT_TEST_NAME" test-name) ;; 
  (setenv "MT_RUNNAME"   (args:get-arg ":runname"))
  (set-megatest-env-vars db run-id) ;; these may be needed by the launching process
  (change-directory *toppath*)
  (let* ((test-path    (conc *toppath* "/tests/" test-name))







>
>
>
>
>
>





|














>




>







248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
	      tests)
    res))

;; run-tests: try to launch each test named in test-names under a single run.
;;
;;   db         - database handle handed to every db helper called here
;;   test-names - list of test names, each passed to run-one-test
;;
;; The run id is obtained from register-run using the keys read from db.
;; *passnum* is incremented on every call. A test is only handed to
;; run-one-test while the number of running tests is below the
;; "max_concurrent_jobs" value from the "setup" config section; when that
;; setting is absent no limit applies, and when it is present but not a
;; number no test is launched and the warning below is printed. When
;; -keepgoing is given the whole list is retried every 10 seconds for as
;; long as db:estimated-tests-remaining reports a positive count.
(define (run-tests db test-names)
  (let* ((keys        (db-get-keys db))
	 (keyvallst   (keys->vallist keys #t))
	 (run-id      (register-run db keys))) ;;  test-name)))
    ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if
    ;; -keepgoing is specified
    ;; Numeric comparison uses = ; eq? on numbers is not guaranteed to be
    ;; meaningful by the Scheme reports.
    (if (and (= *passnum* 0)
	     (args:get-arg "-keepgoing"))
	(db:set-tests-state-status db run-id test-names #f "FAIL" "NOT_STARTED" "FAIL"))
    (set! *passnum* (+ *passnum* 1))
    (let loop ((numtimes 0))
      (for-each 
       (lambda (test-name)
	 (let* ((num-running (db:get-count-tests-running db))
		(max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs"))
		;; parsed once; #f when the setting is missing or not numeric
		(max-jobs-num (and max-concurrent-jobs
				   (string->number max-concurrent-jobs))))
	   ;; (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	   (if (or (not max-concurrent-jobs)
		   (and max-jobs-num
			(not (>= num-running max-jobs-num))))
	       (run-one-test db run-id test-name keyvallst)
	       (print "WARNING: Max running jobs exceeded, current number running: " num-running 
		      ", max_concurrent_jobs: \"" max-concurrent-jobs "\""))))
       test-names)
      ;; With -keepgoing, poll again after a pause while tests remain.
      (if (args:get-arg "-keepgoing")
	  (let ((estrem (db:estimated-tests-remaining db run-id)))
	    (if (> estrem 0)
		(begin
		  (print "Keep going, estimated " estrem " tests remaining to run, will continue in 10 seconds ...")
		  (sleep 10)
		  ;; (run-waiting-tests db)
		  (loop (+ numtimes 1)))))))))
	   
;; VERY INEFFICIENT! Move stuff that should be done once up to calling proc
(define (run-one-test db run-id test-name keyvallst)
  (run-waiting-tests db)
  (print "Launching test " test-name)
  ;; All these vars might be referenced by the testconfig file reader
  (setenv "MT_TEST_NAME" test-name) ;; 
  (setenv "MT_RUNNAME"   (args:get-arg ":runname"))
  (set-megatest-env-vars db run-id) ;; these may be needed by the launching process
  (change-directory *toppath*)
  (let* ((test-path    (conc *toppath* "/tests/" test-name))
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358






359



360




361



362

363





364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
	      (db:set-comment-for-run db run-id (args:get-arg "-m")))
	  (let loop ((itemdat (car allitems))
		     (tal     (cdr allitems)))
	    ;; (lambda (itemdat) ;;; ((ripeness "overripe") (temperature "cool") (season "summer"))
	    (let* ((item-path     (item-list->path itemdat)) ;; (string-intersperse (map cadr itemdat) "/"))
		   (new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/"))
		   (new-test-name (if (equal? item-path "") test-name (conc test-name "/" item-path))) ;; just need it to be unique
		   (test-status   #f)
		   (num-running (db:get-count-tests-running db))
		   (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs")))
	      (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	      (if (not (or (not max-concurrent-jobs)
			   (and max-concurrent-jobs
				(string->number max-concurrent-jobs)
				(not (>= num-running (string->number max-concurrent-jobs))))))
		  (print "WARNING: Max running jobs exceeded, current number running: " num-running 
			 ", max_concurrent_jobs: " max-concurrent-jobs)
		  (begin
		    (let loop2 ((ts (db:get-test-info db run-id test-name item-path)) ;; #f)
				(ct 0))
		      (if (and (not ts)
			       (< ct 10))
			  (begin
			    (register-test db run-id test-name item-path)
			    (db:test-set-comment db run-id test-name item-path "")
			    ;; (test-set-status! db run-id test-name "NOT_STARTED" "n/a" itemdat "")
			    ;; (db:set-comment-for-test db run-id test-name item-path "")

			    ;; Move the next line into the test exectute code
			    ;; (db:delete-test-step-records db run-id test-name) ;; clean out if this is a re-run

			    (loop2 (db:get-test-info db run-id test-name item-path)
				   (+ ct 1)))
			  (if ts
			      (set! test-status ts)
			      (begin
				(print "WARNING: Couldn't register test " test-name " with item path " item-path ", skipping")
				(if (not (null? tal))
				    (loop (car tal)(cdr tal)))))))
		    (change-directory test-path)
		    ;; this block is here only to inform the user early on
		    (if (file-exists? runconfigf)
			(setup-env-defaults db runconfigf run-id *already-seen-runconfig-info*)
			(print "WARNING: You do not have a run config file: " runconfigf))
		    ;; (print "run-id: " run-id " test-name: " test-name " item-path: " item-path " test-status: " (test:get-status test-status) " test-state: " (test:get-state test-status))
		    (case (if (args:get-arg "-force")
			      'NOT_STARTED
			      (if test-status
				  (string->symbol (test:get-state test-status))
				  'failed-to-insert))
		      ((failed-to-insert)
		       (print "ERROR: Failed to insert the record into the db"))
		      ((NOT_STARTED COMPLETED) ;; (cadr status is the row id for the run record)






		       (if (and (equal? (test:get-state test-status)  "COMPLETED")



				(or (equal? (test:get-status test-status) "PASS")




				    (equal? (test:get-status test-status) "WARN")



				    (equal? (test:get-status test-status) "CHECK"))

				(not (args:get-arg "-force")))





			   (print "NOTE: Not starting test " new-test-name " as it is state \"COMPLETED\" and status \"" (test:get-status test-status) "\", use -force to override")
			   (let* ((get-prereqs-cmd (lambda ()
						     (db-get-prereqs-not-met db run-id waiton))) ;; check before running ....
				  (launch-cmd      (lambda ()
						     (launch-test db run-id test-conf keyvallst test-name test-path itemdat)))
				  (testrundat      (list get-prereqs-cmd launch-cmd)))
			     (if (or (args:get-arg "-force")
				     (null? ((car testrundat)))) ;; are there any tests that must be run before this one...
				 ((cadr testrundat)) ;; this is the line that launches the test to the remote host
				 (hash-table-set! *waiting-queue* new-test-name testrundat)))))
		      ((KILLED) 
		       (print "NOTE: " new-test-name " is already running or was explictly killed, use -force to launch it."))
		      ((LAUNCHED REMOTEHOSTSTART RUNNING)  
		       (if (> (- (current-seconds)(+ (db:test-get-event_time test-status)
						     (db:test-get-run_duration test-status)))
			      100) ;; i.e. no update for more than 100 seconds
			   (begin
			     (print "WARNING: Test " test-name " appears to be dead.")
			     (test-set-status! db run-id test-name "INCOMPLETE" "STUCK/DEAD" itemdat "Test is stuck or dead"))
			   (print "NOTE: " test-name " is already running")))
		      (else       (print "ERROR: Failed to launch test " new-test-name ". Unrecognised state " (test:get-state test-status))))))
	      (if (not (null? tal))
		  (loop (car tal)(cdr tal)))))))))

(define (run-waiting-tests db)
  (let ((numtries           0)
	(last-try-time      (current-seconds))
	(times              (list 1))) ;; minutes to wait before trying again to kick off runs







|


|














<
<
<
<
<
<



|









|


|
|



|
>
>
>
>
>
>
|
>
>
>
|
>
>
>
>
|
>
>
>
|
>
|
>
>
>
>
>
|
|
|
|
|
|
|
|
|
|



|
|


|


|







314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338






339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
	      (db:set-comment-for-run db run-id (args:get-arg "-m")))
	  (let loop ((itemdat (car allitems))
		     (tal     (cdr allitems)))
	    ;; (lambda (itemdat) ;;; ((ripeness "overripe") (temperature "cool") (season "summer"))
	    (let* ((item-path     (item-list->path itemdat)) ;; (string-intersperse (map cadr itemdat) "/"))
		   (new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/"))
		   (new-test-name (if (equal? item-path "") test-name (conc test-name "/" item-path))) ;; just need it to be unique
		   (testdat   #f)
		   (num-running (db:get-count-tests-running db))
		   (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs")))
	      ;; (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	      (if (not (or (not max-concurrent-jobs)
			   (and max-concurrent-jobs
				(string->number max-concurrent-jobs)
				(not (>= num-running (string->number max-concurrent-jobs))))))
		  (print "WARNING: Max running jobs exceeded, current number running: " num-running 
			 ", max_concurrent_jobs: " max-concurrent-jobs)
		  (begin
		    (let loop2 ((ts (db:get-test-info db run-id test-name item-path)) ;; #f)
				(ct 0))
		      (if (and (not ts)
			       (< ct 10))
			  (begin
			    (register-test db run-id test-name item-path)
			    (db:test-set-comment db run-id test-name item-path "")






			    (loop2 (db:get-test-info db run-id test-name item-path)
				   (+ ct 1)))
			  (if ts
			      (set! testdat ts)
			      (begin
				(print "WARNING: Couldn't register test " test-name " with item path " item-path ", skipping")
				(if (not (null? tal))
				    (loop (car tal)(cdr tal)))))))
		    (change-directory test-path)
		    ;; this block is here only to inform the user early on
		    (if (file-exists? runconfigf)
			(setup-env-defaults db runconfigf run-id *already-seen-runconfig-info*)
			(print "WARNING: You do not have a run config file: " runconfigf))
		    ;; (print "run-id: " run-id " test-name: " test-name " item-path: " item-path " testdat: " (test:get-status testdat) " test-state: " (test:get-state testdat))
		    (case (if (args:get-arg "-force")
			      'NOT_STARTED
			      (if testdat
				  (string->symbol (test:get-state testdat))
				  'failed-to-insert))
		      ((failed-to-insert)
		       (print "ERROR: Failed to insert the record into the db"))
		      ((NOT_STARTED COMPLETED)
		       (print "Got here, " (test:get-state testdat))
		       (let ((runflag #f))
			 (cond
			  ;; -force, run no matter what
			  ((args:get-arg "-force")(set! runflag #t))
			  ;; NOT_STARTED, run no matter what
			  ((equal? (test:get-state testdat) "NOT_STARTED")(set! runflag #t))
			  ;; not -rerun and PASS, WARN or CHECK, do no run
			  ((and (or (not (args:get-arg "-rerun"))
				    (args:get-arg "-keepgoing"))
				(member (test:get-status testdat) '("PASS" "WARN" "CHECK")))
			   (set! runflag #f))
			  ;; -rerun and status is one of the specifed, run it
			  ((and (args:get-arg "-rerun")
				(let ((rerunlst (string-split (args:get-arg "-rerun") ","))) ;; FAIL,
				  (member (test:get-status testdat) rerunlst)))
			   (set! runflag #t))
			  ;; -keepgoing, do not rerun FAIL
			  ((and (args:get-arg "-keepgoing")
				(member (test:get-status testdat) '("FAIL")))
			   (set! runflag #f))
			  ((and (not (args:get-arg "-rerun"))
				(member (test:get-status testdat) '("FAIL" "n/a")))
			   (set! runflag #t))
			  (else (set! runflag #f)))
			 ;; (print "RUNNING => runflag: " runflag " STATE: " (test:get-state testdat) " STATUS: " (test:get-status testdat))
			 (if (not runflag)
			     (print "NOTE: Not starting test " new-test-name " as it is state \"COMPLETED\" and status \"" (test:get-status testdat) "\", use -force to override")
			     (let* ((get-prereqs-cmd (lambda ()
						       (db-get-prereqs-not-met db run-id waiton))) ;; check before running ....
				    (launch-cmd      (lambda ()
						       (launch-test db run-id test-conf keyvallst test-name test-path itemdat)))
				    (testrundat      (list get-prereqs-cmd launch-cmd)))
			       (if (or (args:get-arg "-force")
				       (null? ((car testrundat)))) ;; are there any tests that must be run before this one...
				   ((cadr testrundat)) ;; this is the line that launches the test to the remote host
				   (hash-table-set! *waiting-queue* new-test-name testrundat))))))
		      ((KILLED) 
		       (print "NOTE: " new-test-name " is already running or was explictly killed, use -force to launch it."))
		      ((LAUNCHED REMOTEHOSTSTART RUNNING)  
		       (if (> (- (current-seconds)(+ (db:test-get-event_time testdat)
						     (db:test-get-run_duration testdat)))
			      100) ;; i.e. no update for more than 100 seconds
			   (begin
			     (print "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD")
			     (test-set-status! db run-id test-name "INCOMPLETE" "STUCK/DEAD" itemdat "Test is stuck or dead"))
			   (print "NOTE: " test-name " is already running")))
		      (else       (print "ERROR: Failed to launch test " new-test-name ". Unrecognised state " (test:get-state testdat))))))
	      (if (not (null? tal))
		  (loop (car tal)(cdr tal)))))))))

(define (run-waiting-tests db)
  (let ((numtries           0)
	(last-try-time      (current-seconds))
	(times              (list 1))) ;; minutes to wait before trying again to kick off runs