Megatest

Check-in [79361f5222]
Login
Overview
Comment:Queue ideosyncracies seem fixed. Needs more testing...
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.55
Files: files | file ages | folders
SHA1: 79361f52227af77ab374fe3141add941b602749c
User & Date: matt on 2013-09-10 00:27:10
Other Links: branch diff | manifest | tags
Context
2013-09-10
12:04
Fixed couple remaining issues with queue handling check-in: a2c11c53bc user: mrwellan tags: v1.55
00:27
Queue ideosyncracies seem fixed. Needs more testing... check-in: 79361f5222 user: matt tags: v1.55
2013-09-09
16:41
partially borked change to better deal with run queue idiosyncracies check-in: fa1ff570f2 user: mrwellan tags: v1.55
Changes

Modified runs.scm from [de10964a2e] to [c1ce52ca59].

142
143
144
145
146
147
148











149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168

169
170
171
172
173
174
175
176
177
;;
(define *seen-cant-run-tests* (make-hash-table)) ;; use to track tests that we suspect cannot be run
(define (runs:inc-cant-run-tests testname)
  (hash-table-set! *seen-cant-run-tests* testname
		   (+ (hash-table-ref/default *seen-cant-run-tests* testname 0) 1)))
(define (runs:can-keep-running? testname n)
  (< (hash-table-ref/default *seen-cant-run-tests* testname 0) n))












(define (runs:can-run-more-tests jobgroup max-concurrent-jobs)
  (thread-sleep! (cond
		  ((> *runs:can-run-more-tests-count* 20) 2);; obviously haven't had any work to do for a while
		  (else 0)))
  (let* ((num-running             (cdb:remote-run db:get-count-tests-running #f))
	 (num-running-in-jobgroup (cdb:remote-run db:get-count-tests-running-in-jobgroup #f jobgroup))
	 (job-group-limit         (config-lookup *configdat* "jobgroups" jobgroup)))
    (if (> (+ num-running num-running-in-jobgroup) 0)
	(set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1)))
    (if (not (eq? *last-num-running-tests* num-running))
	(begin
	  (debug:print 2 "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	  (set! *last-num-running-tests* num-running)))
    (if (not (eq? 0 *globalexitstatus*))
	(list #f num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit)
	(let ((can-not-run-more (cond
				 ;; if max-concurrent-jobs is set and the number running is greater 
				 ;; than it than cannot run more jobs
				 ((and max-concurrent-jobs (>= num-running max-concurrent-jobs))

				  (debug:print 0 "WARNING: Max running jobs exceeded, current number running: " num-running 
					       ", max_concurrent_jobs: " max-concurrent-jobs)
				  #t)
				 ;; if job-group-limit is set and number of jobs in the group is greater
				 ;; than the limit then cannot run more jobs of this kind
				 ((and job-group-limit
				       (>= num-running-in-jobgroup job-group-limit))
				  (debug:print 1 "WARNING: number of jobs " num-running-in-jobgroup 
					       " in " jobgroup " exceeded, will not run " (tests:testqueue-get-testname test-record))







>
>
>
>
>
>
>
>
>
>
>




















>
|
|







142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
;;
(define *seen-cant-run-tests* (make-hash-table)) ;; use to track tests that we suspect cannot be run
(define (runs:inc-cant-run-tests testname)
  (hash-table-set! *seen-cant-run-tests* testname
		   (+ (hash-table-ref/default *seen-cant-run-tests* testname 0) 1)))
(define (runs:can-keep-running? testname n)
  (< (hash-table-ref/default *seen-cant-run-tests* testname 0) n))

(define *runs:denoise* (make-hash-table)) ;; key => last-time-ran

(define (runs:lownoise key waitval)
  (let ((lasttime (hash-table-ref/default *runs:denoise* key 0))
	(currtime (current-seconds)))
    (if (> (- currtime lasttime) waitval)
	(begin
	  (hash-table-set! *runs:denoise* key currtime)
	  #t)
	#f)))

(define (runs:can-run-more-tests jobgroup max-concurrent-jobs)
  (thread-sleep! (cond
		  ((> *runs:can-run-more-tests-count* 20) 2);; obviously haven't had any work to do for a while
		  (else 0)))
  (let* ((num-running             (cdb:remote-run db:get-count-tests-running #f))
	 (num-running-in-jobgroup (cdb:remote-run db:get-count-tests-running-in-jobgroup #f jobgroup))
	 (job-group-limit         (config-lookup *configdat* "jobgroups" jobgroup)))
    (if (> (+ num-running num-running-in-jobgroup) 0)
	(set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1)))
    (if (not (eq? *last-num-running-tests* num-running))
	(begin
	  (debug:print 2 "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	  (set! *last-num-running-tests* num-running)))
    (if (not (eq? 0 *globalexitstatus*))
	(list #f num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit)
	(let ((can-not-run-more (cond
				 ;; if max-concurrent-jobs is set and the number running is greater 
				 ;; than it than cannot run more jobs
				 ((and max-concurrent-jobs (>= num-running max-concurrent-jobs))
				  (if (runs:lownoise "mcj msg" 60)
				      (debug:print 0 "WARNING: Max running jobs exceeded, current number running: " num-running 
						   ", max_concurrent_jobs: " max-concurrent-jobs))
				  #t)
				 ;; if job-group-limit is set and number of jobs in the group is greater
				 ;; than the limit then cannot run more jobs of this kind
				 ((and job-group-limit
				       (>= num-running-in-jobgroup job-group-limit))
				  (debug:print 1 "WARNING: number of jobs " num-running-in-jobgroup 
					       " in " jobgroup " exceeded, will not run " (tests:testqueue-get-testname test-record))
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378










379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424

425
426
427
428
429











430
431
432
433
434
435
436
437
(define (runs:queue-next-reg tal reg n regfull)
  (if regfull
      (cdr reg)
      (if (null? tal) ;; if tal is null and reg not full then '() as reg contents moved to tal
	  '()
	  reg)))

(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen)
  (let* ((loop-list       (list hed tal reg reruns))
	 (prereqs-not-met (mt:get-prereqs-not-met run-id waitons item-path mode: testmode))
	 (fails           (runs:calc-fails prereqs-not-met))
	 (non-completed   (runs:calc-not-completed prereqs-not-met)))
    (debug:print-info 4 "START OF INNER COND #2 "
		      "\n can-run-more:    " can-run-more
		      "\n testname:        " hed
		      "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met)
		      "\n non-completed:   " (runs:pretty-string non-completed) 
		      "\n fails:           " (runs:pretty-string fails)
		      "\n testmode:        " testmode
		      "\n (eq? testmode 'toplevel): " (eq? testmode 'toplevel)
		      "\n (null? non-completed):    " (null? non-completed)
		      "\n reruns:          " reruns
		      "\n items:           " items
		      "\n can-run-more:    " can-run-more)

    (cond
     ;; all prereqs met, fire off the test
     ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch










     ((or (null? prereqs-not-met)
	  (and (eq? testmode 'toplevel)
	       (null? non-completed)))
      (debug:print-info 4 "runs:expand-items: (or (null? prereqs-not-met) (and (eq? testmode 'toplevel)(null? non-completed)))")
      (let ((test-name (tests:testqueue-get-testname test-record)))
	(setenv "MT_TEST_NAME" test-name) ;; 
	(setenv "MT_RUNNAME"   runname)
	(set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
	(let ((items-list (items:get-items-from-config tconfig)))
	  (if (list? items-list)
	      (begin
		(tests:testqueue-set-items! test-record items-list)
		(list hed tal reg reruns))
	      (begin
		(debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this")
		(exit 1))))))

     ((and (null? fails)
	   (not (null? non-completed)))
      (debug:print-info 1 "no fails in prerequisites for " hed ", waiting on tests; "
			(string-intersperse (map (lambda (x)
						   (if (string? x)
						       x
						       (runs:make-full-test-name (db:test-get-testname x)
										 (db:test-get-item-path x))))
						 non-completed) ", ")
			", delay launching " hed)
      ;; num-retries code was here
      ;; we use this opportunity to move contents of reg to tal
      ;; but also lets check that the prerequisites are all in the newtal or reruns lists
      


      ;; (let* ((allinqueue (map (lambda (x)(if (string? x) x (db:test-get-testname x)))
      ;;   		      (append newtal reruns)))
      ;;        (prereqstrs (map (lambda (x)(if (string? x) x (db:test-get-testname x)))
      ;;   		      prereqs-not-met))
      ;;        (notinqueue (filter (lambda (x)
      ;;   			   (not (member x allinqueue)))
      ;;   			 prereqstrs)))
      ;;   (if (null? notinqueue)
      ;;       (if (runs:can-keep-running? hed 5)
      ;;   	(begin
      ;;   	  (runs:inc-cant-run-tests hed)
      ;;   	  (list (car newtal)(append (cdr newtal) reg) '() reruns))
      ;;   	(begin

      ;;   	  (debug:print 0 "WARNING: dropping " hed " from queue as it has prerequisites missing from the queue: " (string-intersperse notinqueue ", "))
      ;;   	  (list (runs:queue-next-hed tal reg reglen regfull)
      ;;   		(runs:queue-next-tal tal reg reglen regfull)
      ;;   		(runs:queue-next-reg tal reg reglen regfull)
      ;;   		reruns)))











	    (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; )) ;; an issue with prereqs not yet met?

     ((and (null? fails)
	   (null? non-completed))
      (if  (runs:can-keep-running? hed 5)
	  (begin
	    (runs:inc-cant-run-tests hed)
	    (debug:print-info 1 "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0))







|




















>
>
>
>
>
>
>
>
>
>



















<
<
<
|
<
<
<
<



|
<
<
|
|
|
|
|
|
|
|
|
|
|
|
|
>
|
|
|
|
|
>
>
>
>
>
>
>
>
>
>
>
|







363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419



420




421
422
423
424


425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
(define (runs:queue-next-reg tal reg n regfull)
  (if regfull
      (cdr reg)
      (if (null? tal) ;; if tal is null and reg not full then '() as reg contents moved to tal
	  '()
	  reg)))

(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry)
  (let* ((loop-list       (list hed tal reg reruns))
	 (prereqs-not-met (mt:get-prereqs-not-met run-id waitons item-path mode: testmode))
	 (fails           (runs:calc-fails prereqs-not-met))
	 (non-completed   (runs:calc-not-completed prereqs-not-met)))
    (debug:print-info 4 "START OF INNER COND #2 "
		      "\n can-run-more:    " can-run-more
		      "\n testname:        " hed
		      "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met)
		      "\n non-completed:   " (runs:pretty-string non-completed) 
		      "\n fails:           " (runs:pretty-string fails)
		      "\n testmode:        " testmode
		      "\n (eq? testmode 'toplevel): " (eq? testmode 'toplevel)
		      "\n (null? non-completed):    " (null? non-completed)
		      "\n reruns:          " reruns
		      "\n items:           " items
		      "\n can-run-more:    " can-run-more)

    (cond
     ;; all prereqs met, fire off the test
     ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch
     
     ((member (hash-table-ref/default test-registry (runs:make-full-test-name hed item-path) 'n/a)
	      '(COMPLETED INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here
      (debug:print-info 1 "Test " hed " is " (hash-table-ref test-registry (runs:make-full-test-name hed item-path)) ". Removing it from the queue")
      (list (runs:queue-next-hed tal reg reglen regfull)
	    (runs:queue-next-tal tal reg reglen regfull)
	    (runs:queue-next-reg tal reg reglen regfull)
	    reruns))

     ;; 
     ((or (null? prereqs-not-met)
	  (and (eq? testmode 'toplevel)
	       (null? non-completed)))
      (debug:print-info 4 "runs:expand-items: (or (null? prereqs-not-met) (and (eq? testmode 'toplevel)(null? non-completed)))")
      (let ((test-name (tests:testqueue-get-testname test-record)))
	(setenv "MT_TEST_NAME" test-name) ;; 
	(setenv "MT_RUNNAME"   runname)
	(set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
	(let ((items-list (items:get-items-from-config tconfig)))
	  (if (list? items-list)
	      (begin
		(tests:testqueue-set-items! test-record items-list)
		(list hed tal reg reruns))
	      (begin
		(debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this")
		(exit 1))))))

     ((and (null? fails)
	   (not (null? non-completed)))








      ;; num-retries code was here
      ;; we use this opportunity to move contents of reg to tal
      ;; but also lets check that the prerequisites are all in the newtal or reruns lists



      (let* ((allinqueue (map (lambda (x)(if (string? x) x (db:test-get-testname x)))
        		      (append newtal reruns)))
             (prereqstrs (map (lambda (x)(if (string? x) x (db:test-get-testname x)))
        		      prereqs-not-met))
             (notinqueue (filter (lambda (x)
        			   (not (member x allinqueue)))
        			 prereqstrs)))
        (if (null? notinqueue)
            (if (runs:can-keep-running? hed 5) ;; try five times
        	(begin
        	  (runs:inc-cant-run-tests hed)
        	  (list (car newtal)(append (cdr newtal) reg) '() reruns))
        	(begin
		  (if (runs:lownoise (conc "no fails prereq, null notinqueue " hed) 30)
		      (debug:print 1 "WARNING: test " hed " has no failed prerequisites but does have prerequistes that are NOT in the queue: " (string-intersperse notinqueue ", ")))
        	  (list (runs:queue-next-hed tal reg reglen regfull)
        		(runs:queue-next-tal tal reg reglen regfull)
        		(runs:queue-next-reg tal reg reglen regfull)
        		reruns)))
	    ;; have prereqs in queue, keep going.
	    (begin
	      (if (runs:lownoise (conc "no fails prereq " hed) 30)
		  (debug:print-info 1 "no fails in prerequisites for " hed ", waiting on tests; "
				    (string-intersperse (map (lambda (x)
							       (if (string? x)
								   x
								   (runs:make-full-test-name (db:test-get-testname x)
											     (db:test-get-item-path x))))
							     non-completed) ", ")
				    ". Delaying launch of " hed "."))
	      (list (car newtal)(append (cdr newtal) reg) '() reruns))))) ;; an issue with prereqs not yet met?

     ((and (null? fails)
	   (null? non-completed))
      (if  (runs:can-keep-running? hed 5)
	  (begin
	    (runs:inc-cant-run-tests hed)
	    (debug:print-info 1 "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0))
451
452
453
454
455
456
457
458



459
460



461











462
463
464
465
466
467
468
			", removing it from to-do list")
      (if (or (not (null? reg))(not (null? tal)))
	  (list (runs:queue-next-hed tal reg reglen regfull)
		(runs:queue-next-tal tal reg reglen regfull)
		(runs:queue-next-reg tal reg reglen regfull)
		(cons hed reruns))
	  #f)) ;; #f flags do not loop




     (else
      (debug:print 1 "ERROR: No handler for this condition.")



      (list (car newtal)(cdr newtal) reg reruns)))))












(define (runs:process-expanded-tests hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry)
  (let* ((run-limits-info         (runs:can-run-more-tests jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
	 (have-resources          (car run-limits-info))
	 (num-running             (list-ref run-limits-info 1))
	 (num-running-in-jobgroup (list-ref run-limits-info 2)) 
	 (max-concurrent-jobs     (list-ref run-limits-info 3))







|
>
>
>

|
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>







476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
			", removing it from to-do list")
      (if (or (not (null? reg))(not (null? tal)))
	  (list (runs:queue-next-hed tal reg reglen regfull)
		(runs:queue-next-tal tal reg reglen regfull)
		(runs:queue-next-reg tal reg reglen regfull)
		(cons hed reruns))
	  #f)) ;; #f flags do not loop
     ((and (not (null? fails))(eq? testmode 'toplevel))
      (if (or (not (null? reg))(not (null? tal)))
	   (list (car newtal)(append (cdr newtal) reg) '() reruns)
	  #f)) 
     (else
      (debug:print 1 "WARNING: FAILS or incomplete tests are preventing completion of this run. Dropping test " hed " from the run queue")
      (list (runs:queue-next-hed tal reg reglen regfull)
		(runs:queue-next-tal tal reg reglen regfull)
		(runs:queue-next-reg tal reg reglen regfull)
		reruns))))) ;; (list (car newtal)(cdr newtal) reg reruns)))))

(define (runs:mixed-list-testname-and-testrec->list-of-strings inlst)
  (map (lambda (t)
	 (cond
	  ((vector? t)
	   (conc (db:test-get-state t) "/" (db:test-get-status t)))
	  ((string? t)
	   t)
	  (else 
	   (conc t))))
       inlst))

(define (runs:process-expanded-tests hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry)
  (let* ((run-limits-info         (runs:can-run-more-tests jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
	 (have-resources          (car run-limits-info))
	 (num-running             (list-ref run-limits-info 1))
	 (num-running-in-jobgroup (list-ref run-limits-info 2)) 
	 (max-concurrent-jobs     (list-ref run-limits-info 3))
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
		       (map (lambda (t)
			      (if (vector? t)
				  (conc (db:test-get-state t) "/" (db:test-get-status t))
				  (conc " WARNING: t is not a vector=" t )))
			    prereqs-not-met) ", ") ") fails: " fails)
    
    (if (not (null? prereqs-not-met))
	(debug:print-info 1 "waiting on tests; " (string-intersperse prereqs-not-met ", ")))

    ;; Don't know at this time if the test have been launched at some time in the past
    ;; i.e. is this a re-launch?
    (debug:print-info 4 "run-limits-info = " run-limits-info)
    
    (cond
     







|







518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
		       (map (lambda (t)
			      (if (vector? t)
				  (conc (db:test-get-state t) "/" (db:test-get-status t))
				  (conc " WARNING: t is not a vector=" t )))
			    prereqs-not-met) ", ") ") fails: " fails)
    
    (if (not (null? prereqs-not-met))
	(debug:print-info 1 "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", ")))

    ;; Don't know at this time if the test have been launched at some time in the past
    ;; i.e. is this a re-launch?
    (debug:print-info 4 "run-limits-info = " run-limits-info)
    
    (cond
     
546
547
548
549
550
551
552

553
554
555
556
557
558
559
560
			 ", "))
      (thread-sleep! 0.1)
      (list hed tal reg reruns))
     
     ;; If no resources are available just kill time and loop again
     ;;
     ((not have-resources) ;; simply try again after waiting a second

      (debug:print-info 1 "no resources to run new tests, waiting ...")
      ;; Have gone back and forth on this but db starvation is an issue.
      ;; wait one second before looking again to run jobs.
      (thread-sleep! 1)
      ;; could have done hed tal here but doing car/cdr of newtal to rotate tests
      (list (car newtal)(cdr newtal) reg reruns))
     
     ;; This is the final stage, everything is in place so launch the test







>
|







588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
			 ", "))
      (thread-sleep! 0.1)
      (list hed tal reg reruns))
     
     ;; If no resources are available just kill time and loop again
     ;;
     ((not have-resources) ;; simply try again after waiting a second
      (if (runs:lownoise "no resources" 60)
	  (debug:print-info 1 "no resources to run new tests, waiting ..."))
      ;; Have gone back and forth on this but db starvation is an issue.
      ;; wait one second before looking again to run jobs.
      (thread-sleep! 1)
      ;; could have done hed tal here but doing car/cdr of newtal to rotate tests
      (list (car newtal)(cdr newtal) reg reruns))
     
     ;; This is the final stage, everything is in place so launch the test
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
	 ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
	 ;;    - but only do that if resources exist to kick off the job
	 ;; EXPAND ITEMS
	 ((or (procedure? items)(eq? items 'have-procedure))
	  (let ((can-run-more    (runs:can-run-more-tests jobgroup max-concurrent-jobs)))
	    (if (and (list? can-run-more)
		     (car can-run-more))
		(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen)))
		  (if loop-list
		      (apply loop loop-list)))
		;; if can't run more just loop with next possible test
		(loop (car newtal)(cdr newtal) reg reruns))))
	    
	 ;; this case should not happen, added to help catch any bugs
	 ((and (list? items) itemdat)







|







798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
	 ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
	 ;;    - but only do that if resources exist to kick off the job
	 ;; EXPAND ITEMS
	 ((or (procedure? items)(eq? items 'have-procedure))
	  (let ((can-run-more    (runs:can-run-more-tests jobgroup max-concurrent-jobs)))
	    (if (and (list? can-run-more)
		     (car can-run-more))
		(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry)))
		  (if loop-list
		      (apply loop loop-list)))
		;; if can't run more just loop with next possible test
		(loop (car newtal)(cdr newtal) reg reruns))))
	    
	 ;; this case should not happen, added to help catch any bugs
	 ((and (list? items) itemdat)
968
969
970
971
972
973
974

975

976
977
978
979
980
981
982
	 (if (> (- (current-seconds)(+ (db:test-get-event_time testdat)
				       (db:test-get-run_duration testdat)))
		600) ;; i.e. no update for more than 600 seconds
	     (begin
	       (debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD")
	       (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "Test is stuck or dead" #f))
	     (debug:print 2 "NOTE: " test-name " is already running")))

	(else       (debug:print 0 "ERROR: Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat)))))))


;;======================================================================
;; END OF NEW STUFF
;;======================================================================

(define (get-dir-up-n dir . params) 
  (let ((dparts  (string-split dir "/"))







>
|
>







1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
	 (if (> (- (current-seconds)(+ (db:test-get-event_time testdat)
				       (db:test-get-run_duration testdat)))
		600) ;; i.e. no update for more than 600 seconds
	     (begin
	       (debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD")
	       (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "Test is stuck or dead" #f))
	     (debug:print 2 "NOTE: " test-name " is already running")))
	(else      
	 (debug:print 0 "ERROR: Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat))
	 (hash-table-set! test-registry (runs:make-full-test-name test-name test-path)(string->symbol (test:get-state testdat))))))))

;;======================================================================
;; END OF NEW STUFF
;;======================================================================

(define (get-dir-up-n dir . params) 
  (let ((dparts  (string-split dir "/"))