Megatest

Check-in [a2c11c53bc]
Login
Overview
Comment:Fixed couple remaining issues with queue handling
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.55
Files: files | file ages | folders
SHA1: a2c11c53bc4a4d7b843552f18720a9bfc9a06afe
User & Date: mrwellan on 2013-09-10 12:04:29
Other Links: branch diff | manifest | tags
Context
2013-09-10
17:27
Misc fixes check-in: e5b733b81c user: mrwellan tags: v1.55
12:04
Fixed couple remaining issues with queue handling check-in: a2c11c53bc user: mrwellan tags: v1.55
00:27
Queue idiosyncrasies seem fixed. Needs more testing... check-in: 79361f5222 user: matt tags: v1.55
Changes

Modified common.scm from [ff172520ae] to [23d24f4c55].

94
95
96
97
98
99
100




101
102
103
104
105
106
107
;;======================================================================

;; The standard test state names, as strings, one per line for easy diffing.
(define *common:std-states*
  (list "COMPLETED"
	"NOT_STARTED"
	"RUNNING"
	"REMOTEHOSTSTART"
	"LAUNCHED"
	"KILLED"
	"KILLREQ"
	"STUCK"))

;; The standard test status names, as strings, one per line for easy diffing.
(define *common:std-statuses*
  (list "PASS"
	"WARN"
	"FAIL"
	"CHECK"
	"n/a"
	"WAIVED"
	"SKIP"
	"DELETED"
	"STUCK/DEAD"))





;;======================================================================
;; D E B U G G I N G   S T U F F 
;;======================================================================

;; Debug verbosity level; starts at 1.
;; NOTE(review): presumably compared against the numeric level passed as the
;; first argument to debug:print / debug:print-info -- confirm in the debug code.
(define *verbosity*         1)
;; Logging switch; starts disabled (#f).
;; NOTE(review): what enabling it does is not visible here -- confirm.
(define *logging*           #f)







>
>
>
>







94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
;;======================================================================

;; The standard test state names, as strings, one per line for easy diffing.
(define *common:std-states*
  (list "COMPLETED"
	"NOT_STARTED"
	"RUNNING"
	"REMOTEHOSTSTART"
	"LAUNCHED"
	"KILLED"
	"KILLREQ"
	"STUCK"))

;; The standard test status names, as strings, one per line for easy diffing.
(define *common:std-statuses*
  (list "PASS"
	"WARN"
	"FAIL"
	"CHECK"
	"n/a"
	"WAIVED"
	"SKIP"
	"DELETED"
	"STUCK/DEAD"))

;; Stopping conditions, as symbols: a test in any of these conditions
;; must not be run.
;; NOTE(review): the membership checks visible in runs.scm compare against
;; '(DONOTRUN removed) and mention this list only in comments -- confirm
;; whether anything still reads it.
(define *common:cant-run-states-sym*
  '(COMPLETED
    KILLED
    WAIVED
    UNKNOWN
    INCOMPLETE))

;;======================================================================
;; D E B U G G I N G   S T U F F 
;;======================================================================

;; Debug verbosity level; starts at 1.
;; NOTE(review): presumably compared against the numeric level passed as the
;; first argument to debug:print / debug:print-info -- confirm in the debug code.
(define *verbosity*         1)
;; Logging switch; starts disabled (#f).
;; NOTE(review): what enabling it does is not visible here -- confirm.
(define *logging*           #f)

Modified runs.scm from [c1ce52ca59] to [250a158829].

362
363
364
365
366
367
368


369
370
371
372
373
374
375

;; Pick the reg list to carry into the next pass of the queue loop.
;;
;;   tal     - remaining tail of the queue (list)
;;   reg     - current reg list
;;   n       - accepted for signature parity with the sibling queue-next
;;             helpers; not used here
;;   regfull - true when reg is considered full
;;
;; Returns:
;;   (cdr reg) when regfull is true,
;;   '()       when reg is not full and tal is empty (reg contents moved to tal),
;;   reg       unchanged otherwise.
(define (runs:queue-next-reg tal reg n regfull)
  (cond
   (regfull     (cdr reg))
   ((null? tal) '())
   (else        reg)))



(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry)
  (let* ((loop-list       (list hed tal reg reruns))
	 (prereqs-not-met (mt:get-prereqs-not-met run-id waitons item-path mode: testmode))
	 (fails           (runs:calc-fails prereqs-not-met))
	 (non-completed   (runs:calc-not-completed prereqs-not-met)))
    (debug:print-info 4 "START OF INNER COND #2 "







>
>







362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377

;; Pick the reg list to carry into the next pass of the queue loop.
;;
;;   tal     - remaining tail of the queue (list)
;;   reg     - current reg list
;;   n       - accepted for signature parity with the sibling queue-next
;;             helpers; not used here
;;   regfull - true when reg is considered full
;;
;; Returns:
;;   (cdr reg) when regfull is true,
;;   '()       when reg is not full and tal is empty (reg contents moved to tal),
;;   reg       unchanged otherwise.
(define (runs:queue-next-reg tal reg n regfull)
  (cond
   (regfull     (cdr reg))
   ((null? tal) '())
   (else        reg)))

;; Global counter, starting at 0, of how often the queue code has found both
;; tal and reg empty while dropping a DONOTRUN/removed test. It is bumped by
;; one on each such visit; once it is greater than 2 the process prints a
;; warning and calls (exit 0).
(define runs:nothing-left-in-queue-count 0)

(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry)
  (let* ((loop-list       (list hed tal reg reruns))
	 (prereqs-not-met (mt:get-prereqs-not-met run-id waitons item-path mode: testmode))
	 (fails           (runs:calc-fails prereqs-not-met))
	 (non-completed   (runs:calc-not-completed prereqs-not-met)))
    (debug:print-info 4 "START OF INNER COND #2 "
386
387
388
389
390
391
392
393
394


395
396
397
398











399
400
401
402
403
404
405
		      "\n can-run-more:    " can-run-more)

    (cond
     ;; all prereqs met, fire off the test
     ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch
     
     ((member (hash-table-ref/default test-registry (runs:make-full-test-name hed item-path) 'n/a)
	      '(COMPLETED INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here
      (debug:print-info 1 "Test " hed " is " (hash-table-ref test-registry (runs:make-full-test-name hed item-path)) ". Removing it from the queue")


      (list (runs:queue-next-hed tal reg reglen regfull)
	    (runs:queue-next-tal tal reg reglen regfull)
	    (runs:queue-next-reg tal reg reglen regfull)
	    reruns))












     ;; 
     ((or (null? prereqs-not-met)
	  (and (eq? testmode 'toplevel)
	       (null? non-completed)))
      (debug:print-info 4 "runs:expand-items: (or (null? prereqs-not-met) (and (eq? testmode 'toplevel)(null? non-completed)))")
      (let ((test-name (tests:testqueue-get-testname test-record)))







|
|
>
>
|
|
|
|
>
>
>
>
>
>
>
>
>
>
>







388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
		      "\n can-run-more:    " can-run-more)

    (cond
     ;; all prereqs met, fire off the test
     ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch
     
     ((member (hash-table-ref/default test-registry (runs:make-full-test-name hed item-path) 'n/a)
	      '(DONOTRUN removed)) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here
      (debug:print-info 1 "Test " hed " set to \"" (hash-table-ref test-registry (runs:make-full-test-name hed item-path)) "\". Removing it from the queue")
      (if (or (not (null? tal))
	      (not (null? reg)))
	  (list (runs:queue-next-hed tal reg reglen regfull)
		(runs:queue-next-tal tal reg reglen regfull)
		(runs:queue-next-reg tal reg reglen regfull)
		reruns)
	  (begin
	    (debug:print-info 0 "Nothing left in the queue!")
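	    ;; The check below is (> count 2) and the count is bumped once per visit,
	    ;; so the forced exit fires only after this branch has been reached
	    ;; several times. By then we know we've tried to expand all items;
	    ;; since there must be a logic issue with the handling of loops in the
	    ;; items expand phase we will brute force an exit here.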
	    (if (> runs:nothing-left-in-queue-count 2)
		(begin
		  (debug:print 0 "WARNING: this condition is triggered when there were no items to expand and nothing to run. Please check your run for completeness")
		  (exit 0))
		(set! runs:nothing-left-in-queue-count (+ runs:nothing-left-in-queue-count 1)))
	    #f)))

     ;; 
     ((or (null? prereqs-not-met)
	  (and (eq? testmode 'toplevel)
	       (null? non-completed)))
      (debug:print-info 4 "runs:expand-items: (or (null? prereqs-not-met) (and (eq? testmode 'toplevel)(null? non-completed)))")
      (let ((test-name (tests:testqueue-get-testname test-record)))
699
700
701
702
703
704
705
706
707
708

709
710
711
712
713
714
715
716
717
	     (itemdat     (tests:testqueue-get-itemdat    test-record)) ;; itemdat can be a string, list or #f
	     (items       (tests:testqueue-get-items      test-record))
	     (item-path   (item-list->path itemdat))
	     (tfullname   (runs:make-full-test-name test-name item-path))
	     (newtal      (append tal (list hed)))
	     (regfull     (>= (length reg) reglen)))

	;; Fast skip of tests that are already "COMPLETED"
	;;
	(if (equal? (hash-table-ref/default test-registry tfullname #f) 'COMPLETED)

	    (begin
	      (debug:print-info 0 "Skipping COMPLETED test " tfullname)
	      (if (or (not (null? tal))(not (null? reg)))
		  (loop (runs:queue-next-hed tal reg reglen regfull)
			(runs:queue-next-tal tal reg reglen regfull)
			(runs:queue-next-reg tal reg reglen regfull)
			reruns))))
		  ;; (loop (car tal)(cdr tal) reg reruns))))








|

|
>

|







714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
	     (itemdat     (tests:testqueue-get-itemdat    test-record)) ;; itemdat can be a string, list or #f
	     (items       (tests:testqueue-get-items      test-record))
	     (item-path   (item-list->path itemdat))
	     (tfullname   (runs:make-full-test-name test-name item-path))
	     (newtal      (append tal (list hed)))
	     (regfull     (>= (length reg) reglen)))

	;; Fast skip of tests that are already "COMPLETED" - NO! Cannot do that as the items may not have been expanded yet :(
	;;
	(if (member (hash-table-ref/default test-registry tfullname #f) 
		    '(DONOTRUN removed)) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE))
	    (begin
	      (debug:print-info 0 "Skipping test " tfullname " as it has been marked do not run due to being completed or not runnable")
	      (if (or (not (null? tal))(not (null? reg)))
		  (loop (runs:queue-next-hed tal reg reglen regfull)
			(runs:queue-next-tal tal reg reglen regfull)
			(runs:queue-next-reg tal reg reglen regfull)
			reruns))))
		  ;; (loop (car tal)(cdr tal) reg reruns))))

950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
	    ;; not -rerun and PASS, WARN or CHECK, do not run
	    ((and (or (not rerun)
		      keepgoing)
		  ;; Require to force re-run for COMPLETED or *anything* + PASS,WARN or CHECK
		  (or (member (test:get-status testdat) '("PASS" "WARN" "CHECK" "SKIP" "WAIVED"))
		      (member (test:get-state  testdat) '("COMPLETED")))) 
	     (debug:print-info 2 "running test " test-name "/" item-path " suppressed as it is " (test:get-state testdat) " and " (test:get-status testdat))
	     (hash-table-set! test-registry full-test-name 'COMPLETED)
	     (set! runflag #f))
	    ;; -rerun and status is one of the specified, run it
	    ((and rerun
		  (let* ((rerunlst   (string-split rerun ","))
			 (must-rerun (member (test:get-status testdat) rerunlst)))
		    (debug:print-info 3 "-rerun list: " rerun ", test-status: " (test:get-status testdat)", must-rerun: " must-rerun)
		    must-rerun))







|







966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
	    ;; not -rerun and PASS, WARN or CHECK, do not run
	    ((and (or (not rerun)
		      keepgoing)
		  ;; Require to force re-run for COMPLETED or *anything* + PASS,WARN or CHECK
		  (or (member (test:get-status testdat) '("PASS" "WARN" "CHECK" "SKIP" "WAIVED"))
		      (member (test:get-state  testdat) '("COMPLETED")))) 
	     (debug:print-info 2 "running test " test-name "/" item-path " suppressed as it is " (test:get-state testdat) " and " (test:get-status testdat))
	     (hash-table-set! test-registry full-test-name 'DONOTRUN) ;; COMPLETED)
	     (set! runflag #f))
	    ;; -rerun and status is one of the specified, run it
	    ((and rerun
		  (let* ((rerunlst   (string-split rerun ","))
			 (must-rerun (member (test:get-status testdat) rerunlst)))
		    (debug:print-info 3 "-rerun list: " rerun ", test-status: " (test:get-status testdat)", must-rerun: " must-rerun)
		    must-rerun))
1002
1003
1004
1005
1006
1007
1008
1009

1010
1011
1012
1013
1014
1015
1016
1017
1018
1019


1020


1021
1022
1023
1024
1025
1026
1027
		       (debug:print-info 1 "SKIPPING Test " full-test-name " due to " skip-test))
		     (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags))
			 (begin
			   (print "ERROR: Failed to launch the test. Exiting as soon as possible")
			   (set! *globalexitstatus* 1) ;; 
			   (process-signal (current-process-id) signal/kill))))))))
	((KILLED) 
	 (debug:print 1 "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it."))

	((LAUNCHED REMOTEHOSTSTART RUNNING)  
	 (if (> (- (current-seconds)(+ (db:test-get-event_time testdat)
				       (db:test-get-run_duration testdat)))
		600) ;; i.e. no update for more than 600 seconds
	     (begin
	       (debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD")
	       (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "Test is stuck or dead" #f))
	     (debug:print 2 "NOTE: " test-name " is already running")))
	(else      
	 (debug:print 0 "ERROR: Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat))


	 (hash-table-set! test-registry (runs:make-full-test-name test-name test-path)(string->symbol (test:get-state testdat))))))))



;;======================================================================
;; END OF NEW STUFF
;;======================================================================

(define (get-dir-up-n dir . params) 
  (let ((dparts  (string-split dir "/"))







|
>










>
>
|
>
>







1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
		       (debug:print-info 1 "SKIPPING Test " full-test-name " due to " skip-test))
		     (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags))
			 (begin
			   (print "ERROR: Failed to launch the test. Exiting as soon as possible")
			   (set! *globalexitstatus* 1) ;; 
			   (process-signal (current-process-id) signal/kill))))))))
	((KILLED) 
	 (debug:print 1 "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.")
	 (hash-table-set! test-registry (runs:make-full-test-name test-name test-path) 'DONOTRUN)) ;; KILLED))
	((LAUNCHED REMOTEHOSTSTART RUNNING)  
	 (if (> (- (current-seconds)(+ (db:test-get-event_time testdat)
				       (db:test-get-run_duration testdat)))
		600) ;; i.e. no update for more than 600 seconds
	     (begin
	       (debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD")
	       (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "Test is stuck or dead" #f))
	     (debug:print 2 "NOTE: " test-name " is already running")))
	(else      
	 (debug:print 0 "ERROR: Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat))
	 (case (string->symbol (test:get-state testdat)) 
	   ((COMPLETED INCOMPLETE)
	    (hash-table-set! test-registry (runs:make-full-test-name test-name test-path) 'DONOTRUN))
	   (else
	    (hash-table-set! test-registry (runs:make-full-test-name test-name test-path) 'DONOTRUN))))))))

;;======================================================================
;; END OF NEW STUFF
;;======================================================================

(define (get-dir-up-n dir . params) 
  (let ((dparts  (string-split dir "/"))