Megatest

Check-in [6f43371532]
Login
Overview
Comment:Passes bigrun multi-level dependencies test. toprun and fullrun still appear to timeout.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.64-end-runs-fix
Files: files | file ages | folders
SHA1: 6f43371532f34340b347c6e0f9a74e95d302ee44
User & Date: matt on 2017-06-13 23:49:47
Other Links: branch diff | manifest | tags
Context
2017-06-14
00:14
The NOTHING LEFT TO RUN section is not getting hit. Keeping it for now. Closed-Leaf check-in: a16fc0d2d0 user: matt tags: v1.64-end-runs-fix
2017-06-13
23:49
Passes bigrun multi-level dependencies test. toprun and fullrun still appear to timeout. check-in: 6f43371532 user: matt tags: v1.64-end-runs-fix
04:32
Slightly better but still fails the bigrun3 multi-level dependent test run check-in: fda4ad8acc user: matt tags: v1.64-end-runs-fix
Changes

Modified runs.scm from [a83dea5e0f] to [c3d225d834].

963
964
965
966
967
968
969








970
971
972
973
974
975
976
      ;; (thread-sleep! *global-delta*)
      (if (or (not (null? tal))(not (null? reg)))
	  (list (runs:queue-next-hed tal reg reglen regfull)
		(runs:queue-next-tal tal reg reglen regfull)
		(runs:queue-next-reg tal reg reglen regfull)
		reruns)
	  #f))








     
     ;; must be we have unmet prerequisites
     ;;
     (else
      (debug:print 4 *default-log-port* "FAILS: " fails)
      ;; If one or more of the prereqs-not-met are FAIL then we can issue
      ;; a message and drop hed from the items to be processed.







>
>
>
>
>
>
>
>







963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
      ;; (thread-sleep! *global-delta*)
      (if (or (not (null? tal))(not (null? reg)))
	  (list (runs:queue-next-hed tal reg reglen regfull)
		(runs:queue-next-tal tal reg reglen regfull)
		(runs:queue-next-reg tal reg reglen regfull)
		reruns)
	  #f))

     ;; this might speed things up!?
     ;; ((null? (filter (lambda (x)
     ;; 		       (not (member (hash-table-ref/default test-registry x)
     ;; 				    '(done removed))))
     ;; 		     (hash-table-keys test-registry)))
     ;;  (debug:print 0 *default-log-port* "NOTHING LEFT TO RUN!")
     ;;  #f)
     
     ;; must be we have unmet prerequisites
     ;;
     (else
      (debug:print 4 *default-log-port* "FAILS: " fails)
      ;; If one or more of the prereqs-not-met are FAIL then we can issue
      ;; a message and drop hed from the items to be processed.
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
			  reruns ;; WAS: (cons hed reruns) ;; but that makes no sense?
			  ))
		  (let ((nth-try (hash-table-ref/default test-registry hed 0)))
		    (cond
		     ((member "RUNNING" (map db:test-get-state prereqs-not-met))
		      (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60)
			  (debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet."))
		      (thread-sleep! 4)
		      (list (runs:queue-next-hed newtal reg reglen regfull)
			    (runs:queue-next-tal newtal reg reglen regfull)
			    (runs:queue-next-reg newtal reg reglen regfull)
			    reruns))
		     ((or (not nth-try)
			  (and (number? nth-try)
			       (< nth-try 10)))







|







1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
			  reruns ;; WAS: (cons hed reruns) ;; but that makes no sense?
			  ))
		  (let ((nth-try (hash-table-ref/default test-registry hed 0)))
		    (cond
		     ((member "RUNNING" (map db:test-get-state prereqs-not-met))
		      (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60)
			  (debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet."))
		      (thread-sleep! 1)
		      (list (runs:queue-next-hed newtal reg reglen regfull)
			    (runs:queue-next-tal newtal reg reglen regfull)
			    (runs:queue-next-reg newtal reg reglen regfull)
			    reruns))
		     ((or (not nth-try)
			  (and (number? nth-try)
			       (< nth-try 10)))
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072

1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
			 (if (null? tal)
			     #f ;; yes, really
			     (list (car tal)(cdr tal) reg reruns)))
			((done)
			 (if (runs:lownoise (conc "FAILED prerequisites or other issue - done" hed) 60)
			     (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue and is marked \"done\" internally. Dropping it."))
			 (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "CANNOTRUN" "Failed prerequisites or other issue. CANNOTRUN")
			 (hash-table-set! test-registry hed 0)
			 (list (runs:queue-next-hed newtal reg reglen regfull)
			       (runs:queue-next-tal newtal reg reglen regfull)
			       (runs:queue-next-reg newtal reg reglen regfull)
			       reruns))
			(else
			 (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60)
			     (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state " nth-try " will be overridden and we'll retry."))
			 (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f)
			 (hash-table-set! test-registry hed 0)
			 (list (runs:queue-next-hed newtal reg reglen regfull)
			       (runs:queue-next-tal newtal reg reglen regfull)
			       (runs:queue-next-reg newtal reg reglen regfull)
			       reruns))))
		     (else
		      (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
			  (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))
		      ;; (debug:print 0 *default-log-port* "         prereqs: " prereqs-not-met)
		      (hash-table-set! test-registry hed 'removed)
		      (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f)
		      ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug.
		      (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL
		      (list (if (null? tal)(car newtal)(car tal))
			    tal
			    reg
			    reruns)))))
	      ;; can't drop this - maybe running? Just keep trying

	      (let ((runable-tests (runs:runable-tests prereqs-not-met)))
		(if (null? runable-tests)
		    #f   ;; I think we are truly done here
		    (list (runs:queue-next-hed newtal reg reglen regfull)
			    (runs:queue-next-tal newtal reg reglen regfull)
			    (runs:queue-next-reg newtal reg reglen regfull)
			    reruns)))))))))

;; scan a list of tests looking to see if any are potentially runnable
;;
(define (runs:runable-tests tests)
  (filter (lambda (t)
	    (if (not (vector? t))
		t
		(let ((state  (db:test-get-state t))
		      (status (db:test-get-status t)))
		  (case (string->symbol state)
		    ((COMPLETED INCOMPLETE) #f)
		    ((NOT_STARTED)
		     (if (member status '("TEN_STRIKES" "BLOCKED" "PREQ_FAIL" "ZERO_ITEMS" "PREQ_DISCARDED" "TIMED_OUT" ))
			 #f
			 t))
		    ((DELETED) #f)
		    (else t)))))
	  tests))

;; move all the miscellanea into this struct







|




















|





>



















|







1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
			 (if (null? tal)
			     #f ;; yes, really
			     (list (car tal)(cdr tal) reg reruns)))
			((done)
			 (if (runs:lownoise (conc "FAILED prerequisites or other issue - done" hed) 60)
			     (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue and is marked \"done\" internally. Dropping it."))
			 (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "CANNOTRUN" "Failed prerequisites or other issue. CANNOTRUN")
			 (hash-table-set! test-registry hed 'removed)
			 (list (runs:queue-next-hed newtal reg reglen regfull)
			       (runs:queue-next-tal newtal reg reglen regfull)
			       (runs:queue-next-reg newtal reg reglen regfull)
			       reruns))
			(else
			 (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60)
			     (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state " nth-try " will be overridden and we'll retry."))
			 (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f)
			 (hash-table-set! test-registry hed 0)
			 (list (runs:queue-next-hed newtal reg reglen regfull)
			       (runs:queue-next-tal newtal reg reglen regfull)
			       (runs:queue-next-reg newtal reg reglen regfull)
			       reruns))))
		     (else
		      (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
			  (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))
		      ;; (debug:print 0 *default-log-port* "         prereqs: " prereqs-not-met)
		      (hash-table-set! test-registry hed 'removed)
		      (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f)
		      ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug.
		      ;; (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL
		      (list (if (null? tal)(car newtal)(car tal))
			    tal
			    reg
			    reruns)))))
	      ;; can't drop this - maybe running? Just keep trying
	      ;; else clause from (or (not (null? reg))(not (null? tal))) above
	      (let ((runable-tests (runs:runable-tests prereqs-not-met)))
		(if (null? runable-tests)
		    #f   ;; I think we are truly done here
		    (list (runs:queue-next-hed newtal reg reglen regfull)
			    (runs:queue-next-tal newtal reg reglen regfull)
			    (runs:queue-next-reg newtal reg reglen regfull)
			    reruns)))))))))

;; scan a list of tests looking to see if any are potentially runnable
;;
(define (runs:runable-tests tests)
  (filter (lambda (t)
	    (if (not (vector? t))
		t
		(let ((state  (db:test-get-state t))
		      (status (db:test-get-status t)))
		  (case (string->symbol state)
		    ((COMPLETED INCOMPLETE) #f)
		    ((NOT_STARTED)
		     (if (member status '("TEN_STRIKES" "BLOCKED" "PREQ_FAIL" "ZERO_ITEMS" "PREQ_DISCARDED" "TIMED_OUT" "CANNOTRUN"))
			 #f
			 t))
		    ((DELETED) #f)
		    (else t)))))
	  tests))

;; move all the miscellanea into this struct
1332
1333
1334
1335
1336
1337
1338
1339

1340
1341
1342
1343
1344
1345
1346
		     "\n  waitons:     " waitons
		     "\n  num-retries: " num-retries
		     "\n  tal:         " tal
		     "\n  reruns:      " reruns
		     "\n  regfull:     " regfull
		     "\n  reglen:      " reglen
		     "\n  length reg:  " (length reg)
		     "\n  reg:         " reg)


	;; check for hed in waitons => this would be circular, remove it and issue an
	;; error
	(if (member test-name waitons)
	    (begin
	      (debug:print-error 0 *default-log-port* "test " test-name " has listed itself as a waiton, please correct this!")
	      (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons))))







|
>







1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
		     "\n  waitons:     " waitons
		     "\n  num-retries: " num-retries
		     "\n  tal:         " tal
		     "\n  reruns:      " reruns
		     "\n  regfull:     " regfull
		     "\n  reglen:      " reglen
		     "\n  length reg:  " (length reg)
		     "\n  reg:         " reg
		     "\n  flag:        " (hash-table-ref/default test-registry tfullname 'x))

	;; check for hed in waitons => this would be circular, remove it and issue an
	;; error
	(if (member test-name waitons)
	    (begin
	      (debug:print-error 0 *default-log-port* "test " test-name " has listed itself as a waiton, please correct this!")
	      (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons))))
1359
1360
1361
1362
1363
1364
1365







1366
1367

1368
1369
1370
1371
1372
1373
1374
1375
					    (not (member waiton reruns)))
				       1
				       #f))
				 waitons))))) ;; could do this more elegantly with a marker....
	  (debug:print 0 *default-log-port* "WARNING: Marking test " tfullname " as not runnable. It is waiting on tests that cannot be run. Giving up now.")
	  (hash-table-set! test-registry tfullname 'removed))








	 ;; items is #f then the test is ok to be handed off to launch (but not before)
	 ;; 

	 ((not items)
	  (debug:print-info 4 *default-log-port* "OUTER COND: (not items)")
	  (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests))
		   (not (null? tal)))
	      (loop (car tal)(cdr tal) reg reruns))
	  (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))
	  (runs:dat-can-run-more-tests-set! runsdat (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
	  (let ((loop-list (runs:process-expanded-tests runsdat testdat)))







>
>
>
>
>
>
>
|

>
|







1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
					    (not (member waiton reruns)))
				       1
				       #f))
				 waitons))))) ;; could do this more elegantly with a marker....
	  (debug:print 0 *default-log-port* "WARNING: Marking test " tfullname " as not runnable. It is waiting on tests that cannot be run. Giving up now.")
	  (hash-table-set! test-registry tfullname 'removed))

	 ;; get rid of definitively removed items
	 ((member (hash-table-ref/default test-registry tfullname 'x) '(removed CANNOTRUN))
	  (debug:print 0 *default-log-port* "INFO: Dropping test " tfullname " from the tests queue due to flag "
		       (hash-table-ref/default test-registry tfullname 'x))
	  (if (not (null? tal))
	      (loop (car tal)(cdr tal) reg reruns)))
	 
	 ;; items is #f then the test is ok to be handed off to launch (but not before), check that the test was not marked for removal
	 ;; 
	 ((and (not (member (hash-table-ref/default test-registry tfullname 'x) '(removed CANNOTRUN)))
	       (not items))
	  (debug:print-info 4 *default-log-port* "OUTER COND: (not items)")
	  (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests))
		   (not (null? tal)))
	      (loop (car tal)(cdr tal) reg reruns))
	  (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))
	  (runs:dat-can-run-more-tests-set! runsdat (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
	  (let ((loop-list (runs:process-expanded-tests runsdat testdat)))