Megatest

Diff
Login

Differences From Artifact [8c7a71404f]:

To Artifact [e0d9d07905]:


1003
1004
1005
1006
1007
1008
1009
1010





1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
									(> (string-length dira)(string-length dirb))
									#f)))))
		       (test-retry-time  (make-hash-table))
		       (allow-run-time   10)) ;; seconds to allow for killing tests before just brutally killing 'em
		   (let loop ((test (car sorted-tests))
			      (tal  (cdr sorted-tests)))
		     (let* ((test-id       (db:test-get-id test))
			    (new-test-dat  (cdb:remote-run db:get-test-info-by-id #f test-id))





			    (item-path     (db:test-get-item-path new-test-dat))
			    (test-name     (db:test-get-testname new-test-dat))
			    (run-dir       (db:test-get-rundir new-test-dat))    ;; run dir is from the link tree
			    (real-dir      (if (file-exists? run-dir)
					       (resolve-pathname run-dir)
					       #f))
			    (test-state    (db:test-get-state new-test-dat))
			    (test-fulln    (db:test-get-fullname new-test-dat)))
			   (case action
			     ((remove-runs)
			      (debug:print-info 0 "test: " test-name " itest-state: " test-state)
			      (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))
				  (begin
				    (if (not (hash-table-ref/default test-retry-time test-fulln #f))
					(begin
					  ;; want to set to REMOVING BUT CANNOT do it here?
					  (hash-table-set! test-retry-time test-fulln (current-seconds))))
				    (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time)
				      ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first
				      ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give
				      ;; up and blow it away.
				      (begin
					(debug:print 0 "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing")
					(mt:test-set-state-status-by-id (db:test-get-id test) "FAILEDKILL" "n/a" #f)
					(thread-sleep! 1))
				      (begin
					(mt:test-set-state-status-by-id (db:test-get-id test) "KILLREQ" "n/a" #f)
					(thread-sleep! 1)))
				    ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ...
				    (if (null? tal)
					(loop new-test-dat tal)
					(loop (car tal)(append tal (list new-test-dat)))))
				  (begin
				    (mt:test-set-state-status-by-id (db:test-get-id test) "REMOVING" "LOCKED" #f)
				    (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir)
				    (if (and real-dir 
					     (> (string-length real-dir) 5)
					     (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc.
					(begin ;; let* ((realpath (resolve-pathname run-dir)))
					  (debug:print-info 1 "Recursively removing " real-dir)
					  (if (file-exists? real-dir)
					      (runs:safe-delete-test-dir real-dir)
					      (debug:print 0 "WARNING: test dir " real-dir " appears to not exist or is not readable")))
					(if real-dir 
					    (debug:print 0 "WARNING: directory " real-dir " does not exist")
					    (debug:print 0 "WARNING: no real directory corrosponding to link " run-dir ", nothing done")))
				    (if (symbolic-link? run-dir)
					(begin
					  (debug:print-info 1 "Removing symlink " run-dir)
					  (handle-exceptions
					   exn
					   (debug:print 0 "ERROR:  Failed to remove symlink " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue")
					   (delete-file run-dir)))
					(if (directory? run-dir)
					    (if (> (directory-fold (lambda (f x)(+ 1 x)) 0 run-dir) 0)
						(debug:print 0 "WARNING: refusing to remove " run-dir " as it is not empty")
						(handle-exceptions
						 exn
						 (debug:print 0 "ERROR:  Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue")
						 (delete-directory run-dir)))
					    (if run-dir
						(debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink")
						(debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted."))
					    ))
				    ;; Only delete the records *after* removing the directory. If things fail we have a record 
				    (cdb:remote-run db:delete-test-records db #f (db:test-get-id test))
				    (if (not (null? tal))
					(loop (car tal)(cdr tal))))))
			     ((set-state-status)
			      (debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status))
			      (mt:test-set-state-status-by-id (db:test-get-id test) (car state-status)(cadr state-status) #f)
			      (if (not (null? tal))
				  (loop (car tal)(cdr tal))))
			     ((run-wait)
			      (debug:print-info 2 "still waiting, " (length tests) " tests still running")
			      (thread-sleep! 10)
			      (let ((new-tests (proc-get-tests run-id)))
				(if (null? new-tests)
				    (debug:print-info 1 "Run completed according to zero tests matching provided criteria.")
				    (loop (car new-tests)(cdr new-tests))))))))
		   )))
	   ;; remove the run if zero tests remain
	   (if (eq? action 'remove-runs)
	       (let ((remtests (mt:get-tests-for-run (db:get-value-by-header run header "id") #f '("DELETED") '("n/a") not-in: #t)))
		 (if (null? remtests) ;; no more tests remaining
		     (let* ((dparts  (string-split lasttpath "/"))
			    (runpath (conc "/" (string-intersperse 
						(take dparts (- (length dparts) 1))







|
>
>
>
>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
									(> (string-length dira)(string-length dirb))
									#f)))))
		       (test-retry-time  (make-hash-table))
		       (allow-run-time   10)) ;; seconds to allow for killing tests before just brutally killing 'em
		   (let loop ((test (car sorted-tests))
			      (tal  (cdr sorted-tests)))
		     (let* ((test-id       (db:test-get-id test))
			    (new-test-dat  (cdb:get-test-info-by-id *runremote* test-id)))
		       (if (not new-test-dat)
			   (begin
			     (debug:print 0 "ERROR: We have a test-id of " test-id " but no record was found. NOTE: No locking of records is done between processes, do not simultaneously remove the same run from two processes!")
			     (if (not (null? tal))
				 (loop (car tal)(cdr tal))))
			   (let* ((item-path     (db:test-get-item-path new-test-dat))
				  (test-name     (db:test-get-testname new-test-dat))
				  (run-dir       (db:test-get-rundir new-test-dat))    ;; run dir is from the link tree
				  (real-dir      (if (file-exists? run-dir)
						     (resolve-pathname run-dir)
						     #f))
				  (test-state    (db:test-get-state new-test-dat))
				  (test-fulln    (db:test-get-fullname new-test-dat)))
			     (case action
			       ((remove-runs)
				(debug:print-info 0 "test: " test-name " itest-state: " test-state)
				(if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))
				    (begin
				      (if (not (hash-table-ref/default test-retry-time test-fulln #f))
					  (begin
					    ;; want to set to REMOVING BUT CANNOT do it here?
					    (hash-table-set! test-retry-time test-fulln (current-seconds))))
				      (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time)
					  ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first
					  ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give
					  ;; up and blow it away.
					  (begin
					    (debug:print 0 "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing")
					    (mt:test-set-state-status-by-id (db:test-get-id test) "FAILEDKILL" "n/a" #f)
					    (thread-sleep! 1))
					  (begin
					    (mt:test-set-state-status-by-id (db:test-get-id test) "KILLREQ" "n/a" #f)
					    (thread-sleep! 1)))
				      ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ...
				      (if (null? tal)
					  (loop new-test-dat tal)
					  (loop (car tal)(append tal (list new-test-dat)))))
				    (begin
				      (mt:test-set-state-status-by-id (db:test-get-id test) "REMOVING" "LOCKED" #f)
				      (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir)
				      (if (and real-dir 
					       (> (string-length real-dir) 5)
					       (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc.
					  (begin ;; let* ((realpath (resolve-pathname run-dir)))
					    (debug:print-info 1 "Recursively removing " real-dir)
					    (if (file-exists? real-dir)
						(runs:safe-delete-test-dir real-dir)
						(debug:print 0 "WARNING: test dir " real-dir " appears to not exist or is not readable")))
					  (if real-dir 
					      (debug:print 0 "WARNING: directory " real-dir " does not exist")
					      (debug:print 0 "WARNING: no real directory corrosponding to link " run-dir ", nothing done")))
				      (if (symbolic-link? run-dir)
					  (begin
					    (debug:print-info 1 "Removing symlink " run-dir)
					    (handle-exceptions
					     exn
					     (debug:print 0 "ERROR:  Failed to remove symlink " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue")
					     (delete-file run-dir)))
					  (if (directory? run-dir)
					      (if (> (directory-fold (lambda (f x)(+ 1 x)) 0 run-dir) 0)
						  (debug:print 0 "WARNING: refusing to remove " run-dir " as it is not empty")
						  (handle-exceptions
						   exn
						   (debug:print 0 "ERROR:  Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue")
						   (delete-directory run-dir)))
					      (if run-dir
						  (debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink")
						  (debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted."))
					      ))
				      ;; Only delete the records *after* removing the directory. If things fail we have a record 
				      (cdb:remote-run db:delete-test-records db #f (db:test-get-id test))
				      (if (not (null? tal))
					  (loop (car tal)(cdr tal))))))
			       ((set-state-status)
				(debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status))
				(mt:test-set-state-status-by-id (db:test-get-id test) (car state-status)(cadr state-status) #f)
				(if (not (null? tal))
				    (loop (car tal)(cdr tal))))
			       ((run-wait)
				(debug:print-info 2 "still waiting, " (length tests) " tests still running")
				(thread-sleep! 10)
				(let ((new-tests (proc-get-tests run-id)))
				  (if (null? new-tests)
				      (debug:print-info 1 "Run completed according to zero tests matching provided criteria.")
				      (loop (car new-tests)(cdr new-tests))))))))
		       )))))
	   ;; remove the run if zero tests remain
	   (if (eq? action 'remove-runs)
	       (let ((remtests (mt:get-tests-for-run (db:get-value-by-header run header "id") #f '("DELETED") '("n/a") not-in: #t)))
		 (if (null? remtests) ;; no more tests remaining
		     (let* ((dparts  (string-split lasttpath "/"))
			    (runpath (conc "/" (string-intersperse 
						(take dparts (- (length dparts) 1))