Megatest

Check-in [a035fad97b]
Login
Overview
Comment:parallelized removal of subruns
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | 1.65-subrun-ancilliary-usecases
Files: files | file ages | folders
SHA1: a035fad97b7ad66a7d566285045b6cc1ba0d02bc
User & Date: bjbarcla on 2017-12-27 18:12:04
Other Links: branch diff | manifest | tags
Context
2017-12-27
19:01
updated to work with keep-records and updated manual Leaf check-in: 70391eee14 user: bjbarcla tags: 1.65-subrun-ancilliary-usecases
18:12
parallelized removal of subruns check-in: a035fad97b user: bjbarcla tags: 1.65-subrun-ancilliary-usecases
16:24
subrun kill works but suboptimal (serial kill) check-in: e010ede9bd user: bjbarcla tags: 1.65-subrun-ancilliary-usecases
Changes

Modified common.scm from [bd48028047] to [ebc2b450b4].

17
18
19
20
21
22
23

24
25
26
27
28
29
30
     (prefix sqlite3 sqlite3:)
     pkts
     )

(declare (unit common))

(include "common_records.scm")


;; (require-library margs)
;; (include "margs.scm")

;; (define old-exit exit)
;; 
;; (define (exit . code)







>







17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
     (prefix sqlite3 sqlite3:)
     pkts
     )

(declare (unit common))

(include "common_records.scm")


;; (require-library margs)
;; (include "margs.scm")

;; (define old-exit exit)
;; 
;; (define (exit . code)
2626
2627
2628
2629
2630
2631
2632






                     ((string? new-val)
                      (setenv env-var new-val)))
                    restore-thunk))
                delta-env-alist))))
    (let ((rv (thunk)))
      (for-each (lambda (x) (x)) restore-thunks) ;; restore env to original state
      rv)))













>
>
>
>
>
>
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
                     ((string? new-val)
                      (setenv env-var new-val)))
                    restore-thunk))
                delta-env-alist))))
    (let ((rv (thunk)))
      (for-each (lambda (x) (x)) restore-thunks) ;; restore env to original state
      rv)))

(define (common:send-thunk-to-background-thread thunk #!key (name #f))
  ;;(BB> "launched thread " name)
  (if name
      (thread-start! (make-thread thunk name))
      (thread-start! (make-thread thunk))))

Modified runs.scm from [089f325036] to [b511145c8a].

2047
2048
2049
2050
2051
2052
2053



2054
2055
2056
2057
2058
2059
2060
									 (dirb ;; (rmt:sdb-qry 'getstr 
									  (db:test-get-rundir b))) ;; ) ;; ((filedb:get-path *fdb* (db:test-get-rundir b))))
								     (if (and (string? dira)(string? dirb))
									 (> (string-length dira)(string-length dirb))
									 #f))))))
		       (toplevel-retries (make-hash-table)) ;; try three times to loop through and remove top level tests
		       (test-retry-time  (make-hash-table))



		       (allow-run-time   10)) ;; seconds to allow for killing tests before just brutally killing 'em
		   (let loop ((test (car sorted-tests))
			      (tal  (cdr sorted-tests)))
		     (let* ((test-id       (db:test-get-id test))
			    (new-test-dat  (rmt:get-test-info-by-id run-id test-id)))
		       (if (not new-test-dat)
			   (begin







>
>
>







2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
									 (dirb ;; (rmt:sdb-qry 'getstr 
									  (db:test-get-rundir b))) ;; ) ;; ((filedb:get-path *fdb* (db:test-get-rundir b))))
								     (if (and (string? dira)(string? dirb))
									 (> (string-length dira)(string-length dirb))
									 #f))))))
		       (toplevel-retries (make-hash-table)) ;; try three times to loop through and remove top level tests
		       (test-retry-time  (make-hash-table))
                       (backgrounded-remove-status     (make-hash-table))
                       (backgrounded-remove-last-visit (make-hash-table))
                       (backgrounded-remove-result     (make-hash-table))
		       (allow-run-time   10)) ;; seconds to allow for killing tests before just brutally killing 'em
		   (let loop ((test (car sorted-tests))
			      (tal  (cdr sorted-tests)))
		     (let* ((test-id       (db:test-get-id test))
			    (new-test-dat  (rmt:get-test-info-by-id run-id test-id)))
		       (if (not new-test-dat)
			   (begin
2082
2083
2084
2085
2086
2087
2088
2089





2090




2091
2092





















2093
2094
2095


2096
2097
2098
2099
2100
2101
2102
2103



2104
2105
2106
2107
2108
2109
2110
                                  (hash-table-set! toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1))
                                  (if (> (hash-table-ref toplevel-retries test-fulln) 3)
                                      (if (not (null? tal))
                                          (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries
                                      (let ((newtal (append tal (list test))))
                                        (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue
                                 (has-subrun
                                  ;; BB TODO - manage toplevasel-retries hash and retries in general





                                  (debug:print 0 *default-log-port* "WARNING: postponing removal of " test-fulln " with run-id " run-id " as it has a subrun")




                                  (let* ((subrun-remove-succeeded
                                          (subrun:remove-subrun run-dir new-test-dat test-name item-path test-state test-fulln toplevel-with-children test)))





















                                    (cond
                                     (subrun-remove-succeeded
                                      


                                      (debug:print 0 *default-log-port* "Now removing of " test-fulln " with run-id " run-id " as it has a subrun")
                                      (runs:remove-test-directory new-test-dat mode))
                                     (else
                                      (let* ((logfile (subrun:get-log-path run-dir "remove")))
                                        (debug:print 0 *default-log-port* "WARNING: removal of subrun failed.  Please check "logfile" for details."))))
                                    
                                  (if (not (null? tal))
                                            (loop (car tal)(cdr tal)))))




                                 (else
                                  (debug:print-info 0 *default-log-port* "test: " test-name " itest-state: " test-state)
                                  (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))
                                      (begin
                                        (if (not (hash-table-ref/default test-retry-time test-fulln #f))
                                            (begin







|
>
>
>
>
>
|
>
>
>
>
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|
>
>
|
|
|
|
|
<
|
|
>
>
>







2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135

2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
                                  (hash-table-set! toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1))
                                  (if (> (hash-table-ref toplevel-retries test-fulln) 3)
                                      (if (not (null? tal))
                                          (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries
                                      (let ((newtal (append tal (list test))))
                                        (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue
                                 (has-subrun
                                  ;; 
                                  (let ((last-visit (hash-table-ref/default backgrounded-remove-last-visit test-fulln 0))
                                        (now        (current-seconds))
                                        (rem-status (hash-table-ref/default backgrounded-remove-status test-fulln 'not-started)))
                                    (case rem-status
                                      ((not-started)
                                       (debug:print 0 *default-log-port* "WARNING: postponing removal of " test-fulln " with run-id " run-id " as it has a subrun")
                                       (hash-table-set! backgrounded-remove-status test-fulln 'started)
                                       (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds))
                                       (common:send-thunk-to-background-thread
                                        (lambda ()
                                          (let* ((subrun-remove-succeeded
                                                  (subrun:remove-subrun run-dir new-test-dat test-name item-path test-state test-fulln toplevel-with-children test)))
                                            (hash-table-set! backgrounded-remove-result test-fulln subrun-remove-succeeded)
                                            (hash-table-set! backgrounded-remove-status test-fulln 'done)))
                                        name: (conc "remove-subrun:"test-fulln))
                                       
                                       ;; send to back of line, loop
                                       (let ((newtal (append tal (list test))))
                                        (loop (car newtal)(cdr newtal)))
                                       )
                                      ((started)
                                       ;; if last visit was within last second, sleep 1 second
                                       (if (< (- now last-visit) 1.0)
                                           (thread-sleep! 1.0))
                                       (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds))
                                       ;; send to back of line, loop
                                       (let ((newtal (append tal (list test))))
                                        (loop (car newtal)(cdr newtal)))
                                       )
                                      ((done)
                                       ;; drop this one; if remaining, loop, else finish
                                       (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds))
                                       (let ((subrun-remove-succeeded (hash-table-ref/default backgrounded-remove-result test-fulln 'exception)))
                                         (cond
                                          ((eq? subrun-remove-succeeded 'exception)
                                           (let* ((logfile (subrun:get-log-path run-dir "remove")))
                                             (debug:print 0 *default-log-port* "ERROR: removing subrun of of " test-fulln " with run-id " run-id " ; see logfile @ "logfile)))
                                          (subrun-remove-succeeded
                                           (debug:print 0 *default-log-port* "Now removing of " test-fulln " with run-id " run-id " since subrun was removed.")
                                           (runs:remove-test-directory new-test-dat mode))
                                          (else
                                           (let* ((logfile (subrun:get-log-path run-dir "remove")))
                                             (debug:print 0 *default-log-port* "WARNING: removal of subrun failed.  Please check "logfile" for details."))))

                                         (if (not (null? tal))
                                             (loop (car tal)(cdr tal)))))
                                      ) ; end case rem-status
                                    ) ; end let
                                  ); end cond has-subrun

                                 (else
                                  (debug:print-info 0 *default-log-port* "test: " test-name " itest-state: " test-state)
                                  (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))
                                      (begin
                                        (if (not (hash-table-ref/default test-retry-time test-fulln #f))
                                            (begin

Modified subrun.scm from [9da03e90bf] to [3571b59cfa].

75
76
77
78
79
80
81
82
83
84
85
86
87
88
89


(define (subrun:remove-subrun test-run-dir new-test-dat test-name item-path test-state test-fulln toplevel-with-children test)
;; set state/status of test item
;; fork off megatest
;; set state/status of test item
;;
  (BB> "Entered subrun:remove-subrun with "test-fulln)
  (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir))
      (let* ((remove-result
              (subrun:exec-sub-megatest test-run-dir "-remove-runs" "remove")))
        (if remove-result
            (begin
              (subrun:set-subrun-removed test-run-dir)
              #t)







|







75
76
77
78
79
80
81
82
83
84
85
86
87
88
89


(define (subrun:remove-subrun test-run-dir new-test-dat test-name item-path test-state test-fulln toplevel-with-children test)
;; set state/status of test item
;; fork off megatest
;; set state/status of test item
;;
  ;;(BB> "Entered subrun:remove-subrun with "test-fulln)
  (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir))
      (let* ((remove-result
              (subrun:exec-sub-megatest test-run-dir "-remove-runs" "remove")))
        (if remove-result
            (begin
              (subrun:set-subrun-removed test-run-dir)
              #t)