Megatest

Check-in [05b23944bc]
Login
Overview
Comment:wip; added hooks for subrun remove-run handling
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | 1.65-subrun-ancilliary-usecases
Files: files | file ages | folders
SHA1: 05b23944bc99acfed408faa1fb9e847a9f81b687
User & Date: bjbarcla on 2017-12-26 18:07:07
Other Links: branch diff | manifest | tags
Context
2017-12-27
16:24
subrun kill works but suboptimal (serial kill) check-in: e010ede9bd user: bjbarcla tags: 1.65-subrun-ancilliary-usecases
2017-12-26
18:07
wip; added hooks for subrun remove-run handling check-in: 05b23944bc user: bjbarcla tags: 1.65-subrun-ancilliary-usecases
16:14
restored launch functionality using subrun under updated subrun architecture check-in: b1363320bf user: bjbarcla tags: 1.65-subrun-ancilliary-usecases
Changes

Modified runs.scm from [6620831073] to [6aafacc0b0].

2062
2063
2064
2065
2066
2067
2068

2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085






2086
2087
2088
2089
2090
2091
2092
			     (if (not (null? tal))
				 (loop (car tal)(cdr tal))))
			   (let* ((item-path     (db:test-get-item-path new-test-dat))
				  (test-name     (db:test-get-testname new-test-dat))
				  (run-dir       ;;(filedb:get-path *fdb*
				   ;; (rmt:sdb-qry 'getid 
				   (db:test-get-rundir new-test-dat)) ;; )    ;; run dir is from the link tree

				  (test-state    (db:test-get-state new-test-dat))
				  (test-fulln    (db:test-get-fullname new-test-dat))
				  (uname         (db:test-get-uname    new-test-dat))
				  (toplevel-with-children (and (db:test-get-is-toplevel test)
							       (> (rmt:test-toplevel-num-items run-id test-name) 0))))
			     (case action
			       ((remove-runs)
				;; if the test is a toplevel-with-children issue an error and do not remove
				(cond
                                 (toplevel-with-children
                                  (debug:print 0 *default-log-port* "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests")
                                  (hash-table-set! toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1))
                                  (if (> (hash-table-ref toplevel-retries test-fulln) 3)
                                      (if (not (null? tal))
                                          (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries
                                      (let ((newtal (append tal (list test))))
                                        (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue






                                 (else
                                  (debug:print-info 0 *default-log-port* "test: " test-name " itest-state: " test-state)
                                  (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))
                                      (begin
                                        (if (not (hash-table-ref/default test-retry-time test-fulln #f))
                                            (begin
                                              ;; want to set to REMOVING BUT CANNOT do it here?







>

















>
>
>
>
>
>







2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
			     (if (not (null? tal))
				 (loop (car tal)(cdr tal))))
			   (let* ((item-path     (db:test-get-item-path new-test-dat))
				  (test-name     (db:test-get-testname new-test-dat))
				  (run-dir       ;;(filedb:get-path *fdb*
				   ;; (rmt:sdb-qry 'getid 
				   (db:test-get-rundir new-test-dat)) ;; )    ;; run dir is from the link tree
                                  (has-subrun   (subrun:subrun-test-initialized? run-dir))
				  (test-state    (db:test-get-state new-test-dat))
				  (test-fulln    (db:test-get-fullname new-test-dat))
				  (uname         (db:test-get-uname    new-test-dat))
				  (toplevel-with-children (and (db:test-get-is-toplevel test)
							       (> (rmt:test-toplevel-num-items run-id test-name) 0))))
			     (case action
			       ((remove-runs)
				;; if the test is a toplevel-with-children issue an error and do not remove
				(cond
                                 (toplevel-with-children
                                  (debug:print 0 *default-log-port* "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests")
                                  (hash-table-set! toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1))
                                  (if (> (hash-table-ref toplevel-retries test-fulln) 3)
                                      (if (not (null? tal))
                                          (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries
                                      (let ((newtal (append tal (list test))))
                                        (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue
                                 (has-subrun
                                  ;; BB TODO - manage toplevel-retries hash and retries in general
                                  (subrun:remove-subrun test-run-dir new-test-dat test-name item-path test-state test-fulln toplevel-with-children test)
                                  

                                  )
                                 (else
                                  (debug:print-info 0 *default-log-port* "test: " test-name " itest-state: " test-state)
                                  (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))
                                      (begin
                                        (if (not (hash-table-ref/default test-retry-time test-fulln #f))
                                            (begin
                                              ;; want to set to REMOVING BUT CANNOT do it here?
2108
2109
2110
2111
2112
2113
2114

2115
2116
2117
2118
2119

2120
2121
2122
2123
2124
2125
2126

2127
2128
2129
2130
2131
2132
2133
                                            (loop (car tal)(append tal (list new-test-dat)))))
                                      (begin
                                        (runs:remove-test-directory new-test-dat mode) ;; 'remove-all)
                                        (if (not (null? tal))
                                            (loop (car tal)(cdr tal)))))))
				(rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id)))
			       ((set-state-status)

				(debug:print-info 2 *default-log-port* "new state " (car state-status) ", new status " (cadr state-status))
				(mt:test-set-state-status-by-id run-id (db:test-get-id test) (car state-status)(cadr state-status) #f)
				(if (not (null? tal))
				    (loop (car tal)(cdr tal))))
			       ((run-wait)

				(debug:print-info 2 *default-log-port* "still waiting, " (length tests) " tests still running")
				(thread-sleep! 10)
				(let ((new-tests (proc-get-tests run-id)))
				  (if (null? new-tests)
				      (debug:print-info 1 *default-log-port* "Run completed according to zero tests matching provided criteria.")
				      (loop (car new-tests)(cdr new-tests)))))
			       ((archive)

				(if (and run-dir (not toplevel-with-children))
				    (let ((ddir (conc run-dir "/")))
				      (case (string->symbol (args:get-arg "-archive"))
					((save save-remove keep-html)
					 (if (common:file-exists? ddir)
					     (debug:print-info 0 *default-log-port* "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir)))))))
				(if (not (null? tal))







>





>







>







2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
                                            (loop (car tal)(append tal (list new-test-dat)))))
                                      (begin
                                        (runs:remove-test-directory new-test-dat mode) ;; 'remove-all)
                                        (if (not (null? tal))
                                            (loop (car tal)(cdr tal)))))))
				(rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id)))
			       ((set-state-status)
                                ;; BB TODO - manage has-subrun case
				(debug:print-info 2 *default-log-port* "new state " (car state-status) ", new status " (cadr state-status))
				(mt:test-set-state-status-by-id run-id (db:test-get-id test) (car state-status)(cadr state-status) #f)
				(if (not (null? tal))
				    (loop (car tal)(cdr tal))))
			       ((run-wait)
                                ;; BB TODO - manage has-subrun case
				(debug:print-info 2 *default-log-port* "still waiting, " (length tests) " tests still running")
				(thread-sleep! 10)
				(let ((new-tests (proc-get-tests run-id)))
				  (if (null? new-tests)
				      (debug:print-info 1 *default-log-port* "Run completed according to zero tests matching provided criteria.")
				      (loop (car new-tests)(cdr new-tests)))))
			       ((archive)
                                ;; BB TODO - manage has-subrun case
				(if (and run-dir (not toplevel-with-children))
				    (let ((ddir (conc run-dir "/")))
				      (case (string->symbol (args:get-arg "-archive"))
					((save save-remove keep-html)
					 (if (common:file-exists? ddir)
					     (debug:print-info 0 *default-log-port* "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir)))))))
				(if (not (null? tal))

Modified subrun.scm from [5fbd7e058e] to [4e6779a4e5].

26
27
28
29
30
31
32








33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52



























53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75

;(include "common_records.scm")
;;(include "key_records.scm")
;;(include "db_records.scm")
;;(include "run_records.scm")
;;(include "test_records.scm")










;; Prepare a toplevel test's run directory for a subrun.
;;   testconfig   - the test's config object; it is modified in place
;;   test-run-dir - directory in which the "subrun-area" symlink is created
;; Steps: read "run-area" and "logpro" from the [subrun] section, copy the
;; logpro rules into the [logpro] section under the step name "subrun",
;; replace any existing <test-run-dir>/subrun-area with a symlink to the run
;; area, then write the config out as "testconfig.subrun".
;; NOTE(review): the lookup uses the key "run-area" but the default is stored
;; under "runarea", and `ra` is still #f when create-symbolic-link is called
;; in that case - confirm whether this is intended.
;; NOTE(review): "testconfig.subrun" is written with a relative path, so this
;; presumably relies on the current directory being test-run-dir - verify.
(define (subrun:initialize-toprun-test  testconfig test-run-dir)

  (let ((ra (configf:lookup testconfig "subrun" "run-area"))
        (logpro (configf:lookup testconfig "subrun" "logpro"))
        (symlink-target (conc test-run-dir "/subrun-area"))
        )
  (when (not ra)      ;; when runarea is not set we default to *toppath*. However 
              ;; we need to force the setting in the testconfig so it will
          ;; be preserved in the testconfig.subrun file
      (configf:set-section-var testconfig "subrun" "runarea" *toppath*))
    (configf:set-section-var testconfig "logpro" "subrun" logpro) ;; append the logpro rules to the logpro section as stepname subrun

    ;; remove a previous link so that create-symbolic-link does not collide with it
    (if (common:file-exists? symlink-target)
        (delete-file symlink-target))
    
    (create-symbolic-link ra symlink-target)

    ;; persist the (possibly modified) config for later subrun commands
    (configf:write-alist testconfig "testconfig.subrun")))





























;; Build the command line string used to launch the subrun for the test in
;; test-run-dir. The selector and -log switches come from
;; subrun:selector+log-switches using the log prefix "run"; "-run-wait " is
;; appended because run-wait is hard-wired to #t here.
;; Returns the command as a single string.
(define (subrun:launch-cmd test-run-dir)
  (let* ((log-prefix "run")
         (switches (subrun:selector+log-switches test-run-dir log-prefix))
         (run-wait #t)
         (cmd      (conc "megatest -run "switches" "
                         (if run-wait "-run-wait " ""))))
    cmd))

;; set state/status of test item
;; fork off megatest
;; set state/status of test item
;;


(define (subrun:selector+log-switches test-run-dir log-prefix)
  (let* ((switch-def-alist (common:get-param-mapping flavor: 'config))
         (subrunfile   (conc test-run-dir "/testconfig.subrun" ))
         (subrundata   (with-input-from-file subrunfile read))
         (subrunconfig (configf:alist->config subrundata))
         (run-area     (configf:lookup subrunconfig "subrun" "run-area"))
         (defvals      `(("start-dir" . ,(or run-area  ;; default values if not specified in subrun section of tconf
                                             (get-environment-variable "MT_RUN_AREA_HOME")







>
>
>
>
>
>
>
>




















>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>









<
<
<
<

<
|







26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96




97

98
99
100
101
102
103
104
105

;(include "common_records.scm")
;;(include "key_records.scm")
;;(include "db_records.scm")
;;(include "run_records.scm")
;;(include "test_records.scm")

;; Predicate: #t when test-run-dir contains both the "subrun-area" entry and
;; the "testconfig.subrun" file, #f otherwise.
(define (subrun:subrun-test-initialized? test-run-dir)
  (let ((area-link   (conc test-run-dir "/subrun-area"))
        (subrun-conf (conc test-run-dir "/testconfig.subrun")))
    ;; the trailing #t normalizes any truthy result to a strict boolean
    (and (common:file-exists? area-link)
         (common:file-exists? subrun-conf)
         #t)))

;; Returns the value of "runwait" from the [subrun] section of testconfig.
;; The presence of that setting is used as the flag that a subrun is
;; requested, so the raw lookup result doubles as the predicate value.
(define (subrun:testconfig-defines-subrun? testconfig)
  (let ((runwait-setting (configf:lookup testconfig "subrun" "runwait")))
    runwait-setting))

;; Prepare a toplevel test's run directory for a subrun.
;;   testconfig   - the test's config object; it is modified in place
;;   test-run-dir - directory in which the "subrun-area" symlink is created
;;
;; Steps:
;;   1. Resolve the run area from [subrun] run-area, defaulting to *toppath*.
;;      The default is stored back under the same "run-area" key so it is
;;      preserved in testconfig.subrun, where the selector code reads it.
;;   2. Copy the [subrun] logpro rules into [logpro] as step name "subrun".
;;   3. Replace any existing <test-run-dir>/subrun-area with a symlink to the
;;      run area.
;;   4. Write the config out as "testconfig.subrun".
;;
;; NOTE(review): "testconfig.subrun" is written with a relative path, so this
;; presumably relies on the current directory being test-run-dir - verify.
(define (subrun:initialize-toprun-test  testconfig test-run-dir)
  (let* ((configured-ra  (configf:lookup testconfig "subrun" "run-area"))
         ;; never hand #f to create-symbolic-link: fall back to *toppath*
         (ra             (or configured-ra *toppath*))
         (logpro         (configf:lookup testconfig "subrun" "logpro"))
         (symlink-target (conc test-run-dir "/subrun-area")))
    (when (not configured-ra)
      ;; when run-area is not set we default to *toppath*. However we need to
      ;; force the setting in the testconfig so it will be preserved in the
      ;; testconfig.subrun file. The key must match the one looked up above.
      (configf:set-section-var testconfig "subrun" "run-area" ra))
    ;; append the logpro rules to the logpro section as stepname subrun
    ;; NOTE(review): logpro may be #f when [subrun] has no logpro entry - confirm
    ;; that storing #f here is acceptable.
    (configf:set-section-var testconfig "logpro" "subrun" logpro)

    ;; remove a previous link so that create-symbolic-link does not collide with it
    (if (common:file-exists? symlink-target)
        (delete-file symlink-target))

    (create-symbolic-link ra symlink-target)

    (configf:write-alist testconfig "testconfig.subrun")))


;; Hook for remove-runs handling of a test that owns a subrun (work in progress).
;;   test-run-dir  - run directory of the toplevel test holding testconfig.subrun
;;   test-state    - current state string of the test
;;   test          - test record; its id is used when updating state/status
;;   new-test-dat, test-name, item-path, test-fulln, toplevel-with-children
;;                 - accepted for the caller's convenience, currently unused
;;
;; Intended flow (not yet fully implemented):
;;   set state/status of test item
;;   fork off megatest
;;   set state/status of test item
;;
;; Current behaviour: if the subrun's run log does not exist, report that and do
;; nothing else. Otherwise, when the test is still in an active state, mark it
;; "SUBRUN-KILLREQ". No retry or removal is performed here yet.
;;
;; NOTE(review): run-id is not a parameter of this procedure and is not bound in
;; the visible code; unless it is a global defined elsewhere, the state update
;; below will fail at run time - confirm and pass it in explicitly.
(define (subrun:remove-subrun test-run-dir new-test-dat test-name item-path test-state test-fulln toplevel-with-children test)
  (let* ((log-prefix   "run") ;; same prefix subrun:launch-cmd uses, so we find the launch log
         (subrun-alist (subrun:selector+log-alist test-run-dir log-prefix))
         (runlog       (alist-ref "-log" subrun-alist equal? #f)))
    (if (not (common:file-exists? runlog))
        (BB> "no runlog @ "runlog)
        (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))
            ;; This test is not in a correct state for cleaning up, so request a
            ;; graceful shutdown of the subrun by flagging it "SUBRUN-KILLREQ".
            ;;
            ;; TODO: call in submegatest:
            ;;  (tasks:kill-runner target run-name testpatt)
            (mt:test-set-state-status-by-id run-id (db:test-get-id test) "SUBRUN-KILLREQ" "n/a" #f))

        ;; TODO on success:
        ;;   set state of test, or delete it or whatever
        )))

;; Compose the shell command that launches the subrun belonging to
;; test-run-dir. Selector and -log switches are generated with the log
;; prefix "run"; waiting for the subrun to finish is always requested.
;; Returns the command as one string.
(define (subrun:launch-cmd test-run-dir)
  (let* ((wait-for-subrun? #t)
         (selector-args    (subrun:selector+log-switches test-run-dir "run"))
         (wait-arg         (if wait-for-subrun? "-run-wait " "")))
    (conc "megatest -run " selector-args " " wait-arg)))







(define (subrun:selector+log-alist test-run-dir log-prefix)
  (let* ((switch-def-alist (common:get-param-mapping flavor: 'config))
         (subrunfile   (conc test-run-dir "/testconfig.subrun" ))
         (subrundata   (with-input-from-file subrunfile read))
         (subrunconfig (configf:alist->config subrundata))
         (run-area     (configf:lookup subrunconfig "subrun" "run-area"))
         (defvals      `(("start-dir" . ,(or run-area  ;; default values if not specified in subrun section of tconf
                                             (get-environment-variable "MT_RUN_AREA_HOME")
115
116
117
118
119
120
121

122
123








124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
         (switch-alist  (cons
                         (cons "-log" logfile)
                         (map (lambda (item)
                                (if (equal? (car item) "-testpatt")
                                    (cons "-testpatt" testpatt)
                                    item))
                                switch-alist-pre))))

    ;; note - get precmd from subrun section
    ;;   apply to submegatest commands








    (let* ((res
            (string-intersperse
             (apply
              append
              (map
               (lambda (x)
                 (list (car x) (cdr x)))
               switch-alist))
              " ")))
      res)))


(define (subrun:exec-sub-megatest test-run-dir switches #!key (logfile #f))
  (let* ((real-logfile (or logfile (conc (test-run-dir) "/subrun-"
                                         (string-substitute "[/*]" "_" (string-intersperse switches "^"))"-"
                                         (number->string (current-seconds)) ".log")))
         (selector-switches  (common:sub-megatest-selector-switches test-run-dir))
         (cmd-list `("megatest" ,@selector-switches ,@switches "-log" ,real-logfile))







>


>
>
>
>
>
>
>
>
|
|
|
|
|
|
|
|
|
|
<







145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172

173
174
175
176
177
178
179
         (switch-alist  (cons
                         (cons "-log" logfile)
                         (map (lambda (item)
                                (if (equal? (car item) "-testpatt")
                                    (cons "-testpatt" testpatt)
                                    item))
                                switch-alist-pre))))
    switch-alist))
    ;; note - get precmd from subrun section
    ;;   apply to submegatest commands

;; Return the value associated with "-log" in the switch alist produced by
;; subrun:selector+log-alist for test-run-dir / log-prefix, or #f when the
;; alist has no such entry.
(define (subrun:get-log-path test-run-dir log-prefix)
  (alist-ref "-log"
             (subrun:selector+log-alist test-run-dir log-prefix)
             equal?
             #f))

;; Flatten the (switch . value) alist from subrun:selector+log-alist into a
;; single space-separated string: "switch1 value1 switch2 value2 ...".
(define (subrun:selector+log-switches test-run-dir log-prefix)
  (let loop ((pairs  (subrun:selector+log-alist test-run-dir log-prefix))
             (tokens '()))  ;; accumulated in reverse order
    (if (null? pairs)
        (string-intersperse (reverse tokens) " ")
        (let ((switch (car (car pairs)))
              (value  (cdr (car pairs))))
          ;; push switch then value so that reversing restores "switch value"
          (loop (cdr pairs)
                (cons value (cons switch tokens)))))))


(define (subrun:exec-sub-megatest test-run-dir switches #!key (logfile #f))
  (let* ((real-logfile (or logfile (conc (test-run-dir) "/subrun-"
                                         (string-substitute "[/*]" "_" (string-intersperse switches "^"))"-"
                                         (number->string (current-seconds)) ".log")))
         (selector-switches  (common:sub-megatest-selector-switches test-run-dir))
         (cmd-list `("megatest" ,@selector-switches ,@switches "-log" ,real-logfile))